Compare commits
No commits in common. "47f56b57c85b6b6b6965107b92491d661eab10c7" and "fe495656514196fca6fc4e9ac013093b30dd24c3" have entirely different histories.
47f56b57c8
...
fe49565651
@ -33,22 +33,7 @@ def _embed_args_hash(ea: Optional[Dict[str, Any]]) -> str:
|
|||||||
s = json.dumps(ea or {}, sort_keys=True, default=str)
|
s = json.dumps(ea or {}, sort_keys=True, default=str)
|
||||||
return hashlib.sha1(s.encode()).hexdigest()[:8]
|
return hashlib.sha1(s.encode()).hexdigest()[:8]
|
||||||
|
|
||||||
|
from prefect import flow, task
|
||||||
def _flow_run_name() -> str:
    """Name each Prefect run after the stem of its output fig, so runs are
    searchable / hoverable instead of wearing Prefect's auto-generated
    adjective-animal names."""
    params = runtime.flow_run.parameters or {}

    def _tail(key: str) -> str:
        # Last dotted component of a path-like parameter; "?" when absent/empty.
        return (params.get(key) or "").rsplit(".", 1)[-1] or "?"

    gen = _tail("generator_path")
    emb = _tail("embedder")
    N = params.get("num_points", "?")
    T = params.get("num_timesteps", "?")
    J = params.get("jitter_scale", "?")
    s = params.get("seed", "?")
    tag = _embed_args_hash(params.get("embed_args"))
    return f"{gen}_{emb}_N{N}_T{T}_J{J}_s{s}_{tag}"
|
|
||||||
|
|
||||||
from prefect import flow, runtime, task
|
|
||||||
from prefect.artifacts import create_markdown_artifact, create_table_artifact
|
from prefect.artifacts import create_markdown_artifact, create_table_artifact
|
||||||
from prefect.cache_policies import INPUTS, NO_CACHE
|
from prefect.cache_policies import INPUTS, NO_CACHE
|
||||||
from prefect_ray import RayTaskRunner
|
from prefect_ray import RayTaskRunner
|
||||||
@ -269,7 +254,7 @@ _DEFAULT_EMBED_COLUMNS: List[str] = ["feature_0", "feature_2", "feature_1"]
|
|||||||
_DEFAULT_EMBED_ARGS: Dict[str, Any] = {"n_components": 2, "random_state": 30}
|
_DEFAULT_EMBED_ARGS: Dict[str, Any] = {"n_components": 2, "random_state": 30}
|
||||||
|
|
||||||
|
|
||||||
@flow(task_runner=RayTaskRunner(init_kwargs={"num_cpus": 4}), flow_run_name=_flow_run_name)
|
@flow(task_runner=RayTaskRunner(init_kwargs={"num_cpus": 4}))
|
||||||
def embedding_flow(
|
def embedding_flow(
|
||||||
num_points: int = 5000,
|
num_points: int = 5000,
|
||||||
num_timesteps: int = 48,
|
num_timesteps: int = 48,
|
||||||
|
|||||||
@ -1,119 +0,0 @@
|
|||||||
"""Rename pre-hash embedder figs to include the embed_args hash suffix.
|
|
||||||
|
|
||||||
Walks figs/ for `.html` files matching the old stem shape (no hash tail) that
|
|
||||||
represent an embedder run (not Reference), reads the sibling
|
|
||||||
`<stem>.metrics.json` to recover `meta.embed_args`, computes the hash, and
|
|
||||||
renames the .html + .metrics.json in place.
|
|
||||||
|
|
||||||
Default is a dry-run — pass `--apply` to actually rename. Reference files are
|
|
||||||
left alone (they have no embed_args). Missing metrics.json → warn and skip.
|
|
||||||
Target-name collision → warn and skip.
|
|
||||||
|
|
||||||
Usage:
|
|
||||||
.venv/bin/python scripts/backfill_hashes.py [--apply] [--figs-dir PATH]
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import argparse
|
|
||||||
import json
|
|
||||||
import re
|
|
||||||
import sys
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
# Reach up to the project root so we can reuse the canonical hash helper.
|
|
||||||
_ROOT = Path(__file__).resolve().parent.parent
|
|
||||||
sys.path.insert(0, str(_ROOT))
|
|
||||||
from app.web.main import embed_args_hash # noqa: E402
|
|
||||||
|
|
||||||
_LEGACY_STEM = re.compile(
|
|
||||||
r"^(?P<base>make_[A-Za-z_]+?_[A-Za-z]+_N\d+_T\d+_J[\d.]+_s\d+)$"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def plan_renames(figs_dir: Path):
    """Yield a rename plan entry for every legacy-named fig under *figs_dir*.

    Each yielded triple is ``(html_path, new_stem, skip_reason)`` where exactly
    one of *new_stem* / *skip_reason* is ``None``: a plannable rename carries
    the hashed stem, a skipped file carries a human-readable reason.
    """
    for html in sorted(figs_dir.glob("*.html")):
        stem = html.stem
        # Either already hashed or doesn't match our scheme at all.
        if _LEGACY_STEM.match(stem) is None:
            continue
        # Skip Reference runs — they have no embed_args.
        if "_Reference_" in stem:
            continue

        metrics = figs_dir / f"{stem}.metrics.json"
        if not metrics.is_file():
            yield (html, None, "missing metrics.json — can't compute hash")
            continue

        try:
            ea = json.loads(metrics.read_text(encoding="utf-8"))["meta"]["embed_args"]
        except (KeyError, json.JSONDecodeError) as e:
            yield (html, None, f"bad metrics.json: {e}")
            continue

        new_stem = f"{stem}_{embed_args_hash(ea)}"
        new_html = figs_dir / f"{new_stem}.html"
        if new_html.exists():
            yield (html, None, f"target exists: {new_html.name}")
        else:
            yield (html, new_stem, None)
|
|
||||||
|
|
||||||
|
|
||||||
def apply_rename(figs_dir: Path, old_stem: str, new_stem: str) -> list[str]:
    """Rename every sidecar sharing the old stem. Returns the renamed files."""
    moved: list[str] = []
    # The .html fig plus its optional JSON sidecars move together.
    for ext in (".html", ".metrics.json", ".frames.json"):
        old_path = figs_dir / (old_stem + ext)
        if not old_path.exists():
            continue
        new_path = figs_dir / (new_stem + ext)
        old_path.rename(new_path)
        moved.append(f"{old_path.name} -> {new_path.name}")
    return moved
|
|
||||||
|
|
||||||
|
|
||||||
def main() -> int:
    """CLI entry point: scan figs/, report the rename plan, optionally apply.

    Returns 0 on success (including a clean dry-run), 2 when --figs-dir does
    not exist.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--apply", action="store_true", help="actually rename (default: dry-run)")
    parser.add_argument("--figs-dir", default=str(_ROOT / "figs"), help="path to figs/ directory")
    opts = parser.parse_args()

    figs_dir = Path(opts.figs_dir).resolve()
    if not figs_dir.is_dir():
        print(f"no such directory: {figs_dir}", file=sys.stderr)
        return 2

    planned: list[tuple[str, str]] = []
    skipped: list[tuple[str, str]] = []
    for html, new_stem, reason in plan_renames(figs_dir):
        if new_stem is None:
            skipped.append((html.name, reason))
        else:
            planned.append((html.stem, new_stem))

    print(f"scanning {figs_dir}")
    print(f" {len(planned)} to rename, {len(skipped)} skipped\n")

    for old, new in planned:
        print(f" rename {old} -> {new}")
    if skipped:
        print("\n skipped:")
        for name, reason in skipped:
            print(f" {name} ({reason})")

    if not planned:
        return 0
    if not opts.apply:
        # Default mode: show the plan only, touch nothing on disk.
        print("\n(dry run — pass --apply to rename)")
        return 0

    print("\napplying...")
    for old, new in planned:
        for line in apply_rename(figs_dir, old, new):
            print(f" {line}")
    print(f"done — renamed {len(planned)} run(s)")
    return 0
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
sys.exit(main())
|
|
||||||
Loading…
Reference in New Issue
Block a user