"""Rename pre-hash embedder figs to include the embed_args hash suffix. Walks figs/ for `.html` files matching the old stem shape (no hash tail) that represent an embedder run (not Reference), reads the sibling `.metrics.json` to recover `meta.embed_args`, computes the hash, and renames the .html + .metrics.json in place. Default is a dry-run — pass `--apply` to actually rename. Reference files are left alone (they have no embed_args). Missing metrics.json → warn and skip. Target-name collision → warn and skip. Usage: .venv/bin/python scripts/backfill_hashes.py [--apply] [--figs-dir PATH] """ from __future__ import annotations import argparse import json import re import sys from pathlib import Path # Reach up to the project root so we can reuse the canonical hash helper. _ROOT = Path(__file__).resolve().parent.parent sys.path.insert(0, str(_ROOT)) from app.web.main import embed_args_hash # noqa: E402 _LEGACY_STEM = re.compile( r"^(?Pmake_[A-Za-z_]+?_[A-Za-z]+_N\d+_T\d+_J[\d.]+_s\d+)$" ) def plan_renames(figs_dir: Path): for html in sorted(figs_dir.glob("*.html")): stem = html.stem m = _LEGACY_STEM.match(stem) if not m: # Either already hashed or doesn't match our scheme at all. continue # Skip Reference runs — they have no embed_args. if "_Reference_" in stem: continue metrics = figs_dir / f"{stem}.metrics.json" if not metrics.is_file(): yield (html, None, "missing metrics.json — can't compute hash") continue try: ea = json.loads(metrics.read_text(encoding="utf-8"))["meta"]["embed_args"] except (KeyError, json.JSONDecodeError) as e: yield (html, None, f"bad metrics.json: {e}") continue new_stem = f"{stem}_{embed_args_hash(ea)}" new_html = figs_dir / f"{new_stem}.html" if new_html.exists(): yield (html, None, f"target exists: {new_html.name}") continue yield (html, new_stem, None) def apply_rename(figs_dir: Path, old_stem: str, new_stem: str) -> list[str]: """Rename every sidecar sharing the old stem. Returns the renamed files.""" renamed = [] for suffix in (".html", ".metrics.json", ".frames.json"): src = figs_dir / f"{old_stem}{suffix}" if not src.exists(): continue dst = figs_dir / f"{new_stem}{suffix}" src.rename(dst) renamed.append(f"{src.name} -> {dst.name}") return renamed def main() -> int: ap = argparse.ArgumentParser(description=__doc__) ap.add_argument("--apply", action="store_true", help="actually rename (default: dry-run)") ap.add_argument("--figs-dir", default=str(_ROOT / "figs"), help="path to figs/ directory") args = ap.parse_args() figs_dir = Path(args.figs_dir).resolve() if not figs_dir.is_dir(): print(f"no such directory: {figs_dir}", file=sys.stderr) return 2 planned, skipped = [], [] for html, new_stem, reason in plan_renames(figs_dir): if new_stem is None: skipped.append((html.name, reason)) else: planned.append((html.stem, new_stem)) print(f"scanning {figs_dir}") print(f" {len(planned)} to rename, {len(skipped)} skipped\n") for old, new in planned: print(f" rename {old} -> {new}") if skipped: print("\n skipped:") for name, reason in skipped: print(f" {name} ({reason})") if not planned: return 0 if not args.apply: print("\n(dry run — pass --apply to rename)") return 0 print("\napplying...") for old, new in planned: moved = apply_rename(figs_dir, old, new) for line in moved: print(f" {line}") print(f"done — renamed {len(planned)} run(s)") return 0 if __name__ == "__main__": sys.exit(main())