filenames + run names: J in sci notation (5E-3 not 0.005)
Periods in filenames are avoidable, and the Prefect UI dislikes them in run names. This commit adds a sci_notation helper in main.py, mirrored as _sci in the flow. The stem regex (main + parser) now matches J<digits.Ee+-> so that it accepts both old decimal-J and new sci-J filenames, letting the two coexist during the transition. The J tag in the Prefect tag list also uses the sci form, so chip filters stay consistent. The backfill script is extended to find pre-transition (decimal-J) files on disk via a second base-stem variant and then rename them to the sci form. backfill_tags re-patches existing runs so their J tag matches the new canonical form. All 13 existing figs + runs were renamed / retagged in place.
This commit is contained in:
parent
56279dbb1b
commit
e94d28b8fc
@ -475,6 +475,18 @@ def run_args_hash(
|
||||
embed_args_hash = run_args_hash
|
||||
|
||||
|
||||
def sci_notation(v: Any) -> str:
    """Float → compact sci notation (0.005 → '5E-3', 0.01 → '1E-2').

    Used in stems and Prefect run names so filenames + UI avoid periods.

    The value is rounded to four significant digits; trailing zeros (and a
    period left dangling by that) are stripped from the mantissa, and the
    exponent is written as a plain integer with no zero padding. Note that
    a mantissa needing more than one significant digit still carries a
    period (0.0125 → '1.25E-2'); only single-digit mantissas are
    period-free.

    Args:
        v: Any value; it is formatted only if ``float(v)`` succeeds and the
            result is finite.

    Returns:
        The compact sci-notation string, or ``str(v)`` unchanged when ``v``
        cannot be converted to a float or converts to nan / ±inf.
    """
    try:
        f = float(v)
    except (TypeError, ValueError, OverflowError):
        # Not a number (or an int too large for a float): pass it through
        # untouched rather than failing the caller's name construction.
        return str(v)
    # nan / inf format as "nan" / "inf" with no exponent part, so there is
    # nothing to compact; an empty separator from partition() flags that case.
    mantissa, sep, exponent = f"{f:.3e}".partition("e")
    if not sep:
        return str(v)
    mantissa = mantissa.rstrip("0").rstrip(".")
    return f"{mantissa}E{int(exponent)}"
|
||||
|
||||
|
||||
def synthesize_output_paths(
|
||||
generator_path: str,
|
||||
embedder: str,
|
||||
@ -487,8 +499,9 @@ def synthesize_output_paths(
|
||||
) -> Tuple[str, str]:
|
||||
gen = generator_path.split(".")[-1]
|
||||
emb = embedder.split(".")[-1]
|
||||
ref = f"{gen}_Reference_N{num_points}_T{num_timesteps}_J{jitter_scale}_s{seed}.html"
|
||||
base = f"{gen}_{emb}_N{num_points}_T{num_timesteps}_J{jitter_scale}_s{seed}"
|
||||
j = sci_notation(jitter_scale)
|
||||
ref = f"{gen}_Reference_N{num_points}_T{num_timesteps}_J{j}_s{seed}.html"
|
||||
base = f"{gen}_{emb}_N{num_points}_T{num_timesteps}_J{j}_s{seed}"
|
||||
if embed_args is None:
|
||||
embf = f"{base}.html"
|
||||
else:
|
||||
@ -977,7 +990,7 @@ async def metrics_json() -> JSONResponse:
|
||||
|
||||
|
||||
_STEM_RE = re.compile(
|
||||
r"^make_[A-Za-z_]+?_[A-Za-z]+_N\d+_T\d+_J[\d.]+_s\d+(?:_[0-9a-f]{8})?$"
|
||||
r"^make_[A-Za-z_]+?_[A-Za-z]+_N\d+_T\d+_J[\d.Ee+\-]+_s\d+(?:_[0-9a-f]{8})?$"
|
||||
)
|
||||
|
||||
# Map short generator name ("make_blobs") to its DATASET_META entry.
|
||||
@ -1020,7 +1033,7 @@ def build_run_tags(
|
||||
f"algorithm:{(embedder or '').rsplit('.', 1)[-1]}",
|
||||
f"N:{int(num_points)}",
|
||||
f"T:{int(num_timesteps)}",
|
||||
f"J:{jitter_scale}",
|
||||
f"J:{sci_notation(jitter_scale)}",
|
||||
]
|
||||
|
||||
|
||||
|
||||
@ -20,7 +20,7 @@ from pathlib import Path
|
||||
|
||||
_STEM_RE = re.compile(
|
||||
r"^(?P<gen>make_.+?)_(?P<emb>[A-Za-z]+)_N(?P<n>\d+)_T(?P<t>\d+)"
|
||||
r"_J(?P<j>[\d.]+)_s(?P<s>\d+)(?:_(?P<h>[0-9a-f]{8}))?$"
|
||||
r"_J(?P<j>[\d.Ee+\-]+)_s(?P<s>\d+)(?:_(?P<h>[0-9a-f]{8}))?$"
|
||||
)
|
||||
|
||||
# plotly's typed-array dtype -> (struct format char, item size bytes)
|
||||
|
||||
@ -43,6 +43,19 @@ def _run_args_hash(
|
||||
return hashlib.sha1(s.encode()).hexdigest()[:8]
|
||||
|
||||
|
||||
def _sci(v: Any) -> str:
|
||||
"""Float → compact sci notation without a period (e.g. 0.005 → 5E-3,
|
||||
0.01 → 1E-2). Keeps Prefect's UI happy — it doesn't like periods in
|
||||
run names."""
|
||||
try:
|
||||
f = float(v)
|
||||
except (TypeError, ValueError):
|
||||
return str(v)
|
||||
m, e = f"{f:.3e}".split("e")
|
||||
m = m.rstrip("0").rstrip(".")
|
||||
return f"{m}E{int(e)}"
|
||||
|
||||
|
||||
def _flow_run_name() -> str:
|
||||
"""Name each Prefect run after the stem of its output fig, so runs are
|
||||
searchable / hoverable instead of wearing Prefect's auto-generated
|
||||
@ -52,7 +65,7 @@ def _flow_run_name() -> str:
|
||||
emb = (p.get("embedder") or "").rsplit(".", 1)[-1] or "?"
|
||||
N = p.get("num_points", "?")
|
||||
T = p.get("num_timesteps", "?")
|
||||
J = p.get("jitter_scale", "?")
|
||||
J = _sci(p.get("jitter_scale", "?"))
|
||||
s = p.get("seed", "?")
|
||||
tag = _run_args_hash(p.get("embed_args"), p.get("generator_kwargs"))
|
||||
return f"{gen}_{emb}_N{N}_T{T}_J{J}_s{s}_{tag}"
|
||||
@ -315,12 +328,13 @@ def embedding_flow(
|
||||
|
||||
Path(output_dir).mkdir(parents=True, exist_ok=True)
|
||||
_generator = generator_path.split(".")[-1]
|
||||
_j = _sci(jitter_scale)
|
||||
output_ref: str = (
|
||||
f"{output_dir.strip('/')}/{_generator}_Reference_N{num_points}_T{num_timesteps}_J{jitter_scale}_s{seed}.html"
|
||||
f"{output_dir.strip('/')}/{_generator}_Reference_N{num_points}_T{num_timesteps}_J{_j}_s{seed}.html"
|
||||
)
|
||||
_args_tag = _run_args_hash(embed_args, user_generator_kwargs)
|
||||
output_embed: str = (
|
||||
f"{output_dir.strip('/')}/{_generator}_{embedder.split('.')[-1]}_N{num_points}_T{num_timesteps}_J{jitter_scale}_s{seed}_{_args_tag}.html"
|
||||
f"{output_dir.strip('/')}/{_generator}_{embedder.split('.')[-1]}_N{num_points}_T{num_timesteps}_J{_j}_s{seed}_{_args_tag}.html"
|
||||
)
|
||||
output_metrics: str = output_embed[:-5] + ".metrics.json"
|
||||
output_frames: str = output_embed[:-5] + ".frames.json"
|
||||
|
||||
@ -30,7 +30,7 @@ from typing import Any, Dict, List, Optional
|
||||
|
||||
_ROOT = Path(__file__).resolve().parent.parent
|
||||
sys.path.insert(0, str(_ROOT))
|
||||
from app.web.main import PREFECT, run_args_hash # noqa: E402
|
||||
from app.web.main import PREFECT, run_args_hash, sci_notation # noqa: E402
|
||||
|
||||
|
||||
def _legacy_hash(ea: Optional[Dict[str, Any]]) -> str:
|
||||
@ -38,30 +38,45 @@ def _legacy_hash(ea: Optional[Dict[str, Any]]) -> str:
|
||||
return hashlib.sha1(s.encode()).hexdigest()[:8]
|
||||
|
||||
|
||||
def _base_stem(params: Dict[str, Any]) -> Optional[str]:
|
||||
def _base_stems(params: Dict[str, Any]) -> List[str]:
|
||||
"""Return the stem prefix(es) for this run's params: both the current
|
||||
sci-J form and the legacy decimal-J form, so we can find pre-transition
|
||||
files on disk too."""
|
||||
try:
|
||||
gen = (params.get("generator_path") or "").rsplit(".", 1)[-1]
|
||||
emb = (params.get("embedder") or "").rsplit(".", 1)[-1]
|
||||
N = int(params["num_points"])
|
||||
T = int(params.get("num_timesteps", params.get("num_snapshots")))
|
||||
J = float(params["jitter_scale"])
|
||||
Jf = float(params["jitter_scale"])
|
||||
s = int(params["seed"])
|
||||
except (KeyError, TypeError, ValueError):
|
||||
return None
|
||||
return []
|
||||
if not gen or not emb:
|
||||
return None
|
||||
return f"{gen}_{emb}_N{N}_T{T}_J{J}_s{s}"
|
||||
return []
|
||||
out = [f"{gen}_{emb}_N{N}_T{T}_J{sci_notation(Jf)}_s{s}"]
|
||||
legacy = f"{gen}_{emb}_N{N}_T{T}_J{Jf}_s{s}"
|
||||
if legacy not in out:
|
||||
out.append(legacy)
|
||||
return out
|
||||
|
||||
|
||||
def _candidate_names(base: str, ea: Dict[str, Any], gk: Dict[str, Any]) -> List[str]:
|
||||
target = f"{base}_{run_args_hash(ea, gk)}.html"
|
||||
legacy = f"{base}_{_legacy_hash(ea)}.html"
|
||||
no_hash = f"{base}.html"
|
||||
# Preserve order: target first so we short-circuit on already-backfilled.
|
||||
def _candidate_names(bases: List[str], ea: Dict[str, Any], gk: Dict[str, Any]) -> List[str]:
|
||||
# Target = current sci-J base + new-scheme hash.
|
||||
if not bases:
|
||||
return []
|
||||
target_base = bases[0]
|
||||
target = f"{target_base}_{run_args_hash(ea, gk)}.html"
|
||||
out = [target]
|
||||
for x in (legacy, no_hash):
|
||||
if x not in out:
|
||||
out.append(x)
|
||||
# Fall back to every (base, hash) combination we might find on disk.
|
||||
hashes = [run_args_hash(ea, gk), _legacy_hash(ea)]
|
||||
for b in bases:
|
||||
for h in hashes:
|
||||
x = f"{b}_{h}.html"
|
||||
if x not in out:
|
||||
out.append(x)
|
||||
no_hash = f"{b}.html"
|
||||
if no_hash not in out:
|
||||
out.append(no_hash)
|
||||
return out
|
||||
|
||||
|
||||
@ -125,13 +140,13 @@ def main() -> int:
|
||||
params = r.get("parameters") or {}
|
||||
ea = params.get("embed_args") or {}
|
||||
gk = params.get("generator_kwargs") or {}
|
||||
base = _base_stem(params)
|
||||
if not base:
|
||||
bases = _base_stems(params)
|
||||
if not bases:
|
||||
continue
|
||||
target = f"{base}_{run_args_hash(ea, gk)}.html"
|
||||
target = f"{bases[0]}_{run_args_hash(ea, gk)}.html"
|
||||
if target in seen_targets:
|
||||
continue # later duplicate — the stale-marking logic will handle it
|
||||
for candidate in _candidate_names(base, ea, gk):
|
||||
for candidate in _candidate_names(bases, ea, gk):
|
||||
if (figs_dir / candidate).exists():
|
||||
if candidate == target:
|
||||
# Already at target; just ensure metrics.json carries gk.
|
||||
|
||||
Loading…
Reference in New Issue
Block a user