filenames + run names: J in sci notation (5E-3 not 0.005)
Periods in filenames are avoidable, and the Prefect UI dislikes them in run names. This commit adds a shared sci_notation helper in main.py, mirrored as _sci in the flow. The stem regex (main + parser) now matches J<digits.Ee+-> so that it accepts both old decimal-J and new sci-J filenames and the two can transition together. The J tag in the Prefect tag list also uses the sci form, so chip filters stay consistent. The backfill script is extended to find pre-transition (decimal-J) files on disk via a second base-stem variant and then rename them to the sci form. backfill_tags re-patches existing runs so their J tag matches the new canonical form. All 13 existing figs + runs were renamed / retagged in place.
This commit is contained in:
parent
56279dbb1b
commit
e94d28b8fc
@ -475,6 +475,18 @@ def run_args_hash(
|
|||||||
embed_args_hash = run_args_hash
|
embed_args_hash = run_args_hash
|
||||||
|
|
||||||
|
|
||||||
|
def sci_notation(v: Any) -> str:
    """Format a number as compact scientific notation (0.005 -> '5E-3').

    Used in stems and Prefect run names so that typical values carry no
    period in filenames or in the UI.

    The value is rounded to four significant digits, trailing zeros are
    dropped from the mantissa, and the exponent is written without sign
    padding (``1E-2``, ``1E2``).  A period only remains when the mantissa
    needs more than one significant digit (0.015 -> '1.5E-2').

    Anything that cannot be rendered this way is returned as ``str(v)``:
    values ``float()`` rejects (``None``, ``"?"``), ints too large for a
    float, and non-finite values (``inf`` / ``nan``).
    """
    try:
        f = float(v)
    except (TypeError, ValueError, OverflowError):
        # Not a usable number (or too large for a float): pass it through.
        return str(v)
    # inf / nan format as "inf" / "nan" with no exponent part; unpacking a
    # plain split("e") would raise ValueError for them, so detect the
    # missing separator and fall back like any other non-number.
    mantissa, sep, exponent = f"{f:.3e}".partition("e")
    if not sep:
        return str(v)
    # "5.000" -> "5", "1.500" -> "1.5"
    mantissa = mantissa.rstrip("0").rstrip(".")
    # int() drops the "+" sign and zero padding: "-03" -> -3, "+02" -> 2.
    return f"{mantissa}E{int(exponent)}"
|
||||||
|
|
||||||
|
|
||||||
def synthesize_output_paths(
|
def synthesize_output_paths(
|
||||||
generator_path: str,
|
generator_path: str,
|
||||||
embedder: str,
|
embedder: str,
|
||||||
@ -487,8 +499,9 @@ def synthesize_output_paths(
|
|||||||
) -> Tuple[str, str]:
|
) -> Tuple[str, str]:
|
||||||
gen = generator_path.split(".")[-1]
|
gen = generator_path.split(".")[-1]
|
||||||
emb = embedder.split(".")[-1]
|
emb = embedder.split(".")[-1]
|
||||||
ref = f"{gen}_Reference_N{num_points}_T{num_timesteps}_J{jitter_scale}_s{seed}.html"
|
j = sci_notation(jitter_scale)
|
||||||
base = f"{gen}_{emb}_N{num_points}_T{num_timesteps}_J{jitter_scale}_s{seed}"
|
ref = f"{gen}_Reference_N{num_points}_T{num_timesteps}_J{j}_s{seed}.html"
|
||||||
|
base = f"{gen}_{emb}_N{num_points}_T{num_timesteps}_J{j}_s{seed}"
|
||||||
if embed_args is None:
|
if embed_args is None:
|
||||||
embf = f"{base}.html"
|
embf = f"{base}.html"
|
||||||
else:
|
else:
|
||||||
@ -977,7 +990,7 @@ async def metrics_json() -> JSONResponse:
|
|||||||
|
|
||||||
|
|
||||||
_STEM_RE = re.compile(
|
_STEM_RE = re.compile(
|
||||||
r"^make_[A-Za-z_]+?_[A-Za-z]+_N\d+_T\d+_J[\d.]+_s\d+(?:_[0-9a-f]{8})?$"
|
r"^make_[A-Za-z_]+?_[A-Za-z]+_N\d+_T\d+_J[\d.Ee+\-]+_s\d+(?:_[0-9a-f]{8})?$"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Map short generator name ("make_blobs") to its DATASET_META entry.
|
# Map short generator name ("make_blobs") to its DATASET_META entry.
|
||||||
@ -1020,7 +1033,7 @@ def build_run_tags(
|
|||||||
f"algorithm:{(embedder or '').rsplit('.', 1)[-1]}",
|
f"algorithm:{(embedder or '').rsplit('.', 1)[-1]}",
|
||||||
f"N:{int(num_points)}",
|
f"N:{int(num_points)}",
|
||||||
f"T:{int(num_timesteps)}",
|
f"T:{int(num_timesteps)}",
|
||||||
f"J:{jitter_scale}",
|
f"J:{sci_notation(jitter_scale)}",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -20,7 +20,7 @@ from pathlib import Path
|
|||||||
|
|
||||||
_STEM_RE = re.compile(
|
_STEM_RE = re.compile(
|
||||||
r"^(?P<gen>make_.+?)_(?P<emb>[A-Za-z]+)_N(?P<n>\d+)_T(?P<t>\d+)"
|
r"^(?P<gen>make_.+?)_(?P<emb>[A-Za-z]+)_N(?P<n>\d+)_T(?P<t>\d+)"
|
||||||
r"_J(?P<j>[\d.]+)_s(?P<s>\d+)(?:_(?P<h>[0-9a-f]{8}))?$"
|
r"_J(?P<j>[\d.Ee+\-]+)_s(?P<s>\d+)(?:_(?P<h>[0-9a-f]{8}))?$"
|
||||||
)
|
)
|
||||||
|
|
||||||
# plotly's typed-array dtype -> (struct format char, item size bytes)
|
# plotly's typed-array dtype -> (struct format char, item size bytes)
|
||||||
|
|||||||
@ -43,6 +43,19 @@ def _run_args_hash(
|
|||||||
return hashlib.sha1(s.encode()).hexdigest()[:8]
|
return hashlib.sha1(s.encode()).hexdigest()[:8]
|
||||||
|
|
||||||
|
|
||||||
|
def _sci(v: Any) -> str:
|
||||||
|
"""Float → compact sci notation without a period (e.g. 0.005 → 5E-3,
|
||||||
|
0.01 → 1E-2). Keeps Prefect's UI happy — it doesn't like periods in
|
||||||
|
run names."""
|
||||||
|
try:
|
||||||
|
f = float(v)
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
return str(v)
|
||||||
|
m, e = f"{f:.3e}".split("e")
|
||||||
|
m = m.rstrip("0").rstrip(".")
|
||||||
|
return f"{m}E{int(e)}"
|
||||||
|
|
||||||
|
|
||||||
def _flow_run_name() -> str:
|
def _flow_run_name() -> str:
|
||||||
"""Name each Prefect run after the stem of its output fig, so runs are
|
"""Name each Prefect run after the stem of its output fig, so runs are
|
||||||
searchable / hoverable instead of wearing Prefect's auto-generated
|
searchable / hoverable instead of wearing Prefect's auto-generated
|
||||||
@ -52,7 +65,7 @@ def _flow_run_name() -> str:
|
|||||||
emb = (p.get("embedder") or "").rsplit(".", 1)[-1] or "?"
|
emb = (p.get("embedder") or "").rsplit(".", 1)[-1] or "?"
|
||||||
N = p.get("num_points", "?")
|
N = p.get("num_points", "?")
|
||||||
T = p.get("num_timesteps", "?")
|
T = p.get("num_timesteps", "?")
|
||||||
J = p.get("jitter_scale", "?")
|
J = _sci(p.get("jitter_scale", "?"))
|
||||||
s = p.get("seed", "?")
|
s = p.get("seed", "?")
|
||||||
tag = _run_args_hash(p.get("embed_args"), p.get("generator_kwargs"))
|
tag = _run_args_hash(p.get("embed_args"), p.get("generator_kwargs"))
|
||||||
return f"{gen}_{emb}_N{N}_T{T}_J{J}_s{s}_{tag}"
|
return f"{gen}_{emb}_N{N}_T{T}_J{J}_s{s}_{tag}"
|
||||||
@ -315,12 +328,13 @@ def embedding_flow(
|
|||||||
|
|
||||||
Path(output_dir).mkdir(parents=True, exist_ok=True)
|
Path(output_dir).mkdir(parents=True, exist_ok=True)
|
||||||
_generator = generator_path.split(".")[-1]
|
_generator = generator_path.split(".")[-1]
|
||||||
|
_j = _sci(jitter_scale)
|
||||||
output_ref: str = (
|
output_ref: str = (
|
||||||
f"{output_dir.strip('/')}/{_generator}_Reference_N{num_points}_T{num_timesteps}_J{jitter_scale}_s{seed}.html"
|
f"{output_dir.strip('/')}/{_generator}_Reference_N{num_points}_T{num_timesteps}_J{_j}_s{seed}.html"
|
||||||
)
|
)
|
||||||
_args_tag = _run_args_hash(embed_args, user_generator_kwargs)
|
_args_tag = _run_args_hash(embed_args, user_generator_kwargs)
|
||||||
output_embed: str = (
|
output_embed: str = (
|
||||||
f"{output_dir.strip('/')}/{_generator}_{embedder.split('.')[-1]}_N{num_points}_T{num_timesteps}_J{jitter_scale}_s{seed}_{_args_tag}.html"
|
f"{output_dir.strip('/')}/{_generator}_{embedder.split('.')[-1]}_N{num_points}_T{num_timesteps}_J{_j}_s{seed}_{_args_tag}.html"
|
||||||
)
|
)
|
||||||
output_metrics: str = output_embed[:-5] + ".metrics.json"
|
output_metrics: str = output_embed[:-5] + ".metrics.json"
|
||||||
output_frames: str = output_embed[:-5] + ".frames.json"
|
output_frames: str = output_embed[:-5] + ".frames.json"
|
||||||
|
|||||||
@ -30,7 +30,7 @@ from typing import Any, Dict, List, Optional
|
|||||||
|
|
||||||
_ROOT = Path(__file__).resolve().parent.parent
|
_ROOT = Path(__file__).resolve().parent.parent
|
||||||
sys.path.insert(0, str(_ROOT))
|
sys.path.insert(0, str(_ROOT))
|
||||||
from app.web.main import PREFECT, run_args_hash # noqa: E402
|
from app.web.main import PREFECT, run_args_hash, sci_notation # noqa: E402
|
||||||
|
|
||||||
|
|
||||||
def _legacy_hash(ea: Optional[Dict[str, Any]]) -> str:
|
def _legacy_hash(ea: Optional[Dict[str, Any]]) -> str:
|
||||||
@ -38,30 +38,45 @@ def _legacy_hash(ea: Optional[Dict[str, Any]]) -> str:
|
|||||||
return hashlib.sha1(s.encode()).hexdigest()[:8]
|
return hashlib.sha1(s.encode()).hexdigest()[:8]
|
||||||
|
|
||||||
|
|
||||||
def _base_stem(params: Dict[str, Any]) -> Optional[str]:
|
def _base_stems(params: Dict[str, Any]) -> List[str]:
|
||||||
|
"""Return the stem prefix(es) for this run's params: both the current
|
||||||
|
sci-J form and the legacy decimal-J form, so we can find pre-transition
|
||||||
|
files on disk too."""
|
||||||
try:
|
try:
|
||||||
gen = (params.get("generator_path") or "").rsplit(".", 1)[-1]
|
gen = (params.get("generator_path") or "").rsplit(".", 1)[-1]
|
||||||
emb = (params.get("embedder") or "").rsplit(".", 1)[-1]
|
emb = (params.get("embedder") or "").rsplit(".", 1)[-1]
|
||||||
N = int(params["num_points"])
|
N = int(params["num_points"])
|
||||||
T = int(params.get("num_timesteps", params.get("num_snapshots")))
|
T = int(params.get("num_timesteps", params.get("num_snapshots")))
|
||||||
J = float(params["jitter_scale"])
|
Jf = float(params["jitter_scale"])
|
||||||
s = int(params["seed"])
|
s = int(params["seed"])
|
||||||
except (KeyError, TypeError, ValueError):
|
except (KeyError, TypeError, ValueError):
|
||||||
return None
|
return []
|
||||||
if not gen or not emb:
|
if not gen or not emb:
|
||||||
return None
|
return []
|
||||||
return f"{gen}_{emb}_N{N}_T{T}_J{J}_s{s}"
|
out = [f"{gen}_{emb}_N{N}_T{T}_J{sci_notation(Jf)}_s{s}"]
|
||||||
|
legacy = f"{gen}_{emb}_N{N}_T{T}_J{Jf}_s{s}"
|
||||||
|
if legacy not in out:
|
||||||
|
out.append(legacy)
|
||||||
|
return out
|
||||||
|
|
||||||
|
|
||||||
def _candidate_names(base: str, ea: Dict[str, Any], gk: Dict[str, Any]) -> List[str]:
|
def _candidate_names(bases: List[str], ea: Dict[str, Any], gk: Dict[str, Any]) -> List[str]:
|
||||||
target = f"{base}_{run_args_hash(ea, gk)}.html"
|
# Target = current sci-J base + new-scheme hash.
|
||||||
legacy = f"{base}_{_legacy_hash(ea)}.html"
|
if not bases:
|
||||||
no_hash = f"{base}.html"
|
return []
|
||||||
# Preserve order: target first so we short-circuit on already-backfilled.
|
target_base = bases[0]
|
||||||
|
target = f"{target_base}_{run_args_hash(ea, gk)}.html"
|
||||||
out = [target]
|
out = [target]
|
||||||
for x in (legacy, no_hash):
|
# Fall back to every (base, hash) combination we might find on disk.
|
||||||
|
hashes = [run_args_hash(ea, gk), _legacy_hash(ea)]
|
||||||
|
for b in bases:
|
||||||
|
for h in hashes:
|
||||||
|
x = f"{b}_{h}.html"
|
||||||
if x not in out:
|
if x not in out:
|
||||||
out.append(x)
|
out.append(x)
|
||||||
|
no_hash = f"{b}.html"
|
||||||
|
if no_hash not in out:
|
||||||
|
out.append(no_hash)
|
||||||
return out
|
return out
|
||||||
|
|
||||||
|
|
||||||
@ -125,13 +140,13 @@ def main() -> int:
|
|||||||
params = r.get("parameters") or {}
|
params = r.get("parameters") or {}
|
||||||
ea = params.get("embed_args") or {}
|
ea = params.get("embed_args") or {}
|
||||||
gk = params.get("generator_kwargs") or {}
|
gk = params.get("generator_kwargs") or {}
|
||||||
base = _base_stem(params)
|
bases = _base_stems(params)
|
||||||
if not base:
|
if not bases:
|
||||||
continue
|
continue
|
||||||
target = f"{base}_{run_args_hash(ea, gk)}.html"
|
target = f"{bases[0]}_{run_args_hash(ea, gk)}.html"
|
||||||
if target in seen_targets:
|
if target in seen_targets:
|
||||||
continue # later duplicate — the stale-marking logic will handle it
|
continue # later duplicate — the stale-marking logic will handle it
|
||||||
for candidate in _candidate_names(base, ea, gk):
|
for candidate in _candidate_names(bases, ea, gk):
|
||||||
if (figs_dir / candidate).exists():
|
if (figs_dir / candidate).exists():
|
||||||
if candidate == target:
|
if candidate == target:
|
||||||
# Already at target; just ensure metrics.json carries gk.
|
# Already at target; just ensure metrics.json carries gk.
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user