labels: distinguish swiss_roll vs swiss_roll_hole in the UI

- _run_view uses a new _dataset_id(path, kwargs) that matches DATASET_META
  by (path, cleaned kwargs) and returns the catalogue key — so the runs
  list / filter chips show 'swiss_roll' vs 'swiss_roll_hole' rather than
  collapsing both to 'make_swiss_roll'.
- _enrich_with_labels replaces the stem-derived meta.generator with the
  matched DATASET_META key, so the compare panel header + diff-highlight
  also distinguish the two variants.
This commit is contained in:
Michael Pilosov 2026-04-22 17:12:46 -06:00
parent d052ec4223
commit 4576088c73

View File

@ -663,7 +663,9 @@ def _run_view(run: Dict[str, Any]) -> Dict[str, Any]:
"ref_exists": ref_exists,
"emb_exists": emb_exists,
"embedder_short": (params.get("embedder") or "").split(".")[-1],
"generator_short": (params.get("generator_path") or "").split(".")[-1],
"generator_short": _dataset_id(
params.get("generator_path") or "", params.get("generator_kwargs") or {}
),
}
@ -926,6 +928,25 @@ def _clean_gen_kwargs(gk: Optional[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
return {k: v for k, v in gk.items() if k not in _TRANSIENT_GEN_KWARGS}
def _dataset_id(generator_path: str, generator_kwargs: Optional[Dict[str, Any]]) -> str:
    """Resolve a run's dataset to its DATASET_META catalogue key.

    Lets generator variants that share one callable (e.g. 'swiss_roll' vs
    'swiss_roll_hole') be told apart by their kwargs.

    Resolution order:
      1. No DATASET_META entry has a ``path`` whose last dotted component
         equals that of ``generator_path``: return that last component.
      2. The cleaned kwargs are not None and equal some candidate's
         ``kwargs``: return the first such candidate's key.
      3. Otherwise: return the key of the first candidate, in
         DATASET_META iteration order.

    Args:
        generator_path: Dotted path of the generator; None/empty is treated
            as the empty string.
        generator_kwargs: Raw generator kwargs; passed through
            ``_clean_gen_kwargs`` before comparison.

    Returns:
        A DATASET_META key, or the path's short name when nothing matches.
    """
    short_name = (generator_path or "").rsplit(".", 1)[-1]
    cleaned = _clean_gen_kwargs(generator_kwargs)

    # Entries are matched on the final path component only, not the full path.
    same_generator = {}
    for key, meta in DATASET_META.items():
        if meta["path"].rsplit(".", 1)[-1] == short_name:
            same_generator[key] = meta
    if not same_generator:
        return short_name

    # The first candidate is the answer whenever kwargs cannot single one out.
    first_key = next(iter(same_generator))
    if cleaned is None:
        return first_key
    return next(
        (key for key, meta in same_generator.items() if meta["kwargs"] == cleaned),
        first_key,
    )
def _lookup_dataset_meta(
generator_short: str, generator_kwargs: Optional[Dict[str, Any]]
) -> Optional[Dict[str, Any]]:
@ -976,6 +997,12 @@ def _enrich_with_labels(d: Dict[str, Any]) -> Dict[str, Any]:
dm = _lookup_dataset_meta(gen_short, gk)
if not dm:
return d
# Replace the stem-derived generator short name (ambiguous for swiss_roll vs
# swiss_roll_hole) with the matched DATASET_META key for the panel header.
for key, entry in DATASET_META.items():
if entry is dm:
d["meta"]["generator"] = key
break
kwargs_to_use = _clean_gen_kwargs(gk) if gk is not None else dm["kwargs"]
try:
mod_path, cls_name = dm["path"].rsplit(".", 1)