labels: strip transient n_samples/random_state from generator_kwargs before regen

Sidecars written by the pre-fix flow contain merged generator_kwargs
(n_samples + random_state=0 mixed in with the user-supplied form). The
enrichment call passes n_samples/random_state explicitly, so an old
sidecar's generator_kwargs caused a TypeError (duplicate keyword argument)
that the try/except swallowed — leaving labels empty and coloring falling
back to a plain ramp. Strip those keys before DATASET_META matching and
before the regen call, so that matching works naturally against the
stripped dict.
This commit is contained in:
Michael Pilosov 2026-04-22 17:09:26 -06:00
parent c12d2cda6c
commit d052ec4223

View File

@ -914,6 +914,18 @@ for _m in DATASET_META.values():
_GEN_TO_META.setdefault(_m["path"].rsplit(".", 1)[-1], _m)
# Kwargs the flow injects / we supply explicitly — never part of the
# dataset's semantic identity, so strip them before DATASET_META matching
# and before regenerating labels.
_TRANSIENT_GEN_KWARGS = {"n_samples", "random_state"}
def _clean_gen_kwargs(gk: Optional[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
    """Return a copy of ``gk`` without the transient generator kwargs.

    Every key listed in ``_TRANSIENT_GEN_KWARGS`` is dropped; all other
    entries are carried over unchanged, in their original order. The input
    dict is never mutated.

    Args:
        gk: Generator kwargs to clean, or ``None``.

    Returns:
        A new dict without the transient keys, or ``None`` when ``gk`` is
        ``None``.
    """
    if gk is not None:
        cleaned: Dict[str, Any] = {}
        for key, value in gk.items():
            if key in _TRANSIENT_GEN_KWARGS:
                # Transient key: leave it out of the cleaned copy.
                continue
            cleaned[key] = value
        return cleaned
    return None
def _lookup_dataset_meta(
generator_short: str, generator_kwargs: Optional[Dict[str, Any]]
) -> Optional[Dict[str, Any]]:
@ -926,9 +938,10 @@ def _lookup_dataset_meta(
]
if not candidates:
return None
if generator_kwargs is not None:
gk = _clean_gen_kwargs(generator_kwargs)
if gk is not None:
for m in candidates:
if m["kwargs"] == generator_kwargs:
if m["kwargs"] == gk:
return m
return candidates[0]
@ -963,7 +976,7 @@ def _enrich_with_labels(d: Dict[str, Any]) -> Dict[str, Any]:
dm = _lookup_dataset_meta(gen_short, gk)
if not dm:
return d
kwargs_to_use = gk if gk is not None else dm["kwargs"]
kwargs_to_use = _clean_gen_kwargs(gk) if gk is not None else dm["kwargs"]
try:
mod_path, cls_name = dm["path"].rsplit(".", 1)
fn = getattr(importlib.import_module(mod_path), cls_name)