# Patch summary (target file: app/web/main.py)
#
# * Adds the module-level set _TRANSIENT_GEN_KWARGS = {"n_samples", "random_state"}.
# * Adds the helper _clean_gen_kwargs(gk): returns None when gk is None, otherwise
#   a new dict holding every item of gk whose key is not in _TRANSIENT_GEN_KWARGS.
# * _lookup_dataset_meta: each candidate's m["kwargs"] is now compared against the
#   cleaned kwargs instead of the raw generator_kwargs argument. It still returns
#   None when there are no candidates, and still falls back to candidates[0] when
#   no candidate matches or when generator_kwargs is None.
# * _enrich_with_labels: kwargs_to_use is now _clean_gen_kwargs(gk) when gk is not
#   None; dm["kwargs"] is still used when gk is None.
#
# NOTE(review): the patch text below appears to have had its line breaks collapsed
# into single spaces, so the original indentation and blank context lines cannot
# be recovered from this view. It is left byte-for-byte untouched; restore the
# line breaks from the originating commit before trying to apply it.
# NOTE(review): only fragments of _lookup_dataset_meta and _enrich_with_labels are
# visible in the hunks; how kwargs_to_use is consumed after the getattr(...) line
# is outside this patch - confirm against the full file.
diff --git a/app/web/main.py b/app/web/main.py index c6e3c33..391b32b 100644 --- a/app/web/main.py +++ b/app/web/main.py @@ -914,6 +914,18 @@ for _m in DATASET_META.values(): _GEN_TO_META.setdefault(_m["path"].rsplit(".", 1)[-1], _m) +# Kwargs the flow injects / we supply explicitly — never part of the +# dataset's semantic identity, so strip them before DATASET_META matching +# and before regenerating labels. +_TRANSIENT_GEN_KWARGS = {"n_samples", "random_state"} + + +def _clean_gen_kwargs(gk: Optional[Dict[str, Any]]) -> Optional[Dict[str, Any]]: + if gk is None: + return None + return {k: v for k, v in gk.items() if k not in _TRANSIENT_GEN_KWARGS} + + def _lookup_dataset_meta( generator_short: str, generator_kwargs: Optional[Dict[str, Any]] ) -> Optional[Dict[str, Any]]: @@ -926,9 +938,10 @@ def _lookup_dataset_meta( ] if not candidates: return None - if generator_kwargs is not None: + gk = _clean_gen_kwargs(generator_kwargs) + if gk is not None: for m in candidates: - if m["kwargs"] == generator_kwargs: + if m["kwargs"] == gk: return m return candidates[0] @@ -963,7 +976,7 @@ def _enrich_with_labels(d: Dict[str, Any]) -> Dict[str, Any]: dm = _lookup_dataset_meta(gen_short, gk) if not dm: return d - kwargs_to_use = gk if gk is not None else dm["kwargs"] + kwargs_to_use = _clean_gen_kwargs(gk) if gk is not None else dm["kwargs"] try: mod_path, cls_name = dm["path"].rsplit(".", 1) fn = getattr(importlib.import_module(mod_path), cls_name)