From d052ec422326108bc7a637f46cd11b9f8a0022c4 Mon Sep 17 00:00:00 2001
From: Michael Pilosov
Date: Wed, 22 Apr 2026 17:09:26 -0600
Subject: [PATCH] labels: strip transient n_samples/random_state from generator_kwargs before regen
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Sidecars written by the pre-fix flow contain merged generator_kwargs
(n_samples + random_state=0 mixed in with the user-supplied form). The
enrichment call passes n_samples/random_state explicitly, so an old
sidecar's gk caused a TypeError (duplicate kwarg) that the try/except
swallowed — leaving labels empty and coloring falling back to a plain
ramp.

Strip those keys before DATASET_META matching and the regen call;
matches work naturally against the stripped dict.
---
 app/web/main.py | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/app/web/main.py b/app/web/main.py
index c6e3c33..391b32b 100644
--- a/app/web/main.py
+++ b/app/web/main.py
@@ -914,6 +914,18 @@ for _m in DATASET_META.values():
     _GEN_TO_META.setdefault(_m["path"].rsplit(".", 1)[-1], _m)
 
 
+# Kwargs the flow injects / we supply explicitly — never part of the
+# dataset's semantic identity, so strip them before DATASET_META matching
+# and before regenerating labels.
+_TRANSIENT_GEN_KWARGS = {"n_samples", "random_state"}
+
+
+def _clean_gen_kwargs(gk: Optional[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
+    if gk is None:
+        return None
+    return {k: v for k, v in gk.items() if k not in _TRANSIENT_GEN_KWARGS}
+
+
 def _lookup_dataset_meta(
     generator_short: str, generator_kwargs: Optional[Dict[str, Any]]
 ) -> Optional[Dict[str, Any]]:
@@ -926,9 +938,10 @@ def _lookup_dataset_meta(
     ]
     if not candidates:
         return None
-    if generator_kwargs is not None:
+    gk = _clean_gen_kwargs(generator_kwargs)
+    if gk is not None:
         for m in candidates:
-            if m["kwargs"] == generator_kwargs:
+            if m["kwargs"] == gk:
                 return m
     return candidates[0]
 
@@ -963,7 +976,7 @@ def _enrich_with_labels(d: Dict[str, Any]) -> Dict[str, Any]:
     dm = _lookup_dataset_meta(gen_short, gk)
     if not dm:
         return d
-    kwargs_to_use = gk if gk is not None else dm["kwargs"]
+    kwargs_to_use = _clean_gen_kwargs(gk) if gk is not None else dm["kwargs"]
     try:
         mod_path, cls_name = dm["path"].rsplit(".", 1)
         fn = getattr(importlib.import_module(mod_path), cls_name)