From 7a6e92b31cb15bb07f9ddadf237af5b604146a0e Mon Sep 17 00:00:00 2001 From: Michael Pilosov Date: Tue, 21 Apr 2026 18:22:51 -0600 Subject: [PATCH] normalization for relative jitter --- flows/embedding_flow.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/flows/embedding_flow.py b/flows/embedding_flow.py index b01f5c6..75f287c 100644 --- a/flows/embedding_flow.py +++ b/flows/embedding_flow.py @@ -17,6 +17,7 @@ from prefect.cache_policies import INPUTS, NO_CACHE from prefect_ray import RayTaskRunner import pandas as pd +from sklearn.preprocessing import StandardScaler import embedding_utils as E from joblib import cpu_count @@ -44,6 +45,10 @@ def generate_initial_frame_task( generator_func = E.dynamic_import(generator_path) data, labels = generator_func(**generator_kwargs) + # Per-feature z-score so jitter_scale has consistent meaning across + # generators and reducers see comparably-scaled inputs. + data = StandardScaler().fit_transform(data) + df = pd.DataFrame( { "feature_0": data[:, 0],