normalization for relative jitter

This commit is contained in:
Michael Pilosov 2026-04-21 18:22:51 -06:00
parent 058db256a3
commit 7a6e92b31c

View File

@@ -17,6 +17,7 @@ from prefect.cache_policies import INPUTS, NO_CACHE
from prefect_ray import RayTaskRunner from prefect_ray import RayTaskRunner
import pandas as pd import pandas as pd
from sklearn.preprocessing import StandardScaler
import embedding_utils as E import embedding_utils as E
from joblib import cpu_count from joblib import cpu_count
@@ -44,6 +45,10 @@ def generate_initial_frame_task(
generator_func = E.dynamic_import(generator_path) generator_func = E.dynamic_import(generator_path)
data, labels = generator_func(**generator_kwargs) data, labels = generator_func(**generator_kwargs)
# Per-feature z-score so that jitter_scale has a consistent meaning across
# generators, and so that reducers see comparably scaled inputs.
data = StandardScaler().fit_transform(data)
df = pd.DataFrame( df = pd.DataFrame(
{ {
"feature_0": data[:, 0], "feature_0": data[:, 0],