diff --git a/app/web/main.py b/app/web/main.py index 6cae799..7b82b5e 100644 --- a/app/web/main.py +++ b/app/web/main.py @@ -361,14 +361,14 @@ def synthesize_output_paths( generator_path: str, embedder: str, num_points: int, - num_snapshots: int, + num_timesteps: int, jitter_scale: float, seed: int, ) -> Tuple[str, str]: gen = generator_path.split(".")[-1] emb = embedder.split(".")[-1] - ref = f"{gen}_Reference_N{num_points}_S{num_snapshots}_J{jitter_scale}_s{seed}.html" - embf = f"{gen}_{emb}_N{num_points}_S{num_snapshots}_J{jitter_scale}_s{seed}.html" + ref = f"{gen}_Reference_N{num_points}_T{num_timesteps}_J{jitter_scale}_s{seed}.html" + embf = f"{gen}_{emb}_N{num_points}_T{num_timesteps}_J{jitter_scale}_s{seed}.html" return ref, embf @@ -473,7 +473,10 @@ def _run_view(run: Dict[str, Any]) -> Dict[str, Any]: params.get("generator_path", "sklearn.datasets.make_s_curve"), params.get("embedder", "sklearn.decomposition.FactorAnalysis"), int(params.get("num_points", 5000)), - int(params.get("num_snapshots", 48)), + # Fallback to the old num_snapshots key for runs dispatched + # before the T-rename, so historical figs still resolve after + # `rename 's/_S(\d+)_J/_T$1_J/' figs/*.html` (anchored; skips _Spectral*). 
+ int(params.get("num_timesteps", params.get("num_snapshots", 48))), float(params.get("jitter_scale", 0.01)), int(params.get("seed", 42)), ) @@ -594,7 +597,7 @@ async def submit(request: Request) -> HTMLResponse: try: num_points = int(data.get("num_points", "5000") or 5000) - num_snapshots = int(data.get("num_snapshots", "48") or 48) + num_timesteps = int(data.get("num_timesteps", "48") or 48) jitter_scale = float(data.get("jitter_scale", "0.01") or 0.01) seed = int(data.get("seed", "42") or 42) except ValueError as e: @@ -606,7 +609,7 @@ async def submit(request: Request) -> HTMLResponse: parameters: Dict[str, Any] = { "num_points": num_points, - "num_snapshots": num_snapshots, + "num_timesteps": num_timesteps, "jitter_scale": jitter_scale, "seed": seed, "generator_path": generator_path, @@ -633,7 +636,7 @@ async def submit(request: Request) -> HTMLResponse: ) ref_file, emb_file = synthesize_output_paths( - generator_path, reducer, num_points, num_snapshots, jitter_scale, seed + generator_path, reducer, num_points, num_timesteps, jitter_scale, seed ) RUN_OUTPUTS[run["id"]] = {"ref": ref_file, "embed": emb_file} diff --git a/app/web/static/dataset-picker.js b/app/web/static/dataset-picker.js index a0588e4..d78d645 100644 --- a/app/web/static/dataset-picker.js +++ b/app/web/static/dataset-picker.js @@ -172,7 +172,7 @@ async function main() { const hidden = { datasetId: document.getElementById('dataset_id'), numPoints: document.getElementById('num_points'), - numSnapshots: document.getElementById('num_snapshots'), + numTimesteps: document.getElementById('num_timesteps'), jitterScale: document.getElementById('jitter_scale'), }; @@ -260,12 +260,12 @@ async function main() { }); applyJ(parseFloat(document.querySelector('input[name="j"]:checked').value)); - // n frames: truncates the precomputed walk. Prefix-stable (12 ⊂ 24 ⊂ 48), + // timesteps: truncates the precomputed walk. Prefix-stable (12 ⊂ 24 ⊂ 48), // so toggling changes cycle length without rerolling. 
cycleStartMs is // shared so all cards animate in lockstep. const fInputs = document.querySelectorAll('input[name="f"]'); function applyF(n) { - hidden.numSnapshots.value = String(n); + hidden.numTimesteps.value = String(n); const start = performance.now(); for (const s of scenes) { s.numFrames = n; diff --git a/app/web/templates/_runs.html b/app/web/templates/_runs.html index 673783f..993b7c8 100644 --- a/app/web/templates/_runs.html +++ b/app/web/templates/_runs.html @@ -25,7 +25,7 @@ {% if r.params %}
N {{ r.params.get('num_points', '?') }} - S {{ r.params.get('num_snapshots', '?') }} + T {{ r.params.get('num_timesteps', r.params.get('num_snapshots', '?')) }} J {{ r.params.get('jitter_scale', '?') }} s {{ r.params.get('seed', '?') }} {% set ea = r.params.get('embed_args') or {} %} diff --git a/app/web/templates/index.html b/app/web/templates/index.html index 0e11bb3..5872252 100644 --- a/app/web/templates/index.html +++ b/app/web/templates/index.html @@ -60,8 +60,8 @@
- n frames -
+ timesteps +
@@ -98,7 +98,7 @@ - + diff --git a/flows/embedding_flow.py b/flows/embedding_flow.py index 75f287c..0cfe553 100644 --- a/flows/embedding_flow.py +++ b/flows/embedding_flow.py @@ -64,9 +64,9 @@ def generate_initial_frame_task( @task(cache_policy=INPUTS, cache_expiration=timedelta(hours=12)) def generate_snapshots_task( - initial_df: pd.DataFrame, num_snapshots: int, jitter_scale: float, seed: int = 42 + initial_df: pd.DataFrame, num_timesteps: int, jitter_scale: float, seed: int = 42 ) -> List[pd.DataFrame]: - return E.generate_jittered_snapshots(initial_df, num_snapshots, jitter_scale, seed) + return E.generate_jittered_snapshots(initial_df, num_timesteps, jitter_scale, seed) @task( @@ -138,7 +138,7 @@ _DEFAULT_EMBED_ARGS: Dict[str, Any] = {"n_components": 2, "random_state": 30} @flow(task_runner=RayTaskRunner(init_kwargs={"num_cpus": 4})) def embedding_flow( num_points: int = 5000, - num_snapshots: int = 48, + num_timesteps: int = 48, jitter_scale: float = 0.01, seed: int = 42, generator_path: str = "sklearn.datasets.make_s_curve", @@ -166,10 +166,10 @@ def embedding_flow( Path(output_dir).mkdir(parents=True, exist_ok=True) _generator = generator_path.split(".")[-1] output_ref: str = ( - f"{output_dir.strip('/')}/{_generator}_Reference_N{num_points}_S{num_snapshots}_J{jitter_scale}_s{seed}.html" + f"{output_dir.strip('/')}/{_generator}_Reference_N{num_points}_T{num_timesteps}_J{jitter_scale}_s{seed}.html" ) output_embed: str = ( - f"{output_dir.strip('/')}/{_generator}_{embedder.split('.')[-1]}_N{num_points}_S{num_snapshots}_J{jitter_scale}_s{seed}.html" + f"{output_dir.strip('/')}/{_generator}_{embedder.split('.')[-1]}_N{num_points}_T{num_timesteps}_J{jitter_scale}_s{seed}.html" ) title_ref = f"Reference: {_generator}, N={num_points} with {jitter_scale} noise" title_embed = f"Embedding: {embedder.split('.')[-1]} on {_generator}, N={num_points} with {jitter_scale} noise" @@ -186,27 +186,27 @@ def embedding_flow( # Generate snapshots snapshots = 
generate_snapshots_task.submit( initial_df=initial_frame.result(), - num_snapshots=num_snapshots, + num_timesteps=num_timesteps, jitter_scale=jitter_scale, seed=seed, ) snapshot_list = snapshots.result() - # Generate corresponding dates (assuming daily snapshots for simplicity) + # One date per timestep (monthly, starting at 2000-01-01 for cosmetic reasons) dates = [ f"{year}-{month:02d}-01" - for year in range(2000, 2001 + math.floor(num_snapshots / 12)) + for year in range(2000, 2001 + math.floor(num_timesteps / 12)) for month in range(1, 13) - ][:num_snapshots] + ][:num_timesteps] # Apply embeddings in parallel using Prefect's mapping embeddings = create_embedding.map( snapshot=snapshot_list, time_idx=dates, - embed_columns=[embed_columns] * num_snapshots, - embedder=[embedder] * num_snapshots, - embed_args=[merged_embed_args] * num_snapshots, - id_column=[id_column] * num_snapshots, + embed_columns=[embed_columns] * num_timesteps, + embedder=[embedder] * num_timesteps, + embed_args=[merged_embed_args] * num_timesteps, + id_column=[id_column] * num_timesteps, ) # Collect all embeddings diff --git a/flows/embedding_utils.py b/flows/embedding_utils.py index e19b9df..b3f9888 100644 --- a/flows/embedding_utils.py +++ b/flows/embedding_utils.py @@ -425,18 +425,18 @@ def generate_initial_frame( def generate_jittered_snapshots( initial_df: pd.DataFrame, - num_snapshots: int, + num_timesteps: int, jitter_scale: float = 0.1, seed: int = 42, ) -> List[pd.DataFrame]: """ - Generate snapshots by applying random jitter to the initial frame and randomly adding/removing points. + Generate one jittered snapshot per timestep, with random point add/remove. Parameters: - initial_df: pd.DataFrame The initial DataFrame to apply jitter. - - num_snapshots: int - Number of snapshots to generate. + - num_timesteps: int + Number of timesteps (one snapshot produced per timestep). - jitter_scale: float Standard deviation of the Gaussian noise added for jitter. 
- seed: int @@ -450,7 +450,7 @@ def generate_jittered_snapshots( snapshots = [] current_df = initial_df.copy() - for i in range(num_snapshots): + for i in range(num_timesteps): # Apply jitter (set to 0 for testing) jitter = np.random.normal( loc=0.0,