rename snapshots -> timesteps

2026-04-21 19:55:01 -06:00 · 2026-04-21 19:55:01 -06:00 · 92069a3c91
commit 92069a3c91
parent afc9b5b2f2
6 changed files with 35 additions and 32 deletions
--- a/app/web/main.py
+++ b/app/web/main.py
@ -361,14 +361,14 @@ def synthesize_output_paths(
    generator_path: str,
    embedder: str,
    num_points: int,
-    num_snapshots: int,
+    num_timesteps: int,
    jitter_scale: float,
    seed: int,
 ) -> Tuple[str, str]:
    gen = generator_path.split(".")[-1]
    emb = embedder.split(".")[-1]
-    ref = f"{gen}_Reference_N{num_points}_S{num_snapshots}_J{jitter_scale}_s{seed}.html"
-    embf = f"{gen}_{emb}_N{num_points}_S{num_snapshots}_J{jitter_scale}_s{seed}.html"
+    ref = f"{gen}_Reference_N{num_points}_T{num_timesteps}_J{jitter_scale}_s{seed}.html"
+    embf = f"{gen}_{emb}_N{num_points}_T{num_timesteps}_J{jitter_scale}_s{seed}.html"
    return ref, embf


@ -473,7 +473,10 @@ def _run_view(run: Dict[str, Any]) -> Dict[str, Any]:
                params.get("generator_path", "sklearn.datasets.make_s_curve"),
                params.get("embedder", "sklearn.decomposition.FactorAnalysis"),
                int(params.get("num_points", 5000)),
-                int(params.get("num_snapshots", 48)),
+                # Fall back to the old num_snapshots key for runs dispatched
+                # before the T-rename, so historical figs still resolve after
+                # `rename 's/_S(\d+)_J/_T$1_J/' figs/*.html`.
+                int(params.get("num_timesteps", params.get("num_snapshots", 48))),
                float(params.get("jitter_scale", 0.01)),
                int(params.get("seed", 42)),
            )
@ -594,7 +597,7 @@ async def submit(request: Request) -> HTMLResponse:

    try:
        num_points = int(data.get("num_points", "5000") or 5000)
-        num_snapshots = int(data.get("num_snapshots", "48") or 48)
+        num_timesteps = int(data.get("num_timesteps", "48") or 48)
        jitter_scale = float(data.get("jitter_scale", "0.01") or 0.01)
        seed = int(data.get("seed", "42") or 42)
    except ValueError as e:
@ -606,7 +609,7 @@ async def submit(request: Request) -> HTMLResponse:

    parameters: Dict[str, Any] = {
        "num_points": num_points,
-        "num_snapshots": num_snapshots,
+        "num_timesteps": num_timesteps,
        "jitter_scale": jitter_scale,
        "seed": seed,
        "generator_path": generator_path,
@ -633,7 +636,7 @@ async def submit(request: Request) -> HTMLResponse:
        )

    ref_file, emb_file = synthesize_output_paths(
-        generator_path, reducer, num_points, num_snapshots, jitter_scale, seed
+        generator_path, reducer, num_points, num_timesteps, jitter_scale, seed
    )
    RUN_OUTPUTS[run["id"]] = {"ref": ref_file, "embed": emb_file}

--- a/app/web/static/dataset-picker.js
+++ b/app/web/static/dataset-picker.js
@ -172,7 +172,7 @@ async function main() {
  const hidden = {
    datasetId: document.getElementById('dataset_id'),
    numPoints: document.getElementById('num_points'),
-    numSnapshots: document.getElementById('num_snapshots'),
+    numTimesteps: document.getElementById('num_timesteps'),
    jitterScale: document.getElementById('jitter_scale'),
  };

@ -260,12 +260,12 @@ async function main() {
  });
  applyJ(parseFloat(document.querySelector('input[name="j"]:checked').value));

-  // n frames: truncates the precomputed walk. Prefix-stable (12 ⊂ 24 ⊂ 48),
+  // timesteps: truncates the precomputed walk. Prefix-stable (12 ⊂ 24 ⊂ 48),
  // so toggling changes cycle length without rerolling. cycleStartMs is
  // shared so all cards animate in lockstep.
  const fInputs = document.querySelectorAll('input[name="f"]');
  function applyF(n) {
-    hidden.numSnapshots.value = String(n);
+    hidden.numTimesteps.value = String(n);
    const start = performance.now();
    for (const s of scenes) {
      s.numFrames = n;
--- a/app/web/templates/_runs.html
+++ b/app/web/templates/_runs.html
@ -25,7 +25,7 @@
          {% if r.params %}
          <div class="paramline">
            <span><span class="k">N</span>&nbsp;{{ r.params.get('num_points', '?') }}</span>
-            <span><span class="k">S</span>&nbsp;{{ r.params.get('num_snapshots', '?') }}</span>
+            <span><span class="k">T</span>&nbsp;{{ r.params.get('num_timesteps', '?') }}</span>
            <span><span class="k">J</span>&nbsp;{{ r.params.get('jitter_scale', '?') }}</span>
            <span><span class="k">s</span>&nbsp;{{ r.params.get('seed', '?') }}</span>
            {% set ea = r.params.get('embed_args') or {} %}
--- a/app/web/templates/index.html
+++ b/app/web/templates/index.html
@ -60,8 +60,8 @@
        <label><input type="radio" name="j" value="0.01"><span>0.010</span></label>
      </div>

-      <span class="ctl-label">n frames</span>
-      <div class="segmented" role="radiogroup" aria-label="number of frames">
+      <span class="ctl-label">timesteps</span>
+      <div class="segmented" role="radiogroup" aria-label="number of timesteps">
        <label><input type="radio" name="f" value="12"><span>12</span></label>
        <label><input type="radio" name="f" value="24" checked><span>24</span></label>
        <label><input type="radio" name="f" value="48"><span>48</span></label>
@ -98,7 +98,7 @@
      <!-- Picker-driven hidden fields. Values are written by dataset-picker.js. -->
      <input type="hidden" name="dataset_id" id="dataset_id" value="" />
      <input type="hidden" name="num_points" id="num_points" value="500" />
-      <input type="hidden" name="num_snapshots" id="num_snapshots" value="24" />
+      <input type="hidden" name="num_timesteps" id="num_timesteps" value="24" />
      <input type="hidden" name="jitter_scale" id="jitter_scale" value="0.005" />
      <input type="hidden" name="seed" id="seed" value="42" />

--- a/flows/embedding_flow.py
+++ b/flows/embedding_flow.py
@ -64,9 +64,9 @@ def generate_initial_frame_task(

@task(cache_policy=INPUTS, cache_expiration=timedelta(hours=12))
 def generate_snapshots_task(
-    initial_df: pd.DataFrame, num_snapshots: int, jitter_scale: float, seed: int = 42
+    initial_df: pd.DataFrame, num_timesteps: int, jitter_scale: float, seed: int = 42
 ) -> List[pd.DataFrame]:
-    return E.generate_jittered_snapshots(initial_df, num_snapshots, jitter_scale, seed)
+    return E.generate_jittered_snapshots(initial_df, num_timesteps, jitter_scale, seed)


@task(
@ -138,7 +138,7 @@ _DEFAULT_EMBED_ARGS: Dict[str, Any] = {"n_components": 2, "random_state": 30}
@flow(task_runner=RayTaskRunner(init_kwargs={"num_cpus": 4}))
 def embedding_flow(
    num_points: int = 5000,
-    num_snapshots: int = 48,
+    num_timesteps: int = 48,
    jitter_scale: float = 0.01,
    seed: int = 42,
    generator_path: str = "sklearn.datasets.make_s_curve",
@ -166,10 +166,10 @@ def embedding_flow(
    Path(output_dir).mkdir(parents=True, exist_ok=True)
    _generator = generator_path.split(".")[-1]
    output_ref: str = (
-        f"{output_dir.strip('/')}/{_generator}_Reference_N{num_points}_S{num_snapshots}_J{jitter_scale}_s{seed}.html"
+        f"{output_dir.strip('/')}/{_generator}_Reference_N{num_points}_T{num_timesteps}_J{jitter_scale}_s{seed}.html"
    )
    output_embed: str = (
-        f"{output_dir.strip('/')}/{_generator}_{embedder.split('.')[-1]}_N{num_points}_S{num_snapshots}_J{jitter_scale}_s{seed}.html"
+        f"{output_dir.strip('/')}/{_generator}_{embedder.split('.')[-1]}_N{num_points}_T{num_timesteps}_J{jitter_scale}_s{seed}.html"
    )
    title_ref = f"Reference: {_generator}, N={num_points} with {jitter_scale} noise"
    title_embed = f"Embedding: {embedder.split('.')[-1]} on {_generator}, N={num_points} with {jitter_scale} noise"
@ -186,27 +186,27 @@ def embedding_flow(
    # Generate snapshots
    snapshots = generate_snapshots_task.submit(
        initial_df=initial_frame.result(),
-        num_snapshots=num_snapshots,
+        num_timesteps=num_timesteps,
        jitter_scale=jitter_scale,
        seed=seed,
    )
    snapshot_list = snapshots.result()

-    # Generate corresponding dates (assuming daily snapshots for simplicity)
+    # One date per timestep (monthly, starting at 2000-01-01 for cosmetic reasons)
    dates = [
        f"{year}-{month:02d}-01"
-        for year in range(2000, 2001 + math.floor(num_snapshots / 12))
+        for year in range(2000, 2001 + math.floor(num_timesteps / 12))
        for month in range(1, 13)
-    ][:num_snapshots]
+    ][:num_timesteps]

    # Apply embeddings in parallel using Prefect's mapping
    embeddings = create_embedding.map(
        snapshot=snapshot_list,
        time_idx=dates,
-        embed_columns=[embed_columns] * num_snapshots,
-        embedder=[embedder] * num_snapshots,
-        embed_args=[merged_embed_args] * num_snapshots,
-        id_column=[id_column] * num_snapshots,
+        embed_columns=[embed_columns] * num_timesteps,
+        embedder=[embedder] * num_timesteps,
+        embed_args=[merged_embed_args] * num_timesteps,
+        id_column=[id_column] * num_timesteps,
    )

    # Collect all embeddings
--- a/flows/embedding_utils.py
+++ b/flows/embedding_utils.py
@ -425,18 +425,18 @@ def generate_initial_frame(

 def generate_jittered_snapshots(
    initial_df: pd.DataFrame,
-    num_snapshots: int,
+    num_timesteps: int,
    jitter_scale: float = 0.1,
    seed: int = 42,
 ) -> List[pd.DataFrame]:
    """
-    Generate snapshots by applying random jitter to the initial frame and randomly adding/removing points.
+    Generate one jittered snapshot per timestep, randomly adding/removing points.

    Parameters:
    - initial_df: pd.DataFrame
        The initial DataFrame to apply jitter.
-    - num_snapshots: int
-        Number of snapshots to generate.
+    - num_timesteps: int
+        Number of timesteps (one snapshot produced per timestep).
    - jitter_scale: float
        Standard deviation of the Gaussian noise added for jitter.
    - seed: int
@ -450,7 +450,7 @@ def generate_jittered_snapshots(
    snapshots = []
    current_df = initial_df.copy()

-    for i in range(num_snapshots):
+    for i in range(num_timesteps):
        # Apply jitter (set to 0 for testing)
        jitter = np.random.normal(
            loc=0.0,