rename snapshots -> timesteps

This commit is contained in:
Michael Pilosov 2026-04-21 19:55:01 -06:00
parent afc9b5b2f2
commit 92069a3c91
6 changed files with 35 additions and 32 deletions

View File

@ -361,14 +361,14 @@ def synthesize_output_paths(
generator_path: str,
embedder: str,
num_points: int,
num_snapshots: int,
num_timesteps: int,
jitter_scale: float,
seed: int,
) -> Tuple[str, str]:
gen = generator_path.split(".")[-1]
emb = embedder.split(".")[-1]
ref = f"{gen}_Reference_N{num_points}_S{num_snapshots}_J{jitter_scale}_s{seed}.html"
embf = f"{gen}_{emb}_N{num_points}_S{num_snapshots}_J{jitter_scale}_s{seed}.html"
ref = f"{gen}_Reference_N{num_points}_T{num_timesteps}_J{jitter_scale}_s{seed}.html"
embf = f"{gen}_{emb}_N{num_points}_T{num_timesteps}_J{jitter_scale}_s{seed}.html"
return ref, embf
@ -473,7 +473,10 @@ def _run_view(run: Dict[str, Any]) -> Dict[str, Any]:
params.get("generator_path", "sklearn.datasets.make_s_curve"),
params.get("embedder", "sklearn.decomposition.FactorAnalysis"),
int(params.get("num_points", 5000)),
int(params.get("num_snapshots", 48)),
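# Fall back to the old num_snapshots key for runs dispatched
# before the T-rename, so historical figs still resolve after
# `rename 's/_S(\d+)_J/_T$1_J/' figs/*.html`. The pattern is anchored
# on the digits and the following _J because a bare s/_S/_T/ rewrites
# only the first match, which would mangle an embedder name that
# starts with S (e.g. _SpectralEmbedding) and skip the real token.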
int(params.get("num_timesteps", params.get("num_snapshots", 48))),
float(params.get("jitter_scale", 0.01)),
int(params.get("seed", 42)),
)
@ -594,7 +597,7 @@ async def submit(request: Request) -> HTMLResponse:
try:
num_points = int(data.get("num_points", "5000") or 5000)
num_snapshots = int(data.get("num_snapshots", "48") or 48)
num_timesteps = int(data.get("num_timesteps", "48") or 48)
jitter_scale = float(data.get("jitter_scale", "0.01") or 0.01)
seed = int(data.get("seed", "42") or 42)
except ValueError as e:
@ -606,7 +609,7 @@ async def submit(request: Request) -> HTMLResponse:
parameters: Dict[str, Any] = {
"num_points": num_points,
"num_snapshots": num_snapshots,
"num_timesteps": num_timesteps,
"jitter_scale": jitter_scale,
"seed": seed,
"generator_path": generator_path,
@ -633,7 +636,7 @@ async def submit(request: Request) -> HTMLResponse:
)
ref_file, emb_file = synthesize_output_paths(
generator_path, reducer, num_points, num_snapshots, jitter_scale, seed
generator_path, reducer, num_points, num_timesteps, jitter_scale, seed
)
RUN_OUTPUTS[run["id"]] = {"ref": ref_file, "embed": emb_file}

View File

@ -172,7 +172,7 @@ async function main() {
const hidden = {
datasetId: document.getElementById('dataset_id'),
numPoints: document.getElementById('num_points'),
numSnapshots: document.getElementById('num_snapshots'),
numTimesteps: document.getElementById('num_timesteps'),
jitterScale: document.getElementById('jitter_scale'),
};
@ -260,12 +260,12 @@ async function main() {
});
applyJ(parseFloat(document.querySelector('input[name="j"]:checked').value));
// n frames: truncates the precomputed walk. Prefix-stable (12 ⊂ 24 ⊂ 48),
// timesteps: truncates the precomputed walk. Prefix-stable (12 ⊂ 24 ⊂ 48),
// so toggling changes cycle length without rerolling. cycleStartMs is
// shared so all cards animate in lockstep.
const fInputs = document.querySelectorAll('input[name="f"]');
function applyF(n) {
hidden.numSnapshots.value = String(n);
hidden.numTimesteps.value = String(n);
const start = performance.now();
for (const s of scenes) {
s.numFrames = n;

View File

@ -25,7 +25,7 @@
{% if r.params %}
<div class="paramline">
<span><span class="k">N</span>&nbsp;{{ r.params.get('num_points', '?') }}</span>
<span><span class="k">S</span>&nbsp;{{ r.params.get('num_snapshots', '?') }}</span>
<span><span class="k">T</span>&nbsp;{{ r.params.get('num_timesteps', '?') }}</span>
<span><span class="k">J</span>&nbsp;{{ r.params.get('jitter_scale', '?') }}</span>
<span><span class="k">s</span>&nbsp;{{ r.params.get('seed', '?') }}</span>
{% set ea = r.params.get('embed_args') or {} %}

View File

@ -60,8 +60,8 @@
<label><input type="radio" name="j" value="0.01"><span>0.010</span></label>
</div>
<span class="ctl-label">n frames</span>
<div class="segmented" role="radiogroup" aria-label="number of frames">
<span class="ctl-label">timesteps</span>
<div class="segmented" role="radiogroup" aria-label="number of timesteps">
<label><input type="radio" name="f" value="12"><span>12</span></label>
<label><input type="radio" name="f" value="24" checked><span>24</span></label>
<label><input type="radio" name="f" value="48"><span>48</span></label>
@ -98,7 +98,7 @@
<!-- Picker-driven hidden fields. Values are written by dataset-picker.js. -->
<input type="hidden" name="dataset_id" id="dataset_id" value="" />
<input type="hidden" name="num_points" id="num_points" value="500" />
<input type="hidden" name="num_snapshots" id="num_snapshots" value="24" />
<input type="hidden" name="num_timesteps" id="num_timesteps" value="24" />
<input type="hidden" name="jitter_scale" id="jitter_scale" value="0.005" />
<input type="hidden" name="seed" id="seed" value="42" />

View File

@ -64,9 +64,9 @@ def generate_initial_frame_task(
@task(cache_policy=INPUTS, cache_expiration=timedelta(hours=12))
def generate_snapshots_task(
initial_df: pd.DataFrame, num_snapshots: int, jitter_scale: float, seed: int = 42
initial_df: pd.DataFrame, num_timesteps: int, jitter_scale: float, seed: int = 42
) -> List[pd.DataFrame]:
return E.generate_jittered_snapshots(initial_df, num_snapshots, jitter_scale, seed)
return E.generate_jittered_snapshots(initial_df, num_timesteps, jitter_scale, seed)
@task(
@ -138,7 +138,7 @@ _DEFAULT_EMBED_ARGS: Dict[str, Any] = {"n_components": 2, "random_state": 30}
@flow(task_runner=RayTaskRunner(init_kwargs={"num_cpus": 4}))
def embedding_flow(
num_points: int = 5000,
num_snapshots: int = 48,
num_timesteps: int = 48,
jitter_scale: float = 0.01,
seed: int = 42,
generator_path: str = "sklearn.datasets.make_s_curve",
@ -166,10 +166,10 @@ def embedding_flow(
Path(output_dir).mkdir(parents=True, exist_ok=True)
_generator = generator_path.split(".")[-1]
output_ref: str = (
f"{output_dir.strip('/')}/{_generator}_Reference_N{num_points}_S{num_snapshots}_J{jitter_scale}_s{seed}.html"
f"{output_dir.strip('/')}/{_generator}_Reference_N{num_points}_T{num_timesteps}_J{jitter_scale}_s{seed}.html"
)
output_embed: str = (
f"{output_dir.strip('/')}/{_generator}_{embedder.split('.')[-1]}_N{num_points}_S{num_snapshots}_J{jitter_scale}_s{seed}.html"
f"{output_dir.strip('/')}/{_generator}_{embedder.split('.')[-1]}_N{num_points}_T{num_timesteps}_J{jitter_scale}_s{seed}.html"
)
title_ref = f"Reference: {_generator}, N={num_points} with {jitter_scale} noise"
title_embed = f"Embedding: {embedder.split('.')[-1]} on {_generator}, N={num_points} with {jitter_scale} noise"
@ -186,27 +186,27 @@ def embedding_flow(
# Generate snapshots
snapshots = generate_snapshots_task.submit(
initial_df=initial_frame.result(),
num_snapshots=num_snapshots,
num_timesteps=num_timesteps,
jitter_scale=jitter_scale,
seed=seed,
)
snapshot_list = snapshots.result()
# Generate corresponding dates (assuming daily snapshots for simplicity)
# One date per timestep (monthly, starting at 2000-01-01 for cosmetic reasons)
dates = [
f"{year}-{month:02d}-01"
for year in range(2000, 2001 + math.floor(num_snapshots / 12))
for year in range(2000, 2001 + math.floor(num_timesteps / 12))
for month in range(1, 13)
][:num_snapshots]
][:num_timesteps]
# Apply embeddings in parallel using Prefect's mapping
embeddings = create_embedding.map(
snapshot=snapshot_list,
time_idx=dates,
embed_columns=[embed_columns] * num_snapshots,
embedder=[embedder] * num_snapshots,
embed_args=[merged_embed_args] * num_snapshots,
id_column=[id_column] * num_snapshots,
embed_columns=[embed_columns] * num_timesteps,
embedder=[embedder] * num_timesteps,
embed_args=[merged_embed_args] * num_timesteps,
id_column=[id_column] * num_timesteps,
)
# Collect all embeddings

View File

@ -425,18 +425,18 @@ def generate_initial_frame(
def generate_jittered_snapshots(
initial_df: pd.DataFrame,
num_snapshots: int,
num_timesteps: int,
jitter_scale: float = 0.1,
seed: int = 42,
) -> List[pd.DataFrame]:
"""
Generate snapshots by applying random jitter to the initial frame and randomly adding/removing points.
Generate one jittered snapshot per timestep, with random point add/remove.
Parameters:
- initial_df: pd.DataFrame
The initial DataFrame to apply jitter.
- num_snapshots: int
Number of snapshots to generate.
- num_timesteps: int
Number of timesteps (one snapshot produced per timestep).
- jitter_scale: float
Standard deviation of the Gaussian noise added for jitter.
- seed: int
@ -450,7 +450,7 @@ def generate_jittered_snapshots(
snapshots = []
current_df = initial_df.copy()
for i in range(num_snapshots):
for i in range(num_timesteps):
# Apply jitter (set to 0 for testing)
jitter = np.random.normal(
loc=0.0,