rename snapshots -> timesteps
This commit is contained in:
parent
afc9b5b2f2
commit
92069a3c91
@ -361,14 +361,14 @@ def synthesize_output_paths(
|
||||
generator_path: str,
|
||||
embedder: str,
|
||||
num_points: int,
|
||||
num_snapshots: int,
|
||||
num_timesteps: int,
|
||||
jitter_scale: float,
|
||||
seed: int,
|
||||
) -> Tuple[str, str]:
|
||||
gen = generator_path.split(".")[-1]
|
||||
emb = embedder.split(".")[-1]
|
||||
ref = f"{gen}_Reference_N{num_points}_S{num_snapshots}_J{jitter_scale}_s{seed}.html"
|
||||
embf = f"{gen}_{emb}_N{num_points}_S{num_snapshots}_J{jitter_scale}_s{seed}.html"
|
||||
ref = f"{gen}_Reference_N{num_points}_T{num_timesteps}_J{jitter_scale}_s{seed}.html"
|
||||
embf = f"{gen}_{emb}_N{num_points}_T{num_timesteps}_J{jitter_scale}_s{seed}.html"
|
||||
return ref, embf
|
||||
|
||||
|
||||
@ -473,7 +473,10 @@ def _run_view(run: Dict[str, Any]) -> Dict[str, Any]:
|
||||
params.get("generator_path", "sklearn.datasets.make_s_curve"),
|
||||
params.get("embedder", "sklearn.decomposition.FactorAnalysis"),
|
||||
int(params.get("num_points", 5000)),
|
||||
int(params.get("num_snapshots", 48)),
|
||||
# Fall back to the old num_snapshots key for runs dispatched
|
||||
# before the T-rename, so historical figs still resolve after
|
||||
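# `rename 's/_S(\d+)_J/_T$1_J/' figs/*.html` (anchored on the digits and `_J`
# so an embedder name beginning with "S" is not rewritten by mistake).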
|
||||
int(params.get("num_timesteps", params.get("num_snapshots", 48))),
|
||||
float(params.get("jitter_scale", 0.01)),
|
||||
int(params.get("seed", 42)),
|
||||
)
|
||||
@ -594,7 +597,7 @@ async def submit(request: Request) -> HTMLResponse:
|
||||
|
||||
try:
|
||||
num_points = int(data.get("num_points", "5000") or 5000)
|
||||
num_snapshots = int(data.get("num_snapshots", "48") or 48)
|
||||
num_timesteps = int(data.get("num_timesteps", "48") or 48)
|
||||
jitter_scale = float(data.get("jitter_scale", "0.01") or 0.01)
|
||||
seed = int(data.get("seed", "42") or 42)
|
||||
except ValueError as e:
|
||||
@ -606,7 +609,7 @@ async def submit(request: Request) -> HTMLResponse:
|
||||
|
||||
parameters: Dict[str, Any] = {
|
||||
"num_points": num_points,
|
||||
"num_snapshots": num_snapshots,
|
||||
"num_timesteps": num_timesteps,
|
||||
"jitter_scale": jitter_scale,
|
||||
"seed": seed,
|
||||
"generator_path": generator_path,
|
||||
@ -633,7 +636,7 @@ async def submit(request: Request) -> HTMLResponse:
|
||||
)
|
||||
|
||||
ref_file, emb_file = synthesize_output_paths(
|
||||
generator_path, reducer, num_points, num_snapshots, jitter_scale, seed
|
||||
generator_path, reducer, num_points, num_timesteps, jitter_scale, seed
|
||||
)
|
||||
RUN_OUTPUTS[run["id"]] = {"ref": ref_file, "embed": emb_file}
|
||||
|
||||
|
||||
@ -172,7 +172,7 @@ async function main() {
|
||||
const hidden = {
|
||||
datasetId: document.getElementById('dataset_id'),
|
||||
numPoints: document.getElementById('num_points'),
|
||||
numSnapshots: document.getElementById('num_snapshots'),
|
||||
numTimesteps: document.getElementById('num_timesteps'),
|
||||
jitterScale: document.getElementById('jitter_scale'),
|
||||
};
|
||||
|
||||
@ -260,12 +260,12 @@ async function main() {
|
||||
});
|
||||
applyJ(parseFloat(document.querySelector('input[name="j"]:checked').value));
|
||||
|
||||
// n frames: truncates the precomputed walk. Prefix-stable (12 ⊂ 24 ⊂ 48),
|
||||
// timesteps: truncates the precomputed walk. Prefix-stable (12 ⊂ 24 ⊂ 48),
|
||||
// so toggling changes cycle length without rerolling. cycleStartMs is
|
||||
// shared so all cards animate in lockstep.
|
||||
const fInputs = document.querySelectorAll('input[name="f"]');
|
||||
function applyF(n) {
|
||||
hidden.numSnapshots.value = String(n);
|
||||
hidden.numTimesteps.value = String(n);
|
||||
const start = performance.now();
|
||||
for (const s of scenes) {
|
||||
s.numFrames = n;
|
||||
|
||||
@ -25,7 +25,7 @@
|
||||
{% if r.params %}
|
||||
<div class="paramline">
|
||||
<span><span class="k">N</span> {{ r.params.get('num_points', '?') }}</span>
|
||||
<span><span class="k">S</span> {{ r.params.get('num_snapshots', '?') }}</span>
|
||||
<span><span class="k">T</span> {{ r.params.get('num_timesteps', '?') }}</span>
|
||||
<span><span class="k">J</span> {{ r.params.get('jitter_scale', '?') }}</span>
|
||||
<span><span class="k">s</span> {{ r.params.get('seed', '?') }}</span>
|
||||
{% set ea = r.params.get('embed_args') or {} %}
|
||||
|
||||
@ -60,8 +60,8 @@
|
||||
<label><input type="radio" name="j" value="0.01"><span>0.010</span></label>
|
||||
</div>
|
||||
|
||||
<span class="ctl-label">n frames</span>
|
||||
<div class="segmented" role="radiogroup" aria-label="number of frames">
|
||||
<span class="ctl-label">timesteps</span>
|
||||
<div class="segmented" role="radiogroup" aria-label="number of timesteps">
|
||||
<label><input type="radio" name="f" value="12"><span>12</span></label>
|
||||
<label><input type="radio" name="f" value="24" checked><span>24</span></label>
|
||||
<label><input type="radio" name="f" value="48"><span>48</span></label>
|
||||
@ -98,7 +98,7 @@
|
||||
<!-- Picker-driven hidden fields. Values are written by dataset-picker.js. -->
|
||||
<input type="hidden" name="dataset_id" id="dataset_id" value="" />
|
||||
<input type="hidden" name="num_points" id="num_points" value="500" />
|
||||
<input type="hidden" name="num_snapshots" id="num_snapshots" value="24" />
|
||||
<input type="hidden" name="num_timesteps" id="num_timesteps" value="24" />
|
||||
<input type="hidden" name="jitter_scale" id="jitter_scale" value="0.005" />
|
||||
<input type="hidden" name="seed" id="seed" value="42" />
|
||||
|
||||
|
||||
@ -64,9 +64,9 @@ def generate_initial_frame_task(
|
||||
|
||||
@task(cache_policy=INPUTS, cache_expiration=timedelta(hours=12))
|
||||
def generate_snapshots_task(
|
||||
initial_df: pd.DataFrame, num_snapshots: int, jitter_scale: float, seed: int = 42
|
||||
initial_df: pd.DataFrame, num_timesteps: int, jitter_scale: float, seed: int = 42
|
||||
) -> List[pd.DataFrame]:
|
||||
return E.generate_jittered_snapshots(initial_df, num_snapshots, jitter_scale, seed)
|
||||
return E.generate_jittered_snapshots(initial_df, num_timesteps, jitter_scale, seed)
|
||||
|
||||
|
||||
@task(
|
||||
@ -138,7 +138,7 @@ _DEFAULT_EMBED_ARGS: Dict[str, Any] = {"n_components": 2, "random_state": 30}
|
||||
@flow(task_runner=RayTaskRunner(init_kwargs={"num_cpus": 4}))
|
||||
def embedding_flow(
|
||||
num_points: int = 5000,
|
||||
num_snapshots: int = 48,
|
||||
num_timesteps: int = 48,
|
||||
jitter_scale: float = 0.01,
|
||||
seed: int = 42,
|
||||
generator_path: str = "sklearn.datasets.make_s_curve",
|
||||
@ -166,10 +166,10 @@ def embedding_flow(
|
||||
Path(output_dir).mkdir(parents=True, exist_ok=True)
|
||||
_generator = generator_path.split(".")[-1]
|
||||
output_ref: str = (
|
||||
f"{output_dir.strip('/')}/{_generator}_Reference_N{num_points}_S{num_snapshots}_J{jitter_scale}_s{seed}.html"
|
||||
f"{output_dir.strip('/')}/{_generator}_Reference_N{num_points}_T{num_timesteps}_J{jitter_scale}_s{seed}.html"
|
||||
)
|
||||
output_embed: str = (
|
||||
f"{output_dir.strip('/')}/{_generator}_{embedder.split('.')[-1]}_N{num_points}_S{num_snapshots}_J{jitter_scale}_s{seed}.html"
|
||||
f"{output_dir.strip('/')}/{_generator}_{embedder.split('.')[-1]}_N{num_points}_T{num_timesteps}_J{jitter_scale}_s{seed}.html"
|
||||
)
|
||||
title_ref = f"Reference: {_generator}, N={num_points} with {jitter_scale} noise"
|
||||
title_embed = f"Embedding: {embedder.split('.')[-1]} on {_generator}, N={num_points} with {jitter_scale} noise"
|
||||
@ -186,27 +186,27 @@ def embedding_flow(
|
||||
# Generate snapshots
|
||||
snapshots = generate_snapshots_task.submit(
|
||||
initial_df=initial_frame.result(),
|
||||
num_snapshots=num_snapshots,
|
||||
num_timesteps=num_timesteps,
|
||||
jitter_scale=jitter_scale,
|
||||
seed=seed,
|
||||
)
|
||||
snapshot_list = snapshots.result()
|
||||
|
||||
# Generate corresponding dates (assuming daily snapshots for simplicity)
|
||||
# One date per timestep (monthly, starting at 2000-01-01 for cosmetic reasons)
|
||||
dates = [
|
||||
f"{year}-{month:02d}-01"
|
||||
for year in range(2000, 2001 + math.floor(num_snapshots / 12))
|
||||
for year in range(2000, 2001 + math.floor(num_timesteps / 12))
|
||||
for month in range(1, 13)
|
||||
][:num_snapshots]
|
||||
][:num_timesteps]
|
||||
|
||||
# Apply embeddings in parallel using Prefect's mapping
|
||||
embeddings = create_embedding.map(
|
||||
snapshot=snapshot_list,
|
||||
time_idx=dates,
|
||||
embed_columns=[embed_columns] * num_snapshots,
|
||||
embedder=[embedder] * num_snapshots,
|
||||
embed_args=[merged_embed_args] * num_snapshots,
|
||||
id_column=[id_column] * num_snapshots,
|
||||
embed_columns=[embed_columns] * num_timesteps,
|
||||
embedder=[embedder] * num_timesteps,
|
||||
embed_args=[merged_embed_args] * num_timesteps,
|
||||
id_column=[id_column] * num_timesteps,
|
||||
)
|
||||
|
||||
# Collect all embeddings
|
||||
|
||||
@ -425,18 +425,18 @@ def generate_initial_frame(
|
||||
|
||||
def generate_jittered_snapshots(
|
||||
initial_df: pd.DataFrame,
|
||||
num_snapshots: int,
|
||||
num_timesteps: int,
|
||||
jitter_scale: float = 0.1,
|
||||
seed: int = 42,
|
||||
) -> List[pd.DataFrame]:
|
||||
"""
|
||||
Generate snapshots by applying random jitter to the initial frame and randomly adding/removing points.
|
||||
Generate one jittered snapshot per timestep, with random point add/remove.
|
||||
|
||||
Parameters:
|
||||
- initial_df: pd.DataFrame
|
||||
The initial DataFrame to apply jitter.
|
||||
- num_snapshots: int
|
||||
Number of snapshots to generate.
|
||||
- num_timesteps: int
|
||||
Number of timesteps (one snapshot produced per timestep).
|
||||
- jitter_scale: float
|
||||
Standard deviation of the Gaussian noise added for jitter.
|
||||
- seed: int
|
||||
@ -450,7 +450,7 @@ def generate_jittered_snapshots(
|
||||
snapshots = []
|
||||
current_df = initial_df.copy()
|
||||
|
||||
for i in range(num_snapshots):
|
||||
for i in range(num_timesteps):
|
||||
# Apply jitter (set to 0 for testing)
|
||||
jitter = np.random.normal(
|
||||
loc=0.0,
|
||||
|
||||
Loading…
Reference in New Issue
Block a user