Compare commits
4 Commits
ac511c942f
...
92069a3c91
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
92069a3c91 | ||
|
|
afc9b5b2f2 | ||
|
|
ca0ad9fd2d | ||
|
|
32c1738e95 |
@ -13,6 +13,7 @@ from __future__ import annotations
|
||||
import importlib.util
|
||||
import json
|
||||
import os
|
||||
from functools import lru_cache
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
@ -21,6 +22,13 @@ from fastapi import FastAPI, Form, Request
|
||||
from fastapi.responses import HTMLResponse, JSONResponse
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
from fastapi.templating import Jinja2Templates
|
||||
from sklearn.datasets import (
|
||||
make_blobs,
|
||||
make_classification,
|
||||
make_gaussian_quantiles,
|
||||
make_s_curve,
|
||||
make_swiss_roll,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@ -35,11 +43,137 @@ FIGS_DIR.mkdir(parents=True, exist_ok=True)
|
||||
PREFECT_API = os.environ.get("PREFECT_API_URL", "http://localhost:4200/api")
|
||||
DEPLOYMENT_NAME = "embedding-flow/embedding-flow"
|
||||
|
||||
GENERATOR_OPTIONS = [
|
||||
("sklearn.datasets.make_s_curve", "make_s_curve"),
|
||||
("sklearn.datasets.make_swiss_roll", "make_swiss_roll"),
|
||||
("sklearn.datasets.make_blobs", "make_blobs"),
|
||||
]
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Dataset catalogue
|
||||
# ---------------------------------------------------------------------------
|
||||
# Metadata for the /data.json endpoint consumed by the dataset picker, and
|
||||
# for server-side lookup when the picker posts its selection back. kwargs
|
||||
# must carry n_features=3 for generators that aren't already 3-D, since
|
||||
# they'll be forwarded verbatim to the Prefect flow's generator_kwargs.
|
||||
|
||||
DATASET_PREVIEW_N = 5000
|
||||
DATASET_PREVIEW_SEED = 0
|
||||
|
||||
DATASET_META: Dict[str, Dict[str, Any]] = {
|
||||
"s_curve": {
|
||||
"name": "S-Curve",
|
||||
"path": "sklearn.datasets.make_s_curve",
|
||||
"kwargs": {},
|
||||
"description": (
|
||||
"A 2-D manifold warped into R³. Continuous label encodes position "
|
||||
"along the curve — a good test of whether a reducer unrolls the "
|
||||
"sheet without tearing."
|
||||
),
|
||||
"kind": "continuous",
|
||||
},
|
||||
"swiss_roll": {
|
||||
"name": "Swiss Roll",
|
||||
"path": "sklearn.datasets.make_swiss_roll",
|
||||
"kwargs": {},
|
||||
"description": (
|
||||
"A rolled-up plane. The canonical hard case for linear methods: "
|
||||
"PCA collapses the spiral, non-linear methods should recover the "
|
||||
"unroll."
|
||||
),
|
||||
"kind": "continuous",
|
||||
},
|
||||
"swiss_roll_hole": {
|
||||
"name": "Swiss Roll (hole)",
|
||||
"path": "sklearn.datasets.make_swiss_roll",
|
||||
"kwargs": {"hole": True},
|
||||
"description": (
|
||||
"Swiss roll with a rectangular hole punched through. Same manifold, "
|
||||
"non-trivial topology — a faithful unroll should preserve the hole "
|
||||
"rather than smearing it closed."
|
||||
),
|
||||
"kind": "continuous",
|
||||
},
|
||||
"blobs": {
|
||||
"name": "Gaussian Blobs",
|
||||
"path": "sklearn.datasets.make_blobs",
|
||||
"kwargs": {"n_features": 3, "centers": 5, "cluster_std": 1.0},
|
||||
"description": (
|
||||
"Five isotropic Gaussian clusters in R³. Discrete class labels. "
|
||||
"Tests whether a reducer preserves cluster separation when "
|
||||
"projected to 2-D."
|
||||
),
|
||||
"kind": "categorical",
|
||||
},
|
||||
"gaussian_quantiles": {
|
||||
"name": "Gaussian Quantiles",
|
||||
"path": "sklearn.datasets.make_gaussian_quantiles",
|
||||
"kwargs": {"n_features": 3, "n_classes": 4},
|
||||
"description": (
|
||||
"Concentric Gaussian shells in R³; class = which shell. Classes "
|
||||
"are linearly inseparable by construction — PCA collapses them, "
|
||||
"kernel and manifold methods have a chance."
|
||||
),
|
||||
"kind": "categorical",
|
||||
},
|
||||
"classification": {
|
||||
"name": "Hypercube Clusters",
|
||||
"path": "sklearn.datasets.make_classification",
|
||||
"kwargs": {
|
||||
"n_features": 3,
|
||||
"n_informative": 3,
|
||||
"n_redundant": 0,
|
||||
"n_repeated": 0,
|
||||
"n_classes": 4,
|
||||
"n_clusters_per_class": 2,
|
||||
"class_sep": 1.5,
|
||||
},
|
||||
"description": (
|
||||
"Four classes, two sub-clusters each, placed at hypercube vertices "
|
||||
"with informative noise. A denser discrete test than blobs — "
|
||||
"within-class bimodality stresses cluster-preserving reducers."
|
||||
),
|
||||
"kind": "categorical",
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
@lru_cache(maxsize=1)
def _dataset_previews() -> Dict[str, Dict[str, Any]]:
    """Return the dataset catalogue with preview points and labels attached.

    Each ``DATASET_META`` entry is sampled with its own ``kwargs`` at
    ``DATASET_PREVIEW_N`` points using ``DATASET_PREVIEW_SEED``, so the
    preview cannot drift from the arguments stored in the catalogue.
    The result is memoised by ``lru_cache``: sampling runs once per process
    and every caller receives the same dict object (treat it as read-only).

    Returns:
        Mapping of dataset id to its catalogue metadata plus ``"points"``
        and ``"labels"`` converted to plain lists.

    Raises:
        KeyError: if a catalogue entry names a generator path that is not
            listed in the local ``generators`` table.
    """
    # Catalogue "path" strings -> the sklearn callables imported by this module.
    generators = {
        "sklearn.datasets.make_s_curve": make_s_curve,
        "sklearn.datasets.make_swiss_roll": make_swiss_roll,
        "sklearn.datasets.make_blobs": make_blobs,
        "sklearn.datasets.make_gaussian_quantiles": make_gaussian_quantiles,
        "sklearn.datasets.make_classification": make_classification,
    }
    # Noise is a preview-only setting: it is not part of the catalogue kwargs,
    # so it is layered on top here instead of being stored in DATASET_META.
    preview_noise = {
        "s_curve": 0.03,
        "swiss_roll": 0.15,
        "swiss_roll_hole": 0.15,
    }

    previews: Dict[str, Dict[str, Any]] = {}
    for key, meta in DATASET_META.items():
        # Copy so the shared catalogue entry is never mutated.
        kwargs = dict(meta["kwargs"])
        if key in preview_noise:
            kwargs["noise"] = preview_noise[key]
        points, labels = generators[meta["path"]](
            n_samples=DATASET_PREVIEW_N,
            random_state=DATASET_PREVIEW_SEED,
            **kwargs,
        )
        previews[key] = {
            **meta,
            "points": points.tolist(),
            "labels": labels.tolist(),
        }
    return previews
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@ -227,14 +361,14 @@ def synthesize_output_paths(
|
||||
generator_path: str,
|
||||
embedder: str,
|
||||
num_points: int,
|
||||
num_snapshots: int,
|
||||
num_timesteps: int,
|
||||
jitter_scale: float,
|
||||
seed: int,
|
||||
) -> Tuple[str, str]:
|
||||
gen = generator_path.split(".")[-1]
|
||||
emb = embedder.split(".")[-1]
|
||||
ref = f"{gen}_Reference_N{num_points}_S{num_snapshots}_J{jitter_scale}_s{seed}.html"
|
||||
embf = f"{gen}_{emb}_N{num_points}_S{num_snapshots}_J{jitter_scale}_s{seed}.html"
|
||||
ref = f"{gen}_Reference_N{num_points}_T{num_timesteps}_J{jitter_scale}_s{seed}.html"
|
||||
embf = f"{gen}_{emb}_N{num_points}_T{num_timesteps}_J{jitter_scale}_s{seed}.html"
|
||||
return ref, embf
|
||||
|
||||
|
||||
@ -339,7 +473,10 @@ def _run_view(run: Dict[str, Any]) -> Dict[str, Any]:
|
||||
params.get("generator_path", "sklearn.datasets.make_s_curve"),
|
||||
params.get("embedder", "sklearn.decomposition.FactorAnalysis"),
|
||||
int(params.get("num_points", 5000)),
|
||||
int(params.get("num_snapshots", 48)),
|
||||
# Fallback to the old num_snapshots key for runs dispatched
|
||||
# before the T-rename, so historical figs still resolve after
|
||||
# `rename 's/_S/_T/' figs/*.html`.
|
||||
int(params.get("num_timesteps", params.get("num_snapshots", 48))),
|
||||
float(params.get("jitter_scale", 0.01)),
|
||||
int(params.get("seed", 42)),
|
||||
)
|
||||
@ -394,7 +531,6 @@ async def index(request: Request) -> HTMLResponse:
|
||||
"reducers": reducers,
|
||||
"default_reducer": default_reducer,
|
||||
"default_spec": default_spec,
|
||||
"generators": GENERATOR_OPTIONS,
|
||||
"runs": views,
|
||||
"deployment_id": dep_id,
|
||||
"prefect_api": PREFECT_API,
|
||||
@ -402,6 +538,11 @@ async def index(request: Request) -> HTMLResponse:
|
||||
)
|
||||
|
||||
|
||||
@app.get("/data.json")
async def data_json() -> JSONResponse:
    """Serve the memoised dataset previews as JSON for the dataset picker."""
    previews = _dataset_previews()
    return JSONResponse(previews)
|
||||
|
||||
|
||||
@app.get("/reducer-form", response_class=HTMLResponse)
|
||||
async def reducer_form(request: Request, name: str) -> HTMLResponse:
|
||||
spec = REDUCERS.get(name)
|
||||
@ -436,27 +577,39 @@ async def submit(request: Request) -> HTMLResponse:
|
||||
status_code=400,
|
||||
)
|
||||
|
||||
# Data params
|
||||
# Dataset came from the picker via dataset_id; fall back to explicit
|
||||
# generator_path / generator_kwargs if a client posts those directly.
|
||||
dataset_id = data.get("dataset_id") or ""
|
||||
if dataset_id and dataset_id in DATASET_META:
|
||||
meta = DATASET_META[dataset_id]
|
||||
generator_path = meta["path"]
|
||||
generator_kwargs = dict(meta["kwargs"])
|
||||
else:
|
||||
generator_path = data.get("generator_path") or "sklearn.datasets.make_s_curve"
|
||||
raw_kwargs = data.get("generator_kwargs") or ""
|
||||
try:
|
||||
generator_kwargs = json.loads(raw_kwargs) if raw_kwargs else {}
|
||||
except json.JSONDecodeError as e:
|
||||
return HTMLResponse(
|
||||
f"<div class='flash err'>bad generator_kwargs JSON: {e}</div>",
|
||||
status_code=400,
|
||||
)
|
||||
|
||||
try:
|
||||
num_points = int(data.get("num_points", "5000") or 5000)
|
||||
num_snapshots = int(data.get("num_snapshots", "48") or 48)
|
||||
num_timesteps = int(data.get("num_timesteps", "48") or 48)
|
||||
jitter_scale = float(data.get("jitter_scale", "0.01") or 0.01)
|
||||
seed = int(data.get("seed", "42") or 42)
|
||||
except ValueError as e:
|
||||
return HTMLResponse(
|
||||
f"<div class='flash err'>bad numeric input: {e}</div>", status_code=400
|
||||
)
|
||||
generator_path = data.get("generator_path") or "sklearn.datasets.make_s_curve"
|
||||
|
||||
embed_args = build_embed_args(reducer, data)
|
||||
|
||||
generator_kwargs: Dict[str, Any] = {}
|
||||
if generator_path.endswith("make_blobs"):
|
||||
generator_kwargs["n_features"] = 3
|
||||
|
||||
parameters: Dict[str, Any] = {
|
||||
"num_points": num_points,
|
||||
"num_snapshots": num_snapshots,
|
||||
"num_timesteps": num_timesteps,
|
||||
"jitter_scale": jitter_scale,
|
||||
"seed": seed,
|
||||
"generator_path": generator_path,
|
||||
@ -483,7 +636,7 @@ async def submit(request: Request) -> HTMLResponse:
|
||||
)
|
||||
|
||||
ref_file, emb_file = synthesize_output_paths(
|
||||
generator_path, reducer, num_points, num_snapshots, jitter_scale, seed
|
||||
generator_path, reducer, num_points, num_timesteps, jitter_scale, seed
|
||||
)
|
||||
RUN_OUTPUTS[run["id"]] = {"ref": ref_file, "embed": emb_file}
|
||||
|
||||
367
app/web/static/dataset-picker.js
Normal file
367
app/web/static/dataset-picker.js
Normal file
@ -0,0 +1,367 @@
|
||||
// Dataset picker — ported from app/demo/index.html. Renders the six sklearn
|
||||
// previews into cards, streams jittered random walks, and writes the current
|
||||
// selection into hidden form inputs so the main <form> can submit it to the
|
||||
// Prefect flow.
|
||||
import * as THREE from 'three';
|
||||
import { OrbitControls } from 'three/addons/controls/OrbitControls.js';
|
||||
|
||||
// Trajectories are precomputed at the max cycle length. Toggling n-frames
|
||||
// truncates (12 is a prefix of 24 is a prefix of 48) so the same walk is
|
||||
// reused — no reroll on toggle, and the per-frame pulse stays consistent.
|
||||
const MAX_FRAMES = 48;
|
||||
|
||||
const CATEGORICAL_HEX = [
|
||||
'#1f4e5f', '#c97b3f', '#8b5a9f', '#5a8560',
|
||||
'#c74a5e', '#6b7d8f', '#b89f51', '#4a6fa5',
|
||||
];
|
||||
const CATEGORICAL = CATEGORICAL_HEX.map(h => new THREE.Color(h));
|
||||
|
||||
// Colour ramp for continuous labels. `t` is the normalised label value
// (buildColors passes a 0..1 fraction); the hue is interpolated linearly
// from 215° at t = 0 to 28° at t = 1, with a slight lightness lift.
function rampContinuous(t) {
  const hueDegrees = 215 * (1 - t) + 28 * t;
  const saturation = 0.62;
  const lightness = 0.50 + (t - 0.5) * 0.08;
  const color = new THREE.Color();
  return color.setHSL(hueDegrees / 360, saturation, lightness);
}
|
||||
|
||||
// Centre a list of [x, y, z] points on their mean and rescale them so the
// 95th-percentile point sits at unit distance (largest-coordinate metric).
// Returns { positions } as a flat Float32Array of length 3 * points.length.
function normalize(points) {
  const count = points.length;

  // Centroid.
  let sumX = 0, sumY = 0, sumZ = 0;
  for (let i = 0; i < count; i++) {
    sumX += points[i][0];
    sumY += points[i][1];
    sumZ += points[i][2];
  }
  const cx = sumX / count;
  const cy = sumY / count;
  const cz = sumZ / count;

  // Per-point largest absolute offset from the centroid. Scaling by its
  // p95 rather than its maximum keeps long-tailed distributions
  // (gaussian_quantiles / classification) from shrinking to a fraction of
  // the viewport.
  const reach = new Float64Array(count);
  for (let i = 0; i < count; i++) {
    const dx = Math.abs(points[i][0] - cx);
    const dy = Math.abs(points[i][1] - cy);
    const dz = Math.abs(points[i][2] - cz);
    let peak;
    if (dx > dy) {
      peak = dx > dz ? dx : dz;
    } else {
      peak = dy > dz ? dy : dz;
    }
    reach[i] = peak;
  }
  const ranked = [...reach].sort((lhs, rhs) => lhs - rhs);
  // Floor at 1e-9 so a degenerate cloud never divides by zero.
  const scale = Math.max(ranked[Math.floor(count * 0.95)], 1e-9);

  const flat = new Float32Array(count * 3);
  let o = 0;
  for (let i = 0; i < count; i++) {
    flat[o++] = (points[i][0] - cx) / scale;
    flat[o++] = (points[i][1] - cy) / scale;
    flat[o++] = (points[i][2] - cz) / scale;
  }
  return { positions: flat };
}
|
||||
|
||||
// Build the per-vertex RGB buffer (Float32Array, 3 floats per label).
// 'categorical' labels index the CATEGORICAL palette modulo its length;
// any other kind is min-max normalised and run through rampContinuous.
function buildColors(labels, kind) {
  const count = labels.length;
  const rgb = new Float32Array(count * 3);
  const put = (idx, color) => {
    const at = idx * 3;
    rgb[at] = color.r;
    rgb[at + 1] = color.g;
    rgb[at + 2] = color.b;
  };

  if (kind !== 'categorical') {
    let lo = Infinity;
    let hi = -Infinity;
    for (const value of labels) {
      if (value < lo) lo = value;
      if (value > hi) hi = value;
    }
    // All-equal labels would give a zero span; fall back to 1.
    const span = (hi - lo) || 1;
    for (let i = 0; i < count; i++) {
      put(i, rampContinuous((labels[i] - lo) / span));
    }
    return rgb;
  }

  for (let i = 0; i < count; i++) {
    put(i, CATEGORICAL[labels[i] % CATEGORICAL.length]);
  }
  return rgb;
}
|
||||
|
||||
// Build one preview: a three.js point cloud for `dataset`, rendered into a
// new canvas appended to `container`, with auto-rotating orbit controls.
// Returns the per-scene state record that buildTrajectories, sizeScene and
// the animation loop operate on.
function createScene(container, dataset) {
  const basePositions = normalize(dataset.points).positions;
  const vertexColors = buildColors(dataset.labels, dataset.kind);
  // The geometry gets its own copy so the animation can overwrite it every
  // frame while basePositions keeps the rest pose.
  const livePositions = basePositions.slice();

  const geometry = new THREE.BufferGeometry();
  geometry.setAttribute('position', new THREE.BufferAttribute(livePositions, 3));
  geometry.setAttribute('color', new THREE.BufferAttribute(vertexColors, 3));

  const cloud = new THREE.Points(
    geometry,
    new THREE.PointsMaterial({
      size: 2.1,
      sizeAttenuation: false,
      vertexColors: true,
      transparent: true,
      opacity: 0.92,
    }),
  );

  const scene = new THREE.Scene();
  scene.background = new THREE.Color(0xf2eee4);
  scene.add(cloud);

  const camera = new THREE.PerspectiveCamera(42, 1, 0.1, 100);
  camera.position.set(2.6, 1.9, 2.6);
  camera.lookAt(0, 0, 0);

  const renderer = new THREE.WebGLRenderer({ antialias: true, alpha: false });
  renderer.setPixelRatio(Math.min(window.devicePixelRatio, 2));
  container.appendChild(renderer.domElement);

  const controls = new OrbitControls(camera, renderer.domElement);
  Object.assign(controls, {
    enableDamping: true,
    dampingFactor: 0.08,
    enablePan: false,
    autoRotate: true,
    autoRotateSpeed: 0.55,
    minDistance: 1.5,
    maxDistance: 6,
  });

  return {
    scene,
    camera,
    renderer,
    controls,
    container,
    geometry,
    basePositions,
    // Filled in by buildTrajectories().
    trajectories: null,
    numFrames: 0,
    // Duration of one walk frame: 12 frames per second.
    snapshotMs: 1000 / 12,
    // Rest period appended to every cycle; without it the un-jittered
    // pose would be shown for zero time.
    holdMs: 200,
    // applyF() resets this on every scene at once, which keeps all the
    // previews in lockstep across n-frames toggles.
    cycleStartMs: 0,
  };
}
|
||||
|
||||
// Precompute a Gaussian random walk for every coordinate of scene `s`.
// The buffer holds `numFrames` frames of s.basePositions.length offsets
// each; frame 0 stays all-zero (the rest pose) and every later frame adds
// a standard-normal step (Box-Muller, two samples per pair of uniforms)
// to the frame before it. Stores the result on s.trajectories and
// s.numFrames.
function buildTrajectories(s, numFrames) {
  const stride = s.basePositions.length;
  const walk = new Float32Array(numFrames * stride);
  const TWO_PI = 2 * Math.PI;

  for (let frame = 1; frame < numFrames; frame++) {
    const here = frame * stride;
    const before = here - stride;
    for (let k = 0; k < stride; k += 2) {
      // Math.random() can return exactly 0, and log(0) is -Infinity.
      const u1 = Math.random() || 1e-12;
      const u2 = Math.random();
      const radius = Math.sqrt(-2.0 * Math.log(u1));
      const angle = TWO_PI * u2;
      walk[here + k] = walk[before + k] + radius * Math.cos(angle);
      if (k + 1 < stride) {
        walk[here + k + 1] = walk[before + k + 1] + radius * Math.sin(angle);
      }
    }
  }

  s.trajectories = walk;
  s.numFrames = numFrames;
}
|
||||
|
||||
// Resize the scene's renderer to a square whose side is the container's
// current width (whole pixels, never below 1) and refresh the camera
// projection for the 1:1 aspect.
function sizeScene(s) {
  const { width } = s.container.getBoundingClientRect();
  const side = Math.max(1, Math.floor(width));
  s.renderer.setSize(side, side);
  s.camera.aspect = 1;
  s.camera.updateProjectionMatrix();
}
|
||||
|
||||
// Boot the dataset picker:
//   1. fetch /data.json and render one card + three.js scene per dataset;
//   2. wire the point-count slider, jitter and timestep radios, keyboard
//      shortcuts and the continue button, mirroring every choice into the
//      hidden inputs the main <form> submits;
//   3. start the shared animation loop.
// On a failed load the gallery shows an error notice and nothing is wired.
async function main() {
  const gallery = document.getElementById('gallery');
  let data;
  try {
    const res = await fetch('/data.json');
    // fetch() only rejects on network failure; an HTTP error status still
    // resolves, so turn it into an explicit failure before parsing.
    if (!res.ok) throw new Error(`HTTP ${res.status}`);
    data = await res.json();
  } catch (err) {
    // textContent, not innerHTML: parse errors routinely contain '<'
    // (e.g. "Unexpected token '<'") and must not be interpreted as markup.
    const notice = document.createElement('div');
    notice.className = 'picker-loading';
    notice.textContent = `failed to load /data.json — ${err}`;
    gallery.innerHTML = '';
    gallery.appendChild(notice);
    return;
  }

  gallery.innerHTML = '';
  const scenes = [];
  const order = Object.entries(data);
  let selectedId = null;

  // Hidden form inputs the main <form> will submit.
  const hidden = {
    datasetId: document.getElementById('dataset_id'),
    numPoints: document.getElementById('num_points'),
    numTimesteps: document.getElementById('num_timesteps'),
    jitterScale: document.getElementById('jitter_scale'),
  };

  const pickerDetails = document.getElementById('picker');
  const summaryPath = document.getElementById('picker-summary-path');
  const selectedPath = document.getElementById('selected-path');
  const continueBtn = document.getElementById('continue-btn');

  // Re-fit a scene's renderer whenever its .viz container changes size.
  const vizToScene = new WeakMap();
  const sizeObserver = new ResizeObserver((entries) => {
    for (const entry of entries) {
      const s = vizToScene.get(entry.target);
      if (s) sizeScene(s);
    }
  });

  order.forEach(([id, ds], i) => {
    const card = document.createElement('div');
    card.className = 'card';
    card.dataset.id = id;
    card.innerHTML = `
      <div class="viz">
        <span class="fig-label">Fig. 1.${i + 1}</span>
        <span class="key-hint">[${i + 1}]</span>
        <span class="controls-hint">drag · scroll</span>
      </div>
      <div class="card-body">
        <div class="card-label">
          <span class="dot"></span>
          <span>${ds.name}</span>
        </div>
        <div class="card-path">${ds.path}</div>
        <div class="card-desc">${ds.description}</div>
      </div>
    `;
    gallery.appendChild(card);

    const viz = card.querySelector('.viz');
    const s = createScene(viz, ds);
    buildTrajectories(s, MAX_FRAMES);
    scenes.push(s);
    vizToScene.set(viz, s);
    sizeObserver.observe(viz);

    // Stop auto-rotation as soon as the user starts interacting.
    s.controls.addEventListener('start', () => { s.controls.autoRotate = false; });

    card.addEventListener('click', () => selectCard(id, card, ds));
  });

  // Mark `card` as the selected dataset and mirror it into the form.
  function selectCard(id, card, ds) {
    document.querySelectorAll('#gallery .card').forEach(c => c.classList.remove('selected'));
    card.classList.add('selected');
    selectedId = id;
    selectedPath.textContent = ds.path;
    hidden.datasetId.value = id;
    updateContinue();
  }

  // The continue button is only usable once a dataset is selected.
  function updateContinue() {
    continueBtn.disabled = !selectedId;
    continueBtn.title = selectedId ? '' : 'pick a dataset first';
  }

  // Point count: shown in the readout, submitted with the form, and applied
  // to each preview as a draw range capped at the points actually loaded.
  const slider = document.getElementById('n-slider');
  const nValue = document.getElementById('n-value');
  function applyN(n) {
    nValue.textContent = n.toLocaleString();
    hidden.numPoints.value = String(n);
    for (const s of scenes) {
      const cap = s.geometry.attributes.position.count;
      s.geometry.setDrawRange(0, Math.min(n, cap));
    }
  }
  slider.addEventListener('input', (e) => applyN(parseInt(e.target.value, 10)));
  applyN(parseInt(slider.value, 10));

  // Jitter scale: multiplies the precomputed walk offsets in tick().
  let jitterScale = 0;
  const jInputs = document.querySelectorAll('input[name="j"]');
  function applyJ(v) {
    jitterScale = v;
    hidden.jitterScale.value = String(v);
  }
  jInputs.forEach(input => {
    input.addEventListener('change', (e) => applyJ(parseFloat(e.target.value)));
  });
  applyJ(parseFloat(document.querySelector('input[name="j"]:checked').value));

  // timesteps: truncates the precomputed walk. Prefix-stable (12 ⊂ 24 ⊂ 48),
  // so toggling changes cycle length without rerolling. cycleStartMs is
  // shared so all cards animate in lockstep.
  const fInputs = document.querySelectorAll('input[name="f"]');
  function applyF(n) {
    hidden.numTimesteps.value = String(n);
    const start = performance.now();
    // Only MAX_FRAMES frames were precomputed; a larger request is still
    // submitted as-is, but the preview is clamped so tick() never reads
    // past the end of the trajectory buffer (which would yield NaN).
    const previewFrames = Math.min(n, MAX_FRAMES);
    for (const s of scenes) {
      s.numFrames = previewFrames;
      s.cycleStartMs = start;
    }
  }
  fInputs.forEach(input => {
    input.addEventListener('change', (e) => applyF(parseInt(e.target.value, 10)));
  });
  applyF(parseInt(document.querySelector('input[name="f"]:checked').value, 10));

  // Select the idx-th dataset (no-op when out of range), optionally
  // scrolling its card into view.
  function selectByIndex(idx, { scroll = true } = {}) {
    const entry = order[idx];
    if (!entry) return;
    const [id, ds] = entry;
    const card = gallery.children[idx];
    if (!card) return;
    selectCard(id, card, ds);
    if (scroll) card.scrollIntoView({ behavior: 'smooth', inline: 'nearest', block: 'nearest' });
  }

  // Keyboard: digits 1-9 pick a card directly, left/right arrows step.
  document.addEventListener('keydown', (e) => {
    if (!pickerDetails.open) return;
    if (e.target.tagName === 'INPUT' || e.target.tagName === 'TEXTAREA') return;
    // Leave browser/OS chords (e.g. Ctrl+1, Alt+Left) alone.
    if (e.ctrlKey || e.metaKey || e.altKey) return;
    if (/^[1-9]$/.test(e.key)) {
      selectByIndex(parseInt(e.key, 10) - 1);
      return;
    }
    if (e.key === 'ArrowRight' || e.key === 'ArrowLeft') {
      e.preventDefault();
      const currentIdx = order.findIndex(([id]) => id === selectedId);
      const n = order.length;
      // With nothing selected, Right starts at the first card and Left at
      // the last; otherwise step by one and stop at the ends.
      const nextIdx = e.key === 'ArrowRight'
        ? (currentIdx < 0 ? 0 : Math.min(currentIdx + 1, n - 1))
        : (currentIdx < 0 ? n - 1 : Math.max(currentIdx - 1, 0));
      selectByIndex(nextIdx);
    }
  });

  // Confirm: show the chosen generator in the summary and collapse.
  continueBtn.addEventListener('click', () => {
    if (!selectedId) return;
    const ds = data[selectedId];
    summaryPath.textContent = ds.path;
    pickerDetails.open = false;
  });

  // Animation loop: each cycle plays numFrames walk frames (linearly
  // interpolated between consecutive frames), then holds the rest pose
  // for holdMs.
  function tick() {
    requestAnimationFrame(tick);
    // When the picker is collapsed the canvases are display:none inside a
    // closed <details>; rects are zero. Skip the per-frame work.
    if (!pickerDetails.open) return;
    const now = performance.now();
    for (const s of scenes) {
      const N = s.numFrames;
      const n = s.basePositions.length;
      const walkMs = N * s.snapshotMs;
      const cycleMs = walkMs + s.holdMs;
      // Double modulo keeps the phase non-negative.
      const elapsed = ((now - s.cycleStartMs) % cycleMs + cycleMs) % cycleMs;

      // Only update the vertices inside the current draw range.
      const total = n / 3;
      const drawCount = s.geometry.drawRange.count;
      const visibleN = Number.isFinite(drawCount) ? Math.min(drawCount, total) : total;
      const limit = visibleN * 3;
      const pos = s.geometry.attributes.position.array;
      const base = s.basePositions;

      if (elapsed >= walkMs) {
        // Hold phase: rest pose.
        for (let i = 0; i < limit; i++) pos[i] = base[i];
      } else {
        const frameF = elapsed / s.snapshotMs;
        const frameIdx = Math.floor(frameF);
        const interpT = frameF - frameIdx;
        // The last frame blends back towards frame 0 (all-zero offsets).
        const nextIdx = (frameIdx + 1) % N;
        const aOff = frameIdx * n;
        const bOff = nextIdx * n;
        const tr = s.trajectories;
        const scale = jitterScale;
        const u = 1 - interpT;
        for (let i = 0; i < limit; i++) {
          pos[i] = base[i] + (tr[aOff + i] * u + tr[bOff + i] * interpT) * scale;
        }
      }
      s.geometry.attributes.position.needsUpdate = true;

      s.controls.update();
      s.renderer.render(s.scene, s.camera);
    }
  }
  tick();

  // Reopening the picker after it's been closed: canvases may have been
  // laid out at zero size while hidden. Re-measure on toggle.
  pickerDetails.addEventListener('toggle', () => {
    if (pickerDetails.open) {
      for (const s of scenes) sizeScene(s);
    }
  });
}
|
||||
|
||||
main();
|
||||
@ -513,3 +513,351 @@ button.submit:disabled { background: var(--faint); border-color: var(--faint); c
|
||||
}
|
||||
.htmx-request .htmx-indicator { opacity: 1; }
|
||||
.htmx-request.htmx-indicator { opacity: 1; }
|
||||
|
||||
/* ---------- dataset picker (§ 1) -------------------------------------- */
|
||||
/* Collapsible <details> below the masthead; expands to a gallery of 3D
|
||||
dataset previews, collapses to a one-line summary chip once confirmed. */
|
||||
|
||||
.dataset-picker {
|
||||
--picker-panel: #f2eee4;
|
||||
--picker-hair: #d8d3c6;
|
||||
border-bottom: 1px solid var(--rule);
|
||||
background: var(--page);
|
||||
}
|
||||
.dataset-picker > summary {
|
||||
list-style: none;
|
||||
cursor: pointer;
|
||||
padding: 0.9rem 2.2rem;
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
align-items: center;
|
||||
gap: 1rem;
|
||||
user-select: none;
|
||||
}
|
||||
.dataset-picker > summary::-webkit-details-marker { display: none; }
|
||||
.dataset-picker > summary:hover { background: var(--accent-tint); }
|
||||
.dataset-picker .picker-meta {
|
||||
display: flex;
|
||||
align-items: baseline;
|
||||
gap: 1rem;
|
||||
flex-wrap: wrap;
|
||||
min-width: 0;
|
||||
}
|
||||
.dataset-picker .section-number {
|
||||
font-family: var(--mono);
|
||||
font-size: 0.78rem;
|
||||
font-weight: 600;
|
||||
color: var(--accent);
|
||||
letter-spacing: 0;
|
||||
}
|
||||
.dataset-picker .picker-title {
|
||||
font-family: var(--serif);
|
||||
font-size: 1rem;
|
||||
color: var(--ink);
|
||||
font-style: italic;
|
||||
}
|
||||
.dataset-picker .picker-selection {
|
||||
font-size: 0.78rem;
|
||||
color: var(--mute);
|
||||
display: inline-flex;
|
||||
align-items: baseline;
|
||||
gap: 0.5rem;
|
||||
}
|
||||
.dataset-picker .picker-selection .lbl {
|
||||
font-family: var(--mono);
|
||||
font-size: 0.68rem;
|
||||
letter-spacing: 0.08em;
|
||||
text-transform: uppercase;
|
||||
color: var(--faint);
|
||||
}
|
||||
.dataset-picker .picker-selection code {
|
||||
font-family: var(--mono);
|
||||
font-size: 0.78rem;
|
||||
color: var(--accent);
|
||||
}
|
||||
.dataset-picker .picker-toggle {
|
||||
font-family: var(--mono);
|
||||
font-size: 0.74rem;
|
||||
color: var(--faint);
|
||||
letter-spacing: 0;
|
||||
}
|
||||
.dataset-picker .picker-toggle::before { content: "[ edit ]"; }
|
||||
.dataset-picker[open] .picker-toggle::before { content: "[ collapse ]"; color: var(--accent); }
|
||||
|
||||
.dataset-picker .picker-body {
|
||||
padding: 0.4rem 2.2rem 1.6rem;
|
||||
max-width: 1440px;
|
||||
margin: 0 auto;
|
||||
}
|
||||
.dataset-picker .lede {
|
||||
color: var(--mute);
|
||||
font-family: var(--serif);
|
||||
font-style: italic;
|
||||
font-size: 0.88rem;
|
||||
max-width: 62ch;
|
||||
margin: 0.2rem 0 0.9rem;
|
||||
}
|
||||
.dataset-picker .lede kbd {
|
||||
font-family: var(--mono);
|
||||
font-size: 0.72rem;
|
||||
background: var(--panel);
|
||||
border: 1px solid var(--rule);
|
||||
padding: 0 5px;
|
||||
margin: 0 1px;
|
||||
color: var(--ink);
|
||||
}
|
||||
|
||||
.picker-controls {
|
||||
display: grid;
|
||||
grid-template-columns: auto 1fr auto;
|
||||
align-items: center;
|
||||
column-gap: 1rem;
|
||||
row-gap: 0.55rem;
|
||||
padding: 0.75rem 0;
|
||||
border-top: 1px solid var(--rule);
|
||||
border-bottom: 1px solid var(--rule);
|
||||
margin-bottom: 1.1rem;
|
||||
}
|
||||
.picker-controls .ctl-label {
|
||||
font-family: var(--mono);
|
||||
font-size: 0.68rem;
|
||||
letter-spacing: 0.08em;
|
||||
text-transform: uppercase;
|
||||
color: var(--mute);
|
||||
}
|
||||
.picker-controls .ctl-value {
|
||||
font-family: var(--mono);
|
||||
font-size: 0.82rem;
|
||||
color: var(--accent);
|
||||
min-width: 3.5rem;
|
||||
text-align: right;
|
||||
font-variant-numeric: tabular-nums;
|
||||
}
|
||||
.picker-controls input[type="range"] {
|
||||
width: 100%;
|
||||
accent-color: var(--accent);
|
||||
height: 4px;
|
||||
}
|
||||
.picker-controls .segmented {
|
||||
grid-column: 2 / -1;
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
align-items: center;
|
||||
}
|
||||
.picker-controls .segmented label {
|
||||
font-family: var(--mono);
|
||||
font-size: 0.78rem;
|
||||
color: var(--mute);
|
||||
cursor: pointer;
|
||||
padding: 3px 2px 4px;
|
||||
border-bottom: 1px solid transparent;
|
||||
transition: color 120ms ease, border-color 120ms ease;
|
||||
user-select: none;
|
||||
font-variant-numeric: tabular-nums;
|
||||
position: relative;
|
||||
}
|
||||
.picker-controls .segmented label:hover { color: var(--ink); }
|
||||
.picker-controls .segmented label:has(input:checked) {
|
||||
color: var(--accent);
|
||||
border-bottom-color: var(--accent);
|
||||
}
|
||||
.picker-controls .segmented label:has(input:focus-visible) {
|
||||
outline: 1px solid var(--accent);
|
||||
outline-offset: 2px;
|
||||
}
|
||||
.picker-controls .segmented input[type="radio"] {
|
||||
position: absolute;
|
||||
opacity: 0;
|
||||
width: 1px; height: 1px;
|
||||
margin: 0;
|
||||
}
|
||||
|
||||
.dataset-picker .gallery {
|
||||
display: flex;
|
||||
gap: 1.15rem;
|
||||
overflow-x: auto;
|
||||
overflow-y: hidden;
|
||||
scroll-snap-type: x mandatory;
|
||||
scroll-behavior: smooth;
|
||||
-webkit-overflow-scrolling: touch;
|
||||
padding: 2px 2px 0.85rem;
|
||||
margin: 0 -2px 1.1rem;
|
||||
scrollbar-width: thin;
|
||||
scrollbar-color: var(--rule-2) transparent;
|
||||
}
|
||||
.dataset-picker .gallery::-webkit-scrollbar { height: 6px; }
|
||||
.dataset-picker .gallery::-webkit-scrollbar-track { background: transparent; }
|
||||
.dataset-picker .gallery::-webkit-scrollbar-thumb {
|
||||
background: var(--rule-2);
|
||||
border-radius: 3px;
|
||||
}
|
||||
.dataset-picker .gallery::-webkit-scrollbar-thumb:hover { background: var(--mute); }
|
||||
|
||||
.dataset-picker .picker-loading {
|
||||
padding: 3rem 0;
|
||||
text-align: center;
|
||||
color: var(--mute);
|
||||
font-family: var(--mono);
|
||||
font-size: 0.82rem;
|
||||
}
|
||||
|
||||
.dataset-picker .card {
|
||||
flex: 0 0 240px;
|
||||
/* Prevent long .card-path from forcing the card wider than its flex-basis. */
|
||||
min-width: 0;
|
||||
scroll-snap-align: start;
|
||||
border: 1px solid var(--rule);
|
||||
background: var(--picker-panel);
|
||||
cursor: pointer;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
transition: border-color 120ms ease, box-shadow 120ms ease;
|
||||
}
|
||||
.dataset-picker .card:hover { border-color: var(--rule-2); }
|
||||
.dataset-picker .card.selected {
|
||||
border-color: var(--accent);
|
||||
box-shadow: 0 0 0 1px var(--accent);
|
||||
}
|
||||
.dataset-picker .viz {
|
||||
aspect-ratio: 1 / 1;
|
||||
position: relative;
|
||||
overflow: hidden;
|
||||
}
|
||||
.dataset-picker .viz canvas {
|
||||
position: absolute;
|
||||
inset: 0;
|
||||
display: block;
|
||||
width: 100% !important;
|
||||
height: 100% !important;
|
||||
}
|
||||
.dataset-picker .fig-label {
|
||||
position: absolute;
|
||||
top: 8px;
|
||||
left: 10px;
|
||||
font-family: var(--mono);
|
||||
font-size: 0.62rem;
|
||||
color: var(--mute);
|
||||
letter-spacing: 0.05em;
|
||||
text-transform: uppercase;
|
||||
pointer-events: none;
|
||||
}
|
||||
.dataset-picker .key-hint {
|
||||
position: absolute;
|
||||
top: 6px;
|
||||
right: 8px;
|
||||
font-family: var(--mono);
|
||||
font-size: 0.7rem;
|
||||
color: var(--mute);
|
||||
background: rgba(250, 250, 247, 0.85);
|
||||
border: 1px solid var(--rule);
|
||||
padding: 0 5px;
|
||||
pointer-events: none;
|
||||
}
|
||||
.dataset-picker .card.selected .key-hint {
|
||||
color: var(--accent);
|
||||
border-color: var(--accent);
|
||||
}
|
||||
.dataset-picker .controls-hint {
|
||||
position: absolute;
|
||||
bottom: 6px;
|
||||
right: 8px;
|
||||
font-family: var(--mono);
|
||||
font-size: 0.62rem;
|
||||
color: var(--mute);
|
||||
opacity: 0;
|
||||
transition: opacity 150ms ease;
|
||||
pointer-events: none;
|
||||
}
|
||||
.dataset-picker .card:hover .controls-hint { opacity: 0.75; }
|
||||
.dataset-picker .card-body {
|
||||
padding: 0.75rem 0.9rem 0.9rem;
|
||||
border-top: 1px solid var(--rule);
|
||||
background: var(--page);
|
||||
flex: 1;
|
||||
}
|
||||
.dataset-picker .card-label {
|
||||
font-family: var(--sans);
|
||||
font-weight: 500;
|
||||
font-size: 0.9rem;
|
||||
margin-bottom: 2px;
|
||||
display: flex;
|
||||
align-items: baseline;
|
||||
gap: 0.5rem;
|
||||
}
|
||||
.dataset-picker .card-label .dot {
|
||||
display: inline-block;
|
||||
width: 7px; height: 7px;
|
||||
border-radius: 50%;
|
||||
background: transparent;
|
||||
border: 1px solid var(--rule-2);
|
||||
}
|
||||
.dataset-picker .card.selected .card-label .dot {
|
||||
background: var(--accent);
|
||||
border-color: var(--accent);
|
||||
}
|
||||
.dataset-picker .card-path {
|
||||
font-family: var(--mono);
|
||||
font-size: 0.68rem;
|
||||
color: var(--mute);
|
||||
margin-bottom: 0.4rem;
|
||||
word-break: break-all;
|
||||
}
|
||||
.dataset-picker .card-desc {
|
||||
font-family: var(--serif);
|
||||
font-size: 0.82rem;
|
||||
color: #4a4a4a;
|
||||
line-height: 1.5;
|
||||
}
|
||||
|
||||
.picker-footer {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: space-between;
|
||||
padding-top: 0.85rem;
|
||||
border-top: 1px solid var(--rule);
|
||||
gap: 1.2rem;
|
||||
flex-wrap: wrap;
|
||||
}
|
||||
.picker-footer .selection {
|
||||
font-size: 0.82rem;
|
||||
color: var(--mute);
|
||||
display: inline-flex;
|
||||
align-items: baseline;
|
||||
gap: 0.5rem;
|
||||
}
|
||||
.picker-footer .selection .lbl {
|
||||
font-family: var(--mono);
|
||||
font-size: 0.68rem;
|
||||
letter-spacing: 0.08em;
|
||||
text-transform: uppercase;
|
||||
}
|
||||
.picker-footer .selection code {
|
||||
font-family: var(--mono);
|
||||
font-size: 0.82rem;
|
||||
color: var(--accent);
|
||||
}
|
||||
.picker-footer .continue {
|
||||
background: var(--accent);
|
||||
color: var(--page);
|
||||
border: 1px solid var(--accent);
|
||||
padding: 0.45rem 1rem;
|
||||
font-family: var(--sans);
|
||||
font-size: 0.82rem;
|
||||
font-weight: 600;
|
||||
letter-spacing: 0.04em;
|
||||
cursor: pointer;
|
||||
border-radius: 1px;
|
||||
transition: background 120ms ease;
|
||||
}
|
||||
.picker-footer .continue:not(:disabled):hover { background: #143642; }
|
||||
.picker-footer .continue:disabled {
|
||||
background: var(--faint);
|
||||
border-color: var(--faint);
|
||||
color: var(--page);
|
||||
cursor: not-allowed;
|
||||
}
|
||||
|
||||
@media (max-width: 940px) {
|
||||
.dataset-picker > summary { padding: 0.9rem 1.2rem; }
|
||||
.dataset-picker .picker-body { padding: 0.4rem 1.2rem 1.4rem; }
|
||||
}
|
||||
@ -25,7 +25,7 @@
|
||||
{% if r.params %}
|
||||
<div class="paramline">
|
||||
<span><span class="k">N</span> {{ r.params.get('num_points', '?') }}</span>
|
||||
<span><span class="k">S</span> {{ r.params.get('num_snapshots', '?') }}</span>
|
||||
<span><span class="k">T</span> {{ r.params.get('num_timesteps', '?') }}</span>
|
||||
<span><span class="k">J</span> {{ r.params.get('jitter_scale', '?') }}</span>
|
||||
<span><span class="k">s</span> {{ r.params.get('seed', '?') }}</span>
|
||||
{% set ea = r.params.get('embed_args') or {} %}
|
||||
@ -6,6 +6,14 @@
|
||||
<title>embedding notebook — web1</title>
|
||||
<link rel="stylesheet" href="/static/style.css" />
|
||||
<script src="https://unpkg.com/htmx.org@2.0.4"></script>
|
||||
<script type="importmap">
|
||||
{
|
||||
"imports": {
|
||||
"three": "https://unpkg.com/three@0.160.0/build/three.module.js",
|
||||
"three/addons/": "https://unpkg.com/three@0.160.0/examples/jsm/"
|
||||
}
|
||||
}
|
||||
</script>
|
||||
<link rel="icon" href="data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 16 16'%3E%3Ccircle cx='8' cy='8' r='3' fill='%231f4e5f'/%3E%3C/svg%3E" />
|
||||
</head>
|
||||
<body>
|
||||
@ -21,6 +29,59 @@
|
||||
</div>
|
||||
</header>
|
||||
|
||||
<details class="dataset-picker" id="picker" open>
|
||||
<summary>
|
||||
<span class="picker-meta">
|
||||
<span class="section-number">§ 1</span>
|
||||
<span class="picker-title">input dataset</span>
|
||||
<span class="picker-selection">
|
||||
<span class="lbl">generator</span>
|
||||
<code id="picker-summary-path">—</code>
|
||||
</span>
|
||||
</span>
|
||||
<span class="picker-toggle" aria-hidden="true"></span>
|
||||
</summary>
|
||||
|
||||
<div class="picker-body">
|
||||
<p class="lede">
|
||||
Six candidate generators for the embedding pipeline. Drag to rotate, scroll to zoom,
|
||||
<kbd>←</kbd> <kbd>→</kbd> or <kbd>1</kbd> … <kbd>6</kbd> to select.
|
||||
</p>
|
||||
|
||||
<div class="picker-controls">
|
||||
<label class="ctl-label" for="n-slider">n samples</label>
|
||||
<input type="range" id="n-slider" min="100" max="5000" step="100" value="500">
|
||||
<span class="ctl-value" id="n-value">500</span>
|
||||
|
||||
<span class="ctl-label">noise σ</span>
|
||||
<div class="segmented" role="radiogroup" aria-label="noise σ">
|
||||
<label><input type="radio" name="j" value="0.001"><span>0.001</span></label>
|
||||
<label><input type="radio" name="j" value="0.005" checked><span>0.005</span></label>
|
||||
<label><input type="radio" name="j" value="0.01"><span>0.010</span></label>
|
||||
</div>
|
||||
|
||||
<span class="ctl-label">timesteps</span>
|
||||
<div class="segmented" role="radiogroup" aria-label="number of timesteps">
|
||||
<label><input type="radio" name="f" value="12"><span>12</span></label>
|
||||
<label><input type="radio" name="f" value="24" checked><span>24</span></label>
|
||||
<label><input type="radio" name="f" value="48"><span>48</span></label>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="gallery" id="gallery">
|
||||
<div class="picker-loading">loading samples…</div>
|
||||
</div>
|
||||
|
||||
<div class="picker-footer">
|
||||
<div class="selection">
|
||||
<span class="lbl">generator</span>
|
||||
<code id="selected-path">—</code>
|
||||
</div>
|
||||
<button type="button" class="continue" id="continue-btn" disabled>Continue →</button>
|
||||
</div>
|
||||
</div>
|
||||
</details>
|
||||
|
||||
<main>
|
||||
|
||||
<!-- ==================== LEFT: parameter notebook ==================== -->
|
||||
@ -34,10 +95,17 @@
|
||||
hx-indicator="#busy"
|
||||
>
|
||||
|
||||
<!-- §1 reducer -->
|
||||
<!-- Picker-driven hidden fields. Values are written by dataset-picker.js. -->
|
||||
<input type="hidden" name="dataset_id" id="dataset_id" value="" />
|
||||
<input type="hidden" name="num_points" id="num_points" value="500" />
|
||||
<input type="hidden" name="num_timesteps" id="num_timesteps" value="24" />
|
||||
<input type="hidden" name="jitter_scale" id="jitter_scale" value="0.005" />
|
||||
<input type="hidden" name="seed" id="seed" value="42" />
|
||||
|
||||
<!-- §2 reducer -->
|
||||
<div class="section">
|
||||
<div class="section-label">
|
||||
<span>§ 1 reducer</span><span class="ordinal">method</span>
|
||||
<span>§ 2 reducer</span><span class="ordinal">method</span>
|
||||
</div>
|
||||
<p class="lead">Dimensionality reduction applied to each snapshot. Only reducers whose Python package is importable are shown.</p>
|
||||
|
||||
@ -63,53 +131,16 @@
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<!-- §2 reducer params -->
|
||||
<!-- §3 reducer params -->
|
||||
<div class="section">
|
||||
<div class="section-label">
|
||||
<span>§ 2 parameters</span><span class="ordinal">kwargs</span>
|
||||
<span>§ 3 parameters</span><span class="ordinal">kwargs</span>
|
||||
</div>
|
||||
<div id="reducer-params">
|
||||
{% include "_reducer_form.html" with context %}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- §3 data -->
|
||||
<div class="section">
|
||||
<div class="section-label">
|
||||
<span>§ 3 data & drift</span><span class="ordinal">sampling</span>
|
||||
</div>
|
||||
|
||||
<div class="form-grid">
|
||||
<label for="generator_path">
|
||||
generator
|
||||
<span class="hint">data-generating surface</span>
|
||||
</label>
|
||||
<select name="generator_path" id="generator_path">
|
||||
{% for path, short in generators %}
|
||||
<option value="{{ path }}" {% if path == 'sklearn.datasets.make_s_curve' %}selected{% endif %}>{{ short }}</option>
|
||||
{% endfor %}
|
||||
</select>
|
||||
|
||||
<label for="num_points">n<sub>points</sub></label>
|
||||
<input type="number" id="num_points" name="num_points" value="5000" min="100" step="100" />
|
||||
|
||||
<label for="num_snapshots">n<sub>snapshots</sub></label>
|
||||
<input type="number" id="num_snapshots" name="num_snapshots" value="48" min="2" step="1" />
|
||||
|
||||
<label for="jitter_scale">
|
||||
jitter scale
|
||||
<span class="hint">std of per-step Gaussian drift</span>
|
||||
</label>
|
||||
<input type="number" id="jitter_scale" name="jitter_scale" value="0.01" step="0.001" min="0" />
|
||||
|
||||
<label for="seed">
|
||||
jitter seed
|
||||
<span class="hint">seeds only the drift simulation — the embedder's seed is in §2 (advanced).</span>
|
||||
</label>
|
||||
<input type="number" id="seed" name="seed" value="42" step="1" />
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="actions">
|
||||
<button type="submit" class="submit">submit run</button>
|
||||
<span id="busy" class="htmx-indicator">dispatching…</span>
|
||||
@ -145,9 +176,11 @@
|
||||
</main>
|
||||
|
||||
<footer class="colophon">
|
||||
<span><span class="k">web1</span> · scientific instrument · port 8001</span>
|
||||
<span><span class="k">web</span> · scientific instrument · port 8001</span>
|
||||
<span>fastapi · htmx · no build step</span>
|
||||
</footer>
|
||||
|
||||
<script type="module" src="/static/dataset-picker.js"></script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
@ -64,9 +64,9 @@ def generate_initial_frame_task(
|
||||
|
||||
@task(cache_policy=INPUTS, cache_expiration=timedelta(hours=12))
|
||||
def generate_snapshots_task(
|
||||
initial_df: pd.DataFrame, num_snapshots: int, jitter_scale: float, seed: int = 42
|
||||
initial_df: pd.DataFrame, num_timesteps: int, jitter_scale: float, seed: int = 42
|
||||
) -> List[pd.DataFrame]:
|
||||
return E.generate_jittered_snapshots(initial_df, num_snapshots, jitter_scale, seed)
|
||||
return E.generate_jittered_snapshots(initial_df, num_timesteps, jitter_scale, seed)
|
||||
|
||||
|
||||
@task(
|
||||
@ -138,7 +138,7 @@ _DEFAULT_EMBED_ARGS: Dict[str, Any] = {"n_components": 2, "random_state": 30}
|
||||
@flow(task_runner=RayTaskRunner(init_kwargs={"num_cpus": 4}))
|
||||
def embedding_flow(
|
||||
num_points: int = 5000,
|
||||
num_snapshots: int = 48,
|
||||
num_timesteps: int = 48,
|
||||
jitter_scale: float = 0.01,
|
||||
seed: int = 42,
|
||||
generator_path: str = "sklearn.datasets.make_s_curve",
|
||||
@ -166,10 +166,10 @@ def embedding_flow(
|
||||
Path(output_dir).mkdir(parents=True, exist_ok=True)
|
||||
_generator = generator_path.split(".")[-1]
|
||||
output_ref: str = (
|
||||
f"{output_dir.strip('/')}/{_generator}_Reference_N{num_points}_S{num_snapshots}_J{jitter_scale}_s{seed}.html"
|
||||
f"{output_dir.strip('/')}/{_generator}_Reference_N{num_points}_T{num_timesteps}_J{jitter_scale}_s{seed}.html"
|
||||
)
|
||||
output_embed: str = (
|
||||
f"{output_dir.strip('/')}/{_generator}_{embedder.split('.')[-1]}_N{num_points}_S{num_snapshots}_J{jitter_scale}_s{seed}.html"
|
||||
f"{output_dir.strip('/')}/{_generator}_{embedder.split('.')[-1]}_N{num_points}_T{num_timesteps}_J{jitter_scale}_s{seed}.html"
|
||||
)
|
||||
title_ref = f"Reference: {_generator}, N={num_points} with {jitter_scale} noise"
|
||||
title_embed = f"Embedding: {embedder.split('.')[-1]} on {_generator}, N={num_points} with {jitter_scale} noise"
|
||||
@ -186,27 +186,27 @@ def embedding_flow(
|
||||
# Generate snapshots
|
||||
snapshots = generate_snapshots_task.submit(
|
||||
initial_df=initial_frame.result(),
|
||||
num_snapshots=num_snapshots,
|
||||
num_timesteps=num_timesteps,
|
||||
jitter_scale=jitter_scale,
|
||||
seed=seed,
|
||||
)
|
||||
snapshot_list = snapshots.result()
|
||||
|
||||
# Generate corresponding dates (assuming daily snapshots for simplicity)
|
||||
# One date per timestep (monthly, starting at 2000-01-01 for cosmetic reasons)
|
||||
dates = [
|
||||
f"{year}-{month:02d}-01"
|
||||
for year in range(2000, 2001 + math.floor(num_snapshots / 12))
|
||||
for year in range(2000, 2001 + math.floor(num_timesteps / 12))
|
||||
for month in range(1, 13)
|
||||
][:num_snapshots]
|
||||
][:num_timesteps]
|
||||
|
||||
# Apply embeddings in parallel using Prefect's mapping
|
||||
embeddings = create_embedding.map(
|
||||
snapshot=snapshot_list,
|
||||
time_idx=dates,
|
||||
embed_columns=[embed_columns] * num_snapshots,
|
||||
embedder=[embedder] * num_snapshots,
|
||||
embed_args=[merged_embed_args] * num_snapshots,
|
||||
id_column=[id_column] * num_snapshots,
|
||||
embed_columns=[embed_columns] * num_timesteps,
|
||||
embedder=[embedder] * num_timesteps,
|
||||
embed_args=[merged_embed_args] * num_timesteps,
|
||||
id_column=[id_column] * num_timesteps,
|
||||
)
|
||||
|
||||
# Collect all embeddings
|
||||
|
||||
@ -425,18 +425,18 @@ def generate_initial_frame(
|
||||
|
||||
def generate_jittered_snapshots(
|
||||
initial_df: pd.DataFrame,
|
||||
num_snapshots: int,
|
||||
num_timesteps: int,
|
||||
jitter_scale: float = 0.1,
|
||||
seed: int = 42,
|
||||
) -> List[pd.DataFrame]:
|
||||
"""
|
||||
Generate snapshots by applying random jitter to the initial frame and randomly adding/removing points.
|
||||
Generate one jittered snapshot per timestep, with random point add/remove.
|
||||
|
||||
Parameters:
|
||||
- initial_df: pd.DataFrame
|
||||
The initial DataFrame to apply jitter.
|
||||
- num_snapshots: int
|
||||
Number of snapshots to generate.
|
||||
- num_timesteps: int
|
||||
Number of timesteps (one snapshot produced per timestep).
|
||||
- jitter_scale: float
|
||||
Standard deviation of the Gaussian noise added for jitter.
|
||||
- seed: int
|
||||
@ -450,7 +450,7 @@ def generate_jittered_snapshots(
|
||||
snapshots = []
|
||||
current_df = initial_df.copy()
|
||||
|
||||
for i in range(num_snapshots):
|
||||
for i in range(num_timesteps):
|
||||
# Apply jitter (set to 0 for testing)
|
||||
jitter = np.random.normal(
|
||||
loc=0.0,
|
||||
|
||||
10
makefile
10
makefile
@ -1,14 +1,8 @@
|
||||
run:
|
||||
.venv/bin/python flows/embedding_flow.py
|
||||
|
||||
web1:
|
||||
.venv/bin/python -m uvicorn app.web1.main:app --host 0.0.0.0 --port 8001 --reload
|
||||
|
||||
web2:
|
||||
.venv/bin/python -m uvicorn app.web2.main:app --host 0.0.0.0 --port 8002 --reload
|
||||
|
||||
web3:
|
||||
.venv/bin/python -m uvicorn app.web3.main:app --host 0.0.0.0 --port 8003 --reload
|
||||
web:
|
||||
.venv/bin/python -m uvicorn app.web.main:app --host 0.0.0.0 --port 8001 --reload
|
||||
|
||||
demo:
|
||||
.venv/bin/python -m uvicorn app.demo.main:app --host 0.0.0.0 --port 8010 --reload
|
||||
|
||||
Loading…
Reference in New Issue
Block a user