Compare commits

..

No commits in common. "92069a3c91b142d59461642bc3e92ae764fdd480" and "ac511c942f7b6c5d62b02ad2b7caeee27460c3b1" have entirely different histories.

10 changed files with 88 additions and 983 deletions

View File

@ -1,367 +0,0 @@
// Dataset picker — ported from app/demo/index.html. Renders the six sklearn
// previews into cards, streams jittered random walks, and writes the current
// selection into hidden form inputs so the main <form> can submit it to the
// Prefect flow.
import * as THREE from 'three';
import { OrbitControls } from 'three/addons/controls/OrbitControls.js';
// Trajectories are precomputed at the max cycle length. Toggling n-frames
// truncates (12 is a prefix of 24 is a prefix of 48) so the same walk is
// reused — no reroll on toggle, and the per-frame pulse stays consistent.
const MAX_FRAMES = 48;
// Eight-colour palette for discrete class labels. buildColors() indexes it
// modulo its length, so label values past the end wrap around and reuse
// colours.
const CATEGORICAL_HEX = [
'#1f4e5f', '#c97b3f', '#8b5a9f', '#5a8560',
'#c74a5e', '#6b7d8f', '#b89f51', '#4a6fa5',
];
// Hex strings converted to THREE.Color once at module load, so colouring a
// point is just a lookup of the r/g/b components.
const CATEGORICAL = CATEGORICAL_HEX.map(h => new THREE.Color(h));
/**
 * Colour for a continuous label.
 *
 * Hue is interpolated linearly from 215° (t = 0) to 28° (t = 1), saturation
 * is fixed at 0.62, and lightness rises from 0.46 to 0.54 across the same
 * range.
 *
 * @param {number} t - Ramp position; buildColors() passes values in [0, 1].
 * @returns {THREE.Color} A newly allocated colour.
 */
function rampContinuous(t) {
  const hueDegrees = (1 - t) * 215 + t * 28;
  const lightness = 0.50 + (t - 0.5) * 0.08;
  const color = new THREE.Color();
  return color.setHSL(hueDegrees / 360, 0.62, lightness);
}
/**
 * Centre a point cloud on its centroid and rescale it for display.
 *
 * The scale is the 95th-percentile of each point's largest absolute
 * centred coordinate rather than the overall maximum, so a handful of
 * outliers in the tails (gaussian_quantiles / classification) do not shrink
 * the bulk of the cloud to a fraction of the viewport.
 *
 * @param {Array<ArrayLike<number>>} points - One [x, y, z] triple per point.
 * @returns {{positions: Float32Array}} Flat xyz buffer of length 3 * points.length.
 */
function normalize(points) {
  const count = points.length;

  // Centroid.
  let sumX = 0;
  let sumY = 0;
  let sumZ = 0;
  for (let i = 0; i < count; i++) {
    sumX += points[i][0];
    sumY += points[i][1];
    sumZ += points[i][2];
  }
  const cx = sumX / count;
  const cy = sumY / count;
  const cz = sumZ / count;

  // Largest absolute centred coordinate of every point.
  const extents = new Float64Array(count);
  for (let i = 0; i < count; i++) {
    const pt = points[i];
    const dx = Math.abs(pt[0] - cx);
    const dy = Math.abs(pt[1] - cy);
    const dz = Math.abs(pt[2] - cz);
    const widerXY = dx > dy ? dx : dy;
    extents[i] = widerXY > dz ? widerXY : dz;
  }

  // p95 of those extents; the floor keeps a degenerate cloud (all points
  // identical) from dividing by zero.
  const ranked = [...extents].sort((lhs, rhs) => lhs - rhs);
  const p95 = ranked[Math.floor(count * 0.95)];
  const scale = Math.max(p95, 1e-9);

  const flat = new Float32Array(count * 3);
  let cursor = 0;
  for (let i = 0; i < count; i++) {
    const pt = points[i];
    flat[cursor] = (pt[0] - cx) / scale;
    flat[cursor + 1] = (pt[1] - cy) / scale;
    flat[cursor + 2] = (pt[2] - cz) / scale;
    cursor += 3;
  }
  return { positions: flat };
}
/**
 * Build the per-vertex RGB buffer for a dataset.
 *
 * When `kind` is 'categorical' each label picks a palette entry (wrapping
 * modulo the palette length). Any other `kind` is treated as continuous:
 * labels are min-max normalised and passed through rampContinuous().
 *
 * @param {ArrayLike<number>} labels - One label per point.
 * @param {string} kind - 'categorical', or anything else for continuous.
 * @returns {Float32Array} Flat rgb buffer of length 3 * labels.length.
 */
function buildColors(labels, kind) {
  const count = labels.length;
  const rgb = new Float32Array(count * 3);
  const writeColor = (index, color) => {
    const at = index * 3;
    rgb[at] = color.r;
    rgb[at + 1] = color.g;
    rgb[at + 2] = color.b;
  };

  if (kind === 'categorical') {
    for (let i = 0; i < count; i++) {
      writeColor(i, CATEGORICAL[labels[i] % CATEGORICAL.length]);
    }
    return rgb;
  }

  // Continuous: find the label range first.
  let lowest = Infinity;
  let highest = -Infinity;
  for (const value of labels) {
    if (value < lowest) lowest = value;
    if (value > highest) highest = value;
  }
  // A zero span (all labels equal) falls back to 1 to avoid 0 / 0.
  const span = (highest - lowest) || 1;
  for (let i = 0; i < count; i++) {
    writeColor(i, rampContinuous((labels[i] - lowest) / span));
  }
  return rgb;
}
/**
 * Build one Three.js preview (point cloud, camera, renderer, orbit
 * controls), append its canvas to `container`, and return the per-scene
 * state record used by the rest of the picker.
 *
 * @param {HTMLElement} container - Element that receives the renderer canvas.
 * @param {{points: Array, labels: Array, kind: string}} dataset - Preview data.
 * @returns {object} Scene state; see the field comments on the returned literal.
 */
function createScene(container, dataset) {
  const basePositions = normalize(dataset.points).positions;
  const vertexColors = buildColors(dataset.labels, dataset.kind);

  // The position attribute gets its own copy so basePositions stays intact
  // as the rest pose while the attribute buffer is rewritten for animation.
  const livePositions = new Float32Array(basePositions);
  const geometry = new THREE.BufferGeometry();
  geometry.setAttribute('position', new THREE.BufferAttribute(livePositions, 3));
  geometry.setAttribute('color', new THREE.BufferAttribute(vertexColors, 3));

  const pointMaterial = new THREE.PointsMaterial({
    size: 2.1,
    sizeAttenuation: false,
    vertexColors: true,
    transparent: true,
    opacity: 0.92,
  });

  const scene = new THREE.Scene();
  scene.background = new THREE.Color(0xf2eee4);
  scene.add(new THREE.Points(geometry, pointMaterial));

  // Square viewport (aspect 1), looking at the origin from a raised diagonal.
  const camera = new THREE.PerspectiveCamera(42, 1, 0.1, 100);
  camera.position.set(2.6, 1.9, 2.6);
  camera.lookAt(0, 0, 0);

  const renderer = new THREE.WebGLRenderer({ antialias: true, alpha: false });
  // Cap the pixel ratio at 2.
  renderer.setPixelRatio(Math.min(window.devicePixelRatio, 2));
  container.appendChild(renderer.domElement);

  const controls = new OrbitControls(camera, renderer.domElement);
  Object.assign(controls, {
    enableDamping: true,
    dampingFactor: 0.08,
    enablePan: false,
    autoRotate: true,
    autoRotateSpeed: 0.55,
    minDistance: 1.5,
    maxDistance: 6,
  });

  return {
    scene,
    camera,
    renderer,
    controls,
    container,
    geometry,
    basePositions,
    // Filled in by buildTrajectories().
    trajectories: null,
    numFrames: 0,
    // Duration of one trajectory frame: 12 frames per second.
    snapshotMs: 1000 / 12,
    // Pause at the rest pose at the end of each cycle, so the un-jittered
    // frame is visible for more than an instant.
    holdMs: 200,
    // Cycle origin timestamp. Every scene is given the same value whenever
    // the timestep count changes, which keeps all previews in lockstep.
    cycleStartMs: 0,
  };
}
/**
 * Precompute a Gaussian random walk for every coordinate of a scene.
 *
 * The result is stored on `s.trajectories` as `numFrames` consecutive rows,
 * each as long as `s.basePositions`. Row 0 is all zeros (the rest pose);
 * each later row is the previous row plus a standard-normal step drawn with
 * the Box–Muller transform, two coordinates per pair of uniform samples.
 *
 * @param {{basePositions: Float32Array}} s - Scene state; mutated in place.
 * @param {number} numFrames - Number of rows to generate.
 */
function buildTrajectories(s, numFrames) {
  const stride = s.basePositions.length;
  const walk = new Float32Array(numFrames * stride);
  const TWO_PI = 2 * Math.PI;

  for (let frame = 1; frame < numFrames; frame++) {
    const rowStart = frame * stride;
    const prevStart = rowStart - stride;
    for (let k = 0; k < stride; k += 2) {
      // Math.random() may return exactly 0; substitute a tiny value so the
      // logarithm below stays finite.
      const u1 = Math.random() || 1e-12;
      const u2 = Math.random();
      const radius = Math.sqrt(-2.0 * Math.log(u1));
      const angle = TWO_PI * u2;
      walk[rowStart + k] = walk[prevStart + k] + radius * Math.cos(angle);
      // The sine half of the pair is dropped when the row length is odd.
      if (k + 1 < stride) {
        walk[rowStart + k + 1] = walk[prevStart + k + 1] + radius * Math.sin(angle);
      }
    }
  }

  s.trajectories = walk;
  s.numFrames = numFrames;
}
/**
 * Resize a scene's renderer to a square matching its container's current
 * width (whole pixels, never below 1) and refresh the camera projection.
 *
 * @param {object} s - Scene state holding `container`, `renderer`, `camera`.
 */
function sizeScene(s) {
  const { width } = s.container.getBoundingClientRect();
  const side = Math.max(1, Math.floor(width));
  s.renderer.setSize(side, side);
  // The viewport is always square, so the aspect ratio is fixed at 1.
  s.camera.aspect = 1;
  s.camera.updateProjectionMatrix();
}
/**
 * Boot the dataset picker.
 *
 * Fetches the dataset previews from /data.json, renders one card with a
 * Three.js scene per dataset into #gallery, wires the sample-count slider,
 * noise and timestep radio groups, keyboard shortcuts and the Continue
 * button, mirrors the current choices into the hidden form inputs, and
 * starts the animation loop.
 *
 * If the previews cannot be loaded, a message is shown in the gallery and
 * nothing else is set up.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const gallery = document.getElementById('gallery');

  let data;
  try {
    const res = await fetch('/data.json');
    // fetch() only rejects on network failure; an HTTP error status still
    // resolves, so it has to be turned into a failure explicitly.
    if (!res.ok) {
      throw new Error(`HTTP ${res.status} ${res.statusText}`.trim());
    }
    data = await res.json();
  } catch (err) {
    // The error text is set as text, not markup: JSON parse errors commonly
    // quote the start of the response body (e.g. "<!doctype ...").
    const notice = document.createElement('div');
    notice.className = 'picker-loading';
    notice.textContent = `failed to load /data.json — ${err}`;
    gallery.innerHTML = '';
    gallery.appendChild(notice);
    return;
  }
  gallery.innerHTML = '';

  const scenes = [];
  const order = Object.entries(data);
  let selectedId = null;

  // Hidden form inputs the main <form> will submit.
  const hidden = {
    datasetId: document.getElementById('dataset_id'),
    numPoints: document.getElementById('num_points'),
    numTimesteps: document.getElementById('num_timesteps'),
    jitterScale: document.getElementById('jitter_scale'),
  };
  const pickerDetails = document.getElementById('picker');
  const summaryPath = document.getElementById('picker-summary-path');
  const selectedPath = document.getElementById('selected-path');
  const continueBtn = document.getElementById('continue-btn');

  // Continue is only usable once a dataset has been picked.
  function updateContinue() {
    continueBtn.disabled = !selectedId;
    continueBtn.title = selectedId ? '' : 'pick a dataset first';
  }

  // Mark `card` as the single selected card and publish the choice to the
  // footer label and the hidden dataset_id input.
  function selectCard(id, card, ds) {
    document.querySelectorAll('#gallery .card').forEach((c) => c.classList.remove('selected'));
    card.classList.add('selected');
    selectedId = id;
    selectedPath.textContent = ds.path;
    hidden.datasetId.value = id;
    updateContinue();
  }

  // Maps each .viz element to its scene so the ResizeObserver callback can
  // find the scene to resize.
  const vizToScene = new WeakMap();
  const sizeObserver = new ResizeObserver((entries) => {
    for (const entry of entries) {
      const s = vizToScene.get(entry.target);
      if (s) sizeScene(s);
    }
  });

  order.forEach(([id, ds], i) => {
    const card = document.createElement('div');
    card.className = 'card';
    card.dataset.id = id;
    // Only static markup and the numeric index go through innerHTML; the
    // dataset-provided strings are filled in as text below.
    card.innerHTML = `
      <div class="viz">
        <span class="fig-label">Fig. 1.${i + 1}</span>
        <span class="key-hint">[${i + 1}]</span>
        <span class="controls-hint">drag · scroll</span>
      </div>
      <div class="card-body">
        <div class="card-label">
          <span class="dot"></span>
          <span></span>
        </div>
        <div class="card-path"></div>
        <div class="card-desc"></div>
      </div>
    `;
    card.querySelector('.card-label > span:not(.dot)').textContent = ds.name;
    card.querySelector('.card-path').textContent = ds.path;
    card.querySelector('.card-desc').textContent = ds.description;
    gallery.appendChild(card);

    const viz = card.querySelector('.viz');
    const s = createScene(viz, ds);
    buildTrajectories(s, MAX_FRAMES);
    scenes.push(s);
    vizToScene.set(viz, s);
    sizeObserver.observe(viz);

    // Stop auto-rotation for good once the user starts interacting.
    s.controls.addEventListener('start', () => { s.controls.autoRotate = false; });
    card.addEventListener('click', () => selectCard(id, card, ds));
  });

  // n samples: limits how many points each scene draws.
  const slider = document.getElementById('n-slider');
  const nValue = document.getElementById('n-value');
  function applyN(n) {
    nValue.textContent = n.toLocaleString();
    hidden.numPoints.value = String(n);
    for (const s of scenes) {
      const cap = s.geometry.attributes.position.count;
      s.geometry.setDrawRange(0, Math.min(n, cap));
    }
  }
  slider.addEventListener('input', (e) => applyN(Number.parseInt(e.target.value, 10)));
  applyN(Number.parseInt(slider.value, 10));

  // noise σ: multiplier applied to the precomputed walk in tick().
  let jitterScale = 0;
  const jInputs = document.querySelectorAll('input[name="j"]');
  function applyJ(v) {
    jitterScale = v;
    hidden.jitterScale.value = String(v);
  }
  jInputs.forEach((input) => {
    input.addEventListener('change', (e) => applyJ(Number.parseFloat(e.target.value)));
  });
  applyJ(Number.parseFloat(document.querySelector('input[name="j"]:checked').value));

  // timesteps: truncates the precomputed walk. Prefix-stable (12 ⊂ 24 ⊂ 48),
  // so toggling changes cycle length without rerolling. Every scene gets the
  // same cycleStartMs so all cards animate in lockstep.
  const fInputs = document.querySelectorAll('input[name="f"]');
  function applyF(n) {
    hidden.numTimesteps.value = String(n);
    const start = performance.now();
    for (const s of scenes) {
      s.numFrames = n;
      s.cycleStartMs = start;
    }
  }
  fInputs.forEach((input) => {
    input.addEventListener('change', (e) => applyF(Number.parseInt(e.target.value, 10)));
  });
  applyF(Number.parseInt(document.querySelector('input[name="f"]:checked').value, 10));

  // Select the dataset at position `idx` in the gallery, optionally
  // scrolling its card into view. Out-of-range indices are ignored.
  function selectByIndex(idx, { scroll = true } = {}) {
    const entry = order[idx];
    if (!entry) return;
    const [id, ds] = entry;
    const card = gallery.children[idx];
    if (!card) return;
    selectCard(id, card, ds);
    if (scroll) card.scrollIntoView({ behavior: 'smooth', inline: 'nearest', block: 'nearest' });
  }

  document.addEventListener('keydown', (e) => {
    if (!pickerDetails.open) return;
    // Leave modified keys to the browser/OS (e.g. Alt+Left for history
    // navigation, Ctrl/Cmd+digit for tab switching).
    if (e.ctrlKey || e.metaKey || e.altKey) return;
    // Do not steal keys from form controls that use them natively.
    const tag = e.target.tagName;
    if (tag === 'INPUT' || tag === 'TEXTAREA' || tag === 'SELECT') return;

    if (/^[1-9]$/.test(e.key)) {
      selectByIndex(Number.parseInt(e.key, 10) - 1);
      return;
    }
    if (e.key === 'ArrowRight' || e.key === 'ArrowLeft') {
      e.preventDefault();
      const currentIdx = order.findIndex(([id]) => id === selectedId);
      const n = order.length;
      // With nothing selected, Right starts at the first card and Left at
      // the last; otherwise move one step and clamp at the ends.
      const nextIdx = e.key === 'ArrowRight'
        ? (currentIdx < 0 ? 0 : Math.min(currentIdx + 1, n - 1))
        : (currentIdx < 0 ? n - 1 : Math.max(currentIdx - 1, 0));
      selectByIndex(nextIdx);
    }
  });

  continueBtn.addEventListener('click', () => {
    if (!selectedId) return;
    const ds = data[selectedId];
    summaryPath.textContent = ds.path;
    pickerDetails.open = false;
  });

  function tick() {
    requestAnimationFrame(tick);
    // When the picker is collapsed the canvases are display:none inside a
    // closed <details>; rects are zero. Skip the per-frame work.
    if (!pickerDetails.open) return;
    const now = performance.now();
    for (const s of scenes) {
      const N = s.numFrames;
      const n = s.basePositions.length;
      const walkMs = N * s.snapshotMs;
      const cycleMs = walkMs + s.holdMs;
      // Double modulo keeps the phase non-negative even if `now` is earlier
      // than cycleStartMs.
      const elapsed = ((now - s.cycleStartMs) % cycleMs + cycleMs) % cycleMs;
      const total = n / 3;
      const drawCount = s.geometry.drawRange.count;
      const visibleN = Number.isFinite(drawCount) ? Math.min(drawCount, total) : total;
      // Only the coordinates of points that are actually drawn are updated.
      const limit = visibleN * 3;
      const pos = s.geometry.attributes.position.array;
      const base = s.basePositions;
      if (elapsed >= walkMs) {
        // Hold phase: show the un-jittered rest pose.
        for (let i = 0; i < limit; i++) pos[i] = base[i];
      } else {
        // Walk phase: interpolate linearly between two adjacent frames. The
        // frame after the last one wraps to frame 0.
        const frameF = elapsed / s.snapshotMs;
        const frameIdx = Math.floor(frameF);
        const interpT = frameF - frameIdx;
        const nextIdx = (frameIdx + 1) % N;
        const aOff = frameIdx * n;
        const bOff = nextIdx * n;
        const tr = s.trajectories;
        const scale = jitterScale;
        const u = 1 - interpT;
        for (let i = 0; i < limit; i++) {
          pos[i] = base[i] + (tr[aOff + i] * u + tr[bOff + i] * interpT) * scale;
        }
      }
      s.geometry.attributes.position.needsUpdate = true;
      s.controls.update();
      s.renderer.render(s.scene, s.camera);
    }
  }
  tick();

  // Reopening the picker after it's been closed: canvases may have been
  // laid out at zero size while hidden. Re-measure on toggle.
  pickerDetails.addEventListener('toggle', () => {
    if (pickerDetails.open) {
      for (const s of scenes) sizeScene(s);
    }
  });
}

// Report setup failures explicitly instead of leaving the promise floating.
main().catch((err) => {
  console.error('dataset picker failed to initialise', err);
});

View File

@ -13,7 +13,6 @@ from __future__ import annotations
import importlib.util
import json
import os
from functools import lru_cache
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
@ -22,13 +21,6 @@ from fastapi import FastAPI, Form, Request
from fastapi.responses import HTMLResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates
from sklearn.datasets import (
make_blobs,
make_classification,
make_gaussian_quantiles,
make_s_curve,
make_swiss_roll,
)
# ---------------------------------------------------------------------------
@ -43,137 +35,11 @@ FIGS_DIR.mkdir(parents=True, exist_ok=True)
PREFECT_API = os.environ.get("PREFECT_API_URL", "http://localhost:4200/api")
DEPLOYMENT_NAME = "embedding-flow/embedding-flow"
# ---------------------------------------------------------------------------
# Dataset catalogue
# ---------------------------------------------------------------------------
# Metadata for the /data.json endpoint consumed by the dataset picker, and
# for server-side lookup when the picker posts its selection back. kwargs
# must carry n_features=3 for generators that aren't already 3-D, since
# they'll be forwarded verbatim to the Prefect flow's generator_kwargs.
DATASET_PREVIEW_N = 5000
DATASET_PREVIEW_SEED = 0
# Catalogue keyed by dataset id. Each entry carries:
#   name        - display name for the picker card
#   path        - dotted path of the sklearn generator function
#   kwargs      - extra keyword arguments for that generator
#   description - short explanatory text for the picker card
#   kind        - "continuous" or "categorical"; selects how the picker
#                 colours the labels
DATASET_META: Dict[str, Dict[str, Any]] = {
"s_curve": {
"name": "S-Curve",
"path": "sklearn.datasets.make_s_curve",
"kwargs": {},
"description": (
"A 2-D manifold warped into R³. Continuous label encodes position "
"along the curve — a good test of whether a reducer unrolls the "
"sheet without tearing."
),
"kind": "continuous",
},
"swiss_roll": {
"name": "Swiss Roll",
"path": "sklearn.datasets.make_swiss_roll",
"kwargs": {},
"description": (
"A rolled-up plane. The canonical hard case for linear methods: "
"PCA collapses the spiral, non-linear methods should recover the "
"unroll."
),
"kind": "continuous",
},
# Same generator as "swiss_roll"; only the kwargs differ.
"swiss_roll_hole": {
"name": "Swiss Roll (hole)",
"path": "sklearn.datasets.make_swiss_roll",
"kwargs": {"hole": True},
"description": (
"Swiss roll with a rectangular hole punched through. Same manifold, "
"non-trivial topology — a faithful unroll should preserve the hole "
"rather than smearing it closed."
),
"kind": "continuous",
},
"blobs": {
"name": "Gaussian Blobs",
"path": "sklearn.datasets.make_blobs",
"kwargs": {"n_features": 3, "centers": 5, "cluster_std": 1.0},
"description": (
"Five isotropic Gaussian clusters in R³. Discrete class labels. "
"Tests whether a reducer preserves cluster separation when "
"projected to 2-D."
),
"kind": "categorical",
},
"gaussian_quantiles": {
"name": "Gaussian Quantiles",
"path": "sklearn.datasets.make_gaussian_quantiles",
"kwargs": {"n_features": 3, "n_classes": 4},
"description": (
"Concentric Gaussian shells in R³; class = which shell. Classes "
"are linearly inseparable by construction — PCA collapses them, "
"kernel and manifold methods have a chance."
),
"kind": "categorical",
},
"classification": {
"name": "Hypercube Clusters",
"path": "sklearn.datasets.make_classification",
"kwargs": {
"n_features": 3,
"n_informative": 3,
"n_redundant": 0,
"n_repeated": 0,
"n_classes": 4,
"n_clusters_per_class": 2,
"class_sep": 1.5,
},
"description": (
"Four classes, two sub-clusters each, placed at hypercube vertices "
"with informative noise. A denser discrete test than blobs — "
"within-class bimodality stresses cluster-preserving reducers."
),
"kind": "categorical",
},
}
@lru_cache(maxsize=1)
def _dataset_previews() -> Dict[str, Dict[str, Any]]:
    """Return the dataset catalogue with sampled preview data attached.

    Every catalogue entry is copied and extended with ``points`` and
    ``labels`` (plain lists) drawn from the matching sklearn generator using
    the preview sample count and seed. The result is memoised, so the
    generators run at most once per process.
    """
    n = DATASET_PREVIEW_N
    seed = DATASET_PREVIEW_SEED

    # Each generator returns an (X, y) pair.
    generated = {
        "s_curve": make_s_curve(n_samples=n, noise=0.03, random_state=seed),
        "swiss_roll": make_swiss_roll(n_samples=n, noise=0.15, random_state=seed),
        "swiss_roll_hole": make_swiss_roll(
            n_samples=n, noise=0.15, hole=True, random_state=seed
        ),
        "blobs": make_blobs(
            n_samples=n,
            n_features=3,
            centers=5,
            cluster_std=1.0,
            random_state=seed,
        ),
        "gaussian_quantiles": make_gaussian_quantiles(
            n_samples=n, n_features=3, n_classes=4, random_state=seed
        ),
        "classification": make_classification(
            n_samples=n,
            n_features=3,
            n_informative=3,
            n_redundant=0,
            n_repeated=0,
            n_classes=4,
            n_clusters_per_class=2,
            class_sep=1.5,
            random_state=seed,
        ),
    }

    previews: Dict[str, Dict[str, Any]] = {}
    for dataset_id, meta in DATASET_META.items():
        coords, targets = generated[dataset_id]
        entry = dict(meta)
        # tolist() turns the numpy arrays into JSON-serialisable lists.
        entry["points"] = coords.tolist()
        entry["labels"] = targets.tolist()
        previews[dataset_id] = entry
    return previews
GENERATOR_OPTIONS = [
("sklearn.datasets.make_s_curve", "make_s_curve"),
("sklearn.datasets.make_swiss_roll", "make_swiss_roll"),
("sklearn.datasets.make_blobs", "make_blobs"),
]
# ---------------------------------------------------------------------------
@ -361,14 +227,14 @@ def synthesize_output_paths(
generator_path: str,
embedder: str,
num_points: int,
num_timesteps: int,
num_snapshots: int,
jitter_scale: float,
seed: int,
) -> Tuple[str, str]:
gen = generator_path.split(".")[-1]
emb = embedder.split(".")[-1]
ref = f"{gen}_Reference_N{num_points}_T{num_timesteps}_J{jitter_scale}_s{seed}.html"
embf = f"{gen}_{emb}_N{num_points}_T{num_timesteps}_J{jitter_scale}_s{seed}.html"
ref = f"{gen}_Reference_N{num_points}_S{num_snapshots}_J{jitter_scale}_s{seed}.html"
embf = f"{gen}_{emb}_N{num_points}_S{num_snapshots}_J{jitter_scale}_s{seed}.html"
return ref, embf
@ -473,10 +339,7 @@ def _run_view(run: Dict[str, Any]) -> Dict[str, Any]:
params.get("generator_path", "sklearn.datasets.make_s_curve"),
params.get("embedder", "sklearn.decomposition.FactorAnalysis"),
int(params.get("num_points", 5000)),
# Fallback to the old num_snapshots key for runs dispatched
# before the T-rename, so historical figs still resolve after
# `rename 's/_S/_T/' figs/*.html`.
int(params.get("num_timesteps", params.get("num_snapshots", 48))),
int(params.get("num_snapshots", 48)),
float(params.get("jitter_scale", 0.01)),
int(params.get("seed", 42)),
)
@ -531,6 +394,7 @@ async def index(request: Request) -> HTMLResponse:
"reducers": reducers,
"default_reducer": default_reducer,
"default_spec": default_spec,
"generators": GENERATOR_OPTIONS,
"runs": views,
"deployment_id": dep_id,
"prefect_api": PREFECT_API,
@ -538,11 +402,6 @@ async def index(request: Request) -> HTMLResponse:
)
@app.get("/data.json")
async def data_json() -> JSONResponse:
return JSONResponse(_dataset_previews())
@app.get("/reducer-form", response_class=HTMLResponse)
async def reducer_form(request: Request, name: str) -> HTMLResponse:
spec = REDUCERS.get(name)
@ -577,39 +436,27 @@ async def submit(request: Request) -> HTMLResponse:
status_code=400,
)
# Dataset came from the picker via dataset_id; fall back to explicit
# generator_path / generator_kwargs if a client posts those directly.
dataset_id = data.get("dataset_id") or ""
if dataset_id and dataset_id in DATASET_META:
meta = DATASET_META[dataset_id]
generator_path = meta["path"]
generator_kwargs = dict(meta["kwargs"])
else:
generator_path = data.get("generator_path") or "sklearn.datasets.make_s_curve"
raw_kwargs = data.get("generator_kwargs") or ""
try:
generator_kwargs = json.loads(raw_kwargs) if raw_kwargs else {}
except json.JSONDecodeError as e:
return HTMLResponse(
f"<div class='flash err'>bad generator_kwargs JSON: {e}</div>",
status_code=400,
)
# Data params
try:
num_points = int(data.get("num_points", "5000") or 5000)
num_timesteps = int(data.get("num_timesteps", "48") or 48)
num_snapshots = int(data.get("num_snapshots", "48") or 48)
jitter_scale = float(data.get("jitter_scale", "0.01") or 0.01)
seed = int(data.get("seed", "42") or 42)
except ValueError as e:
return HTMLResponse(
f"<div class='flash err'>bad numeric input: {e}</div>", status_code=400
)
generator_path = data.get("generator_path") or "sklearn.datasets.make_s_curve"
embed_args = build_embed_args(reducer, data)
generator_kwargs: Dict[str, Any] = {}
if generator_path.endswith("make_blobs"):
generator_kwargs["n_features"] = 3
parameters: Dict[str, Any] = {
"num_points": num_points,
"num_timesteps": num_timesteps,
"num_snapshots": num_snapshots,
"jitter_scale": jitter_scale,
"seed": seed,
"generator_path": generator_path,
@ -636,7 +483,7 @@ async def submit(request: Request) -> HTMLResponse:
)
ref_file, emb_file = synthesize_output_paths(
generator_path, reducer, num_points, num_timesteps, jitter_scale, seed
generator_path, reducer, num_points, num_snapshots, jitter_scale, seed
)
RUN_OUTPUTS[run["id"]] = {"ref": ref_file, "embed": emb_file}

View File

@ -513,351 +513,3 @@ button.submit:disabled { background: var(--faint); border-color: var(--faint); c
}
.htmx-request .htmx-indicator { opacity: 1; }
.htmx-request.htmx-indicator { opacity: 1; }
/* ---------- dataset picker (§ 1) -------------------------------------- */
/* Collapsible <details> below the masthead; expands to a gallery of 3D
dataset previews, collapses to a one-line summary chip once confirmed. */
.dataset-picker {
--picker-panel: #f2eee4;
--picker-hair: #d8d3c6;
border-bottom: 1px solid var(--rule);
background: var(--page);
}
.dataset-picker > summary {
list-style: none;
cursor: pointer;
padding: 0.9rem 2.2rem;
display: flex;
justify-content: space-between;
align-items: center;
gap: 1rem;
user-select: none;
}
.dataset-picker > summary::-webkit-details-marker { display: none; }
.dataset-picker > summary:hover { background: var(--accent-tint); }
.dataset-picker .picker-meta {
display: flex;
align-items: baseline;
gap: 1rem;
flex-wrap: wrap;
min-width: 0;
}
.dataset-picker .section-number {
font-family: var(--mono);
font-size: 0.78rem;
font-weight: 600;
color: var(--accent);
letter-spacing: 0;
}
.dataset-picker .picker-title {
font-family: var(--serif);
font-size: 1rem;
color: var(--ink);
font-style: italic;
}
.dataset-picker .picker-selection {
font-size: 0.78rem;
color: var(--mute);
display: inline-flex;
align-items: baseline;
gap: 0.5rem;
}
.dataset-picker .picker-selection .lbl {
font-family: var(--mono);
font-size: 0.68rem;
letter-spacing: 0.08em;
text-transform: uppercase;
color: var(--faint);
}
.dataset-picker .picker-selection code {
font-family: var(--mono);
font-size: 0.78rem;
color: var(--accent);
}
.dataset-picker .picker-toggle {
font-family: var(--mono);
font-size: 0.74rem;
color: var(--faint);
letter-spacing: 0;
}
.dataset-picker .picker-toggle::before { content: "[ edit ]"; }
.dataset-picker[open] .picker-toggle::before { content: "[ collapse ]"; color: var(--accent); }
.dataset-picker .picker-body {
padding: 0.4rem 2.2rem 1.6rem;
max-width: 1440px;
margin: 0 auto;
}
.dataset-picker .lede {
color: var(--mute);
font-family: var(--serif);
font-style: italic;
font-size: 0.88rem;
max-width: 62ch;
margin: 0.2rem 0 0.9rem;
}
.dataset-picker .lede kbd {
font-family: var(--mono);
font-size: 0.72rem;
background: var(--panel);
border: 1px solid var(--rule);
padding: 0 5px;
margin: 0 1px;
color: var(--ink);
}
.picker-controls {
display: grid;
grid-template-columns: auto 1fr auto;
align-items: center;
column-gap: 1rem;
row-gap: 0.55rem;
padding: 0.75rem 0;
border-top: 1px solid var(--rule);
border-bottom: 1px solid var(--rule);
margin-bottom: 1.1rem;
}
.picker-controls .ctl-label {
font-family: var(--mono);
font-size: 0.68rem;
letter-spacing: 0.08em;
text-transform: uppercase;
color: var(--mute);
}
.picker-controls .ctl-value {
font-family: var(--mono);
font-size: 0.82rem;
color: var(--accent);
min-width: 3.5rem;
text-align: right;
font-variant-numeric: tabular-nums;
}
.picker-controls input[type="range"] {
width: 100%;
accent-color: var(--accent);
height: 4px;
}
.picker-controls .segmented {
grid-column: 2 / -1;
display: flex;
justify-content: space-between;
align-items: center;
}
.picker-controls .segmented label {
font-family: var(--mono);
font-size: 0.78rem;
color: var(--mute);
cursor: pointer;
padding: 3px 2px 4px;
border-bottom: 1px solid transparent;
transition: color 120ms ease, border-color 120ms ease;
user-select: none;
font-variant-numeric: tabular-nums;
position: relative;
}
.picker-controls .segmented label:hover { color: var(--ink); }
.picker-controls .segmented label:has(input:checked) {
color: var(--accent);
border-bottom-color: var(--accent);
}
.picker-controls .segmented label:has(input:focus-visible) {
outline: 1px solid var(--accent);
outline-offset: 2px;
}
.picker-controls .segmented input[type="radio"] {
position: absolute;
opacity: 0;
width: 1px; height: 1px;
margin: 0;
}
.dataset-picker .gallery {
display: flex;
gap: 1.15rem;
overflow-x: auto;
overflow-y: hidden;
scroll-snap-type: x mandatory;
scroll-behavior: smooth;
-webkit-overflow-scrolling: touch;
padding: 2px 2px 0.85rem;
margin: 0 -2px 1.1rem;
scrollbar-width: thin;
scrollbar-color: var(--rule-2) transparent;
}
.dataset-picker .gallery::-webkit-scrollbar { height: 6px; }
.dataset-picker .gallery::-webkit-scrollbar-track { background: transparent; }
.dataset-picker .gallery::-webkit-scrollbar-thumb {
background: var(--rule-2);
border-radius: 3px;
}
.dataset-picker .gallery::-webkit-scrollbar-thumb:hover { background: var(--mute); }
.dataset-picker .picker-loading {
padding: 3rem 0;
text-align: center;
color: var(--mute);
font-family: var(--mono);
font-size: 0.82rem;
}
.dataset-picker .card {
flex: 0 0 240px;
/* Prevent long .card-path from forcing the card wider than its flex-basis. */
min-width: 0;
scroll-snap-align: start;
border: 1px solid var(--rule);
background: var(--picker-panel);
cursor: pointer;
display: flex;
flex-direction: column;
transition: border-color 120ms ease, box-shadow 120ms ease;
}
.dataset-picker .card:hover { border-color: var(--rule-2); }
.dataset-picker .card.selected {
border-color: var(--accent);
box-shadow: 0 0 0 1px var(--accent);
}
.dataset-picker .viz {
aspect-ratio: 1 / 1;
position: relative;
overflow: hidden;
}
.dataset-picker .viz canvas {
position: absolute;
inset: 0;
display: block;
width: 100% !important;
height: 100% !important;
}
.dataset-picker .fig-label {
position: absolute;
top: 8px;
left: 10px;
font-family: var(--mono);
font-size: 0.62rem;
color: var(--mute);
letter-spacing: 0.05em;
text-transform: uppercase;
pointer-events: none;
}
.dataset-picker .key-hint {
position: absolute;
top: 6px;
right: 8px;
font-family: var(--mono);
font-size: 0.7rem;
color: var(--mute);
background: rgba(250, 250, 247, 0.85);
border: 1px solid var(--rule);
padding: 0 5px;
pointer-events: none;
}
.dataset-picker .card.selected .key-hint {
color: var(--accent);
border-color: var(--accent);
}
.dataset-picker .controls-hint {
position: absolute;
bottom: 6px;
right: 8px;
font-family: var(--mono);
font-size: 0.62rem;
color: var(--mute);
opacity: 0;
transition: opacity 150ms ease;
pointer-events: none;
}
.dataset-picker .card:hover .controls-hint { opacity: 0.75; }
.dataset-picker .card-body {
padding: 0.75rem 0.9rem 0.9rem;
border-top: 1px solid var(--rule);
background: var(--page);
flex: 1;
}
.dataset-picker .card-label {
font-family: var(--sans);
font-weight: 500;
font-size: 0.9rem;
margin-bottom: 2px;
display: flex;
align-items: baseline;
gap: 0.5rem;
}
.dataset-picker .card-label .dot {
display: inline-block;
width: 7px; height: 7px;
border-radius: 50%;
background: transparent;
border: 1px solid var(--rule-2);
}
.dataset-picker .card.selected .card-label .dot {
background: var(--accent);
border-color: var(--accent);
}
.dataset-picker .card-path {
font-family: var(--mono);
font-size: 0.68rem;
color: var(--mute);
margin-bottom: 0.4rem;
word-break: break-all;
}
.dataset-picker .card-desc {
font-family: var(--serif);
font-size: 0.82rem;
color: #4a4a4a;
line-height: 1.5;
}
.picker-footer {
display: flex;
align-items: center;
justify-content: space-between;
padding-top: 0.85rem;
border-top: 1px solid var(--rule);
gap: 1.2rem;
flex-wrap: wrap;
}
.picker-footer .selection {
font-size: 0.82rem;
color: var(--mute);
display: inline-flex;
align-items: baseline;
gap: 0.5rem;
}
.picker-footer .selection .lbl {
font-family: var(--mono);
font-size: 0.68rem;
letter-spacing: 0.08em;
text-transform: uppercase;
}
.picker-footer .selection code {
font-family: var(--mono);
font-size: 0.82rem;
color: var(--accent);
}
.picker-footer .continue {
background: var(--accent);
color: var(--page);
border: 1px solid var(--accent);
padding: 0.45rem 1rem;
font-family: var(--sans);
font-size: 0.82rem;
font-weight: 600;
letter-spacing: 0.04em;
cursor: pointer;
border-radius: 1px;
transition: background 120ms ease;
}
.picker-footer .continue:not(:disabled):hover { background: #143642; }
.picker-footer .continue:disabled {
background: var(--faint);
border-color: var(--faint);
color: var(--page);
cursor: not-allowed;
}
@media (max-width: 940px) {
.dataset-picker > summary { padding: 0.9rem 1.2rem; }
.dataset-picker .picker-body { padding: 0.4rem 1.2rem 1.4rem; }
}

View File

@ -25,7 +25,7 @@
{% if r.params %}
<div class="paramline">
<span><span class="k">N</span>&nbsp;{{ r.params.get('num_points', '?') }}</span>
<span><span class="k">T</span>&nbsp;{{ r.params.get('num_timesteps', '?') }}</span>
<span><span class="k">S</span>&nbsp;{{ r.params.get('num_snapshots', '?') }}</span>
<span><span class="k">J</span>&nbsp;{{ r.params.get('jitter_scale', '?') }}</span>
<span><span class="k">s</span>&nbsp;{{ r.params.get('seed', '?') }}</span>
{% set ea = r.params.get('embed_args') or {} %}

View File

@ -6,14 +6,6 @@
<title>embedding notebook — web1</title>
<link rel="stylesheet" href="/static/style.css" />
<script src="https://unpkg.com/htmx.org@2.0.4"></script>
<script type="importmap">
{
"imports": {
"three": "https://unpkg.com/three@0.160.0/build/three.module.js",
"three/addons/": "https://unpkg.com/three@0.160.0/examples/jsm/"
}
}
</script>
<link rel="icon" href="data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 16 16'%3E%3Ccircle cx='8' cy='8' r='3' fill='%231f4e5f'/%3E%3C/svg%3E" />
</head>
<body>
@ -29,59 +21,6 @@
</div>
</header>
<details class="dataset-picker" id="picker" open>
<summary>
<span class="picker-meta">
<span class="section-number">§ 1</span>
<span class="picker-title">input dataset</span>
<span class="picker-selection">
<span class="lbl">generator</span>
<code id="picker-summary-path"></code>
</span>
</span>
<span class="picker-toggle" aria-hidden="true"></span>
</summary>
<div class="picker-body">
<p class="lede">
Six candidate generators for the embedding pipeline. Drag to rotate, scroll to zoom,
<kbd>&larr;</kbd>&nbsp;<kbd>&rarr;</kbd> or <kbd>1</kbd>&thinsp;&hellip;&thinsp;<kbd>6</kbd> to select.
</p>
<div class="picker-controls">
<label class="ctl-label" for="n-slider">n samples</label>
<input type="range" id="n-slider" min="100" max="5000" step="100" value="500">
<span class="ctl-value" id="n-value">500</span>
<span class="ctl-label">noise σ</span>
<div class="segmented" role="radiogroup" aria-label="noise σ">
<label><input type="radio" name="j" value="0.001"><span>0.001</span></label>
<label><input type="radio" name="j" value="0.005" checked><span>0.005</span></label>
<label><input type="radio" name="j" value="0.01"><span>0.010</span></label>
</div>
<span class="ctl-label">timesteps</span>
<div class="segmented" role="radiogroup" aria-label="number of timesteps">
<label><input type="radio" name="f" value="12"><span>12</span></label>
<label><input type="radio" name="f" value="24" checked><span>24</span></label>
<label><input type="radio" name="f" value="48"><span>48</span></label>
</div>
</div>
<div class="gallery" id="gallery">
<div class="picker-loading">loading samples&hellip;</div>
</div>
<div class="picker-footer">
<div class="selection">
<span class="lbl">generator</span>
<code id="selected-path"></code>
</div>
<button type="button" class="continue" id="continue-btn" disabled>Continue &rarr;</button>
</div>
</div>
</details>
<main>
<!-- ==================== LEFT: parameter notebook ==================== -->
@ -95,17 +34,10 @@
hx-indicator="#busy"
>
<!-- Picker-driven hidden fields. Values are written by dataset-picker.js. -->
<input type="hidden" name="dataset_id" id="dataset_id" value="" />
<input type="hidden" name="num_points" id="num_points" value="500" />
<input type="hidden" name="num_timesteps" id="num_timesteps" value="24" />
<input type="hidden" name="jitter_scale" id="jitter_scale" value="0.005" />
<input type="hidden" name="seed" id="seed" value="42" />
<!-- §2 reducer -->
<!-- §1 reducer -->
<div class="section">
<div class="section-label">
<span>§ 2 &nbsp; reducer</span><span class="ordinal">method</span>
<span>§ 1 &nbsp; reducer</span><span class="ordinal">method</span>
</div>
<p class="lead">Dimensionality reduction applied to each snapshot. Only reducers whose Python package is importable are shown.</p>
@ -131,16 +63,53 @@
</ul>
</div>
<!-- §3 reducer params -->
<!-- §2 reducer params -->
<div class="section">
<div class="section-label">
<span>§ 3 &nbsp; parameters</span><span class="ordinal">kwargs</span>
<span>§ 2 &nbsp; parameters</span><span class="ordinal">kwargs</span>
</div>
<div id="reducer-params">
{% include "_reducer_form.html" with context %}
</div>
</div>
<!-- §3 data -->
<div class="section">
<div class="section-label">
<span>§ 3 &nbsp; data &amp; drift</span><span class="ordinal">sampling</span>
</div>
<div class="form-grid">
<label for="generator_path">
generator
<span class="hint">data-generating surface</span>
</label>
<select name="generator_path" id="generator_path">
{% for path, short in generators %}
<option value="{{ path }}" {% if path == 'sklearn.datasets.make_s_curve' %}selected{% endif %}>{{ short }}</option>
{% endfor %}
</select>
<label for="num_points">n<sub>points</sub></label>
<input type="number" id="num_points" name="num_points" value="5000" min="100" step="100" />
<label for="num_snapshots">n<sub>snapshots</sub></label>
<input type="number" id="num_snapshots" name="num_snapshots" value="48" min="2" step="1" />
<label for="jitter_scale">
jitter scale
<span class="hint">std of per-step Gaussian drift</span>
</label>
<input type="number" id="jitter_scale" name="jitter_scale" value="0.01" step="0.001" min="0" />
<label for="seed">
jitter seed
<span class="hint">seeds only the drift simulation — the embedder's seed is in §2 (advanced).</span>
</label>
<input type="number" id="seed" name="seed" value="42" step="1" />
</div>
</div>
<div class="actions">
<button type="submit" class="submit">submit run</button>
<span id="busy" class="htmx-indicator">dispatching&hellip;</span>
@ -176,11 +145,9 @@
</main>
<footer class="colophon">
<span><span class="k">web</span> · scientific instrument · port 8001</span>
<span><span class="k">web1</span> · scientific instrument · port 8001</span>
<span>fastapi · htmx · no build step</span>
</footer>
<script type="module" src="/static/dataset-picker.js"></script>
</body>
</html>

View File

@ -64,9 +64,9 @@ def generate_initial_frame_task(
@task(cache_policy=INPUTS, cache_expiration=timedelta(hours=12))
def generate_snapshots_task(
initial_df: pd.DataFrame, num_timesteps: int, jitter_scale: float, seed: int = 42
initial_df: pd.DataFrame, num_snapshots: int, jitter_scale: float, seed: int = 42
) -> List[pd.DataFrame]:
return E.generate_jittered_snapshots(initial_df, num_timesteps, jitter_scale, seed)
return E.generate_jittered_snapshots(initial_df, num_snapshots, jitter_scale, seed)
@task(
@ -138,7 +138,7 @@ _DEFAULT_EMBED_ARGS: Dict[str, Any] = {"n_components": 2, "random_state": 30}
@flow(task_runner=RayTaskRunner(init_kwargs={"num_cpus": 4}))
def embedding_flow(
num_points: int = 5000,
num_timesteps: int = 48,
num_snapshots: int = 48,
jitter_scale: float = 0.01,
seed: int = 42,
generator_path: str = "sklearn.datasets.make_s_curve",
@ -166,10 +166,10 @@ def embedding_flow(
Path(output_dir).mkdir(parents=True, exist_ok=True)
_generator = generator_path.split(".")[-1]
output_ref: str = (
f"{output_dir.strip('/')}/{_generator}_Reference_N{num_points}_T{num_timesteps}_J{jitter_scale}_s{seed}.html"
f"{output_dir.strip('/')}/{_generator}_Reference_N{num_points}_S{num_snapshots}_J{jitter_scale}_s{seed}.html"
)
output_embed: str = (
f"{output_dir.strip('/')}/{_generator}_{embedder.split('.')[-1]}_N{num_points}_T{num_timesteps}_J{jitter_scale}_s{seed}.html"
f"{output_dir.strip('/')}/{_generator}_{embedder.split('.')[-1]}_N{num_points}_S{num_snapshots}_J{jitter_scale}_s{seed}.html"
)
title_ref = f"Reference: {_generator}, N={num_points} with {jitter_scale} noise"
title_embed = f"Embedding: {embedder.split('.')[-1]} on {_generator}, N={num_points} with {jitter_scale} noise"
@ -186,27 +186,27 @@ def embedding_flow(
# Generate snapshots
snapshots = generate_snapshots_task.submit(
initial_df=initial_frame.result(),
num_timesteps=num_timesteps,
num_snapshots=num_snapshots,
jitter_scale=jitter_scale,
seed=seed,
)
snapshot_list = snapshots.result()
# One date per timestep (monthly, starting at 2000-01-01 for cosmetic reasons)
# Generate one date per snapshot (monthly, first of each month, starting at 2000-01-01)
dates = [
f"{year}-{month:02d}-01"
for year in range(2000, 2001 + math.floor(num_timesteps / 12))
for year in range(2000, 2001 + math.floor(num_snapshots / 12))
for month in range(1, 13)
][:num_timesteps]
][:num_snapshots]
# Apply embeddings in parallel using Prefect's mapping
embeddings = create_embedding.map(
snapshot=snapshot_list,
time_idx=dates,
embed_columns=[embed_columns] * num_timesteps,
embedder=[embedder] * num_timesteps,
embed_args=[merged_embed_args] * num_timesteps,
id_column=[id_column] * num_timesteps,
embed_columns=[embed_columns] * num_snapshots,
embedder=[embedder] * num_snapshots,
embed_args=[merged_embed_args] * num_snapshots,
id_column=[id_column] * num_snapshots,
)
# Collect all embeddings

View File

@ -425,18 +425,18 @@ def generate_initial_frame(
def generate_jittered_snapshots(
initial_df: pd.DataFrame,
num_timesteps: int,
num_snapshots: int,
jitter_scale: float = 0.1,
seed: int = 42,
) -> List[pd.DataFrame]:
"""
Generate one jittered snapshot per timestep, with random point add/remove.
Generate snapshots by applying random jitter to the initial frame and randomly adding/removing points.
Parameters:
- initial_df: pd.DataFrame
The initial DataFrame to apply jitter.
- num_timesteps: int
Number of timesteps (one snapshot produced per timestep).
- num_snapshots: int
Number of snapshots to generate.
- jitter_scale: float
Standard deviation of the Gaussian noise added for jitter.
- seed: int
@ -450,7 +450,7 @@ def generate_jittered_snapshots(
snapshots = []
current_df = initial_df.copy()
for i in range(num_timesteps):
for i in range(num_snapshots):
# Apply jitter (set to 0 for testing)
jitter = np.random.normal(
loc=0.0,

View File

@ -1,8 +1,14 @@
run:
.venv/bin/python flows/embedding_flow.py
web:
.venv/bin/python -m uvicorn app.web.main:app --host 0.0.0.0 --port 8001 --reload
web1:
.venv/bin/python -m uvicorn app.web1.main:app --host 0.0.0.0 --port 8001 --reload
web2:
.venv/bin/python -m uvicorn app.web2.main:app --host 0.0.0.0 --port 8002 --reload
web3:
.venv/bin/python -m uvicorn app.web3.main:app --host 0.0.0.0 --port 8003 --reload
demo:
.venv/bin/python -m uvicorn app.demo.main:app --host 0.0.0.0 --port 8010 --reload