migrate new data picker

This commit is contained in:
Michael Pilosov 2026-04-21 19:46:31 -06:00
parent ca0ad9fd2d
commit afc9b5b2f2
4 changed files with 952 additions and 54 deletions

View File

@ -13,6 +13,7 @@ from __future__ import annotations
import importlib.util import importlib.util
import json import json
import os import os
from functools import lru_cache
from pathlib import Path from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple from typing import Any, Dict, List, Optional, Tuple
@ -21,6 +22,13 @@ from fastapi import FastAPI, Form, Request
from fastapi.responses import HTMLResponse, JSONResponse from fastapi.responses import HTMLResponse, JSONResponse
from fastapi.staticfiles import StaticFiles from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates from fastapi.templating import Jinja2Templates
from sklearn.datasets import (
make_blobs,
make_classification,
make_gaussian_quantiles,
make_s_curve,
make_swiss_roll,
)
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
@ -35,11 +43,137 @@ FIGS_DIR.mkdir(parents=True, exist_ok=True)
PREFECT_API = os.environ.get("PREFECT_API_URL", "http://localhost:4200/api") PREFECT_API = os.environ.get("PREFECT_API_URL", "http://localhost:4200/api")
DEPLOYMENT_NAME = "embedding-flow/embedding-flow" DEPLOYMENT_NAME = "embedding-flow/embedding-flow"
GENERATOR_OPTIONS = [
("sklearn.datasets.make_s_curve", "make_s_curve"), # ---------------------------------------------------------------------------
("sklearn.datasets.make_swiss_roll", "make_swiss_roll"), # Dataset catalogue
("sklearn.datasets.make_blobs", "make_blobs"), # ---------------------------------------------------------------------------
] # Metadata for the /data.json endpoint consumed by the dataset picker, and
# for server-side lookup when the picker posts its selection back. kwargs
# must carry n_features=3 for generators that aren't already 3-D, since
# they'll be forwarded verbatim to the Prefect flow's generator_kwargs.
# Number of samples generated per dataset for the picker preview.
DATASET_PREVIEW_N = 5000
# Fixed random_state handed to every preview generator call, so previews are
# reproducible.
DATASET_PREVIEW_SEED = 0
# Catalogue keyed by dataset id (the value the picker posts as dataset_id).
# Each entry carries:
#   name        - display label for the picker card
#   path        - dotted import path of the sklearn generator
#   kwargs      - extra keyword arguments for that generator
#   description - explanatory blurb for the picker card
#   kind        - "continuous" or "categorical"; tells the picker how to
#                 colour the labels
DATASET_META: Dict[str, Dict[str, Any]] = {
    "s_curve": {
        "name": "S-Curve",
        "path": "sklearn.datasets.make_s_curve",
        "kwargs": {},
        "description": (
            "A 2-D manifold warped into R³. Continuous label encodes position "
            "along the curve — a good test of whether a reducer unrolls the "
            "sheet without tearing."
        ),
        "kind": "continuous",
    },
    "swiss_roll": {
        "name": "Swiss Roll",
        "path": "sklearn.datasets.make_swiss_roll",
        "kwargs": {},
        "description": (
            "A rolled-up plane. The canonical hard case for linear methods: "
            "PCA collapses the spiral, non-linear methods should recover the "
            "unroll."
        ),
        "kind": "continuous",
    },
    "swiss_roll_hole": {
        "name": "Swiss Roll (hole)",
        "path": "sklearn.datasets.make_swiss_roll",
        "kwargs": {"hole": True},
        "description": (
            "Swiss roll with a rectangular hole punched through. Same manifold, "
            "non-trivial topology — a faithful unroll should preserve the hole "
            "rather than smearing it closed."
        ),
        "kind": "continuous",
    },
    "blobs": {
        "name": "Gaussian Blobs",
        "path": "sklearn.datasets.make_blobs",
        # n_features=3 is explicit because make_blobs is not 3-D by default.
        "kwargs": {"n_features": 3, "centers": 5, "cluster_std": 1.0},
        "description": (
            "Five isotropic Gaussian clusters in R³. Discrete class labels. "
            "Tests whether a reducer preserves cluster separation when "
            "projected to 2-D."
        ),
        "kind": "categorical",
    },
    "gaussian_quantiles": {
        "name": "Gaussian Quantiles",
        "path": "sklearn.datasets.make_gaussian_quantiles",
        "kwargs": {"n_features": 3, "n_classes": 4},
        "description": (
            "Concentric Gaussian shells in R³; class = which shell. Classes "
            "are linearly inseparable by construction — PCA collapses them, "
            "kernel and manifold methods have a chance."
        ),
        "kind": "categorical",
    },
    "classification": {
        "name": "Hypercube Clusters",
        "path": "sklearn.datasets.make_classification",
        "kwargs": {
            "n_features": 3,
            "n_informative": 3,
            "n_redundant": 0,
            "n_repeated": 0,
            "n_classes": 4,
            "n_clusters_per_class": 2,
            "class_sep": 1.5,
        },
        "description": (
            "Four classes, two sub-clusters each, placed at hypercube vertices "
            "with informative noise. A denser discrete test than blobs — "
            "within-class bimodality stresses cluster-preserving reducers."
        ),
        "kind": "categorical",
    },
}
@lru_cache(maxsize=1)
def _dataset_previews() -> Dict[str, Dict[str, Any]]:
    """Return the dataset catalogue with preview points and labels attached.

    Every entry of ``DATASET_META`` is sampled with ``DATASET_PREVIEW_N``
    points and ``random_state=DATASET_PREVIEW_SEED``. The generator keyword
    arguments are taken from the catalogue entry itself rather than being
    repeated here, so the preview cannot drift away from the kwargs that are
    forwarded to the flow for the same dataset.

    Returns:
        A dict keyed by dataset id. Each value is the catalogue entry plus
        ``"points"`` and ``"labels"`` as plain nested lists (JSON-ready).
        The result is memoised, so callers must treat it as read-only.

    Raises:
        KeyError: if a catalogue entry names a generator path that is not
            listed in ``generators`` below.
    """
    # Resolve catalogue paths to the generator callables imported by this
    # module; only these generators can be previewed.
    generators = {
        "sklearn.datasets.make_s_curve": make_s_curve,
        "sklearn.datasets.make_swiss_roll": make_swiss_roll,
        "sklearn.datasets.make_blobs": make_blobs,
        "sklearn.datasets.make_gaussian_quantiles": make_gaussian_quantiles,
        "sklearn.datasets.make_classification": make_classification,
    }
    # Preview-only noise levels. They are deliberately not part of the
    # catalogue kwargs, which describe what is sent to the flow.
    preview_noise = {
        "s_curve": 0.03,
        "swiss_roll": 0.15,
        "swiss_roll_hole": 0.15,
    }

    out: Dict[str, Dict[str, Any]] = {}
    for key, meta in DATASET_META.items():
        kwargs: Dict[str, Any] = dict(meta["kwargs"])
        if key in preview_noise:
            # A noise value set in the catalogue would take precedence.
            kwargs.setdefault("noise", preview_noise[key])
        pts, labels = generators[meta["path"]](
            n_samples=DATASET_PREVIEW_N,
            random_state=DATASET_PREVIEW_SEED,
            **kwargs,
        )
        out[key] = {
            **meta,
            "points": pts.tolist(),
            "labels": labels.tolist(),
        }
    return out
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
@ -394,7 +528,6 @@ async def index(request: Request) -> HTMLResponse:
"reducers": reducers, "reducers": reducers,
"default_reducer": default_reducer, "default_reducer": default_reducer,
"default_spec": default_spec, "default_spec": default_spec,
"generators": GENERATOR_OPTIONS,
"runs": views, "runs": views,
"deployment_id": dep_id, "deployment_id": dep_id,
"prefect_api": PREFECT_API, "prefect_api": PREFECT_API,
@ -402,6 +535,11 @@ async def index(request: Request) -> HTMLResponse:
) )
@app.get("/data.json")
async def data_json() -> JSONResponse:
    """Serve the dataset catalogue, preview samples included, as JSON."""
    previews = _dataset_previews()
    return JSONResponse(previews)
@app.get("/reducer-form", response_class=HTMLResponse) @app.get("/reducer-form", response_class=HTMLResponse)
async def reducer_form(request: Request, name: str) -> HTMLResponse: async def reducer_form(request: Request, name: str) -> HTMLResponse:
spec = REDUCERS.get(name) spec = REDUCERS.get(name)
@ -436,7 +574,24 @@ async def submit(request: Request) -> HTMLResponse:
status_code=400, status_code=400,
) )
# Data params # Dataset came from the picker via dataset_id; fall back to explicit
# generator_path / generator_kwargs if a client posts those directly.
dataset_id = data.get("dataset_id") or ""
if dataset_id and dataset_id in DATASET_META:
meta = DATASET_META[dataset_id]
generator_path = meta["path"]
generator_kwargs = dict(meta["kwargs"])
else:
generator_path = data.get("generator_path") or "sklearn.datasets.make_s_curve"
raw_kwargs = data.get("generator_kwargs") or ""
try:
generator_kwargs = json.loads(raw_kwargs) if raw_kwargs else {}
except json.JSONDecodeError as e:
return HTMLResponse(
f"<div class='flash err'>bad generator_kwargs JSON: {e}</div>",
status_code=400,
)
try: try:
num_points = int(data.get("num_points", "5000") or 5000) num_points = int(data.get("num_points", "5000") or 5000)
num_snapshots = int(data.get("num_snapshots", "48") or 48) num_snapshots = int(data.get("num_snapshots", "48") or 48)
@ -446,14 +601,9 @@ async def submit(request: Request) -> HTMLResponse:
return HTMLResponse( return HTMLResponse(
f"<div class='flash err'>bad numeric input: {e}</div>", status_code=400 f"<div class='flash err'>bad numeric input: {e}</div>", status_code=400
) )
generator_path = data.get("generator_path") or "sklearn.datasets.make_s_curve"
embed_args = build_embed_args(reducer, data) embed_args = build_embed_args(reducer, data)
generator_kwargs: Dict[str, Any] = {}
if generator_path.endswith("make_blobs"):
generator_kwargs["n_features"] = 3
parameters: Dict[str, Any] = { parameters: Dict[str, Any] = {
"num_points": num_points, "num_points": num_points,
"num_snapshots": num_snapshots, "num_snapshots": num_snapshots,

View File

@ -0,0 +1,367 @@
// Dataset picker — ported from app/demo/index.html. Renders the six sklearn
// previews into cards, streams jittered random walks, and writes the current
// selection into hidden form inputs so the main <form> can submit it to the
// Prefect flow.
import * as THREE from 'three';
import { OrbitControls } from 'three/addons/controls/OrbitControls.js';
// Trajectories are precomputed at the max cycle length. Toggling n-frames
// truncates (12 is a prefix of 24 is a prefix of 48) so the same walk is
// reused — no reroll on toggle, and the per-frame pulse stays consistent.
// Longest walk length, in frames. Each scene's trajectory buffer is generated
// once at this size via buildTrajectories(s, MAX_FRAMES).
const MAX_FRAMES = 48;
// Palette for categorical labels. buildColors() picks an entry by
// label modulo palette length, so labels past the eighth reuse colours.
const CATEGORICAL_HEX = [
  '#1f4e5f', '#c97b3f', '#8b5a9f', '#5a8560',
  '#c74a5e', '#6b7d8f', '#b89f51', '#4a6fa5',
];
// The same palette, parsed once into THREE.Color objects.
const CATEGORICAL = CATEGORICAL_HEX.map(h => new THREE.Color(h));
// Map t (expected in [0, 1]) onto the continuous colour ramp: hue runs from
// 215° at t = 0 to 28° at t = 1, saturation is fixed, and lightness rises
// slightly with t. Returns a new THREE.Color.
function rampContinuous(t) {
  const hueDegrees = (1 - t) * 215 + t * 28;
  const lightness = 0.50 + (t - 0.5) * 0.08;
  const color = new THREE.Color();
  color.setHSL(hueDegrees / 360, 0.62, lightness);
  return color;
}
// Centre a point cloud on its mean and rescale it for display.
//
// `points` is an array of [x, y, z] triples. The result is
// `{ positions }`, a flat Float32Array (x0, y0, z0, x1, ...) in which every
// coordinate has had the per-axis mean subtracted and been divided by one
// shared scale factor.
function normalize(points) {
  const count = points.length;

  // Per-axis mean.
  const mean = [0, 0, 0];
  for (const pt of points) {
    mean[0] += pt[0];
    mean[1] += pt[1];
    mean[2] += pt[2];
  }
  mean[0] /= count;
  mean[1] /= count;
  mean[2] /= count;

  // Scale = 95th percentile of each point's largest absolute centred
  // coordinate. A percentile is used instead of the overall maximum so that
  // a few far-out points do not shrink the whole cloud.
  const extents = new Float64Array(count);
  for (let i = 0; i < count; i++) {
    const pt = points[i];
    const dx = Math.abs(pt[0] - mean[0]);
    const dy = Math.abs(pt[1] - mean[1]);
    const dz = Math.abs(pt[2] - mean[2]);
    const dxy = dx > dy ? dx : dy;
    extents[i] = dxy > dz ? dxy : dz;
  }
  extents.sort(); // typed arrays sort numerically, ascending
  // The 1e-9 floor avoids dividing by zero when the cloud is degenerate.
  const scale = Math.max(extents[Math.floor(count * 0.95)], 1e-9);

  const positions = new Float32Array(count * 3);
  for (let i = 0, o = 0; i < count; i++, o += 3) {
    const pt = points[i];
    positions[o] = (pt[0] - mean[0]) / scale;
    positions[o + 1] = (pt[1] - mean[1]) / scale;
    positions[o + 2] = (pt[2] - mean[2]) / scale;
  }
  return { positions };
}
// Build the per-point RGB buffer for a dataset.
//
// `labels` holds one value per point. When `kind` is 'categorical' each
// label picks a palette entry (label modulo palette length); for any other
// kind the labels are min-max scaled to [0, 1] and fed through
// rampContinuous(). Returns a flat Float32Array (r0, g0, b0, r1, ...).
function buildColors(labels, kind) {
  const count = labels.length;
  const rgb = new Float32Array(count * 3);
  const put = (idx, color) => {
    const o = idx * 3;
    rgb[o] = color.r;
    rgb[o + 1] = color.g;
    rgb[o + 2] = color.b;
  };

  if (kind !== 'categorical') {
    let lo = Infinity;
    let hi = -Infinity;
    for (const value of labels) {
      if (value < lo) lo = value;
      if (value > hi) hi = value;
    }
    // Constant labels would give a zero span; fall back to 1.
    const span = (hi - lo) || 1;
    for (let i = 0; i < count; i++) {
      put(i, rampContinuous((labels[i] - lo) / span));
    }
    return rgb;
  }

  for (let i = 0; i < count; i++) {
    put(i, CATEGORICAL[labels[i] % CATEGORICAL.length]);
  }
  return rgb;
}
// Create the three.js preview for one dataset inside `container`.
//
// Builds a coloured point cloud from `dataset.points` / `dataset.labels` /
// `dataset.kind`, a perspective camera, a WebGL renderer (whose canvas is
// appended to `container`) and auto-rotating orbit controls. Returns the
// per-scene state record used by the sizing and animation code.
function createScene(container, dataset) {
  // Resting layout, plus a separate copy that serves as the live vertex
  // buffer so the resting layout itself is never overwritten.
  const basePositions = normalize(dataset.points).positions;
  const livePositions = new Float32Array(basePositions);
  const colors = buildColors(dataset.labels, dataset.kind);

  const geometry = new THREE.BufferGeometry();
  geometry.setAttribute('position', new THREE.BufferAttribute(livePositions, 3));
  geometry.setAttribute('color', new THREE.BufferAttribute(colors, 3));

  const cloud = new THREE.Points(
    geometry,
    new THREE.PointsMaterial({
      size: 2.1,
      sizeAttenuation: false,
      vertexColors: true,
      transparent: true,
      opacity: 0.92,
    }),
  );

  const scene = new THREE.Scene();
  scene.background = new THREE.Color(0xf2eee4);
  scene.add(cloud);

  const camera = new THREE.PerspectiveCamera(42, 1, 0.1, 100);
  camera.position.set(2.6, 1.9, 2.6);
  camera.lookAt(0, 0, 0);

  const renderer = new THREE.WebGLRenderer({ antialias: true, alpha: false });
  renderer.setPixelRatio(Math.min(window.devicePixelRatio, 2));
  container.appendChild(renderer.domElement);

  const controls = new OrbitControls(camera, renderer.domElement);
  Object.assign(controls, {
    enableDamping: true,
    dampingFactor: 0.08,
    enablePan: false,
    autoRotate: true,
    autoRotateSpeed: 0.55,
    minDistance: 1.5,
    maxDistance: 6,
  });

  return {
    scene, camera, renderer, controls, container, geometry,
    basePositions,
    // Random-walk offsets and their frame count; both are filled in later
    // by buildTrajectories().
    trajectories: null,
    numFrames: 0,
    // Duration of one walk frame (12 frames per second).
    snapshotMs: 1000 / 12,
    // Rest period added to the end of every cycle so the un-jittered
    // layout is visible for a moment rather than for zero time.
    holdMs: 200,
    // Timestamp the animation cycle is measured from. The caller assigns
    // the same value to every scene so all previews animate in lockstep.
    cycleStartMs: 0,
  };
}
// Precompute a Gaussian random walk for every coordinate of scene `s`.
//
// The buffer holds `numFrames` frames of `s.basePositions.length` offsets
// each. Frame 0 is all zeros (the resting layout); every later frame adds
// an independent standard-normal step to the previous one. The result is
// stored on `s.trajectories`, and `s.numFrames` is set to `numFrames`.
function buildTrajectories(s, numFrames) {
  const stride = s.basePositions.length;
  const walk = new Float32Array(numFrames * stride);

  for (let frame = 1; frame < numFrames; frame++) {
    const here = frame * stride;
    const before = here - stride;
    // Box-Muller: each pair of uniforms yields two normal deviates, so the
    // coordinates are filled two at a time.
    for (let k = 0; k < stride; k += 2) {
      const u1 = Math.random() || 1e-12; // keep log() away from zero
      const u2 = Math.random();
      const radius = Math.sqrt(-2.0 * Math.log(u1));
      const angle = 2 * Math.PI * u2;
      walk[here + k] = walk[before + k] + radius * Math.cos(angle);
      if (k + 1 < stride) {
        walk[here + k + 1] = walk[before + k + 1] + radius * Math.sin(angle);
      }
    }
  }

  s.trajectories = walk;
  s.numFrames = numFrames;
}
// Resize a scene's renderer to a square whose side is the container's
// current width (whole pixels, never below 1) and refresh the camera.
function sizeScene(s) {
  const { width } = s.container.getBoundingClientRect();
  const side = Math.max(1, Math.floor(width));
  s.renderer.setSize(side, side);
  // The canvas is always square, so the aspect ratio stays 1.
  s.camera.aspect = 1;
  s.camera.updateProjectionMatrix();
}
// Entry point for the dataset picker.
//
// Fetches the catalogue from /data.json, renders one card (with a live 3-D
// preview) per dataset, wires the sample-count slider, the noise and frame
// radio groups, keyboard navigation and the Continue button, mirrors the
// current choices into the hidden form inputs, and starts the animation
// loop. If the catalogue cannot be loaded, an error notice is shown in the
// gallery and nothing else is set up.
async function main() {
  const gallery = document.getElementById('gallery');
  let data;
  try {
    const res = await fetch('/data.json');
    // fetch() only rejects on network failure. Without this check a JSON
    // error body (e.g. a 404 payload) would be treated as the catalogue.
    if (!res.ok) throw new Error(`HTTP ${res.status} ${res.statusText}`);
    data = await res.json();
  } catch (err) {
    // textContent, not innerHTML: a JSON parse error message can quote part
    // of the response body, which must not be interpreted as markup.
    const notice = document.createElement('div');
    notice.className = 'picker-loading';
    notice.textContent = `failed to load /data.json — ${err}`;
    gallery.innerHTML = '';
    gallery.appendChild(notice);
    return;
  }
  gallery.innerHTML = '';

  const scenes = [];
  const order = Object.entries(data);
  let selectedId = null;

  // Hidden form inputs the main <form> will submit.
  const hidden = {
    datasetId: document.getElementById('dataset_id'),
    numPoints: document.getElementById('num_points'),
    numSnapshots: document.getElementById('num_snapshots'),
    jitterScale: document.getElementById('jitter_scale'),
  };

  const pickerDetails = document.getElementById('picker');
  const summaryPath = document.getElementById('picker-summary-path');
  const selectedPath = document.getElementById('selected-path');
  const continueBtn = document.getElementById('continue-btn');

  // Each .viz element maps to its scene so the resize observer can find
  // which renderer to resize.
  const vizToScene = new WeakMap();
  const sizeObserver = new ResizeObserver((entries) => {
    for (const entry of entries) {
      const s = vizToScene.get(entry.target);
      if (s) sizeScene(s);
    }
  });

  // One card per dataset, in catalogue order.
  order.forEach(([id, ds], i) => {
    const card = document.createElement('div');
    card.className = 'card';
    card.dataset.id = id;
    card.innerHTML = `
      <div class="viz">
        <span class="fig-label">Fig. 1.${i + 1}</span>
        <span class="key-hint">[${i + 1}]</span>
        <span class="controls-hint">drag · scroll</span>
      </div>
      <div class="card-body">
        <div class="card-label">
          <span class="dot"></span>
          <span>${ds.name}</span>
        </div>
        <div class="card-path">${ds.path}</div>
        <div class="card-desc">${ds.description}</div>
      </div>
    `;
    gallery.appendChild(card);

    const viz = card.querySelector('.viz');
    const s = createScene(viz, ds);
    buildTrajectories(s, MAX_FRAMES);
    scenes.push(s);
    vizToScene.set(viz, s);
    sizeObserver.observe(viz);

    // Stop auto-rotation as soon as the user starts interacting.
    s.controls.addEventListener('start', () => { s.controls.autoRotate = false; });
    card.addEventListener('click', () => selectCard(id, card, ds));
  });

  // Mark `card` as the selected one and record its id in the hidden input.
  function selectCard(id, card, ds) {
    document.querySelectorAll('#gallery .card').forEach(c => c.classList.remove('selected'));
    card.classList.add('selected');
    selectedId = id;
    selectedPath.textContent = ds.path;
    hidden.datasetId.value = id;
    updateContinue();
  }

  // The Continue button is only enabled once a dataset is selected.
  function updateContinue() {
    continueBtn.disabled = !selectedId;
    continueBtn.title = selectedId ? '' : 'pick a dataset first';
  }

  // n samples: limits how many of the preview points are drawn.
  const slider = document.getElementById('n-slider');
  const nValue = document.getElementById('n-value');
  function applyN(n) {
    nValue.textContent = n.toLocaleString();
    hidden.numPoints.value = String(n);
    for (const s of scenes) {
      const cap = s.geometry.attributes.position.count;
      s.geometry.setDrawRange(0, Math.min(n, cap));
    }
  }
  slider.addEventListener('input', (e) => applyN(parseInt(e.target.value, 10)));
  applyN(parseInt(slider.value, 10));

  // noise σ: multiplier applied to the precomputed walk at render time.
  let jitterScale = 0;
  const jInputs = document.querySelectorAll('input[name="j"]');
  function applyJ(v) {
    jitterScale = v;
    hidden.jitterScale.value = String(v);
  }
  jInputs.forEach(input => {
    input.addEventListener('change', (e) => applyJ(parseFloat(e.target.value)));
  });
  applyJ(parseFloat(document.querySelector('input[name="j"]:checked').value));

  // n frames: truncates the precomputed walk. Prefix-stable (12 ⊂ 24 ⊂ 48),
  // so toggling changes cycle length without rerolling. cycleStartMs is
  // shared so all cards animate in lockstep.
  const fInputs = document.querySelectorAll('input[name="f"]');
  function applyF(n) {
    hidden.numSnapshots.value = String(n);
    const start = performance.now();
    for (const s of scenes) {
      s.numFrames = n;
      s.cycleStartMs = start;
    }
  }
  fInputs.forEach(input => {
    input.addEventListener('change', (e) => applyF(parseInt(e.target.value, 10)));
  });
  applyF(parseInt(document.querySelector('input[name="f"]:checked').value, 10));

  // Select the card at position `idx`; out-of-range indices are ignored.
  function selectByIndex(idx, { scroll = true } = {}) {
    const entry = order[idx];
    if (!entry) return;
    const [id, ds] = entry;
    const card = gallery.children[idx];
    if (!card) return;
    selectCard(id, card, ds);
    if (scroll) card.scrollIntoView({ behavior: 'smooth', inline: 'nearest', block: 'nearest' });
  }

  // Keyboard navigation: digits jump to a card, arrows step through them.
  // Ignored while the picker is collapsed or focus is in a text field.
  document.addEventListener('keydown', (e) => {
    if (!pickerDetails.open) return;
    if (e.target.tagName === 'INPUT' || e.target.tagName === 'TEXTAREA') return;
    if (/^[1-9]$/.test(e.key)) {
      selectByIndex(parseInt(e.key, 10) - 1);
      return;
    }
    if (e.key === 'ArrowRight' || e.key === 'ArrowLeft') {
      e.preventDefault();
      const currentIdx = order.findIndex(([id]) => id === selectedId);
      const n = order.length;
      // With nothing selected, Right starts at the first card and Left at
      // the last; otherwise step one card and clamp at the ends.
      const nextIdx = e.key === 'ArrowRight'
        ? (currentIdx < 0 ? 0 : Math.min(currentIdx + 1, n - 1))
        : (currentIdx < 0 ? n - 1 : Math.max(currentIdx - 1, 0));
      selectByIndex(nextIdx);
    }
  });

  // Continue: show the chosen generator in the summary and collapse.
  continueBtn.addEventListener('click', () => {
    if (!selectedId) return;
    const ds = data[selectedId];
    summaryPath.textContent = ds.path;
    pickerDetails.open = false;
  });

  // Animation loop: each cycle plays the walk for numFrames frames, then
  // holds the resting layout for holdMs.
  function tick() {
    requestAnimationFrame(tick);
    // When the picker is collapsed the canvases are display:none inside a
    // closed <details>; rects are zero. Skip the per-frame work.
    if (!pickerDetails.open) return;
    const now = performance.now();
    for (const s of scenes) {
      const N = s.numFrames;
      const n = s.basePositions.length;
      const walkMs = N * s.snapshotMs;
      const cycleMs = walkMs + s.holdMs;
      // Double modulo keeps the phase non-negative.
      const elapsed = ((now - s.cycleStartMs) % cycleMs + cycleMs) % cycleMs;

      // Only the points inside the current draw range need updating.
      const total = n / 3;
      const drawCount = s.geometry.drawRange.count;
      const visibleN = Number.isFinite(drawCount) ? Math.min(drawCount, total) : total;
      const limit = visibleN * 3;

      const pos = s.geometry.attributes.position.array;
      const base = s.basePositions;

      if (elapsed >= walkMs) {
        // Hold phase: show the resting layout.
        for (let i = 0; i < limit; i++) pos[i] = base[i];
      } else {
        // Walk phase: linearly interpolate between two adjacent frames. The
        // last frame wraps to frame 0, so the cloud eases back to rest.
        const frameF = elapsed / s.snapshotMs;
        const frameIdx = Math.floor(frameF);
        const interpT = frameF - frameIdx;
        const nextIdx = (frameIdx + 1) % N;
        const aOff = frameIdx * n;
        const bOff = nextIdx * n;
        const tr = s.trajectories;
        const scale = jitterScale;
        const u = 1 - interpT;
        for (let i = 0; i < limit; i++) {
          pos[i] = base[i] + (tr[aOff + i] * u + tr[bOff + i] * interpT) * scale;
        }
      }
      s.geometry.attributes.position.needsUpdate = true;

      s.controls.update();
      s.renderer.render(s.scene, s.camera);
    }
  }
  tick();

  // Reopening the picker after it's been closed: canvases may have been
  // laid out at zero size while hidden. Re-measure on toggle.
  pickerDetails.addEventListener('toggle', () => {
    if (pickerDetails.open) {
      for (const s of scenes) sizeScene(s);
    }
  });
}
main();

View File

@ -513,3 +513,351 @@ button.submit:disabled { background: var(--faint); border-color: var(--faint); c
} }
.htmx-request .htmx-indicator { opacity: 1; } .htmx-request .htmx-indicator { opacity: 1; }
.htmx-request.htmx-indicator { opacity: 1; } .htmx-request.htmx-indicator { opacity: 1; }
/* ---------- dataset picker (§ 1) -------------------------------------- */
/* Collapsible <details> below the masthead; expands to a gallery of 3D
dataset previews, collapses to a one-line summary chip once confirmed. */
.dataset-picker {
--picker-panel: #f2eee4;
--picker-hair: #d8d3c6;
border-bottom: 1px solid var(--rule);
background: var(--page);
}
.dataset-picker > summary {
list-style: none;
cursor: pointer;
padding: 0.9rem 2.2rem;
display: flex;
justify-content: space-between;
align-items: center;
gap: 1rem;
user-select: none;
}
.dataset-picker > summary::-webkit-details-marker { display: none; }
.dataset-picker > summary:hover { background: var(--accent-tint); }
.dataset-picker .picker-meta {
display: flex;
align-items: baseline;
gap: 1rem;
flex-wrap: wrap;
min-width: 0;
}
.dataset-picker .section-number {
font-family: var(--mono);
font-size: 0.78rem;
font-weight: 600;
color: var(--accent);
letter-spacing: 0;
}
.dataset-picker .picker-title {
font-family: var(--serif);
font-size: 1rem;
color: var(--ink);
font-style: italic;
}
.dataset-picker .picker-selection {
font-size: 0.78rem;
color: var(--mute);
display: inline-flex;
align-items: baseline;
gap: 0.5rem;
}
.dataset-picker .picker-selection .lbl {
font-family: var(--mono);
font-size: 0.68rem;
letter-spacing: 0.08em;
text-transform: uppercase;
color: var(--faint);
}
.dataset-picker .picker-selection code {
font-family: var(--mono);
font-size: 0.78rem;
color: var(--accent);
}
.dataset-picker .picker-toggle {
font-family: var(--mono);
font-size: 0.74rem;
color: var(--faint);
letter-spacing: 0;
}
.dataset-picker .picker-toggle::before { content: "[ edit ]"; }
.dataset-picker[open] .picker-toggle::before { content: "[ collapse ]"; color: var(--accent); }
.dataset-picker .picker-body {
padding: 0.4rem 2.2rem 1.6rem;
max-width: 1440px;
margin: 0 auto;
}
.dataset-picker .lede {
color: var(--mute);
font-family: var(--serif);
font-style: italic;
font-size: 0.88rem;
max-width: 62ch;
margin: 0.2rem 0 0.9rem;
}
.dataset-picker .lede kbd {
font-family: var(--mono);
font-size: 0.72rem;
background: var(--panel);
border: 1px solid var(--rule);
padding: 0 5px;
margin: 0 1px;
color: var(--ink);
}
.picker-controls {
display: grid;
grid-template-columns: auto 1fr auto;
align-items: center;
column-gap: 1rem;
row-gap: 0.55rem;
padding: 0.75rem 0;
border-top: 1px solid var(--rule);
border-bottom: 1px solid var(--rule);
margin-bottom: 1.1rem;
}
.picker-controls .ctl-label {
font-family: var(--mono);
font-size: 0.68rem;
letter-spacing: 0.08em;
text-transform: uppercase;
color: var(--mute);
}
.picker-controls .ctl-value {
font-family: var(--mono);
font-size: 0.82rem;
color: var(--accent);
min-width: 3.5rem;
text-align: right;
font-variant-numeric: tabular-nums;
}
.picker-controls input[type="range"] {
width: 100%;
accent-color: var(--accent);
height: 4px;
}
.picker-controls .segmented {
grid-column: 2 / -1;
display: flex;
justify-content: space-between;
align-items: center;
}
.picker-controls .segmented label {
font-family: var(--mono);
font-size: 0.78rem;
color: var(--mute);
cursor: pointer;
padding: 3px 2px 4px;
border-bottom: 1px solid transparent;
transition: color 120ms ease, border-color 120ms ease;
user-select: none;
font-variant-numeric: tabular-nums;
position: relative;
}
.picker-controls .segmented label:hover { color: var(--ink); }
.picker-controls .segmented label:has(input:checked) {
color: var(--accent);
border-bottom-color: var(--accent);
}
.picker-controls .segmented label:has(input:focus-visible) {
outline: 1px solid var(--accent);
outline-offset: 2px;
}
.picker-controls .segmented input[type="radio"] {
position: absolute;
opacity: 0;
width: 1px; height: 1px;
margin: 0;
}
.dataset-picker .gallery {
display: flex;
gap: 1.15rem;
overflow-x: auto;
overflow-y: hidden;
scroll-snap-type: x mandatory;
scroll-behavior: smooth;
-webkit-overflow-scrolling: touch;
padding: 2px 2px 0.85rem;
margin: 0 -2px 1.1rem;
scrollbar-width: thin;
scrollbar-color: var(--rule-2) transparent;
}
.dataset-picker .gallery::-webkit-scrollbar { height: 6px; }
.dataset-picker .gallery::-webkit-scrollbar-track { background: transparent; }
.dataset-picker .gallery::-webkit-scrollbar-thumb {
background: var(--rule-2);
border-radius: 3px;
}
.dataset-picker .gallery::-webkit-scrollbar-thumb:hover { background: var(--mute); }
.dataset-picker .picker-loading {
padding: 3rem 0;
text-align: center;
color: var(--mute);
font-family: var(--mono);
font-size: 0.82rem;
}
.dataset-picker .card {
flex: 0 0 240px;
/* Prevent long .card-path from forcing the card wider than its flex-basis. */
min-width: 0;
scroll-snap-align: start;
border: 1px solid var(--rule);
background: var(--picker-panel);
cursor: pointer;
display: flex;
flex-direction: column;
transition: border-color 120ms ease, box-shadow 120ms ease;
}
.dataset-picker .card:hover { border-color: var(--rule-2); }
.dataset-picker .card.selected {
border-color: var(--accent);
box-shadow: 0 0 0 1px var(--accent);
}
.dataset-picker .viz {
aspect-ratio: 1 / 1;
position: relative;
overflow: hidden;
}
.dataset-picker .viz canvas {
position: absolute;
inset: 0;
display: block;
width: 100% !important;
height: 100% !important;
}
.dataset-picker .fig-label {
position: absolute;
top: 8px;
left: 10px;
font-family: var(--mono);
font-size: 0.62rem;
color: var(--mute);
letter-spacing: 0.05em;
text-transform: uppercase;
pointer-events: none;
}
.dataset-picker .key-hint {
position: absolute;
top: 6px;
right: 8px;
font-family: var(--mono);
font-size: 0.7rem;
color: var(--mute);
background: rgba(250, 250, 247, 0.85);
border: 1px solid var(--rule);
padding: 0 5px;
pointer-events: none;
}
.dataset-picker .card.selected .key-hint {
color: var(--accent);
border-color: var(--accent);
}
.dataset-picker .controls-hint {
position: absolute;
bottom: 6px;
right: 8px;
font-family: var(--mono);
font-size: 0.62rem;
color: var(--mute);
opacity: 0;
transition: opacity 150ms ease;
pointer-events: none;
}
.dataset-picker .card:hover .controls-hint { opacity: 0.75; }
.dataset-picker .card-body {
padding: 0.75rem 0.9rem 0.9rem;
border-top: 1px solid var(--rule);
background: var(--page);
flex: 1;
}
.dataset-picker .card-label {
font-family: var(--sans);
font-weight: 500;
font-size: 0.9rem;
margin-bottom: 2px;
display: flex;
align-items: baseline;
gap: 0.5rem;
}
.dataset-picker .card-label .dot {
display: inline-block;
width: 7px; height: 7px;
border-radius: 50%;
background: transparent;
border: 1px solid var(--rule-2);
}
.dataset-picker .card.selected .card-label .dot {
background: var(--accent);
border-color: var(--accent);
}
.dataset-picker .card-path {
font-family: var(--mono);
font-size: 0.68rem;
color: var(--mute);
margin-bottom: 0.4rem;
word-break: break-all;
}
.dataset-picker .card-desc {
font-family: var(--serif);
font-size: 0.82rem;
color: #4a4a4a;
line-height: 1.5;
}
.picker-footer {
display: flex;
align-items: center;
justify-content: space-between;
padding-top: 0.85rem;
border-top: 1px solid var(--rule);
gap: 1.2rem;
flex-wrap: wrap;
}
.picker-footer .selection {
font-size: 0.82rem;
color: var(--mute);
display: inline-flex;
align-items: baseline;
gap: 0.5rem;
}
.picker-footer .selection .lbl {
font-family: var(--mono);
font-size: 0.68rem;
letter-spacing: 0.08em;
text-transform: uppercase;
}
.picker-footer .selection code {
font-family: var(--mono);
font-size: 0.82rem;
color: var(--accent);
}
.picker-footer .continue {
background: var(--accent);
color: var(--page);
border: 1px solid var(--accent);
padding: 0.45rem 1rem;
font-family: var(--sans);
font-size: 0.82rem;
font-weight: 600;
letter-spacing: 0.04em;
cursor: pointer;
border-radius: 1px;
transition: background 120ms ease;
}
.picker-footer .continue:not(:disabled):hover { background: #143642; }
.picker-footer .continue:disabled {
background: var(--faint);
border-color: var(--faint);
color: var(--page);
cursor: not-allowed;
}
@media (max-width: 940px) {
.dataset-picker > summary { padding: 0.9rem 1.2rem; }
.dataset-picker .picker-body { padding: 0.4rem 1.2rem 1.4rem; }
}

View File

@ -6,6 +6,14 @@
<title>embedding notebook — web1</title> <title>embedding notebook — web1</title>
<link rel="stylesheet" href="/static/style.css" /> <link rel="stylesheet" href="/static/style.css" />
<script src="https://unpkg.com/htmx.org@2.0.4"></script> <script src="https://unpkg.com/htmx.org@2.0.4"></script>
<script type="importmap">
{
"imports": {
"three": "https://unpkg.com/three@0.160.0/build/three.module.js",
"three/addons/": "https://unpkg.com/three@0.160.0/examples/jsm/"
}
}
</script>
<link rel="icon" href="data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 16 16'%3E%3Ccircle cx='8' cy='8' r='3' fill='%231f4e5f'/%3E%3C/svg%3E" /> <link rel="icon" href="data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 16 16'%3E%3Ccircle cx='8' cy='8' r='3' fill='%231f4e5f'/%3E%3C/svg%3E" />
</head> </head>
<body> <body>
@ -21,6 +29,59 @@
</div> </div>
</header> </header>
<details class="dataset-picker" id="picker" open>
<summary>
<span class="picker-meta">
<span class="section-number">§ 1</span>
<span class="picker-title">input dataset</span>
<span class="picker-selection">
<span class="lbl">generator</span>
<code id="picker-summary-path"></code>
</span>
</span>
<span class="picker-toggle" aria-hidden="true"></span>
</summary>
<div class="picker-body">
<p class="lede">
Six candidate generators for the embedding pipeline. Drag to rotate, scroll to zoom,
<kbd>&larr;</kbd>&nbsp;<kbd>&rarr;</kbd> or <kbd>1</kbd>&thinsp;&hellip;&thinsp;<kbd>6</kbd> to select.
</p>
<div class="picker-controls">
<label class="ctl-label" for="n-slider">n samples</label>
<input type="range" id="n-slider" min="100" max="5000" step="100" value="500">
<span class="ctl-value" id="n-value">500</span>
<span class="ctl-label">noise σ</span>
<div class="segmented" role="radiogroup" aria-label="noise σ">
<label><input type="radio" name="j" value="0.001"><span>0.001</span></label>
<label><input type="radio" name="j" value="0.005" checked><span>0.005</span></label>
<label><input type="radio" name="j" value="0.01"><span>0.010</span></label>
</div>
<span class="ctl-label">n frames</span>
<div class="segmented" role="radiogroup" aria-label="number of frames">
<label><input type="radio" name="f" value="12"><span>12</span></label>
<label><input type="radio" name="f" value="24" checked><span>24</span></label>
<label><input type="radio" name="f" value="48"><span>48</span></label>
</div>
</div>
<div class="gallery" id="gallery">
<div class="picker-loading">loading samples&hellip;</div>
</div>
<div class="picker-footer">
<div class="selection">
<span class="lbl">generator</span>
<code id="selected-path"></code>
</div>
<button type="button" class="continue" id="continue-btn" disabled>Continue &rarr;</button>
</div>
</div>
</details>
<main> <main>
<!-- ==================== LEFT: parameter notebook ==================== --> <!-- ==================== LEFT: parameter notebook ==================== -->
@ -34,10 +95,17 @@
hx-indicator="#busy" hx-indicator="#busy"
> >
<!-- §1 reducer --> <!-- Picker-driven hidden fields. Values are written by dataset-picker.js. -->
<input type="hidden" name="dataset_id" id="dataset_id" value="" />
<input type="hidden" name="num_points" id="num_points" value="500" />
<input type="hidden" name="num_snapshots" id="num_snapshots" value="24" />
<input type="hidden" name="jitter_scale" id="jitter_scale" value="0.005" />
<input type="hidden" name="seed" id="seed" value="42" />
<!-- §2 reducer -->
<div class="section"> <div class="section">
<div class="section-label"> <div class="section-label">
<span>§ 1 &nbsp; reducer</span><span class="ordinal">method</span> <span>§ 2 &nbsp; reducer</span><span class="ordinal">method</span>
</div> </div>
<p class="lead">Dimensionality reduction applied to each snapshot. Only reducers whose Python package is importable are shown.</p> <p class="lead">Dimensionality reduction applied to each snapshot. Only reducers whose Python package is importable are shown.</p>
@ -63,53 +131,16 @@
</ul> </ul>
</div> </div>
<!-- §2 reducer params --> <!-- §3 reducer params -->
<div class="section"> <div class="section">
<div class="section-label"> <div class="section-label">
<span>§ 2 &nbsp; parameters</span><span class="ordinal">kwargs</span> <span>§ 3 &nbsp; parameters</span><span class="ordinal">kwargs</span>
</div> </div>
<div id="reducer-params"> <div id="reducer-params">
{% include "_reducer_form.html" with context %} {% include "_reducer_form.html" with context %}
</div> </div>
</div> </div>
<!-- §3 data -->
<div class="section">
<div class="section-label">
<span>§ 3 &nbsp; data &amp; drift</span><span class="ordinal">sampling</span>
</div>
<div class="form-grid">
<label for="generator_path">
generator
<span class="hint">data-generating surface</span>
</label>
<select name="generator_path" id="generator_path">
{% for path, short in generators %}
<option value="{{ path }}" {% if path == 'sklearn.datasets.make_s_curve' %}selected{% endif %}>{{ short }}</option>
{% endfor %}
</select>
<label for="num_points">n<sub>points</sub></label>
<input type="number" id="num_points" name="num_points" value="5000" min="100" step="100" />
<label for="num_snapshots">n<sub>snapshots</sub></label>
<input type="number" id="num_snapshots" name="num_snapshots" value="48" min="2" step="1" />
<label for="jitter_scale">
jitter scale
<span class="hint">std of per-step Gaussian drift</span>
</label>
<input type="number" id="jitter_scale" name="jitter_scale" value="0.01" step="0.001" min="0" />
<label for="seed">
jitter seed
<span class="hint">seeds only the drift simulation — the embedder's seed is in §2 (advanced).</span>
</label>
<input type="number" id="seed" name="seed" value="42" step="1" />
</div>
</div>
<div class="actions"> <div class="actions">
<button type="submit" class="submit">submit run</button> <button type="submit" class="submit">submit run</button>
<span id="busy" class="htmx-indicator">dispatching&hellip;</span> <span id="busy" class="htmx-indicator">dispatching&hellip;</span>
@ -145,9 +176,11 @@
</main> </main>
<footer class="colophon"> <footer class="colophon">
<span><span class="k">web1</span> · scientific instrument · port 8001</span> <span><span class="k">web</span> · scientific instrument · port 8001</span>
<span>fastapi · htmx · no build step</span> <span>fastapi · htmx · no build step</span>
</footer> </footer>
<script type="module" src="/static/dataset-picker.js"></script>
</body> </body>
</html> </html>