data selector demo

This commit is contained in:
Michael Pilosov 2026-04-21 18:03:38 -06:00
parent 708157c1ef
commit 058db256a3
4 changed files with 590 additions and 0 deletions

0
app/__init__.py Normal file
View File

0
app/demo/__init__.py Normal file
View File

526
app/demo/index.html Normal file
View File

@ -0,0 +1,526 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Input dataset · Dimension Reduction Sandbox</title>
<style>
:root {
--bg: #fafaf7;
--panel: #f2eee4;
--text: #1a1a1a;
--muted: #6b6b6b;
--hair: #d8d3c6;
--accent: #1f4e5f;
}
* { box-sizing: border-box; margin: 0; padding: 0; }
html, body { background: var(--bg); color: var(--text); }
body {
font-family: "Iowan Old Style", "Palatino Linotype", Palatino, Charter, Georgia, serif;
font-size: 16px;
line-height: 1.55;
padding: 48px 56px 64px;
max-width: 960px;
margin: 0 auto;
}
.mono {
font-family: "JetBrains Mono", "SF Mono", Menlo, Monaco, monospace;
}
header {
display: flex;
align-items: baseline;
gap: 18px;
padding-bottom: 16px;
border-bottom: 1px solid var(--hair);
margin-bottom: 28px;
}
.section-number {
font-family: "JetBrains Mono", "SF Mono", Menlo, Monaco, monospace;
color: var(--accent);
font-size: 18px;
font-weight: 600;
}
h1 {
font-size: 26px;
font-weight: 500;
letter-spacing: -0.01em;
}
.crumb {
font-family: "JetBrains Mono", "SF Mono", Menlo, Monaco, monospace;
font-size: 11px;
color: var(--muted);
letter-spacing: 0.08em;
text-transform: uppercase;
}
.lede {
color: var(--muted);
max-width: 62ch;
margin-bottom: 24px;
font-size: 15px;
}
.controls {
display: flex;
align-items: center;
gap: 16px;
padding: 12px 0;
border-top: 1px solid var(--hair);
border-bottom: 1px solid var(--hair);
margin-bottom: 24px;
}
.ctl-label {
font-family: "JetBrains Mono", "SF Mono", Menlo, Monaco, monospace;
font-size: 11px;
letter-spacing: 0.08em;
text-transform: uppercase;
color: var(--muted);
min-width: 88px;
}
.ctl-value {
font-family: "JetBrains Mono", "SF Mono", Menlo, Monaco, monospace;
font-size: 13px;
color: var(--accent);
min-width: 56px;
text-align: right;
font-variant-numeric: tabular-nums;
}
#n-slider {
flex: 1;
accent-color: var(--accent);
height: 4px;
}
.lede kbd {
font-family: "JetBrains Mono", "SF Mono", Menlo, Monaco, monospace;
font-size: 12px;
background: #fff;
border: 1px solid var(--hair);
padding: 1px 6px;
margin: 0 2px;
color: var(--text);
}
.gallery {
display: grid;
grid-template-columns: repeat(3, minmax(0, 260px));
gap: 20px;
justify-content: start;
margin-bottom: 28px;
}
@media (max-width: 880px) {
.gallery {
grid-template-columns: minmax(0, 320px);
justify-content: center;
}
body { padding: 28px 20px; }
}
.card {
border: 1px solid var(--hair);
background: var(--panel);
cursor: pointer;
display: flex;
flex-direction: column;
transition: border-color 120ms ease, box-shadow 120ms ease;
}
.card:hover { border-color: #8f887a; }
.card.selected {
border-color: var(--accent);
box-shadow: 0 0 0 1px var(--accent);
}
.viz {
aspect-ratio: 1 / 1;
position: relative;
overflow: hidden;
}
.viz canvas {
position: absolute;
inset: 0;
display: block;
width: 100% !important;
height: 100% !important;
}
.fig-label {
position: absolute;
top: 10px;
left: 12px;
font-family: "JetBrains Mono", "SF Mono", Menlo, Monaco, monospace;
font-size: 10px;
color: var(--muted);
letter-spacing: 0.05em;
text-transform: uppercase;
pointer-events: none;
}
.key-hint {
position: absolute;
top: 8px;
right: 10px;
font-family: "JetBrains Mono", "SF Mono", Menlo, Monaco, monospace;
font-size: 11px;
color: var(--muted);
background: rgba(250, 250, 247, 0.85);
border: 1px solid var(--hair);
padding: 1px 6px;
pointer-events: none;
}
.card.selected .key-hint {
color: var(--accent);
border-color: var(--accent);
}
.controls-hint {
position: absolute;
bottom: 8px;
right: 10px;
font-family: "JetBrains Mono", "SF Mono", Menlo, Monaco, monospace;
font-size: 10px;
color: var(--muted);
opacity: 0;
transition: opacity 150ms ease;
pointer-events: none;
}
.card:hover .controls-hint { opacity: 0.75; }
.card-body {
padding: 16px 18px 18px;
border-top: 1px solid var(--hair);
background: var(--bg);
flex: 1;
}
.card-label {
font-weight: 500;
font-size: 16px;
margin-bottom: 3px;
display: flex;
align-items: baseline;
gap: 10px;
}
.card-label .dot {
display: inline-block;
width: 7px; height: 7px;
border-radius: 50%;
background: transparent;
border: 1px solid var(--hair);
}
.card.selected .card-label .dot {
background: var(--accent);
border-color: var(--accent);
}
.card-path {
font-family: "JetBrains Mono", "SF Mono", Menlo, Monaco, monospace;
font-size: 11px;
color: var(--muted);
margin-bottom: 8px;
}
.card-desc {
font-size: 13px;
color: #4a4a4a;
line-height: 1.55;
}
.footer {
display: flex;
align-items: center;
justify-content: space-between;
padding-top: 20px;
border-top: 1px solid var(--hair);
gap: 24px;
}
.selection {
font-size: 14px;
color: var(--muted);
}
.selection .lbl {
font-family: "JetBrains Mono", "SF Mono", Menlo, Monaco, monospace;
font-size: 11px;
letter-spacing: 0.05em;
text-transform: uppercase;
margin-right: 8px;
}
.selection code {
font-family: "JetBrains Mono", "SF Mono", Menlo, Monaco, monospace;
font-size: 13px;
color: var(--accent);
}
.continue {
background: var(--accent);
color: #fff;
border: none;
padding: 9px 18px;
font-family: inherit;
font-size: 14px;
cursor: pointer;
letter-spacing: 0.01em;
}
.continue:disabled {
background: #e6e1d4;
color: var(--muted);
cursor: not-allowed;
}
.continue:not(:disabled):hover { background: #18404f; }
.loading {
padding: 60px 0;
text-align: center;
color: var(--muted);
font-family: "JetBrains Mono", "SF Mono", Menlo, Monaco, monospace;
font-size: 13px;
}
</style>
</head>
<body>
<header>
<div class="section-number">§1</div>
<h1>Select input dataset</h1>
<div style="flex: 1"></div>
<div class="crumb">Demo · picker</div>
</header>
<p class="lede">
Three candidate generators for the embedding pipeline. Drag to rotate, scroll to zoom,
<kbd>1</kbd>&nbsp;<kbd>2</kbd>&nbsp;<kbd>3</kbd> to select.
</p>
<div class="controls">
<label class="ctl-label" for="n-slider">n samples</label>
<input type="range" id="n-slider" min="100" max="5000" step="100" value="500">
<span class="ctl-value" id="n-value">500</span>
</div>
<div class="gallery" id="gallery">
<div class="loading">loading samples…</div>
</div>
<div class="footer">
<div class="selection">
<span class="lbl">generator</span>
<code id="selected-path"></code>
</div>
<button class="continue" id="continue-btn" disabled>Continue →</button>
</div>
<script type="importmap">
{
"imports": {
"three": "https://unpkg.com/three@0.160.0/build/three.module.js",
"three/addons/": "https://unpkg.com/three@0.160.0/examples/jsm/"
}
}
</script>
<script type="module">
import * as THREE from 'three';
import { OrbitControls } from 'three/addons/controls/OrbitControls.js';
// Eight-entry palette for categorical (class) labels, written as CSS hex strings.
// The first entry is the same value as the page's --accent colour.
const CATEGORICAL_HEX = [
'#1f4e5f', '#c97b3f', '#8b5a9f', '#5a8560',
'#c74a5e', '#6b7d8f', '#b89f51', '#4a6fa5',
];
// The same palette parsed once into THREE.Color objects; buildColors() reads
// their r/g/b components, choosing an entry by label modulo the palette length.
const CATEGORICAL = CATEGORICAL_HEX.map(h => new THREE.Color(h));
/**
 * Map a normalised scalar to a colour on the continuous-label ramp.
 *
 * The hue is interpolated linearly from 215° (blue) at t = 0 to 28° (ochre)
 * at t = 1, saturation is fixed at 0.62, and lightness rises slightly with t
 * (0.46 at t = 0, 0.54 at t = 1) so the ramp stays legible on the warm panel
 * background.
 *
 * @param {number} t - Position along the ramp; callers pass values in [0, 1].
 * @returns {THREE.Color} A new colour object for that position.
 */
function rampContinuous(t) {
  const hueDegrees = (1 - t) * 215 + t * 28;
  const lightness = 0.50 + (t - 0.5) * 0.08;
  const color = new THREE.Color();
  // setHSL expects the hue as a fraction of a full turn, not degrees.
  color.setHSL(hueDegrees / 360, 0.62, lightness);
  return color;
}
/**
 * Centre a 3-D point cloud on its centroid and scale it uniformly so that the
 * largest absolute coordinate deviation on any axis becomes 1.
 *
 * @param {Array<Array<number>>} points - Rows whose first three entries are
 *   read as x, y, z.
 * @returns {Float32Array} Flat buffer of length points.length * 3, laid out as
 *   x0, y0, z0, x1, y1, z1, ...
 */
function normalize(points) {
  const count = points.length;

  // Centroid: accumulate each axis in input order, then divide by the count.
  const sum = [0, 0, 0];
  for (let i = 0; i < count; i++) {
    const p = points[i];
    sum[0] += p[0];
    sum[1] += p[1];
    sum[2] += p[2];
  }
  const cx = sum[0] / count;
  const cy = sum[1] / count;
  const cz = sum[2] / count;

  // Largest absolute deviation from the centroid across all three axes.
  // The 1e-9 floor keeps the division below finite when every point coincides.
  let extent = 1e-9;
  for (let i = 0; i < count; i++) {
    const p = points[i];
    const dx = Math.abs(p[0] - cx);
    const dy = Math.abs(p[1] - cy);
    const dz = Math.abs(p[2] - cz);
    if (dx > extent) extent = dx;
    if (dy > extent) extent = dy;
    if (dz > extent) extent = dz;
  }

  // Write the centred, scaled coordinates into one flat typed array.
  const flat = new Float32Array(count * 3);
  let w = 0;
  for (let i = 0; i < count; i++) {
    const p = points[i];
    flat[w++] = (p[0] - cx) / extent;
    flat[w++] = (p[1] - cy) / extent;
    flat[w++] = (p[2] - cz) / extent;
  }
  return flat;
}
/**
 * Build a flat RGB colour buffer (three floats per label) for a point cloud.
 *
 * - kind === 'categorical': each label selects an entry of the CATEGORICAL
 *   palette, wrapping around when there are more classes than colours.
 * - any other kind: labels are treated as continuous values, rescaled to
 *   [0, 1] over their observed min/max, and coloured with rampContinuous().
 *
 * @param {Array<number>} labels - One label per point.
 * @param {string} kind - 'categorical' for class labels; anything else is
 *   handled as continuous.
 * @returns {Float32Array} Buffer of length labels.length * 3 laid out as
 *   r0, g0, b0, r1, g1, b1, ...
 */
function buildColors(labels, kind) {
  const n = labels.length;
  const colors = new Float32Array(n * 3);
  if (kind === 'categorical') {
    const paletteSize = CATEGORICAL.length;
    for (let i = 0; i < n; i++) {
      // JS `%` keeps the sign of the dividend, so a negative label (for
      // example a "noise" class of -1) or a fractional one would index outside
      // the palette and crash on `c.r`. Truncate and wrap into
      // [0, paletteSize); labels that are not finite numbers fall back to 0.
      const wrapped = ((Math.trunc(labels[i]) % paletteSize) + paletteSize) % paletteSize;
      const c = CATEGORICAL[wrapped || 0];
      colors[i * 3] = c.r;
      colors[i * 3 + 1] = c.g;
      colors[i * 3 + 2] = c.b;
    }
  } else {
    // Observed value range of the labels.
    let lo = Infinity, hi = -Infinity;
    for (const v of labels) {
      if (v < lo) lo = v;
      if (v > hi) hi = v;
    }
    // A constant label array has zero range; use 1 so the division is defined
    // and every point lands at t = 0.
    const range = (hi - lo) || 1;
    for (let i = 0; i < n; i++) {
      const c = rampContinuous((labels[i] - lo) / range);
      colors[i * 3] = c.r;
      colors[i * 3 + 1] = c.g;
      colors[i * 3 + 2] = c.b;
    }
  }
  return colors;
}
/**
 * Build a self-contained three.js point-cloud view for one dataset and append
 * its canvas to `container`.
 *
 * @param {HTMLElement} container - Element that receives the renderer canvas.
 * @param {{points: Array<Array<number>>, labels: Array<number>, kind: string}} dataset
 *   Dataset record; points go through normalize(), labels and kind through
 *   buildColors().
 * @returns {{scene, camera, renderer, controls, container, geometry}} Handles
 *   the caller needs for sizing, rendering and changing the draw range.
 */
function createScene(container, dataset) {
  // Geometry: per-vertex position and colour buffers derived from the dataset.
  const positionBuffer = normalize(dataset.points);
  const colorBuffer = buildColors(dataset.labels, dataset.kind);
  const geometry = new THREE.BufferGeometry();
  geometry.setAttribute('position', new THREE.BufferAttribute(positionBuffer, 3));
  geometry.setAttribute('color', new THREE.BufferAttribute(colorBuffer, 3));

  // Points are drawn without size attenuation and take their colour from the
  // per-vertex colour attribute.
  const pointCloud = new THREE.Points(
    geometry,
    new THREE.PointsMaterial({
      size: 2.1,
      sizeAttenuation: false,
      vertexColors: true,
      transparent: true,
      opacity: 0.92,
    })
  );

  // Scene background uses the same colour value as the page's --panel variable.
  const scene = new THREE.Scene();
  scene.background = new THREE.Color(0xf2eee4);
  scene.add(pointCloud);

  // Square-aspect perspective camera aimed at the origin.
  const camera = new THREE.PerspectiveCamera(42, 1, 0.1, 100);
  camera.position.set(2.6, 1.9, 2.6);
  camera.lookAt(0, 0, 0);

  // Cap the device pixel ratio at 2.
  const renderer = new THREE.WebGLRenderer({ antialias: true, alpha: false });
  renderer.setPixelRatio(Math.min(window.devicePixelRatio, 2));
  container.appendChild(renderer.domElement);

  // Orbit controls: damped rotate/zoom with panning disabled, auto-rotating
  // until the caller switches autoRotate off.
  const controls = Object.assign(new OrbitControls(camera, renderer.domElement), {
    enableDamping: true,
    dampingFactor: 0.08,
    enablePan: false,
    autoRotate: true,
    autoRotateSpeed: 0.55,
    minDistance: 1.5,
    maxDistance: 6,
  });

  return { scene, camera, renderer, controls, container, geometry };
}
/**
 * Resize one scene's renderer to a square whose side is the container's
 * current width (floored, never less than 1 px) and refresh the camera
 * projection for a 1:1 aspect ratio.
 *
 * @param {{container: Element, renderer: object, camera: object}} s - Scene
 *   record as returned by createScene().
 */
function sizeScene(s) {
  const { container, renderer, camera } = s;
  const width = container.getBoundingClientRect().width;
  const side = Math.max(1, Math.floor(width));
  renderer.setSize(side, side);
  camera.aspect = 1;
  camera.updateProjectionMatrix();
}
/**
 * Page entry point.
 *
 * Fetches the dataset catalogue from /data.json, renders one interactive card
 * per dataset, and wires up selection (click or number key), the sample-count
 * slider, the Continue button, resize handling and the shared render loop.
 * If the catalogue cannot be loaded, a message replaces the gallery and
 * nothing else is set up.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const gallery = document.getElementById('gallery');

  let data;
  try {
    const res = await fetch('/data.json');
    // fetch() only rejects on network failure; surface HTTP error statuses
    // explicitly instead of trying to parse an error page as JSON.
    if (!res.ok) throw new Error(`HTTP ${res.status} ${res.statusText}`);
    data = await res.json();
  } catch (err) {
    // Use textContent so the error text is never parsed as markup.
    const notice = document.createElement('div');
    notice.className = 'loading';
    notice.textContent = `failed to load /data.json — ${err}`;
    gallery.innerHTML = '';
    gallery.appendChild(notice);
    return;
  }

  gallery.innerHTML = '';
  const scenes = [];
  const order = Object.entries(data);
  let selectedId = null;
  // A press and release further apart than this counts as an orbit drag,
  // not a click.
  const DRAG_TOLERANCE_PX = 4;

  // Mark one card as the current selection and enable the Continue button.
  function selectCard(id, card, ds) {
    document.querySelectorAll('.card').forEach(c => c.classList.remove('selected'));
    card.classList.add('selected');
    selectedId = id;
    document.getElementById('selected-path').textContent = ds.path;
    document.getElementById('continue-btn').disabled = false;
  }

  order.forEach(([id, ds], i) => {
    const card = document.createElement('div');
    card.className = 'card';
    card.dataset.id = id;
    // Only the numeric index is interpolated into markup; strings that come
    // from the server are assigned through textContent below.
    card.innerHTML = `
      <div class="viz">
        <span class="fig-label">Fig. 1.${i + 1}</span>
        <span class="key-hint">[${i + 1}]</span>
        <span class="controls-hint">drag · scroll</span>
      </div>
      <div class="card-body">
        <div class="card-label">
          <span class="dot"></span>
          <span class="card-name"></span>
        </div>
        <div class="card-path"></div>
        <div class="card-desc"></div>
      </div>
    `;
    card.querySelector('.card-name').textContent = ds.name;
    card.querySelector('.card-path').textContent = ds.path;
    card.querySelector('.card-desc').textContent = ds.description;
    gallery.appendChild(card);

    const viz = card.querySelector('.viz');
    const s = createScene(viz, ds);
    sizeScene(s);
    scenes.push(s);

    // Stop auto-rotate once the user interacts.
    s.controls.addEventListener('start', () => { s.controls.autoRotate = false; });

    // The browser still dispatches `click` after an orbit drag that starts
    // and ends on the canvas, so remember where the pointer went down and
    // ignore clicks released too far from that spot. The listener runs in the
    // capture phase so it sees the press before any handler on the canvas.
    let downX = 0;
    let downY = 0;
    card.addEventListener('pointerdown', (e) => {
      downX = e.clientX;
      downY = e.clientY;
    }, { capture: true });
    card.addEventListener('click', (e) => {
      // detail === 0 means the click did not come from a pointer press
      // (e.g. element.click()); there is no drag to rule out in that case.
      const fromPointer = e.detail !== 0;
      if (fromPointer &&
          Math.hypot(e.clientX - downX, e.clientY - downY) > DRAG_TOLERANCE_PX) {
        return;
      }
      selectCard(id, card, ds);
    });
  });

  // Sample-count slider → cheap: just change draw range per geometry.
  const slider = document.getElementById('n-slider');
  const nValue = document.getElementById('n-value');
  function applyN(n) {
    nValue.textContent = n.toLocaleString();
    for (const s of scenes) {
      const cap = s.geometry.attributes.position.count;
      s.geometry.setDrawRange(0, Math.min(n, cap));
    }
  }
  slider.addEventListener('input', (e) => applyN(parseInt(e.target.value, 10)));
  applyN(parseInt(slider.value, 10));

  // Keyboard: digit keys select the card showing the matching [n] hint.
  document.addEventListener('keydown', (e) => {
    // Leave browser/OS shortcuts such as Ctrl+1 or Cmd+1 alone.
    if (e.ctrlKey || e.metaKey || e.altKey) return;
    // Ignore keys typed into text-entry widgets. The range slider is exempt:
    // digits do nothing there, and it keeps focus after being dragged.
    const target = e.target;
    const typing =
      target.tagName === 'TEXTAREA' ||
      target.tagName === 'SELECT' ||
      target.isContentEditable ||
      (target.tagName === 'INPUT' && target.type !== 'range');
    if (typing) return;
    if (!/^[1-9]$/.test(e.key)) return;
    const idx = Number(e.key) - 1;
    const entry = order[idx];
    if (!entry) return;
    const [id, ds] = entry;
    selectCard(id, gallery.children[idx], ds);
  });

  // Continue button → for now, just echo the selection.
  document.getElementById('continue-btn').addEventListener('click', () => {
    if (!selectedId) return;
    const ds = data[selectedId];
    alert(`Would continue with generator:\n${ds.path}\n\n(demo — no flow dispatched yet)`);
  });

  // Resize handling, debounced so a window drag does not resize every frame.
  let resizeTimer = null;
  window.addEventListener('resize', () => {
    clearTimeout(resizeTimer);
    resizeTimer = setTimeout(() => scenes.forEach(sizeScene), 80);
  });

  // Render loop: controls.update() runs every frame so damping and
  // auto-rotate advance for every scene.
  function tick() {
    requestAnimationFrame(tick);
    for (const s of scenes) {
      s.controls.update();
      s.renderer.render(s.scene, s.camera);
    }
  }
  tick();
}
main();
</script>
</body>
</html>

64
app/demo/main.py Normal file
View File

@ -0,0 +1,64 @@
from functools import lru_cache
from pathlib import Path
from fastapi import FastAPI
from fastapi.staticfiles import StaticFiles
from sklearn.datasets import make_blobs, make_s_curve, make_swiss_roll
# Application object; the /data.json route and the static mount below attach to it.
app = FastAPI()
# Directory containing this module (app/demo); it is served as the static root below.
HERE = Path(__file__).parent
@lru_cache(maxsize=1)
def _datasets():
    """Generate the three demo point clouds together with their card metadata.

    The result is memoised by ``lru_cache``, so the generators run once and
    every later call returns the same dict object; callers should treat it as
    read-only.

    Returns:
        A dict keyed by dataset id (``"s_curve"``, ``"swiss_roll"``,
        ``"blobs"``, in that order). Each value holds ``name``, ``path``,
        ``description``, ``kind`` (``"continuous"`` or ``"categorical"``),
        ``points`` and ``labels``; the last two are the generator outputs
        converted with ``.tolist()``.
    """
    sample_count = 5000
    seed = 0

    curve_xyz, curve_pos = make_s_curve(
        n_samples=sample_count, noise=0.03, random_state=seed
    )
    roll_xyz, roll_pos = make_swiss_roll(
        n_samples=sample_count, noise=0.15, random_state=seed
    )
    blob_xyz, blob_cls = make_blobs(
        n_samples=sample_count,
        n_features=3,
        centers=5,
        cluster_std=1.0,
        random_state=seed,
    )

    # Insertion order matters: the frontend lays the cards out in key order.
    catalog = {}
    catalog["s_curve"] = dict(
        name="S-Curve",
        path="sklearn.datasets.make_s_curve",
        description=(
            "A 2-D manifold warped into R³. Continuous label encodes position "
            "along the curve — a good test of whether a reducer unrolls the "
            "sheet without tearing."
        ),
        kind="continuous",
        points=curve_xyz.tolist(),
        labels=curve_pos.tolist(),
    )
    catalog["swiss_roll"] = dict(
        name="Swiss Roll",
        path="sklearn.datasets.make_swiss_roll",
        description=(
            "A rolled-up plane. The canonical hard case for linear methods: "
            "PCA collapses the spiral, non-linear methods should recover the "
            "unroll."
        ),
        kind="continuous",
        points=roll_xyz.tolist(),
        labels=roll_pos.tolist(),
    )
    catalog["blobs"] = dict(
        name="Gaussian Blobs",
        path="sklearn.datasets.make_blobs",
        description=(
            "Five isotropic Gaussian clusters in R³. Discrete class labels. "
            "Tests whether a reducer preserves cluster separation when "
            "projected to 2-D."
        ),
        kind="categorical",
        points=blob_xyz.tolist(),
        labels=blob_cls.tolist(),
    )
    return catalog
@app.get("/data.json")
def data():
    """Serve every demo dataset, with its card metadata, at ``/data.json``."""
    payload = _datasets()
    return payload
# Serve the files that sit next to this module at the site root; html=True is
# what lets "/" resolve to index.html.
# NOTE(review): this catch-all mount is registered after the /data.json route,
# presumably so it cannot shadow it — confirm against the framework's routing order.
# NOTE(review): HERE also contains this module's Python sources, so they are
# downloadable as static files — confirm that is acceptable outside a demo.
app.mount("/", StaticFiles(directory=str(HERE), html=True), name="static")