dr-sandbox/app/demo/main.py
2026-04-21 19:14:23 -06:00

138 lines
4.6 KiB
Python

from functools import lru_cache
from pathlib import Path
from fastapi import FastAPI
from fastapi.staticfiles import StaticFiles
from sklearn.datasets import (
make_blobs,
make_classification,
make_gaussian_quantiles,
make_s_curve,
make_swiss_roll,
)
# Sample count passed as ``n_samples`` to every dataset generator below.
N = 5_000
# Seed passed as ``random_state`` to every dataset generator below.
SEED = 0
# Directory containing this module; it is the root handed to StaticFiles.
HERE = Path(__file__).parent
# ASGI application object that the route and the static mount attach to.
app = FastAPI()
@lru_cache(maxsize=1)
def _datasets():
    """Build the catalogue of synthetic demo datasets.

    Every generator is called with ``n_samples=N`` and ``random_state=SEED``.
    The function takes no arguments and is wrapped in ``lru_cache``, so the
    generators run on the first call only; later calls get the same dict
    object back.

    Returns:
        A dict mapping a dataset slug to a record with the keys ``name``,
        ``path``, ``kwargs``, ``description``, ``kind``, ``points`` and
        ``labels`` (in that order). ``points`` and ``labels`` are the two
        generator outputs converted with ``.tolist()``.
    """
    shared = {"n_samples": N, "random_state": SEED}

    # Draw every sample up front; each call receives the seed explicitly.
    s_curve_xy = make_s_curve(noise=0.03, **shared)
    swiss_xy = make_swiss_roll(noise=0.15, **shared)
    swiss_hole_xy = make_swiss_roll(noise=0.15, hole=True, **shared)
    blobs_xy = make_blobs(n_features=3, centers=5, cluster_std=1.0, **shared)
    quantiles_xy = make_gaussian_quantiles(n_features=3, n_classes=4, **shared)
    hypercube_xy = make_classification(
        n_features=3,
        n_informative=3,
        n_redundant=0,
        n_repeated=0,
        n_classes=4,
        n_clusters_per_class=2,
        class_sep=1.5,
        **shared,
    )

    # One row per dataset:
    # (slug, display name, generator path, advertised kwargs,
    #  description, label kind, (points, labels)).
    # Row order is the key order of the returned dict.
    specs = [
        (
            "s_curve",
            "S-Curve",
            "sklearn.datasets.make_s_curve",
            {},
            "A 2-D manifold warped into R³. Continuous label encodes position "
            "along the curve — a good test of whether a reducer unrolls the "
            "sheet without tearing.",
            "continuous",
            s_curve_xy,
        ),
        (
            "swiss_roll",
            "Swiss Roll",
            "sklearn.datasets.make_swiss_roll",
            {},
            "A rolled-up plane. The canonical hard case for linear methods: "
            "PCA collapses the spiral, non-linear methods should recover the "
            "unroll.",
            "continuous",
            swiss_xy,
        ),
        (
            "swiss_roll_hole",
            "Swiss Roll (hole)",
            "sklearn.datasets.make_swiss_roll",
            {"hole": True},
            "Swiss roll with a rectangular hole punched through. Same manifold, "
            "non-trivial topology — a faithful unroll should preserve the hole "
            "rather than smearing it closed.",
            "continuous",
            swiss_hole_xy,
        ),
        (
            "blobs",
            "Gaussian Blobs",
            "sklearn.datasets.make_blobs",
            {"centers": 5, "cluster_std": 1.0},
            "Five isotropic Gaussian clusters in R³. Discrete class labels. "
            "Tests whether a reducer preserves cluster separation when "
            "projected to 2-D.",
            "categorical",
            blobs_xy,
        ),
        (
            "gaussian_quantiles",
            "Gaussian Quantiles",
            "sklearn.datasets.make_gaussian_quantiles",
            {"n_classes": 4},
            "Concentric Gaussian shells in R³; class = which shell. Classes "
            "are linearly inseparable by construction — PCA collapses them, "
            "kernel and manifold methods have a chance.",
            "categorical",
            quantiles_xy,
        ),
        (
            "classification",
            "Hypercube Clusters",
            "sklearn.datasets.make_classification",
            {
                "n_informative": 3,
                "n_redundant": 0,
                "n_repeated": 0,
                "n_classes": 4,
                "n_clusters_per_class": 2,
                "class_sep": 1.5,
            },
            "Four classes, two sub-clusters each, placed at hypercube vertices "
            "with informative noise. A denser discrete test than blobs — "
            "within-class bimodality stresses cluster-preserving reducers.",
            "categorical",
            hypercube_xy,
        ),
    ]

    catalogue = {}
    for slug, title, path, kwargs, blurb, kind, (points, labels) in specs:
        catalogue[slug] = {
            "name": title,
            "path": path,
            "kwargs": kwargs,
            "description": blurb,
            "kind": kind,
            "points": points.tolist(),
            "labels": labels.tolist(),
        }
    return catalogue
@app.get("/data.json")
def data():
    """Handle ``GET /data.json`` by returning the memoised dataset catalogue."""
    catalogue = _datasets()
    return catalogue
# Serve the files that sit next to this module from the site root
# (html=True enables StaticFiles' HTML mode). This statement comes after the
# /data.json route above.
# NOTE(review): presumably that ordering is what keeps the catch-all "/" mount
# from shadowing the API route — confirm before moving this line.
app.mount("/", StaticFiles(directory=str(HERE), html=True), name="static")