138 lines
4.6 KiB
Python
138 lines
4.6 KiB
Python
from functools import lru_cache
|
|
from pathlib import Path
|
|
|
|
from fastapi import FastAPI
|
|
from fastapi.staticfiles import StaticFiles
|
|
from sklearn.datasets import (
|
|
make_blobs,
|
|
make_classification,
|
|
make_gaussian_quantiles,
|
|
make_s_curve,
|
|
make_swiss_roll,
|
|
)
|
|
|
|
# Application object; the route and static mount further down attach to it.
app = FastAPI()
# Directory that contains this module (used as the static-file root).
HERE = Path(__file__).parent

# Sample count requested from every dataset generator.
N = 5000
# Value passed as ``random_state`` to every dataset generator.
SEED = 0
|
|
|
|
|
|
@lru_cache(maxsize=1)
def _datasets():
    """Generate every demo dataset and package it for serving.

    Each scikit-learn generator is called with ``n_samples=N`` and
    ``random_state=SEED``.  Because of ``lru_cache`` the generators run only
    on the first call; later calls return the very same dict object.

    Returns:
        dict: dataset key (for example ``"s_curve"``) mapped to a record
        with the keys ``name``, ``path``, ``kwargs``, ``description``,
        ``kind`` (``"continuous"`` or ``"categorical"``), ``points`` and
        ``labels``.  The last two are the generator outputs converted with
        ``.tolist()``.
    """
    # Arguments shared by every generator call.
    common = {"n_samples": N, "random_state": SEED}

    # Run all generators up front; each result is a (points, labels) pair.
    s_curve = make_s_curve(noise=0.03, **common)
    swiss = make_swiss_roll(noise=0.15, **common)
    swiss_hole = make_swiss_roll(noise=0.15, hole=True, **common)
    blobs = make_blobs(n_features=3, centers=5, cluster_std=1.0, **common)
    quantiles = make_gaussian_quantiles(n_features=3, n_classes=4, **common)
    hypercube = make_classification(
        n_features=3,
        n_informative=3,
        n_redundant=0,
        n_repeated=0,
        n_classes=4,
        n_clusters_per_class=2,
        class_sep=1.5,
        **common,
    )

    def record(name, path, kwargs, description, kind, sample):
        """Assemble one dataset record from its metadata and generator output."""
        points, labels = sample
        return {
            "name": name,
            "path": path,
            # NOTE: this is display metadata, not the full call signature --
            # e.g. ``noise`` and ``n_features`` above are not listed here.
            "kwargs": kwargs,
            "description": description,
            "kind": kind,
            "points": points.tolist(),
            "labels": labels.tolist(),
        }

    return {
        "s_curve": record(
            "S-Curve",
            "sklearn.datasets.make_s_curve",
            {},
            (
                "A 2-D manifold warped into R³. Continuous label encodes position "
                "along the curve — a good test of whether a reducer unrolls the "
                "sheet without tearing."
            ),
            "continuous",
            s_curve,
        ),
        "swiss_roll": record(
            "Swiss Roll",
            "sklearn.datasets.make_swiss_roll",
            {},
            (
                "A rolled-up plane. The canonical hard case for linear methods: "
                "PCA collapses the spiral, non-linear methods should recover the "
                "unroll."
            ),
            "continuous",
            swiss,
        ),
        "swiss_roll_hole": record(
            "Swiss Roll (hole)",
            "sklearn.datasets.make_swiss_roll",
            {"hole": True},
            (
                "Swiss roll with a rectangular hole punched through. Same manifold, "
                "non-trivial topology — a faithful unroll should preserve the hole "
                "rather than smearing it closed."
            ),
            "continuous",
            swiss_hole,
        ),
        "blobs": record(
            "Gaussian Blobs",
            "sklearn.datasets.make_blobs",
            {"centers": 5, "cluster_std": 1.0},
            (
                "Five isotropic Gaussian clusters in R³. Discrete class labels. "
                "Tests whether a reducer preserves cluster separation when "
                "projected to 2-D."
            ),
            "categorical",
            blobs,
        ),
        "gaussian_quantiles": record(
            "Gaussian Quantiles",
            "sklearn.datasets.make_gaussian_quantiles",
            {"n_classes": 4},
            (
                "Concentric Gaussian shells in R³; class = which shell. Classes "
                "are linearly inseparable by construction — PCA collapses them, "
                "kernel and manifold methods have a chance."
            ),
            "categorical",
            quantiles,
        ),
        "classification": record(
            "Hypercube Clusters",
            "sklearn.datasets.make_classification",
            {
                "n_informative": 3,
                "n_redundant": 0,
                "n_repeated": 0,
                "n_classes": 4,
                "n_clusters_per_class": 2,
                "class_sep": 1.5,
            },
            (
                "Four classes, two sub-clusters each, placed at hypercube vertices "
                "with informative noise. A denser discrete test than blobs — "
                "within-class bimodality stresses cluster-preserving reducers."
            ),
            "categorical",
            hypercube,
        ),
    }
|
|
|
|
|
|
@app.get("/data.json")
def data():
    """Handle ``GET /data.json`` by returning the memoised dataset mapping."""
    payload = _datasets()
    return payload
|
|
|
|
|
|
# Serve the files next to this module from the site root.  This mount comes
# after the /data.json route; presumably so the route is matched before the
# catch-all "/" mount -- confirm against Starlette's routing order.
app.mount(
    "/",
    StaticFiles(directory=str(HERE), html=True),
    name="static",
)
|