"""Serve a fixed set of synthetic scikit-learn datasets as JSON, plus static files."""

from functools import lru_cache
from pathlib import Path

from fastapi import FastAPI
from fastapi.staticfiles import StaticFiles
from sklearn.datasets import (
    make_blobs,
    make_classification,
    make_gaussian_quantiles,
    make_s_curve,
    make_swiss_roll,
)

app = FastAPI()

# Directory containing this module; it doubles as the static-file root below.
HERE = Path(__file__).parent
# Sample count and RNG seed shared by every generator call.
N = 5000
SEED = 0


def _build(factory, hidden, *, name, path, kwargs, description, kind):
    """Generate one dataset and wrap it in its JSON-ready record.

    Args:
        factory: The ``sklearn.datasets`` generator to call.
        hidden: Keyword arguments passed to ``factory`` but not reported in
            the record.
        name: Human-readable title stored under ``"name"``.
        path: Dotted path of the generator, stored under ``"path"``.
        kwargs: Keyword arguments passed to ``factory`` *and* stored verbatim
            under ``"kwargs"``.
        description: Free-text blurb stored under ``"description"``.
        kind: ``"continuous"`` or ``"categorical"``, stored under ``"kind"``.

    Returns:
        A dict with keys ``name``, ``path``, ``kwargs``, ``description``,
        ``kind``, ``points`` and ``labels`` (in that order); the last two are
        the generator's two return values converted with ``.tolist()``.
    """
    samples, targets = factory(n_samples=N, random_state=SEED, **hidden, **kwargs)
    return {
        "name": name,
        "path": path,
        "kwargs": kwargs,
        "description": description,
        "kind": kind,
        "points": samples.tolist(),
        "labels": targets.tolist(),
    }


# maxsize=1 on a zero-argument function: the datasets are generated on the
# first call and the very same dict object is handed back afterwards, so
# callers must treat the result as read-only.
@lru_cache(maxsize=1)
def _datasets():
    """Return every dataset record, keyed by a short slug.

    Each generator is called with ``n_samples=N`` and ``random_state=SEED``.

    NOTE(review): the reported ``kwargs`` leave out ``noise`` and
    ``n_features`` (as well as ``n_samples``/``random_state``) — presumably on
    purpose for display; confirm with whatever consumes ``/data.json``.
    """
    in_3d = {"n_features": 3}
    return {
        "s_curve": _build(
            make_s_curve,
            {"noise": 0.03},
            name="S-Curve",
            path="sklearn.datasets.make_s_curve",
            kwargs={},
            description=(
                "A 2-D manifold warped into R³. Continuous label encodes position "
                "along the curve — a good test of whether a reducer unrolls the "
                "sheet without tearing."
            ),
            kind="continuous",
        ),
        "swiss_roll": _build(
            make_swiss_roll,
            {"noise": 0.15},
            name="Swiss Roll",
            path="sklearn.datasets.make_swiss_roll",
            kwargs={},
            description=(
                "A rolled-up plane. The canonical hard case for linear methods: "
                "PCA collapses the spiral, non-linear methods should recover the "
                "unroll."
            ),
            kind="continuous",
        ),
        "swiss_roll_hole": _build(
            make_swiss_roll,
            {"noise": 0.15},
            name="Swiss Roll (hole)",
            path="sklearn.datasets.make_swiss_roll",
            kwargs={"hole": True},
            description=(
                "Swiss roll with a rectangular hole punched through. Same manifold, "
                "non-trivial topology — a faithful unroll should preserve the hole "
                "rather than smearing it closed."
            ),
            kind="continuous",
        ),
        "blobs": _build(
            make_blobs,
            in_3d,
            name="Gaussian Blobs",
            path="sklearn.datasets.make_blobs",
            kwargs={"centers": 5, "cluster_std": 1.0},
            description=(
                "Five isotropic Gaussian clusters in R³. Discrete class labels. "
                "Tests whether a reducer preserves cluster separation when "
                "projected to 2-D."
            ),
            kind="categorical",
        ),
        "gaussian_quantiles": _build(
            make_gaussian_quantiles,
            in_3d,
            name="Gaussian Quantiles",
            path="sklearn.datasets.make_gaussian_quantiles",
            kwargs={"n_classes": 4},
            description=(
                "Concentric Gaussian shells in R³; class = which shell. Classes "
                "are linearly inseparable by construction — PCA collapses them, "
                "kernel and manifold methods have a chance."
            ),
            kind="categorical",
        ),
        "classification": _build(
            make_classification,
            in_3d,
            name="Hypercube Clusters",
            path="sklearn.datasets.make_classification",
            kwargs={
                "n_informative": 3,
                "n_redundant": 0,
                "n_repeated": 0,
                "n_classes": 4,
                "n_clusters_per_class": 2,
                "class_sep": 1.5,
            },
            description=(
                "Four classes, two sub-clusters each, placed at hypercube vertices "
                "with informative noise. A denser discrete test than blobs — "
                "within-class bimodality stresses cluster-preserving reducers."
            ),
            kind="categorical",
        ),
    }


# GET /data.json -> the cached mapping of dataset slug to record.
# (Documented with a comment rather than a docstring so the route's generated
# OpenAPI description stays exactly as it was.)
@app.get("/data.json")
def data():
    payload = _datasets()
    return payload


# Mounted after the route above so "/data.json" is matched before the
# catch-all static mount.
# NOTE(review): the static root is this module's own directory, so this source
# file is presumably downloadable too — confirm that is acceptable.
app.mount("/", StaticFiles(directory=str(HERE), html=True), name="static")