Compare commits

..

18 Commits
metal ... main

Author SHA1 Message Date
Michael Pilosov
3064f62192 styling of drop box 2026-05-16 23:31:07 -06:00
Michael Pilosov
0b53dade06 smaller runtime image 2026-05-16 23:30:58 -06:00
Michael Pilosov
27846f661e publish target 2026-05-16 22:32:56 -06:00
Michael Pilosov
8802704dfc bump 2026-05-16 22:25:08 -06:00
Michael Pilosov
a544572e8a more keybindings 2026-05-16 22:23:04 -06:00
Michael Pilosov
befd3caa1d mount cache 2026-05-16 22:22:51 -06:00
Michael Pilosov
d396298130 standalone build example 2026-05-16 22:19:50 -06:00
Michael Pilosov
b2ab86a8ad standalone build 2026-05-16 19:59:14 -06:00
Michael Pilosov
746635b570 dryrun vs real 2026-05-16 19:21:36 -06:00
Michael Pilosov
25e94b0c22 chain build + testpub 2026-05-16 19:20:47 -06:00
Michael Pilosov
606ccda721 defualt res 2048 2026-05-16 19:20:09 -06:00
Michael Pilosov
3f24ce2f80 publishing 2026-05-16 19:14:46 -06:00
Michael Pilosov
394a6911bb styling 2026-05-16 19:14:39 -06:00
Michael Pilosov
22696f3ec7 clean up sdist 2026-05-16 18:45:29 -06:00
Michael Pilosov
15b23c37c1 refactor styling: terminal 2026-05-16 18:45:10 -06:00
Michael Pilosov
27d9494123 rename service 2026-05-16 18:22:26 -06:00
Michael Pilosov
802ee4b857 dep bump + frozen install 2026-05-16 18:22:13 -06:00
Michael Pilosov
1ab49bce1d rename project 2026-05-16 18:08:59 -06:00
20 changed files with 2689 additions and 709 deletions

View File

@ -10,4 +10,5 @@ hf_cache
output*
.python-version
Makefile
docker-compose.yml
compose.yml
build

1
.gitignore vendored
View File

@ -6,3 +6,4 @@ output.png
output*.png
mask*.png
*.jpg
.env

View File

@ -1,10 +1,11 @@
# BiRefNet background removal service — CUDA 12.4 runtime image.
FROM nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04
# BiRefNet background removal service — CUDA 12.6 inference image.
# torch's +cu126 wheels bundle their own CUDA/cuDNN, so no nvidia/cuda base is
# needed; the GPU driver is injected by the nvidia container runtime at run time.
FROM python:3.12-slim
ENV DEBIAN_FRONTEND=noninteractive \
PYTHONUNBUFFERED=1 \
UV_PYTHON_INSTALL_DIR=/opt/python \
ENV PYTHONUNBUFFERED=1 \
UV_PROJECT_ENVIRONMENT=/app/.venv \
UV_PYTHON_PREFERENCE=only-system \
UV_COMPILE_BYTECODE=1 \
UV_LINK_MODE=copy \
HF_HOME=/app/hf_cache \
@ -13,18 +14,13 @@ ENV DEBIAN_FRONTEND=noninteractive \
# uv: fast, reproducible Python + dependency management.
COPY --from=ghcr.io/astral-sh/uv:0.9 /uv /uvx /bin/
RUN apt-get update \
&& apt-get install -y --no-install-recommends ca-certificates \
&& rm -rf /var/lib/apt/lists/*
WORKDIR /app
# Install Python + dependencies first so this layer is cached across code changes.
# Install dependencies first so this layer is cached across code changes.
# The BuildKit cache mount keeps the uv download cache warm across rebuilds.
COPY pyproject.toml ./
COPY pyproject.toml uv.lock ./
RUN --mount=type=cache,target=/root/.cache/uv \
uv python install 3.12 \
&& uv sync --no-install-project --no-dev
uv sync --no-install-project --no-dev --frozen
# Application code.
COPY src ./src
@ -35,4 +31,4 @@ RUN --mount=type=cache,target=/root/.cache/uv \
ENV PATH="/app/.venv/bin:${PATH}"
EXPOSE 8000
CMD ["birefnet-service"]
CMD ["rmbg-as-a-service"]

View File

@ -11,7 +11,7 @@ BLUR ?= 0
.DEFAULT_GOAL := help
.PHONY: help build run up stop down logs log ps test test-mask dev sync shell clean fmt
.PHONY: help build run up stop down logs log ps test test-mask dev sync shell clean fmt drytestpub testpub bump
help: ## Show this help
@grep -E '^[a-zA-Z_-]+:.*?## ' $(MAKEFILE_LIST) \
@ -45,10 +45,10 @@ sync: ## Install dependencies locally with uv
uv sync
dev: sync ## Run the service locally (no Docker; needs local CUDA)
uv run birefnet-service
uv run rmbg-as-a-service
shell: ## Open a shell inside a fresh container
$(COMPOSE) run --rm --entrypoint bash birefnet
$(COMPOSE) run --rm --entrypoint bash rmbg
fmt: ## Format code with ruff
uv run ruff format src scripts
@ -56,3 +56,19 @@ fmt: ## Format code with ruff
clean: ## Stop the service and remove build artifacts
-$(COMPOSE) down
rm -f $(OUTPUT) mask.png
.PHONY: publish

bump: ## Bump the patch version in pyproject.toml (0.0.1 -> 0.0.2)
	uv version --bump patch

# Every publish-style target starts from an empty dist/ so `uv publish` only
# sees the artifacts of the version that was just built.
# `set -a` exports each variable sourced from .env (UV_PUBLISH_TOKEN lives there).
drytestpub: ## Dry-run a TestPyPI publish (UV_PUBLISH_TOKEN from .env; uploads nothing)
	rm -rf dist/ && uv build && \
	set -a && . ./.env && set +a && \
	uv publish --publish-url https://test.pypi.org/legacy/ -t "$$UV_PUBLISH_TOKEN" --dry-run

testpub: ## Build and publish dist/ to TestPyPI (UV_PUBLISH_TOKEN from .env)
	rm -rf dist/ && uv build && \
	set -a && . ./.env && set +a && \
	uv publish --publish-url https://test.pypi.org/legacy/ -t "$$UV_PUBLISH_TOKEN"

publish: ## Build and publish dist/ with uv's default publish settings
	rm -rf dist/ && uv build && uv publish

View File

@ -30,15 +30,37 @@ while a model downloads and loads.
### Web UI
Open **http://localhost:8000/** — a two-tab test page (handy over SSH):
Open **http://localhost:8000/** — a single-page test app (handy over SSH):
- **Auto remove** — pick a model variant + resolution.
- **Prompt segment** — type what to keep (e.g. `the dog`), tune the
GroundingDINO box / text thresholds.
Both tabs support a transparency checkerboard preview, click-to-zoom lightbox,
Both modes support a transparency checkerboard preview, click-to-zoom lightbox,
optional crop-to-subject, and download.
#### Keyboard shortcuts
The UI is fully keyboard-drivable. Shortcuts are ignored while typing in a
field and while Ctrl/Cmd/Alt is held.
| Key | Action |
|---------------------|-----------------------------------------------|
| `B` | Toggle the controls sidebar |
| `U` | Open the file picker to upload an image |
| `I` / `O` | Show the input / output image |
| `F` / `Z` | Open the zoom view for the visible image |
| `S` | Save (download PNG), once a result exists |
In the zoom view:
| Key | Action |
|---------------------------|-----------------------------------------|
| `F` / `Z` / `Esc` | Close the zoom view |
| `+` / `-` | Zoom in / out (1×–8×) |
| `0` | Reset zoom & pan |
| Arrows or `H` `J` `K` `L` | Pan (while zoomed past 1×) |
## API
### `POST /predict` — automatic background removal
@ -105,10 +127,10 @@ make dev # uv sync + run the server locally
## Layout
```
src/birefnet_service/model.py BiRefNet / RMBG-2.0 wrapper + compositing
src/birefnet_service/prompt_segment.py GroundingDINO + SAM pipeline
src/birefnet_service/server.py LitServe /predict + /segment + web UI
src/birefnet_service/static/ web UI (index.html)
src/rmbg_as_a_service/model.py BiRefNet / RMBG-2.0 wrapper + compositing
src/rmbg_as_a_service/prompt_segment.py GroundingDINO + SAM pipeline
src/rmbg_as_a_service/server.py LitServe /predict + /segment + web UI
src/rmbg_as_a_service/static/ web UI (index.html + styles.css)
scripts/client.py stdlib-only test client
Dockerfile / compose.yml CUDA image + nvidia runtime
Makefile build / run / test shortcuts

32
build/Dockerfile Normal file
View File

@ -0,0 +1,32 @@
# rmbg-as-a-service — slim CUDA inference image.
# torch's +cu126 wheels bundle their own CUDA/cuDNN, so no nvidia/cuda base is needed;
# the GPU driver is injected by the nvidia container runtime at run time.
# ---- builder: install everything into an isolated venv ----
FROM python:3.12-slim AS builder
ENV PIP_DISABLE_PIP_VERSION_CHECK=1
RUN python -m venv /opt/venv
ENV PATH="/opt/venv/bin:${PATH}"
# rmbg-as-a-service from PyPI; CUDA torch from the PyTorch index (its +cu126 build
# outranks the plain wheel by local-version ordering).
RUN --mount=type=cache,target=/root/.cache/pip \
pip install \
--extra-index-url https://download.pytorch.org/whl/cu126 \
rmbg-as-a-service==0.0.3 hf-transfer
# ---- runtime: just Python + the prepared venv ----
FROM python:3.12-slim
ENV PYTHONUNBUFFERED=1 \
HF_HOME=/app/hf_cache \
HF_HUB_ENABLE_HF_TRANSFER=1 \
PORT=8000 \
PATH="/opt/venv/bin:${PATH}"
COPY --from=builder /opt/venv /opt/venv
EXPOSE 8000
CMD ["rmbg-as-a-service"]

53
build/Makefile Normal file
View File

@ -0,0 +1,53 @@
# mindthemath/rmbg image — build the published-package container, test it, push it.
# Override inline, e.g.: make release TAG=0.0.2
COMPOSE ?= docker compose
PYTHON ?= python3
IMAGE ?= mindthemath/rmbg
TAG ?= latest
# Host port the service is published on. Kept equal to compose.yml's
# ${PORT:-8001} default so `make up` and `make test` use the same port.
PORT ?= 8001
INPUT ?= ../test.jpg
OUTPUT ?= output.png
BG ?= alpha
# Exported so compose.yml's ${TAG} and ${PORT} interpolation picks up the
# values (including inline overrides such as `make up PORT=9000`).
export TAG PORT
.DEFAULT_GOAL := help
.PHONY: help build run up stop down logs log ps shell test push release clean
help: ## Show this help
@grep -E '^[a-zA-Z_-]+:.*?## ' $(MAKEFILE_LIST) \
| awk 'BEGIN{FS=":.*?## "}{printf " \033[36m%-12s\033[0m %s\n", $$1, $$2}'
build: ## Build the mindthemath/rmbg image
$(COMPOSE) build
run up: ## Start the service (GPU) in the background
$(COMPOSE) up -d
stop down: ## Stop and remove the service container
$(COMPOSE) down
logs log: ## Follow service logs
$(COMPOSE) logs -f
ps: ## Show service status
$(COMPOSE) ps
shell: ## Open a shell inside a fresh container
$(COMPOSE) run --rm --entrypoint bash rmbg
test: ## Send INPUT to the running service, save OUTPUT
$(PYTHON) ../scripts/client.py --url http://localhost:$(PORT) \
--input $(INPUT) --output $(OUTPUT) --background $(BG)
push: build ## Push mindthemath/rmbg:$(TAG) to Docker Hub (needs docker login)
docker push $(IMAGE):$(TAG)
release: build push ## Build then push mindthemath/rmbg:$(TAG)
clean: ## Stop the service and remove the built image
-$(COMPOSE) down
-docker image rm $(IMAGE):$(TAG)

31
build/compose.yml Normal file
View File

@ -0,0 +1,31 @@
# mindthemath/rmbg — runs the image built from the published (PyPI) package.
services:
rmbg:
build:
context: .
dockerfile: Dockerfile
image: mindthemath/rmbg:${TAG:-latest}
container_name: rmbg
ports:
- "${PORT:-8001}:8000"
environment:
- NVIDIA_VISIBLE_DEVICES=all
- NVIDIA_DRIVER_CAPABILITIES=compute,utility
# Default variant/resolution; both are also selectable per request.
- BIREFNET_MODEL=${BIREFNET_MODEL:-general}
- BIREFNET_RESOLUTION=${BIREFNET_RESOLUTION:-1024}
# Use the nvidia-container-runtime for GPU acceleration.
runtime: nvidia
volumes:
# Persist downloaded BiRefNet weights across container restarts.
- hf-cache:/app/hf_cache
healthcheck:
test: ["CMD", "python3", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"]
interval: 15s
timeout: 5s
retries: 30
start_period: 180s
restart: unless-stopped
volumes:
hf-cache:

191
build/requirements.txt Normal file
View File

@ -0,0 +1,191 @@
# This file was autogenerated by uv via the following command:
# uv pip compile pyproject.toml -o build/requirements.txt
annotated-doc==0.0.4
# via fastapi
annotated-types==0.7.0
# via pydantic
anyio==4.13.0
# via
# starlette
# watchfiles
certifi==2026.4.22
# via requests
charset-normalizer==3.4.7
# via requests
click==8.3.3
# via uvicorn
cuda-bindings==12.9.6
# via torch
cuda-pathfinder==1.5.4
# via cuda-bindings
cuda-toolkit==12.6.3
# via torch
einops==0.8.2
# via rmbg-as-a-service (pyproject.toml)
fastapi==0.136.1
# via litserve
filelock==3.29.0
# via
# huggingface-hub
# torch
# transformers
fsspec==2026.4.0
# via
# huggingface-hub
# torch
h11==0.16.0
# via uvicorn
hf-xet==1.5.0
# via huggingface-hub
httptools==0.7.1
# via uvicorn
huggingface-hub==0.36.2
# via
# timm
# tokenizers
# transformers
idna==3.15
# via
# anyio
# requests
jinja2==3.1.6
# via torch
kornia==0.8.2
# via rmbg-as-a-service (pyproject.toml)
kornia-rs==0.1.11
# via kornia
litserve==0.2.17
# via rmbg-as-a-service (pyproject.toml)
markupsafe==3.0.3
# via jinja2
mpmath==1.3.0
# via sympy
networkx==3.6.1
# via torch
numpy==2.4.5
# via
# rmbg-as-a-service (pyproject.toml)
# torchvision
# transformers
nvidia-cublas-cu12==12.6.4.1
# via
# cuda-toolkit
# nvidia-cudnn-cu12
# nvidia-cusolver-cu12
nvidia-cuda-cupti-cu12==12.6.80
# via cuda-toolkit
nvidia-cuda-nvrtc-cu12==12.6.85
# via cuda-toolkit
nvidia-cuda-runtime-cu12==12.6.77
# via cuda-toolkit
nvidia-cudnn-cu12==9.10.2.21
# via torch
nvidia-cufft-cu12==11.3.0.4
# via cuda-toolkit
nvidia-cufile-cu12==1.11.1.6
# via cuda-toolkit
nvidia-curand-cu12==10.3.7.77
# via cuda-toolkit
nvidia-cusolver-cu12==11.7.1.2
# via cuda-toolkit
nvidia-cusparse-cu12==12.5.4.2
# via
# cuda-toolkit
# nvidia-cusolver-cu12
nvidia-cusparselt-cu12==0.7.1
# via torch
nvidia-nccl-cu12==2.28.9
# via torch
nvidia-nvjitlink-cu12==12.6.85
# via
# cuda-toolkit
# nvidia-cufft-cu12
# nvidia-cusolver-cu12
# nvidia-cusparse-cu12
nvidia-nvshmem-cu12==3.4.5
# via torch
nvidia-nvtx-cu12==12.6.77
# via cuda-toolkit
packaging==26.2
# via
# huggingface-hub
# kornia
# transformers
pillow==12.2.0
# via
# rmbg-as-a-service (pyproject.toml)
# torchvision
pydantic==2.13.4
# via fastapi
pydantic-core==2.46.4
# via pydantic
python-dotenv==1.2.2
# via uvicorn
pyyaml==6.0.3
# via
# huggingface-hub
# timm
# transformers
# uvicorn
pyzmq==27.1.0
# via litserve
regex==2026.5.9
# via transformers
requests==2.34.2
# via
# huggingface-hub
# transformers
safetensors==0.7.0
# via
# timm
# transformers
setuptools==81.0.0
# via torch
starlette==1.0.0
# via fastapi
sympy==1.14.0
# via torch
timm==1.0.27
# via rmbg-as-a-service (pyproject.toml)
tokenizers==0.22.2
# via transformers
torch==2.11.0+cu126
# via
# rmbg-as-a-service (pyproject.toml)
# kornia
# timm
# torchvision
torchvision==0.26.0+cu126
# via
# rmbg-as-a-service (pyproject.toml)
# timm
tqdm==4.67.3
# via
# huggingface-hub
# transformers
transformers==4.57.6
# via rmbg-as-a-service (pyproject.toml)
triton==3.6.0
# via torch
typing-extensions==4.15.0
# via
# fastapi
# huggingface-hub
# pydantic
# pydantic-core
# torch
# typing-inspection
typing-inspection==0.4.2
# via
# fastapi
# pydantic
urllib3==2.7.0
# via requests
uvicorn==0.47.0
# via litserve
uvloop==0.22.1
# via uvicorn
watchfiles==1.1.1
# via uvicorn
websockets==16.0
# via uvicorn

View File

@ -1,8 +1,8 @@
services:
birefnet:
rmbg:
build: .
image: birefnet-service:latest
container_name: birefnet-service
image: rmbg-as-a-service:latest
container_name: rmbg-as-a-service
ports:
- "${PORT:-8000}:8000"
environment:

View File

@ -1,12 +1,12 @@
[project]
name = "rmbg-as-a-service"
version = "0.1.0"
version = "0.0.3"
description = "Background removal as a GPU-accelerated API"
readme = "README.md"
requires-python = ">=3.12,<3.13"
requires-python = ">=3.12"
dependencies = [
"torch==2.5.1",
"torchvision==0.20.1",
"torch==2.11.0",
"torchvision==0.26.0",
"transformers>=4.44,<5",
"timm>=1.0.0",
"einops>=0.8.0",
@ -17,7 +17,7 @@ dependencies = [
]
[project.scripts]
birefnet-service = "birefnet_service.server:run"
rmbg-as-a-service = "rmbg_as_a_service.server:run"
[dependency-groups]
dev = ["ruff>=0.6.0"]
@ -27,17 +27,24 @@ requires = ["hatchling"]
build-backend = "hatchling.build"
[tool.hatch.build.targets.wheel]
packages = ["src/birefnet_service"]
packages = ["src/rmbg_as_a_service"]
[tool.hatch.build.targets.sdist]
include = [
"src/rmbg_as_a_service",
"README.md",
"pyproject.toml",
]
# BiRefNet (torch) needs CUDA wheels; pull torch/torchvision from the PyTorch index.
[[tool.uv.index]]
name = "pytorch-cu124"
url = "https://download.pytorch.org/whl/cu124"
name = "pytorch-cu126"
url = "https://download.pytorch.org/whl/cu126"
explicit = true
[tool.uv.sources]
torch = [{ index = "pytorch-cu124", marker = "sys_platform == 'linux'" }]
torchvision = [{ index = "pytorch-cu124", marker = "sys_platform == 'linux'" }]
torch = { index = "pytorch-cu126" }
torchvision = { index = "pytorch-cu126" }
[tool.ruff]
line-length = 100

View File

@ -1,61 +0,0 @@
"""Quick CPU vs MPS benchmark for BiRefNet HR at 2048."""
from __future__ import annotations
import argparse
import time
import torch
from PIL import Image
from birefnet_service.model import BiRefNetService
def bench(device: str, image: Image.Image, model: str, resolution: int, warmup: int, iters: int):
    """Time background removal on one device and return the mean seconds per run.

    Builds a ``BiRefNetService`` for ``device``, runs ``warmup`` passes whose
    durations are printed but excluded from the statistics, then ``iters``
    measured passes.

    Args:
        device: Device name handed to ``BiRefNetService``.
        image: Input image passed to ``remove_background`` on every pass.
        model: Model variant name.
        resolution: Inference resolution.
        warmup: Number of unmeasured passes.
        iters: Number of measured passes; must be at least 1.

    Returns:
        The average duration of the measured passes, in seconds.

    Raises:
        ValueError: If ``iters`` is less than 1 (there would be nothing to average).
    """
    # Fail before the model is loaded instead of dividing by zero afterwards.
    if iters < 1:
        raise ValueError(f"iters must be >= 1, got {iters}")

    svc = BiRefNetService(device=device, default_model=model, default_resolution=resolution)
    print(f"[{device}] loaded {model} @ {resolution}, runtime device={svc.device}")

    def timed_pass() -> float:
        """Run one removal pass and return its wall-clock duration in seconds."""
        t0 = time.perf_counter()
        svc.remove_background(image, model=model, resolution=resolution)
        if svc.device == "mps":
            # Wait for outstanding MPS work so the measurement covers the whole pass.
            torch.mps.synchronize()
        return time.perf_counter() - t0

    for i in range(warmup):
        print(f" warmup {i + 1}: {timed_pass():.2f}s")

    times = []
    for i in range(iters):
        dt = timed_pass()
        times.append(dt)
        print(f" run {i + 1}: {dt:.2f}s")

    avg = sum(times) / len(times)
    print(f"[{device}] avg={avg:.2f}s min={min(times):.2f}s best-of-{iters}")
    return avg
def main() -> None:
    """Parse CLI options, benchmark every requested device, and print the MPS speedup.

    Devices come from the comma-separated ``--devices`` option and are run in the
    order given. The speedup line is printed only when both ``cpu`` and ``mps``
    were benchmarked.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--input", default="test.jpg")
    parser.add_argument("--model", default="HR")
    parser.add_argument("--resolution", type=int, default=2048)
    parser.add_argument("--devices", default="cpu,mps")
    parser.add_argument("--warmup", type=int, default=1)
    parser.add_argument("--iters", type=int, default=3)
    opts = parser.parse_args()

    source = Image.open(opts.input)
    print(f"image: {opts.input} {source.size} mode={source.mode}")
    print(f"mps available: {torch.backends.mps.is_available()}")

    # Surrounding whitespace in the device list is ignored ("cpu, mps" works).
    device_names = [name.strip() for name in opts.devices.split(",")]
    averages = {
        name: bench(name, source, opts.model, opts.resolution, opts.warmup, opts.iters)
        for name in device_names
    }

    if {"cpu", "mps"} <= averages.keys():
        print(f"\nspeedup mps vs cpu: {averages['cpu'] / averages['mps']:.2f}x")
if __name__ == "__main__":
main()

View File

@ -1,425 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>Background Removal &amp; Segmentation</title>
<style>
:root { color-scheme: dark; }
* { box-sizing: border-box; }
body {
margin: 0; font-family: system-ui, -apple-system, Segoe UI, Roboto, sans-serif;
background: #15171c; color: #e8e8ea; padding: 24px;
}
h1 { font-size: 1.25rem; font-weight: 600; margin: 0 0 4px; }
.sub { color: #8a8f99; font-size: .85rem; margin-bottom: 16px; }
.wrap { max-width: 1100px; margin: 0 auto; }
.tabs { display: flex; gap: 4px; margin-bottom: 16px; border-bottom: 1px solid #2a2f3a; }
.tab { background: none; border: 0; color: #8a8f99; font-size: .9rem; font-weight: 600;
padding: 10px 16px; cursor: pointer; border-bottom: 2px solid transparent; }
.tab.active { color: #e8e8ea; border-bottom-color: #5b8cff; }
#drop {
border: 2px dashed #3a3f4b; border-radius: 12px; padding: 36px;
text-align: center; cursor: pointer; transition: border-color .15s, background .15s;
}
#drop.over { border-color: #5b8cff; background: #1c2230; }
#drop p { margin: 6px 0; color: #8a8f99; }
.controls { display: flex; gap: 12px; align-items: flex-end; margin: 14px 0; flex-wrap: wrap; }
.controls[hidden] { display: none; }
label.field { display: flex; flex-direction: column; gap: 4px; font-size: .72rem;
color: #8a8f99; text-transform: uppercase; letter-spacing: .04em; }
select, input[type=number], input[type=text] {
background: #2a2f3a; color: #e8e8ea; border: 1px solid #3a3f4b;
border-radius: 8px; padding: 8px 10px; font-size: .9rem;
}
input[type=number] { width: 78px; }
input[type=number]:disabled { opacity: .45; }
input[type=text]#prompt { width: 320px; }
.check { display: flex; align-items: center; gap: 6px; font-size: .85rem;
color: #e8e8ea; cursor: pointer; align-self: end; padding-bottom: 8px; }
.check input { width: 15px; height: 15px; accent-color: #5b8cff; cursor: pointer; }
/* help tooltips */
.help { display: inline-flex; align-items: center; justify-content: center;
width: 14px; height: 14px; margin-left: 5px; border-radius: 50%;
border: 1px solid #4a4f5b; color: #8a8f99; font-size: 9px; font-weight: 700;
font-style: normal; cursor: help; position: relative; vertical-align: middle; }
.help:hover { color: #e8e8ea; border-color: #5b8cff; }
.help:hover::after {
content: attr(data-tip); position: absolute; bottom: 150%; left: 50%;
transform: translateX(-50%); width: 220px; background: #0c0d11;
color: #d8d9dc; border: 1px solid #3a3f4b; border-radius: 6px;
padding: 7px 9px; font-size: .72rem; font-weight: 400; line-height: 1.4;
text-transform: none; letter-spacing: normal; white-space: normal;
z-index: 50; pointer-events: none; }
button.go {
background: #5b8cff; color: #fff; border: 0; border-radius: 8px;
padding: 10px 18px; font-size: .9rem; cursor: pointer; font-weight: 600;
}
button.go:disabled { background: #3a3f4b; cursor: not-allowed; }
button.ghost { background: #2a2f3a; color: #fff; border: 0; border-radius: 8px;
padding: 10px 18px; font-size: .9rem; cursor: pointer; font-weight: 600; }
.go-row { display: flex; gap: 12px; align-items: center; margin: 14px 0; flex-wrap: wrap; }
.status { color: #8a8f99; font-size: .85rem; }
.status.err { color: #ff6b6b; }
.hint { color: #6b7280; font-size: .78rem; margin: -4px 0 4px; }
.panels { display: grid; grid-template-columns: 1fr 1fr; gap: 16px; margin-top: 16px; }
.panel { background: #1c1f27; border-radius: 12px; padding: 12px; }
.panel h2 { font-size: .8rem; font-weight: 600; color: #8a8f99; margin: 0 0 8px;
text-transform: uppercase; letter-spacing: .05em; }
.imgbox {
min-height: 260px; display: flex; align-items: center; justify-content: center;
border-radius: 8px; overflow: hidden;
}
.checker {
background-image:
linear-gradient(45deg, #2a2f3a 25%, transparent 25%),
linear-gradient(-45deg, #2a2f3a 25%, transparent 25%),
linear-gradient(45deg, transparent 75%, #2a2f3a 75%),
linear-gradient(-45deg, transparent 75%, #2a2f3a 75%);
background-size: 22px 22px;
background-position: 0 0, 0 11px, 11px -11px, -11px 0;
background-color: #20242d;
}
.imgbox img { max-width: 100%; max-height: 70vh; display: block; }
.imgbox img[src] { cursor: zoom-in; }
@media (max-width: 720px) { .panels { grid-template-columns: 1fr; } }
/* lightbox */
.lightbox { position: fixed; inset: 0; z-index: 100; background: rgba(12,13,17,.97);
display: flex; align-items: center; justify-content: center; }
.lightbox[hidden] { display: none; }
.lb-stage { width: 100vw; height: 100vh; overflow: hidden;
display: flex; align-items: center; justify-content: center; }
.lb-stage img { max-width: 100vw; max-height: 100vh; transform-origin: 0 0;
cursor: grab; user-select: none; -webkit-user-drag: none; will-change: transform; }
.lb-stage.grabbing img { cursor: grabbing; }
.lb-bar { position: fixed; top: 0; left: 0; right: 0; padding: 14px 20px;
z-index: 2; display: flex; justify-content: space-between; align-items: center;
color: #8a8f99; font-size: .8rem; pointer-events: none; }
.lb-close { pointer-events: auto; background: #2a2f3a; color: #e8e8ea;
border: 1px solid #3a3f4b; border-radius: 8px; width: 34px; height: 34px;
font-size: 1rem; line-height: 1; padding: 0; cursor: pointer;
display: flex; align-items: center; justify-content: center; }
</style>
</head>
<body>
<div class="wrap">
<h1>Background Removal &amp; Segmentation</h1>
<div class="sub">Automatic removal, or prompt-conditioned segmentation.</div>
<div class="tabs">
<button class="tab active" data-tab="auto">Auto remove</button>
<button class="tab" data-tab="prompt">Prompt segment</button>
</div>
<div id="drop">
<p><strong>Drop an image here</strong> or click to choose</p>
<p id="fname">No file selected</p>
<input id="file" type="file" accept="image/*" hidden />
</div>
<!-- Auto (BiRefNet / RMBG-2.0) controls -->
<div class="controls" id="ctl-auto">
<label class="field">Model
<select id="model">
<option value="general">general — clean single subjects (fast)</option>
<option value="HR" selected>HR — large / detailed scenes</option>
<option value="portrait">portrait — people</option>
<option value="matting">matting — soft edges / hair</option>
<option value="lite">lite — fastest</option>
<option value="rmbg2">rmbg2 — BRIA RMBG-2.0</option>
</select>
</label>
<label class="field">Resolution
<select id="resolution">
<option value="1024">1024</option>
<option value="1536">1536</option>
<option value="2048">2048</option>
<option value="2560" selected>2560</option>
</select>
</label>
</div>
<!-- Prompt (GroundingDINO + SAM) controls -->
<div class="controls" id="ctl-prompt" hidden>
<label class="field">Prompt — what to keep
<input type="text" id="prompt" placeholder="e.g. the dog · cow. person." />
</label>
<label class="field">
<span>Box threshold<span class="help" data-tip="Minimum confidence for GroundingDINO to keep a detected box. Lower finds more (and looser) objects; higher keeps only strong matches.">?</span></span>
<input type="number" id="boxThr" value="0.3" min="0" max="1" step="0.05" />
</label>
<label class="field">
<span>Text threshold<span class="help" data-tip="How strongly a detection must match your prompt words. Lower = looser word matching; higher = stricter.">?</span></span>
<input type="number" id="textThr" value="0.25" min="0" max="1" step="0.05" />
</label>
</div>
<!-- Shared output controls -->
<div class="controls">
<label class="field">Background
<select id="background">
<option value="alpha" selected>transparent</option>
<option value="white">white</option>
<option value="black">black</option>
<option value="gray">gray</option>
<option value="green">green</option>
<option value="blue">blue</option>
<option value="red">red</option>
</select>
</label>
<label class="field">
<span>Edge offset (px)<span class="help" data-tip="Grow (+) or shrink (−) the cutout edge by N pixels. A small negative value trims a leftover background-colored fringe around hair or fur.">?</span></span>
<input type="number" id="maskOffset" value="0" min="-20" max="20" step="1" />
</label>
<label class="field">
<span>Feather (px)<span class="help" data-tip="Gaussian blur applied to the mask edge, in pixels. Softens the cutout for smoother compositing onto a new background.">?</span></span>
<input type="number" id="maskBlur" value="0" min="0" max="64" step="1" />
</label>
<label class="check"><input type="checkbox" id="crop" checked /> Crop to subject</label>
<label class="field">Margin (in)
<input type="number" id="cropMargin" value="0" min="0" step="0.1" />
</label>
</div>
<div class="hint" id="hint"></div>
<div class="go-row">
<button class="go" id="go" disabled>Remove background</button>
<a id="dl" download="cutout.png"><button id="dlbtn" class="ghost" disabled>Download PNG</button></a>
<span id="status" class="status"></span>
</div>
<div class="panels">
<div class="panel">
<h2>Original</h2>
<div class="imgbox"><img id="src" alt="" /></div>
</div>
<div class="panel">
<h2>Result</h2>
<div class="imgbox checker"><img id="out" alt="" /></div>
</div>
</div>
</div>
<div id="lightbox" class="lightbox" hidden>
<div class="lb-bar">
<span>scroll to zoom · drag to pan · double-click resets · Esc closes</span>
<button class="lb-close" id="lbClose" title="Close"></button>
</div>
<div class="lb-stage" id="lbStage"><img id="lbImg" alt="" /></div>
</div>
<script>
const drop = document.getElementById('drop');
const fileInput = document.getElementById('file');
const fname = document.getElementById('fname');
const go = document.getElementById('go');
const dl = document.getElementById('dl');
const dlbtn = document.getElementById('dlbtn');
const statusEl = document.getElementById('status');
const srcImg = document.getElementById('src');
const outImg = document.getElementById('out');
const hint = document.getElementById('hint');
const modelSel = document.getElementById('model');
const resSel = document.getElementById('resolution');
const promptInput = document.getElementById('prompt');
const boxThr = document.getElementById('boxThr');
const textThr = document.getElementById('textThr');
const bgSel = document.getElementById('background');
const maskOffset = document.getElementById('maskOffset');
const maskBlur = document.getElementById('maskBlur');
const cropChk = document.getElementById('crop');
const cropMargin = document.getElementById('cropMargin');
const ctlAuto = document.getElementById('ctl-auto');
const ctlPrompt = document.getElementById('ctl-prompt');
let selectedFile = null;
let tab = 'auto';
const HINTS = {
auto: 'Large or busy scenes segment best with HR at 2048+. The general model expects a clear single subject at 1024.',
prompt: 'Type what to keep, e.g. "the dog" (or several: "cow. person."). Lower the box threshold to detect more / fainter objects.',
};
// Switch the page between the 'auto' and 'prompt' modes: highlight the chosen
// tab, show only that mode's control row, and update the button label and hint.
function setTab(name) {
  tab = name;
  const isAuto = name === 'auto';
  for (const button of document.querySelectorAll('.tab')) {
    button.classList.toggle('active', button.dataset.tab === name);
  }
  ctlAuto.hidden = !isAuto;
  ctlPrompt.hidden = name !== 'prompt';
  go.textContent = isAuto ? 'Remove background' : 'Segment';
  hint.textContent = HINTS[name];
}
document.querySelectorAll('.tab').forEach(t =>
t.addEventListener('click', () => setTab(t.dataset.tab)));
cropChk.addEventListener('change', () => { cropMargin.disabled = !cropChk.checked; });
// Show `msg` in the status line; a truthy `isErr` adds the 'err' class.
function setStatus(msg, isErr) {
  statusEl.className = isErr ? 'status err' : 'status';
  statusEl.textContent = msg;
}
// Accept a newly chosen or dropped file: remember it, preview it in the
// "Original" panel, clear any previous result, and enable the action button.
// Anything that is missing or not an image is rejected with an error status.
function pickFile(file) {
  if (!file || !file.type.startsWith('image/')) {
    setStatus('Please choose an image file.', true);
    return;
  }
  // Release the blob URL created for the previously selected file; without
  // this, each new selection keeps another object URL alive until page unload.
  const previousUrl = srcImg.getAttribute('src');
  if (previousUrl && previousUrl.startsWith('blob:')) {
    URL.revokeObjectURL(previousUrl);
  }
  selectedFile = file;
  fname.textContent = file.name + ' (' + Math.round(file.size / 1024) + ' KB)';
  srcImg.src = URL.createObjectURL(file);
  outImg.removeAttribute('src');
  dlbtn.disabled = true;
  go.disabled = false;
  setStatus('');
}
drop.addEventListener('click', () => fileInput.click());
fileInput.addEventListener('change', e => pickFile(e.target.files[0]));
['dragenter', 'dragover'].forEach(ev =>
drop.addEventListener(ev, e => { e.preventDefault(); drop.classList.add('over'); }));
['dragleave', 'drop'].forEach(ev =>
drop.addEventListener(ev, e => { e.preventDefault(); drop.classList.remove('over'); }));
drop.addEventListener('drop', e => pickFile(e.dataTransfer.files[0]));
// Read `file` as a data URL and resolve with the part after the first comma
// (the base64 payload). Rejects with the reader's error event on failure.
function fileToBase64(file) {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();
    reader.onerror = reject;
    reader.onload = () => {
      const [, payload] = reader.result.split(','); // drop the data URL prefix
      resolve(payload);
    };
    reader.readAsDataURL(file);
  });
}
// --- lightbox: click to inspect, scroll to zoom, drag to pan ---
const lightbox = document.getElementById('lightbox');
const lbStage = document.getElementById('lbStage');
const lbImg = document.getElementById('lbImg');
const lbClose = document.getElementById('lbClose');
let lbScale = 1, lbTx = 0, lbTy = 0, lbDrag = null;
// Push the current pan offset (lbTx, lbTy) and zoom (lbScale) onto the lightbox image.
function lbApply() {
  const translate = 'translate(' + lbTx + 'px, ' + lbTy + 'px)';
  const scale = 'scale(' + lbScale + ')';
  lbImg.style.transform = translate + ' ' + scale;
}
// Return the lightbox to 1x zoom with no pan, then re-apply the transform.
function lbReset() {
  lbTx = 0;
  lbTy = 0;
  lbScale = 1;
  lbApply();
}
// Open the lightbox on `src`. Does nothing when `src` is empty. `isResult`
// decides whether the image gets the 'checker' class.
function openLightbox(src, isResult) {
  if (src) {
    lbImg.src = src;
    lbImg.classList.toggle('checker', Boolean(isResult));
    lbReset();
    lightbox.hidden = false;
  }
}
// Hide the lightbox and drop its image source.
function closeLightbox() {
  lbImg.removeAttribute('src');
  lightbox.hidden = true;
}
srcImg.addEventListener('click', () => openLightbox(srcImg.getAttribute('src'), false));
outImg.addEventListener('click', () => openLightbox(outImg.getAttribute('src'), true));
lbClose.addEventListener('click', closeLightbox);
lightbox.addEventListener('mousedown', e => {
if (e.target === lightbox || e.target === lbStage) closeLightbox();
});
document.addEventListener('keydown', e => {
if (e.key === 'Escape' && !lightbox.hidden) closeLightbox();
});
// Wheel-to-zoom for the lightbox. The zoom level is clamped to [1, 8] and the
// pan offset is adjusted from the pointer position so the zoom is
// pointer-relative rather than corner-relative.
lbStage.addEventListener('wheel', e => {
// Stop the wheel from scrolling the page underneath.
e.preventDefault();
const rect = lbImg.getBoundingClientRect();
// Pointer position relative to the image's current on-screen box.
const cx = e.clientX - rect.left, cy = e.clientY - rect.top;
// Negative deltaY zooms in by 1.2x; otherwise zoom out by the same factor.
const factor = e.deltaY < 0 ? 1.2 : 1 / 1.2;
const newScale = Math.min(8, Math.max(1, lbScale * factor));
// Effective change after clamping; equals 1 (no pan change) at the limits.
const ratio = newScale / lbScale;
lbTx -= cx * (ratio - 1);
lbTy -= cy * (ratio - 1);
lbScale = newScale;
// Back at 1x: discard any accumulated pan.
if (lbScale === 1) { lbTx = 0; lbTy = 0; }
lbApply();
// Registered non-passive because the handler calls preventDefault().
}, { passive: false });
lbImg.addEventListener('mousedown', e => {
e.preventDefault();
lbDrag = { x: e.clientX, y: e.clientY, tx: lbTx, ty: lbTy };
lbStage.classList.add('grabbing');
});
window.addEventListener('mousemove', e => {
if (!lbDrag) return;
lbTx = lbDrag.tx + (e.clientX - lbDrag.x);
lbTy = lbDrag.ty + (e.clientY - lbDrag.y);
lbApply();
});
window.addEventListener('mouseup', () => {
lbDrag = null;
lbStage.classList.remove('grabbing');
});
lbImg.addEventListener('dblclick', e => { e.preventDefault(); lbReset(); });
// Submit the selected image to /predict (auto tab) or /segment (prompt tab),
// then show the returned PNG, wire up the download link, and report timing.
go.addEventListener('click', async () => {
  if (!selectedFile) return;
  if (tab === 'prompt' && !promptInput.value.trim()) {
    setStatus('Enter a prompt describing what to keep.', true);
    return;
  }

  // Parse a numeric field, using `fallback` only when the field is blank or
  // not a number. (`parseFloat(v) || fallback` would also override an explicit
  // 0, which the threshold inputs allow via min="0".)
  const numberOr = (input, fallback) => {
    const value = parseFloat(input.value);
    return Number.isFinite(value) ? value : fallback;
  };

  go.disabled = true;
  dlbtn.disabled = true;
  setStatus('Processing… (first use of a model downloads its weights)');
  const t0 = performance.now();
  try {
    const b64 = await fileToBase64(selectedFile);
    // Fields accepted by both endpoints.
    const shared = {
      image: b64,
      background: bgSel.value,
      mask_offset: parseInt(maskOffset.value, 10) || 0,
      mask_blur: parseInt(maskBlur.value, 10) || 0,
      crop: cropChk.checked,
      crop_margin: parseFloat(cropMargin.value) || 0,
    };
    let endpoint, body;
    if (tab === 'auto') {
      endpoint = '/predict';
      body = { ...shared, model: modelSel.value, resolution: parseInt(resSel.value, 10) };
    } else {
      endpoint = '/segment';
      body = {
        ...shared,
        prompt: promptInput.value.trim(),
        box_threshold: numberOr(boxThr, 0.3),
        text_threshold: numberOr(textThr, 0.25),
      };
    }
    const resp = await fetch(endpoint, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(body),
    });
    if (!resp.ok) throw new Error('HTTP ' + resp.status + ': ' + (await resp.text()));
    const data = await resp.json();
    const dataUrl = 'data:image/png;base64,' + data.image;
    outImg.src = dataUrl;
    dl.href = dataUrl;
    // Download name: the uploaded file's name with its extension swapped for .png.
    dl.download = selectedFile.name.replace(/\.[^.]+$/, '') + '.png';
    dlbtn.disabled = false;
    const secs = ((performance.now() - t0) / 1000).toFixed(1);
    if (tab === 'auto') {
      setStatus(`Done — ${data.width}×${data.height} · ${data.model} @ ${data.resolution} · ${secs}s`);
    } else {
      const n = data.detections;
      setStatus(`Done — ${n} object${n === 1 ? '' : 's'} matched "${data.prompt}" · ` +
        `${data.width}×${data.height} · ${secs}s` +
        (n === 0 ? ' (try a lower box threshold)' : ''));
    }
  } catch (err) {
    setStatus(err.message || String(err), true);
  } finally {
    // Re-enable the action button whether the request succeeded or failed.
    go.disabled = false;
  }
});
setTab('auto');
</script>
</body>
</html>

View File

@ -130,15 +130,11 @@ class BiRefNetService:
default_model: str = DEFAULT_MODEL,
default_resolution: int = DEFAULT_RESOLUTION,
):
want_cuda = device != "cpu" and torch.cuda.is_available()
if device and device not in ("auto", "cpu"):
self.device = device
elif device != "cpu" and torch.cuda.is_available():
self.device = "cuda"
elif device != "cpu" and getattr(torch.backends, "mps", None) and torch.backends.mps.is_available():
self.device = "mps"
else:
self.device = "cpu"
# fp16 is reliable on CUDA; on MPS it can introduce NaNs in BiRefNet — keep fp32.
self.device = "cuda" if want_cuda else "cpu"
self.use_half = self.device.startswith("cuda")
self.default_model = default_model
self.default_resolution = default_resolution

View File

@ -41,13 +41,15 @@ from pathlib import Path
import litserve as ls
from fastapi import HTTPException
from fastapi.responses import HTMLResponse
from fastapi.responses import HTMLResponse, Response
from PIL import Image, ImageOps
from .model import BiRefNetService
from .prompt_segment import PromptSegmenter
_UI_HTML = (Path(__file__).parent / "static" / "index.html").read_text(encoding="utf-8")
_STATIC = Path(__file__).parent / "static"
_UI_HTML = (_STATIC / "index.html").read_text(encoding="utf-8")
_UI_CSS = (_STATIC / "styles.css").read_text(encoding="utf-8")
# Lazily-created prompt segmenter (DINO + SAM), shared by the /segment route.
_segmenter: PromptSegmenter | None = None
@ -132,6 +134,10 @@ def run() -> None:
def index() -> str:
return _UI_HTML
@server.app.get("/styles.css")
def styles() -> Response:
return Response(_UI_CSS, media_type="text/css")
@server.app.post("/segment")
def segment(payload: dict) -> dict:
"""Prompt-conditioned segmentation (GroundingDINO + SAM)."""

View File

@ -0,0 +1,524 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>RMBG_SERVICE // BACKGROUND REMOVAL</title>
<link rel="stylesheet" href="/styles.css" />
</head>
<body>
<div class="container" id="container">
<button class="sidebar-toggle" id="sidebarToggle" title="Toggle panel" aria-label="Toggle panel">&#8249;</button>
<!-- ============ CONTROL SIDEBAR ============ -->
<aside class="controls panel">
<header class="app-header">
<div class="app-title">RMBG_SERVICE</div>
<div class="app-sub">[ BACKGROUND REMOVAL // SEGMENTATION ]</div>
</header>
<!-- upload -->
<section class="upload-section">
<div class="upload-box" id="drop">
<input id="file" type="file" accept="image/*" />
<div class="upload-text">&#9650; DROP IMAGE</div>
<div class="upload-hint" id="fname">NO FILE SELECTED</div>
<div class="upload-stats" id="fstats"></div>
<img class="preview-thumb" id="thumb" alt="" hidden />
</div>
</section>
<!-- mode -->
<div class="control-group">
<div class="control-label">// MODE</div>
<div class="radio-row">
<label><input type="radio" name="mode" value="auto" checked /> AUTO REMOVE</label>
<label><input type="radio" name="mode" value="prompt" /> PROMPT SEGMENT</label>
</div>
</div>
<!-- AUTO controls -->
<div class="control-group" id="ctl-auto">
<div class="control-label">// MODEL</div>
<div class="select-wrap">
<select id="model">
<option value="general">GENERAL &mdash; CLEAN SINGLE SUBJECTS (FAST)</option>
<option value="HR" selected>HR &mdash; LARGE / DETAILED SCENES</option>
<option value="portrait">PORTRAIT &mdash; PEOPLE</option>
<option value="matting">MATTING &mdash; SOFT EDGES / HAIR</option>
<option value="lite">LITE &mdash; FASTEST</option>
<option value="rmbg2">RMBG2 &mdash; BRIA RMBG-2.0</option>
</select>
</div>
<div class="control-label">// RESOLUTION</div>
<div class="select-wrap">
<select id="resolution">
<option value="1024">1024</option>
<option value="1536">1536</option>
<option value="2048" selected>2048</option>
<option value="2560">2560</option>
</select>
</div>
</div>
<!-- PROMPT controls -->
<div class="control-group" id="ctl-prompt">
<div class="control-label">// PROMPT &mdash; WHAT TO KEEP</div>
<input type="text" id="prompt" placeholder="e.g. THE DOG . COW. PERSON." />
<div class="slider-container">
<div class="control-label">BOX THRESHOLD
<span class="help" data-tip="Minimum confidence for GroundingDINO to keep a detected box. Lower finds more (and looser) objects; higher keeps only strong matches.">?</span>
</div>
<input type="range" class="slider" id="boxThr" min="0" max="1" step="0.05" value="0.3" />
<div class="slider-value" id="boxThrVal">0.30</div>
</div>
<div class="slider-container">
<div class="control-label">TEXT THRESHOLD
<span class="help" data-tip="How strongly a detection must match your prompt words. Lower = looser word matching; higher = stricter.">?</span>
</div>
<input type="range" class="slider" id="textThr" min="0" max="1" step="0.05" value="0.25" />
<div class="slider-value" id="textThrVal">0.25</div>
</div>
</div>
<!-- OUTPUT drawer -->
<div class="control-group drawer" id="outputDrawer">
<div class="drawer-header">
<span class="drawer-caret">&#9656;</span>
<span class="section-title">OUTPUT</span>
</div>
<div class="drawer-content">
<div class="control-label">// BACKGROUND</div>
<div class="select-wrap">
<select id="background">
<option value="alpha" selected>TRANSPARENT</option>
<option value="white">WHITE</option>
<option value="black">BLACK</option>
<option value="gray">GRAY</option>
<option value="green">GREEN</option>
<option value="blue">BLUE</option>
<option value="red">RED</option>
</select>
</div>
<div class="slider-container">
<div class="control-label">EDGE OFFSET <span class="unit">PX</span>
<span class="help" data-tip="Grow (+) or shrink (-) the cutout edge by N pixels. A small negative value trims a leftover background-colored fringe around hair or fur.">?</span>
</div>
<input type="range" class="slider" id="maskOffset" min="-20" max="20" step="1" value="0" />
<div class="slider-value" id="maskOffsetVal">0</div>
</div>
<div class="slider-container">
<div class="control-label">FEATHER <span class="unit">PX</span>
<span class="help" data-tip="Gaussian blur applied to the mask edge, in pixels. Softens the cutout for smoother compositing onto a new background.">?</span>
</div>
<input type="range" class="slider" id="maskBlur" min="0" max="64" step="1" value="0" />
<div class="slider-value" id="maskBlurVal">0</div>
</div>
<label class="check-row"><input type="checkbox" id="crop" checked /> CROP TO SUBJECT</label>
<div class="slider-container" id="cropMarginWrap">
<div class="control-label">CROP MARGIN <span class="unit">IN</span></div>
<input type="range" class="slider" id="cropMargin" min="0" max="2" step="0.25" value="0" />
<div class="slider-value" id="cropMarginVal">0.00</div>
</div>
</div>
</div>
<!-- actions -->
<div class="action-row">
<button class="action-btn primary" id="go" disabled>REMOVE BACKGROUND</button>
<a id="dl" download="cutout.png"><button class="action-btn" id="dlbtn" disabled>&#9660; DOWNLOAD PNG</button></a>
</div>
<button class="reset-btn" id="reset">&#10005; CLEAR</button>
<div class="status" id="status"></div>
</aside>
<!-- ============ PREVIEW ============ -->
<main class="preview-container panel">
<div class="preview-tabs">
<button class="preview-tab active" id="tabInput" data-view="input" disabled>INPUT [I]</button>
<button class="preview-tab" id="tabOutput" data-view="output" disabled>OUTPUT [O]</button>
</div>
<div class="preview-stage">
<div class="no-preview" id="noPreview">[ NO IMAGE LOADED ]</div>
<img class="preview-canvas" id="src" alt="" hidden />
<img class="preview-canvas checker" id="out" alt="" hidden />
</div>
</main>
</div>
<!-- lightbox -->
<div id="lightbox" class="lightbox" hidden>
<div class="lb-bar">
<span>SCROLL / +&minus; ZOOM &middot; DRAG OR HJKL / ARROWS PAN &middot; 0 RESETS &middot; F / Z / ESC CLOSE</span>
<button class="lb-close" id="lbClose" title="Close">&#10005;</button>
</div>
<div class="lb-stage" id="lbStage"><img id="lbImg" alt="" /></div>
</div>
<script>
/* Shorthand for document.getElementById. */
const $ = id => document.getElementById(id);
/* ---- layout ---- */
const container = $('container');
const sidebarToggle = $('sidebarToggle');
/* ---- upload box ---- */
const drop = $('drop');
const fileInput = $('file');
const fname = $('fname');
const fstats = $('fstats');
const thumb = $('thumb');
/* ---- actions + status ---- */
const go = $('go');
// `dl` is the <a> carrying the download href; `dlbtn` is the button nested inside it.
const dl = $('dl');
const dlbtn = $('dlbtn');
const statusEl = $('status');
const reset = $('reset');
/* ---- preview images ---- */
const srcImg = $('src');
const outImg = $('out');
const noPreview = $('noPreview');
/* ---- AUTO-mode controls ---- */
const modelSel = $('model');
const resSel = $('resolution');
/* ---- PROMPT-mode controls ---- */
const promptInput= $('prompt');
const boxThr = $('boxThr');
const textThr = $('textThr');
/* ---- output options ---- */
const bgSel = $('background');
const maskOffset = $('maskOffset');
const maskBlur = $('maskBlur');
const cropChk = $('crop');
const cropMargin = $('cropMargin');
const cropMarginWrap = $('cropMarginWrap');
/* ---- control groups and preview tabs ---- */
const ctlAuto = $('ctl-auto');
const ctlPrompt = $('ctl-prompt');
const tabInput = $('tabInput');
const tabOutput = $('tabOutput');
/* ---- mutable UI state ---- */
// The File currently chosen by the user, or null when nothing is loaded.
let selectedFile = null;
// Active mode: 'auto' or 'prompt' (the values of the name="mode" radio inputs).
let mode = 'auto';
/* ---- mode ---- */
/* Enable or disable a whole control group: toggles the 'disabled' class on the group
   element and sets `disabled` on every <input>/<select> inside it. */
function setGroupEnabled(group, enabled) {
  const off = !enabled;
  group.classList.toggle('disabled', off);
  for (const field of group.querySelectorAll('input, select')) {
    field.disabled = off;
  }
}
/* Switch between 'auto' and 'prompt' mode: records the mode, enables the matching
   control group, disables the other one and relabels the run button. */
function setMode(m) {
  mode = m;
  const isAuto = m === 'auto';
  setGroupEnabled(ctlAuto, isAuto);
  setGroupEnabled(ctlPrompt, m === 'prompt');
  go.textContent = isAuto ? 'REMOVE BACKGROUND' : 'SEGMENT';
}
/* Each mode radio switches to its own value when it becomes checked. */
for (const radio of document.querySelectorAll('input[name="mode"]')) {
  radio.addEventListener('change', () => {
    setMode(radio.value);
  });
}
/* ---- drawers ---- */
/* Clicking a drawer header toggles the 'open' class on its enclosing .drawer. */
for (const header of document.querySelectorAll('.drawer-header')) {
  header.addEventListener('click', () => {
    header.closest('.drawer').classList.toggle('open');
  });
}
/* ---- sidebar toggle (button + 'b' key) ---- */
/* Collapse or expand the sidebar and flip the toggle button's chevron to match. */
function toggleSidebar() {
  const isCollapsed = container.classList.toggle('sidebar-collapsed');
  let glyph;
  if (isCollapsed) {
    glyph = '&#8250;';
  } else {
    glyph = '&#8249;';
  }
  // innerHTML (not textContent) so the numeric character reference is decoded.
  sidebarToggle.innerHTML = glyph;
}
sidebarToggle.addEventListener('click', toggleSidebar);
/* ---- sliders ---- */
/* Mirror a slider's value into a readout element.
   input: element exposing `value` and `addEventListener`
   out:   element whose textContent receives the formatted value
   fmt:   formatter applied to the raw `input.value`
   The readout is rendered once immediately and again on every 'input' event. */
function wireSlider(input, out, fmt) {
  function render() {
    out.textContent = fmt(input.value);
  }
  input.addEventListener('input', render);
  render();
}
/* Hook every slider up to its readout. Thresholds and crop margin show two decimals,
   the edge offset gets an explicit '+' when positive, the feather shows the raw value. */
const twoDecimals = raw => Number(raw).toFixed(2);
wireSlider(boxThr, $('boxThrVal'), twoDecimals);
wireSlider(textThr, $('textThrVal'), twoDecimals);
wireSlider(maskOffset, $('maskOffsetVal'), raw => {
  const sign = raw > 0 ? '+' : '';
  return sign + raw;
});
wireSlider(maskBlur, $('maskBlurVal'), raw => String(raw));
wireSlider(cropMargin, $('cropMarginVal'), twoDecimals);
/* Mark the crop-margin wrapper with the 'disabled' class whenever "crop to subject"
   is unchecked. */
function syncCrop() {
  const marginOff = !cropChk.checked;
  cropMarginWrap.classList.toggle('disabled', marginOff);
}
cropChk.addEventListener('change', syncCrop);
/* ---- status ---- */
/* Show `msg` in the status line; a truthy `isErr` adds the 'err' class. */
function setStatus(msg, isErr) {
  statusEl.textContent = msg;
  statusEl.className = isErr ? 'status err' : 'status';
}
/* ---- preview tabs ---- */
/* Enable each preview tab only while its image has a src, and show the
   "no image" placeholder only when neither image has one. */
function refreshTabs() {
  const inputLoaded = Boolean(srcImg.getAttribute('src'));
  const outputLoaded = Boolean(outImg.getAttribute('src'));
  tabInput.disabled = !inputLoaded;
  tabOutput.disabled = !outputLoaded;
  noPreview.hidden = inputLoaded || outputLoaded;
}
/* Switch the preview to 'input' or 'output'. A request for a view whose tab is
   disabled is ignored. */
function showView(v) {
  const wantsInput = v === 'input';
  const wantsOutput = v === 'output';
  if (wantsInput && tabInput.disabled) return;
  if (wantsOutput && tabOutput.disabled) return;
  tabInput.classList.toggle('active', wantsInput);
  tabOutput.classList.toggle('active', wantsOutput);
  srcImg.hidden = !wantsInput;
  outImg.hidden = !wantsOutput;
}
/* Each preview tab shows the view named in its data-view attribute. */
for (const tabBtn of document.querySelectorAll('.preview-tab')) {
  tabBtn.addEventListener('click', () => {
    showView(tabBtn.dataset.view);
  });
}
/* ---- file handling ---- */
/* Accept a newly chosen image file: remember it, show its name/size/thumbnail, load it
   into the input preview, discard any previous result and enable the run button.
   Anything that is missing or not an image/* file is rejected with an error status. */
function pickFile(file) {
  if (!file || !file.type.startsWith('image/')) {
    setStatus('PLEASE CHOOSE AN IMAGE FILE.', true);
    return;
  }
  // Release the blob URL of the previously loaded file. Object URLs stay alive until
  // revoked, so without this every pick would keep one more reference around.
  const staleUrl = srcImg.getAttribute('src');
  if (staleUrl && staleUrl.startsWith('blob:')) URL.revokeObjectURL(staleUrl);
  selectedFile = file;
  const url = URL.createObjectURL(file);
  const kb = Math.round(file.size / 1024);
  fname.textContent = file.name;
  fstats.textContent = kb + ' KB';
  thumb.src = url;
  thumb.hidden = false;
  drop.classList.add('has-file');
  // Pixel dimensions are only known once the image has decoded; extend the stats then.
  srcImg.onload = () => {
    fstats.textContent =
      srcImg.naturalWidth + '×' + srcImg.naturalHeight + ' PX · ' + kb + ' KB';
  };
  srcImg.src = url;
  outImg.removeAttribute('src');
  // Drop the previous result from the download link too; otherwise the anchor would
  // still offer the old file's output even though the download button is disabled.
  dl.removeAttribute('href');
  dlbtn.disabled = true;
  go.disabled = false;
  setStatus('');
  refreshTabs();
  showView('input');
}
/* File chooser: hand the first selected file to pickFile. */
fileInput.addEventListener('change', (ev) => {
  pickFile(ev.target.files[0]);
});
/* Drag feedback: highlight the drop box while something is dragged over it... */
for (const type of ['dragenter', 'dragover']) {
  drop.addEventListener(type, (ev) => {
    ev.preventDefault();
    drop.classList.add('dragover');
  });
}
/* ...and remove the highlight when the drag leaves or ends in a drop. */
for (const type of ['dragleave', 'drop']) {
  drop.addEventListener(type, (ev) => {
    ev.preventDefault();
    drop.classList.remove('dragover');
  });
}
/* Drop: hand the first dropped file to pickFile. */
drop.addEventListener('drop', (ev) => {
  pickFile(ev.dataTransfer.files[0]);
});
/* CLEAR: forget the selected file and any result, and return the upload box, previews,
   buttons, status line and tabs to their initial state. */
reset.addEventListener('click', () => {
  // Release the blob URL backing the input preview/thumbnail before dropping the
  // references to it; object URLs are not freed until revoked.
  const staleUrl = srcImg.getAttribute('src');
  if (staleUrl && staleUrl.startsWith('blob:')) URL.revokeObjectURL(staleUrl);
  selectedFile = null;
  fileInput.value = '';
  fname.textContent = 'NO FILE SELECTED';
  fstats.textContent = '';
  thumb.removeAttribute('src');
  thumb.hidden = true;
  drop.classList.remove('has-file');
  srcImg.removeAttribute('src');
  srcImg.hidden = true;
  outImg.removeAttribute('src');
  outImg.hidden = true;
  // Remove the cleared result from the download link as well, so it can no longer be
  // downloaded through the anchor (and its data URL is not kept in memory).
  dl.removeAttribute('href');
  go.disabled = true;
  dlbtn.disabled = true;
  setStatus('');
  refreshTabs();
  tabInput.classList.add('active');
  tabOutput.classList.remove('active');
});
/* Read `file` and resolve with its contents as a base64 string, i.e. the part of the
   data URL after the first comma.
   Rejects with the reader's error object, so callers get a usable `.message`; the raw
   'error' event would stringify to "[object ProgressEvent]". */
function fileToBase64(file) {
  return new Promise((resolve, reject) => {
    const r = new FileReader();
    r.onload = () => resolve(r.result.split(',')[1]);
    r.onerror = () => reject(r.error || new Error('Could not read the selected file.'));
    r.readAsDataURL(file);
  });
}
/* ---- lightbox ---- */
const lightbox = $('lightbox');
const lbStage = $('lbStage');
const lbImg = $('lbImg');
const lbClose = $('lbClose');
/* View state: current scale, translation in px, and the active drag (null when idle). */
let lbScale = 1;
let lbTx = 0;
let lbTy = 0;
let lbDrag = null;
/* Push the current translation and scale onto the lightbox image's CSS transform. */
function lbApply() {
  lbImg.style.transform = `translate(${lbTx}px, ${lbTy}px) scale(${lbScale})`;
}
/* Return to scale 1 with no translation. */
function lbReset() {
  lbScale = 1;
  lbTx = 0;
  lbTy = 0;
  lbApply();
}
/* Open the lightbox on `src` (ignored when falsy). `isResult` controls whether the
   image carries the 'checker' class. */
function openLightbox(src, isResult) {
  if (!src) return;
  lbImg.src = src;
  lbImg.classList.toggle('checker', Boolean(isResult));
  lbReset();
  lightbox.hidden = false;
}
/* Hide the lightbox and drop its image source. */
function closeLightbox() {
  lightbox.hidden = true;
  lbImg.removeAttribute('src');
}
/* which preview image is currently shown */
/* The preview image belonging to the active tab: the output image when the OUTPUT tab
   is active, otherwise the input image. */
function currentImg() {
  if (tabOutput.classList.contains('active')) return outImg;
  return srcImg;
}
/* Close the lightbox if it is open; otherwise open it on the visible preview image,
   provided that image has a src. */
function toggleZoom() {
  if (!lightbox.hidden) {
    closeLightbox();
    return;
  }
  const shown = currentImg();
  const shownSrc = shown.getAttribute('src');
  if (!shownSrc) return;
  openLightbox(shownSrc, shown === outImg);
}
/* keyboard zoom — anchored on the centre of the stage */
/* Keyboard zoom: multiply the scale by `factor`, clamped to [1, 8]. The anchor is the
   centre of the stage expressed relative to the image rect; the translation is shifted
   by that offset times the relative scale change, and zeroed at scale 1. */
function lbZoom(factor) {
  const stageBox = lbStage.getBoundingClientRect();
  const imgBox = lbImg.getBoundingClientRect();
  const ax = stageBox.left + stageBox.width / 2 - imgBox.left;
  const ay = stageBox.top + stageBox.height / 2 - imgBox.top;
  const nextScale = Math.min(8, Math.max(1, lbScale * factor));
  const growth = nextScale / lbScale - 1;
  lbTx -= ax * growth;
  lbTy -= ay * growth;
  lbScale = nextScale;
  if (lbScale === 1) {
    lbTx = 0;
    lbTy = 0;
  }
  lbApply();
}
/* keyboard pan — dx/dy in {-1,0,1}; only meaningful while zoomed in */
/* Keyboard pan: move the image 80px per step in the given direction.
   Does nothing unless the view is zoomed in (scale > 1). */
function lbPan(dx, dy) {
  if (lbScale <= 1) {
    return;
  }
  lbTx += dx * 80;
  lbTy += dy * 80;
  lbApply();
}
/* Clicking a preview image opens it in the lightbox; only the output image is opened
   with isResult = true. */
srcImg.addEventListener('click', () => {
  openLightbox(srcImg.getAttribute('src'), false);
});
outImg.addEventListener('click', () => {
  openLightbox(outImg.getAttribute('src'), true);
});
lbClose.addEventListener('click', closeLightbox);
/* A mousedown that lands on the lightbox or the stage itself (not on the image)
   closes the lightbox. */
lightbox.addEventListener('mousedown', (ev) => {
  const hit = ev.target;
  if (hit === lightbox || hit === lbStage) closeLightbox();
});
/* True when `el` is a control where letter keys are meaningful input: text-like
   <input>s, <textarea>, <select> (type-ahead) and contenteditable elements.
   Radios, checkboxes, sliders, file inputs and buttons do not consume letter keys, so
   shortcuts keep working after the user has clicked one of them. */
function isTextEntryTarget(el) {
  if (!el || !el.tagName) return false;
  if (el.isContentEditable) return true;
  if (el.tagName === 'TEXTAREA' || el.tagName === 'SELECT') return true;
  if (el.tagName !== 'INPUT') return false;
  const nonText = ['button', 'checkbox', 'color', 'file', 'image', 'radio', 'range', 'reset', 'submit'];
  return !nonText.includes(String(el.type || '').toLowerCase());
}
/* Global keyboard shortcuts. While the lightbox is open it captures its navigation
   keys; otherwise the main-UI shortcuts apply unless the user is typing in a field. */
document.addEventListener('keydown', e => {
  if (e.metaKey || e.ctrlKey || e.altKey) return;
  // Synthetic keydown events (e.g. ones dispatched by autofill or extensions) may
  // carry no `key`; calling toLowerCase() on it would throw.
  if (typeof e.key !== 'string') return;
  const k = e.key.toLowerCase();
  // --- zoom view: capture all navigation keys while it's open ---
  if (!lightbox.hidden) {
    if (e.key === 'Escape' || k === 'f' || k === 'z') closeLightbox();
    else if (k === '+' || k === '=') lbZoom(1.25);
    else if (k === '-' || k === '_') lbZoom(1 / 1.25);
    else if (k === '0') lbReset();
    // Panning moves the image opposite to the key direction, i.e. the viewport
    // travels in the direction of the key.
    else if (e.key === 'ArrowLeft' || k === 'h') lbPan( 1, 0);
    else if (e.key === 'ArrowRight' || k === 'l') lbPan(-1, 0);
    else if (e.key === 'ArrowUp' || k === 'k') lbPan( 0, 1);
    else if (e.key === 'ArrowDown' || k === 'j') lbPan( 0, -1);
    else return; // not a lightbox key: leave the event alone
    e.preventDefault();
    return;
  }
  // --- main UI shortcuts (ignored while typing in a field) ---
  if (isTextEntryTarget(e.target)) return;
  if (k === 'b') toggleSidebar();
  else if (k === 'u') fileInput.click();
  else if (k === 'i') showView('input');
  else if (k === 'o') showView('output');
  else if (k === 'f' || k === 'z') toggleZoom();
  else if (k === 's' && !dlbtn.disabled) dl.click();
});
/* Wheel zoom: one notch multiplies the scale by 1.2 (or 1/1.2), clamped to [1, 8].
   The translation is shifted by the pointer's offset inside the image rect times the
   relative scale change; at scale 1 the translation is zeroed. */
lbStage.addEventListener('wheel', (ev) => {
  ev.preventDefault();
  const box = lbImg.getBoundingClientRect();
  const px = ev.clientX - box.left;
  const py = ev.clientY - box.top;
  const step = ev.deltaY < 0 ? 1.2 : 1 / 1.2;
  const nextScale = Math.min(8, Math.max(1, lbScale * step));
  const growth = nextScale / lbScale - 1;
  lbTx -= px * growth;
  lbTy -= py * growth;
  lbScale = nextScale;
  if (lbScale === 1) {
    lbTx = 0;
    lbTy = 0;
  }
  lbApply();
}, { passive: false }); // non-passive so preventDefault() is honoured
/* Drag-to-pan: mousedown on the image records the pointer position and the current
   translation; window-level move/up listeners keep the drag alive outside the image. */
lbImg.addEventListener('mousedown', (ev) => {
  ev.preventDefault();
  lbDrag = {
    x: ev.clientX,
    y: ev.clientY,
    tx: lbTx,
    ty: lbTy,
  };
  lbStage.classList.add('grabbing');
});
window.addEventListener('mousemove', (ev) => {
  if (lbDrag) {
    // New translation = translation at drag start + pointer displacement since then.
    lbTx = lbDrag.tx + (ev.clientX - lbDrag.x);
    lbTy = lbDrag.ty + (ev.clientY - lbDrag.y);
    lbApply();
  }
});
window.addEventListener('mouseup', () => {
  lbDrag = null;
  lbStage.classList.remove('grabbing');
});
/* Double-click on the image returns to the unzoomed view. */
lbImg.addEventListener('dblclick', (ev) => {
  ev.preventDefault();
  lbReset();
});
/* ---- run ---- */
/* Run the selected operation: POST the image plus options to /predict (auto mode) or
   /segment (prompt mode), then show the returned PNG in the OUTPUT view and wire up
   the download link. */
go.addEventListener('click', async () => {
  // Snapshot the file and mode for this run. Both can change while the request is in
  // flight (new file picked, CLEAR pressed, mode radio switched), and the response
  // must be interpreted against what was actually sent.
  const file = selectedFile;
  const runMode = mode;
  if (!file) return;
  if (runMode === 'prompt' && !promptInput.value.trim()) {
    setStatus('ENTER A PROMPT DESCRIBING WHAT TO KEEP.', true);
    return;
  }
  // The threshold sliders go down to 0, and `parseFloat(v) || fallback` would replace
  // that legitimate 0 with the fallback; only fall back on a non-finite value.
  const numberOr = (raw, fallback) => {
    const n = parseFloat(raw);
    return Number.isFinite(n) ? n : fallback;
  };
  go.disabled = true;
  dlbtn.disabled = true;
  setStatus('PROCESSING… (FIRST USE OF A MODEL DOWNLOADS ITS WEIGHTS)');
  const t0 = performance.now();
  try {
    const b64 = await fileToBase64(file);
    const shared = {
      image: b64,
      background: bgSel.value,
      mask_offset: parseInt(maskOffset.value, 10) || 0,
      mask_blur: parseInt(maskBlur.value, 10) || 0,
      crop: cropChk.checked,
      crop_margin: parseFloat(cropMargin.value) || 0,
    };
    let endpoint, body;
    if (runMode === 'auto') {
      endpoint = '/predict';
      body = { ...shared, model: modelSel.value, resolution: parseInt(resSel.value, 10) };
    } else {
      endpoint = '/segment';
      body = { ...shared, prompt: promptInput.value.trim(),
        box_threshold: numberOr(boxThr.value, 0.3),
        text_threshold: numberOr(textThr.value, 0.25) };
    }
    const resp = await fetch(endpoint, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(body),
    });
    if (!resp.ok) throw new Error('HTTP ' + resp.status + ': ' + (await resp.text()));
    const data = await resp.json();
    // The selection changed or was cleared while waiting: this result belongs to a
    // file that is no longer shown, so discard it instead of displaying it.
    if (selectedFile !== file) return;
    const dataUrl = 'data:image/png;base64,' + data.image;
    outImg.src = dataUrl;
    dl.href = dataUrl;
    dl.download = file.name.replace(/\.[^.]+$/, '') + '.png';
    dlbtn.disabled = false;
    refreshTabs();
    showView('output');
    const secs = ((performance.now() - t0) / 1000).toFixed(1);
    if (runMode === 'auto') {
      setStatus(`DONE — ${data.width}×${data.height} · ${data.model} @ ${data.resolution} · ${secs}S`);
    } else {
      const n = data.detections;
      setStatus(`DONE — ${n} OBJECT${n === 1 ? '' : 'S'} MATCHED "${data.prompt}" · ` +
        `${data.width}×${data.height} · ${secs}S` +
        (n === 0 ? ' (TRY A LOWER BOX THRESHOLD)' : ''));
    }
  } catch (err) {
    // Only report the failure if it still concerns the current selection.
    if (selectedFile === file) setStatus(err.message || String(err), true);
  } finally {
    // Re-enable the button only while there is a file to process (CLEAR may have
    // removed it during the request).
    go.disabled = !selectedFile;
  }
});
/* ---- init ---- */
// Start in AUTO mode: enables the auto control group and disables the prompt group.
setMode('auto');
// Bring the crop-margin wrapper's 'disabled' class in line with the crop checkbox.
syncCrop();
// No image is loaded yet, so this disables both preview tabs and shows the placeholder.
refreshTabs();
</script>
</body>
</html>

File diff suppressed because it is too large Load Diff

929
uv.lock generated

File diff suppressed because it is too large Load Diff