smaller runtime image

2026-05-16 23:30:58 -06:00 · 2026-05-16 23:30:58 -06:00 · 0b53dade06
commit 0b53dade06
parent 27846f661e
3 changed files with 34 additions and 38 deletions
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,10 +1,11 @@
-# BiRefNet background removal service — CUDA 12.6 runtime image.
-FROM nvidia/cuda:12.6.1-cudnn-runtime-ubuntu22.04
+# BiRefNet background removal service — CUDA 12.6 inference image.
+# torch's +cu126 wheels bundle their own CUDA/cuDNN, so no nvidia/cuda base is
+# needed; the GPU driver is injected by the nvidia container runtime at run time.
+FROM python:3.12-slim

-ENV DEBIAN_FRONTEND=noninteractive \
-    PYTHONUNBUFFERED=1 \
-    UV_PYTHON_INSTALL_DIR=/opt/python \
+ENV PYTHONUNBUFFERED=1 \
    UV_PROJECT_ENVIRONMENT=/app/.venv \
+    UV_PYTHON_PREFERENCE=only-system \
    UV_COMPILE_BYTECODE=1 \
    UV_LINK_MODE=copy \
    HF_HOME=/app/hf_cache \
@@ -13,18 +14,13 @@ ENV DEBIAN_FRONTEND=noninteractive \
 # uv: fast, reproducible Python + dependency management.
 COPY --from=ghcr.io/astral-sh/uv:0.9 /uv /uvx /bin/

-RUN apt-get update \
-    && apt-get install -y --no-install-recommends ca-certificates \
-    && rm -rf /var/lib/apt/lists/*
-
 WORKDIR /app

-# Install Python + dependencies first so this layer is cached across code changes.
+# Install dependencies first so this layer is cached across code changes.
 # The BuildKit cache mount keeps the uv download cache warm across rebuilds.
 COPY pyproject.toml uv.lock ./
 RUN --mount=type=cache,target=/root/.cache/uv \
-    uv python install 3.12 \
-    && uv sync --no-install-project --no-dev --frozen
+    uv sync --no-install-project --no-dev --frozen

 # Application code.
 COPY src ./src
--- a/build/Dockerfile
+++ b/build/Dockerfile
@@ -1,32 +1,32 @@
-# BiRefNet background removal service — installs the published package on the CUDA 12.6 runtime.
-# Same CUDA base family as the main ../Dockerfile, but installs rmbg-as-a-service from TestPyPI
-# with a plain pip rather than building from the local source tree.
-FROM nvidia/cuda:12.6.1-cudnn-runtime-ubuntu24.04
+# rmbg-as-a-service — slim CUDA inference image.
+# torch's +cu126 wheels bundle their own CUDA/cuDNN, so no nvidia/cuda base is needed;
+# the GPU driver is injected by the nvidia container runtime at run time.

-ENV DEBIAN_FRONTEND=noninteractive \
-    PYTHONUNBUFFERED=1 \
-    PIP_BREAK_SYSTEM_PACKAGES=1 \
+# ---- builder: install everything into an isolated venv ----
+FROM python:3.12-slim AS builder
+
+ENV PIP_DISABLE_PIP_VERSION_CHECK=1
+
+RUN python -m venv /opt/venv
+ENV PATH="/opt/venv/bin:${PATH}"
+
+# rmbg-as-a-service from PyPI; CUDA torch from the PyTorch index (its +cu126 build
+# outranks the plain wheel by local-version ordering).
+RUN --mount=type=cache,target=/root/.cache/pip \
+    pip install \
+        --extra-index-url https://download.pytorch.org/whl/cu126 \
+        rmbg-as-a-service==0.0.3 hf-transfer
+
+# ---- runtime: just Python + the prepared venv ----
+FROM python:3.12-slim
+
+ENV PYTHONUNBUFFERED=1 \
    HF_HOME=/app/hf_cache \
    HF_HUB_ENABLE_HF_TRANSFER=1 \
-    PORT=8000
+    PORT=8000 \
+    PATH="/opt/venv/bin:${PATH}"

-# Ubuntu 24.04 ships Python 3.12 (the project requires >=3.12).
-RUN apt-get update \
-    && apt-get install -y --no-install-recommends python3 python3-pip ca-certificates \
-    && rm -rf /var/lib/apt/lists/*
-
-# Fetch only the rmbg-as-a-service wheel from TestPyPI (--no-deps), so TestPyPI never takes
-# part in dependency resolution -- otherwise junk squatted packages there (e.g. "FASTAPI")
-# outrank the real ones. Then install it with real PyPI as the index, CUDA torch from the
-# PyTorch index (its +cu126 build outranks the plain wheel by local-version ordering).
-RUN --mount=type=cache,target=/root/.cache/pip \
-    pip download --no-deps --dest /tmp/pkg \
-        --index-url https://test.pypi.org/simple/ \
-        "rmbg-as-a-service==0.0.3" \
-    && pip install \
-        --extra-index-url https://download.pytorch.org/whl/cu126 \
-        /tmp/pkg/*.whl hf-transfer \
-    && rm -rf /tmp/pkg
+COPY --from=builder /opt/venv /opt/venv

 EXPOSE 8000
 CMD ["rmbg-as-a-service"]
--- a/build/Makefile
+++ b/build/Makefile
@@ -43,7 +43,7 @@ test: ## Send INPUT to the running service, save OUTPUT
 	$(PYTHON) ../scripts/client.py --url http://localhost:$(PORT) \
 		--input $(INPUT) --output $(OUTPUT) --background $(BG)

-push: ## Push mindthemath/rmbg:$(TAG) to Docker Hub (needs docker login)
+push: build ## Push mindthemath/rmbg:$(TAG) to Docker Hub (needs docker login)
 	docker push $(IMAGE):$(TAG)

 release: build push ## Build then push mindthemath/rmbg:$(TAG)