smaller runtime image

2026-05-16 23:30:58 -06:00 · 2026-05-16 23:30:58 -06:00 · 0b53dade06
commit 0b53dade06
parent 27846f661e
3 changed files with 34 additions and 38 deletions
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,10 +1,11 @@
-# BiRefNet background removal service — CUDA 12.6 runtime image.
+# BiRefNet background removal service — CUDA 12.6 inference image.
-FROM nvidia/cuda:12.6.1-cudnn-runtime-ubuntu22.04
+# torch's +cu126 wheels bundle their own CUDA/cuDNN, so no nvidia/cuda base is
+# needed; the GPU driver is injected by the nvidia container runtime at run time.
+FROM python:3.12-slim
-ENV DEBIAN_FRONTEND=noninteractive \
+ENV PYTHONUNBUFFERED=1 \
-    PYTHONUNBUFFERED=1 \
    UV_PYTHON_INSTALL_DIR=/opt/python \
    UV_PROJECT_ENVIRONMENT=/app/.venv \
    UV_PYTHON_PREFERENCE=only-system \
    UV_COMPILE_BYTECODE=1 \
    UV_LINK_MODE=copy \
    HF_HOME=/app/hf_cache \
@@ -13,18 +14,13 @@ ENV DEBIAN_FRONTEND=noninteractive \
 # uv: fast, reproducible Python + dependency management.
 COPY --from=ghcr.io/astral-sh/uv:0.9 /uv /uvx /bin/
 RUN apt-get update \
    && apt-get install -y --no-install-recommends ca-certificates \
    && rm -rf /var/lib/apt/lists/*
 WORKDIR /app
-# Install Python + dependencies first so this layer is cached across code changes.
+# Install dependencies first so this layer is cached across code changes.
 # The BuildKit cache mount keeps the uv download cache warm across rebuilds.
 COPY pyproject.toml uv.lock ./
 RUN --mount=type=cache,target=/root/.cache/uv \
-    uv python install 3.12 \
+    uv sync --no-install-project --no-dev --frozen
-    && uv sync --no-install-project --no-dev --frozen
 # Application code.
 COPY src ./src
--- a/build/Dockerfile
+++ b/build/Dockerfile
@@ -1,32 +1,32 @@
-# BiRefNet background removal service — installs the published package on the CUDA 12.6 runtime.
+# rmbg-as-a-service — slim CUDA inference image.
-# Same CUDA base family as the main ../Dockerfile, but installs rmbg-as-a-service from TestPyPI
+# torch's +cu126 wheels bundle their own CUDA/cuDNN, so no nvidia/cuda base is needed;
-# with a plain pip rather than building from the local source tree.
+# the GPU driver is injected by the nvidia container runtime at run time.
-FROM nvidia/cuda:12.6.1-cudnn-runtime-ubuntu24.04
-ENV DEBIAN_FRONTEND=noninteractive \
+# ---- builder: install everything into an isolated venv ----
-    PYTHONUNBUFFERED=1 \
+FROM python:3.12-slim AS builder
-    PIP_BREAK_SYSTEM_PACKAGES=1 \
+
+ENV PIP_DISABLE_PIP_VERSION_CHECK=1
+RUN python -m venv /opt/venv
+ENV PATH="/opt/venv/bin:${PATH}"
+# rmbg-as-a-service from PyPI; CUDA torch from the PyTorch index (its +cu126 build
+# outranks the plain wheel by local-version ordering).
+RUN --mount=type=cache,target=/root/.cache/pip \
+    pip install \
+        --extra-index-url https://download.pytorch.org/whl/cu126 \
+        rmbg-as-a-service==0.0.3 hf-transfer
+# ---- runtime: just Python + the prepared venv ----
+FROM python:3.12-slim
+ENV PYTHONUNBUFFERED=1 \
    HF_HOME=/app/hf_cache \
    HF_HUB_ENABLE_HF_TRANSFER=1 \
-    PORT=8000
+    PORT=8000 \
+    PATH="/opt/venv/bin:${PATH}"
-# Ubuntu 24.04 ships Python 3.12 (the project requires >=3.12).
+COPY --from=builder /opt/venv /opt/venv
-RUN apt-get update \
-    && apt-get install -y --no-install-recommends python3 python3-pip ca-certificates \
-    && rm -rf /var/lib/apt/lists/*
-# Fetch only the rmbg-as-a-service wheel from TestPyPI (--no-deps), so TestPyPI never takes
-# part in dependency resolution -- otherwise junk squatted packages there (e.g. "FASTAPI")
-# outrank the real ones. Then install it with real PyPI as the index, CUDA torch from the
-# PyTorch index (its +cu126 build outranks the plain wheel by local-version ordering).
-RUN --mount=type=cache,target=/root/.cache/pip \
-    pip download --no-deps --dest /tmp/pkg \
-        --index-url https://test.pypi.org/simple/ \
-        "rmbg-as-a-service==0.0.3" \
-    && pip install \
-        --extra-index-url https://download.pytorch.org/whl/cu126 \
-        /tmp/pkg/*.whl hf-transfer \
-    && rm -rf /tmp/pkg
 EXPOSE 8000
 CMD ["rmbg-as-a-service"]
--- a/build/Makefile
+++ b/build/Makefile
@@ -43,7 +43,7 @@ test: ## Send INPUT to the running service, save OUTPUT
 	$(PYTHON) ../scripts/client.py --url http://localhost:$(PORT) \
 		--input $(INPUT) --output $(OUTPUT) --background $(BG)
-push: ## Push mindthemath/rmbg:$(TAG) to Docker Hub (needs docker login)
+push: build ## Push mindthemath/rmbg:$(TAG) to Docker Hub (needs docker login)
 	docker push $(IMAGE):$(TAG)
 release: build push ## Build then push mindthemath/rmbg:$(TAG)