diff --git a/Dockerfile b/Dockerfile
index 8fb4c5d..4ce5c2a 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,10 +1,11 @@
-# BiRefNet background removal service — CUDA 12.6 runtime image.
-FROM nvidia/cuda:12.6.1-cudnn-runtime-ubuntu22.04
+# BiRefNet background removal service — CUDA 12.6 inference image.
+# torch's +cu126 wheels bundle their own CUDA/cuDNN, so no nvidia/cuda base is
+# needed; the GPU driver is injected by the nvidia container runtime at run time.
+FROM python:3.12-slim
 
-ENV DEBIAN_FRONTEND=noninteractive \
-    PYTHONUNBUFFERED=1 \
-    UV_PYTHON_INSTALL_DIR=/opt/python \
+ENV PYTHONUNBUFFERED=1 \
     UV_PROJECT_ENVIRONMENT=/app/.venv \
+    UV_PYTHON_PREFERENCE=only-system \
     UV_COMPILE_BYTECODE=1 \
     UV_LINK_MODE=copy \
     HF_HOME=/app/hf_cache \
@@ -13,18 +14,13 @@ ENV DEBIAN_FRONTEND=noninteractive \
 # uv: fast, reproducible Python + dependency management.
 COPY --from=ghcr.io/astral-sh/uv:0.9 /uv /uvx /bin/
 
-RUN apt-get update \
-    && apt-get install -y --no-install-recommends ca-certificates \
-    && rm -rf /var/lib/apt/lists/*
-
 WORKDIR /app
 
-# Install Python + dependencies first so this layer is cached across code changes.
+# Install dependencies first so this layer is cached across code changes. uv uses the base image's Python (UV_PYTHON_PREFERENCE=only-system) rather than a uv-managed one.
 # The BuildKit cache mount keeps the uv download cache warm across rebuilds.
 COPY pyproject.toml uv.lock ./
 RUN --mount=type=cache,target=/root/.cache/uv \
-    uv python install 3.12 \
-    && uv sync --no-install-project --no-dev --frozen
+    uv sync --no-install-project --no-dev --frozen
 
 # Application code.
 COPY src ./src
diff --git a/build/Dockerfile b/build/Dockerfile
index c3fea0f..b0b57cc 100644
--- a/build/Dockerfile
+++ b/build/Dockerfile
@@ -1,32 +1,32 @@
-# BiRefNet background removal service — installs the published package on the CUDA 12.6 runtime.
-# Same CUDA base family as the main ../Dockerfile, but installs rmbg-as-a-service from TestPyPI
-# with a plain pip rather than building from the local source tree.
-FROM nvidia/cuda:12.6.1-cudnn-runtime-ubuntu24.04
+# rmbg-as-a-service — slim CUDA inference image.
+# torch's +cu126 wheels bundle their own CUDA/cuDNN, so no nvidia/cuda base is needed;
+# the GPU driver is injected by the nvidia container runtime at run time.
 
-ENV DEBIAN_FRONTEND=noninteractive \
-    PYTHONUNBUFFERED=1 \
-    PIP_BREAK_SYSTEM_PACKAGES=1 \
+# ---- builder: create /opt/venv and put it first on PATH so the pip below installs into it ----
+FROM python:3.12-slim AS builder
+
+ENV PIP_DISABLE_PIP_VERSION_CHECK=1
+
+RUN python -m venv /opt/venv
+ENV PATH="/opt/venv/bin:${PATH}"
+
+# rmbg-as-a-service (pinned) and hf-transfer from PyPI; CUDA torch from the extra PyTorch index (its +cu126
+# build outranks the plain wheel by local-version ordering). The cache mount keeps pip's cache across rebuilds.
+RUN --mount=type=cache,target=/root/.cache/pip \
+    pip install \
+        --extra-index-url https://download.pytorch.org/whl/cu126 \
+        rmbg-as-a-service==0.0.3 hf-transfer
+
+# ---- runtime: same base image as the builder, so the copied venv's interpreter path stays valid ----
+FROM python:3.12-slim
+
+ENV PYTHONUNBUFFERED=1 \
     HF_HOME=/app/hf_cache \
     HF_HUB_ENABLE_HF_TRANSFER=1 \
-    PORT=8000
+    PORT=8000 \
+    PATH="/opt/venv/bin:${PATH}"
 
-# Ubuntu 24.04 ships Python 3.12 (the project requires >=3.12).
-RUN apt-get update \
-    && apt-get install -y --no-install-recommends python3 python3-pip ca-certificates \
-    && rm -rf /var/lib/apt/lists/*
-
-# Fetch only the rmbg-as-a-service wheel from TestPyPI (--no-deps), so TestPyPI never takes
-# part in dependency resolution -- otherwise junk squatted packages there (e.g. "FASTAPI")
-# outrank the real ones. Then install it with real PyPI as the index, CUDA torch from the
-# PyTorch index (its +cu126 build outranks the plain wheel by local-version ordering).
-RUN --mount=type=cache,target=/root/.cache/pip \
-    pip download --no-deps --dest /tmp/pkg \
-        --index-url https://test.pypi.org/simple/ \
-        "rmbg-as-a-service==0.0.3" \
-    && pip install \
-        --extra-index-url https://download.pytorch.org/whl/cu126 \
-        /tmp/pkg/*.whl hf-transfer \
-    && rm -rf /tmp/pkg
+COPY --from=builder /opt/venv /opt/venv
 
 EXPOSE 8000
 CMD ["rmbg-as-a-service"]
diff --git a/build/Makefile b/build/Makefile
index 0a59b9e..edd9706 100644
--- a/build/Makefile
+++ b/build/Makefile
@@ -43,7 +43,7 @@ test: ## Send INPUT to the running service, save OUTPUT
 	$(PYTHON) ../scripts/client.py --url http://localhost:$(PORT) \
 		--input $(INPUT) --output $(OUTPUT) --background $(BG)
 
-push: ## Push mindthemath/rmbg:$(TAG) to Docker Hub (needs docker login)
+push: build ## Build, then push mindthemath/rmbg:$(TAG) to Docker Hub (needs docker login)
 	docker push $(IMAGE):$(TAG)
 
 release: build push ## Build then push mindthemath/rmbg:$(TAG)