smaller runtime image

This commit is contained in:
Michael Pilosov 2026-05-16 23:30:58 -06:00
parent 27846f661e
commit 0b53dade06
3 changed files with 34 additions and 38 deletions

View File

@ -1,10 +1,11 @@
# BiRefNet background removal service — CUDA 12.6 runtime image. # BiRefNet background removal service — CUDA 12.6 inference image.
FROM nvidia/cuda:12.6.1-cudnn-runtime-ubuntu22.04 # torch's +cu126 wheels bundle their own CUDA/cuDNN, so no nvidia/cuda base is
# needed; the GPU driver is injected by the nvidia container runtime at run time.
FROM python:3.12-slim
ENV DEBIAN_FRONTEND=noninteractive \ ENV PYTHONUNBUFFERED=1 \
PYTHONUNBUFFERED=1 \
UV_PYTHON_INSTALL_DIR=/opt/python \
UV_PROJECT_ENVIRONMENT=/app/.venv \ UV_PROJECT_ENVIRONMENT=/app/.venv \
UV_PYTHON_PREFERENCE=only-system \
UV_COMPILE_BYTECODE=1 \ UV_COMPILE_BYTECODE=1 \
UV_LINK_MODE=copy \ UV_LINK_MODE=copy \
HF_HOME=/app/hf_cache \ HF_HOME=/app/hf_cache \
@ -13,18 +14,13 @@ ENV DEBIAN_FRONTEND=noninteractive \
# uv: fast, reproducible Python + dependency management. # uv: fast, reproducible Python + dependency management.
COPY --from=ghcr.io/astral-sh/uv:0.9 /uv /uvx /bin/ COPY --from=ghcr.io/astral-sh/uv:0.9 /uv /uvx /bin/
RUN apt-get update \
&& apt-get install -y --no-install-recommends ca-certificates \
&& rm -rf /var/lib/apt/lists/*
WORKDIR /app WORKDIR /app
# Install Python + dependencies first so this layer is cached across code changes. # Install dependencies first so this layer is cached across code changes.
# The BuildKit cache mount keeps the uv download cache warm across rebuilds. # The BuildKit cache mount keeps the uv download cache warm across rebuilds.
COPY pyproject.toml uv.lock ./ COPY pyproject.toml uv.lock ./
RUN --mount=type=cache,target=/root/.cache/uv \ RUN --mount=type=cache,target=/root/.cache/uv \
uv python install 3.12 \ uv sync --no-install-project --no-dev --frozen
&& uv sync --no-install-project --no-dev --frozen
# Application code. # Application code.
COPY src ./src COPY src ./src

View File

@ -1,32 +1,32 @@
# BiRefNet background removal service — installs the published package on the CUDA 12.6 runtime. # rmbg-as-a-service — slim CUDA inference image.
# Same CUDA base family as the main ../Dockerfile, but installs rmbg-as-a-service from TestPyPI # torch's +cu126 wheels bundle their own CUDA/cuDNN, so no nvidia/cuda base is needed;
# with a plain pip rather than building from the local source tree. # the GPU driver is injected by the nvidia container runtime at run time.
FROM nvidia/cuda:12.6.1-cudnn-runtime-ubuntu24.04
ENV DEBIAN_FRONTEND=noninteractive \ # ---- builder: install everything into an isolated venv ----
PYTHONUNBUFFERED=1 \ FROM python:3.12-slim AS builder
PIP_BREAK_SYSTEM_PACKAGES=1 \
ENV PIP_DISABLE_PIP_VERSION_CHECK=1
RUN python -m venv /opt/venv
ENV PATH="/opt/venv/bin:${PATH}"
# rmbg-as-a-service from PyPI; CUDA torch from the PyTorch index (its +cu126 build
# outranks the plain wheel by local-version ordering).
RUN --mount=type=cache,target=/root/.cache/pip \
pip install \
--extra-index-url https://download.pytorch.org/whl/cu126 \
rmbg-as-a-service==0.0.3 hf-transfer
# ---- runtime: just Python + the prepared venv ----
FROM python:3.12-slim
ENV PYTHONUNBUFFERED=1 \
HF_HOME=/app/hf_cache \ HF_HOME=/app/hf_cache \
HF_HUB_ENABLE_HF_TRANSFER=1 \ HF_HUB_ENABLE_HF_TRANSFER=1 \
PORT=8000 PORT=8000 \
PATH="/opt/venv/bin:${PATH}"
# Ubuntu 24.04 ships Python 3.12 (the project requires >=3.12). COPY --from=builder /opt/venv /opt/venv
RUN apt-get update \
&& apt-get install -y --no-install-recommends python3 python3-pip ca-certificates \
&& rm -rf /var/lib/apt/lists/*
# Fetch only the rmbg-as-a-service wheel from TestPyPI (--no-deps), so TestPyPI never takes
# part in dependency resolution -- otherwise junk squatted packages there (e.g. "FASTAPI")
# outrank the real ones. Then install it with real PyPI as the index, CUDA torch from the
# PyTorch index (its +cu126 build outranks the plain wheel by local-version ordering).
RUN --mount=type=cache,target=/root/.cache/pip \
pip download --no-deps --dest /tmp/pkg \
--index-url https://test.pypi.org/simple/ \
"rmbg-as-a-service==0.0.3" \
&& pip install \
--extra-index-url https://download.pytorch.org/whl/cu126 \
/tmp/pkg/*.whl hf-transfer \
&& rm -rf /tmp/pkg
EXPOSE 8000 EXPOSE 8000
CMD ["rmbg-as-a-service"] CMD ["rmbg-as-a-service"]

View File

@ -43,7 +43,7 @@ test: ## Send INPUT to the running service, save OUTPUT
$(PYTHON) ../scripts/client.py --url http://localhost:$(PORT) \ $(PYTHON) ../scripts/client.py --url http://localhost:$(PORT) \
--input $(INPUT) --output $(OUTPUT) --background $(BG) --input $(INPUT) --output $(OUTPUT) --background $(BG)
push: ## Push mindthemath/rmbg:$(TAG) to Docker Hub (needs docker login) push: build ## Push mindthemath/rmbg:$(TAG) to Docker Hub (needs docker login)
docker push $(IMAGE):$(TAG) docker push $(IMAGE):$(TAG)
release: build push ## Build then push mindthemath/rmbg:$(TAG) release: build push ## Build then push mindthemath/rmbg:$(TAG)