Move both service images onto python:3.12-slim and make `push` build first.

* Dockerfile: the nvidia/cuda base is replaced by python:3.12-slim, the apt
  and `uv python install` steps are dropped, and uv is told to use only the
  system interpreter (UV_PYTHON_PREFERENCE=only-system).
* build/Dockerfile: becomes a two-stage build; a builder stage pip-installs
  into /opt/venv and the runtime stage copies that venv. The separate
  TestPyPI `pip download` step is replaced by a single `pip install`.
* build/Makefile: `push` now lists `build` as a prerequisite.

diff --git a/Dockerfile b/Dockerfile
index 8fb4c5d..4ce5c2a 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,10 +1,11 @@
-# BiRefNet background removal service — CUDA 12.6 runtime image.
-FROM nvidia/cuda:12.6.1-cudnn-runtime-ubuntu22.04
+# BiRefNet background removal service — CUDA 12.6 inference image.
+# torch's +cu126 wheels bundle their own CUDA/cuDNN, so no nvidia/cuda base is
+# needed; the GPU driver is injected by the nvidia container runtime at run time.
+FROM python:3.12-slim
 
-ENV DEBIAN_FRONTEND=noninteractive \
-    PYTHONUNBUFFERED=1 \
-    UV_PYTHON_INSTALL_DIR=/opt/python \
+ENV PYTHONUNBUFFERED=1 \
     UV_PROJECT_ENVIRONMENT=/app/.venv \
+    UV_PYTHON_PREFERENCE=only-system \
     UV_COMPILE_BYTECODE=1 \
     UV_LINK_MODE=copy \
     HF_HOME=/app/hf_cache \
@@ -13,18 +14,13 @@ ENV DEBIAN_FRONTEND=noninteractive \
 # uv: fast, reproducible Python + dependency management.
 COPY --from=ghcr.io/astral-sh/uv:0.9 /uv /uvx /bin/
 
-RUN apt-get update \
-    && apt-get install -y --no-install-recommends ca-certificates \
-    && rm -rf /var/lib/apt/lists/*
-
 WORKDIR /app
 
-# Install Python + dependencies first so this layer is cached across code changes.
+# Install dependencies first so this layer is cached across code changes.
 # The BuildKit cache mount keeps the uv download cache warm across rebuilds.
 COPY pyproject.toml uv.lock ./
 RUN --mount=type=cache,target=/root/.cache/uv \
-    uv python install 3.12 \
-    && uv sync --no-install-project --no-dev --frozen
+    uv sync --no-install-project --no-dev --frozen
 
 # Application code.
 COPY src ./src
diff --git a/build/Dockerfile b/build/Dockerfile
index c3fea0f..b0b57cc 100644
--- a/build/Dockerfile
+++ b/build/Dockerfile
@@ -1,32 +1,32 @@
-# BiRefNet background removal service — installs the published package on the CUDA 12.6 runtime.
-# Same CUDA base family as the main ../Dockerfile, but installs rmbg-as-a-service from TestPyPI
-# with a plain pip rather than building from the local source tree.
-FROM nvidia/cuda:12.6.1-cudnn-runtime-ubuntu24.04
+# rmbg-as-a-service — slim CUDA inference image.
+# torch's +cu126 wheels bundle their own CUDA/cuDNN, so no nvidia/cuda base is needed;
+# the GPU driver is injected by the nvidia container runtime at run time.
 
-ENV DEBIAN_FRONTEND=noninteractive \
-    PYTHONUNBUFFERED=1 \
-    PIP_BREAK_SYSTEM_PACKAGES=1 \
+# ---- builder: install everything into an isolated venv ----
+FROM python:3.12-slim AS builder
+
+ENV PIP_DISABLE_PIP_VERSION_CHECK=1
+
+RUN python -m venv /opt/venv
+ENV PATH="/opt/venv/bin:${PATH}"
+
+# rmbg-as-a-service from PyPI; CUDA torch from the PyTorch index (its +cu126 build
+# outranks the plain wheel by local-version ordering).
+RUN --mount=type=cache,target=/root/.cache/pip \
+    pip install \
+        --extra-index-url https://download.pytorch.org/whl/cu126 \
+        rmbg-as-a-service==0.0.3 hf-transfer
+
+# ---- runtime: just Python + the prepared venv ----
+FROM python:3.12-slim
+
+ENV PYTHONUNBUFFERED=1 \
     HF_HOME=/app/hf_cache \
     HF_HUB_ENABLE_HF_TRANSFER=1 \
-    PORT=8000
+    PORT=8000 \
+    PATH="/opt/venv/bin:${PATH}"
 
-# Ubuntu 24.04 ships Python 3.12 (the project requires >=3.12).
-RUN apt-get update \
-    && apt-get install -y --no-install-recommends python3 python3-pip ca-certificates \
-    && rm -rf /var/lib/apt/lists/*
-
-# Fetch only the rmbg-as-a-service wheel from TestPyPI (--no-deps), so TestPyPI never takes
-# part in dependency resolution -- otherwise junk squatted packages there (e.g. "FASTAPI")
-# outrank the real ones. Then install it with real PyPI as the index, CUDA torch from the
-# PyTorch index (its +cu126 build outranks the plain wheel by local-version ordering).
-RUN --mount=type=cache,target=/root/.cache/pip \
-    pip download --no-deps --dest /tmp/pkg \
-        --index-url https://test.pypi.org/simple/ \
-        "rmbg-as-a-service==0.0.3" \
-    && pip install \
-        --extra-index-url https://download.pytorch.org/whl/cu126 \
-        /tmp/pkg/*.whl hf-transfer \
-    && rm -rf /tmp/pkg
+COPY --from=builder /opt/venv /opt/venv
 
 EXPOSE 8000
 CMD ["rmbg-as-a-service"]
diff --git a/build/Makefile b/build/Makefile
index 0a59b9e..edd9706 100644
--- a/build/Makefile
+++ b/build/Makefile
@@ -43,7 +43,7 @@ test: ## Send INPUT to the running service, save OUTPUT
 	$(PYTHON) ../scripts/client.py --url http://localhost:$(PORT) \
 		--input $(INPUT) --output $(OUTPUT) --background $(BG)
 
-push: ## Push mindthemath/rmbg:$(TAG) to Docker Hub (needs docker login)
+push: build ## Push mindthemath/rmbg:$(TAG) to Docker Hub (needs docker login)
 	docker push $(IMAGE):$(TAG)
 
 release: build push ## Build then push mindthemath/rmbg:$(TAG)