# rmbg-as-a-service — slim CUDA inference image.
# torch's +cu126 wheels bundle their own CUDA/cuDNN, so no nvidia/cuda base is needed;
# the GPU driver is injected by the nvidia container runtime at run time.

# ---- builder stage: assemble a self-contained virtualenv at /opt/venv ----
FROM python:3.12-slim AS builder

# Skip pip's self-update check, and put the venv's bin directory first on PATH
# so that, once the venv exists, `pip` below is the venv's own pip.
# (PATH is set before the venv is created; until then /opt/venv/bin is empty
# and `python` still resolves to the base image interpreter.)
ENV PIP_DISABLE_PIP_VERSION_CHECK=1 \
    PATH="/opt/venv/bin:${PATH}"

RUN python -m venv /opt/venv

# The service itself comes from PyPI; the PyTorch index is added as an extra
# source so the CUDA 12.6 torch build is available. For a given torch release,
# the "+cu126" local version sorts above the plain wheel, so pip selects it.
# The pip download cache lives in a BuildKit cache mount and never lands in a layer.
RUN --mount=type=cache,target=/root/.cache/pip \
    pip install \
        --extra-index-url=https://download.pytorch.org/whl/cu126 \
        rmbg-as-a-service==0.0.3 \
        hf-transfer

# ---- runtime: just Python + the prepared venv, run as an unprivileged user ----
FROM python:3.12-slim

# PYTHONUNBUFFERED: flush stdout/stderr immediately so container logs are live.
# HF_HOME: Hugging Face cache location (created below, owned by the service user).
# HF_HUB_ENABLE_HF_TRANSFER: opt in to hf-transfer, installed in the builder stage.
# PORT: presumably read by the service to pick its listen port — confirm against
#       the rmbg-as-a-service docs; kept in sync with EXPOSE below.
# PATH: venv first, so the `rmbg-as-a-service` console script resolves from /opt/venv.
ENV PYTHONUNBUFFERED=1 \
    HF_HOME=/app/hf_cache \
    HF_HUB_ENABLE_HF_TRANSFER=1 \
    PORT=8000 \
    PATH="/opt/venv/bin:${PATH}"

# Dedicated system account with a fixed numeric UID/GID, so orchestrators can
# verify non-root execution (e.g. Kubernetes runAsNonRoot). /app doubles as the
# account's home, giving libraries that write under $HOME a writable location,
# and /app/hf_cache is pre-created so model downloads succeed without root.
# NOTE: a host directory bind-mounted over /app/hf_cache must be writable by UID 10001.
RUN groupadd --system --gid 10001 app \
    && useradd --system --uid 10001 --gid app --home-dir /app --no-create-home app \
    && install -d -o app -g app /app /app/hf_cache

# The venv stays root-owned: readable and executable by the service, not writable.
COPY --from=builder /opt/venv /opt/venv

WORKDIR /app
USER 10001:10001

EXPOSE 8000
CMD ["rmbg-as-a-service"]
