more keybindings

This commit is contained in:
Michael Pilosov 2026-05-16 22:23:04 -06:00
parent befd3caa1d
commit a544572e8a
4 changed files with 82 additions and 8 deletions

View File

@ -30,15 +30,37 @@ while a model downloads and loads.
### Web UI
Open **http://localhost:8000/** — a two-tab test page (handy over SSH):
Open **http://localhost:8000/** — a single-page test app (handy over SSH):
- **Auto remove** — pick a model variant + resolution.
- **Prompt segment** — type what to keep (e.g. `the dog`), tune the
GroundingDINO box / text thresholds.
Both tabs support a transparency checkerboard preview, click-to-zoom lightbox,
Both modes support a transparency checkerboard preview, click-to-zoom lightbox,
optional crop-to-subject, and download.
#### Keyboard shortcuts
The UI is fully keyboard-drivable. Shortcuts are ignored while typing in a
field and while Ctrl/Cmd/Alt is held.
| Key | Action |
|---------------------|-----------------------------------------------|
| `B` | Toggle the controls sidebar |
| `U` | Open the file picker to upload an image |
| `I` / `O` | Show the input / output image |
| `F` / `Z` | Open the zoom view for the visible image |
| `S` | Save (download PNG), once a result exists |
In the zoom view:
| Key | Action |
|---------------------------|-----------------------------------------|
| `F` / `Z` / `Esc` | Close the zoom view |
| `+` / `-` | Zoom in / out (1×–8×) |
| `0` | Reset zoom & pan |
| Arrows or `H` `J` `K` `L` | Pan (while zoomed past 1×) |
## API
### `POST /predict` — automatic background removal
@ -108,7 +130,7 @@ make dev # uv sync + run the server locally
src/rmbg_as_a_service/model.py BiRefNet / RMBG-2.0 wrapper + compositing
src/rmbg_as_a_service/prompt_segment.py GroundingDINO + SAM pipeline
src/rmbg_as_a_service/server.py LitServe /predict + /segment + web UI
src/rmbg_as_a_service/static/ web UI (index.html)
src/rmbg_as_a_service/static/ web UI (index.html + styles.css)
scripts/client.py stdlib-only test client
Dockerfile / compose.yml CUDA image + nvidia runtime
Makefile build / run / test shortcuts

View File

@ -1,6 +1,6 @@
[project]
name = "rmbg-as-a-service"
version = "0.0.2"
version = "0.0.3dev1"
description = "Background removal as a GPU-accelerated API"
readme = "README.md"
requires-python = ">=3.12"

View File

@ -161,7 +161,7 @@
<!-- lightbox -->
<div id="lightbox" class="lightbox" hidden>
<div class="lb-bar">
<span>SCROLL TO ZOOM &middot; DRAG TO PAN &middot; DOUBLE-CLICK RESETS &middot; ESC CLOSES</span>
<span>SCROLL / +&minus; ZOOM &middot; DRAG OR HJKL / ARROWS PAN &middot; 0 RESETS &middot; F / Z / ESC CLOSE</span>
<button class="lb-close" id="lbClose" title="Close">&#10005;</button>
</div>
<div class="lb-stage" id="lbStage"><img id="lbImg" alt="" /></div>
@ -350,6 +350,39 @@ function openLightbox(src, isResult) {
}
/* hide the zoom view and drop the image it was showing */
function closeLightbox() {
  lightbox.hidden = true;
  lbImg.removeAttribute('src');
}
/*
 * Pick the preview image element that is on screen right now:
 * the output image while the output tab carries the 'active' class,
 * otherwise the input image.
 */
function currentImg() {
  if (tabOutput.classList.contains('active')) {
    return outImg;
  }
  return srcImg;
}
/*
 * 'f' / 'z' handler: flip the zoom view.
 * An open zoom view is closed. A closed one is opened on whichever preview
 * image is visible, but only if that image actually has a src set.
 */
function toggleZoom() {
  if (lightbox.hidden) {
    const visible = currentImg();
    const url = visible.getAttribute('src');
    if (url) {
      // second argument tells openLightbox whether this is the result image
      openLightbox(url, visible === outImg);
    }
  } else {
    closeLightbox();
  }
}
/*
 * Keyboard zoom: multiply the current zoom by `factor`, clamped to 1..8.
 * The pan offset is adjusted using the centre of the stage, measured relative
 * to the image's current top-left corner, as the anchor point. Landing exactly
 * on scale 1 clears the pan offset. lbApply() is always called afterwards.
 */
function lbZoom(factor) {
  const stage = lbStage.getBoundingClientRect();
  const img = lbImg.getBoundingClientRect();
  // stage centre expressed in the image's on-screen coordinate frame
  const anchorX = stage.left + stage.width / 2 - img.left;
  const anchorY = stage.top + stage.height / 2 - img.top;

  const target = Math.max(1, Math.min(8, lbScale * factor));
  if (target === 1) {
    // fully zoomed out: nothing to pan, so recentre
    lbTx = 0;
    lbTy = 0;
  } else {
    const growth = target / lbScale - 1;
    lbTx = lbTx - anchorX * growth;
    lbTy = lbTy - anchorY * growth;
  }
  lbScale = target;
  lbApply();
}
/*
 * Keyboard pan: shift the pan offset by one step along each axis.
 * dx / dy are direction multipliers (callers pass -1, 0 or 1). Does nothing
 * while the zoom is at or below 1, since there is nothing to pan then.
 */
function lbPan(dx, dy) {
  const STEP = 80; // offset added per key press, per axis
  if (!(lbScale <= 1)) {
    lbTx = lbTx + dx * STEP;
    lbTy = lbTy + dy * STEP;
    lbApply();
  }
}
/* clicking either preview opens it in the zoom view; the flag marks the result image */
srcImg.addEventListener('click', () => {
  openLightbox(srcImg.getAttribute('src'), false);
});
outImg.addEventListener('click', () => {
  openLightbox(outImg.getAttribute('src'), true);
});
/* the close button in the zoom view's bar dismisses it */
lbClose.addEventListener('click', closeLightbox);
@ -357,14 +390,33 @@ lightbox.addEventListener('mousedown', e => {
if (e.target === lightbox || e.target === lbStage) closeLightbox();
});
/*
 * Global keyboard shortcuts.
 *
 * While the zoom view is open it captures its own keys: F / Z / Esc close it,
 * + / - zoom, 0 resets, arrows or H J K L pan. Any other key falls through
 * untouched. Otherwise the main UI shortcuts apply (B, U, I, O, F / Z, S),
 * except while the event target is an INPUT, TEXTAREA or SELECT.
 *
 * `k` is declared exactly once: a second `const k` in this scope is a
 * SyntaxError that would stop the whole script from loading.
 */
document.addEventListener('keydown', e => {
  // leave browser / OS chords (Ctrl, Cmd, Alt + key) alone
  if (e.metaKey || e.ctrlKey || e.altKey) return;
  // some synthetic keydown events arrive without a usable `key`
  if (typeof e.key !== 'string') return;
  const k = e.key.toLowerCase();

  // --- zoom view: capture all navigation keys while it's open ---
  if (!lightbox.hidden) {
    if (e.key === 'Escape' || k === 'f' || k === 'z') {
      // ignore auto-repeat so holding the key that opened the view
      // does not immediately close it again
      if (!e.repeat) closeLightbox();
    }
    else if (k === '+' || k === '=') lbZoom(1.25);
    else if (k === '-' || k === '_') lbZoom(1 / 1.25);
    else if (k === '0') lbReset();
    else if (e.key === 'ArrowLeft' || k === 'h') lbPan( 1, 0);
    else if (e.key === 'ArrowRight' || k === 'l') lbPan(-1, 0);
    else if (e.key === 'ArrowUp' || k === 'k') lbPan( 0, 1);
    else if (e.key === 'ArrowDown' || k === 'j') lbPan( 0, -1);
    else return;
    e.preventDefault();
    return;
  }

  // --- main UI shortcuts (ignored while typing in a field) ---
  const t = e.target;
  if (t && (t.tagName === 'INPUT' || t.tagName === 'TEXTAREA' || t.tagName === 'SELECT')) return;
  // every action below is a toggle or a one-shot; a held key must not re-fire it
  if (e.repeat) return;
  if (k === 'b') toggleSidebar();
  else if (k === 'u') fileInput.click();
  else if (k === 'i') showView('input');
  else if (k === 'o') showView('output');
  else if (k === 'f' || k === 'z') toggleZoom();
  else if (k === 's' && !dlbtn.disabled) dl.click();
});
lbStage.addEventListener('wheel', e => {

2
uv.lock generated
View File

@ -1163,7 +1163,7 @@ wheels = [
[[package]]
name = "rmbg-as-a-service"
version = "0.0.2"
version = "0.0.3.dev1"
source = { editable = "." }
dependencies = [
{ name = "einops" },