Update tagger.py
tagger.py CHANGED
@@ -1,83 +1,92 @@
 from __future__ import annotations
 
-"""
-Caption with BLIP and derive simple tags (no POS/NLTK).
-
-- Tags are first unique non-stopword tokens from the caption.
-- Sidecar saved to ./data/<stem>.json
-"""
-
+import json
 import os
-import json as _json
-import datetime as _dt
-import re as _re
-from typing import List, Tuple
+import re
+from pathlib import Path
+from typing import List, Optional
 
-import torch
 from PIL import Image
 from transformers import BlipForConditionalGeneration, BlipProcessor
 
-# Device + singletons
-_device = "cuda" if torch.cuda.is_available() else "cpu"
-_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
-_model = BlipForConditionalGeneration.from_pretrained(
-    "Salesforce/blip-image-captioning-base"
-).to(_device)
+# -------------------- config --------------------
+MODEL_ID = "Salesforce/blip-image-captioning-base"
+DATA_DIR = Path(os.getenv("DATA_DIR", "/app/data"))
+DATA_DIR.mkdir(parents=True, exist_ok=True)  # safe if already exists
 
+# light, built-in stopword list (keeps us NLTK-free)
 _STOP = {
-    "a","an","the","and","or",
-    "is","are",
-    "up","down","left","right"
+    "a", "an", "the", "and", "or", "of", "to", "in", "on", "with", "near",
+    "at", "over", "under", "by", "from", "for", "into", "along", "through",
+    "is", "are", "be", "being", "been", "it", "its", "this", "that",
+    "as", "while", "than", "then", "there", "here",
 }
 
+# -------------------- model cache --------------------
+_processor: Optional[BlipProcessor] = None
+_model: Optional[BlipForConditionalGeneration] = None
+
+
+def init_models() -> None:
+    """Load BLIP once (idempotent)."""
+    global _processor, _model
+    if _processor is None or _model is None:
+        _processor = BlipProcessor.from_pretrained(MODEL_ID)
+        _model = BlipForConditionalGeneration.from_pretrained(MODEL_ID)
+
+
+# -------------------- core functionality --------------------
+def caption_image(img: Image.Image, max_len: int = 30) -> str:
+    """Generate a short caption for the image."""
+    assert _processor and _model, "Call init_models() first"
+    inputs = _processor(images=img, return_tensors="pt")
+    ids = _model.generate(**inputs, max_length=max_len)
+    return _processor.decode(ids[0], skip_special_tokens=True)
+
+
+_TAG_RE = re.compile(r"[a-z0-9-]+")
+
+
+def caption_to_tags(caption: str, top_k: int = 5) -> List[str]:
+    """
+    Convert a caption into up to K simple tags:
+    - normalize to lowercase alnum/hyphen tokens
+    - remove tiny stopword list
+    - keep order of appearance, dedup
+    """
+    tags: List[str] = []
+    seen = set()
+    for tok in _TAG_RE.findall(caption.lower()):
+        if tok in _STOP or tok in seen:
             continue
+        seen.add(tok)
+        tags.append(tok)
+        if len(tags) >= top_k:
+            break
+    return tags
+
 
 def tag_pil_image(
     img: Image.Image,
     stem: str,
     *,
     top_k: int = 5,
-) -> Tuple[str, List[str]]:
-    with torch.inference_mode():
-        ids = _model.generate(**inputs, max_length=30)
-    caption = _processor.decode(ids[0], skip_special_tokens=True)
-
-    # tags
-    tags = _caption_to_tags(caption, top_k)
-
-    # sidecar
-    payload = {
-        "caption": caption,
-        "tags": tags,
-        "timestamp": _dt.datetime.now(_dt.timezone.utc).isoformat(),
-    }
-    (CAP_TAG_DIR / f"{safe_stem}.json").write_text(_json.dumps(payload, indent=2))
+    write_sidecar: bool = True,
+) -> List[str]:
+    """
+    Return ONLY the tags list.
+    (We optionally persist a sidecar JSON with caption + tags.)
+    """
+    cap = caption_image(img)
+    tags = caption_to_tags(cap, top_k=top_k)
 
+    if write_sidecar:
+        payload = {"caption": cap, "tags": tags}
+        sidecar = DATA_DIR / f"{stem}.json"
+        try:
+            sidecar.write_text(json.dumps(payload, indent=2))
+        except Exception:
+            # best-effort; tagging should still succeed
+            pass
 
+    return tags
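
For reviewers, a minimal usage sketch of the new API (the image path is a placeholder; assumes DATA_DIR, which defaults to /app/data and can be overridden via the DATA_DIR environment variable, points somewhere writable):

    from PIL import Image

    import tagger

    tagger.init_models()                          # loads BLIP once; safe to call repeatedly
    img = Image.open("photo.jpg").convert("RGB")  # "photo.jpg" is a placeholder path
    tags = tagger.tag_pil_image(img, "photo")     # also writes <DATA_DIR>/photo.json
    print(tags)

    # caption_to_tags is deterministic for a given caption, e.g.:
    # tagger.caption_to_tags("a dog running in the grass") == ["dog", "running", "grass"]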