"""Lampdra Router: forwards image jobs to a pool of worker Spaces.

The router keeps a cached count of free slots per worker (refreshed by a
background task), picks a worker round-robin, and proxies uploads to it.
"""

import asyncio
import base64
import hmac
import logging
import os

import httpx
from fastapi import FastAPI, File, Header, HTTPException, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import Response

logger = logging.getLogger("lampdra")

app = FastAPI(title="Lampdra Router", version="2.0")

ALLOWED_ORIGINS = [
    "https://lampdra.com",
    "https://www.lampdra.com",
    "https://paul1k-lampdra-router.hf.space",
]

app.add_middleware(
    CORSMiddleware,
    allow_origins=ALLOWED_ORIGINS,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Origins allowed to call POST /remove-background. Built once at import time
# instead of on every request.
ALLOWED_POST_ORIGINS = frozenset(
    {
        "https://lampdra.com",
        "https://www.lampdra.com",
    }
)

# ── API Keys ───────────────────────────────────────────────────────
API_KEY = os.environ.get("LAMPDRA_API_KEY", "")
INTERNAL_API_KEY = os.environ.get("INTERNAL_API_KEY", "")
HF_API_TOKEN = os.environ.get("HF_API_TOKEN", "")


def verify_key(x_api_key: str = Header(default="")):
    """Raise HTTP 401 unless ``x_api_key`` matches the configured API key.

    An unset/empty ``LAMPDRA_API_KEY`` rejects every request. The comparison
    is constant-time so response timing does not leak how much of the key
    matched.

    Raises:
        HTTPException: 401 when the key is missing, wrong, or unconfigured.
    """
    supplied_ok = (
        bool(API_KEY)
        and isinstance(x_api_key, str)
        # Compare as bytes: compare_digest() rejects non-ASCII str arguments.
        and hmac.compare_digest(x_api_key.encode("utf-8"), API_KEY.encode("utf-8"))
    )
    if not supplied_ok:
        raise HTTPException(status_code=401, detail="Invalid API key")


# ── Worker Spaces ──────────────────────────────────────────────────
WORKER_SPACES = [
    "https://paul1k-lampdra-api.hf.space",
    "https://paul1k-lampdra-api-2.hf.space",
    "https://paul1k-lampdra-api-3.hf.space",
    "https://paul1k-lampdra-api-4.hf.space",
    "https://paul1k-lampdra-api-5.hf.space",
]

MAX_WAIT_SECONDS = 300      # longest a request waits for a free slot
POLL_INTERVAL = 1.0         # seconds between slot checks while waiting
REQUEST_TIMEOUT = 180.0     # timeout for the proxied worker request
CACHE_REFRESH = 1.0         # seconds between availability refreshes
MAX_SLOTS_PER_SPACE = 2
HOLD_SECONDS = 15.0         # hold-off applied after a worker comes back

_http_client: httpx.AsyncClient | None = None
# Strong reference to the background refresh task (see startup()).
_refresh_task: asyncio.Task | None = None

# Cached free-slot count per worker, clamped to >= 0.
_availability: dict[str, int] = {url: 0 for url in WORKER_SPACES}
# Last raw ping result per worker; -1 means the ping failed.
_prev_avail: dict[str, int] = {url: 0 for url in WORKER_SPACES}
# Event-loop timestamp before which a worker must not be picked.
_hold_until: dict[str, float] = {url: 0.0 for url in WORKER_SPACES}


async def _ping(client: httpx.AsyncClient, url: str) -> int:
    """Return the worker's reported ``pool_available``, or -1 on any failure."""
    try:
        r = await client.get(f"{url}/", timeout=5.0)
        return int(r.json().get("pool_available", 0))
    except Exception:
        # Best-effort probe: any error (network, bad JSON, bad value) is
        # reported as "unreachable".
        return -1


async def _refresh_loop():
    """Refresh the availability cache every ``CACHE_REFRESH`` seconds, forever.

    A worker whose previous ping failed (-1) and that now reports free slots
    is put on hold for ``HOLD_SECONDS`` before it can be picked again.
    """
    while True:
        await asyncio.sleep(CACHE_REFRESH)
        try:
            results = await asyncio.gather(
                *(_ping(_http_client, url) for url in WORKER_SPACES),
                return_exceptions=True,
            )
            now = asyncio.get_running_loop().time()
            for url, result in zip(WORKER_SPACES, results):
                ping_val = result if isinstance(result, int) else -1
                if _prev_avail[url] == -1 and ping_val > 0:
                    _hold_until[url] = now + HOLD_SECONDS
                _prev_avail[url] = ping_val
                _availability[url] = max(ping_val, 0)
        except Exception:
            # Keep the loop alive, but leave a trace instead of failing
            # silently. Task cancellation is a BaseException and still
            # propagates.
            logger.exception("Availability refresh failed")


_rr_index = 0


def _pick_round_robin() -> str | None:
    """Return the next worker URL with a free slot, or ``None`` if none.

    Scans ``WORKER_SPACES`` starting at the round-robin cursor, skipping
    workers that are on hold or have no cached free slots. On success the
    cursor moves to the entry after the chosen worker.
    """
    global _rr_index
    now = asyncio.get_event_loop().time()
    n = len(WORKER_SPACES)
    for i in range(n):
        idx = (_rr_index + i) % n
        url = WORKER_SPACES[idx]
        if _hold_until[url] > now:
            continue
        if _availability[url] > 0:
            _rr_index = (idx + 1) % n
            return url
    return None


@app.on_event("startup")
async def startup():
    """Create the shared HTTP client, seed the cache, start the refresher."""
    global _http_client, _refresh_task
    _http_client = httpx.AsyncClient(
        limits=httpx.Limits(max_connections=50, max_keepalive_connections=20),
        timeout=httpx.Timeout(REQUEST_TIMEOUT),
    )
    results = await asyncio.gather(
        *(_ping(_http_client, url) for url in WORKER_SPACES),
        return_exceptions=True,
    )
    for url, result in zip(WORKER_SPACES, results):
        ping_val = result if isinstance(result, int) else -1
        _availability[url] = max(ping_val, 0)
        _prev_avail[url] = ping_val
    # The event loop only keeps weak references to tasks; hold a strong one
    # so the refresher cannot be garbage-collected while running.
    _refresh_task = asyncio.create_task(_refresh_loop())


@app.on_event("shutdown")
async def shutdown():
    """Stop the refresher, then close the shared HTTP client."""
    global _refresh_task
    if _refresh_task is not None:
        _refresh_task.cancel()
        # return_exceptions=True absorbs the task's CancelledError.
        await asyncio.gather(_refresh_task, return_exceptions=True)
        _refresh_task = None
    if _http_client:
        await _http_client.aclose()


@app.get("/")
@app.head("/")
async def root():
    """Report router status and the cached free-slot count of each worker."""
    return {
        "status": "Lampdra Router is running",
        "total_free_slots": sum(_availability.values()),
        "total_capacity": len(WORKER_SPACES) * MAX_SLOTS_PER_SPACE,
        "spaces": [
            {"url": url, "pool_available": slots}
            for url, slots in _availability.items()
        ],
    }


@app.post("/remove-background")
async def remove_background(
    file: UploadFile = File(...),
    x_api_key: str = Header(default=""),
    origin: str = Header(default=""),
):
    """Proxy an uploaded image to a free worker and return its response body.

    Waits up to ``MAX_WAIT_SECONDS`` for a worker slot, polling every
    ``POLL_INTERVAL`` seconds.

    Raises:
        HTTPException: 403 for a disallowed ``Origin``, 401 for a bad API
            key, 400 for an empty upload, 503 when no slot frees up in time,
            504 on worker timeout, 502 when the worker is unreachable, or the
            worker's own status code when it answers with anything but 200.
    """
    if origin not in ALLOWED_POST_ORIGINS:
        raise HTTPException(status_code=403, detail="Forbidden")

    verify_key(x_api_key)

    contents = await file.read()
    if not contents:
        # Reject before reserving a worker slot for a request that cannot
        # produce a result.
        raise HTTPException(status_code=400, detail="Uploaded file is empty.")

    waited = 0.0
    target_url = None
    while waited < MAX_WAIT_SECONDS:
        target_url = _pick_round_robin()
        if target_url:
            # Reserve a slot locally so concurrent requests do not all pick
            # the same worker before the next cache refresh.
            _availability[target_url] = max(0, _availability[target_url] - 1)
            break
        await asyncio.sleep(POLL_INTERVAL)
        waited += POLL_INTERVAL

    if not target_url:
        raise HTTPException(
            status_code=503,
            detail="All processing slots are busy. Please try again shortly.",
        )

    try:
        response = await _http_client.post(
            url=f"{target_url}/remove-background",
            files={"file": (file.filename, contents, file.content_type)},
            headers={
                "x-api-key": INTERNAL_API_KEY,
                "origin": "https://paul1k-lampdra-router.hf.space",
            },
            timeout=REQUEST_TIMEOUT,
        )
    except httpx.TimeoutException:
        # Hand the reserved slot back; the worker may still be healthy.
        _availability[target_url] = min(
            MAX_SLOTS_PER_SPACE, _availability[target_url] + 1
        )
        logger.warning("Worker timed out: %s", target_url)
        raise HTTPException(
            status_code=504, detail="Worker timed out. Please try again."
        )
    except httpx.RequestError as exc:
        # Treat the worker as unavailable until the next refresh says
        # otherwise.
        _availability[target_url] = 0
        logger.warning("Could not reach worker %s: %s", target_url, exc)
        raise HTTPException(
            status_code=502, detail="Could not reach worker. Please try again."
        )

    if response.status_code != 200:
        raise HTTPException(status_code=response.status_code, detail=response.text)

    return Response(content=response.content, media_type="image/webp")


# ── /analyze-image ─────────────────────────────────────────────────
# Uses HF Inference Providers via router.huggingface.co/v1
# (OpenAI-compatible endpoint).
#
# Key lessons learned:
# - api-inference.huggingface.co is DEAD → 404 on all models
# - router.huggingface.co/hf-inference/... does NOT support vision models
# - Correct URL: router.huggingface.co/v1/chat/completions
# - Provider is selected via ":provider" suffix on the model name
# - Featherless-AI and Cohere both support VLMs; the models actually tried,
#   in order, are the ones listed in HF_VISION_MODELS
#
HF_CHAT_URL = "https://router.huggingface.co/v1/chat/completions"

# Models verified against live HF hub (inference=warm, image-text-to-text).
# Hyperbolic free tier blocks vision models; featherless needs special auth.
# Cohere confirmed working. Groq Llama-4-Scout also confirmed live.
HF_VISION_MODELS = [
    "CohereLabs/aya-vision-32b:cohere",                 # confirmed working
    "meta-llama/Llama-4-Scout-17B-16E-Instruct:groq",   # groq — fast, free vision
]

# NOTE(review): nothing follows "CAPTION: " and "LABEL: " in the prompt below;
# it looks like placeholder text may have been lost — confirm against the
# intended prompt. Left unchanged here because it is runtime text.
HF_SYSTEM_PROMPT = (
    "You are an image analysis assistant. "
    "When given an image respond with EXACTLY two lines and nothing else:\n"
    "CAPTION: \n"
    "LABEL: "
)

HF_CALL_TIMEOUT = 60.0

# MIME types passed through to the vision model as-is; anything else is
# relabelled as image/jpeg by analyze_image().
_VISION_MIME_TYPES = frozenset(
    {"image/jpeg", "image/png", "image/webp", "image/gif"}
)


def _parse_caption_label(text: str) -> tuple[str, str]:
    """Extract ``(caption, label)`` from a ``CAPTION:`` / ``LABEL:`` reply.

    Prefixes are matched case-insensitively, line by line; if a prefix occurs
    more than once the last occurrence wins. When neither prefix is found,
    the first 120 characters of ``text`` are returned as the caption.
    """
    caption, label = "", ""
    for raw_line in text.splitlines():
        line = raw_line.strip()
        upper = line.upper()
        if upper.startswith("CAPTION:"):
            caption = line.split(":", 1)[1].strip()
        elif upper.startswith("LABEL:"):
            label = line.split(":", 1)[1].strip()

    # If the model didn't follow the format, use the raw text as caption.
    if not caption and not label and text:
        caption = text[:120]
    return caption, label


async def _analyze_with_vision_model(
    contents: bytes, mime: str = "image/jpeg"
) -> tuple[str, str]:
    """Ask each model in ``HF_VISION_MODELS`` in order to describe the image.

    Args:
        contents: Raw image bytes; sent to the model as a base64 data URL.
        mime: MIME type used in the data URL.

    Returns:
        ``(caption, label)`` from the first model that yields a non-empty
        caption or label, or ``("", "")`` if ``HF_API_TOKEN`` is unset or
        every model fails.
    """
    if not HF_API_TOKEN:
        return "", ""

    b64 = base64.b64encode(contents).decode()
    data_url = f"data:{mime};base64,{b64}"
    headers = {
        "Authorization": f"Bearer {HF_API_TOKEN}",
        "Content-Type": "application/json",
    }

    for model_id in HF_VISION_MODELS:
        payload = {
            "model": model_id,
            "max_tokens": 80,
            "messages": [
                {
                    "role": "system",
                    "content": HF_SYSTEM_PROMPT,
                },
                {
                    "role": "user",
                    "content": [
                        {"type": "image_url", "image_url": {"url": data_url}},
                        {"type": "text", "text": "Analyze this image."},
                    ],
                },
            ],
        }

        try:
            resp = await _http_client.post(
                HF_CHAT_URL,
                json=payload,
                headers=headers,
                timeout=HF_CALL_TIMEOUT,
            )
        except Exception as exc:
            logger.warning("Vision model network error (model=%s): %s", model_id, exc)
            continue  # try next model

        logger.info("Vision model status=%d model=%s", resp.status_code, model_id)

        if resp.status_code != 200:
            logger.warning(
                "Vision model error (model=%s): %s", model_id, resp.text[:300]
            )
            continue  # try next model

        # Pull the reply text out of the chat-completion envelope.
        try:
            text = resp.json()["choices"][0]["message"]["content"].strip()
        except Exception as exc:
            # Covers invalid JSON, a missing key/index, and non-string
            # content. Log it so a provider-side format change is visible.
            logger.warning(
                "Vision model returned an unparseable body (model=%s): %s",
                model_id,
                exc,
            )
            continue  # try next model

        logger.info("Vision model response (model=%s): %s", model_id, text)

        caption, label = _parse_caption_label(text)
        if caption or label:
            return caption, label  # success — stop trying

    return "", ""  # all models failed


@app.post("/analyze-image")
async def analyze_image(
    file: UploadFile = File(...),
    x_api_key: str = Header(default=""),
):
    """Return a caption and label for the uploaded image.

    Returns:
        ``{"caption": ..., "vit_label": ...}``.

    Raises:
        HTTPException: 401 for a bad API key, 400 for an empty upload, 503
            when ``HF_API_TOKEN`` is not configured or no vision model
            produced a result.
    """
    verify_key(x_api_key)

    if not HF_API_TOKEN:
        raise HTTPException(
            status_code=503,
            detail="AI analysis unavailable — HF token not configured.",
        )

    contents = await file.read()
    if not contents:
        # Don't spend an upstream model call on an image that has no bytes.
        raise HTTPException(status_code=400, detail="Uploaded file is empty.")

    mime = file.content_type or "image/jpeg"
    if mime not in _VISION_MIME_TYPES:
        mime = "image/jpeg"

    caption, vit_label = await _analyze_with_vision_model(contents, mime)

    if not caption and not vit_label:
        raise HTTPException(
            status_code=503,
            detail="AI vision model unavailable. Please try again.",
        )

    return {
        "caption": caption,
        "vit_label": vit_label,
    }