"""Remote PaddleOCR client — calls Hetzner service."""

import logging
import os
from typing import Dict, List, Tuple

import httpx

logger = logging.getLogger(__name__)

# Both values are read once, at import time.
PADDLEOCR_REMOTE_URL = os.environ.get("PADDLEOCR_REMOTE_URL", "")
PADDLEOCR_API_KEY = os.environ.get("PADDLEOCR_API_KEY", "")

# SECURITY: certificate verification has historically been disabled for this
# client, so it stays off unless explicitly enabled (presumably the remote
# host uses a self-signed certificate — TODO confirm). With verification off,
# the API key and the uploaded image are exposed to man-in-the-middle attacks.
# Set PADDLEOCR_VERIFY_TLS=1 (or true/yes/on) to turn verification on.
PADDLEOCR_VERIFY_TLS = os.environ.get(
    "PADDLEOCR_VERIFY_TLS", ""
).strip().lower() in {"1", "true", "yes", "on"}

_TIMEOUT = 120.0  # Full-page OCR can take 30-90s on large scans

# Fields the response must contain for the return value to be built.
_REQUIRED_FIELDS = ("image_width", "image_height")


async def ocr_remote_paddle(
    image_bytes: bytes,
    filename: str = "scan.png",
) -> Tuple[List[Dict], int, int]:
    """Send image to remote PaddleOCR, return (word_dicts, img_w, img_h).

    The image is POSTed as a multipart ``file`` field to
    ``{PADDLEOCR_REMOTE_URL}/ocr``. When ``PADDLEOCR_API_KEY`` is set it is
    sent in the ``X-API-Key`` header.

    Args:
        image_bytes: Raw image content to upload.
        filename: File name reported in the multipart upload. The part is
            always labelled ``image/png`` regardless of this name.

    Returns:
        A tuple ``(words, image_width, image_height)`` taken from the JSON
        response. ``words`` is an empty list when the response omits it or
        sets it to null.

    Raises:
        RuntimeError: If ``PADDLEOCR_REMOTE_URL`` is not configured.
        httpx.HTTPStatusError: If the service answers with a 4xx/5xx status.
        httpx.HTTPError: On transport failures such as timeouts.
        KeyError: If the response lacks ``image_width`` or ``image_height``.
    """
    if not PADDLEOCR_REMOTE_URL:
        raise RuntimeError("PADDLEOCR_REMOTE_URL not configured")

    headers = {}
    if PADDLEOCR_API_KEY:
        headers["X-API-Key"] = PADDLEOCR_API_KEY

    async with httpx.AsyncClient(
        timeout=_TIMEOUT,
        verify=PADDLEOCR_VERIFY_TLS,
    ) as client:
        resp = await client.post(
            f"{PADDLEOCR_REMOTE_URL.rstrip('/')}/ocr",
            files={"file": (filename, image_bytes, "image/png")},
            headers=headers,
        )
        resp.raise_for_status()

    data = resp.json()
    # A null "words" value is treated like a missing one so len() below and
    # callers iterating the result do not trip over None.
    words = data.get("words") or []

    # Check the dimensions before logging: otherwise the log would report a
    # misleading "0x0" right before the lookup fails.
    missing = [field for field in _REQUIRED_FIELDS if field not in data]
    if missing:
        raise KeyError(
            f"PaddleOCR response missing field(s): {', '.join(missing)}"
        )

    img_w = data["image_width"]
    img_h = data["image_height"]
    logger.info(
        "PaddleOCR remote returned %d words (img %dx%d)",
        len(words),
        img_w,
        img_h,
    )
    return words, img_w, img_h