""" TrOCR Client - Connects to external TrOCR service (Mac Mini). This client forwards OCR requests to the TrOCR service running on the Mac Mini, enabling handwriting recognition without requiring local GPU/ML dependencies. Privacy: Images are sent over the local network only - no cloud. """ import os import httpx import logging from typing import Optional, List, Dict from dataclasses import dataclass logger = logging.getLogger(__name__) # Mac Mini TrOCR Service URL TROCR_SERVICE_URL = os.environ.get( "TROCR_SERVICE_URL", "http://192.168.178.163:8084" ) @dataclass class OCRResult: """Result from TrOCR extraction.""" text: str confidence: float processing_time_ms: int device: str = "remote" class TrOCRClient: """ Client for external TrOCR service. Usage: client = TrOCRClient() # Check if service is available if await client.is_available(): result = await client.extract_text(image_bytes) print(result.text) """ def __init__(self, base_url: Optional[str] = None): self.base_url = base_url or TROCR_SERVICE_URL self._client: Optional[httpx.AsyncClient] = None async def _get_client(self) -> httpx.AsyncClient: """Get or create HTTP client.""" if self._client is None or self._client.is_closed: self._client = httpx.AsyncClient( base_url=self.base_url, timeout=300.0 # 5 min timeout for model loading ) return self._client async def close(self): """Close the HTTP client.""" if self._client and not self._client.is_closed: await self._client.aclose() async def is_available(self) -> bool: """Check if TrOCR service is available.""" try: client = await self._get_client() response = await client.get("/health", timeout=5.0) return response.status_code == 200 except Exception as e: logger.warning(f"TrOCR service not available: {e}") return False async def get_status(self) -> Dict: """Get TrOCR service status.""" try: client = await self._get_client() response = await client.get("/api/v1/status") response.raise_for_status() return response.json() except Exception as e: logger.error(f"Failed to get TrOCR status: {e}") return { "status": "unavailable", "error": str(e) } async def extract_text( self, image_data: bytes, filename: str = "image.png", detect_lines: bool = True ) -> OCRResult: """ Extract text from an image using TrOCR. Args: image_data: Raw image bytes filename: Original filename detect_lines: Whether to detect individual lines Returns: OCRResult with extracted text """ try: client = await self._get_client() files = {"file": (filename, image_data, "image/png")} params = {"detect_lines": str(detect_lines).lower()} response = await client.post( "/api/v1/extract", files=files, params=params ) response.raise_for_status() data = response.json() return OCRResult( text=data.get("text", ""), confidence=data.get("confidence", 0.0), processing_time_ms=data.get("processing_time_ms", 0), device=data.get("device", "remote") ) except httpx.TimeoutException: logger.error("TrOCR request timed out (model may be loading)") raise except Exception as e: logger.error(f"TrOCR extraction failed: {e}") raise async def batch_extract( self, images: List[bytes], filenames: Optional[List[str]] = None, detect_lines: bool = True ) -> List[OCRResult]: """ Extract text from multiple images. Args: images: List of image bytes filenames: Optional list of filenames detect_lines: Whether to detect individual lines Returns: List of OCRResult """ if filenames is None: filenames = [f"image_{i}.png" for i in range(len(images))] try: client = await self._get_client() files = [ ("files", (fn, img, "image/png")) for fn, img in zip(filenames, images) ] response = await client.post( "/api/v1/batch-extract", files=files ) response.raise_for_status() data = response.json() results = [] for item in data.get("results", []): results.append(OCRResult( text=item.get("text", ""), confidence=item.get("confidence", 0.85), processing_time_ms=0, device="remote" )) return results except Exception as e: logger.error(f"TrOCR batch extraction failed: {e}") raise # Singleton instance _trocr_client: Optional[TrOCRClient] = None def get_trocr_client() -> TrOCRClient: """Get the TrOCR client singleton.""" global _trocr_client if _trocr_client is None: _trocr_client = TrOCRClient() return _trocr_client async def extract_text_from_image( image_data: bytes, filename: str = "image.png" ) -> OCRResult: """ Convenience function to extract text from an image. Args: image_data: Raw image bytes filename: Original filename Returns: OCRResult with extracted text """ client = get_trocr_client() return await client.extract_text(image_data, filename)