# breakpilot-pwa/backend/klausur/services/trocr_client.py

"""
TrOCR Client - Connects to external TrOCR service (Mac Mini).

This client forwards OCR requests to the TrOCR service running on
the Mac Mini, enabling handwriting recognition without requiring
local GPU/ML dependencies.

Privacy: Images are sent over the local network only - no cloud.
"""
import os
import httpx
import logging
from typing import Optional, List, Dict
from dataclasses import dataclass

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Base URL of the TrOCR service (Mac Mini). The TROCR_SERVICE_URL
# environment variable, when set, overrides the built-in address.
TROCR_SERVICE_URL = os.getenv("TROCR_SERVICE_URL", "http://192.168.178.163:8084")


@dataclass
class OCRResult:
    """Result from TrOCR extraction.

    Plain value object holding the text recognized for one image together
    with the metadata reported alongside it.
    """
    # Recognized text.
    text: str
    # Confidence score for the extraction
    # (presumably in the range 0.0-1.0 -- confirm against the service).
    confidence: float
    # Processing time in milliseconds.
    processing_time_ms: int
    # Label of the device that ran the OCR; defaults to "remote".
    device: str = "remote"


class TrOCRClient:
    """
    Async HTTP client for an external TrOCR service.

    The underlying ``httpx.AsyncClient`` is created lazily on first use and
    reused by subsequent calls. Call ``close()`` once the client is no
    longer needed.

    Usage:
        client = TrOCRClient()

        # Check if service is available
        if await client.is_available():
            result = await client.extract_text(image_bytes)
            print(result.text)

        await client.close()
    """

    def __init__(self, base_url: Optional[str] = None):
        """
        Create a client without opening any connection yet.

        Args:
            base_url: Root URL of the TrOCR service. Falls back to
                TROCR_SERVICE_URL when omitted or empty.
        """
        self.base_url = base_url or TROCR_SERVICE_URL
        self._client: Optional[httpx.AsyncClient] = None

    async def _get_client(self) -> httpx.AsyncClient:
        """Return the shared HTTP client, (re)creating it if missing or closed."""
        if self._client is None or self._client.is_closed:
            self._client = httpx.AsyncClient(
                base_url=self.base_url,
                timeout=300.0  # 5 min timeout for model loading
            )
        return self._client

    async def close(self):
        """Close the HTTP client if one is currently open."""
        if self._client and not self._client.is_closed:
            await self._client.aclose()

    @staticmethod
    def _to_result(payload: Dict, default_confidence: float) -> OCRResult:
        """Build an OCRResult from one JSON object returned by the service."""
        return OCRResult(
            text=payload.get("text", ""),
            confidence=payload.get("confidence", default_confidence),
            processing_time_ms=payload.get("processing_time_ms", 0),
            device=payload.get("device", "remote"),
        )

    async def is_available(self) -> bool:
        """
        Check if TrOCR service is available.

        Returns:
            True when ``GET /health`` answers with status 200 within
            5 seconds; False on any other status or on any error.
        """
        try:
            client = await self._get_client()
            # Short per-request timeout so a dead host is detected quickly
            # instead of waiting for the client's 5 minute default.
            response = await client.get("/health", timeout=5.0)
            return response.status_code == 200
        except Exception as e:
            # Deliberate best-effort: any failure means "not available".
            logger.warning("TrOCR service not available: %s", e)
            return False

    async def get_status(self) -> Dict:
        """
        Get TrOCR service status.

        Returns:
            The JSON body of ``GET /api/v1/status``. On any failure a dict
            with ``"status": "unavailable"`` and the error text under
            ``"error"`` is returned instead of raising.
        """
        try:
            client = await self._get_client()
            response = await client.get("/api/v1/status")
            response.raise_for_status()
            return response.json()
        except Exception as e:
            logger.error("Failed to get TrOCR status: %s", e)
            return {
                "status": "unavailable",
                "error": str(e)
            }

    async def extract_text(
        self,
        image_data: bytes,
        filename: str = "image.png",
        detect_lines: bool = True
    ) -> OCRResult:
        """
        Extract text from an image using TrOCR.

        Args:
            image_data: Raw image bytes
            filename: Original filename
            detect_lines: Whether to detect individual lines

        Returns:
            OCRResult with extracted text. Fields missing from the response
            default to empty text, confidence 0.0, 0 ms and device "remote".

        Raises:
            httpx.TimeoutException: The request timed out.
            Exception: Any other transport, HTTP-status or decoding error;
                it is logged and re-raised unchanged.
        """
        try:
            client = await self._get_client()

            files = {"file": (filename, image_data, "image/png")}
            # The flag is sent as a lowercase "true"/"false" query parameter.
            params = {"detect_lines": str(detect_lines).lower()}

            response = await client.post(
                "/api/v1/extract",
                files=files,
                params=params
            )
            response.raise_for_status()

            return self._to_result(response.json(), 0.0)

        except httpx.TimeoutException:
            logger.error("TrOCR request timed out (model may be loading)")
            raise
        except Exception as e:
            logger.error("TrOCR extraction failed: %s", e)
            raise

    async def batch_extract(
        self,
        images: List[bytes],
        filenames: Optional[List[str]] = None,
        detect_lines: bool = True
    ) -> List[OCRResult]:
        """
        Extract text from multiple images.

        Args:
            images: List of image bytes
            filenames: Optional list of filenames, one per image. When
                omitted, names of the form ``image_<index>.png`` are used.
            detect_lines: Whether to detect individual lines

        Returns:
            List of OCRResult, one per entry of the response's ``results``
            list. An empty ``images`` list yields ``[]`` without contacting
            the service.

        Raises:
            ValueError: ``filenames`` is given but its length differs from
                ``images``.
            Exception: Any transport, HTTP-status or decoding error; it is
                logged and re-raised unchanged.
        """
        if filenames is None:
            filenames = [f"image_{i}.png" for i in range(len(images))]
        elif len(filenames) != len(images):
            # zip() would silently drop the surplus images or names.
            raise ValueError(
                f"filenames has {len(filenames)} entries "
                f"but images has {len(images)}"
            )

        if not images:
            return []

        try:
            client = await self._get_client()

            files = [
                ("files", (fn, img, "image/png"))
                for fn, img in zip(filenames, images)
            ]
            # Forward the flag the same way extract_text does.
            # NOTE(review): confirm the batch endpoint honors detect_lines.
            params = {"detect_lines": str(detect_lines).lower()}

            response = await client.post(
                "/api/v1/batch-extract",
                files=files,
                params=params
            )
            response.raise_for_status()

            data = response.json()
            # NOTE(review): 0.85 is the fallback when an item carries no
            # confidence; extract_text falls back to 0.0 -- confirm intent.
            results = [
                self._to_result(item, 0.85)
                for item in data.get("results", [])
            ]

            if len(results) != len(images):
                # Callers cannot pair results with inputs in this case.
                logger.warning(
                    "TrOCR batch returned %d results for %d images",
                    len(results),
                    len(images),
                )

            return results

        except Exception as e:
            logger.error("TrOCR batch extraction failed: %s", e)
            raise


# Lazily created client instance shared through get_trocr_client().
_trocr_client: Optional[TrOCRClient] = None


def get_trocr_client() -> TrOCRClient:
    """Return the shared TrOCRClient, creating it on the first call."""
    global _trocr_client
    if _trocr_client is not None:
        return _trocr_client
    _trocr_client = TrOCRClient()
    return _trocr_client


async def extract_text_from_image(
    image_data: bytes,
    filename: str = "image.png"
) -> OCRResult:
    """
    Run OCR on a single image through the shared TrOCR client.

    Args:
        image_data: Raw image bytes
        filename: Original filename

    Returns:
        OCRResult with extracted text
    """
    return await get_trocr_client().extract_text(image_data, filename)