This repository has been archived on 2026-02-15. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
breakpilot-pwa/backend/klausur/services/trocr_client.py
BreakPilot Dev 19855efacc
Some checks failed
Tests / Go Tests (push) Has been cancelled
Tests / Python Tests (push) Has been cancelled
Tests / Integration Tests (push) Has been cancelled
Tests / Go Lint (push) Has been cancelled
Tests / Python Lint (push) Has been cancelled
Tests / Security Scan (push) Has been cancelled
Tests / All Checks Passed (push) Has been cancelled
Security Scanning / Secret Scanning (push) Has been cancelled
Security Scanning / Dependency Vulnerability Scan (push) Has been cancelled
Security Scanning / Go Security Scan (push) Has been cancelled
Security Scanning / Python Security Scan (push) Has been cancelled
Security Scanning / Node.js Security Scan (push) Has been cancelled
Security Scanning / Docker Image Security (push) Has been cancelled
Security Scanning / Security Summary (push) Has been cancelled
CI/CD Pipeline / Go Tests (push) Has been cancelled
CI/CD Pipeline / Python Tests (push) Has been cancelled
CI/CD Pipeline / Website Tests (push) Has been cancelled
CI/CD Pipeline / Linting (push) Has been cancelled
CI/CD Pipeline / Security Scan (push) Has been cancelled
CI/CD Pipeline / Docker Build & Push (push) Has been cancelled
CI/CD Pipeline / Integration Tests (push) Has been cancelled
CI/CD Pipeline / Deploy to Staging (push) Has been cancelled
CI/CD Pipeline / Deploy to Production (push) Has been cancelled
CI/CD Pipeline / CI Summary (push) Has been cancelled
ci/woodpecker/manual/build-ci-image Pipeline was successful
ci/woodpecker/manual/main Pipeline failed
feat: BreakPilot PWA - Full codebase (clean push without large binaries)
All services: admin-v2, studio-v2, website, ai-compliance-sdk,
consent-service, klausur-service, voice-service, and infrastructure.
Large PDFs and compiled binaries excluded via .gitignore.
2026-02-11 13:25:58 +01:00

215 lines
5.9 KiB
Python

"""
TrOCR Client - Connects to external TrOCR service (Mac Mini).
This client forwards OCR requests to the TrOCR service running on
the Mac Mini, enabling handwriting recognition without requiring
local GPU/ML dependencies.
Privacy: Images are sent over the local network only - no cloud.
"""
import os
import httpx
import logging
from typing import Optional, List, Dict
from dataclasses import dataclass
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Base URL of the TrOCR service running on the Mac Mini.
# The TROCR_SERVICE_URL environment variable takes precedence; the
# address below is only used when that variable is not set.
_DEFAULT_TROCR_SERVICE_URL = "http://192.168.178.163:8084"
TROCR_SERVICE_URL = os.getenv("TROCR_SERVICE_URL", _DEFAULT_TROCR_SERVICE_URL)
@dataclass
class OCRResult:
    """Outcome of a single TrOCR text-extraction call."""

    # Text recognized in the image.
    text: str
    # Confidence value reported for the recognition.
    confidence: float
    # Time spent processing the image, in milliseconds.
    processing_time_ms: int
    # Label of the device that did the work; "remote" unless overridden.
    device: str = "remote"
class TrOCRClient:
    """
    Async client for an external TrOCR service.

    A single ``httpx.AsyncClient`` is created lazily on first use and
    reused for later requests. Release it with ``close()`` or by using
    the client as an async context manager.

    Usage:
        async with TrOCRClient() as client:
            # Check if service is available
            if await client.is_available():
                result = await client.extract_text(image_bytes)
                print(result.text)
    """

    def __init__(self, base_url: Optional[str] = None):
        """
        Args:
            base_url: Base URL of the TrOCR service. Falls back to the
                module-level ``TROCR_SERVICE_URL`` when omitted or empty.
        """
        self.base_url = base_url or TROCR_SERVICE_URL
        # Created on demand by _get_client(); None until the first request.
        self._client: Optional[httpx.AsyncClient] = None

    async def __aenter__(self) -> "TrOCRClient":
        """Enter ``async with``; returns this client."""
        return self

    async def __aexit__(self, exc_type, exc, tb) -> None:
        """Leave ``async with``; closes the underlying HTTP client."""
        await self.close()

    async def _get_client(self) -> "httpx.AsyncClient":
        """Return the shared HTTP client, (re)creating it if missing or closed."""
        if self._client is None or self._client.is_closed:
            self._client = httpx.AsyncClient(
                base_url=self.base_url,
                timeout=300.0,  # 5 min timeout for model loading
            )
        return self._client

    async def close(self):
        """Close the HTTP client, if one is open. Safe to call repeatedly."""
        # Detach first so a closed client is never left referenced.
        client, self._client = self._client, None
        if client is not None and not client.is_closed:
            await client.aclose()

    async def is_available(self) -> bool:
        """
        Check if the TrOCR service is available.

        Returns:
            True when ``GET /health`` answers with status 200 within
            5 seconds; False on any other status or on any error
            (errors are logged as warnings, never raised).
        """
        try:
            client = await self._get_client()
            # Short per-request timeout: a health probe must not wait
            # for the client's long default timeout.
            response = await client.get("/health", timeout=5.0)
            return response.status_code == 200
        except Exception as e:
            logger.warning("TrOCR service not available: %s", e)
            return False

    async def get_status(self) -> Dict:
        """
        Get the TrOCR service status.

        Returns:
            The JSON body of ``GET /api/v1/status``. On any failure a
            dict ``{"status": "unavailable", "error": <message>}`` is
            returned instead of raising.
        """
        try:
            client = await self._get_client()
            response = await client.get("/api/v1/status")
            response.raise_for_status()
            return response.json()
        except Exception as e:
            logger.error("Failed to get TrOCR status: %s", e)
            return {
                "status": "unavailable",
                "error": str(e),
            }

    async def extract_text(
        self,
        image_data: bytes,
        filename: str = "image.png",
        detect_lines: bool = True
    ) -> "OCRResult":
        """
        Extract text from an image using TrOCR.

        Args:
            image_data: Raw image bytes
            filename: Original filename
            detect_lines: Whether to detect individual lines

        Returns:
            OCRResult with extracted text. Fields missing from the
            service response fall back to "", 0.0, 0 and "remote".

        Raises:
            httpx.TimeoutException: The request timed out.
            Exception: Any other failure (HTTP error status, transport
                error, invalid JSON) is logged and re-raised unchanged.
        """
        try:
            client = await self._get_client()
            # NOTE(review): the part is always labelled image/png,
            # whatever the filename's extension - confirm the service
            # does not rely on the declared content type.
            files = {"file": (filename, image_data, "image/png")}
            params = {"detect_lines": str(detect_lines).lower()}
            response = await client.post(
                "/api/v1/extract",
                files=files,
                params=params,
            )
            response.raise_for_status()
            data = response.json()
            return OCRResult(
                text=data.get("text", ""),
                confidence=data.get("confidence", 0.0),
                processing_time_ms=data.get("processing_time_ms", 0),
                device=data.get("device", "remote"),
            )
        except httpx.TimeoutException:
            logger.error("TrOCR request timed out (model may be loading)")
            raise
        except Exception as e:
            logger.error("TrOCR extraction failed: %s", e)
            raise

    async def batch_extract(
        self,
        images: List[bytes],
        filenames: Optional[List[str]] = None,
        detect_lines: bool = True
    ) -> List["OCRResult"]:
        """
        Extract text from multiple images.

        Args:
            images: List of image bytes
            filenames: Optional list of filenames, one per image.
                Defaults to ``image_<index>.png``.
            detect_lines: Whether to detect individual lines

        Returns:
            List of OCRResult, one per entry in the response's
            ``results`` list. An empty ``images`` list yields ``[]``
            without contacting the service.

        Raises:
            ValueError: ``filenames`` is given but its length differs
                from ``images``.
            Exception: Any request/response failure is logged and
                re-raised unchanged.
        """
        if filenames is None:
            filenames = [f"image_{i}.png" for i in range(len(images))]
        elif len(filenames) != len(images):
            # zip() would silently drop the unmatched images.
            raise ValueError(
                f"filenames has {len(filenames)} entries "
                f"but {len(images)} images were given"
            )

        # Nothing to upload: skip the round trip entirely.
        if not images:
            return []

        try:
            client = await self._get_client()
            files = [
                ("files", (fn, img, "image/png"))
                for fn, img in zip(filenames, images)
            ]
            # Forward the flag the same way extract_text() does.
            # NOTE(review): assumes the batch endpoint accepts (or at
            # least ignores) this query parameter - confirm server-side.
            params = {"detect_lines": str(detect_lines).lower()}
            response = await client.post(
                "/api/v1/batch-extract",
                files=files,
                params=params,
            )
            response.raise_for_status()
            data = response.json()
            return [
                OCRResult(
                    text=item.get("text", ""),
                    # The 0.85 fallback is kept from the original code.
                    confidence=item.get("confidence", 0.85),
                    # Use per-item values when the service supplies them.
                    processing_time_ms=item.get("processing_time_ms", 0),
                    device=item.get("device", "remote"),
                )
                for item in data.get("results", [])
            ]
        except Exception as e:
            logger.error("TrOCR batch extraction failed: %s", e)
            raise
# Process-wide client instance, built on the first get_trocr_client() call.
_trocr_client: Optional[TrOCRClient] = None


def get_trocr_client() -> TrOCRClient:
    """Return the shared TrOCRClient, creating it on first use."""
    global _trocr_client
    if _trocr_client is not None:
        return _trocr_client
    _trocr_client = TrOCRClient()
    return _trocr_client
async def extract_text_from_image(
    image_data: bytes,
    filename: str = "image.png"
) -> OCRResult:
    """
    Run OCR on one image through the shared TrOCR client.

    Args:
        image_data: Raw image bytes
        filename: Original filename

    Returns:
        OCRResult with extracted text
    """
    return await get_trocr_client().extract_text(image_data, filename)