This repository has been archived on 2026-02-15. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
breakpilot-pwa/backend/klausur/services/trocr_client.py
Benjamin Admin bfdaf63ba9 fix: Restore all files lost during destructive rebase
A previous `git pull --rebase origin main` dropped 177 local commits,
losing 3400+ files across admin-v2, backend, studio-v2, website,
klausur-service, and many other services. The partial restore attempt
(660295e2) only recovered some files.

This commit restores all missing files from pre-rebase ref 98933f5e
while preserving post-rebase additions (night-scheduler, night-mode UI,
NightModeWidget dashboard integration).

Restored features include:
- AI Module Sidebar (FAB), OCR Labeling, OCR Compare
- GPU Dashboard, RAG Pipeline, Magic Help
- Klausur-Korrektur (8 files), Abitur-Archiv (5+ files)
- Companion, Zeugnisse-Crawler, Screen Flow
- Full backend, studio-v2, website, klausur-service
- All compliance SDKs, agent-core, voice-service
- CI/CD configs, documentation, scripts

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-09 09:51:32 +01:00

215 lines
5.9 KiB
Python

"""
TrOCR Client - Connects to external TrOCR service (Mac Mini).
This client forwards OCR requests to the TrOCR service running on
the Mac Mini, enabling handwriting recognition without requiring
local GPU/ML dependencies.
Privacy: Images are sent over the local network only - no cloud.
"""
import logging
import mimetypes
import os
from dataclasses import dataclass
from typing import Optional, List, Dict

import httpx
# Logger for this module, named after the module itself.
logger = logging.getLogger(__name__)

# Base URL of the TrOCR service (Mac Mini). The TROCR_SERVICE_URL environment
# variable wins; otherwise the fixed LAN address below is used.
TROCR_SERVICE_URL = os.getenv("TROCR_SERVICE_URL", "http://192.168.178.163:8084")
@dataclass
class OCRResult:
    """Outcome of a single TrOCR text-extraction request."""

    # Recognized text.
    text: str
    # Confidence value as reported for this extraction.
    confidence: float
    # Processing time in milliseconds.
    processing_time_ms: int
    # Device label for the extraction; "remote" unless another value is given.
    device: str = "remote"
class TrOCRClient:
    """
    Async client for an external TrOCR service.

    A single ``httpx.AsyncClient`` is created lazily on first use and reused
    for later requests; it is re-created if it has been closed in the
    meantime. The instance can also be used as an async context manager so
    the underlying HTTP client is closed when the block exits.

    Usage:
        client = TrOCRClient()
        # Check if service is available
        if await client.is_available():
            result = await client.extract_text(image_bytes)
            print(result.text)

        # Or, with automatic cleanup:
        async with TrOCRClient() as client:
            result = await client.extract_text(image_bytes)
    """

    def __init__(self, base_url: Optional[str] = None):
        """
        Args:
            base_url: Base URL of the TrOCR service. Falls back to the
                module-level ``TROCR_SERVICE_URL`` when omitted or empty.
        """
        self.base_url = base_url or TROCR_SERVICE_URL
        self._client: Optional[httpx.AsyncClient] = None

    async def __aenter__(self) -> "TrOCRClient":
        """Enter the async context; the HTTP client is still created lazily."""
        return self

    async def __aexit__(self, exc_type, exc, tb) -> None:
        """Close the HTTP client when leaving the async context."""
        await self.close()

    @staticmethod
    def _guess_image_content_type(filename: str) -> str:
        """Return the image MIME type implied by *filename*, else "image/png".

        Only ``image/*`` guesses are used; anything else (unknown extension,
        non-image type) keeps the previous fixed "image/png" label.
        """
        guessed, _ = mimetypes.guess_type(filename)
        if guessed and guessed.startswith("image/"):
            return guessed
        return "image/png"

    async def _get_client(self) -> httpx.AsyncClient:
        """Get or create HTTP client."""
        if self._client is None or self._client.is_closed:
            self._client = httpx.AsyncClient(
                base_url=self.base_url,
                timeout=300.0,  # 5 min timeout for model loading
            )
        return self._client

    async def close(self):
        """Close the HTTP client (safe to call more than once)."""
        if self._client is not None and not self._client.is_closed:
            await self._client.aclose()
        # Drop the reference; _get_client() creates a fresh client on demand.
        self._client = None

    async def is_available(self) -> bool:
        """
        Check if TrOCR service is available.

        Sends ``GET /health`` with a 5-second timeout.

        Returns:
            True if the response status is 200; False on any other status
            or on any exception (which is logged as a warning).
        """
        try:
            client = await self._get_client()
            response = await client.get("/health", timeout=5.0)
            return response.status_code == 200
        except Exception as e:
            logger.warning("TrOCR service not available: %s", e)
            return False

    async def get_status(self) -> Dict:
        """
        Get TrOCR service status.

        Returns:
            The parsed JSON body of ``GET /api/v1/status``. On any failure
            the error is logged and a dict with ``"status": "unavailable"``
            and the error text under ``"error"`` is returned instead.
        """
        try:
            client = await self._get_client()
            response = await client.get("/api/v1/status")
            response.raise_for_status()
            return response.json()
        except Exception as e:
            logger.error("Failed to get TrOCR status: %s", e)
            return {
                "status": "unavailable",
                "error": str(e),
            }

    async def extract_text(
        self,
        image_data: bytes,
        filename: str = "image.png",
        detect_lines: bool = True,
    ) -> OCRResult:
        """
        Extract text from an image using TrOCR.

        Args:
            image_data: Raw image bytes
            filename: Original filename; its extension selects the content
                type of the upload ("image/png" when it is not a known
                image extension)
            detect_lines: Whether to detect individual lines

        Returns:
            OCRResult with extracted text. Fields missing from the service
            response fall back to "", 0.0, 0 and "remote" respectively.

        Raises:
            httpx.TimeoutException: The request timed out.
            Exception: Any other request or decoding failure; it is logged
                and re-raised unchanged.
        """
        try:
            client = await self._get_client()
            content_type = self._guess_image_content_type(filename)
            files = {"file": (filename, image_data, content_type)}
            params = {"detect_lines": str(detect_lines).lower()}
            response = await client.post(
                "/api/v1/extract",
                files=files,
                params=params,
            )
            response.raise_for_status()
            data = response.json()
            return OCRResult(
                text=data.get("text", ""),
                confidence=data.get("confidence", 0.0),
                processing_time_ms=data.get("processing_time_ms", 0),
                device=data.get("device", "remote"),
            )
        except httpx.TimeoutException:
            logger.error("TrOCR request timed out (model may be loading)")
            raise
        except Exception as e:
            logger.error("TrOCR extraction failed: %s", e)
            raise

    async def batch_extract(
        self,
        images: List[bytes],
        filenames: Optional[List[str]] = None,
        detect_lines: bool = True,
    ) -> List[OCRResult]:
        """
        Extract text from multiple images.

        Args:
            images: List of image bytes
            filenames: Optional list of filenames, one per image. When
                omitted, names of the form ``image_<index>.png`` are used.
            detect_lines: Whether to detect individual lines

        Returns:
            List of OCRResult, one per entry in the response's "results"
            list. An empty ``images`` list returns ``[]`` without contacting
            the service.

        Raises:
            ValueError: ``filenames`` is given but its length differs from
                ``images`` (previously the extra images were silently
                dropped).
            Exception: Any request or decoding failure; it is logged and
                re-raised unchanged.
        """
        if filenames is None:
            filenames = [f"image_{i}.png" for i in range(len(images))]
        elif len(filenames) != len(images):
            raise ValueError(
                f"filenames has {len(filenames)} entries "
                f"but images has {len(images)}"
            )

        # Nothing to upload: skip the round trip entirely.
        if not images:
            return []

        try:
            client = await self._get_client()
            files = [
                ("files", (fn, img, self._guess_image_content_type(fn)))
                for fn, img in zip(filenames, images)
            ]
            # NOTE(review): assumes /api/v1/batch-extract accepts the same
            # detect_lines query parameter as /api/v1/extract - confirm
            # against the service. The flag was previously never sent.
            params = {"detect_lines": str(detect_lines).lower()}
            response = await client.post(
                "/api/v1/batch-extract",
                files=files,
                params=params,
            )
            response.raise_for_status()
            data = response.json()
            # NOTE(review): the 0.85 confidence fallback differs from the
            # 0.0 used by extract_text(); kept as-is for compatibility.
            return [
                OCRResult(
                    text=item.get("text", ""),
                    confidence=item.get("confidence", 0.85),
                    processing_time_ms=0,
                    device="remote",
                )
                for item in data.get("results", [])
            ]
        except Exception as e:
            logger.error("TrOCR batch extraction failed: %s", e)
            raise
# Process-wide shared client; stays None until the first lookup.
_trocr_client: Optional[TrOCRClient] = None


def get_trocr_client() -> TrOCRClient:
    """Return the shared TrOCRClient, creating it on first call."""
    global _trocr_client
    if _trocr_client is not None:
        return _trocr_client
    _trocr_client = TrOCRClient()
    return _trocr_client
async def extract_text_from_image(
    image_data: bytes,
    filename: str = "image.png",
    detect_lines: bool = True,
) -> OCRResult:
    """
    Convenience function to extract text from an image.

    Delegates to ``extract_text`` on the shared client returned by
    ``get_trocr_client()``.

    Args:
        image_data: Raw image bytes
        filename: Original filename
        detect_lines: Whether to detect individual lines; forwarded to the
            client unchanged (defaults to True, matching the client).

    Returns:
        OCRResult with extracted text

    Raises:
        Exception: Whatever the client's ``extract_text`` raises.
    """
    client = get_trocr_client()
    return await client.extract_text(image_data, filename, detect_lines)