fix: Restore all files lost during destructive rebase

A previous `git pull --rebase origin main` dropped 177 local commits, losing 3400+ files across admin-v2, backend, studio-v2, website, klausur-service, and many other services. The partial restore attempt (660295e2) only recovered some files. This commit restores all missing files from pre-rebase ref 98933f5e while preserving post-rebase additions (night-scheduler, night-mode UI, NightModeWidget dashboard integration). Restored features include: - AI Module Sidebar (FAB), OCR Labeling, OCR Compare - GPU Dashboard, RAG Pipeline, Magic Help - Klausur-Korrektur (8 files), Abitur-Archiv (5+ files) - Companion, Zeugnisse-Crawler, Screen Flow - Full backend, studio-v2, website, klausur-service - All compliance SDKs, agent-core, voice-service - CI/CD configs, documentation, scripts Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-09 09:51:32 +01:00
parent f7487ee240
commit 21a844cb8a
1986 changed files with 744143 additions and 1731 deletions
--- a/backend/klausur/services/trocr_client.py
+++ b/backend/klausur/services/trocr_client.py
@@ -0,0 +1,214 @@
+"""
+TrOCR Client - Connects to external TrOCR service (Mac Mini).
+
+This client forwards OCR requests to the TrOCR service running on
+the Mac Mini, enabling handwriting recognition without requiring
+local GPU/ML dependencies.
+
+Privacy: Images are sent over the local network only - no cloud.
+"""
+import os
+import httpx
+import logging
+from typing import Optional, List, Dict
+from dataclasses import dataclass
+
+logger = logging.getLogger(__name__)
+
+# Mac Mini TrOCR Service URL
+TROCR_SERVICE_URL = os.environ.get(
+    "TROCR_SERVICE_URL",
+    "http://192.168.178.163:8084"
+)
+
+
+@dataclass
+class OCRResult:
+    """Result from TrOCR extraction."""
+    text: str
+    confidence: float
+    processing_time_ms: int
+    device: str = "remote"
+
+
+class TrOCRClient:
+    """
+    Client for external TrOCR service.
+
+    Usage:
+        client = TrOCRClient()
+
+        # Check if service is available
+        if await client.is_available():
+            result = await client.extract_text(image_bytes)
+            print(result.text)
+    """
+
+    def __init__(self, base_url: Optional[str] = None):
+        self.base_url = base_url or TROCR_SERVICE_URL
+        self._client: Optional[httpx.AsyncClient] = None
+
+    async def _get_client(self) -> httpx.AsyncClient:
+        """Get or create HTTP client."""
+        if self._client is None or self._client.is_closed:
+            self._client = httpx.AsyncClient(
+                base_url=self.base_url,
+                timeout=300.0  # 5 min timeout for model loading
+            )
+        return self._client
+
+    async def close(self):
+        """Close the HTTP client."""
+        if self._client and not self._client.is_closed:
+            await self._client.aclose()
+
+    async def is_available(self) -> bool:
+        """Check if TrOCR service is available."""
+        try:
+            client = await self._get_client()
+            response = await client.get("/health", timeout=5.0)
+            return response.status_code == 200
+        except Exception as e:
+            logger.warning(f"TrOCR service not available: {e}")
+            return False
+
+    async def get_status(self) -> Dict:
+        """Get TrOCR service status."""
+        try:
+            client = await self._get_client()
+            response = await client.get("/api/v1/status")
+            response.raise_for_status()
+            return response.json()
+        except Exception as e:
+            logger.error(f"Failed to get TrOCR status: {e}")
+            return {
+                "status": "unavailable",
+                "error": str(e)
+            }
+
+    async def extract_text(
+        self,
+        image_data: bytes,
+        filename: str = "image.png",
+        detect_lines: bool = True
+    ) -> OCRResult:
+        """
+        Extract text from an image using TrOCR.
+
+        Args:
+            image_data: Raw image bytes
+            filename: Original filename
+            detect_lines: Whether to detect individual lines
+
+        Returns:
+            OCRResult with extracted text
+        """
+        try:
+            client = await self._get_client()
+
+            files = {"file": (filename, image_data, "image/png")}
+            params = {"detect_lines": str(detect_lines).lower()}
+
+            response = await client.post(
+                "/api/v1/extract",
+                files=files,
+                params=params
+            )
+            response.raise_for_status()
+
+            data = response.json()
+
+            return OCRResult(
+                text=data.get("text", ""),
+                confidence=data.get("confidence", 0.0),
+                processing_time_ms=data.get("processing_time_ms", 0),
+                device=data.get("device", "remote")
+            )
+
+        except httpx.TimeoutException:
+            logger.error("TrOCR request timed out (model may be loading)")
+            raise
+        except Exception as e:
+            logger.error(f"TrOCR extraction failed: {e}")
+            raise
+
+    async def batch_extract(
+        self,
+        images: List[bytes],
+        filenames: Optional[List[str]] = None,
+        detect_lines: bool = True
+    ) -> List[OCRResult]:
+        """
+        Extract text from multiple images.
+
+        Args:
+            images: List of image bytes
+            filenames: Optional list of filenames
+            detect_lines: Whether to detect individual lines
+
+        Returns:
+            List of OCRResult
+        """
+        if filenames is None:
+            filenames = [f"image_{i}.png" for i in range(len(images))]
+
+        try:
+            client = await self._get_client()
+
+            files = [
+                ("files", (fn, img, "image/png"))
+                for fn, img in zip(filenames, images)
+            ]
+
+            response = await client.post(
+                "/api/v1/batch-extract",
+                files=files
+            )
+            response.raise_for_status()
+
+            data = response.json()
+            results = []
+
+            for item in data.get("results", []):
+                results.append(OCRResult(
+                    text=item.get("text", ""),
+                    confidence=item.get("confidence", 0.85),
+                    processing_time_ms=0,
+                    device="remote"
+                ))
+
+            return results
+
+        except Exception as e:
+            logger.error(f"TrOCR batch extraction failed: {e}")
+            raise
+
+
+# Singleton instance
+_trocr_client: Optional[TrOCRClient] = None
+
+
+def get_trocr_client() -> TrOCRClient:
+    """Get the TrOCR client singleton."""
+    global _trocr_client
+    if _trocr_client is None:
+        _trocr_client = TrOCRClient()
+    return _trocr_client
+
+
+async def extract_text_from_image(
+    image_data: bytes,
+    filename: str = "image.png"
+) -> OCRResult:
+    """
+    Convenience function to extract text from an image.
+
+    Args:
+        image_data: Raw image bytes
+        filename: Original filename
+
+    Returns:
+        OCRResult with extracted text
+    """
+    client = get_trocr_client()
+    return await client.extract_text(image_data, filename)