fix: Restore all files lost during destructive rebase

A previous `git pull --rebase origin main` dropped 177 local commits, losing 3400+ files across admin-v2, backend, studio-v2, website, klausur-service, and many other services. The partial restore attempt (660295e2) only recovered some files.

This commit restores all missing files from pre-rebase ref 98933f5e while preserving post-rebase additions (night-scheduler, night-mode UI, NightModeWidget dashboard integration).

Restored features include:
- AI Module Sidebar (FAB), OCR Labeling, OCR Compare
- GPU Dashboard, RAG Pipeline, Magic Help
- Klausur-Korrektur (8 files), Abitur-Archiv (5+ files)
- Companion, Zeugnisse-Crawler, Screen Flow
- Full backend, studio-v2, website, klausur-service
- All compliance SDKs, agent-core, voice-service
- CI/CD configs, documentation, scripts

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-09 09:51:32 +01:00
parent f7487ee240
commit bfdaf63ba9
2009 changed files with 749983 additions and 1731 deletions
--- a/backend/claude_vision.py
+++ b/backend/claude_vision.py
@@ -0,0 +1,299 @@
+"""
+Claude Vision API Integration for Worksheet Analysis
+
+Uses Anthropic's Claude 3.5 Sonnet for superior OCR and layout understanding.
+"""
+
+import os
+import base64
+import json
+from pathlib import Path
+from typing import Dict, Optional
+import logging
+
+# Module-level logger, named after this module.
+logger = logging.getLogger(__name__)
+
+# Try to import Anthropic SDK
+# The import is guarded so that this module can still be imported when the
+# SDK is missing. ANTHROPIC_AVAILABLE records the outcome; the public
+# functions below check it before touching the `Anthropic` name.
+try:
+    from anthropic import Anthropic
+    ANTHROPIC_AVAILABLE = True
+except ImportError:
+    ANTHROPIC_AVAILABLE = False
+    # Only a warning is logged here; nothing is raised at import time.
+    logger.warning("Anthropic SDK not installed. Run: pip install anthropic")
+
+
+def _get_anthropic_api_key() -> str:
+    """Return the Anthropic API key stored in ``ANTHROPIC_API_KEY``.
+
+    Raises:
+        RuntimeError: If the environment variable is unset or empty.
+    """
+    key = os.environ.get("ANTHROPIC_API_KEY")
+    if key:
+        return key
+
+    # Unset and empty are treated alike: both are unusable as credentials.
+    raise RuntimeError(
+        "ANTHROPIC_API_KEY ist nicht gesetzt. "
+        "Bitte API-Schlüssel als Umgebungsvariable setzen:\n"
+        "export ANTHROPIC_API_KEY='sk-ant-api03-...'"
+    )
+
+
+def _sniff_image_media_type(header: bytes) -> Optional[str]:
+    """Return the media type implied by a file's leading bytes, or None."""
+    if header.startswith(b"\xff\xd8\xff"):
+        return 'image/jpeg'
+    if header.startswith(b"\x89PNG\r\n\x1a\n"):
+        return 'image/png'
+    if header.startswith((b"GIF87a", b"GIF89a")):
+        return 'image/gif'
+    # WebP is a RIFF container: "RIFF" <4-byte size> "WEBP"
+    if header[:4] == b"RIFF" and header[8:12] == b"WEBP":
+        return 'image/webp'
+    return None
+
+
+def _encode_image_to_base64(image_path: Path) -> tuple[str, str]:
+    """
+    Encode image to base64 for Claude API.
+
+    The media type is taken from the file's signature bytes when they match
+    JPEG, PNG, GIF or WebP, so a mislabeled file (e.g. a PNG saved as
+    ``.jpg``) is declared with the type of its actual content. If the
+    signature is not recognized, the file extension decides; if that is
+    unknown too, ``image/jpeg`` is used as before.
+
+    Args:
+        image_path: Path of the image file to read.
+
+    Returns:
+        (base64_string, media_type)
+
+    Raises:
+        OSError: If the file cannot be read.
+    """
+    image_bytes = image_path.read_bytes()
+    image_b64 = base64.standard_b64encode(image_bytes).decode("utf-8")
+
+    # Fallback table used only when the content signature is inconclusive.
+    media_type_map = {
+        '.jpg': 'image/jpeg',
+        '.jpeg': 'image/jpeg',
+        '.png': 'image/png',
+        '.gif': 'image/gif',
+        '.webp': 'image/webp',
+    }
+
+    # 12 bytes are enough for the longest signature checked (WebP).
+    media_type = _sniff_image_media_type(image_bytes[:12])
+    if media_type is None:
+        media_type = media_type_map.get(image_path.suffix.lower(), 'image/jpeg')
+
+    return image_b64, media_type
+
+
+def _parse_claude_json(raw_text: str):
+    """Parse the model's reply as JSON, tolerating fences and stray prose.
+
+    Raises:
+        json.JSONDecodeError: If no parseable JSON can be recovered.
+    """
+    cleaned = raw_text.strip()
+
+    # Claude might wrap JSON in ```json ... ``` (any letter case), remove if present
+    if cleaned.startswith("```"):
+        cleaned = cleaned[3:]
+        if cleaned[:4].lower() == "json":
+            cleaned = cleaned[4:]
+    if cleaned.endswith("```"):
+        cleaned = cleaned[:-3]
+    cleaned = cleaned.strip()
+
+    try:
+        return json.loads(cleaned)
+    except json.JSONDecodeError:
+        # Fallback: the reply may carry a sentence before/after the object.
+        start = cleaned.find("{")
+        end = cleaned.rfind("}")
+        has_span = 0 <= start < end
+        is_whole_text = start == 0 and end == len(cleaned) - 1
+        if not has_span or is_whole_text:
+            raise
+        return json.loads(cleaned[start:end + 1])
+
+
+def analyze_worksheet_with_claude(
+    image_path: Path,
+    max_tokens: int = 2500,
+    model: str = "claude-3-5-sonnet-20241022"
+) -> Dict:
+    """
+    Analyze worksheet using Claude Vision API.
+
+    Sends the image together with a German system prompt and a JSON schema
+    prompt, then parses the first text block of the reply as JSON.
+
+    Args:
+        image_path: Path to worksheet image
+        max_tokens: Maximum tokens in response (default 2500)
+        model: Claude model to use (default: Claude 3.5 Sonnet)
+
+    Returns:
+        Analysis dict following the schema given in the user prompt below.
+        (The original author notes it mirrors the OpenAI-based analysis,
+        which is not part of this module.)
+
+    Raises:
+        RuntimeError: If the SDK is not installed, the API key is not set,
+            the reply is empty, or the reply is not a valid JSON object
+        FileNotFoundError: If image_path does not exist
+        Exception: If API call fails (re-raised unchanged after logging)
+    """
+    if not ANTHROPIC_AVAILABLE:
+        raise RuntimeError("Anthropic SDK nicht installiert. Run: pip install anthropic")
+
+    if not image_path.exists():
+        raise FileNotFoundError(f"Image not found: {image_path}")
+
+    # Get API key
+    api_key = _get_anthropic_api_key()
+
+    # Initialize Anthropic client
+    client = Anthropic(api_key=api_key)
+
+    # Encode image
+    image_b64, media_type = _encode_image_to_base64(image_path)
+
+    # System prompt (instructions)
+    system_prompt = """Du bist ein Experte für die Analyse von Schul-Arbeitsblättern.
+
+Deine Aufgabe ist es, das Arbeitsblatt detailliert zu analysieren und strukturierte Informationen zu extrahieren:
+
+1. **Gedruckter Text**: Erkenne den VOLLSTÄNDIGEN gedruckten Text inklusive durchgestrichener Wörter
+2. **Handschrift**: Identifiziere alle handschriftlichen Eintragungen (Schülerantworten, Korrekturen, Notizen)
+3. **Layout**: Bestimme räumliche Positionen aller Elemente (Bounding Boxes in Pixeln)
+4. **Diagramme**: Erkenne gedruckte Illustrationen, Grafiken, Diagramme
+5. **Farben**: Klassifiziere Handschrift nach Farbe (blau/schwarz/rot/Bleistift)
+
+WICHTIG: Gib deine Antwort als gültiges JSON zurück, nicht als Markdown Code Block!"""
+
+    # User prompt with JSON schema
+    user_prompt = """Analysiere dieses Arbeitsblatt und gib ein JSON mit folgendem Aufbau zurück:
+
+{
+  "title": string | null,
+  "subject": string | null,
+  "grade_level": string | null,
+  "instructions": string | null,
+  "canonical_text": string | null,
+  "printed_blocks": [
+    {
+      "id": string,
+      "role": "title" | "instructions" | "body" | "other",
+      "text": string
+    }
+  ],
+  "layout": {
+    "page_structure": {
+      "has_diagram": boolean,
+      "orientation": "portrait" | "landscape"
+    },
+    "text_regions": [
+      {
+        "id": string,
+        "type": "title" | "paragraph" | "list" | "instruction",
+        "text": string,
+        "bounding_box": {"x": int, "y": int, "width": int, "height": int},
+        "font_characteristics": {
+          "is_bold": boolean,
+          "approximate_size": "large" | "medium" | "small"
+        }
+      }
+    ],
+    "diagram_elements": [
+      {
+        "id": string,
+        "type": "illustration" | "chart" | "graph" | "shape",
+        "description": string,
+        "bounding_box": {"x": int, "y": int, "width": int, "height": int},
+        "preserve": boolean
+      }
+    ]
+  },
+  "handwriting_regions": [
+    {
+      "id": string,
+      "text": string,
+      "type": "student_answer" | "correction" | "note" | "drawing",
+      "bounding_box": {"x": int, "y": int, "width": int, "height": int},
+      "color_hint": "blue" | "black" | "red" | "pencil" | "unknown"
+    }
+  ],
+  "handwritten_annotations": [
+    {
+      "text": string,
+      "approx_location": string
+    }
+  ],
+  "struck_through_words": [
+    {
+      "text": string,
+      "context": string
+    }
+  ],
+  "tasks": [
+    {
+      "id": string,
+      "type": "cloze" | "mcq" | "short_answer" | "math" | "other",
+      "description": string,
+      "text_with_gaps": string | null,
+      "gaps": [
+        {
+          "id": string,
+          "solution": string,
+          "position_hint": string
+        }
+      ]
+    }
+  ]
+}
+
+WICHTIGE HINWEISE:
+- "canonical_text" enthält den KORRIGIERTEN gedruckten Text OHNE Handschrift und OHNE durchgestrichene Wörter
+- "struck_through_words" enthält alle durchgestrichenen Wörter mit Kontext
+- Bounding Boxes sind ungefähre Pixel-Positionen (x, y von oben links, width/height in Pixeln)
+- "layout.text_regions" sollte alle gedruckten Textbereiche mit genauen Positionen enthalten
+- "handwriting_regions" sollte alle handschriftlichen Bereiche mit Farb-Hinweisen enthalten
+- Setze "preserve": true für Diagramm-Elemente die erhalten bleiben sollen
+- Durchgestrichene Wörter NUR in "struck_through_words", NICHT in "canonical_text"
+
+Gib NUR das JSON zurück, ohne Code-Block-Marker!"""
+
+    logger.info(f"Calling Claude API for analysis of {image_path.name}")
+
+    # Only the network call sits inside this try, so the "API call failed"
+    # log line is not emitted for parsing problems that happen afterwards.
+    try:
+        response = client.messages.create(
+            model=model,
+            max_tokens=max_tokens,
+            system=system_prompt,
+            messages=[
+                {
+                    "role": "user",
+                    "content": [
+                        {
+                            "type": "image",
+                            "source": {
+                                "type": "base64",
+                                "media_type": media_type,
+                                "data": image_b64,
+                            },
+                        },
+                        {
+                            "type": "text",
+                            "text": user_prompt
+                        }
+                    ],
+                }
+            ],
+        )
+    except Exception as e:
+        logger.error(f"Claude API call failed: {e}")
+        raise
+
+    # Extract text from response
+    if not response.content:
+        raise RuntimeError("Empty response from Claude API")
+
+    # Get first text block
+    text_content = next(
+        (block.text for block in response.content if block.type == "text"),
+        None,
+    )
+    if not text_content:
+        raise RuntimeError("No text content in Claude response")
+
+    text_content = text_content.strip()
+    logger.info(f"Received response from Claude ({len(text_content)} chars)")
+
+    # A reply cut off at the token limit is almost never complete JSON;
+    # flag it so the parse failure below is easy to diagnose.
+    truncated = getattr(response, "stop_reason", None) == "max_tokens"
+    if truncated:
+        logger.warning(
+            f"Claude response was truncated at max_tokens={max_tokens}; "
+            "JSON is likely incomplete"
+        )
+
+    # Parse JSON
+    try:
+        analysis_data = _parse_claude_json(text_content)
+    except json.JSONDecodeError as e:
+        logger.error(f"Failed to parse Claude JSON response: {e}")
+        logger.error(f"Response text: {text_content[:500]}...")
+        hint = (
+            f"\nHint: response was truncated at max_tokens={max_tokens}; "
+            "increase max_tokens"
+            if truncated else ""
+        )
+        raise RuntimeError(
+            f"Invalid JSON from Claude: {e}\nContent: {text_content[:200]}...{hint}"
+        ) from e
+
+    # The declared return type is a dict; reject arrays/scalars explicitly
+    # instead of handing callers a value they cannot index by key.
+    if not isinstance(analysis_data, dict):
+        raise RuntimeError(
+            "Invalid JSON from Claude: expected a JSON object, "
+            f"got {type(analysis_data).__name__}"
+        )
+
+    logger.info("Successfully parsed Claude analysis")
+    return analysis_data
+
+
+def test_claude_connection(model: str = "claude-3-5-sonnet-20241022") -> bool:
+    """
+    Test if Claude API is accessible with current credentials.
+
+    Sends one minimal message request and reports whether it succeeded.
+    Failures of any kind (missing SDK, missing key, API error) are logged
+    and reported as False rather than raised.
+
+    Args:
+        model: Model id used for the probe request. Defaults to the same
+            id that was previously hard-coded here.
+
+    Returns:
+        True if connection successful, False otherwise
+    """
+    if not ANTHROPIC_AVAILABLE:
+        logger.error("Anthropic SDK not installed")
+        return False
+
+    try:
+        client = Anthropic(api_key=_get_anthropic_api_key())
+
+        # Simple test call; the reply itself is irrelevant, only that the
+        # request completes without raising.
+        client.messages.create(
+            model=model,
+            max_tokens=10,
+            messages=[{"role": "user", "content": "Test"}]
+        )
+    except Exception as e:
+        logger.error(f"❌ Claude API connection failed: {e}")
+        return False
+
+    logger.info("✅ Claude API connection successful")
+    return True