fix: Restore all files lost during destructive rebase

A previous `git pull --rebase origin main` dropped 177 local commits, losing 3400+ files across admin-v2, backend, studio-v2, website, klausur-service, and many other services. The partial restore attempt (660295e2) only recovered some files.

This commit restores all missing files from the pre-rebase ref 98933f5e while preserving post-rebase additions (night-scheduler, night-mode UI, NightModeWidget dashboard integration).

Restored features include:
- AI Module Sidebar (FAB), OCR Labeling, OCR Compare
- GPU Dashboard, RAG Pipeline, Magic Help
- Klausur-Korrektur (8 files), Abitur-Archiv (5+ files)
- Companion, Zeugnisse-Crawler, Screen Flow
- Full backend, studio-v2, website, klausur-service
- All compliance SDKs, agent-core, voice-service
- CI/CD configs, documentation, scripts

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-09 09:51:32 +01:00
parent f7487ee240
commit bfdaf63ba9
2009 changed files with 749983 additions and 1731 deletions
--- a/backend/ai_processing/analysis.py
+++ b/backend/ai_processing/analysis.py
@@ -0,0 +1,209 @@
+"""
+AI Processing - Worksheet Analysis.
+
+Strukturierte Analyse von Arbeitsblättern mit OpenAI oder Claude.
+"""
+
+from pathlib import Path
+import json
+import requests
+import logging
+
+from .core import (
+    get_openai_api_key,
+    encode_image_to_data_url,
+    BEREINIGT_DIR,
+    get_vision_api,
+)
+
+logger = logging.getLogger(__name__)
+
+
+def describe_scan_with_ai(input_path: Path) -> Path:
+    """Ask the OpenAI vision model for a short description of a worksheet scan.
+
+    The image at ``input_path`` is sent as a data URL to the chat-completions
+    endpoint; the returned text is written to
+    ``BEREINIGT_DIR / "<stem>_beschreibung.txt"``.
+
+    Args:
+        input_path: Path to the scanned worksheet image.
+
+    Returns:
+        Path of the text file that holds the description.
+
+    Raises:
+        FileNotFoundError: If ``input_path`` does not exist.
+        requests.RequestException: On transport errors, timeouts, or a
+            non-success HTTP status (via ``raise_for_status``).
+        RuntimeError: If the response does not contain a text answer at
+            ``choices[0].message.content``.
+    """
+    if not input_path.exists():
+        raise FileNotFoundError(f"Eingabedatei nicht gefunden: {input_path}")
+
+    api_key = get_openai_api_key()
+    image_data_url = encode_image_to_data_url(input_path)
+
+    url = "https://api.openai.com/v1/chat/completions"
+    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
+    payload = {
+        "model": "gpt-4o-mini",
+        "messages": [
+            {
+                "role": "system",
+                "content": "Du bist ein hilfreicher Assistent, der Schul-Arbeitsblätter knapp beschreibt.",
+            },
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "text",
+                        "text": (
+                            "Beschreibe dieses Arbeitsblatt knapp: Thema, Art der Aufgaben "
+                            "(z.B. Lückentext, Multiple Choice, Rechenaufgaben) und groben Inhalt."
+                        ),
+                    },
+                    {"type": "image_url", "image_url": {"url": image_data_url}},
+                ],
+            },
+        ],
+        "max_tokens": 400,
+    }
+
+    # requests applies no timeout by default; without one a stalled connection
+    # would block the caller forever. (connect, read) in seconds.
+    response = requests.post(url, headers=headers, json=payload, timeout=(10, 120))
+    response.raise_for_status()
+    data = response.json()
+
+    try:
+        description = data["choices"][0]["message"]["content"]
+    except (KeyError, IndexError, TypeError) as e:
+        raise RuntimeError(f"Unerwartete Antwortstruktur von der KI: {e}\nAntwort: {data}") from e
+
+    # The content field can be present but empty/None; fail with a clear error
+    # instead of a TypeError from write_text further down.
+    if not isinstance(description, str):
+        raise RuntimeError(
+            f"Unerwartete Antwortstruktur von der KI: 'content' ist kein Text\nAntwort: {data}"
+        )
+
+    out_name = input_path.stem + "_beschreibung.txt"
+    out_path = BEREINIGT_DIR / out_name
+    out_path.write_text(description, encoding="utf-8")
+    return out_path
+
+
+def _get_analysis_system_prompt() -> str:
+    """Return the (German) system prompt used for the worksheet analysis.
+
+    The prompt is assembled from sections: lines inside a section are joined
+    with a single newline, sections are separated by one blank line.
+    """
+    sections = (
+        ("Du bist ein Experte für die Analyse von Schul-Arbeitsblättern.",),
+        (
+            "HAUPTAUFGABEN:",
+            "1. Erkenne ALLE gedruckten Elemente: Text, Überschriften, Tabellen, Linien, Kästchen, Diagramme, Illustrationen",
+            "2. Identifiziere ALLE handschriftlichen Ergänzungen: Antworten, Zahlen, Buchstaben, Notizen, Zeichnungen",
+            "3. Bestimme präzise Positionen (Bounding Boxes in Pixeln) für JEDES Element",
+        ),
+        (
+            "KRITISCH - DIAGRAMME & ILLUSTRATIONEN:",
+            "- Suche aktiv nach: anatomischen Zeichnungen, beschrifteten Diagrammen, Grafiken, Tabellen, Skizzen",
+            "- Wenn du irgendeine bildliche Darstellung siehst (z.B. Auge, Pflanze, Karte, Schaubild), setze 'has_diagram: true'",
+            "- Für JEDES visuelle Element: Erstelle einen Eintrag in 'diagram_elements' mit genauer Position",
+            "- Beschrifte-Linien (von Beschriftung zu Bildteil) gehören zum Diagramm!",
+        ),
+        (
+            "HANDSCHRIFT ERKENNUNG:",
+            "- Unterscheide gedruckt vs. handgeschrieben anhand der Schriftart",
+            "- Klassifiziere Farbe: blau/schwarz/rot/pencil (Bleistift)",
+            "- Durchgestrichene Wörter separat auflisten",
+        ),
+        (
+            "AUSGABE: Gib deine Antwort AUSSCHLIESSLICH als gültiges JSON zurück (kein Markdown, keine Code-Blöcke).",
+        ),
+    )
+    return "\n\n".join("\n".join(lines) for lines in sections)
+
+
+def _analyze_with_openai(input_path: Path) -> Path:
+    """Run the structured JSON analysis of a worksheet scan through OpenAI.
+
+    Sends the image plus the analysis prompt to the chat-completions endpoint,
+    parses the answer as JSON (tolerating a Markdown code fence around it) and
+    writes the result to ``BEREINIGT_DIR / "<stem>_analyse.json"``.
+
+    Args:
+        input_path: Path to the scanned worksheet image.
+
+    Returns:
+        Path of the JSON file that holds the analysis.
+
+    Raises:
+        FileNotFoundError: If ``input_path`` does not exist.
+        requests.RequestException: On transport errors, timeouts, or a
+            non-success HTTP status (via ``raise_for_status``).
+        RuntimeError: If the response has an unexpected structure or the
+            model's answer is not valid JSON.
+    """
+    if not input_path.exists():
+        raise FileNotFoundError(f"Eingabedatei nicht gefunden: {input_path}")
+
+    api_key = get_openai_api_key()
+    image_data_url = encode_image_to_data_url(input_path)
+
+    url = "https://api.openai.com/v1/chat/completions"
+    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
+
+    system_prompt = _get_analysis_system_prompt()
+
+    user_text = '''Analysiere dieses Arbeitsblatt und gib ein JSON mit folgendem Aufbau zurück:
+
+{
+  "title": string | null,
+  "subject": string | null,
+  "grade_level": string | null,
+  "instructions": string | null,
+  "canonical_text": string,
+  "printed_blocks": [...],
+  "handwritten_annotations": [...],
+  "struck_through_words": [...],
+  "tasks": [...],
+  "has_diagram": boolean,
+  "diagram_elements": [...]
+}'''
+
+    payload = {
+        "model": "gpt-4o-mini",
+        "messages": [
+            {"role": "system", "content": system_prompt},
+            {
+                "role": "user",
+                "content": [
+                    {"type": "text", "text": user_text},
+                    {"type": "image_url", "image_url": {"url": image_data_url}},
+                ],
+            },
+        ],
+        "max_tokens": 2500,
+    }
+
+    # requests applies no timeout by default; without one a stalled connection
+    # would block the caller forever. (connect, read) in seconds.
+    response = requests.post(url, headers=headers, json=payload, timeout=(10, 120))
+    response.raise_for_status()
+    data = response.json()
+
+    try:
+        raw_content = data["choices"][0]["message"]["content"]
+    except (KeyError, IndexError, TypeError) as e:
+        raise RuntimeError(f"Unerwartete Antwortstruktur: {e}\nAntwort: {data}") from e
+
+    if not isinstance(raw_content, str):
+        # e.g. content is None: there is nothing that could be parsed as JSON.
+        raise RuntimeError(
+            f"Unerwartete Antwortstruktur: 'content' ist kein Text\nAntwort: {data}"
+        )
+
+    # The model sometimes wraps the JSON in a Markdown code fence despite the
+    # prompt; keep only the text inside the first fence.
+    if "```json" in raw_content:
+        raw_content = raw_content.split("```json")[1].split("```")[0].strip()
+    elif "```" in raw_content:
+        raw_content = raw_content.split("```")[1].strip()
+
+    try:
+        obj = json.loads(raw_content)
+    except json.JSONDecodeError as e:
+        raise RuntimeError(f"KI hat kein valides JSON zurückgegeben: {e}\nAntwort: {raw_content}") from e
+
+    out_name = input_path.stem + "_analyse.json"
+    out_path = BEREINIGT_DIR / out_name
+    out_path.write_text(json.dumps(obj, ensure_ascii=False, indent=2), encoding="utf-8")
+    return out_path
+
+
+def _analyze_with_claude(input_path: Path) -> Path:
+    """Run the structured JSON analysis through the Claude vision helper.
+
+    The value returned by ``analyze_worksheet_with_claude`` is serialized as
+    JSON into ``BEREINIGT_DIR / "<stem>_analyse.json"``. Any failure during
+    the analysis or while writing the file is logged and re-raised unchanged.
+
+    Raises:
+        FileNotFoundError: If ``input_path`` does not exist.
+    """
+    # Imported at call time, so the module is only needed when this path runs.
+    from claude_vision import analyze_worksheet_with_claude
+
+    if not input_path.exists():
+        raise FileNotFoundError(f"Eingabedatei nicht gefunden: {input_path}")
+
+    logger.info(f"Analyzing with Claude Vision: {input_path.name}")
+
+    try:
+        result = analyze_worksheet_with_claude(input_path, max_tokens=2500)
+        target = BEREINIGT_DIR / (input_path.stem + "_analyse.json")
+        serialized = json.dumps(result, ensure_ascii=False, indent=2)
+        target.write_text(serialized, encoding="utf-8")
+        logger.info(f"Claude analysis saved: {target.name}")
+    except Exception as exc:
+        logger.error(f"Claude analysis failed: {exc}")
+        raise
+
+    return target
+
+
+def analyze_scan_structure_with_ai(input_path: Path) -> Path:
+    """Run the structured JSON analysis of a worksheet scan (hybrid mode).
+
+    The backend is chosen by the value returned from ``get_vision_api()``:
+
+    - ``"openai"``: use the OpenAI path only.
+    - ``"claude"``: use the Claude path and fall back to OpenAI if it raises.
+    - anything else: log a warning and behave like ``"claude"``, including
+      the OpenAI fallback, so a misconfigured value does not silently lose
+      the fallback.
+
+    Args:
+        input_path: Path to the scanned worksheet image.
+
+    Returns:
+        Path of the JSON file written by the backend that succeeded.
+    """
+    vision_api = get_vision_api()
+    logger.info(f"Using Vision API: {vision_api}")
+
+    if vision_api == "openai":
+        return _analyze_with_openai(input_path)
+
+    if vision_api != "claude":
+        logger.warning(f"Unknown VISION_API '{vision_api}', using Claude as default")
+
+    # Claude is the default path; any failure there is retried once via OpenAI.
+    try:
+        return _analyze_with_claude(input_path)
+    except Exception as e:
+        logger.warning(f"Claude failed, falling back to OpenAI: {e}")
+        return _analyze_with_openai(input_path)