fix: Restore all files lost during destructive rebase

A previous `git pull --rebase origin main` dropped 177 local commits, losing 3400+ files across admin-v2, backend, studio-v2, website, klausur-service, and many other services. The partial restore attempt (660295e2) only recovered some files. This commit restores all missing files from pre-rebase ref 98933f5e while preserving post-rebase additions (night-scheduler, night-mode UI, NightModeWidget dashboard integration). Restored features include: - AI Module Sidebar (FAB), OCR Labeling, OCR Compare - GPU Dashboard, RAG Pipeline, Magic Help - Klausur-Korrektur (8 files), Abitur-Archiv (5+ files) - Companion, Zeugnisse-Crawler, Screen Flow - Full backend, studio-v2, website, klausur-service - All compliance SDKs, agent-core, voice-service - CI/CD configs, documentation, scripts Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-09 09:51:32 +01:00
parent f7487ee240
commit bfdaf63ba9
2009 changed files with 749983 additions and 1731 deletions
--- a/backend/ai_processor/generators/cloze.py
+++ b/backend/ai_processor/generators/cloze.py
@@ -0,0 +1,312 @@
+"""
+AI Processor - Cloze Text Generator
+
+Generate cloze (fill-in-the-blank) texts from worksheet analysis.
+"""
+
+from pathlib import Path
+import json
+import logging
+import os
+import requests
+
+from ..config import VISION_API, BEREINIGT_DIR, get_openai_api_key
+
+logger = logging.getLogger(__name__)
+
+# Two-letter language codes mapped to the (ASCII-transliterated) German
+# language names that are inserted into the generation prompts.
+# Codes missing from this table are looked up with a fallback to "Tuerkisch"
+# by the generator functions in this module.
+LANGUAGE_NAMES = {
+    "tr": "Tuerkisch",
+    "ar": "Arabisch",
+    "ru": "Russisch",
+    "en": "Englisch",
+    "fr": "Franzoesisch",
+    "es": "Spanisch",
+    "pl": "Polnisch",
+    "uk": "Ukrainisch",
+}
+
+
+def _generate_cloze_with_openai(analysis_data: dict, target_language: str = "tr") -> dict:
+    """
+    Generate cloze texts based on worksheet analysis via the OpenAI chat API.
+
+    Important didactic requirements (requested through the prompt only, the
+    model output is not validated against them):
+    - Multiple meaningful gaps per sentence (not just one!)
+    - Difficulty level matches the original
+    - Translation with the same gaps
+
+    Args:
+        analysis_data: The analysis JSON of the worksheet. Reads the keys
+            "title", "subject", "grade_level", "canonical_text" and
+            "printed_blocks" (a list of dicts with a "text" entry).
+        target_language: Target language code for translation (default: "tr"
+            for Turkish). Codes missing from LANGUAGE_NAMES are described to
+            the model as "Tuerkisch".
+
+    Returns:
+        Dict with cloze_items and metadata. If the analysis contains no text,
+        an empty result with metadata["error"] is returned and no request is sent.
+
+    Raises:
+        requests.RequestException: On network errors, timeouts or an HTTP error status.
+        RuntimeError: If the response has an unexpected shape or its content
+            is not a valid JSON object.
+    """
+    api_key = get_openai_api_key()
+
+    title = analysis_data.get("title") or "Arbeitsblatt"
+    subject = analysis_data.get("subject") or "Allgemein"
+    grade_level = analysis_data.get("grade_level") or "unbekannt"
+    canonical_text = analysis_data.get("canonical_text") or ""
+    printed_blocks = analysis_data.get("printed_blocks") or []
+
+    # Collect the worksheet text, skipping blocks that repeat earlier content.
+    content_parts = []
+    if canonical_text:
+        content_parts.append(canonical_text)
+    for block in printed_blocks:
+        # "text" may be missing or explicitly null in the analysis JSON.
+        text = (block.get("text") or "").strip()
+        if text and text not in content_parts:
+            content_parts.append(text)
+
+    worksheet_content = "\n\n".join(content_parts)
+
+    if not worksheet_content.strip():
+        logger.warning("Kein Textinhalt fuer Lueckentext-Generierung gefunden")
+        return {"cloze_items": [], "metadata": {"error": "Kein Textinhalt gefunden"}}
+
+    target_lang_name = LANGUAGE_NAMES.get(target_language, "Tuerkisch")
+
+    url = "https://api.openai.com/v1/chat/completions"
+    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
+
+    system_prompt = f"""Du bist ein erfahrener Paedagoge, der Lueckentexte fuer Schueler erstellt.
+
+WICHTIGE REGELN FUER LUECKENTEXTE:
+
+1. MEHRERE LUECKEN PRO SATZ:
+   - Erstelle IMMER mehrere sinnvolle Luecken pro Satz
+   - Beispiel: "Ich habe gestern meine Hausaufgaben gemacht."
+     → Luecken: "habe" UND "gemacht" (nicht nur eine!)
+
+2. SCHWIERIGKEITSGRAD:
+   - Niveau muss exakt "{grade_level}" entsprechen
+
+3. SINNVOLLE LUECKENWOERTER:
+   - Verben (konjugiert)
+   - Wichtige Nomen
+   - Adjektive
+   - KEINE Artikel oder Praepositionen allein
+
+4. UEBERSETZUNG:
+   - Uebersetze den VOLLSTAENDIGEN Satz auf {target_lang_name}
+   - Die GLEICHEN Woerter muessen als Luecken markiert sein
+
+5. AUSGABE: Nur gueltiges JSON, kein Markdown."""
+
+    user_prompt = f"""Erstelle Lueckentexte aus diesem Arbeitsblatt:
+
+TITEL: {title}
+FACH: {subject}
+KLASSENSTUFE: {grade_level}
+
+TEXT:
+{worksheet_content}
+
+Erstelle 5-8 Saetze mit Luecken. Gib das Ergebnis als JSON zurueck:
+
+{{
+  "cloze_items": [
+    {{
+      "id": "c1",
+      "original_sentence": "Der vollstaendige Originalsatz ohne Luecken",
+      "sentence_with_gaps": "Der Satz mit ___ fuer jede Luecke",
+      "gaps": [
+        {{
+          "id": "g1",
+          "word": "das fehlende Wort",
+          "position": 0,
+          "hint": "optionaler Hinweis"
+        }}
+      ],
+      "translation": {{
+        "language": "{target_language}",
+        "language_name": "{target_lang_name}",
+        "full_sentence": "Vollstaendige Uebersetzung",
+        "sentence_with_gaps": "Uebersetzung mit ___ an gleichen Stellen"
+      }}
+    }}
+  ],
+  "metadata": {{
+    "subject": "{subject}",
+    "grade_level": "{grade_level}",
+    "source_title": "{title}",
+    "target_language": "{target_language}",
+    "total_gaps": 0
+  }}
+}}
+
+WICHTIG:
+- Jeder Satz MUSS mindestens 2 Luecken haben!
+- Position ist der Index des Wortes im Satz (0-basiert)"""
+
+    payload = {
+        "model": "gpt-4o-mini",
+        "response_format": {"type": "json_object"},
+        "messages": [
+            {"role": "system", "content": system_prompt},
+            {"role": "user", "content": user_prompt},
+        ],
+        "max_tokens": 3000,
+        "temperature": 0.7,
+    }
+
+    # Without a timeout requests waits indefinitely on a stalled connection.
+    # (connect timeout, read timeout) in seconds.
+    response = requests.post(url, headers=headers, json=payload, timeout=(10, 120))
+    response.raise_for_status()
+    data = response.json()
+
+    try:
+        content = data["choices"][0]["message"]["content"]
+        cloze_data = json.loads(content)
+    except (KeyError, IndexError, TypeError, json.JSONDecodeError) as e:
+        # IndexError: empty "choices" list; TypeError: content is not a string (e.g. null).
+        raise RuntimeError(f"Fehler bei Lueckentext-Generierung: {e}") from e
+
+    if not isinstance(cloze_data, dict):
+        raise RuntimeError("Fehler bei Lueckentext-Generierung: Antwort ist kein JSON-Objekt")
+
+    # Calculate total number of gaps; tolerate null/missing lists in the model output.
+    items = cloze_data.get("cloze_items") or []
+    total_gaps = sum(len(item.get("gaps") or []) for item in items if isinstance(item, dict))
+    metadata = cloze_data.get("metadata")
+    if isinstance(metadata, dict):
+        metadata["total_gaps"] = total_gaps
+
+    return cloze_data
+
+
+def _generate_cloze_with_claude(analysis_data: dict, target_language: str = "tr") -> dict:
+    """
+    Generate cloze texts with Claude API.
+
+    Args:
+        analysis_data: The analysis JSON of the worksheet. Reads the keys
+            "title", "subject", "grade_level", "canonical_text" and
+            "printed_blocks" (a list of dicts with a "text" entry).
+        target_language: Target language code for translation (default: "tr").
+            Codes missing from LANGUAGE_NAMES are described to the model as
+            "Tuerkisch".
+
+    Returns:
+        Dict with cloze_items and metadata. If the analysis contains no text,
+        an empty result with metadata["error"] is returned and no request is sent.
+
+    Raises:
+        RuntimeError: If ANTHROPIC_API_KEY is not set, the response contains no
+            text, or the text is not a valid JSON object.
+    """
+    # Imported lazily so the module can be loaded without the anthropic package.
+    import anthropic
+
+    api_key = os.getenv("ANTHROPIC_API_KEY")
+    if not api_key:
+        raise RuntimeError("ANTHROPIC_API_KEY ist nicht gesetzt.")
+
+    client = anthropic.Anthropic(api_key=api_key)
+
+    title = analysis_data.get("title") or "Arbeitsblatt"
+    subject = analysis_data.get("subject") or "Allgemein"
+    grade_level = analysis_data.get("grade_level") or "unbekannt"
+    canonical_text = analysis_data.get("canonical_text") or ""
+    printed_blocks = analysis_data.get("printed_blocks") or []
+
+    # Collect the worksheet text, skipping blocks that repeat earlier content.
+    content_parts = []
+    if canonical_text:
+        content_parts.append(canonical_text)
+    for block in printed_blocks:
+        # "text" may be missing or explicitly null in the analysis JSON.
+        text = (block.get("text") or "").strip()
+        if text and text not in content_parts:
+            content_parts.append(text)
+
+    worksheet_content = "\n\n".join(content_parts)
+
+    if not worksheet_content.strip():
+        # Same warning as the OpenAI variant so empty inputs show up in the logs.
+        logger.warning("Kein Textinhalt fuer Lueckentext-Generierung gefunden")
+        return {"cloze_items": [], "metadata": {"error": "Kein Textinhalt gefunden"}}
+
+    target_lang_name = LANGUAGE_NAMES.get(target_language, "Tuerkisch")
+
+    prompt = f"""Erstelle Lueckentexte aus diesem Arbeitsblatt.
+
+WICHTIGE REGELN:
+1. MEHRERE LUECKEN PRO SATZ (mindestens 2!)
+   Beispiel: "Ich habe gestern Hausaufgaben gemacht" → Luecken: "habe" UND "gemacht"
+2. Schwierigkeitsgrad: exakt "{grade_level}"
+3. Uebersetzung auf {target_lang_name} mit gleichen Luecken
+
+TITEL: {title}
+FACH: {subject}
+KLASSENSTUFE: {grade_level}
+
+TEXT:
+{worksheet_content}
+
+Antworte NUR mit diesem JSON (5-8 Saetze):
+{{
+  "cloze_items": [
+    {{
+      "id": "c1",
+      "original_sentence": "Vollstaendiger Satz",
+      "sentence_with_gaps": "Satz mit ___ fuer Luecken",
+      "gaps": [
+        {{"id": "g1", "word": "Lueckenwort", "position": 0, "hint": "Hinweis"}}
+      ],
+      "translation": {{
+        "language": "{target_language}",
+        "language_name": "{target_lang_name}",
+        "full_sentence": "Uebersetzung",
+        "sentence_with_gaps": "Uebersetzung mit ___"
+      }}
+    }}
+  ],
+  "metadata": {{
+    "subject": "{subject}",
+    "grade_level": "{grade_level}",
+    "source_title": "{title}",
+    "target_language": "{target_language}",
+    "total_gaps": 0
+  }}
+}}"""
+
+    message = client.messages.create(
+        model="claude-3-5-sonnet-20241022",
+        max_tokens=3000,
+        messages=[{"role": "user", "content": prompt}]
+    )
+
+    try:
+        content = message.content[0].text
+    except (IndexError, AttributeError) as e:
+        # Empty content list, or the first content block carries no text.
+        raise RuntimeError(f"Claude hat keine Textantwort geliefert: {e}") from e
+
+    try:
+        # The model may wrap the JSON in a Markdown code fence; unwrap it first.
+        if "```json" in content:
+            content = content.split("```json")[1].split("```")[0]
+        elif "```" in content:
+            content = content.split("```")[1].split("```")[0]
+        cloze_data = json.loads(content.strip())
+    except json.JSONDecodeError as e:
+        raise RuntimeError(f"Claude hat ungueltiges JSON geliefert: {e}") from e
+
+    if not isinstance(cloze_data, dict):
+        raise RuntimeError("Claude hat ungueltiges JSON geliefert: kein JSON-Objekt")
+
+    # Calculate total number of gaps; tolerate null/missing lists in the model output.
+    items = cloze_data.get("cloze_items") or []
+    total_gaps = sum(len(item.get("gaps") or []) for item in items if isinstance(item, dict))
+    metadata = cloze_data.get("metadata")
+    if isinstance(metadata, dict):
+        metadata["total_gaps"] = total_gaps
+
+    return cloze_data
+
+
+def generate_cloze_from_analysis(analysis_path: Path, target_language: str = "tr") -> Path:
+    """
+    Generate cloze texts from an analysis JSON file.
+
+    The cloze texts will:
+    - Have multiple meaningful gaps per sentence
+    - Match the difficulty level of the original
+    - Include translation to target language
+
+    When VISION_API is "claude", Claude is tried first and any failure falls
+    back to OpenAI (a warning is logged); otherwise OpenAI is used directly.
+
+    Args:
+        analysis_path: Path to *_analyse.json file
+        target_language: Language code for translation (default: "tr" for Turkish)
+
+    Returns:
+        Path to generated *_cloze.json file (written into BEREINIGT_DIR)
+
+    Raises:
+        FileNotFoundError: If analysis_path does not exist.
+        RuntimeError: If the analysis file is not valid JSON or not a JSON object.
+        Exception: Errors raised by the OpenAI generator propagate unchanged.
+    """
+    # Accept plain string paths as well as Path objects.
+    analysis_path = Path(analysis_path)
+
+    if not analysis_path.exists():
+        raise FileNotFoundError(f"Analysedatei nicht gefunden: {analysis_path}")
+
+    try:
+        analysis_data = json.loads(analysis_path.read_text(encoding="utf-8"))
+    except json.JSONDecodeError as e:
+        raise RuntimeError(f"Ungueltige Analyse-JSON: {e}") from e
+
+    # The generators call .get() on the analysis, so anything but an object
+    # would otherwise surface later as an opaque AttributeError.
+    if not isinstance(analysis_data, dict):
+        raise RuntimeError("Ungueltige Analyse-JSON: JSON-Objekt erwartet")
+
+    logger.info("Generiere Lueckentexte fuer: %s", analysis_path.name)
+
+    # Generate cloze texts (use configured API)
+    if VISION_API == "claude":
+        try:
+            cloze_data = _generate_cloze_with_claude(analysis_data, target_language)
+        except Exception as e:
+            # Deliberate best-effort: any Claude failure falls back to OpenAI.
+            logger.warning("Claude Lueckentext-Generierung fehlgeschlagen, nutze OpenAI: %s", e)
+            cloze_data = _generate_cloze_with_openai(analysis_data, target_language)
+    else:
+        cloze_data = _generate_cloze_with_openai(analysis_data, target_language)
+
+    # Save cloze data
+    # NOTE(review): replace() drops every "_analyse" occurrence in the stem, not
+    # only a trailing one. Kept unchanged because other code may derive the
+    # same file name - TODO confirm before tightening this to a suffix strip.
+    out_name = analysis_path.stem.replace("_analyse", "") + "_cloze.json"
+    out_path = BEREINIGT_DIR / out_name
+    # Make sure the output directory exists so the write cannot fail on a fresh setup.
+    out_path.parent.mkdir(parents=True, exist_ok=True)
+    out_path.write_text(json.dumps(cloze_data, ensure_ascii=False, indent=2), encoding="utf-8")
+
+    logger.info("Lueckentexte gespeichert: %s", out_path.name)
+    return out_path