fix: Restore all files lost during destructive rebase
A previous `git pull --rebase origin main` dropped 177 local commits,
losing 3400+ files across admin-v2, backend, studio-v2, website,
klausur-service, and many other services. The partial restore attempt
(660295e2) only recovered some files.
This commit restores all missing files from pre-rebase ref 98933f5e
while preserving post-rebase additions (night-scheduler, night-mode UI,
NightModeWidget dashboard integration).
Restored features include:
- AI Module Sidebar (FAB), OCR Labeling, OCR Compare
- GPU Dashboard, RAG Pipeline, Magic Help
- Klausur-Korrektur (8 files), Abitur-Archiv (5+ files)
- Companion, Zeugnisse-Crawler, Screen Flow
- Full backend, studio-v2, website, klausur-service
- All compliance SDKs, agent-core, voice-service
- CI/CD configs, documentation, scripts
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
21
backend/ai_processor/generators/__init__.py
Normal file
21
backend/ai_processor/generators/__init__.py
Normal file
@@ -0,0 +1,21 @@
|
||||
"""
|
||||
AI Processor - Generators Module
|
||||
|
||||
Content generation for multiple choice, cloze, and Q&A.
|
||||
"""
|
||||
|
||||
from .multiple_choice import generate_mc_from_analysis
|
||||
from .cloze import generate_cloze_from_analysis
|
||||
from .qa import (
|
||||
generate_qa_from_analysis,
|
||||
update_leitner_progress,
|
||||
get_next_review_items,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"generate_mc_from_analysis",
|
||||
"generate_cloze_from_analysis",
|
||||
"generate_qa_from_analysis",
|
||||
"update_leitner_progress",
|
||||
"get_next_review_items",
|
||||
]
|
||||
312
backend/ai_processor/generators/cloze.py
Normal file
312
backend/ai_processor/generators/cloze.py
Normal file
@@ -0,0 +1,312 @@
|
||||
"""
|
||||
AI Processor - Cloze Text Generator
|
||||
|
||||
Generate cloze (fill-in-the-blank) texts from worksheet analysis.
|
||||
"""
|
||||
|
||||
from pathlib import Path
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import requests
|
||||
|
||||
from ..config import VISION_API, BEREINIGT_DIR, get_openai_api_key
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Language codes to names
|
||||
LANGUAGE_NAMES = {
|
||||
"tr": "Tuerkisch",
|
||||
"ar": "Arabisch",
|
||||
"ru": "Russisch",
|
||||
"en": "Englisch",
|
||||
"fr": "Franzoesisch",
|
||||
"es": "Spanisch",
|
||||
"pl": "Polnisch",
|
||||
"uk": "Ukrainisch",
|
||||
}
|
||||
|
||||
|
||||
def _generate_cloze_with_openai(analysis_data: dict, target_language: str = "tr") -> dict:
    """
    Generate cloze texts based on worksheet analysis via the OpenAI API.

    Important didactic requirements:
    - Multiple meaningful gaps per sentence (not just one!)
    - Difficulty level matches the original
    - Translation with the same gaps

    Args:
        analysis_data: The analysis JSON of the worksheet
        target_language: Target language for translation (default: "tr" for Turkish)

    Returns:
        Dict with cloze_items and metadata

    Raises:
        RuntimeError: If the API response cannot be parsed as JSON.
        requests.HTTPError: If the OpenAI API returns an error status.
    """
    api_key = get_openai_api_key()

    title = analysis_data.get("title") or "Arbeitsblatt"
    subject = analysis_data.get("subject") or "Allgemein"
    grade_level = analysis_data.get("grade_level") or "unbekannt"
    canonical_text = analysis_data.get("canonical_text") or ""
    printed_blocks = analysis_data.get("printed_blocks") or []

    # Collect all distinct text fragments of the worksheet.
    content_parts = []
    if canonical_text:
        content_parts.append(canonical_text)
    for block in printed_blocks:
        text = block.get("text", "").strip()
        if text and text not in content_parts:
            content_parts.append(text)

    worksheet_content = "\n\n".join(content_parts)

    if not worksheet_content.strip():
        logger.warning("Kein Textinhalt fuer Lueckentext-Generierung gefunden")
        return {"cloze_items": [], "metadata": {"error": "Kein Textinhalt gefunden"}}

    target_lang_name = LANGUAGE_NAMES.get(target_language, "Tuerkisch")

    url = "https://api.openai.com/v1/chat/completions"
    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}

    system_prompt = f"""Du bist ein erfahrener Paedagoge, der Lueckentexte fuer Schueler erstellt.

WICHTIGE REGELN FUER LUECKENTEXTE:

1. MEHRERE LUECKEN PRO SATZ:
- Erstelle IMMER mehrere sinnvolle Luecken pro Satz
- Beispiel: "Ich habe gestern meine Hausaufgaben gemacht."
→ Luecken: "habe" UND "gemacht" (nicht nur eine!)

2. SCHWIERIGKEITSGRAD:
- Niveau muss exakt "{grade_level}" entsprechen

3. SINNVOLLE LUECKENWOERTER:
- Verben (konjugiert)
- Wichtige Nomen
- Adjektive
- KEINE Artikel oder Praepositionen allein

4. UEBERSETZUNG:
- Uebersetze den VOLLSTAENDIGEN Satz auf {target_lang_name}
- Die GLEICHEN Woerter muessen als Luecken markiert sein

5. AUSGABE: Nur gueltiges JSON, kein Markdown."""

    user_prompt = f"""Erstelle Lueckentexte aus diesem Arbeitsblatt:

TITEL: {title}
FACH: {subject}
KLASSENSTUFE: {grade_level}

TEXT:
{worksheet_content}

Erstelle 5-8 Saetze mit Luecken. Gib das Ergebnis als JSON zurueck:

{{
"cloze_items": [
{{
"id": "c1",
"original_sentence": "Der vollstaendige Originalsatz ohne Luecken",
"sentence_with_gaps": "Der Satz mit ___ fuer jede Luecke",
"gaps": [
{{
"id": "g1",
"word": "das fehlende Wort",
"position": 0,
"hint": "optionaler Hinweis"
}}
],
"translation": {{
"language": "{target_language}",
"language_name": "{target_lang_name}",
"full_sentence": "Vollstaendige Uebersetzung",
"sentence_with_gaps": "Uebersetzung mit ___ an gleichen Stellen"
}}
}}
],
"metadata": {{
"subject": "{subject}",
"grade_level": "{grade_level}",
"source_title": "{title}",
"target_language": "{target_language}",
"total_gaps": 0
}}
}}

WICHTIG:
- Jeder Satz MUSS mindestens 2 Luecken haben!
- Position ist der Index des Wortes im Satz (0-basiert)"""

    payload = {
        "model": "gpt-4o-mini",
        "response_format": {"type": "json_object"},
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        "max_tokens": 3000,
        "temperature": 0.7,
    }

    # A request without a timeout can hang forever on a stalled connection.
    response = requests.post(url, headers=headers, json=payload, timeout=120)
    response.raise_for_status()
    data = response.json()

    try:
        content = data["choices"][0]["message"]["content"]
        cloze_data = json.loads(content)
    except (KeyError, json.JSONDecodeError) as e:
        # Chain the cause so the original parse error stays visible.
        raise RuntimeError(f"Fehler bei Lueckentext-Generierung: {e}") from e

    # Calculate total number of gaps
    total_gaps = sum(len(item.get("gaps", [])) for item in cloze_data.get("cloze_items", []))
    if "metadata" in cloze_data:
        cloze_data["metadata"]["total_gaps"] = total_gaps

    return cloze_data
|
||||
|
||||
|
||||
def _generate_cloze_with_claude(analysis_data: dict, target_language: str = "tr") -> dict:
    """
    Generate cloze texts with the Claude API.

    Args:
        analysis_data: The analysis JSON of the worksheet
        target_language: Target language for translation (default: "tr" for Turkish)

    Returns:
        Dict with cloze_items and metadata

    Raises:
        RuntimeError: If ANTHROPIC_API_KEY is unset or the model returns
            invalid JSON.
    """
    import anthropic

    api_key = os.getenv("ANTHROPIC_API_KEY")
    if not api_key:
        raise RuntimeError("ANTHROPIC_API_KEY ist nicht gesetzt.")

    client = anthropic.Anthropic(api_key=api_key)

    title = analysis_data.get("title") or "Arbeitsblatt"
    subject = analysis_data.get("subject") or "Allgemein"
    grade_level = analysis_data.get("grade_level") or "unbekannt"
    canonical_text = analysis_data.get("canonical_text") or ""
    printed_blocks = analysis_data.get("printed_blocks") or []

    # Collect all distinct text fragments of the worksheet.
    content_parts = []
    if canonical_text:
        content_parts.append(canonical_text)
    for block in printed_blocks:
        text = block.get("text", "").strip()
        if text and text not in content_parts:
            content_parts.append(text)

    worksheet_content = "\n\n".join(content_parts)

    if not worksheet_content.strip():
        return {"cloze_items": [], "metadata": {"error": "Kein Textinhalt gefunden"}}

    target_lang_name = LANGUAGE_NAMES.get(target_language, "Tuerkisch")

    prompt = f"""Erstelle Lueckentexte aus diesem Arbeitsblatt.

WICHTIGE REGELN:
1. MEHRERE LUECKEN PRO SATZ (mindestens 2!)
Beispiel: "Ich habe gestern Hausaufgaben gemacht" → Luecken: "habe" UND "gemacht"
2. Schwierigkeitsgrad: exakt "{grade_level}"
3. Uebersetzung auf {target_lang_name} mit gleichen Luecken

TITEL: {title}
FACH: {subject}
KLASSENSTUFE: {grade_level}

TEXT:
{worksheet_content}

Antworte NUR mit diesem JSON (5-8 Saetze):
{{
"cloze_items": [
{{
"id": "c1",
"original_sentence": "Vollstaendiger Satz",
"sentence_with_gaps": "Satz mit ___ fuer Luecken",
"gaps": [
{{"id": "g1", "word": "Lueckenwort", "position": 0, "hint": "Hinweis"}}
],
"translation": {{
"language": "{target_language}",
"language_name": "{target_lang_name}",
"full_sentence": "Uebersetzung",
"sentence_with_gaps": "Uebersetzung mit ___"
}}
}}
],
"metadata": {{
"subject": "{subject}",
"grade_level": "{grade_level}",
"source_title": "{title}",
"target_language": "{target_language}",
"total_gaps": 0
}}
}}"""

    message = client.messages.create(
        model="claude-3-5-sonnet-20241022",
        max_tokens=3000,
        messages=[{"role": "user", "content": prompt}]
    )

    content = message.content[0].text

    try:
        # Strip an optional markdown code fence around the JSON payload.
        if "```json" in content:
            content = content.split("```json")[1].split("```")[0]
        elif "```" in content:
            content = content.split("```")[1].split("```")[0]
        cloze_data = json.loads(content.strip())
    except json.JSONDecodeError as e:
        # Chain the cause so the original parse error stays visible.
        raise RuntimeError(f"Claude hat ungueltiges JSON geliefert: {e}") from e

    # Calculate total number of gaps
    total_gaps = sum(len(item.get("gaps", [])) for item in cloze_data.get("cloze_items", []))
    if "metadata" in cloze_data:
        cloze_data["metadata"]["total_gaps"] = total_gaps

    return cloze_data
|
||||
|
||||
|
||||
def generate_cloze_from_analysis(analysis_path: Path, target_language: str = "tr") -> Path:
    """
    Generate cloze texts from an analysis JSON file.

    The cloze texts will:
    - Have multiple meaningful gaps per sentence
    - Match the difficulty level of the original
    - Include translation to target language

    Uses the API selected by VISION_API; the Claude path falls back to
    OpenAI on any failure so a single misconfigured provider does not
    block generation.

    Args:
        analysis_path: Path to *_analyse.json file
        target_language: Language code for translation (default: "tr" for Turkish)

    Returns:
        Path to generated *_cloze.json file

    Raises:
        FileNotFoundError: If the analysis file does not exist.
        RuntimeError: If the analysis file contains invalid JSON.
    """
    if not analysis_path.exists():
        raise FileNotFoundError(f"Analysedatei nicht gefunden: {analysis_path}")

    try:
        analysis_data = json.loads(analysis_path.read_text(encoding="utf-8"))
    except json.JSONDecodeError as e:
        # Chain the cause so the original parse error stays visible.
        raise RuntimeError(f"Ungueltige Analyse-JSON: {e}") from e

    logger.info(f"Generiere Lueckentexte fuer: {analysis_path.name}")

    # Generate cloze texts (use configured API)
    if VISION_API == "claude":
        try:
            cloze_data = _generate_cloze_with_claude(analysis_data, target_language)
        except Exception as e:
            # Deliberate best-effort fallback to the OpenAI generator.
            logger.warning(f"Claude Lueckentext-Generierung fehlgeschlagen, nutze OpenAI: {e}")
            cloze_data = _generate_cloze_with_openai(analysis_data, target_language)
    else:
        cloze_data = _generate_cloze_with_openai(analysis_data, target_language)

    # Save cloze data
    out_name = analysis_path.stem.replace("_analyse", "") + "_cloze.json"
    out_path = BEREINIGT_DIR / out_name
    out_path.write_text(json.dumps(cloze_data, ensure_ascii=False, indent=2), encoding="utf-8")

    logger.info(f"Lueckentexte gespeichert: {out_path.name}")
    return out_path
|
||||
291
backend/ai_processor/generators/multiple_choice.py
Normal file
291
backend/ai_processor/generators/multiple_choice.py
Normal file
@@ -0,0 +1,291 @@
|
||||
"""
|
||||
AI Processor - Multiple Choice Generator
|
||||
|
||||
Generate multiple choice questions from worksheet analysis.
|
||||
"""
|
||||
|
||||
from pathlib import Path
|
||||
import json
|
||||
import logging
|
||||
import random
|
||||
import os
|
||||
import requests
|
||||
|
||||
from ..config import VISION_API, BEREINIGT_DIR, get_openai_api_key
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _generate_mc_with_openai(analysis_data: dict, num_questions: int = 5) -> dict:
    """
    Generate multiple choice questions based on worksheet analysis.

    Uses OpenAI GPT-4o-mini for generation.
    Difficulty level matches the original (grade_level from analysis).

    Args:
        analysis_data: The analysis JSON of the worksheet
        num_questions: Number of questions to generate (default: 5)

    Returns:
        Dict with questions and metadata

    Raises:
        RuntimeError: If the API response cannot be parsed as JSON.
        requests.HTTPError: If the OpenAI API returns an error status.
    """
    api_key = get_openai_api_key()

    title = analysis_data.get("title") or "Arbeitsblatt"
    subject = analysis_data.get("subject") or "Allgemein"
    grade_level = analysis_data.get("grade_level") or "unbekannt"
    canonical_text = analysis_data.get("canonical_text") or ""
    printed_blocks = analysis_data.get("printed_blocks") or []

    # Collect all distinct text fragments of the worksheet.
    content_parts = []
    if canonical_text:
        content_parts.append(canonical_text)
    for block in printed_blocks:
        text = block.get("text", "").strip()
        if text and text not in content_parts:
            content_parts.append(text)

    worksheet_content = "\n\n".join(content_parts)

    if not worksheet_content.strip():
        logger.warning("Kein Textinhalt fuer MC-Generierung gefunden")
        return {"questions": [], "metadata": {"error": "Kein Textinhalt gefunden"}}

    url = "https://api.openai.com/v1/chat/completions"
    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}

    system_prompt = f"""Du bist ein erfahrener Paedagoge, der Multiple-Choice-Fragen fuer Schueler erstellt.

WICHTIGE REGELN:
1. SCHWIERIGKEITSGRAD: Die Fragen muessen exakt dem Niveau "{grade_level}" entsprechen.
2. INHALTSTREUE: Alle Fragen muessen sich direkt auf den gegebenen Text beziehen.
3. QUALITAET DER DISTRAKTOREN: Muessen plausibel klingen, nicht offensichtlich falsch.
4. AUSGABEFORMAT: Gib deine Antwort AUSSCHLIESSLICH als gueltiges JSON zurueck."""

    user_prompt = f"""Erstelle {num_questions} Multiple-Choice-Fragen basierend auf diesem Arbeitsblatt:

TITEL: {title}
FACH: {subject}
KLASSENSTUFE: {grade_level}

INHALT DES ARBEITSBLATTS:
{worksheet_content}

Gib die Fragen als JSON zurueck:
{{
"questions": [
{{
"id": "q1",
"question": "Die Fragestellung hier",
"options": [
{{"id": "a", "text": "Antwort A"}},
{{"id": "b", "text": "Antwort B"}},
{{"id": "c", "text": "Antwort C"}},
{{"id": "d", "text": "Antwort D"}}
],
"correct_answer": "a",
"explanation": "Kurze Erklaerung warum diese Antwort richtig ist"
}}
],
"metadata": {{
"subject": "{subject}",
"grade_level": "{grade_level}",
"source_title": "{title}",
"num_questions": {num_questions}
}}
}}"""

    payload = {
        "model": "gpt-4o-mini",
        "response_format": {"type": "json_object"},
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        "max_tokens": 2000,
        "temperature": 0.7,
    }

    # A request without a timeout can hang forever on a stalled connection.
    response = requests.post(url, headers=headers, json=payload, timeout=120)
    response.raise_for_status()
    data = response.json()

    try:
        content = data["choices"][0]["message"]["content"]
        mc_data = json.loads(content)
    except (KeyError, json.JSONDecodeError) as e:
        # Chain the cause so the original parse error stays visible.
        raise RuntimeError(f"Fehler bei MC-Generierung: {e}") from e

    return mc_data
|
||||
|
||||
|
||||
def _generate_mc_with_claude(analysis_data: dict, num_questions: int = 5) -> dict:
    """
    Generate multiple choice questions with the Claude API.

    Args:
        analysis_data: The analysis JSON of the worksheet
        num_questions: Number of questions to generate (default: 5)

    Returns:
        Dict with questions and metadata

    Raises:
        RuntimeError: If ANTHROPIC_API_KEY is unset or the model returns
            invalid JSON.
    """
    import anthropic

    api_key = os.getenv("ANTHROPIC_API_KEY")
    if not api_key:
        raise RuntimeError("ANTHROPIC_API_KEY ist nicht gesetzt.")

    client = anthropic.Anthropic(api_key=api_key)

    title = analysis_data.get("title") or "Arbeitsblatt"
    subject = analysis_data.get("subject") or "Allgemein"
    grade_level = analysis_data.get("grade_level") or "unbekannt"
    canonical_text = analysis_data.get("canonical_text") or ""
    printed_blocks = analysis_data.get("printed_blocks") or []

    # Collect all distinct text fragments of the worksheet.
    content_parts = []
    if canonical_text:
        content_parts.append(canonical_text)
    for block in printed_blocks:
        text = block.get("text", "").strip()
        if text and text not in content_parts:
            content_parts.append(text)

    worksheet_content = "\n\n".join(content_parts)

    if not worksheet_content.strip():
        return {"questions": [], "metadata": {"error": "Kein Textinhalt gefunden"}}

    prompt = f"""Erstelle {num_questions} Multiple-Choice-Fragen basierend auf diesem Arbeitsblatt.

WICHTIGE REGELN:
1. SCHWIERIGKEITSGRAD: Exakt Niveau "{grade_level}" - nicht leichter, nicht schwerer
2. INHALTSTREUE: Nur Fragen zum gegebenen Text
3. QUALITAET: Plausible Distraktoren (falsche Antworten)

TITEL: {title}
FACH: {subject}
KLASSENSTUFE: {grade_level}

INHALT:
{worksheet_content}

Antworte NUR mit diesem JSON-Format:
{{
"questions": [
{{
"id": "q1",
"question": "Fragestellung",
"options": [
{{"id": "a", "text": "Antwort A"}},
{{"id": "b", "text": "Antwort B"}},
{{"id": "c", "text": "Antwort C"}},
{{"id": "d", "text": "Antwort D"}}
],
"correct_answer": "a",
"explanation": "Erklaerung"
}}
],
"metadata": {{
"subject": "{subject}",
"grade_level": "{grade_level}",
"source_title": "{title}",
"num_questions": {num_questions}
}}
}}"""

    message = client.messages.create(
        model="claude-3-5-sonnet-20241022",
        max_tokens=2000,
        messages=[{"role": "user", "content": prompt}]
    )

    content = message.content[0].text

    try:
        # Strip an optional markdown code fence around the JSON payload.
        if "```json" in content:
            content = content.split("```json")[1].split("```")[0]
        elif "```" in content:
            content = content.split("```")[1].split("```")[0]
        mc_data = json.loads(content.strip())
    except json.JSONDecodeError as e:
        # Chain the cause so the original parse error stays visible.
        raise RuntimeError(f"Claude hat ungueltiges JSON geliefert: {e}") from e

    return mc_data
|
||||
|
||||
|
||||
def _shuffle_mc_options(mc_data: dict) -> dict:
|
||||
"""
|
||||
Shuffle the answer options for each question randomly.
|
||||
Also updates correct_answer accordingly.
|
||||
"""
|
||||
if "questions" not in mc_data:
|
||||
return mc_data
|
||||
|
||||
for question in mc_data["questions"]:
|
||||
options = question.get("options", [])
|
||||
correct_id = question.get("correct_answer")
|
||||
|
||||
if not options or not correct_id:
|
||||
continue
|
||||
|
||||
# Find the text of the correct answer
|
||||
correct_text = None
|
||||
for opt in options:
|
||||
if opt.get("id") == correct_id:
|
||||
correct_text = opt.get("text")
|
||||
break
|
||||
|
||||
# Shuffle the options
|
||||
random.shuffle(options)
|
||||
|
||||
# Assign new IDs and find new position of correct answer
|
||||
new_ids = ["a", "b", "c", "d"]
|
||||
new_correct = None
|
||||
|
||||
for i, opt in enumerate(options):
|
||||
if i < len(new_ids):
|
||||
if opt.get("text") == correct_text:
|
||||
new_correct = new_ids[i]
|
||||
opt["id"] = new_ids[i]
|
||||
|
||||
if new_correct:
|
||||
question["correct_answer"] = new_correct
|
||||
|
||||
question["options"] = options
|
||||
|
||||
return mc_data
|
||||
|
||||
|
||||
def generate_mc_from_analysis(analysis_path: Path, num_questions: int = 5) -> Path:
    """
    Generate multiple choice questions from an analysis JSON file.

    The questions will:
    - Be based on the extracted text
    - Match the difficulty level of the original
    - Have randomly arranged answers

    Uses the API selected by VISION_API; the Claude path falls back to
    OpenAI on any failure so a single misconfigured provider does not
    block generation.

    Args:
        analysis_path: Path to *_analyse.json file
        num_questions: Number of questions to generate (default: 5)

    Returns:
        Path to generated *_mc.json file

    Raises:
        FileNotFoundError: If the analysis file does not exist.
        RuntimeError: If the analysis file contains invalid JSON.
    """
    if not analysis_path.exists():
        raise FileNotFoundError(f"Analysedatei nicht gefunden: {analysis_path}")

    try:
        analysis_data = json.loads(analysis_path.read_text(encoding="utf-8"))
    except json.JSONDecodeError as e:
        # Chain the cause so the original parse error stays visible.
        raise RuntimeError(f"Ungueltige Analyse-JSON: {e}") from e

    logger.info(f"Generiere MC-Fragen fuer: {analysis_path.name}")

    # Generate MC questions (use configured API)
    if VISION_API == "claude":
        try:
            mc_data = _generate_mc_with_claude(analysis_data, num_questions)
        except Exception as e:
            # Deliberate best-effort fallback to the OpenAI generator.
            logger.warning(f"Claude MC-Generierung fehlgeschlagen, nutze OpenAI: {e}")
            mc_data = _generate_mc_with_openai(analysis_data, num_questions)
    else:
        mc_data = _generate_mc_with_openai(analysis_data, num_questions)

    # Shuffle answer positions
    mc_data = _shuffle_mc_options(mc_data)

    # Save MC data
    out_name = analysis_path.stem.replace("_analyse", "") + "_mc.json"
    out_path = BEREINIGT_DIR / out_name
    out_path.write_text(json.dumps(mc_data, ensure_ascii=False, indent=2), encoding="utf-8")

    logger.info(f"MC-Fragen gespeichert: {out_path.name}")
    return out_path
|
||||
458
backend/ai_processor/generators/qa.py
Normal file
458
backend/ai_processor/generators/qa.py
Normal file
@@ -0,0 +1,458 @@
|
||||
"""
|
||||
AI Processor - Q&A Generator
|
||||
|
||||
Generate question-answer pairs with Leitner system for spaced repetition.
|
||||
"""
|
||||
|
||||
from pathlib import Path
|
||||
from datetime import datetime, timedelta
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import requests
|
||||
|
||||
from ..config import VISION_API, BEREINIGT_DIR, get_openai_api_key
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _generate_qa_with_openai(analysis_data: dict, num_questions: int = 8) -> dict:
    """
    Generate question-answer pairs based on worksheet analysis.

    Important didactic requirements:
    - Questions based almost verbatim on the existing material
    - Only minimal rephrasing allowed
    - Key terms/technical terms marked as important
    - Difficulty level matches the original (grade_level)

    Args:
        analysis_data: The analysis JSON of the worksheet
        num_questions: Number of questions to generate (default: 8)

    Returns:
        Dict with qa_items and metadata

    Raises:
        RuntimeError: If the API response cannot be parsed as JSON.
        requests.HTTPError: If the OpenAI API returns an error status.
    """
    api_key = get_openai_api_key()

    title = analysis_data.get("title") or "Arbeitsblatt"
    subject = analysis_data.get("subject") or "Allgemein"
    grade_level = analysis_data.get("grade_level") or "unbekannt"
    canonical_text = analysis_data.get("canonical_text") or ""
    printed_blocks = analysis_data.get("printed_blocks") or []
    tasks = analysis_data.get("tasks") or []

    # Collect all distinct text fragments, including task descriptions.
    content_parts = []
    if canonical_text:
        content_parts.append(canonical_text)
    for block in printed_blocks:
        text = block.get("text", "").strip()
        if text and text not in content_parts:
            content_parts.append(text)
    for task in tasks:
        desc = task.get("description", "").strip()
        text = task.get("text_with_gaps", "").strip()
        if desc:
            content_parts.append(f"Aufgabe: {desc}")
        if text:
            content_parts.append(text)

    worksheet_content = "\n\n".join(content_parts)

    if not worksheet_content.strip():
        logger.warning("Kein Textinhalt fuer Q&A-Generierung gefunden")
        return {"qa_items": [], "metadata": {"error": "Kein Textinhalt gefunden"}}

    url = "https://api.openai.com/v1/chat/completions"
    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}

    system_prompt = f"""Du bist ein erfahrener Paedagoge, der Frage-Antwort-Paare fuer Schueler erstellt.

WICHTIGE REGELN:

1. INHALTE NUR AUS DEM TEXT:
- Verwende FAST WOERTLICH den vorhandenen Stoff
- KEINE neuen Fakten oder Inhalte einfuehren!
- Alles muss aus dem gegebenen Text ableitbar sein

2. SCHWIERIGKEITSGRAD:
- Niveau muss exakt "{grade_level}" entsprechen

3. SCHLUESSELWOERTER MARKIEREN:
- Identifiziere wichtige Fachbegriffe als "key_terms"

4. FRAGETYPEN:
- Wissensfragen: "Was ist...?", "Nenne..."
- Verstaendnisfragen: "Erklaere...", "Beschreibe..."
- Anwendungsfragen: "Warum...?", "Was passiert, wenn...?"

5. ANTWORT-FORMAT:
- Kurze, praezise Antworten (1-3 Saetze)

6. AUSGABE: Nur gueltiges JSON, kein Markdown."""

    user_prompt = f"""Erstelle {num_questions} Frage-Antwort-Paare aus diesem Arbeitsblatt:

TITEL: {title}
FACH: {subject}
KLASSENSTUFE: {grade_level}

TEXT:
{worksheet_content}

Gib das Ergebnis als JSON zurueck:

{{
"qa_items": [
{{
"id": "qa1",
"question": "Die Frage hier (fast woertlich aus dem Text)",
"answer": "Die korrekte Antwort (direkt aus dem Text)",
"question_type": "knowledge" | "understanding" | "application",
"key_terms": ["wichtiger Begriff 1", "wichtiger Begriff 2"],
"difficulty": 1-3,
"source_hint": "Kurzer Hinweis, wo im Text die Antwort steht",
"leitner_box": 0
}}
],
"metadata": {{
"subject": "{subject}",
"grade_level": "{grade_level}",
"source_title": "{title}",
"total_questions": {num_questions},
"key_terms_summary": ["alle", "wichtigen", "Fachbegriffe", "gesammelt"]
}}
}}

WICHTIG:
- Alle Antworten muessen aus dem Text ableitbar sein!
- "leitner_box": 0 bedeutet "neu" (noch nicht gelernt)
- "difficulty": 1=leicht, 2=mittel, 3=schwer"""

    payload = {
        "model": "gpt-4o-mini",
        "response_format": {"type": "json_object"},
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        "max_tokens": 3000,
        "temperature": 0.5,
    }

    # A request without a timeout can hang forever on a stalled connection.
    response = requests.post(url, headers=headers, json=payload, timeout=120)
    response.raise_for_status()
    data = response.json()

    try:
        content = data["choices"][0]["message"]["content"]
        qa_data = json.loads(content)
    except (KeyError, json.JSONDecodeError) as e:
        # Chain the cause so the original parse error stays visible.
        raise RuntimeError(f"Fehler bei Q&A-Generierung: {e}") from e

    # Initialize Leitner-Box fields for all items
    _initialize_leitner_fields(qa_data)

    return qa_data
|
||||
|
||||
|
||||
def _generate_qa_with_claude(analysis_data: dict, num_questions: int = 8) -> dict:
    """
    Generate question-answer pairs with the Claude API.

    Args:
        analysis_data: The analysis JSON of the worksheet
        num_questions: Number of questions to generate (default: 8)

    Returns:
        Dict with qa_items and metadata

    Raises:
        RuntimeError: If ANTHROPIC_API_KEY is unset or the model returns
            invalid JSON.
    """
    import anthropic

    api_key = os.getenv("ANTHROPIC_API_KEY")
    if not api_key:
        raise RuntimeError("ANTHROPIC_API_KEY ist nicht gesetzt.")

    client = anthropic.Anthropic(api_key=api_key)

    title = analysis_data.get("title") or "Arbeitsblatt"
    subject = analysis_data.get("subject") or "Allgemein"
    grade_level = analysis_data.get("grade_level") or "unbekannt"
    canonical_text = analysis_data.get("canonical_text") or ""
    printed_blocks = analysis_data.get("printed_blocks") or []
    tasks = analysis_data.get("tasks") or []

    # Collect all distinct text fragments of the worksheet.
    # NOTE(review): unlike the OpenAI variant, this path does not include
    # task "text_with_gaps" — confirm whether that asymmetry is intended.
    content_parts = []
    if canonical_text:
        content_parts.append(canonical_text)
    for block in printed_blocks:
        text = block.get("text", "").strip()
        if text and text not in content_parts:
            content_parts.append(text)
    for task in tasks:
        desc = task.get("description", "").strip()
        if desc:
            content_parts.append(f"Aufgabe: {desc}")

    worksheet_content = "\n\n".join(content_parts)

    if not worksheet_content.strip():
        return {"qa_items": [], "metadata": {"error": "Kein Textinhalt gefunden"}}

    prompt = f"""Erstelle {num_questions} Frage-Antwort-Paare aus diesem Arbeitsblatt.

WICHTIGE REGELN:
1. Verwende FAST WOERTLICH den vorhandenen Stoff - KEINE neuen Fakten!
2. Schwierigkeitsgrad: exakt "{grade_level}"
3. Markiere wichtige Fachbegriffe als "key_terms"

TITEL: {title}
FACH: {subject}
KLASSENSTUFE: {grade_level}

TEXT:
{worksheet_content}

Antworte NUR mit diesem JSON:
{{
"qa_items": [
{{
"id": "qa1",
"question": "Frage (fast woertlich aus Text)",
"answer": "Antwort (direkt aus Text)",
"question_type": "knowledge",
"key_terms": ["Begriff1", "Begriff2"],
"difficulty": 1,
"source_hint": "Wo im Text",
"leitner_box": 0
}}
],
"metadata": {{
"subject": "{subject}",
"grade_level": "{grade_level}",
"source_title": "{title}",
"total_questions": {num_questions},
"key_terms_summary": ["alle", "Fachbegriffe"]
}}
}}"""

    message = client.messages.create(
        model="claude-3-5-sonnet-20241022",
        max_tokens=3000,
        messages=[{"role": "user", "content": prompt}]
    )

    content = message.content[0].text

    try:
        # Strip an optional markdown code fence around the JSON payload.
        if "```json" in content:
            content = content.split("```json")[1].split("```")[0]
        elif "```" in content:
            content = content.split("```")[1].split("```")[0]
        qa_data = json.loads(content.strip())
    except json.JSONDecodeError as e:
        # Chain the cause so the original parse error stays visible.
        raise RuntimeError(f"Claude hat ungueltiges JSON geliefert: {e}") from e

    # Initialize Leitner-Box fields
    _initialize_leitner_fields(qa_data)

    return qa_data
|
||||
|
||||
|
||||
def _initialize_leitner_fields(qa_data: dict) -> None:
|
||||
"""Initialize Leitner-Box fields for all Q&A items."""
|
||||
for item in qa_data.get("qa_items", []):
|
||||
if "leitner_box" not in item:
|
||||
item["leitner_box"] = 0
|
||||
if "correct_count" not in item:
|
||||
item["correct_count"] = 0
|
||||
if "incorrect_count" not in item:
|
||||
item["incorrect_count"] = 0
|
||||
if "last_seen" not in item:
|
||||
item["last_seen"] = None
|
||||
if "next_review" not in item:
|
||||
item["next_review"] = None
|
||||
|
||||
|
||||
def generate_qa_from_analysis(analysis_path: Path, num_questions: int = 8) -> Path:
    """
    Generate question-answer pairs from an analysis JSON file.

    The Q&A pairs will:
    - Be based almost verbatim on the original text
    - Be prepared with Leitner-Box system for repetition
    - Have key terms marked for reinforcement

    Args:
        analysis_path: Path to *_analyse.json file
        num_questions: Number of questions to generate

    Returns:
        Path to generated *_qa.json file

    Raises:
        FileNotFoundError: If the analysis file does not exist.
        RuntimeError: If the analysis file is not valid JSON.
    """
    if not analysis_path.exists():
        raise FileNotFoundError(f"Analysedatei nicht gefunden: {analysis_path}")

    try:
        analysis_data = json.loads(analysis_path.read_text(encoding="utf-8"))
    except json.JSONDecodeError as e:
        # Chain the decode error explicitly so the failing offset stays
        # visible as __cause__ instead of only implicit context.
        raise RuntimeError(f"Ungueltige Analyse-JSON: {e}") from e

    logger.info(f"Generiere Q&A-Paare fuer: {analysis_path.name}")

    # Generate Q&A with the configured vision API; fall back from Claude
    # to OpenAI if the Claude call fails for any reason.
    if VISION_API == "claude":
        try:
            qa_data = _generate_qa_with_claude(analysis_data, num_questions)
        except Exception as e:
            logger.warning(f"Claude Q&A-Generierung fehlgeschlagen, nutze OpenAI: {e}")
            qa_data = _generate_qa_with_openai(analysis_data, num_questions)
    else:
        qa_data = _generate_qa_with_openai(analysis_data, num_questions)

    # Save Q&A data next to the other cleaned artifacts
    # (foo_analyse.json -> foo_qa.json).
    out_name = analysis_path.stem.replace("_analyse", "") + "_qa.json"
    out_path = BEREINIGT_DIR / out_name
    out_path.write_text(json.dumps(qa_data, ensure_ascii=False, indent=2), encoding="utf-8")

    logger.info(f"Q&A-Paare gespeichert: {out_path.name}")
    return out_path
||||
# ---------------------------------------------------------------------------
|
||||
# Leitner-Box System for Spaced Repetition
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def update_leitner_progress(qa_path: Path, item_id: str, correct: bool) -> dict:
    """
    Update the learning progress of a Q&A item using the Leitner system.

    Leitner boxes:
    - Box 0: New (not yet learned)
    - Box 1: Learned (on error -> back to box 0)
    - Box 2: Consolidated (on error -> back to box 1)

    On a correct answer the box increases (max 2) and the next review is
    scheduled from the NEW box: box 1 -> 3 days, box 2 -> 7 days.
    On a wrong answer the box decreases (min 0) and the item is
    re-scheduled in 4 hours.

    Args:
        qa_path: Path to *_qa.json file
        item_id: ID of the Q&A item
        correct: True if answered correctly

    Returns:
        Dict with updated item stats ("status" is "OK" or "NOT_FOUND").

    Raises:
        FileNotFoundError: If the Q&A file does not exist.
    """
    if not qa_path.exists():
        raise FileNotFoundError(f"Q&A-Datei nicht gefunden: {qa_path}")

    qa_data = json.loads(qa_path.read_text(encoding="utf-8"))

    # Find the item
    item = None
    for qa_item in qa_data.get("qa_items", []):
        if qa_item.get("id") == item_id:
            item = qa_item
            break

    if not item:
        return {"status": "NOT_FOUND", "message": f"Item {item_id} nicht gefunden"}

    # Ensure the tracking fields exist even for items that were never run
    # through _initialize_leitner_fields — the result dict below reads all
    # three unconditionally, which previously raised KeyError.
    item.setdefault("leitner_box", 0)
    item.setdefault("correct_count", 0)
    item.setdefault("incorrect_count", 0)

    # Update statistics
    item["last_seen"] = datetime.now().isoformat()

    if correct:
        item["correct_count"] += 1
        # Increase box (max 2)
        if item["leitner_box"] < 2:
            item["leitner_box"] += 1
        # Review interval keyed by the NEW box:
        # box 1 -> 3 days, box 2 -> 7 days (index 0 is unreachable here).
        days = [1, 3, 7][item["leitner_box"]]
        item["next_review"] = (datetime.now() + timedelta(days=days)).isoformat()
    else:
        item["incorrect_count"] += 1
        # Decrease box (min 0)
        if item["leitner_box"] > 0:
            item["leitner_box"] -= 1
        # On error: review soon
        item["next_review"] = (datetime.now() + timedelta(hours=4)).isoformat()

    # Save updated data
    qa_path.write_text(json.dumps(qa_data, ensure_ascii=False, indent=2), encoding="utf-8")

    box_names = ["Neu", "Gelernt", "Gefestigt"]
    return {
        "status": "OK",
        "item_id": item_id,
        "correct": correct,
        "new_box": item["leitner_box"],
        "box_name": box_names[item["leitner_box"]],
        "correct_count": item["correct_count"],
        "incorrect_count": item["incorrect_count"],
        "next_review": item["next_review"]
    }
def get_next_review_items(qa_path: Path, limit: int = 5) -> list:
    """
    Get the next items to review.

    Prioritization (most important first):
    1. Items in box 0 - always included, highest priority
    2. Items whose scheduled review time has passed; the longer overdue
       and the more often answered wrong, the more urgent

    Args:
        qa_path: Path to *_qa.json file
        limit: Maximum number of items

    Returns:
        List of items to review, sorted by priority; internal sort keys
        are stripped from the returned dicts.
    """
    if not qa_path.exists():
        return []

    qa_data = json.loads(qa_path.read_text(encoding="utf-8"))
    now = datetime.now()

    candidates = []
    for entry in qa_data.get("qa_items", []):
        box = entry.get("leitner_box", 0)
        due_at = entry.get("next_review")

        # Lower score = more important: box 0 starts at 0, box 2 at 20;
        # frequent mistakes pull the score down.
        score = box * 10 - entry.get("incorrect_count", 0) * 2

        due = True
        if due_at:
            try:
                scheduled = datetime.fromisoformat(due_at)
            except (ValueError, TypeError):
                # Unparseable timestamp: treat the item as due now.
                due = True
            else:
                due = now >= scheduled
                if due:
                    # Boost urgency by how many hours overdue it is.
                    score -= (now - scheduled).total_seconds() / 3600

        # Box-0 items are always offered; others only when due.
        if box == 0 or due:
            candidates.append(dict(entry, _priority=score, _is_due=due))

    candidates.sort(key=lambda c: c["_priority"])

    # Drop internal bookkeeping keys and cap the result length.
    return [
        {k: v for k, v in c.items() if not k.startswith("_")}
        for c in candidates[:limit]
    ]
Reference in New Issue
Block a user