fix: Restore all files lost during destructive rebase

A previous `git pull --rebase origin main` dropped 177 local commits, losing 3400+ files across admin-v2, backend, studio-v2, website, klausur-service, and many other services. The partial restore attempt (660295e2) recovered only some of the files.

This commit restores all missing files from the pre-rebase ref 98933f5e while preserving post-rebase additions (night-scheduler, night-mode UI, NightModeWidget dashboard integration).

Restored features include:
- AI Module Sidebar (FAB), OCR Labeling, OCR Compare
- GPU Dashboard, RAG Pipeline, Magic Help
- Klausur-Korrektur (8 files), Abitur-Archiv (5+ files)
- Companion, Zeugnisse-Crawler, Screen Flow
- Full backend, studio-v2, website, klausur-service
- All compliance SDKs, agent-core, voice-service
- CI/CD configs, documentation, scripts

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-09 09:51:32 +01:00
parent f7487ee240
commit bfdaf63ba9
2009 changed files with 749983 additions and 1731 deletions
--- a/backend/generators/cloze_generator.py
+++ b/backend/generators/cloze_generator.py
@@ -0,0 +1,380 @@
+"""
+Cloze Generator - Erstellt Lückentexte aus Quelltexten.
+
+Generiert:
+- Lückentexte mit ausgeblendeten Schlüsselwörtern
+- Verschiedene Schwierigkeitsgrade
+- Hinweise und Erklärungen
+"""
+
+import logging
+import json
+import re
+from typing import List, Dict, Any, Optional
+from dataclasses import dataclass
+from enum import Enum
+
+# Module-level logger, named after this module via __name__.
+logger = logging.getLogger(__name__)
+
+
+class ClozeType(str, Enum):
+    """Kinds of cloze text; each member's value is a plain string."""
+    FILL_IN = "fill_in"         # Free-text entry
+    DRAG_DROP = "drag_drop"     # Drag & drop
+    DROPDOWN = "dropdown"       # Dropdown selection
+
+
+@dataclass
+class ClozeGap:
+    """A single gap in a cloze text."""
+    position: int           # 0-based position of the gap; ClozeGenerator assigns a running index here
+    answer: str             # Correct answer
+    alternatives: List[str] # Alternative answers that are also accepted as correct
+    hint: Optional[str]     # Hint for the learner (may be None)
+    distractors: List[str]  # Wrong options (for dropdown / drag & drop)
+
+
+@dataclass
+class ClozeText:
+    """A complete cloze text: the gapped text plus the data for every gap."""
+    text_with_gaps: str     # Text in which gaps are marked by placeholders
+    original_text: str      # Unmodified source text
+    gaps: List[ClozeGap]    # The gaps, in the order they were created
+    cloze_type: ClozeType   # Kind of cloze text
+    topic: Optional[str]    # Topic (may be None)
+    difficulty: str         # easy, medium, hard
+
+
+class ClozeGenerator:
+    """
+    Generiert Lückentexte aus Quelltexten.
+
+    Unterstützt verschiedene Modi:
+    - Automatische Erkennung wichtiger Begriffe
+    - LLM-basierte intelligente Auswahl
+    - Manuelle Vorgabe von Lücken
+    """
+
+    def __init__(self, llm_client=None):
+        """
+        Initialisiert den Generator.
+
+        Args:
+            llm_client: Optional - LLM-Client für intelligente Generierung
+        """
+        self.llm_client = llm_client
+        logger.info("ClozeGenerator initialized")
+
+        # Wortarten, die oft als Lücken geeignet sind
+        self._important_pos = {"NOUN", "VERB", "ADJ"}  # Substantive, Verben, Adjektive
+
+    def generate(
+        self,
+        source_text: str,
+        num_gaps: int = 5,
+        difficulty: str = "medium",
+        cloze_type: ClozeType = ClozeType.FILL_IN,
+        topic: Optional[str] = None
+    ) -> ClozeText:
+        """
+        Generiert einen Lückentext aus einem Quelltext.
+
+        Args:
+            source_text: Der Ausgangstext
+            num_gaps: Anzahl der Lücken
+            difficulty: Schwierigkeitsgrad (easy, medium, hard)
+            cloze_type: Art des Lückentexts
+            topic: Optionales Thema
+
+        Returns:
+            ClozeText-Objekt
+        """
+        logger.info(f"Generating cloze text with {num_gaps} gaps (difficulty: {difficulty})")
+
+        if not source_text or len(source_text.strip()) < 50:
+            logger.warning("Source text too short")
+            return self._empty_cloze(source_text, cloze_type)
+
+        if self.llm_client:
+            return self._generate_with_llm(
+                source_text, num_gaps, difficulty, cloze_type, topic
+            )
+        else:
+            return self._generate_automatic(
+                source_text, num_gaps, difficulty, cloze_type, topic
+            )
+
+    def _generate_with_llm(
+        self,
+        source_text: str,
+        num_gaps: int,
+        difficulty: str,
+        cloze_type: ClozeType,
+        topic: Optional[str]
+    ) -> ClozeText:
+        """Generiert Lückentext mit LLM."""
+        prompt = f"""
+Erstelle einen Lückentext auf Deutsch basierend auf folgendem Text.
+Ersetze {num_gaps} wichtige Begriffe durch Lücken.
+Schwierigkeitsgrad: {difficulty}
+{f'Thema: {topic}' if topic else ''}
+
+Originaltext:
+{source_text}
+
+Wähle {num_gaps} wichtige Begriffe (Substantive, Verben, Fachbegriffe) aus.
+Für jeden Begriff gib an:
+- Das Wort, das ausgeblendet wird
+- Alternative Schreibweisen (falls vorhanden)
+- Einen Hinweis
+- 3 ähnliche aber falsche Wörter (Distraktoren)
+
+Antworte im JSON-Format:
+{{
+  "gaps": [
+    {{
+      "word": "Photosynthese",
+      "alternatives": ["Fotosynthese"],
+      "hint": "Prozess bei dem Pflanzen Licht nutzen",
+      "distractors": ["Zellatmung", "Osmose", "Diffusion"]
+    }}
+  ]
+}}
+"""
+
+        try:
+            response = self.llm_client.generate(prompt)
+            data = json.loads(response)
+            return self._create_cloze_from_llm(
+                source_text, data, difficulty, cloze_type, topic
+            )
+        except Exception as e:
+            logger.error(f"Error generating with LLM: {e}")
+            return self._generate_automatic(
+                source_text, num_gaps, difficulty, cloze_type, topic
+            )
+
+    def _generate_automatic(
+        self,
+        source_text: str,
+        num_gaps: int,
+        difficulty: str,
+        cloze_type: ClozeType,
+        topic: Optional[str]
+    ) -> ClozeText:
+        """Generiert Lückentext automatisch ohne LLM."""
+        # Finde wichtige Wörter
+        words = self._find_important_words(source_text)
+
+        # Wähle Wörter basierend auf Schwierigkeit
+        selected = self._select_words_by_difficulty(words, num_gaps, difficulty)
+
+        # Erstelle Lücken
+        gaps = []
+        text_with_gaps = source_text
+
+        for i, (word, pos) in enumerate(selected):
+            # Position im aktuellen Text finden
+            match = re.search(r'\b' + re.escape(word) + r'\b', text_with_gaps)
+            if match:
+                # Ersetze durch Platzhalter
+                placeholder = f"[_{i+1}_]"
+                text_with_gaps = text_with_gaps[:match.start()] + placeholder + text_with_gaps[match.end():]
+
+                gap = ClozeGap(
+                    position=i,
+                    answer=word,
+                    alternatives=[word.lower(), word.upper()],
+                    hint=self._generate_hint(word, source_text),
+                    distractors=self._generate_distractors(word, words)
+                )
+                gaps.append(gap)
+
+        return ClozeText(
+            text_with_gaps=text_with_gaps,
+            original_text=source_text,
+            gaps=gaps,
+            cloze_type=cloze_type,
+            topic=topic,
+            difficulty=difficulty
+        )
+
+    def _find_important_words(self, text: str) -> List[tuple]:
+        """Findet wichtige Wörter im Text."""
+        # Einfache Heuristik: Längere Wörter sind oft wichtiger
+        words = re.findall(r'\b[A-Za-zäöüÄÖÜß]{4,}\b', text)
+
+        # Zähle Häufigkeit
+        word_count = {}
+        for word in words:
+            word_lower = word.lower()
+            word_count[word_lower] = word_count.get(word_lower, 0) + 1
+
+        # Sortiere nach Länge und Häufigkeit
+        unique_words = list(set(words))
+        scored = []
+        for word in unique_words:
+            score = len(word) + word_count[word.lower()] * 2
+            # Bevorzuge Wörter mit Großbuchstaben (Substantive)
+            if word[0].isupper():
+                score += 3
+            scored.append((word, score))
+
+        scored.sort(key=lambda x: x[1], reverse=True)
+        return [(w, s) for w, s in scored]
+
+    def _select_words_by_difficulty(
+        self,
+        words: List[tuple],
+        num_gaps: int,
+        difficulty: str
+    ) -> List[tuple]:
+        """Wählt Wörter basierend auf Schwierigkeit."""
+        if difficulty == "easy":
+            # Einfach: Häufige, wichtige Wörter
+            return words[:num_gaps]
+        elif difficulty == "hard":
+            # Schwer: Weniger häufige Wörter
+            return words[num_gaps:num_gaps*2] if len(words) > num_gaps else words[:num_gaps]
+        else:
+            # Medium: Mischung
+            return words[:num_gaps]
+
+    def _generate_hint(self, word: str, text: str) -> str:
+        """Generiert einen Hinweis für ein Wort."""
+        # Einfacher Hinweis basierend auf Kontext
+        sentences = text.split('.')
+        for sentence in sentences:
+            if word in sentence:
+                # Extrahiere Kontext
+                words_in_sentence = sentence.split()
+                if len(words_in_sentence) > 5:
+                    return f"Beginnt mit '{word[0]}' ({len(word)} Buchstaben)"
+        return f"Beginnt mit '{word[0]}'"
+
+    def _generate_distractors(self, word: str, all_words: List[tuple]) -> List[str]:
+        """Generiert Distraktoren (falsche Optionen)."""
+        distractors = []
+        word_len = len(word)
+
+        # Finde ähnlich lange Wörter
+        for w, _ in all_words:
+            if w.lower() != word.lower():
+                if abs(len(w) - word_len) <= 2:
+                    distractors.append(w)
+                    if len(distractors) >= 3:
+                        break
+
+        # Falls nicht genug, füge generische hinzu
+        while len(distractors) < 3:
+            distractors.append(f"[Option {len(distractors)+1}]")
+
+        return distractors[:3]
+
+    def _create_cloze_from_llm(
+        self,
+        source_text: str,
+        data: Dict[str, Any],
+        difficulty: str,
+        cloze_type: ClozeType,
+        topic: Optional[str]
+    ) -> ClozeText:
+        """Erstellt ClozeText aus LLM-Antwort."""
+        text_with_gaps = source_text
+        gaps = []
+
+        for i, gap_data in enumerate(data.get("gaps", [])):
+            word = gap_data.get("word", "")
+            if word:
+                # Ersetze im Text
+                pattern = r'\b' + re.escape(word) + r'\b'
+                placeholder = f"[_{i+1}_]"
+                text_with_gaps = re.sub(pattern, placeholder, text_with_gaps, count=1)
+
+                gap = ClozeGap(
+                    position=i,
+                    answer=word,
+                    alternatives=gap_data.get("alternatives", []),
+                    hint=gap_data.get("hint"),
+                    distractors=gap_data.get("distractors", [])
+                )
+                gaps.append(gap)
+
+        return ClozeText(
+            text_with_gaps=text_with_gaps,
+            original_text=source_text,
+            gaps=gaps,
+            cloze_type=cloze_type,
+            topic=topic,
+            difficulty=difficulty
+        )
+
+    def _empty_cloze(self, text: str, cloze_type: ClozeType) -> ClozeText:
+        """Erstellt leeren ClozeText bei Fehler."""
+        return ClozeText(
+            text_with_gaps=text,
+            original_text=text,
+            gaps=[],
+            cloze_type=cloze_type,
+            topic=None,
+            difficulty="medium"
+        )
+
+    def to_h5p_format(self, cloze: ClozeText) -> Dict[str, Any]:
+        """
+        Konvertiert Lückentext ins H5P-Format.
+
+        Args:
+            cloze: ClozeText-Objekt
+
+        Returns:
+            H5P-kompatibles Dict
+        """
+        # H5P Fill in the Blanks Format
+        h5p_text = cloze.text_with_gaps
+
+        # Ersetze Platzhalter durch H5P-Format
+        for i, gap in enumerate(cloze.gaps):
+            placeholder = f"[_{i+1}_]"
+            answers = [gap.answer] + gap.alternatives
+            h5p_answer = "/".join(answers)
+
+            if cloze.cloze_type == ClozeType.DROPDOWN:
+                # Mit Distraktoren
+                all_options = answers + gap.distractors
+                h5p_answer = "/".join(all_options)
+
+            h5p_text = h5p_text.replace(placeholder, f"*{h5p_answer}*")
+
+        return {
+            "library": "H5P.Blanks",
+            "params": {
+                "text": h5p_text,
+                "behaviour": {
+                    "enableRetry": True,
+                    "enableSolutionsButton": True,
+                    "caseSensitive": False,
+                    "showSolutionsRequiresInput": True
+                }
+            }
+        }
+
+    def to_dict(self, cloze: ClozeText) -> Dict[str, Any]:
+        """Konvertiert ClozeText zu Dictionary-Format."""
+        return {
+            "text_with_gaps": cloze.text_with_gaps,
+            "original_text": cloze.original_text,
+            "gaps": [
+                {
+                    "position": gap.position,
+                    "answer": gap.answer,
+                    "alternatives": gap.alternatives,
+                    "hint": gap.hint,
+                    "distractors": gap.distractors
+                }
+                for gap in cloze.gaps
+            ],
+            "cloze_type": cloze.cloze_type.value,
+            "topic": cloze.topic,
+            "difficulty": cloze.difficulty
+        }