fix: Restore all files lost during destructive rebase
A previous `git pull --rebase origin main` dropped 177 local commits,
losing 3400+ files across admin-v2, backend, studio-v2, website,
klausur-service, and many other services. The partial restore attempt
(660295e2) only recovered some files.
This commit restores all missing files from pre-rebase ref 98933f5e
while preserving post-rebase additions (night-scheduler, night-mode UI,
NightModeWidget dashboard integration).
Restored features include:
- AI Module Sidebar (FAB), OCR Labeling, OCR Compare
- GPU Dashboard, RAG Pipeline, Magic Help
- Klausur-Korrektur (8 files), Abitur-Archiv (5+ files)
- Companion, Zeugnisse-Crawler, Screen Flow
- Full backend, studio-v2, website, klausur-service
- All compliance SDKs, agent-core, voice-service
- CI/CD configs, documentation, scripts
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
380
backend/generators/cloze_generator.py
Normal file
380
backend/generators/cloze_generator.py
Normal file
@@ -0,0 +1,380 @@
|
||||
"""
|
||||
Cloze Generator - Erstellt Lückentexte aus Quelltexten.
|
||||
|
||||
Generiert:
|
||||
- Lückentexte mit ausgeblendeten Schlüsselwörtern
|
||||
- Verschiedene Schwierigkeitsgrade
|
||||
- Hinweise und Erklärungen
|
||||
"""
|
||||
|
||||
import logging
|
||||
import json
|
||||
import re
|
||||
from typing import List, Dict, Any, Optional
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ClozeType(str, Enum):
    """Kinds of cloze exercises, i.e. how the learner fills a gap.

    Members are also ``str`` instances, so they compare equal to their
    plain string values.
    """

    # Learner types the answer freely.
    FILL_IN = "fill_in"
    # Learner drags an option into the gap.
    DRAG_DROP = "drag_drop"
    # Learner picks the answer from a dropdown list.
    DROPDOWN = "dropdown"
|
||||
|
||||
|
||||
@dataclass
class ClozeGap:
    """A single gap of a cloze text."""

    # 0-based ordinal of the gap; the generator marks the gap in the text
    # with the placeholder "[_<position + 1>_]".
    position: int
    # The correct answer (the word removed from the text).
    answer: str
    # Further spellings that are accepted as correct.
    alternatives: List[str]
    # Optional hint shown to the learner.
    hint: Optional[str]
    # Wrong options, offered for dropdown / drag-and-drop exercises.
    distractors: List[str]
|
||||
|
||||
|
||||
@dataclass
class ClozeText:
    """A complete cloze exercise: the gapped text plus its gap metadata."""

    # Text in which the removed words are replaced by placeholders.
    text_with_gaps: str
    # The unmodified source text.
    original_text: str
    # The gaps contained in ``text_with_gaps``.
    gaps: List[ClozeGap]
    # How the learner is expected to fill the gaps.
    cloze_type: ClozeType
    # Optional topic of the text.
    topic: Optional[str]
    # Difficulty label: "easy", "medium" or "hard".
    difficulty: str
|
||||
|
||||
|
||||
class ClozeGenerator:
    """
    Generate cloze (fill-in-the-gap) texts from source texts.

    Two strategies are implemented:
    - LLM-based selection of gap words when an ``llm_client`` is supplied
    - a length/frequency heuristic otherwise (also used as the fallback
      whenever the LLM path fails or yields no usable gap)

    Gaps are marked in the text with placeholders of the form ``[_1_]``,
    ``[_2_]``, ... The n-th entry of ``ClozeText.gaps`` always belongs to
    placeholder ``[_n_]``; the numbering is contiguous.
    """

    def __init__(self, llm_client=None):
        """
        Initialise the generator.

        Args:
            llm_client: Optional LLM client. Only its ``generate(prompt)``
                method is used. Without a client the heuristic generator
                is used.
        """
        self.llm_client = llm_client
        logger.info("ClozeGenerator initialized")

        # Part-of-speech tags regarded as good gap candidates (nouns, verbs,
        # adjectives). No method of this class consults the set yet.
        self._important_pos = {"NOUN", "VERB", "ADJ"}

    def generate(
        self,
        source_text: str,
        num_gaps: int = 5,
        difficulty: str = "medium",
        cloze_type: ClozeType = ClozeType.FILL_IN,
        topic: Optional[str] = None
    ) -> ClozeText:
        """
        Generate a cloze text from a source text.

        Args:
            source_text: The text to turn into an exercise.
            num_gaps: Number of gaps requested.
            difficulty: Difficulty label (easy, medium, hard).
            cloze_type: Kind of cloze exercise.
            topic: Optional topic.

        Returns:
            A ClozeText. If the stripped source text is shorter than 50
            characters, a ClozeText without gaps is returned.
        """
        logger.info(
            "Generating cloze text with %s gaps (difficulty: %s)",
            num_gaps, difficulty,
        )

        if not source_text or len(source_text.strip()) < 50:
            logger.warning("Source text too short")
            return self._empty_cloze(source_text, cloze_type)

        if self.llm_client:
            return self._generate_with_llm(
                source_text, num_gaps, difficulty, cloze_type, topic
            )
        return self._generate_automatic(
            source_text, num_gaps, difficulty, cloze_type, topic
        )

    def _generate_with_llm(
        self,
        source_text: str,
        num_gaps: int,
        difficulty: str,
        cloze_type: ClozeType,
        topic: Optional[str]
    ) -> ClozeText:
        """
        Generate the cloze text with the LLM client.

        Falls back to ``_generate_automatic`` when the client raises, the
        response cannot be parsed, or none of the returned words can be
        located in the source text.
        """
        # The prompt lines are flush-left so that no source-code indentation
        # leaks into the text sent to the LLM.
        prompt = f"""
Erstelle einen Lückentext auf Deutsch basierend auf folgendem Text.
Ersetze {num_gaps} wichtige Begriffe durch Lücken.
Schwierigkeitsgrad: {difficulty}
{f'Thema: {topic}' if topic else ''}

Originaltext:
{source_text}

Wähle {num_gaps} wichtige Begriffe (Substantive, Verben, Fachbegriffe) aus.
Für jeden Begriff gib an:
- Das Wort, das ausgeblendet wird
- Alternative Schreibweisen (falls vorhanden)
- Einen Hinweis
- 3 ähnliche aber falsche Wörter (Distraktoren)

Antworte im JSON-Format:
{{
"gaps": [
{{
"word": "Photosynthese",
"alternatives": ["Fotosynthese"],
"hint": "Prozess bei dem Pflanzen Licht nutzen",
"distractors": ["Zellatmung", "Osmose", "Diffusion"]
}}
]
}}
"""

        try:
            response = self.llm_client.generate(prompt)
            data = self._parse_llm_json(response)
            cloze = self._create_cloze_from_llm(
                source_text, data, difficulty, cloze_type, topic
            )
        except Exception as e:
            # Deliberate best-effort boundary: whatever goes wrong with the
            # LLM, the caller still gets a heuristic cloze text.
            logger.error(f"Error generating with LLM: {e}")
            return self._generate_automatic(
                source_text, num_gaps, difficulty, cloze_type, topic
            )

        if not cloze.gaps:
            # None of the proposed words occurs in the text, so the result
            # would be an exercise without any gap.
            logger.warning(
                "LLM response contained no usable gaps; "
                "falling back to automatic generation"
            )
            return self._generate_automatic(
                source_text, num_gaps, difficulty, cloze_type, topic
            )
        return cloze

    @staticmethod
    def _parse_llm_json(response: str) -> Dict[str, Any]:
        """
        Parse the LLM response as JSON.

        If the response as a whole is not valid JSON (e.g. it is wrapped in
        a Markdown code fence or surrounded by prose), the span from the
        first ``{`` to the last ``}`` is parsed instead.

        Raises:
            json.JSONDecodeError: If no JSON document can be extracted.
        """
        try:
            return json.loads(response)
        except json.JSONDecodeError:
            start = response.find("{")
            end = response.rfind("}")
            if start == -1 or end <= start:
                raise
            return json.loads(response[start:end + 1])

    @staticmethod
    def _replace_first_word(text: str, word: str, placeholder: str) -> Optional[str]:
        """
        Replace the first whole-word occurrence of ``word`` in ``text``.

        The lookarounds behave like ``\\b`` for ordinary words but also work
        for words that start or end with a non-word character.

        Returns:
            The text with the placeholder inserted, or None if ``word`` does
            not occur as a whole word.
        """
        match = re.search(r'(?<!\w)' + re.escape(word) + r'(?!\w)', text)
        if match is None:
            return None
        return text[:match.start()] + placeholder + text[match.end():]

    def _generate_automatic(
        self,
        source_text: str,
        num_gaps: int,
        difficulty: str,
        cloze_type: ClozeType,
        topic: Optional[str]
    ) -> ClozeText:
        """Generate the cloze text heuristically, without an LLM."""
        words = self._find_important_words(source_text)
        selected = self._select_words_by_difficulty(words, num_gaps, difficulty)

        gaps: List[ClozeGap] = []
        text_with_gaps = source_text

        for word, _score in selected:
            # Number by gaps actually created so that placeholder numbers
            # stay contiguous even if a word cannot be located.
            index = len(gaps)
            replaced = self._replace_first_word(
                text_with_gaps, word, f"[_{index + 1}_]"
            )
            if replaced is None:
                continue
            text_with_gaps = replaced

            gaps.append(ClozeGap(
                position=index,
                answer=word,
                alternatives=[word.lower(), word.upper()],
                hint=self._generate_hint(word, source_text),
                distractors=self._generate_distractors(word, words)
            ))

        return ClozeText(
            text_with_gaps=text_with_gaps,
            original_text=source_text,
            gaps=gaps,
            cloze_type=cloze_type,
            topic=topic,
            difficulty=difficulty
        )

    def _find_important_words(self, text: str) -> List[tuple]:
        """
        Find candidate gap words in the text.

        Heuristic: a word scores its length, plus 2 per (case-insensitive)
        occurrence, plus 3 if it is capitalised (German nouns).

        Returns:
            ``(word, score)`` tuples sorted by descending score. Ties keep
            the order of first occurrence in the text, so the result is
            deterministic.
        """
        # Only words of at least four letters are considered.
        words = re.findall(r'\b[A-Za-zäöüÄÖÜß]{4,}\b', text)

        # Case-insensitive frequency of every word.
        word_count: Dict[str, int] = {}
        for word in words:
            key = word.lower()
            word_count[key] = word_count.get(key, 0) + 1

        scored = []
        # dict.fromkeys de-duplicates while keeping first-occurrence order;
        # a set would make the order of equally scored words vary per run.
        for word in dict.fromkeys(words):
            score = len(word) + word_count[word.lower()] * 2
            if word[0].isupper():
                score += 3
            scored.append((word, score))

        # list.sort is stable, also with reverse=True.
        scored.sort(key=lambda item: item[1], reverse=True)
        return scored

    def _select_words_by_difficulty(
        self,
        words: List[tuple],
        num_gaps: int,
        difficulty: str
    ) -> List[tuple]:
        """
        Select the gap words for the requested difficulty.

        "easy" and "medium" (and any unknown label) take the top-scored
        words. "hard" takes the next-ranked group, i.e. lower-scored words.

        Returns:
            At most ``num_gaps`` ``(word, score)`` tuples; fewer only if
            ``words`` itself is shorter.
        """
        count = max(0, num_gaps)  # a negative count would slice from the end

        if difficulty == "hard":
            # Prefer ranks count..2*count-1. If there are not enough words
            # for that, slide the window up so that `count` words are still
            # returned whenever that many exist.
            start = min(count, max(0, len(words) - count))
            return words[start:start + count]

        return words[:count]

    def _generate_hint(self, word: str, text: str) -> str:
        """
        Generate a hint for a word.

        The hint reveals the first letter. If the word occurs in a sentence
        (text split on ".") of more than five words, it also reveals the
        word length.
        """
        for sentence in text.split('.'):
            if word in sentence and len(sentence.split()) > 5:
                return f"Beginnt mit '{word[0]}' ({len(word)} Buchstaben)"
        return f"Beginnt mit '{word[0]}'"

    def _generate_distractors(self, word: str, all_words: List[tuple]) -> List[str]:
        """
        Generate three distractors (wrong options) for a word.

        Takes the first candidate words whose length differs from the
        answer by at most two characters; missing slots are filled with
        generic "[Option n]" entries.
        """
        distractors = []
        word_len = len(word)

        for candidate, _score in all_words:
            if candidate.lower() == word.lower():
                continue
            if abs(len(candidate) - word_len) <= 2:
                distractors.append(candidate)
                if len(distractors) >= 3:
                    break

        while len(distractors) < 3:
            distractors.append(f"[Option {len(distractors)+1}]")

        return distractors[:3]

    def _create_cloze_from_llm(
        self,
        source_text: str,
        data: Dict[str, Any],
        difficulty: str,
        cloze_type: ClozeType,
        topic: Optional[str]
    ) -> ClozeText:
        """
        Build a ClozeText from the parsed LLM response.

        Entries without a usable ``word`` and words that do not occur in
        the source text are skipped, so that every returned gap has a
        placeholder in the text and placeholder numbers are contiguous.
        """
        text_with_gaps = source_text
        gaps: List[ClozeGap] = []

        for gap_data in data.get("gaps") or []:
            if not isinstance(gap_data, dict):
                continue
            word = gap_data.get("word", "")
            if not word or not isinstance(word, str):
                continue

            index = len(gaps)
            replaced = self._replace_first_word(
                text_with_gaps, word, f"[_{index + 1}_]"
            )
            if replaced is None:
                logger.warning(
                    "LLM gap word %r not found in source text; skipping", word
                )
                continue
            text_with_gaps = replaced

            gaps.append(ClozeGap(
                position=index,
                answer=word,
                # `or []` also covers an explicit JSON null.
                alternatives=gap_data.get("alternatives") or [],
                hint=gap_data.get("hint"),
                distractors=gap_data.get("distractors") or []
            ))

        return ClozeText(
            text_with_gaps=text_with_gaps,
            original_text=source_text,
            gaps=gaps,
            cloze_type=cloze_type,
            topic=topic,
            difficulty=difficulty
        )

    def _empty_cloze(self, text: str, cloze_type: ClozeType) -> ClozeText:
        """
        Create a ClozeText without gaps (used when generation is not possible).

        A ``None`` text is normalised to the empty string so that the text
        fields are always strings.
        """
        safe_text = text or ""
        return ClozeText(
            text_with_gaps=safe_text,
            original_text=safe_text,
            gaps=[],
            cloze_type=cloze_type,
            topic=None,
            difficulty="medium"
        )

    def to_h5p_format(self, cloze: ClozeText) -> Dict[str, Any]:
        """
        Convert a cloze text to the H5P "Fill in the Blanks" format.

        Args:
            cloze: The ClozeText to convert.

        Returns:
            Dict with the H5P library name and its parameters. In the text
            each placeholder ``[_n_]`` is replaced by ``*answer/alt/...*``.
        """
        h5p_text = cloze.text_with_gaps

        for i, gap in enumerate(cloze.gaps):
            placeholder = f"[_{i+1}_]"
            options = [gap.answer] + list(gap.alternatives or [])

            if cloze.cloze_type == ClozeType.DROPDOWN:
                # NOTE(review): the distractors are joined with "/" like the
                # correct answers. H5P.Blanks treats "/" as the separator of
                # accepted answers - confirm this is intended.
                options = options + list(gap.distractors or [])

            # Drop exact duplicates (e.g. an alternative equal to the
            # answer) while keeping the order.
            h5p_answer = "/".join(dict.fromkeys(options))

            h5p_text = h5p_text.replace(placeholder, f"*{h5p_answer}*")

        return {
            "library": "H5P.Blanks",
            "params": {
                "text": h5p_text,
                "behaviour": {
                    "enableRetry": True,
                    "enableSolutionsButton": True,
                    "caseSensitive": False,
                    "showSolutionsRequiresInput": True
                }
            }
        }

    def to_dict(self, cloze: ClozeText) -> Dict[str, Any]:
        """Convert a ClozeText to a plain dictionary."""
        return {
            "text_with_gaps": cloze.text_with_gaps,
            "original_text": cloze.original_text,
            "gaps": [
                {
                    "position": gap.position,
                    "answer": gap.answer,
                    "alternatives": gap.alternatives,
                    "hint": gap.hint,
                    "distractors": gap.distractors
                }
                for gap in cloze.gaps
            ],
            "cloze_type": cloze.cloze_type.value,
            "topic": cloze.topic,
            "difficulty": cloze.difficulty
        }
|
||||
Reference in New Issue
Block a user