A previous `git pull --rebase origin main` dropped 177 local commits,
losing 3400+ files across admin-v2, backend, studio-v2, website,
klausur-service, and many other services. The partial restore attempt
(660295e2) only recovered some files.
This commit restores all missing files from pre-rebase ref 98933f5e
while preserving post-rebase additions (night-scheduler, night-mode UI,
NightModeWidget dashboard integration).
Restored features include:
- AI Module Sidebar (FAB), OCR Labeling, OCR Compare
- GPU Dashboard, RAG Pipeline, Magic Help
- Klausur-Korrektur (8 files), Abitur-Archiv (5+ files)
- Companion, Zeugnisse-Crawler, Screen Flow
- Full backend, studio-v2, website, klausur-service
- All compliance SDKs, agent-core, voice-service
- CI/CD configs, documentation, scripts
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
381 lines
12 KiB
Python
"""
Cloze Generator - creates cloze (fill-in-the-blank) texts from source texts.

Generates:
- Cloze texts with key words blanked out
- Multiple difficulty levels
- Hints and explanations
"""
|
|
|
|
import logging
|
|
import json
|
|
import re
|
|
from typing import List, Dict, Any, Optional
|
|
from dataclasses import dataclass
|
|
from enum import Enum
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class ClozeType(str, Enum):
    """Kinds of cloze exercises.

    Members also subclass ``str``, so each one compares equal to its raw
    string value.
    """

    # Free-text entry.
    FILL_IN = "fill_in"
    # Drag & drop.
    DRAG_DROP = "drag_drop"
    # Dropdown selection.
    DROPDOWN = "dropdown"
|
|
|
|
|
|
@dataclass
class ClozeGap:
    """A single gap in a cloze text.

    Attributes:
        position: Position in the text (zero-based).
        answer: The correct answer.
        alternatives: Alternative answers that are also correct.
        hint: Optional hint.
        distractors: Wrong options (for dropdown / drag-and-drop).
    """

    position: int
    answer: str
    alternatives: List[str]
    hint: Optional[str]
    distractors: List[str]
|
|
|
|
|
|
@dataclass
class ClozeText:
    """A complete cloze text.

    Attributes:
        text_with_gaps: The text with placeholders in place of the gaps.
        original_text: The unmodified source text.
        gaps: The gaps belonging to this text.
        cloze_type: The kind of cloze exercise.
        topic: Optional topic.
        difficulty: One of "easy", "medium", "hard".
    """

    text_with_gaps: str
    original_text: str
    gaps: List[ClozeGap]
    cloze_type: ClozeType
    topic: Optional[str]
    difficulty: str
|
|
|
|
|
|
class ClozeGenerator:
    """Generate cloze (fill-in-the-blank) texts from a source text.

    Two generation paths are implemented:

    - LLM-based selection of the gap words, used when an ``llm_client`` was
      supplied.
    - An automatic heuristic (word length, frequency, capitalisation), used
      otherwise and as a fallback whenever the LLM path raises.

    Gaps are marked in the text with placeholders of the form ``[_N_]``
    where ``N`` is the 1-based index of the gap in ``ClozeText.gaps``.
    """

    def __init__(self, llm_client=None):
        """Initialise the generator.

        Args:
            llm_client: Optional LLM client. Only ``generate(prompt)`` is
                called on it; when omitted the heuristic path is used.
        """
        self.llm_client = llm_client
        logger.info("ClozeGenerator initialized")

        # Parts of speech that often make good gaps (nouns, verbs,
        # adjectives). Not referenced by the other methods of this class.
        self._important_pos = {"NOUN", "VERB", "ADJ"}

    def generate(
        self,
        source_text: str,
        num_gaps: int = 5,
        difficulty: str = "medium",
        cloze_type: ClozeType = ClozeType.FILL_IN,
        topic: Optional[str] = None
    ) -> ClozeText:
        """Generate a cloze text from a source text.

        Args:
            source_text: The text to turn into a cloze exercise.
            num_gaps: Number of gaps to create. Negative values are
                treated as 0.
            difficulty: Difficulty level ("easy", "medium", "hard").
            cloze_type: Kind of cloze exercise.
            topic: Optional topic.

        Returns:
            A ClozeText. When the stripped source text is shorter than 50
            characters (or empty/None) a gap-less ClozeText is returned.
        """
        logger.info(f"Generating cloze text with {num_gaps} gaps (difficulty: {difficulty})")

        if not source_text or len(source_text.strip()) < 50:
            logger.warning("Source text too short")
            return self._empty_cloze(source_text, cloze_type)

        # A negative count would slice the candidate list from the wrong end.
        num_gaps = max(num_gaps, 0)

        if self.llm_client:
            return self._generate_with_llm(
                source_text, num_gaps, difficulty, cloze_type, topic
            )
        return self._generate_automatic(
            source_text, num_gaps, difficulty, cloze_type, topic
        )

    def _generate_with_llm(
        self,
        source_text: str,
        num_gaps: int,
        difficulty: str,
        cloze_type: ClozeType,
        topic: Optional[str]
    ) -> ClozeText:
        """Generate the cloze text via the LLM client.

        The response of ``llm_client.generate`` is parsed as JSON. Any
        exception (client error, invalid JSON, unexpected structure) is
        logged and the automatic heuristic is used instead.
        """
        # The prompt lines start at column 0 because they are part of the
        # string sent to the model.
        prompt = f"""
Erstelle einen Lückentext auf Deutsch basierend auf folgendem Text.
Ersetze {num_gaps} wichtige Begriffe durch Lücken.
Schwierigkeitsgrad: {difficulty}
{f'Thema: {topic}' if topic else ''}

Originaltext:
{source_text}

Wähle {num_gaps} wichtige Begriffe (Substantive, Verben, Fachbegriffe) aus.
Für jeden Begriff gib an:
- Das Wort, das ausgeblendet wird
- Alternative Schreibweisen (falls vorhanden)
- Einen Hinweis
- 3 ähnliche aber falsche Wörter (Distraktoren)

Antworte im JSON-Format:
{{
    "gaps": [
        {{
            "word": "Photosynthese",
            "alternatives": ["Fotosynthese"],
            "hint": "Prozess bei dem Pflanzen Licht nutzen",
            "distractors": ["Zellatmung", "Osmose", "Diffusion"]
        }}
    ]
}}
"""

        try:
            response = self.llm_client.generate(prompt)
            data = json.loads(response)
            return self._create_cloze_from_llm(
                source_text, data, difficulty, cloze_type, topic
            )
        except Exception as e:
            # Deliberately broad: whatever goes wrong on the LLM path, the
            # caller still gets a heuristic cloze text.
            logger.error(f"Error generating with LLM: {e}")
            return self._generate_automatic(
                source_text, num_gaps, difficulty, cloze_type, topic
            )

    def _generate_automatic(
        self,
        source_text: str,
        num_gaps: int,
        difficulty: str,
        cloze_type: ClozeType,
        topic: Optional[str]
    ) -> ClozeText:
        """Generate the cloze text with the built-in heuristic (no LLM).

        Candidate words are ranked by ``_find_important_words``, narrowed by
        ``_select_words_by_difficulty`` and the first occurrence of each
        selected word is replaced by a ``[_N_]`` placeholder.
        """
        words = self._find_important_words(source_text)
        selected = self._select_words_by_difficulty(words, num_gaps, difficulty)

        gaps = []
        text_with_gaps = source_text

        for word, _score in selected:
            match = re.search(r'\b' + re.escape(word) + r'\b', text_with_gaps)
            if not match:
                continue

            # Number by the gaps actually created so that the placeholder
            # always equals the gap's index in the list + 1, which is what
            # to_h5p_format relies on.
            index = len(gaps)
            placeholder = f"[_{index + 1}_]"
            text_with_gaps = (
                text_with_gaps[:match.start()]
                + placeholder
                + text_with_gaps[match.end():]
            )

            # Case variants are accepted too, without repeating the answer.
            alternatives = [
                variant
                for variant in dict.fromkeys((word.lower(), word.upper()))
                if variant != word
            ]

            gaps.append(
                ClozeGap(
                    position=index,
                    answer=word,
                    alternatives=alternatives,
                    hint=self._generate_hint(word, source_text),
                    distractors=self._generate_distractors(word, words)
                )
            )

        return ClozeText(
            text_with_gaps=text_with_gaps,
            original_text=source_text,
            gaps=gaps,
            cloze_type=cloze_type,
            topic=topic,
            difficulty=difficulty
        )

    def _find_important_words(self, text: str) -> List[tuple]:
        """Rank candidate gap words found in the text.

        Candidates are runs of at least four letters (ASCII plus German
        umlauts and ß). Score = length + 2 * case-insensitive frequency,
        plus 3 when the word starts with an upper-case letter.

        Returns:
            ``(word, score)`` tuples, highest score first. Words with equal
            scores keep their order of first occurrence, so the result is
            the same on every run.
        """
        words = re.findall(r'\b[A-Za-zäöüÄÖÜß]{4,}\b', text)

        word_count = {}
        for word in words:
            key = word.lower()
            word_count[key] = word_count.get(key, 0) + 1

        scored = []
        # dict.fromkeys de-duplicates while keeping first-occurrence order;
        # a set would make the order of ties depend on hash randomisation.
        for word in dict.fromkeys(words):
            score = len(word) + word_count[word.lower()] * 2
            if word[0].isupper():
                score += 3
            scored.append((word, score))

        # list.sort is stable, also with reverse=True.
        scored.sort(key=lambda item: item[1], reverse=True)
        return scored

    def _select_words_by_difficulty(
        self,
        words: List[tuple],
        num_gaps: int,
        difficulty: str
    ) -> List[tuple]:
        """Pick up to ``num_gaps`` entries from the ranked word list.

        "hard" skips the top-ranked words: it takes the window starting at
        rank ``num_gaps``, moved towards the front as far as needed so that
        it still contains ``num_gaps`` words whenever the list is long
        enough. Every other difficulty takes the top ``num_gaps`` words.
        A negative ``num_gaps`` yields an empty list.
        """
        num_gaps = max(num_gaps, 0)

        if difficulty == "hard":
            start = max(0, min(num_gaps, len(words) - num_gaps))
            return words[start:start + num_gaps]

        # "easy", "medium" and unknown values currently behave the same.
        return words[:num_gaps]

    def _generate_hint(self, word: str, text: str) -> str:
        """Build a hint for a word.

        The hint names the first letter. The letter count is added when the
        word occurs (as a substring) in a '.'-delimited sentence of more
        than five whitespace-separated words.
        """
        has_long_context = any(
            word in sentence and len(sentence.split()) > 5
            for sentence in text.split('.')
        )
        if has_long_context:
            return f"Beginnt mit '{word[0]}' ({len(word)} Buchstaben)"
        return f"Beginnt mit '{word[0]}'"

    def _generate_distractors(self, word: str, all_words: List[tuple]) -> List[str]:
        """Pick three distractors (wrong options) for a word.

        Takes the first words from ``all_words`` whose length differs from
        the answer's by at most two and which are not the answer itself or
        a case variant of a distractor already chosen. Missing entries are
        filled with generic ``[Option N]`` labels.

        Returns:
            A list of exactly three strings.
        """
        distractors = []
        word_len = len(word)
        seen = {word.lower()}

        for candidate, _score in all_words:
            key = candidate.lower()
            if key in seen or abs(len(candidate) - word_len) > 2:
                continue
            seen.add(key)
            distractors.append(candidate)
            if len(distractors) >= 3:
                break

        while len(distractors) < 3:
            distractors.append(f"[Option {len(distractors)+1}]")

        return distractors

    def _create_cloze_from_llm(
        self,
        source_text: str,
        data: Dict[str, Any],
        difficulty: str,
        cloze_type: ClozeType,
        topic: Optional[str]
    ) -> ClozeText:
        """Build a ClozeText from the parsed LLM response.

        Args:
            source_text: The original text.
            data: Parsed JSON; gap entries are read from ``data["gaps"]``
                with the keys "word", "alternatives", "hint" and
                "distractors".
            difficulty: Stored unchanged on the result.
            cloze_type: Stored unchanged on the result.
            topic: Stored unchanged on the result.

        Entries with an empty word, or whose word does not occur in the
        text, are skipped. The remaining gaps are numbered consecutively so
        every gap has exactly one placeholder.
        """
        text_with_gaps = source_text
        gaps = []

        for gap_data in data.get("gaps", []):
            word = gap_data.get("word", "")
            if not word:
                continue

            index = len(gaps)
            placeholder = f"[_{index + 1}_]"
            # Look-arounds instead of \b so that words starting or ending
            # with a non-word character (e.g. "z.B.") can match as well.
            pattern = r'(?<!\w)' + re.escape(word) + r'(?!\w)'
            text_with_gaps, replaced = re.subn(
                pattern, placeholder, text_with_gaps, count=1
            )
            if not replaced:
                logger.warning(
                    "LLM gap word %r not found in source text; skipping", word
                )
                continue

            gaps.append(
                ClozeGap(
                    position=index,
                    answer=word,
                    # "or []" also covers an explicit JSON null.
                    alternatives=gap_data.get("alternatives") or [],
                    hint=gap_data.get("hint"),
                    distractors=gap_data.get("distractors") or []
                )
            )

        return ClozeText(
            text_with_gaps=text_with_gaps,
            original_text=source_text,
            gaps=gaps,
            cloze_type=cloze_type,
            topic=topic,
            difficulty=difficulty
        )

    def _empty_cloze(self, text: str, cloze_type: ClozeType) -> ClozeText:
        """Build a gap-less ClozeText, used when generation is not possible.

        A ``None`` text is stored as an empty string. Topic is ``None`` and
        difficulty is "medium".
        """
        text = text or ""
        return ClozeText(
            text_with_gaps=text,
            original_text=text,
            gaps=[],
            cloze_type=cloze_type,
            topic=None,
            difficulty="medium"
        )

    def to_h5p_format(self, cloze: ClozeText) -> Dict[str, Any]:
        """Convert a cloze text to an H5P.Blanks parameter dict.

        Args:
            cloze: The ClozeText to convert.

        Returns:
            A dict with "library" set to "H5P.Blanks" and the converted
            text plus behaviour flags under "params".
        """
        h5p_text = cloze.text_with_gaps

        # Placeholder N belongs to the gap at list index N - 1.
        for i, gap in enumerate(cloze.gaps):
            placeholder = f"[_{i+1}_]"
            options = [gap.answer] + list(gap.alternatives or [])

            if cloze.cloze_type == ClozeType.DROPDOWN:
                # NOTE(review): H5P.Blanks appears to treat every
                # '/'-separated entry as an accepted answer, so distractors
                # added here may be accepted as correct - confirm intended.
                options += list(gap.distractors or [])

            # Drop exact duplicates while keeping the order.
            h5p_answer = "/".join(dict.fromkeys(options))
            h5p_text = h5p_text.replace(placeholder, f"*{h5p_answer}*")

        return {
            "library": "H5P.Blanks",
            "params": {
                "text": h5p_text,
                "behaviour": {
                    "enableRetry": True,
                    "enableSolutionsButton": True,
                    "caseSensitive": False,
                    "showSolutionsRequiresInput": True
                }
            }
        }

    def to_dict(self, cloze: ClozeText) -> Dict[str, Any]:
        """Convert a ClozeText to a plain dictionary.

        The cloze type is emitted as its string value; gaps become a list
        of dicts with the ClozeGap field names as keys.
        """
        return {
            "text_with_gaps": cloze.text_with_gaps,
            "original_text": cloze.original_text,
            "gaps": [
                {
                    "position": gap.position,
                    "answer": gap.answer,
                    "alternatives": gap.alternatives,
                    "hint": gap.hint,
                    "distractors": gap.distractors
                }
                for gap in cloze.gaps
            ],
            "cloze_type": cloze.cloze_type.value,
            "topic": cloze.topic,
            "difficulty": cloze.difficulty
        }
|