This repository has been archived on 2026-02-15. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
breakpilot-pwa/backend/generators/cloze_generator.py
Benjamin Admin 21a844cb8a fix: Restore all files lost during destructive rebase
A previous `git pull --rebase origin main` dropped 177 local commits,
losing 3400+ files across admin-v2, backend, studio-v2, website,
klausur-service, and many other services. The partial restore attempt
(660295e2) only recovered some files.

This commit restores all missing files from pre-rebase ref 98933f5e
while preserving post-rebase additions (night-scheduler, night-mode UI,
NightModeWidget dashboard integration).

Restored features include:
- AI Module Sidebar (FAB), OCR Labeling, OCR Compare
- GPU Dashboard, RAG Pipeline, Magic Help
- Klausur-Korrektur (8 files), Abitur-Archiv (5+ files)
- Companion, Zeugnisse-Crawler, Screen Flow
- Full backend, studio-v2, website, klausur-service
- All compliance SDKs, agent-core, voice-service
- CI/CD configs, documentation, scripts

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-09 09:51:32 +01:00

381 lines
12 KiB
Python

"""
Cloze Generator - Erstellt Lückentexte aus Quelltexten.
Generiert:
- Lückentexte mit ausgeblendeten Schlüsselwörtern
- Verschiedene Schwierigkeitsgrade
- Hinweise und Erklärungen
"""
import logging
import json
import re
from typing import List, Dict, Any, Optional
from dataclasses import dataclass
from enum import Enum
logger = logging.getLogger(__name__)
class ClozeType(str, Enum):
    """Presentation modes a cloze exercise can use.

    The enum mixes in ``str``, so every member compares equal to its raw
    string value.
    """

    # Free-text entry
    FILL_IN = "fill_in"
    # Drag & drop
    DRAG_DROP = "drag_drop"
    # Dropdown selection
    DROPDOWN = "dropdown"
@dataclass
class ClozeGap:
    """A single gap in a cloze text.

    Attributes:
        position: Zero-based position of the gap in the text.
        answer: The correct answer.
        alternatives: Further answers that are also accepted as correct.
        hint: Optional hint for the learner.
        distractors: Wrong options (used for dropdown / drag & drop).
    """

    position: int
    answer: str
    alternatives: List[str]
    hint: Optional[str]
    distractors: List[str]
@dataclass
class ClozeText:
    """A complete cloze text together with its gaps.

    Attributes:
        text_with_gaps: The text with placeholders in place of the answers.
        original_text: The unmodified source text.
        gaps: The gaps of this cloze text.
        cloze_type: Kind of cloze exercise.
        topic: Optional topic.
        difficulty: One of ``easy``, ``medium`` or ``hard``.
    """

    text_with_gaps: str
    original_text: str
    gaps: List[ClozeGap]
    cloze_type: ClozeType
    topic: Optional[str]
    difficulty: str
class ClozeGenerator:
    """
    Generate cloze (fill-in-the-gap) texts from source texts.

    Two generation modes are implemented in this class:
    - LLM-based selection of the words to blank out (used when an
      ``llm_client`` was supplied), and
    - an automatic heuristic based on word length, frequency and
      capitalisation (used without a client, or when the LLM path fails).

    Invariant kept by both modes and relied on by ``to_h5p_format``: the gap
    at index ``i`` of ``ClozeText.gaps`` belongs to the placeholder
    ``[_{i+1}_]`` in ``ClozeText.text_with_gaps``.
    """

    def __init__(self, llm_client=None):
        """
        Initialise the generator.

        Args:
            llm_client: Optional LLM client. This class only calls
                ``llm_client.generate(prompt)`` and parses the result as
                JSON. Without a client the heuristic mode is used.
        """
        self.llm_client = llm_client
        logger.info("ClozeGenerator initialized")
        # Parts of speech that usually make good gaps (nouns, verbs,
        # adjectives). Not read by any method of this class as shown here;
        # kept so existing attribute access keeps working.
        self._important_pos = {"NOUN", "VERB", "ADJ"}

    def generate(
        self,
        source_text: str,
        num_gaps: int = 5,
        difficulty: str = "medium",
        cloze_type: ClozeType = ClozeType.FILL_IN,
        topic: Optional[str] = None
    ) -> ClozeText:
        """
        Generate a cloze text from a source text.

        Args:
            source_text: The text to turn into a cloze exercise.
            num_gaps: Number of gaps to create.
            difficulty: Difficulty level (easy, medium, hard).
            cloze_type: Kind of cloze exercise.
            topic: Optional topic.

        Returns:
            A ClozeText. It has no gaps when the stripped source text is
            shorter than 50 characters or when ``num_gaps`` is not positive.
        """
        logger.info(
            "Generating cloze text with %s gaps (difficulty: %s)",
            num_gaps, difficulty
        )
        if not source_text or len(source_text.strip()) < 50:
            logger.warning("Source text too short")
            return self._empty_cloze(source_text, cloze_type, topic, difficulty)
        if num_gaps <= 0:
            # Nothing to blank out; skip the LLM round trip entirely.
            logger.warning("No gaps requested")
            return self._empty_cloze(source_text, cloze_type, topic, difficulty)
        if self.llm_client:
            return self._generate_with_llm(
                source_text, num_gaps, difficulty, cloze_type, topic
            )
        return self._generate_automatic(
            source_text, num_gaps, difficulty, cloze_type, topic
        )

    def _generate_with_llm(
        self,
        source_text: str,
        num_gaps: int,
        difficulty: str,
        cloze_type: ClozeType,
        topic: Optional[str]
    ) -> ClozeText:
        """
        Generate a cloze text by asking the LLM which words to blank out.

        Falls back to ``_generate_automatic`` when the client raises, the
        answer cannot be parsed as JSON, or no usable gap results from it.
        """
        prompt = f"""
Erstelle einen Lückentext auf Deutsch basierend auf folgendem Text.
Ersetze {num_gaps} wichtige Begriffe durch Lücken.
Schwierigkeitsgrad: {difficulty}
{f'Thema: {topic}' if topic else ''}
Originaltext:
{source_text}
Wähle {num_gaps} wichtige Begriffe (Substantive, Verben, Fachbegriffe) aus.
Für jeden Begriff gib an:
- Das Wort, das ausgeblendet wird
- Alternative Schreibweisen (falls vorhanden)
- Einen Hinweis
- 3 ähnliche aber falsche Wörter (Distraktoren)
Antworte im JSON-Format:
{{
"gaps": [
{{
"word": "Photosynthese",
"alternatives": ["Fotosynthese"],
"hint": "Prozess bei dem Pflanzen Licht nutzen",
"distractors": ["Zellatmung", "Osmose", "Diffusion"]
}}
]
}}
"""
        try:
            response = self.llm_client.generate(prompt)
            data = self._parse_llm_json(response)
            cloze = self._create_cloze_from_llm(
                source_text, data, difficulty, cloze_type, topic
            )
            if not cloze.gaps:
                # An answer without any usable gap is as useless as no
                # answer; route it through the same fallback.
                raise ValueError("LLM response contained no usable gaps")
            return cloze
        except Exception as e:
            # Deliberate best-effort boundary: whatever goes wrong with the
            # client or its answer, the heuristic still produces a result.
            logger.error("Error generating with LLM: %s", e)
            return self._generate_automatic(
                source_text, num_gaps, difficulty, cloze_type, topic
            )

    @staticmethod
    def _parse_llm_json(response: Any) -> Any:
        """
        Parse the LLM answer as JSON.

        If the answer as a whole is not valid JSON but is a string, the
        span from the first ``{`` to the last ``}`` is parsed instead, so
        answers wrapped in code fences or surrounding prose still work.

        Raises:
            TypeError / ValueError: If no JSON document can be extracted.
        """
        try:
            return json.loads(response)
        except (TypeError, ValueError):
            if not isinstance(response, str):
                raise
            start = response.find("{")
            end = response.rfind("}")
            if start == -1 or end <= start:
                raise
            return json.loads(response[start:end + 1])

    @staticmethod
    def _as_str_list(value: Any) -> List[str]:
        """Coerce an LLM-supplied field into a list of strings."""
        if isinstance(value, str):
            return [value]
        if not isinstance(value, (list, tuple)):
            return []
        return [str(item) for item in value if item is not None]

    @staticmethod
    def _insert_placeholder(text: str, word: str, number: int) -> Optional[str]:
        """
        Replace the first whole-word occurrence of ``word`` in ``text``.

        Returns:
            The text with ``[_{number}_]`` in place of the word, or None
            when the word does not occur as a whole word.
        """
        # Lookarounds instead of \b: identical for ordinary words, but they
        # also work when the word starts or ends with a non-word character.
        pattern = r'(?<!\w)' + re.escape(word) + r'(?!\w)'
        match = re.search(pattern, text)
        if match is None:
            return None
        return f"{text[:match.start()]}[_{number}_]{text[match.end():]}"

    def _generate_automatic(
        self,
        source_text: str,
        num_gaps: int,
        difficulty: str,
        cloze_type: ClozeType,
        topic: Optional[str]
    ) -> ClozeText:
        """Generate a cloze text heuristically, without an LLM."""
        words = self._find_important_words(source_text)
        selected = self._select_words_by_difficulty(words, num_gaps, difficulty)
        gaps: List[ClozeGap] = []
        text_with_gaps = source_text
        for word, _score in selected:
            # Number by accepted gaps so that list index and placeholder
            # number can never drift apart.
            index = len(gaps)
            replaced = self._insert_placeholder(text_with_gaps, word, index + 1)
            if replaced is None:
                continue
            text_with_gaps = replaced
            # Case variants are accepted too, but never a copy of the answer.
            alternatives = [
                variant
                for variant in dict.fromkeys((word.lower(), word.upper()))
                if variant != word
            ]
            gaps.append(
                ClozeGap(
                    position=index,
                    answer=word,
                    alternatives=alternatives,
                    hint=self._generate_hint(word, source_text),
                    distractors=self._generate_distractors(word, words)
                )
            )
        return ClozeText(
            text_with_gaps=text_with_gaps,
            original_text=source_text,
            gaps=gaps,
            cloze_type=cloze_type,
            topic=topic,
            difficulty=difficulty
        )

    def _find_important_words(self, text: str) -> List[tuple]:
        """
        Find candidate words and score them.

        Candidates are runs of at least four letters (ASCII plus German
        umlauts and ß). The score is the word length, plus twice the
        case-insensitive frequency, plus 3 for a capitalised word.

        Returns:
            ``(word, score)`` tuples, highest score first. Words with equal
            scores keep the order of their first occurrence in the text.
        """
        words = re.findall(r'\b[A-Za-zäöüÄÖÜß]{4,}\b', text)
        word_count: Dict[str, int] = {}
        for word in words:
            key = word.lower()
            word_count[key] = word_count.get(key, 0) + 1
        scored = []
        # dict.fromkeys keeps first-occurrence order; a set would make the
        # order of equally scored words differ between interpreter runs.
        for word in dict.fromkeys(words):
            score = len(word) + word_count[word.lower()] * 2
            # Prefer capitalised words (nouns in German).
            if word[0].isupper():
                score += 3
            scored.append((word, score))
        scored.sort(key=lambda item: item[1], reverse=True)
        return scored

    def _select_words_by_difficulty(
        self,
        words: List[tuple],
        num_gaps: int,
        difficulty: str
    ) -> List[tuple]:
        """
        Pick the words to blank out for the requested difficulty.

        ``hard`` takes the ``num_gaps`` words after the top-scored
        ``num_gaps`` ones; when the list is too short for that, the window
        slides towards the front so that up to ``num_gaps`` words are still
        returned. Every other difficulty takes the top-scored words, so
        ``medium`` currently selects exactly like ``easy``.
        """
        if num_gaps <= 0:
            return []
        if difficulty == "hard":
            start = min(num_gaps, max(len(words) - num_gaps, 0))
            return words[start:start + num_gaps]
        return words[:num_gaps]

    def _generate_hint(self, word: str, text: str) -> str:
        """
        Build a hint for a word.

        The hint names the first letter; the word length is added when the
        word occurs in a '.'-delimited sentence of more than five words.
        """
        if not word:
            return ""
        in_long_sentence = any(
            word in sentence and len(sentence.split()) > 5
            for sentence in text.split('.')
        )
        if in_long_sentence:
            return f"Beginnt mit '{word[0]}' ({len(word)} Buchstaben)"
        return f"Beginnt mit '{word[0]}'"

    def _generate_distractors(self, word: str, all_words: List[tuple]) -> List[str]:
        """
        Pick three wrong options for a word.

        Candidates are other words whose length differs by at most two
        characters, taken in the order of ``all_words``; case variants of
        an already chosen word are skipped. Missing options are filled
        with ``[Option N]`` stand-ins.
        """
        distractors: List[str] = []
        seen = {word.lower()}
        for candidate, _score in all_words:
            key = candidate.lower()
            if key in seen or abs(len(candidate) - len(word)) > 2:
                continue
            seen.add(key)
            distractors.append(candidate)
            if len(distractors) >= 3:
                break
        while len(distractors) < 3:
            distractors.append(f"[Option {len(distractors)+1}]")
        return distractors

    def _create_cloze_from_llm(
        self,
        source_text: str,
        data: Dict[str, Any],
        difficulty: str,
        cloze_type: ClozeType,
        topic: Optional[str]
    ) -> ClozeText:
        """
        Build a ClozeText from the parsed LLM answer.

        Entries that are not objects, have no non-empty string ``word``, or
        name a word that does not occur in the text are skipped, so every
        returned gap has a placeholder in ``text_with_gaps``.
        """
        raw_gaps = data.get("gaps", []) if isinstance(data, dict) else []
        if not isinstance(raw_gaps, list):
            raw_gaps = []
        text_with_gaps = source_text
        gaps: List[ClozeGap] = []
        for gap_data in raw_gaps:
            if not isinstance(gap_data, dict):
                continue
            word = gap_data.get("word", "")
            if not isinstance(word, str) or not word.strip():
                continue
            word = word.strip()
            # Number by accepted gaps, not by raw entry index, so skipped
            # entries leave no hole in the placeholder numbering.
            index = len(gaps)
            replaced = self._insert_placeholder(text_with_gaps, word, index + 1)
            if replaced is None:
                logger.warning("LLM gap word not found in text: %s", word)
                continue
            text_with_gaps = replaced
            gaps.append(
                ClozeGap(
                    position=index,
                    answer=word,
                    alternatives=self._as_str_list(gap_data.get("alternatives")),
                    hint=gap_data.get("hint"),
                    distractors=self._as_str_list(gap_data.get("distractors"))
                )
            )
        return ClozeText(
            text_with_gaps=text_with_gaps,
            original_text=source_text,
            gaps=gaps,
            cloze_type=cloze_type,
            topic=topic,
            difficulty=difficulty
        )

    def _empty_cloze(
        self,
        text: str,
        cloze_type: ClozeType,
        topic: Optional[str] = None,
        difficulty: str = "medium"
    ) -> ClozeText:
        """
        Build a ClozeText without gaps, used when nothing can be generated.

        Args:
            text: Used unchanged as both original text and text with gaps.
            cloze_type: Kind of cloze exercise.
            topic: Optional topic to carry over.
            difficulty: Difficulty level to carry over.
        """
        return ClozeText(
            text_with_gaps=text,
            original_text=text,
            gaps=[],
            cloze_type=cloze_type,
            topic=topic,
            difficulty=difficulty
        )

    def to_h5p_format(self, cloze: ClozeText) -> Dict[str, Any]:
        """
        Convert a cloze text into the H5P "Fill in the Blanks" format.

        Each placeholder ``[_{i+1}_]`` is replaced by ``*a/b/c*`` built from
        the answer and alternatives of ``cloze.gaps[i]``; for DROPDOWN the
        distractors are appended as well. Duplicate options are dropped.

        Args:
            cloze: The ClozeText to convert.

        Returns:
            An H5P-compatible dict.
        """
        h5p_text = cloze.text_with_gaps
        # NOTE(review): H5P.Blanks presumably treats every '/'-separated
        # entry as a correct answer, so distractors added for DROPDOWN may
        # be accepted as valid answers - confirm against the H5P docs.
        include_distractors = cloze.cloze_type == ClozeType.DROPDOWN
        for i, gap in enumerate(cloze.gaps):
            options = [gap.answer] + list(gap.alternatives or [])
            if include_distractors:
                options += list(gap.distractors or [])
            h5p_answer = "/".join(dict.fromkeys(options))
            h5p_text = h5p_text.replace(f"[_{i+1}_]", f"*{h5p_answer}*")
        return {
            "library": "H5P.Blanks",
            "params": {
                "text": h5p_text,
                "behaviour": {
                    "enableRetry": True,
                    "enableSolutionsButton": True,
                    "caseSensitive": False,
                    "showSolutionsRequiresInput": True
                }
            }
        }

    def to_dict(self, cloze: ClozeText) -> Dict[str, Any]:
        """
        Convert a ClozeText into a plain dictionary.

        ``cloze_type`` is emitted as its string value; both a ClozeType
        member and its raw string value are accepted on the input object.
        """
        return {
            "text_with_gaps": cloze.text_with_gaps,
            "original_text": cloze.original_text,
            "gaps": [
                {
                    "position": gap.position,
                    "answer": gap.answer,
                    "alternatives": gap.alternatives,
                    "hint": gap.hint,
                    "distractors": gap.distractors
                }
                for gap in cloze.gaps
            ],
            "cloze_type": ClozeType(cloze.cloze_type).value,
            "topic": cloze.topic,
            "difficulty": cloze.difficulty
        }