This repository has been archived on 2026-02-15. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
Benjamin Admin 21a844cb8a fix: Restore all files lost during destructive rebase
A previous `git pull --rebase origin main` dropped 177 local commits,
losing 3400+ files across admin-v2, backend, studio-v2, website,
klausur-service, and many other services. The partial restore attempt
(660295e2) only recovered some files.

This commit restores all missing files from pre-rebase ref 98933f5e
while preserving post-rebase additions (night-scheduler, night-mode UI,
NightModeWidget dashboard integration).

Restored features include:
- AI Module Sidebar (FAB), OCR Labeling, OCR Compare
- GPU Dashboard, RAG Pipeline, Magic Help
- Klausur-Korrektur (8 files), Abitur-Archiv (5+ files)
- Companion, Zeugnisse-Crawler, Screen Flow
- Full backend, studio-v2, website, klausur-service
- All compliance SDKs, agent-core, voice-service
- CI/CD configs, documentation, scripts

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-09 09:51:32 +01:00

313 lines
9.5 KiB
Python

"""
AI Processor - Cloze Text Generator
Generate cloze (fill-in-the-blank) texts from worksheet analysis.
"""
from pathlib import Path
import json
import logging
import os
import requests
from ..config import VISION_API, BEREINIGT_DIR, get_openai_api_key
logger = logging.getLogger(__name__)
# Maps ISO 639-1 language codes to German language names
# (ASCII-transliterated, e.g. "Tuerkisch" instead of "Türkisch").
LANGUAGE_NAMES = dict(
    tr="Tuerkisch",
    ar="Arabisch",
    ru="Russisch",
    en="Englisch",
    fr="Franzoesisch",
    es="Spanisch",
    pl="Polnisch",
    uk="Ukrainisch",
)
def _generate_cloze_with_openai(analysis_data: dict, target_language: str = "tr") -> dict:
"""
Generate cloze texts based on worksheet analysis.
Important didactic requirements:
- Multiple meaningful gaps per sentence (not just one!)
- Difficulty level matches the original
- Translation with the same gaps
Args:
analysis_data: The analysis JSON of the worksheet
target_language: Target language for translation (default: "tr" for Turkish)
Returns:
Dict with cloze_items and metadata
"""
api_key = get_openai_api_key()
title = analysis_data.get("title") or "Arbeitsblatt"
subject = analysis_data.get("subject") or "Allgemein"
grade_level = analysis_data.get("grade_level") or "unbekannt"
canonical_text = analysis_data.get("canonical_text") or ""
printed_blocks = analysis_data.get("printed_blocks") or []
content_parts = []
if canonical_text:
content_parts.append(canonical_text)
for block in printed_blocks:
text = block.get("text", "").strip()
if text and text not in content_parts:
content_parts.append(text)
worksheet_content = "\n\n".join(content_parts)
if not worksheet_content.strip():
logger.warning("Kein Textinhalt fuer Lueckentext-Generierung gefunden")
return {"cloze_items": [], "metadata": {"error": "Kein Textinhalt gefunden"}}
target_lang_name = LANGUAGE_NAMES.get(target_language, "Tuerkisch")
url = "https://api.openai.com/v1/chat/completions"
headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
system_prompt = f"""Du bist ein erfahrener Paedagoge, der Lueckentexte fuer Schueler erstellt.
WICHTIGE REGELN FUER LUECKENTEXTE:
1. MEHRERE LUECKEN PRO SATZ:
- Erstelle IMMER mehrere sinnvolle Luecken pro Satz
- Beispiel: "Ich habe gestern meine Hausaufgaben gemacht."
→ Luecken: "habe" UND "gemacht" (nicht nur eine!)
2. SCHWIERIGKEITSGRAD:
- Niveau muss exakt "{grade_level}" entsprechen
3. SINNVOLLE LUECKENWOERTER:
- Verben (konjugiert)
- Wichtige Nomen
- Adjektive
- KEINE Artikel oder Praepositionen allein
4. UEBERSETZUNG:
- Uebersetze den VOLLSTAENDIGEN Satz auf {target_lang_name}
- Die GLEICHEN Woerter muessen als Luecken markiert sein
5. AUSGABE: Nur gueltiges JSON, kein Markdown."""
user_prompt = f"""Erstelle Lueckentexte aus diesem Arbeitsblatt:
TITEL: {title}
FACH: {subject}
KLASSENSTUFE: {grade_level}
TEXT:
{worksheet_content}
Erstelle 5-8 Saetze mit Luecken. Gib das Ergebnis als JSON zurueck:
{{
"cloze_items": [
{{
"id": "c1",
"original_sentence": "Der vollstaendige Originalsatz ohne Luecken",
"sentence_with_gaps": "Der Satz mit ___ fuer jede Luecke",
"gaps": [
{{
"id": "g1",
"word": "das fehlende Wort",
"position": 0,
"hint": "optionaler Hinweis"
}}
],
"translation": {{
"language": "{target_language}",
"language_name": "{target_lang_name}",
"full_sentence": "Vollstaendige Uebersetzung",
"sentence_with_gaps": "Uebersetzung mit ___ an gleichen Stellen"
}}
}}
],
"metadata": {{
"subject": "{subject}",
"grade_level": "{grade_level}",
"source_title": "{title}",
"target_language": "{target_language}",
"total_gaps": 0
}}
}}
WICHTIG:
- Jeder Satz MUSS mindestens 2 Luecken haben!
- Position ist der Index des Wortes im Satz (0-basiert)"""
payload = {
"model": "gpt-4o-mini",
"response_format": {"type": "json_object"},
"messages": [
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_prompt},
],
"max_tokens": 3000,
"temperature": 0.7,
}
response = requests.post(url, headers=headers, json=payload)
response.raise_for_status()
data = response.json()
try:
content = data["choices"][0]["message"]["content"]
cloze_data = json.loads(content)
except (KeyError, json.JSONDecodeError) as e:
raise RuntimeError(f"Fehler bei Lueckentext-Generierung: {e}")
# Calculate total number of gaps
total_gaps = sum(len(item.get("gaps", [])) for item in cloze_data.get("cloze_items", []))
if "metadata" in cloze_data:
cloze_data["metadata"]["total_gaps"] = total_gaps
return cloze_data
def _generate_cloze_with_claude(analysis_data: dict, target_language: str = "tr") -> dict:
"""Generate cloze texts with Claude API."""
import anthropic
api_key = os.getenv("ANTHROPIC_API_KEY")
if not api_key:
raise RuntimeError("ANTHROPIC_API_KEY ist nicht gesetzt.")
client = anthropic.Anthropic(api_key=api_key)
title = analysis_data.get("title") or "Arbeitsblatt"
subject = analysis_data.get("subject") or "Allgemein"
grade_level = analysis_data.get("grade_level") or "unbekannt"
canonical_text = analysis_data.get("canonical_text") or ""
printed_blocks = analysis_data.get("printed_blocks") or []
content_parts = []
if canonical_text:
content_parts.append(canonical_text)
for block in printed_blocks:
text = block.get("text", "").strip()
if text and text not in content_parts:
content_parts.append(text)
worksheet_content = "\n\n".join(content_parts)
if not worksheet_content.strip():
return {"cloze_items": [], "metadata": {"error": "Kein Textinhalt gefunden"}}
target_lang_name = LANGUAGE_NAMES.get(target_language, "Tuerkisch")
prompt = f"""Erstelle Lueckentexte aus diesem Arbeitsblatt.
WICHTIGE REGELN:
1. MEHRERE LUECKEN PRO SATZ (mindestens 2!)
Beispiel: "Ich habe gestern Hausaufgaben gemacht" → Luecken: "habe" UND "gemacht"
2. Schwierigkeitsgrad: exakt "{grade_level}"
3. Uebersetzung auf {target_lang_name} mit gleichen Luecken
TITEL: {title}
FACH: {subject}
KLASSENSTUFE: {grade_level}
TEXT:
{worksheet_content}
Antworte NUR mit diesem JSON (5-8 Saetze):
{{
"cloze_items": [
{{
"id": "c1",
"original_sentence": "Vollstaendiger Satz",
"sentence_with_gaps": "Satz mit ___ fuer Luecken",
"gaps": [
{{"id": "g1", "word": "Lueckenwort", "position": 0, "hint": "Hinweis"}}
],
"translation": {{
"language": "{target_language}",
"language_name": "{target_lang_name}",
"full_sentence": "Uebersetzung",
"sentence_with_gaps": "Uebersetzung mit ___"
}}
}}
],
"metadata": {{
"subject": "{subject}",
"grade_level": "{grade_level}",
"source_title": "{title}",
"target_language": "{target_language}",
"total_gaps": 0
}}
}}"""
message = client.messages.create(
model="claude-3-5-sonnet-20241022",
max_tokens=3000,
messages=[{"role": "user", "content": prompt}]
)
content = message.content[0].text
try:
if "```json" in content:
content = content.split("```json")[1].split("```")[0]
elif "```" in content:
content = content.split("```")[1].split("```")[0]
cloze_data = json.loads(content.strip())
except json.JSONDecodeError as e:
raise RuntimeError(f"Claude hat ungueltiges JSON geliefert: {e}")
# Calculate total number of gaps
total_gaps = sum(len(item.get("gaps", [])) for item in cloze_data.get("cloze_items", []))
if "metadata" in cloze_data:
cloze_data["metadata"]["total_gaps"] = total_gaps
return cloze_data
def generate_cloze_from_analysis(analysis_path: Path, target_language: str = "tr") -> Path:
    """
    Generate cloze texts from an analysis JSON file.

    The cloze texts will:
    - Have multiple meaningful gaps per sentence
    - Match the difficulty level of the original
    - Include translation to target language

    Args:
        analysis_path: Path to *_analyse.json file
        target_language: Language code for translation (default: "tr" for Turkish)

    Returns:
        Path to generated *_cloze.json file

    Raises:
        FileNotFoundError: If analysis_path does not exist.
        RuntimeError: If the analysis file contains invalid JSON.
    """
    if not analysis_path.exists():
        raise FileNotFoundError(f"Analysedatei nicht gefunden: {analysis_path}")

    try:
        analysis_data = json.loads(analysis_path.read_text(encoding="utf-8"))
    except json.JSONDecodeError as e:
        # Chain the original decode error so the root cause stays visible.
        raise RuntimeError(f"Ungueltige Analyse-JSON: {e}") from e

    # Lazy %-style args skip string formatting when the log level is disabled.
    logger.info("Generiere Lueckentexte fuer: %s", analysis_path.name)

    # Generate cloze texts with the configured API; if Claude fails for any
    # reason, fall back to OpenAI instead of aborting.
    if VISION_API == "claude":
        try:
            cloze_data = _generate_cloze_with_claude(analysis_data, target_language)
        except Exception as e:
            logger.warning("Claude Lueckentext-Generierung fehlgeschlagen, nutze OpenAI: %s", e)
            cloze_data = _generate_cloze_with_openai(analysis_data, target_language)
    else:
        cloze_data = _generate_cloze_with_openai(analysis_data, target_language)

    # Save next to the other cleaned artifacts as <stem>_cloze.json.
    out_name = analysis_path.stem.replace("_analyse", "") + "_cloze.json"
    out_path = BEREINIGT_DIR / out_name
    out_path.write_text(json.dumps(cloze_data, ensure_ascii=False, indent=2), encoding="utf-8")
    logger.info("Lueckentexte gespeichert: %s", out_path.name)
    return out_path