fix: Restore all files lost during destructive rebase
A previous `git pull --rebase origin main` dropped 177 local commits,
losing 3400+ files across admin-v2, backend, studio-v2, website,
klausur-service, and many other services. The partial restore attempt
(660295e2) only recovered some files.
This commit restores all missing files from pre-rebase ref 98933f5e
while preserving post-rebase additions (night-scheduler, night-mode UI,
NightModeWidget dashboard integration).
Restored features include:
- AI Module Sidebar (FAB), OCR Labeling, OCR Compare
- GPU Dashboard, RAG Pipeline, Magic Help
- Klausur-Korrektur (8 files), Abitur-Archiv (5+ files)
- Companion, Zeugnisse-Crawler, Screen Flow
- Full backend, studio-v2, website, klausur-service
- All compliance SDKs, agent-core, voice-service
- CI/CD configs, documentation, scripts
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
126
backend/ai_processing/__init__.py
Normal file
126
backend/ai_processing/__init__.py
Normal file
@@ -0,0 +1,126 @@
"""AI Processing - module for AI-assisted worksheet processing.

This module provides:
- Image analysis with Vision APIs (OpenAI/Claude)
- Handwriting removal from scans
- Generation of practice material (MC, cloze texts, Q&A)
- A Leitner system for spaced repetition
- Print versions for all material types
- Mindmap generation for study posters

Usage:
    from ai_processing import analyze_scan_structure_with_ai, generate_mc_from_analysis

    # Or all functions:
    from ai_processing import *
"""

# Core utilities
from .core import (
    BASE_DIR,
    EINGANG_DIR,
    BEREINIGT_DIR,
    get_openai_api_key,
    get_anthropic_api_key,
    encode_image_to_data_url,
    encode_image_to_base64,
    ensure_directories,
    dummy_process_scan,
    get_vision_api,
)

# Analysis functions
from .analysis import (
    describe_scan_with_ai,
    analyze_scan_structure_with_ai,
)

# HTML generation
from .html_generator import (
    build_clean_html_from_analysis,
)

# Image processing
from .image_processor import (
    remove_handwriting_from_scan,
)

# Multiple Choice generator
from .mc_generator import (
    generate_mc_from_analysis,
)

# Cloze (fill-in-the-gap) generator
from .cloze_generator import (
    generate_cloze_from_analysis,
)

# Q&A generator
from .qa_generator import (
    generate_qa_from_analysis,
)

# Leitner system
from .leitner import (
    update_leitner_progress,
    get_next_review_items,
)

# Print version generators
from .print_generator import (
    generate_print_version_qa,
    generate_print_version_cloze,
    generate_print_version_mc,
    generate_print_version_worksheet,
)

# Mindmap generator
from .mindmap import (
    generate_mindmap_data,
    generate_mindmap_html,
    save_mindmap_for_worksheet,
)

# Legacy aliases for backwards compatibility
_get_api_key = get_openai_api_key
_encode_image_to_data_url = encode_image_to_data_url

__all__ = [
    # Core
    "BASE_DIR",
    "EINGANG_DIR",
    "BEREINIGT_DIR",
    "get_openai_api_key",
    "get_anthropic_api_key",
    "encode_image_to_data_url",
    "encode_image_to_base64",
    "ensure_directories",
    "dummy_process_scan",
    "get_vision_api",
    # Analysis
    "describe_scan_with_ai",
    "analyze_scan_structure_with_ai",
    # HTML
    "build_clean_html_from_analysis",
    # Image
    "remove_handwriting_from_scan",
    # MC
    "generate_mc_from_analysis",
    # Cloze
    "generate_cloze_from_analysis",
    # Q&A
    "generate_qa_from_analysis",
    # Leitner
    "update_leitner_progress",
    "get_next_review_items",
    # Print
    "generate_print_version_qa",
    "generate_print_version_cloze",
    "generate_print_version_mc",
    "generate_print_version_worksheet",
    # Mindmap
    "generate_mindmap_data",
    "generate_mindmap_html",
    "save_mindmap_for_worksheet",
]
209
backend/ai_processing/analysis.py
Normal file
209
backend/ai_processing/analysis.py
Normal file
@@ -0,0 +1,209 @@
"""AI Processing - worksheet analysis.

Structured analysis of worksheets using OpenAI or Claude.
"""

from pathlib import Path
import json
import requests
import logging

from .core import (
    get_openai_api_key,
    encode_image_to_data_url,
    BEREINIGT_DIR,
    get_vision_api,
)

logger = logging.getLogger(__name__)
def describe_scan_with_ai(input_path: Path) -> Path:
    """Ask a vision model for a short description of the worksheet.

    Args:
        input_path: Path to the scanned worksheet image.

    Returns:
        Path to the written ``*_beschreibung.txt`` file in BEREINIGT_DIR.

    Raises:
        FileNotFoundError: If the input file does not exist.
        RuntimeError: If the API response has an unexpected structure.
    """
    if not input_path.exists():
        raise FileNotFoundError(f"Eingabedatei nicht gefunden: {input_path}")

    api_key = get_openai_api_key()
    image_data_url = encode_image_to_data_url(input_path)

    url = "https://api.openai.com/v1/chat/completions"
    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
    payload = {
        "model": "gpt-4o-mini",
        "messages": [
            {
                "role": "system",
                "content": "Du bist ein hilfreicher Assistent, der Schul-Arbeitsblätter knapp beschreibt.",
            },
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": (
                            "Beschreibe dieses Arbeitsblatt knapp: Thema, Art der Aufgaben "
                            "(z.B. Lückentext, Multiple Choice, Rechenaufgaben) und groben Inhalt."
                        ),
                    },
                    {"type": "image_url", "image_url": {"url": image_data_url}},
                ],
            },
        ],
        "max_tokens": 400,
    }

    # Timeout guards against a hung connection blocking the pipeline.
    response = requests.post(url, headers=headers, json=payload, timeout=120)
    response.raise_for_status()
    data = response.json()

    try:
        description = data["choices"][0]["message"]["content"]
    except Exception as e:
        raise RuntimeError(f"Unerwartete Antwortstruktur von der KI: {e}\nAntwort: {data}") from e

    out_name = input_path.stem + "_beschreibung.txt"
    out_path = BEREINIGT_DIR / out_name
    out_path.write_text(description, encoding="utf-8")
    return out_path
def _get_analysis_system_prompt() -> str:
|
||||
"""Gibt den System-Prompt für die Arbeitsblatt-Analyse zurück."""
|
||||
return (
|
||||
"Du bist ein Experte für die Analyse von Schul-Arbeitsblättern.\n\n"
|
||||
"HAUPTAUFGABEN:\n"
|
||||
"1. Erkenne ALLE gedruckten Elemente: Text, Überschriften, Tabellen, Linien, Kästchen, Diagramme, Illustrationen\n"
|
||||
"2. Identifiziere ALLE handschriftlichen Ergänzungen: Antworten, Zahlen, Buchstaben, Notizen, Zeichnungen\n"
|
||||
"3. Bestimme präzise Positionen (Bounding Boxes in Pixeln) für JEDES Element\n\n"
|
||||
"KRITISCH - DIAGRAMME & ILLUSTRATIONEN:\n"
|
||||
"- Suche aktiv nach: anatomischen Zeichnungen, beschrifteten Diagrammen, Grafiken, Tabellen, Skizzen\n"
|
||||
"- Wenn du irgendeine bildliche Darstellung siehst (z.B. Auge, Pflanze, Karte, Schaubild), setze 'has_diagram: true'\n"
|
||||
"- Für JEDES visuelle Element: Erstelle einen Eintrag in 'diagram_elements' mit genauer Position\n"
|
||||
"- Beschrifte-Linien (von Beschriftung zu Bildteil) gehören zum Diagramm!\n\n"
|
||||
"HANDSCHRIFT ERKENNUNG:\n"
|
||||
"- Unterscheide gedruckt vs. handgeschrieben anhand der Schriftart\n"
|
||||
"- Klassifiziere Farbe: blau/schwarz/rot/pencil (Bleistift)\n"
|
||||
"- Durchgestrichene Wörter separat auflisten\n\n"
|
||||
"AUSGABE: Gib deine Antwort AUSSCHLIESSLICH als gültiges JSON zurück (kein Markdown, keine Code-Blöcke)."
|
||||
)
|
||||
|
||||
|
||||
def _analyze_with_openai(input_path: Path) -> Path:
    """Run a structured JSON analysis of the worksheet via OpenAI.

    Args:
        input_path: Path to the scanned worksheet image.

    Returns:
        Path to the written ``*_analyse.json`` file in BEREINIGT_DIR.

    Raises:
        FileNotFoundError: If the input file does not exist.
        RuntimeError: If the model returns invalid JSON or an unexpected
            response structure.
    """
    if not input_path.exists():
        raise FileNotFoundError(f"Eingabedatei nicht gefunden: {input_path}")

    api_key = get_openai_api_key()
    image_data_url = encode_image_to_data_url(input_path)

    url = "https://api.openai.com/v1/chat/completions"
    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}

    system_prompt = _get_analysis_system_prompt()

    user_text = '''Analysiere dieses Arbeitsblatt und gib ein JSON mit folgendem Aufbau zurück:

{
"title": string | null,
"subject": string | null,
"grade_level": string | null,
"instructions": string | null,
"canonical_text": string,
"printed_blocks": [...],
"handwritten_annotations": [...],
"struck_through_words": [...],
"tasks": [...],
"has_diagram": boolean,
"diagram_elements": [...]
}'''

    payload = {
        "model": "gpt-4o-mini",
        "messages": [
            {"role": "system", "content": system_prompt},
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": user_text},
                    {"type": "image_url", "image_url": {"url": image_data_url}},
                ],
            },
        ],
        "max_tokens": 2500,
    }

    # Timeout guards against a hung connection blocking the pipeline.
    response = requests.post(url, headers=headers, json=payload, timeout=120)
    response.raise_for_status()
    data = response.json()

    try:
        raw_content = data["choices"][0]["message"]["content"]
        # Extract the JSON block if the model wrapped it in Markdown fences.
        if "```json" in raw_content:
            raw_content = raw_content.split("```json")[1].split("```")[0].strip()
        elif "```" in raw_content:
            raw_content = raw_content.split("```")[1].split("```")[0].strip()
        obj = json.loads(raw_content)
    except json.JSONDecodeError as e:
        raise RuntimeError(f"KI hat kein valides JSON zurückgegeben: {e}\nAntwort: {raw_content}") from e
    except Exception as e:
        raise RuntimeError(f"Unerwartete Antwortstruktur: {e}\nAntwort: {data}") from e

    out_name = input_path.stem + "_analyse.json"
    out_path = BEREINIGT_DIR / out_name
    out_path.write_text(json.dumps(obj, ensure_ascii=False, indent=2), encoding="utf-8")
    return out_path
def _analyze_with_claude(input_path: Path) -> Path:
    """Run a structured JSON analysis via the Claude Vision API.

    Delegates the API call to ``claude_vision.analyze_worksheet_with_claude``
    and stores the result as ``*_analyse.json`` in BEREINIGT_DIR.

    Raises:
        FileNotFoundError: If the input file does not exist.
        Exception: Re-raises any failure from the Claude call after logging.
    """
    # Imported lazily so the module loads even without the claude_vision dependency.
    from claude_vision import analyze_worksheet_with_claude

    if not input_path.exists():
        raise FileNotFoundError(f"Eingabedatei nicht gefunden: {input_path}")

    logger.info(f"Analyzing with Claude Vision: {input_path.name}")

    try:
        analysis_data = analyze_worksheet_with_claude(input_path, max_tokens=2500)

        out_name = input_path.stem + "_analyse.json"
        out_path = BEREINIGT_DIR / out_name
        out_path.write_text(
            json.dumps(analysis_data, ensure_ascii=False, indent=2),
            encoding="utf-8"
        )

        logger.info(f"Claude analysis saved: {out_path.name}")
        return out_path

    except Exception as e:
        logger.error(f"Claude analysis failed: {e}")
        raise
def analyze_scan_structure_with_ai(input_path: Path) -> Path:
    """Run the structured JSON worksheet analysis (hybrid mode).

    Uses the API configured via VISION_API:
    - "claude" (default): Claude 3.5 Sonnet - better OCR and layout detection
    - "openai": OpenAI GPT-4o-mini - cheaper and faster

    When Claude is selected but fails, the call transparently falls back
    to OpenAI; an unknown setting falls back to Claude.

    Returns:
        Path to the written ``*_analyse.json`` file.
    """
    vision_api = get_vision_api()
    logger.info(f"Using Vision API: {vision_api}")

    if vision_api == "claude":
        try:
            return _analyze_with_claude(input_path)
        except Exception as e:
            logger.warning(f"Claude failed, falling back to OpenAI: {e}")
            return _analyze_with_openai(input_path)

    elif vision_api == "openai":
        return _analyze_with_openai(input_path)

    else:
        logger.warning(f"Unknown VISION_API '{vision_api}', using Claude as default")
        return _analyze_with_claude(input_path)
328
backend/ai_processing/cloze_generator.py
Normal file
328
backend/ai_processing/cloze_generator.py
Normal file
@@ -0,0 +1,328 @@
"""AI Processing - cloze (gap-fill) generator.

Generates cloze texts with translations from worksheet analyses.
"""

from pathlib import Path
import json
import os
import requests
import logging

from .core import (
    get_openai_api_key,
    get_vision_api,
    BEREINIGT_DIR,
)

logger = logging.getLogger(__name__)

# ISO language codes mapped to their (German) display names.
LANGUAGE_NAMES = {
    "tr": "Türkisch",
    "ar": "Arabisch",
    "ru": "Russisch",
    "en": "Englisch",
    "fr": "Französisch",
    "es": "Spanisch",
    "pl": "Polnisch",
    "uk": "Ukrainisch",
}
def _generate_cloze_with_openai(analysis_data: dict, target_language: str = "tr") -> dict:
    """Generate cloze texts from a worksheet analysis using OpenAI.

    Didactic requirements baked into the prompt:
    - Several meaningful gaps per sentence (not just one!)
    - Difficulty matches the original worksheet
    - A translation with the same gaps

    Args:
        analysis_data: The parsed analysis JSON of the worksheet.
        target_language: Language code for the translation (default: "tr").

    Returns:
        Dict with ``cloze_items`` and ``metadata``.

    Raises:
        RuntimeError: If the model response is missing or invalid JSON.
    """
    api_key = get_openai_api_key()

    # Pull the relevant fields out of the analysis.
    title = analysis_data.get("title") or "Arbeitsblatt"
    subject = analysis_data.get("subject") or "Allgemein"
    grade_level = analysis_data.get("grade_level") or "unbekannt"
    canonical_text = analysis_data.get("canonical_text") or ""
    printed_blocks = analysis_data.get("printed_blocks") or []

    # Assemble the worksheet text, de-duplicating blocks already present.
    content_parts = []
    if canonical_text:
        content_parts.append(canonical_text)
    for block in printed_blocks:
        text = block.get("text", "").strip()
        if text and text not in content_parts:
            content_parts.append(text)

    worksheet_content = "\n\n".join(content_parts)

    if not worksheet_content.strip():
        logger.warning("Kein Textinhalt für Lückentext-Generierung gefunden")
        return {"cloze_items": [], "metadata": {"error": "Kein Textinhalt gefunden"}}

    target_lang_name = LANGUAGE_NAMES.get(target_language, "Türkisch")

    url = "https://api.openai.com/v1/chat/completions"
    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}

    system_prompt = f"""Du bist ein erfahrener Pädagoge, der Lückentexte für Schüler erstellt.

WICHTIGE REGELN FÜR LÜCKENTEXTE:

1. MEHRERE LÜCKEN PRO SATZ:
- Erstelle IMMER mehrere sinnvolle Lücken pro Satz
- Beispiel: "Ich habe gestern meine Hausaufgaben gemacht."
→ Lücken: "habe" UND "gemacht" (nicht nur eine!)
- Wähle Wörter, die für das Verständnis wichtig sind

2. SCHWIERIGKEITSGRAD:
- Niveau muss exakt "{grade_level}" entsprechen
- Nicht zu leicht, nicht zu schwer
- Altersgerechte Lücken wählen

3. SINNVOLLE LÜCKENWÖRTER:
- Verben (konjugiert)
- Wichtige Nomen
- Adjektive
- KEINE Artikel oder Präpositionen allein

4. ÜBERSETZUNG:
- Übersetze den VOLLSTÄNDIGEN Satz auf {target_lang_name}
- Die GLEICHEN Wörter müssen als Lücken markiert sein
- Die Übersetzung dient als Hilfe für Eltern

5. AUSGABE: Nur gültiges JSON, kein Markdown."""

    user_prompt = f"""Erstelle Lückentexte aus diesem Arbeitsblatt:

TITEL: {title}
FACH: {subject}
KLASSENSTUFE: {grade_level}

TEXT:
{worksheet_content}

Erstelle 5-8 Sätze mit Lücken. Gib das Ergebnis als JSON zurück:

{{
"cloze_items": [
{{
"id": "c1",
"original_sentence": "Der vollständige Originalsatz ohne Lücken",
"sentence_with_gaps": "Der Satz mit ___ für jede Lücke",
"gaps": [
{{
"id": "g1",
"word": "das fehlende Wort",
"position": 0,
"hint": "optionaler Hinweis"
}}
],
"translation": {{
"language": "{target_language}",
"language_name": "{target_lang_name}",
"full_sentence": "Vollständige Übersetzung",
"sentence_with_gaps": "Übersetzung mit ___ an gleichen Stellen"
}}
}}
],
"metadata": {{
"subject": "{subject}",
"grade_level": "{grade_level}",
"source_title": "{title}",
"target_language": "{target_language}",
"total_gaps": 0
}}
}}

WICHTIG:
- Jeder Satz MUSS mindestens 2 Lücken haben!
- Die Lücken in der Übersetzung müssen den deutschen Lücken entsprechen
- Position ist der Index des Wortes im Satz (0-basiert)"""

    payload = {
        "model": "gpt-4o-mini",
        "response_format": {"type": "json_object"},
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        "max_tokens": 3000,
        "temperature": 0.7,
    }

    # Timeout guards against a hung connection blocking the pipeline.
    response = requests.post(url, headers=headers, json=payload, timeout=120)
    response.raise_for_status()
    data = response.json()

    try:
        content = data["choices"][0]["message"]["content"]
        cloze_data = json.loads(content)
    except (KeyError, json.JSONDecodeError) as e:
        # Chain the cause so the original API failure stays visible.
        raise RuntimeError(f"Fehler bei Lückentext-Generierung: {e}") from e

    # Recompute the total gap count rather than trusting the model.
    total_gaps = sum(len(item.get("gaps", [])) for item in cloze_data.get("cloze_items", []))
    if "metadata" in cloze_data:
        cloze_data["metadata"]["total_gaps"] = total_gaps

    return cloze_data
def _generate_cloze_with_claude(analysis_data: dict, target_language: str = "tr") -> dict:
    """Generate cloze texts from a worksheet analysis using the Claude API.

    Args:
        analysis_data: The parsed analysis JSON of the worksheet.
        target_language: Language code for the translation (default: "tr").

    Returns:
        Dict with ``cloze_items`` and ``metadata``.

    Raises:
        RuntimeError: If ANTHROPIC_API_KEY is missing or Claude returns
            invalid JSON.
    """
    # Imported lazily so the module loads even without the anthropic package.
    import anthropic

    api_key = os.getenv("ANTHROPIC_API_KEY")
    if not api_key:
        raise RuntimeError("ANTHROPIC_API_KEY ist nicht gesetzt.")

    client = anthropic.Anthropic(api_key=api_key)

    # Pull the relevant fields out of the analysis.
    title = analysis_data.get("title") or "Arbeitsblatt"
    subject = analysis_data.get("subject") or "Allgemein"
    grade_level = analysis_data.get("grade_level") or "unbekannt"
    canonical_text = analysis_data.get("canonical_text") or ""
    printed_blocks = analysis_data.get("printed_blocks") or []

    content_parts = []
    if canonical_text:
        content_parts.append(canonical_text)
    for block in printed_blocks:
        text = block.get("text", "").strip()
        if text and text not in content_parts:
            content_parts.append(text)

    worksheet_content = "\n\n".join(content_parts)

    if not worksheet_content.strip():
        return {"cloze_items": [], "metadata": {"error": "Kein Textinhalt gefunden"}}

    target_lang_name = LANGUAGE_NAMES.get(target_language, "Türkisch")

    prompt = f"""Erstelle Lückentexte aus diesem Arbeitsblatt.

WICHTIGE REGELN:
1. MEHRERE LÜCKEN PRO SATZ (mindestens 2!)
Beispiel: "Ich habe gestern Hausaufgaben gemacht" → Lücken: "habe" UND "gemacht"
2. Schwierigkeitsgrad: exakt "{grade_level}"
3. Übersetzung auf {target_lang_name} mit gleichen Lücken

TITEL: {title}
FACH: {subject}
KLASSENSTUFE: {grade_level}

TEXT:
{worksheet_content}

Antworte NUR mit diesem JSON (5-8 Sätze):
{{
"cloze_items": [
{{
"id": "c1",
"original_sentence": "Vollständiger Satz",
"sentence_with_gaps": "Satz mit ___ für Lücken",
"gaps": [
{{"id": "g1", "word": "Lückenwort", "position": 0, "hint": "Hinweis"}}
],
"translation": {{
"language": "{target_language}",
"language_name": "{target_lang_name}",
"full_sentence": "Übersetzung",
"sentence_with_gaps": "Übersetzung mit ___"
}}
}}
],
"metadata": {{
"subject": "{subject}",
"grade_level": "{grade_level}",
"source_title": "{title}",
"target_language": "{target_language}",
"total_gaps": 0
}}
}}"""

    message = client.messages.create(
        model="claude-3-5-sonnet-20241022",
        max_tokens=3000,
        messages=[{"role": "user", "content": prompt}]
    )

    content = message.content[0].text

    try:
        # Strip Markdown fences if Claude wrapped the JSON anyway.
        if "```json" in content:
            content = content.split("```json")[1].split("```")[0]
        elif "```" in content:
            content = content.split("```")[1].split("```")[0]
        cloze_data = json.loads(content.strip())
    except json.JSONDecodeError as e:
        # Chain the cause so the original parse failure stays visible.
        raise RuntimeError(f"Claude hat ungültiges JSON geliefert: {e}") from e

    # Recompute the total gap count rather than trusting the model.
    total_gaps = sum(len(item.get("gaps", [])) for item in cloze_data.get("cloze_items", []))
    if "metadata" in cloze_data:
        cloze_data["metadata"]["total_gaps"] = total_gaps

    return cloze_data
def generate_cloze_from_analysis(analysis_path: Path, target_language: str = "tr") -> Path:
    """Generate cloze texts from an analysis JSON file.

    The cloze texts are created:
    - with several meaningful gaps per sentence
    - at the difficulty level of the original worksheet
    - with a translation into the target language

    Args:
        analysis_path: Path to the ``*_analyse.json`` file.
        target_language: Language code for the translation (default: "tr").

    Returns:
        Path to the generated ``*_cloze.json`` file.

    Raises:
        FileNotFoundError: If the analysis file does not exist.
        RuntimeError: If the analysis file is not valid JSON.
    """
    if not analysis_path.exists():
        raise FileNotFoundError(f"Analysedatei nicht gefunden: {analysis_path}")

    try:
        analysis_data = json.loads(analysis_path.read_text(encoding="utf-8"))
    except json.JSONDecodeError as e:
        raise RuntimeError(f"Ungültige Analyse-JSON: {e}") from e

    logger.info(f"Generiere Lückentexte für: {analysis_path.name}")

    vision_api = get_vision_api()

    # Use the configured API; Claude falls back to OpenAI on failure.
    if vision_api == "claude":
        try:
            cloze_data = _generate_cloze_with_claude(analysis_data, target_language)
        except Exception as e:
            logger.warning(f"Claude Lückentext-Generierung fehlgeschlagen, nutze OpenAI: {e}")
            cloze_data = _generate_cloze_with_openai(analysis_data, target_language)
    else:
        cloze_data = _generate_cloze_with_openai(analysis_data, target_language)

    # Persist the cloze data next to the other derived artifacts.
    out_name = analysis_path.stem.replace("_analyse", "") + "_cloze.json"
    out_path = BEREINIGT_DIR / out_name
    out_path.write_text(json.dumps(cloze_data, ensure_ascii=False, indent=2), encoding="utf-8")

    logger.info(f"Lückentexte gespeichert: {out_path.name}")
    return out_path
71
backend/ai_processing/core.py
Normal file
71
backend/ai_processing/core.py
Normal file
@@ -0,0 +1,71 @@
"""AI Processing - core utilities.

Base helpers for API access, image encoding, and directory handling.
"""

from pathlib import Path
import shutil
import os
import base64
import logging

logger = logging.getLogger(__name__)

# Directory layout (under the user's home directory)
BASE_DIR = Path.home() / "Arbeitsblaetter"
EINGANG_DIR = BASE_DIR / "Eingang"
BEREINIGT_DIR = BASE_DIR / "Bereinigt"

# Vision API configuration; normalized to lowercase ("claude" is the default)
VISION_API = os.getenv("VISION_API", "claude").lower()
def get_openai_api_key() -> str:
    """Return the OpenAI API key from the OPENAI_API_KEY environment variable.

    Raises:
        RuntimeError: If the variable is unset or empty.
    """
    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        raise RuntimeError("OPENAI_API_KEY ist nicht gesetzt. Bitte API-Schlüssel als Umgebungsvariable setzen.")
    return api_key
def get_anthropic_api_key() -> str:
    """Return the Anthropic API key from the ANTHROPIC_API_KEY environment variable.

    Raises:
        RuntimeError: If the variable is unset or empty.
    """
    api_key = os.getenv("ANTHROPIC_API_KEY")
    if not api_key:
        raise RuntimeError("ANTHROPIC_API_KEY ist nicht gesetzt.")
    return api_key
def encode_image_to_data_url(input_path: Path) -> str:
    """Encode an image file as a base64 data URL for Vision APIs.

    NOTE(review): the MIME type is hard-coded to image/jpeg regardless of
    the actual file format — confirm callers only pass JPEGs.
    """
    image_bytes = input_path.read_bytes()
    image_b64 = base64.b64encode(image_bytes).decode("utf-8")
    return f"data:image/jpeg;base64,{image_b64}"
def encode_image_to_base64(input_path: Path) -> str:
    """Encode an image file as a plain base64 string."""
    image_bytes = input_path.read_bytes()
    return base64.b64encode(image_bytes).decode("utf-8")
def ensure_directories() -> None:
    """Create the input and output directories if they do not exist yet."""
    EINGANG_DIR.mkdir(parents=True, exist_ok=True)
    BEREINIGT_DIR.mkdir(parents=True, exist_ok=True)
def dummy_process_scan(input_path: Path) -> Path:
    """Plain copy into the Bereinigt folder - kept as a fallback.

    Args:
        input_path: Path to the scanned input file.

    Returns:
        Path to the copied ``*_bereinigt`` file in BEREINIGT_DIR.

    Raises:
        FileNotFoundError: If the input file does not exist.
    """
    if not input_path.exists():
        raise FileNotFoundError(f"Eingabedatei nicht gefunden: {input_path}")
    new_name = input_path.stem + "_bereinigt" + input_path.suffix
    target = BEREINIGT_DIR / new_name
    # copy2 preserves file metadata (timestamps) alongside content.
    shutil.copy2(input_path, target)
    return target
def get_vision_api() -> str:
    """Return the configured Vision API identifier (e.g. "claude" or "openai")."""
    return VISION_API
211
backend/ai_processing/html_generator.py
Normal file
211
backend/ai_processing/html_generator.py
Normal file
@@ -0,0 +1,211 @@
"""AI Processing - HTML generator.

Builds clean HTML worksheets from analysis JSON.
"""

from pathlib import Path
import json
import logging

from .core import BEREINIGT_DIR

logger = logging.getLogger(__name__)
def build_clean_html_from_analysis(analysis_path: Path) -> Path:
    """Build a clean HTML worksheet from a ``*_analyse.json`` file.

    Behavior:
    - Focuses on printed text (canonical_text / printed_blocks)
    - Handwritten entries and struck-through words are NOT carried over
      into the main text
    - Uses an open-source font stack (e.g. Inter / Noto Sans)

    Args:
        analysis_path: Path to the analysis JSON file.

    Returns:
        Path to the written ``*_clean.html`` file in BEREINIGT_DIR.

    Raises:
        FileNotFoundError: If the analysis file does not exist.
        RuntimeError: If the analysis file is not valid JSON.
    """
    if not analysis_path.exists():
        raise FileNotFoundError(f"Analysedatei nicht gefunden: {analysis_path}")
    try:
        data = json.loads(analysis_path.read_text(encoding="utf-8"))
    except json.JSONDecodeError as e:
        raise RuntimeError(f"Analyse-Datei enthält kein gültiges JSON: {analysis_path}\n{e}") from e

    title = data.get("title") or "Arbeitsblatt"
    subject = data.get("subject") or ""
    grade_level = data.get("grade_level") or ""
    instructions = data.get("instructions") or ""
    tasks = data.get("tasks", []) or []
    canonical_text = data.get("canonical_text") or ""
    printed_blocks = data.get("printed_blocks") or []
    struck = data.get("struck_through_words") or []

    html_parts = []
    html_parts.append("<!DOCTYPE html>")
    html_parts.append("<html lang='de'>")
    html_parts.append("<head>")
    html_parts.append("<meta charset='UTF-8'>")
    html_parts.append(f"<title>{title}</title>")
    html_parts.append(
        """
<style>
:root {
--font-main: "Inter", "Noto Sans", system-ui, -apple-system, BlinkMacSystemFont, sans-serif;
}
* { box-sizing: border-box; }
body {
font-family: var(--font-main);
margin: 32px;
line-height: 1.5;
font-size: 14px;
color: #111827;
}
.page {
max-width: 800px;
margin: 0 auto;
}
h1 {
font-size: 24px;
margin-bottom: 4px;
}
h2 {
font-size: 18px;
margin-top: 24px;
}
.meta {
font-size: 12px;
color: #6b7280;
margin-bottom: 16px;
}
.instructions {
margin-bottom: 20px;
padding: 8px 10px;
border-radius: 8px;
background: #eff6ff;
border: 1px solid #bfdbfe;
font-size: 13px;
}
.text-blocks {
margin-bottom: 24px;
}
.text-block {
margin-bottom: 8px;
}
.text-block-title {
font-weight: 600;
margin-bottom: 4px;
}
.task-list {
margin-top: 8px;
}
.task {
margin-bottom: 14px;
padding-bottom: 8px;
border-bottom: 1px dashed #e5e7eb;
}
.task-title {
font-weight: 600;
margin-bottom: 4px;
}
.gap-line {
display: inline-block;
border-bottom: 1px solid #000;
min-width: 80px;
margin: 0 4px;
}
.footnote {
margin-top: 24px;
font-size: 11px;
color: #9ca3af;
}
</style>
"""
    )
    html_parts.append("</head>")
    html_parts.append("<body>")
    html_parts.append("<div class='page'>")

    # Header area
    html_parts.append(f"<h1>{title}</h1>")
    meta_bits = []
    if subject:
        meta_bits.append(f"Fach: {subject}")
    if grade_level:
        meta_bits.append(f"Klassenstufe: {grade_level}")
    if meta_bits:
        html_parts.append(f"<div class='meta'>{' | '.join(meta_bits)}</div>")

    if instructions:
        html_parts.append(
            f"<div class='instructions'><strong>Arbeitsanweisung:</strong> {instructions}</div>"
        )

    # Main text / printed blocks
    html_parts.append("<section class='text-blocks'>")

    if printed_blocks:
        for block in printed_blocks:
            role = (block.get("role") or "body").lower()
            text = (block.get("text") or "").strip()
            if not text:
                continue
            html_parts.append("<div class='text-block'>")
            if role == "title":
                html_parts.append(f"<div class='text-block-title'>{text}</div>")
            else:
                html_parts.append(f"<div>{text}</div>")
            html_parts.append("</div>")
    elif canonical_text:
        # Fallback: split canonical_text into paragraphs
        paragraphs = [
            p.strip()
            for p in canonical_text.replace("\r\n", "\n").split("\n\n")
            if p.strip()
        ]
        for p in paragraphs:
            html_parts.append(f"<div class='text-block'>{p}</div>")

    html_parts.append("</section>")

    # Task section
    if tasks:
        html_parts.append("<h2>Aufgaben</h2>")
        html_parts.append("<div class='task-list'>")

        for idx, task in enumerate(tasks, start=1):
            t_type = task.get("type") or "other"
            desc = task.get("description") or ""
            text_with_gaps = task.get("text_with_gaps")

            html_parts.append("<div class='task'>")
            html_parts.append(
                f"<div class='task-title'>Aufgabe {idx} ({t_type}): {desc}</div>"
            )

            if text_with_gaps:
                # Gaps ("___") are rendered as fill-in lines
                rendered = text_with_gaps.replace("___", "<span class='gap-line'> </span>")
                html_parts.append(f"<div>{rendered}</div>")
            html_parts.append("</div>")

        html_parts.append("</div>")  # .task-list

    # Small footnote with a hint about the reconstruction
    if struck:
        html_parts.append(
            "<div class='footnote'>Hinweis: Einige im Original durchgestrichene Wörter wurden "
            "von der KI erkannt und NICHT in dieses saubere Arbeitsblatt übernommen.</div>"
        )
    else:
        html_parts.append(
            "<div class='footnote'>Dieses Arbeitsblatt wurde automatisch aus einem Scan rekonstruiert "
            "und von handschriftlichen Eintragungen bereinigt.</div>"
        )

    html_parts.append("</div>")  # .page
    html_parts.append("</body></html>")

    html_content = "\n".join(html_parts)
    out_name = analysis_path.stem.replace("_analyse", "") + "_clean.html"
    out_path = BEREINIGT_DIR / out_name
    out_path.write_text(html_content, encoding="utf-8")
    return out_path
78
backend/ai_processing/image_processor.py
Normal file
78
backend/ai_processing/image_processor.py
Normal file
@@ -0,0 +1,78 @@
|
||||
"""
|
||||
AI Processing - Image Processor.
|
||||
|
||||
Entfernt Handschrift aus Arbeitsblatt-Scans.
|
||||
"""
|
||||
|
||||
from pathlib import Path
|
||||
import shutil
|
||||
import json
|
||||
import logging
|
||||
|
||||
from .core import BEREINIGT_DIR
|
||||
from .analysis import analyze_scan_structure_with_ai
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def remove_handwriting_from_scan(input_path: Path) -> Path:
    """
    Remove handwriting from a worksheet scan using AI-guided image processing (Stage 2).

    Steps:
        1. Load the Stage-1 analysis JSON, running the analysis on demand when
           it does not exist yet.
        2. Clean the scan with WorksheetCleaner (colour-based filtering of blue
           ink, AI-guided region masking, stroke-thickness analysis) while
           preserving diagrams and printed content.
        3. Write the cleaned image into BEREINIGT_DIR.

    Args:
        input_path: Path to the scanned worksheet image.

    Returns:
        Path to the cleaned image (*_clean.<ext>).

    Raises:
        FileNotFoundError: If the input file does not exist.
    """
    if not input_path.exists():
        raise FileNotFoundError(f"Eingabedatei nicht gefunden: {input_path}")

    # Imported lazily so this module stays importable without the image pipeline.
    from image_cleaner import WorksheetCleaner

    # Locate the Stage-1 analysis; create it on the fly when missing.
    analysis_path = BEREINIGT_DIR / (input_path.stem + "_analyse.json")
    if not analysis_path.exists():
        logger.info(f"Analysis not found for {input_path.name}, running analysis first")
        analysis_path = analyze_scan_structure_with_ai(input_path)

    try:
        analysis_data = json.loads(analysis_path.read_text(encoding='utf-8'))
    except json.JSONDecodeError as e:
        logger.error(f"Invalid analysis JSON: {analysis_path}\n{e}")
        # Degrade gracefully: run the cleaner with an empty analysis skeleton.
        analysis_data = {
            "layout": {"text_regions": [], "diagram_elements": []},
            "handwriting_regions": []
        }

    output_path = BEREINIGT_DIR / (input_path.stem + "_clean" + input_path.suffix)

    worksheet_cleaner = WorksheetCleaner(debug_mode=False)
    try:
        result_path = worksheet_cleaner.clean_worksheet(input_path, analysis_data, output_path)
        logger.info(f"Successfully cleaned {input_path.name}")
        return result_path
    except Exception as e:
        # Best effort: fall back to an untouched copy rather than failing the pipeline.
        logger.error(f"Cleaning failed for {input_path.name}, using original: {e}")
        shutil.copy2(input_path, output_path)
        return output_path
|
||||
155
backend/ai_processing/leitner.py
Normal file
155
backend/ai_processing/leitner.py
Normal file
@@ -0,0 +1,155 @@
|
||||
"""
|
||||
AI Processing - Leitner System.
|
||||
|
||||
Spaced Repetition System für Q&A-Paare.
|
||||
"""
|
||||
|
||||
from pathlib import Path
|
||||
from datetime import datetime, timedelta
|
||||
import json
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def update_leitner_progress(qa_path: Path, item_id: str, correct: bool) -> dict:
    """
    Update the learning progress of a Q&A item according to the Leitner system.

    Leitner boxes:
        - Box 0: new (not yet learned)
        - Box 1: learned (on mistake -> back to box 0)
        - Box 2: consolidated (on mistake -> back to box 1)

    A correct answer moves the item up one box (max 2); a wrong answer moves
    it down one box (min 0). The file is rewritten in place.

    Args:
        qa_path: Path to the *_qa.json file.
        item_id: ID of the Q&A item.
        correct: True if the item was answered correctly.

    Returns:
        Dict with the updated item state and a "status" field
        ("OK" or "NOT_FOUND").

    Raises:
        FileNotFoundError: If the Q&A file does not exist.
    """
    if not qa_path.exists():
        raise FileNotFoundError(f"Q&A-Datei nicht gefunden: {qa_path}")

    qa_data = json.loads(qa_path.read_text(encoding="utf-8"))

    # Locate the item by id.
    item = None
    for qa_item in qa_data.get("qa_items", []):
        if qa_item.get("id") == item_id:
            item = qa_item
            break

    if not item:
        return {"status": "NOT_FOUND", "message": f"Item {item_id} nicht gefunden"}

    item["last_seen"] = datetime.now().isoformat()
    current_box = item.get("leitner_box", 0)

    if correct:
        item["correct_count"] = item.get("correct_count", 0) + 1
        # Move up one box, capped at 2.
        item["leitner_box"] = min(current_box + 1, 2)
        # Review interval grows with the box (box 1: 3 days, box 2: 7 days).
        days = [1, 3, 7][item["leitner_box"]]
        item["next_review"] = (datetime.now() + timedelta(days=days)).isoformat()
    else:
        item["incorrect_count"] = item.get("incorrect_count", 0) + 1
        # BUGFIX: always write the box back. Previously the key was only
        # written when current_box > 0, so an item without a "leitner_box"
        # key raised KeyError when building the return dict below.
        item["leitner_box"] = max(current_box - 1, 0)
        # After a mistake the item comes back soon.
        item["next_review"] = (datetime.now() + timedelta(hours=4)).isoformat()

    qa_path.write_text(json.dumps(qa_data, ensure_ascii=False, indent=2), encoding="utf-8")

    box_names = ["Neu", "Gelernt", "Gefestigt"]
    return {
        "status": "OK",
        "item_id": item_id,
        "correct": correct,
        "new_box": item["leitner_box"],
        "box_name": box_names[item["leitner_box"]],
        # BUGFIX: use defaults — only one of the two counters is updated per
        # branch, so the very first answer used to raise KeyError on the
        # counter of the other branch.
        "correct_count": item.get("correct_count", 0),
        "incorrect_count": item.get("incorrect_count", 0),
        "next_review": item["next_review"]
    }
|
||||
|
||||
|
||||
def get_next_review_items(qa_path: Path, limit: int = 5) -> list:
    """
    Return the next Q&A items that are due for review.

    Prioritisation (most urgent first):
        1. Items in box 0 (new / recently answered wrong) — always included.
        2. Items whose scheduled review time has passed; the longer overdue,
           the more urgent.
        3. Frequently missed items receive an additional urgency boost.

    Args:
        qa_path: Path to the *_qa.json file.
        limit: Maximum number of items to return.

    Returns:
        Up to *limit* items sorted by priority, with internal bookkeeping
        fields stripped. Empty list when the file does not exist.
    """
    if not qa_path.exists():
        return []

    payload = json.loads(qa_path.read_text(encoding="utf-8"))
    now = datetime.now()
    candidates = []

    for entry in payload.get("qa_items", []):
        box = entry.get("leitner_box", 0)
        # Lower score == more urgent. Box 0 is most urgent; repeated
        # mistakes push the item further up the queue.
        score = box * 10 - entry.get("incorrect_count", 0) * 2

        due = True
        scheduled = entry.get("next_review")
        if scheduled:
            try:
                scheduled_at = datetime.fromisoformat(scheduled)
                due = now >= scheduled_at
                if due:
                    # Overdue items gain urgency proportional to the delay.
                    score -= (now - scheduled_at).total_seconds() / 3600
            except (ValueError, TypeError):
                due = True

        # Box-0 items are always offered, regardless of schedule.
        if box == 0 or due:
            candidates.append({**entry, "_priority": score, "_is_due": due})

    candidates.sort(key=lambda c: c["_priority"])

    # Strip the internal fields before handing the items out.
    return [
        {k: v for k, v in c.items() if not k.startswith("_")}
        for c in candidates[:limit]
    ]
|
||||
316
backend/ai_processing/mc_generator.py
Normal file
316
backend/ai_processing/mc_generator.py
Normal file
@@ -0,0 +1,316 @@
|
||||
"""
|
||||
AI Processing - Multiple Choice Generator.
|
||||
|
||||
Generiert Multiple-Choice-Fragen aus Arbeitsblatt-Analysen.
|
||||
"""
|
||||
|
||||
from pathlib import Path
|
||||
import json
|
||||
import random
|
||||
import os
|
||||
import requests
|
||||
import logging
|
||||
|
||||
from .core import (
|
||||
get_openai_api_key,
|
||||
get_vision_api,
|
||||
BEREINIGT_DIR,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _generate_mc_with_openai(analysis_data: dict, num_questions: int = 5) -> dict:
    """
    Generate multiple-choice questions from a worksheet analysis via OpenAI.

    Uses the gpt-4o-mini chat-completions endpoint with JSON response format.
    Question difficulty follows the analysis' "grade_level" field.

    Args:
        analysis_data: Parsed *_analyse.json content.
        num_questions: Number of questions to request.

    Returns:
        Dict with "questions" and "metadata" keys. When the analysis contains
        no usable text, returns an empty question list with an error note in
        the metadata instead of calling the API.

    Raises:
        RuntimeError: If the API response cannot be parsed as MC JSON.
        requests.HTTPError / requests.Timeout: On transport-level failures.
    """
    api_key = get_openai_api_key()

    # Pull the relevant content out of the analysis.
    title = analysis_data.get("title") or "Arbeitsblatt"
    subject = analysis_data.get("subject") or "Allgemein"
    grade_level = analysis_data.get("grade_level") or "unbekannt"
    canonical_text = analysis_data.get("canonical_text") or ""
    printed_blocks = analysis_data.get("printed_blocks") or []

    # Assemble the worksheet text, skipping duplicate blocks.
    content_parts = []
    if canonical_text:
        content_parts.append(canonical_text)
    for block in printed_blocks:
        text = block.get("text", "").strip()
        if text and text not in content_parts:
            content_parts.append(text)

    worksheet_content = "\n\n".join(content_parts)

    if not worksheet_content.strip():
        logger.warning("Kein Textinhalt für MC-Generierung gefunden")
        return {"questions": [], "metadata": {"error": "Kein Textinhalt gefunden"}}

    url = "https://api.openai.com/v1/chat/completions"
    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}

    system_prompt = f"""Du bist ein erfahrener Pädagoge, der Multiple-Choice-Fragen für Schüler erstellt.

WICHTIGE REGELN:
1. SCHWIERIGKEITSGRAD: Die Fragen müssen exakt dem Niveau "{grade_level}" entsprechen.
   - Nicht zu leicht, nicht zu schwer
   - Passend für das angegebene Klassenniveau

2. INHALTSTREUE: Alle Fragen müssen sich direkt auf den gegebenen Text beziehen.
   - Keine Fragen zu Themen, die nicht im Text vorkommen
   - Die richtige Antwort muss aus dem Text ableitbar sein

3. QUALITÄT DER DISTRAKTOREN (falsche Antworten):
   - Müssen plausibel klingen
   - Dürfen nicht offensichtlich falsch sein
   - Sollten typische Schüler-Missverständnisse widerspiegeln

4. AUSGABEFORMAT: Gib deine Antwort AUSSCHLIESSLICH als gültiges JSON zurück."""

    user_prompt = f"""Erstelle {num_questions} Multiple-Choice-Fragen basierend auf diesem Arbeitsblatt:

TITEL: {title}
FACH: {subject}
KLASSENSTUFE: {grade_level}

INHALT DES ARBEITSBLATTS:
{worksheet_content}

Gib die Fragen als JSON zurück:
{{
  "questions": [
    {{
      "id": "q1",
      "question": "Die Fragestellung hier",
      "options": [
        {{"id": "a", "text": "Antwort A"}},
        {{"id": "b", "text": "Antwort B"}},
        {{"id": "c", "text": "Antwort C"}},
        {{"id": "d", "text": "Antwort D"}}
      ],
      "correct_answer": "a",
      "explanation": "Kurze Erklärung warum diese Antwort richtig ist"
    }}
  ],
  "metadata": {{
    "subject": "{subject}",
    "grade_level": "{grade_level}",
    "source_title": "{title}",
    "num_questions": {num_questions}
  }}
}}"""

    payload = {
        "model": "gpt-4o-mini",
        "response_format": {"type": "json_object"},
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        "max_tokens": 2000,
        "temperature": 0.7,
    }

    # FIX: without a timeout, a stalled connection hangs this call forever;
    # 60 s matches the other API calls in this package.
    response = requests.post(url, headers=headers, json=payload, timeout=60)
    response.raise_for_status()
    data = response.json()

    try:
        content = data["choices"][0]["message"]["content"]
        mc_data = json.loads(content)
    except (KeyError, IndexError, json.JSONDecodeError) as e:
        # IndexError added: an empty "choices" list previously escaped the wrapper.
        raise RuntimeError(f"Fehler bei MC-Generierung: {e}")

    return mc_data
|
||||
|
||||
|
||||
def _generate_mc_with_claude(analysis_data: dict, num_questions: int = 5) -> dict:
    """
    Generate multiple-choice questions from a worksheet analysis via the
    Claude API (claude-3-5-sonnet).

    Args:
        analysis_data: Parsed *_analyse.json content.
        num_questions: Number of questions to request.

    Returns:
        Dict with "questions" and "metadata" keys; empty question list when
        the analysis contains no usable text.

    Raises:
        RuntimeError: If ANTHROPIC_API_KEY is unset or Claude returns
            invalid JSON.
    """
    import anthropic

    api_key = os.getenv("ANTHROPIC_API_KEY")
    if not api_key:
        raise RuntimeError("ANTHROPIC_API_KEY ist nicht gesetzt.")

    client = anthropic.Anthropic(api_key=api_key)

    # Collect the worksheet text, skipping duplicate blocks.
    title = analysis_data.get("title") or "Arbeitsblatt"
    subject = analysis_data.get("subject") or "Allgemein"
    grade_level = analysis_data.get("grade_level") or "unbekannt"
    canonical_text = analysis_data.get("canonical_text") or ""
    printed_blocks = analysis_data.get("printed_blocks") or []

    content_parts = []
    if canonical_text:
        content_parts.append(canonical_text)
    for block in printed_blocks:
        block_text = block.get("text", "").strip()
        if block_text and block_text not in content_parts:
            content_parts.append(block_text)

    worksheet_content = "\n\n".join(content_parts)
    if not worksheet_content.strip():
        return {"questions": [], "metadata": {"error": "Kein Textinhalt gefunden"}}

    prompt = f"""Erstelle {num_questions} Multiple-Choice-Fragen basierend auf diesem Arbeitsblatt.

WICHTIGE REGELN:
1. SCHWIERIGKEITSGRAD: Exakt Niveau "{grade_level}" - nicht leichter, nicht schwerer
2. INHALTSTREUE: Nur Fragen zum gegebenen Text
3. QUALITÄT: Plausible Distraktoren (falsche Antworten)

TITEL: {title}
FACH: {subject}
KLASSENSTUFE: {grade_level}

INHALT:
{worksheet_content}

Antworte NUR mit diesem JSON-Format:
{{
  "questions": [
    {{
      "id": "q1",
      "question": "Fragestellung",
      "options": [
        {{"id": "a", "text": "Antwort A"}},
        {{"id": "b", "text": "Antwort B"}},
        {{"id": "c", "text": "Antwort C"}},
        {{"id": "d", "text": "Antwort D"}}
      ],
      "correct_answer": "a",
      "explanation": "Erklärung"
    }}
  ],
  "metadata": {{
    "subject": "{subject}",
    "grade_level": "{grade_level}",
    "source_title": "{title}",
    "num_questions": {num_questions}
  }}
}}"""

    reply = client.messages.create(
        model="claude-3-5-sonnet-20241022",
        max_tokens=2000,
        messages=[{"role": "user", "content": prompt}]
    )

    raw = reply.content[0].text

    # Claude sometimes wraps the JSON in a fenced code block — unwrap it.
    try:
        if "```json" in raw:
            raw = raw.split("```json")[1].split("```")[0]
        elif "```" in raw:
            raw = raw.split("```")[1].split("```")[0]
        return json.loads(raw.strip())
    except json.JSONDecodeError as e:
        raise RuntimeError(f"Claude hat ungültiges JSON geliefert: {e}")
|
||||
|
||||
|
||||
def _shuffle_mc_options(mc_data: dict) -> dict:
|
||||
"""
|
||||
Mischt die Antwort-Optionen jeder Frage zufällig durch.
|
||||
Aktualisiert auch correct_answer entsprechend.
|
||||
|
||||
Dies stellt sicher, dass die richtige Antwort nicht immer an der gleichen Position steht.
|
||||
"""
|
||||
if "questions" not in mc_data:
|
||||
return mc_data
|
||||
|
||||
for question in mc_data["questions"]:
|
||||
options = question.get("options", [])
|
||||
correct_id = question.get("correct_answer")
|
||||
|
||||
if not options or not correct_id:
|
||||
continue
|
||||
|
||||
# Finde den Text der richtigen Antwort
|
||||
correct_text = None
|
||||
for opt in options:
|
||||
if opt.get("id") == correct_id:
|
||||
correct_text = opt.get("text")
|
||||
break
|
||||
|
||||
# Mische die Optionen
|
||||
random.shuffle(options)
|
||||
|
||||
# Vergebe neue IDs (a, b, c, d) und finde neue Position der richtigen Antwort
|
||||
new_ids = ["a", "b", "c", "d"]
|
||||
new_correct = None
|
||||
|
||||
for i, opt in enumerate(options):
|
||||
if i < len(new_ids):
|
||||
if opt.get("text") == correct_text:
|
||||
new_correct = new_ids[i]
|
||||
opt["id"] = new_ids[i]
|
||||
|
||||
if new_correct:
|
||||
question["correct_answer"] = new_correct
|
||||
|
||||
question["options"] = options
|
||||
|
||||
return mc_data
|
||||
|
||||
|
||||
def generate_mc_from_analysis(analysis_path: Path, num_questions: int = 5) -> Path:
    """
    Generate multiple-choice questions from an analysis JSON file.

    The questions are derived from the extracted worksheet text, kept at the
    original's difficulty level, and their answer positions are shuffled.
    Uses the configured vision API (Claude preferred, OpenAI as fallback).

    Args:
        analysis_path: Path to the *_analyse.json file.
        num_questions: Number of questions to generate (default: 5).

    Returns:
        Path to the written *_mc.json file.

    Raises:
        FileNotFoundError: If the analysis file does not exist.
        RuntimeError: If the analysis file contains invalid JSON.
    """
    if not analysis_path.exists():
        raise FileNotFoundError(f"Analysedatei nicht gefunden: {analysis_path}")

    try:
        analysis_data = json.loads(analysis_path.read_text(encoding="utf-8"))
    except json.JSONDecodeError as e:
        raise RuntimeError(f"Ungültige Analyse-JSON: {e}")

    logger.info(f"Generiere MC-Fragen für: {analysis_path.name}")

    # Dispatch on the configured API; any Claude failure falls back to OpenAI.
    if get_vision_api() == "claude":
        try:
            mc_data = _generate_mc_with_claude(analysis_data, num_questions)
        except Exception as e:
            logger.warning(f"Claude MC-Generierung fehlgeschlagen, nutze OpenAI: {e}")
            mc_data = _generate_mc_with_openai(analysis_data, num_questions)
    else:
        mc_data = _generate_mc_with_openai(analysis_data, num_questions)

    # Shuffle answer positions so "a" is not always the correct slot.
    mc_data = _shuffle_mc_options(mc_data)

    # Persist next to the other pipeline artefacts.
    out_path = BEREINIGT_DIR / (analysis_path.stem.replace("_analyse", "") + "_mc.json")
    out_path.write_text(json.dumps(mc_data, ensure_ascii=False, indent=2), encoding="utf-8")

    logger.info(f"MC-Fragen gespeichert: {out_path.name}")
    return out_path
|
||||
472
backend/ai_processing/mindmap.py
Normal file
472
backend/ai_processing/mindmap.py
Normal file
@@ -0,0 +1,472 @@
|
||||
"""
|
||||
AI Processing - Mindmap Generator.
|
||||
|
||||
Generiert kindgerechte Lernposter-Mindmaps aus Arbeitsblatt-Analysen.
|
||||
"""
|
||||
|
||||
from pathlib import Path
|
||||
import math
|
||||
import json
|
||||
import os
|
||||
import requests
|
||||
import logging
|
||||
|
||||
from .core import get_openai_api_key, BEREINIGT_DIR
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def generate_mindmap_data(analysis_path: Path) -> dict:
    """
    Extract technical terms from a worksheet analysis and group them for a
    child-friendly learning mindmap.

    Args:
        analysis_path: Path to the *_analyse.json file.

    Returns:
        Mindmap structure:
        {
            "topic": "main topic",
            "subject": "school subject",
            "categories": [
                {"name": ..., "color": "#hex", "emoji": ...,
                 "terms": [{"term": ..., "explanation": ...}]}
            ]
        }
        Returns an empty "categories" list when the analysis has no text or
        the AI call fails.

    Raises:
        FileNotFoundError: If the analysis file does not exist.
        RuntimeError: If the analysis file contains invalid JSON.
    """
    if not analysis_path.exists():
        raise FileNotFoundError(f"Analysedatei nicht gefunden: {analysis_path}")

    try:
        data = json.loads(analysis_path.read_text(encoding="utf-8"))
    except json.JSONDecodeError as e:
        raise RuntimeError(f"Analyse-Datei enthält kein gültiges JSON: {analysis_path}\n{e}") from e

    title = data.get("title") or "Arbeitsblatt"
    subject = data.get("subject") or ""
    canonical_text = data.get("canonical_text") or ""
    tasks = data.get("tasks", []) or []

    # Collect all available text for the extraction prompt.
    all_text = canonical_text
    for task in tasks:
        if task.get("description"):
            all_text += "\n" + task.get("description")
        if task.get("text_with_gaps"):
            all_text += "\n" + task.get("text_with_gaps")

    if not all_text.strip():
        return {
            "topic": title,
            "subject": subject,
            "categories": []
        }

    prompt = f"""Analysiere diesen Schultext und extrahiere alle Fachbegriffe für eine kindgerechte Lern-Mindmap.

THEMA: {title}
FACH: {subject}

TEXT:
{all_text[:3000]}

AUFGABE:
1. Identifiziere das Hauptthema (ein einzelnes Wort oder kurzer Begriff)
2. Finde ALLE Fachbegriffe und gruppiere sie in 3-6 sinnvolle Kategorien
3. Gib für jeden Begriff eine kurze, kindgerechte Erklärung (max 10 Wörter)
4. Wähle für jede Kategorie ein passendes Emoji und eine Farbe

Antworte NUR mit diesem JSON-Format:
{{
  "topic": "Hauptthema (z.B. 'Das Auge')",
  "categories": [
    {{
      "name": "Kategoriename",
      "emoji": "passendes Emoji",
      "color": "#Hexfarbe (bunt, kindgerecht)",
      "terms": [
        {{"term": "Fachbegriff", "explanation": "Kurze Erklärung"}}
      ]
    }}
  ]
}}

WICHTIG:
- Verwende kindgerechte, einfache Sprache
- Bunte, fröhliche Farben: #FF6B6B, #4ECDC4, #45B7D1, #96CEB4, #FFEAA7, #DDA0DD, #98D8C8
- Passende Emojis für jede Kategorie
- Mindestens 3 Begriffe pro Kategorie wenn möglich
- Maximal 6 Kategorien"""

    try:
        # Prefer Claude when configured.
        claude_key = os.environ.get("ANTHROPIC_API_KEY")
        if claude_key:
            import anthropic
            client = anthropic.Anthropic(api_key=claude_key)
            response = client.messages.create(
                model="claude-3-5-sonnet-20241022",
                max_tokens=2000,
                messages=[{"role": "user", "content": prompt}]
            )
            result_text = response.content[0].text
        else:
            # FIX: the old log line claimed Claude had "failed" although it was
            # simply not configured.
            logger.info("ANTHROPIC_API_KEY ist nicht gesetzt, nutze OpenAI für die Mindmap-Generierung.")
            # FIX: fetch the OpenAI key lazily — previously it was fetched
            # unconditionally at the top, so a missing OpenAI key broke even
            # a fully configured Claude setup.
            api_key = get_openai_api_key()
            url = "https://api.openai.com/v1/chat/completions"
            headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
            payload = {
                "model": "gpt-4o-mini",
                "messages": [
                    {"role": "system", "content": "Du bist ein Experte für kindgerechte Lernmaterialien."},
                    {"role": "user", "content": prompt}
                ],
                "max_tokens": 2000,
                "temperature": 0.7
            }
            resp = requests.post(url, headers=headers, json=payload, timeout=60)
            resp.raise_for_status()
            result_text = resp.json()["choices"][0]["message"]["content"]

        # Unwrap a possible fenced code block around the JSON.
        result_text = result_text.strip()
        if result_text.startswith("```"):
            result_text = result_text.split("```")[1]
            if result_text.startswith("json"):
                result_text = result_text[4:]
            result_text = result_text.strip()

        mindmap_data = json.loads(result_text)
        mindmap_data["subject"] = subject

        return mindmap_data

    except Exception as e:
        logger.error(f"Mindmap-Generierung fehlgeschlagen: {e}")
        # Fall back to an empty structure so callers can still render a page.
        return {
            "topic": title,
            "subject": subject,
            "categories": []
        }
|
||||
|
||||
|
||||
def generate_mindmap_html(mindmap_data: dict, format: str = "a3") -> str:
    """
    Render a child-friendly HTML/SVG mindmap poster.

    Args:
        mindmap_data: Dictionary produced by generate_mindmap_data().
        format: "a3" for an A3 poster (default) or "a4" for an A4 view.

    Returns:
        Complete HTML document (string) containing the SVG mindmap.
    """
    # NOTE(review): topic/category/term strings are interpolated into the
    # HTML without escaping — fine for trusted AI output, but worth
    # confirming if user-supplied data ever flows in here.
    topic = mindmap_data.get("topic", "Thema")
    subject = mindmap_data.get("subject", "")
    categories = mindmap_data.get("categories", [])

    # Page-format dependent dimensions.
    if format.lower() == "a4":
        page_size = "A4 landscape"
        svg_width = 1100
        svg_height = 780
        radius = 250
    else:  # a3 (default)
        page_size = "A3 landscape"
        svg_width = 1400
        svg_height = 990
        radius = 320

    # Without categories there is nothing to draw — emit a placeholder page.
    if not categories:
        return f"""<!DOCTYPE html>
<html lang="de">
<head>
    <meta charset="UTF-8">
    <title>Mindmap - {topic}</title>
    <style>
        body {{ font-family: 'Comic Sans MS', cursive, sans-serif; text-align: center; padding: 50px; }}
        h1 {{ color: #FF6B6B; }}
    </style>
</head>
<body>
    <h1>🧠 Mindmap: {topic}</h1>
    <p>Noch keine Daten vorhanden. Bitte zuerst das Arbeitsblatt analysieren.</p>
</body>
</html>"""

    num_categories = len(categories)
    center_x = svg_width // 2
    center_y = svg_height // 2

    # Place the categories evenly on a circle, starting at the top.
    category_positions = []
    for idx, cat in enumerate(categories):
        angle = (2 * math.pi * idx / num_categories) - (math.pi / 2)
        category_positions.append({
            "x": center_x + radius * math.cos(angle),
            "y": center_y + radius * math.sin(angle),
            "angle": angle,
            "data": cat,
        })

    html = f"""<!DOCTYPE html>
<html lang="de">
<head>
    <meta charset="UTF-8">
    <title>Lernposter - {topic}</title>
    <style>
        @page {{
            size: {page_size};
            margin: 10mm;
        }}
        @media print {{
            body {{ -webkit-print-color-adjust: exact; print-color-adjust: exact; }}
            .no-print {{ display: none !important; }}
        }}
        * {{ box-sizing: border-box; margin: 0; padding: 0; }}
        body {{
            font-family: 'Comic Sans MS', 'Chalkboard SE', 'Comic Neue', cursive, sans-serif;
            background: linear-gradient(135deg, #f5f7fa 0%, #e4e8f0 100%);
            min-height: 100vh;
            padding: 20px;
        }}
        .poster-container {{
            width: 100%;
            max-width: 1400px;
            margin: 0 auto;
            background: white;
            border-radius: 20px;
            box-shadow: 0 10px 40px rgba(0,0,0,0.1);
            overflow: hidden;
        }}
        .poster-header {{
            background: linear-gradient(90deg, #FF6B6B, #4ECDC4);
            padding: 15px 30px;
            display: flex;
            justify-content: space-between;
            align-items: center;
        }}
        .poster-title {{
            color: white;
            font-size: 24px;
            text-shadow: 2px 2px 4px rgba(0,0,0,0.2);
        }}
        .poster-subject {{
            color: white;
            font-size: 16px;
            opacity: 0.9;
        }}
        .mindmap-svg {{
            width: 100%;
            height: auto;
        }}
        .print-btn {{
            position: fixed;
            top: 20px;
            right: 20px;
            padding: 12px 24px;
            background: #4ECDC4;
            color: white;
            border: none;
            border-radius: 25px;
            font-size: 16px;
            cursor: pointer;
            box-shadow: 0 4px 15px rgba(78, 205, 196, 0.4);
            font-family: inherit;
        }}
        .print-btn:hover {{
            transform: scale(1.05);
            background: #45B7D1;
        }}
        /* Hover animations for the interactive (screen) version */
        .category-group:hover {{
            transform: scale(1.02);
            cursor: pointer;
        }}
        .term-bubble:hover {{
            transform: scale(1.1);
            filter: brightness(1.1);
        }}
    </style>
</head>
<body>
    <button class="print-btn no-print" onclick="window.print()">🖨️ Als A3 drucken</button>

    <div class="poster-container">
        <div class="poster-header">
            <div class="poster-title">🧠 Lernposter: {topic}</div>
            <div class="poster-subject">{subject}</div>
        </div>

        <svg class="mindmap-svg" viewBox="0 0 {svg_width} {svg_height}" xmlns="http://www.w3.org/2000/svg">
            <defs>
                <!-- Drop shadow for the bubbles -->
                <filter id="shadow" x="-20%" y="-20%" width="140%" height="140%">
                    <feDropShadow dx="2" dy="4" stdDeviation="4" flood-opacity="0.2"/>
                </filter>
                <!-- Glow effect for the centre node -->
                <filter id="glow">
                    <feGaussianBlur stdDeviation="8" result="coloredBlur"/>
                    <feMerge>
                        <feMergeNode in="coloredBlur"/>
                        <feMergeNode in="SourceGraphic"/>
                    </feMerge>
                </filter>
            </defs>

            <!-- Subtle dotted background pattern -->
            <pattern id="dots" x="0" y="0" width="30" height="30" patternUnits="userSpaceOnUse">
                <circle cx="15" cy="15" r="1.5" fill="#e0e0e0"/>
            </pattern>
            <rect width="100%" height="100%" fill="url(#dots)"/>

            <!-- Connector curves from the centre to each category -->
"""

    # Curved connectors, tinted in each category's colour.
    for cpos in category_positions:
        color = cpos["data"].get("color", "#4ECDC4")
        html += f"""    <path d="M {center_x} {center_y} Q {(center_x + cpos['x'])/2 + 30} {(center_y + cpos['y'])/2 - 30} {cpos['x']} {cpos['y']}"
          stroke="{color}" stroke-width="4" fill="none" stroke-linecap="round" opacity="0.6"/>
"""

    # Central bubble holding the main topic.
    html += f"""
    <!-- Centre: main topic -->
    <g filter="url(#glow)">
        <circle cx="{center_x}" cy="{center_y}" r="85" fill="url(#centerGradient)"/>
        <defs>
            <radialGradient id="centerGradient" cx="30%" cy="30%">
                <stop offset="0%" stop-color="#FFD93D"/>
                <stop offset="100%" stop-color="#FF6B6B"/>
            </radialGradient>
        </defs>
        <text x="{center_x}" y="{center_y - 10}" text-anchor="middle" font-size="28" font-weight="bold" fill="white">🌟</text>
        <text x="{center_x}" y="{center_y + 25}" text-anchor="middle" font-size="22" font-weight="bold" fill="white">{topic}</text>
    </g>
"""

    # Draw each category bubble plus its surrounding term bubbles.
    for cpos in category_positions:
        cat = cpos["data"]
        color = cat.get("color", "#4ECDC4")
        emoji = cat.get("emoji", "📚")
        name = cat.get("name", "Kategorie")
        terms = cat.get("terms", [])

        html += f"""
    <!-- Kategorie: {name} -->
    <g class="category-group" transform="translate({cpos['x']}, {cpos['y']})">
        <ellipse cx="0" cy="0" rx="75" ry="45" fill="{color}" filter="url(#shadow)"/>
        <text x="0" y="-8" text-anchor="middle" font-size="20">{emoji}</text>
        <text x="0" y="18" text-anchor="middle" font-size="14" font-weight="bold" fill="white">{name}</text>
"""

        # Spread the (at most 8 rendered) terms over a partial arc facing
        # away from the centre.
        term_radius = 110
        num_terms = len(terms)
        base_angle = cpos["angle"]
        spread = math.pi * 0.8  # 80% of a half circle
        for j, term_entry in enumerate(terms[:8]):
            term = term_entry.get("term", "")

            if num_terms > 1:
                term_angle = base_angle - spread / 2 + (spread * j / (num_terms - 1))
            else:
                term_angle = base_angle

            term_x = term_radius * math.cos(term_angle - base_angle)
            term_y = term_radius * math.sin(term_angle - base_angle)

            # Short spoke from the category to the term bubble.
            html += f"""        <line x1="0" y1="0" x2="{term_x * 0.6}" y2="{term_y * 0.6}" stroke="{color}" stroke-width="2" opacity="0.5"/>
"""

            # Bubble width scales with the term length.
            bubble_width = max(70, len(term) * 8 + 20)
            html += f"""        <g class="term-bubble" transform="translate({term_x}, {term_y})">
            <rect x="{-bubble_width/2}" y="-22" width="{bubble_width}" height="44" rx="22" fill="white" stroke="{color}" stroke-width="2" filter="url(#shadow)"/>
            <text x="0" y="5" text-anchor="middle" font-size="12" font-weight="bold" fill="#333">{term}</text>
        </g>
"""

        html += "    </g>\n"

    # Legend at the bottom: each category with its first few terms.
    html += f"""
    <!-- Legende -->
    <g transform="translate(50, {svg_height - 80})">
        <text x="0" y="0" font-size="14" font-weight="bold" fill="#666">📖 Begriffe zum Lernen:</text>
"""
    legend_x = 0
    for cpos in category_positions:
        cat = cpos["data"]
        color = cat.get("color", "#4ECDC4")
        emoji = cat.get("emoji", "📚")
        name = cat.get("name", "")
        terms = cat.get("terms", [])

        terms_text = ", ".join(t.get("term", "") for t in terms[:3])
        if len(terms) > 3:
            terms_text += "..."

        html += f"""        <g transform="translate({legend_x}, 25)">
            <circle cx="8" cy="0" r="8" fill="{color}"/>
            <text x="22" y="4" font-size="11" fill="#444"><tspan font-weight="bold">{emoji} {name}:</tspan> {terms_text}</text>
        </g>
"""
        legend_x += 220

    html += """    </g>
    </svg>
    </div>
</body>
</html>"""

    return html
|
||||
|
||||
|
||||
def save_mindmap_for_worksheet(analysis_path: Path, mindmap_data: dict = None) -> Path:
    """
    Persist the mindmap for a worksheet as a JSON file.

    Args:
        analysis_path: Path to the *_analyse.json file.
        mindmap_data: Optionally pre-generated mindmap data; when omitted it is
            produced on demand via generate_mindmap_data().

    Returns:
        Path to the written *_mindmap.json file.
    """
    data = mindmap_data if mindmap_data is not None else generate_mindmap_data(analysis_path)

    # Derive the output file name from the analysis file name and write the JSON
    # next to the other generated artifacts.
    target = BEREINIGT_DIR / (analysis_path.stem.replace("_analyse", "") + "_mindmap.json")
    serialized = json.dumps(data, ensure_ascii=False, indent=2)
    target.write_text(serialized, encoding="utf-8")

    logger.info(f"Mindmap-Daten gespeichert: {target.name}")
    return target
|
||||
824
backend/ai_processing/print_generator.py
Normal file
824
backend/ai_processing/print_generator.py
Normal file
@@ -0,0 +1,824 @@
|
||||
"""
|
||||
AI Processing - Print Version Generator.
|
||||
|
||||
Generiert druckbare HTML-Versionen für verschiedene Arbeitsblatt-Typen.
|
||||
"""
|
||||
|
||||
from pathlib import Path
|
||||
import json
|
||||
import random
|
||||
import logging
|
||||
|
||||
from .core import BEREINIGT_DIR
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def generate_print_version_qa(qa_path: Path, include_answers: bool = False) -> Path:
    """
    Generate a printable HTML version of the question/answer pairs.

    Args:
        qa_path: Path to the *_qa.json file
        include_answers: True for the solution sheet (for parents)

    Returns:
        Path to the generated HTML file

    Raises:
        FileNotFoundError: If qa_path does not exist.
    """
    if not qa_path.exists():
        raise FileNotFoundError(f"Q&A-Datei nicht gefunden: {qa_path}")

    qa_data = json.loads(qa_path.read_text(encoding="utf-8"))
    items = qa_data.get("qa_items", [])
    metadata = qa_data.get("metadata", {})

    title = metadata.get("source_title", "Arbeitsblatt")
    subject = metadata.get("subject", "")
    grade = metadata.get("grade_level", "")

    # Static document head: print-friendly CSS (hide .no-print elements when
    # printing, dashed separators between questions, ruled answer lines,
    # green-highlighted answers on the solution sheet).
    html_parts = []
    html_parts.append("""<!DOCTYPE html>
<html lang="de">
<head>
<meta charset="UTF-8">
<title>""" + title + """ - Fragen</title>
<style>
@media print {
    .no-print { display: none; }
    .page-break { page-break-before: always; }
}
body {
    font-family: Arial, sans-serif;
    max-width: 800px;
    margin: 40px auto;
    padding: 20px;
    line-height: 1.6;
}
h1 { font-size: 24px; margin-bottom: 8px; }
.meta { color: #666; margin-bottom: 24px; }
.question-block {
    margin-bottom: 32px;
    padding-bottom: 16px;
    border-bottom: 1px dashed #ccc;
}
.question-number {
    font-weight: bold;
    color: #333;
}
.question-text {
    font-size: 16px;
    margin: 8px 0;
}
.answer-space {
    border: 1px solid #ddd;
    min-height: 60px;
    margin-top: 12px;
    background: #fafafa;
}
.answer-lines {
    margin-top: 12px;
}
.answer-line {
    border-bottom: 1px solid #999;
    height: 28px;
}
.answer {
    margin-top: 8px;
    padding: 8px;
    background: #e8f5e9;
    border-left: 3px solid #4caf50;
}
.key-terms {
    font-size: 12px;
    color: #666;
    margin-top: 8px;
}
.key-terms span {
    background: #fff3e0;
    padding: 2px 6px;
    border-radius: 3px;
    margin-right: 4px;
}
</style>
</head>
<body>
""")

    # Header: sheet variant plus a compact metadata line.
    version_text = "Lösungsblatt" if include_answers else "Fragenblatt"
    html_parts.append(f"<h1>{title} - {version_text}</h1>")
    meta_parts = []
    if subject:
        meta_parts.append(f"Fach: {subject}")
    if grade:
        meta_parts.append(f"Klasse: {grade}")
    meta_parts.append(f"Anzahl Fragen: {len(items)}")
    html_parts.append(f"<div class='meta'>{' | '.join(meta_parts)}</div>")

    # Questions.
    # NOTE(review): question/answer text is interpolated into HTML without
    # escaping — fine for trusted local JSON, but consider html.escape() if
    # these files ever come from untrusted sources.
    for idx, item in enumerate(items, 1):
        html_parts.append("<div class='question-block'>")
        html_parts.append(f"<div class='question-number'>Frage {idx}</div>")
        html_parts.append(f"<div class='question-text'>{item.get('question', '')}</div>")

        if include_answers:
            # Solution sheet: show the answer inline.
            html_parts.append(f"<div class='answer'><strong>Antwort:</strong> {item.get('answer', '')}</div>")
            # Key terms (highlighted chips).
            key_terms = item.get("key_terms", [])
            if key_terms:
                terms_html = " ".join([f"<span>{term}</span>" for term in key_terms])
                html_parts.append(f"<div class='key-terms'>Wichtige Begriffe: {terms_html}</div>")
        else:
            # Question sheet: three ruled lines for handwritten answers.
            html_parts.append("<div class='answer-lines'>")
            for _ in range(3):
                html_parts.append("<div class='answer-line'></div>")
            html_parts.append("</div>")

        html_parts.append("</div>")

    html_parts.append("</body></html>")

    # Persist next to the other generated artifacts; suffix distinguishes the
    # question sheet from the solution sheet.
    suffix = "_qa_solutions.html" if include_answers else "_qa_print.html"
    out_name = qa_path.stem.replace("_qa", "") + suffix
    out_path = BEREINIGT_DIR / out_name
    out_path.write_text("\n".join(html_parts), encoding="utf-8")

    logger.info(f"Print-Version gespeichert: {out_path.name}")
    return out_path
|
||||
|
||||
|
||||
def generate_print_version_cloze(cloze_path: Path, include_answers: bool = False) -> Path:
    """
    Generate a printable HTML version of the cloze (fill-in-the-gap) texts.

    Args:
        cloze_path: Path to the *_cloze.json file
        include_answers: True for the solution sheet (for parents)

    Returns:
        Path to the generated HTML file

    Raises:
        FileNotFoundError: If cloze_path does not exist.
    """
    if not cloze_path.exists():
        raise FileNotFoundError(f"Cloze-Datei nicht gefunden: {cloze_path}")

    cloze_data = json.loads(cloze_path.read_text(encoding="utf-8"))
    items = cloze_data.get("cloze_items", [])
    metadata = cloze_data.get("metadata", {})

    title = metadata.get("source_title", "Arbeitsblatt")
    subject = metadata.get("subject", "")
    grade = metadata.get("grade_level", "")
    total_gaps = metadata.get("total_gaps", 0)

    # Static document head: styles for gap underlines, filled-gap chips on the
    # solution sheet, optional translation callouts, and the word bank.
    html_parts = []
    html_parts.append("""<!DOCTYPE html>
<html lang="de">
<head>
<meta charset="UTF-8">
<title>""" + title + """ - Lückentext</title>
<style>
@media print {
    .no-print { display: none; }
    .page-break { page-break-before: always; }
}
body {
    font-family: Arial, sans-serif;
    max-width: 800px;
    margin: 40px auto;
    padding: 20px;
    line-height: 1.8;
}
h1 { font-size: 24px; margin-bottom: 8px; }
.meta { color: #666; margin-bottom: 24px; }
.cloze-item {
    margin-bottom: 24px;
    padding: 16px;
    background: #f9f9f9;
    border-radius: 8px;
}
.cloze-number {
    font-weight: bold;
    color: #333;
    margin-bottom: 8px;
}
.cloze-sentence {
    font-size: 16px;
    line-height: 2;
}
.gap {
    display: inline-block;
    min-width: 80px;
    border-bottom: 2px solid #333;
    margin: 0 4px;
    text-align: center;
}
.gap-filled {
    display: inline-block;
    padding: 2px 8px;
    background: #e8f5e9;
    border: 1px solid #4caf50;
    border-radius: 4px;
    font-weight: bold;
}
.translation {
    margin-top: 12px;
    padding: 8px;
    background: #e3f2fd;
    border-left: 3px solid #2196f3;
    font-size: 14px;
    color: #555;
}
.translation-label {
    font-size: 12px;
    color: #777;
    margin-bottom: 4px;
}
.word-bank {
    margin-top: 32px;
    padding: 16px;
    background: #fff3e0;
    border-radius: 8px;
}
.word-bank-title {
    font-weight: bold;
    margin-bottom: 12px;
}
.word {
    display: inline-block;
    padding: 4px 12px;
    margin: 4px;
    background: white;
    border: 1px solid #ddd;
    border-radius: 4px;
}
</style>
</head>
<body>
""")

    # Header: sheet variant plus a compact metadata line.
    version_text = "Lösungsblatt" if include_answers else "Lückentext"
    html_parts.append(f"<h1>{title} - {version_text}</h1>")
    meta_parts = []
    if subject:
        meta_parts.append(f"Fach: {subject}")
    if grade:
        meta_parts.append(f"Klasse: {grade}")
    meta_parts.append(f"Lücken gesamt: {total_gaps}")
    html_parts.append(f"<div class='meta'>{' | '.join(meta_parts)}</div>")

    # Collect every gap word so the question sheet can offer a word bank.
    all_words = []

    # Cloze sentences. The source marks each gap with "___" in
    # sentence_with_gaps; gaps are filled (solution sheet) or underlined
    # (question sheet).
    for idx, item in enumerate(items, 1):
        html_parts.append("<div class='cloze-item'>")
        html_parts.append(f"<div class='cloze-number'>{idx}.</div>")

        gaps = item.get("gaps", [])
        sentence = item.get("sentence_with_gaps", "")

        if include_answers:
            # Solution sheet: replace one "___" placeholder per gap, in order
            # (count=1 keeps gap order aligned with the gaps list).
            for gap in gaps:
                word = gap.get("word", "")
                sentence = sentence.replace("___", f"<span class='gap-filled'>{word}</span>", 1)
        else:
            # Question sheet: render every gap as an underline.
            sentence = sentence.replace("___", "<span class='gap'> </span>")
            # Remember the missing words for the word bank.
            for gap in gaps:
                all_words.append(gap.get("word", ""))

        html_parts.append(f"<div class='cloze-sentence'>{sentence}</div>")

        # Optional translation callout under the sentence.
        translation = item.get("translation", {})
        if translation:
            lang_name = translation.get("language_name", "Übersetzung")
            full_sentence = translation.get("full_sentence", "")
            if full_sentence:
                html_parts.append("<div class='translation'>")
                html_parts.append(f"<div class='translation-label'>{lang_name}:</div>")
                html_parts.append(full_sentence)
                html_parts.append("</div>")

        html_parts.append("</div>")

    # Word bank (question sheet only), shuffled so the order gives no hints.
    # NOTE: random.shuffle makes the output non-deterministic by design.
    if not include_answers and all_words:
        random.shuffle(all_words)  # shuffle the words
        html_parts.append("<div class='word-bank'>")
        html_parts.append("<div class='word-bank-title'>Wortbank (diese Wörter fehlen):</div>")
        for word in all_words:
            html_parts.append(f"<span class='word'>{word}</span>")
        html_parts.append("</div>")

    html_parts.append("</body></html>")

    # Persist; suffix distinguishes the question sheet from the solution sheet.
    suffix = "_cloze_solutions.html" if include_answers else "_cloze_print.html"
    out_name = cloze_path.stem.replace("_cloze", "") + suffix
    out_path = BEREINIGT_DIR / out_name
    out_path.write_text("\n".join(html_parts), encoding="utf-8")

    logger.info(f"Cloze Print-Version gespeichert: {out_path.name}")
    return out_path
|
||||
|
||||
|
||||
def generate_print_version_mc(mc_path: Path, include_answers: bool = False) -> str:
    """
    Generate a printable HTML version of the multiple-choice questions.

    Unlike the Q&A/cloze generators, this returns the HTML string directly
    instead of writing a file.

    Args:
        mc_path: Path to the *_mc.json file
        include_answers: True for the solution sheet with correct answers marked

    Returns:
        HTML string (for direct delivery)

    Raises:
        FileNotFoundError: If mc_path does not exist.
    """
    if not mc_path.exists():
        raise FileNotFoundError(f"MC-Datei nicht gefunden: {mc_path}")

    mc_data = json.loads(mc_path.read_text(encoding="utf-8"))
    questions = mc_data.get("questions", [])
    metadata = mc_data.get("metadata", {})

    title = metadata.get("source_title", "Arbeitsblatt")
    subject = metadata.get("subject", "")
    grade = metadata.get("grade_level", "")

    # Static document head: styles for option rows with circular checkboxes,
    # green highlighting of correct options, explanation callouts, and a
    # compact answer-key grid at the end of the solution sheet.
    html_parts = []
    html_parts.append("""<!DOCTYPE html>
<html lang="de">
<head>
<meta charset="UTF-8">
<title>""" + title + """ - Multiple Choice</title>
<style>
@media print {
    .no-print { display: none; }
    .page-break { page-break-before: always; }
    body { font-size: 14pt; }
}
body {
    font-family: Arial, Helvetica, sans-serif;
    max-width: 800px;
    margin: 40px auto;
    padding: 20px;
    line-height: 1.6;
    color: #000;
}
h1 {
    font-size: 28px;
    margin-bottom: 8px;
    border-bottom: 2px solid #000;
    padding-bottom: 8px;
}
.meta {
    color: #333;
    margin-bottom: 32px;
    font-size: 14px;
}
.instructions {
    background: #f5f5f5;
    padding: 12px 16px;
    border-radius: 4px;
    margin-bottom: 24px;
    font-size: 14px;
}
.question-block {
    margin-bottom: 28px;
    padding-bottom: 16px;
    border-bottom: 1px solid #ddd;
}
.question-number {
    font-weight: bold;
    font-size: 18px;
    color: #000;
    margin-bottom: 8px;
}
.question-text {
    font-size: 16px;
    margin: 8px 0 16px 0;
    line-height: 1.5;
}
.options {
    margin-left: 20px;
}
.option {
    display: flex;
    align-items: flex-start;
    margin-bottom: 12px;
    padding: 8px 12px;
    border: 1px solid #ccc;
    border-radius: 4px;
    background: #fff;
}
.option-correct {
    background: #e8f5e9;
    border-color: #4caf50;
    border-width: 2px;
}
.option-checkbox {
    width: 20px;
    height: 20px;
    border: 2px solid #333;
    border-radius: 50%;
    margin-right: 12px;
    flex-shrink: 0;
    display: flex;
    align-items: center;
    justify-content: center;
}
.option-checkbox.checked::after {
    content: "✓";
    font-weight: bold;
    color: #4caf50;
}
.option-label {
    font-weight: bold;
    margin-right: 8px;
    min-width: 24px;
}
.option-text {
    flex: 1;
}
.explanation {
    margin-top: 8px;
    padding: 8px 12px;
    background: #e3f2fd;
    border-left: 3px solid #2196f3;
    font-size: 13px;
    color: #333;
}
.answer-key {
    margin-top: 40px;
    padding: 16px;
    background: #f5f5f5;
    border-radius: 8px;
}
.answer-key-title {
    font-weight: bold;
    font-size: 18px;
    margin-bottom: 12px;
    border-bottom: 1px solid #999;
    padding-bottom: 8px;
}
.answer-key-grid {
    display: grid;
    grid-template-columns: repeat(5, 1fr);
    gap: 8px;
}
.answer-key-item {
    padding: 8px;
    text-align: center;
    background: white;
    border: 1px solid #ddd;
    border-radius: 4px;
}
.answer-key-q {
    font-weight: bold;
}
.answer-key-a {
    color: #4caf50;
    font-weight: bold;
}
</style>
</head>
<body>
""")

    # Header: sheet variant plus metadata, assembled as one .meta div.
    version_text = "Lösungsblatt" if include_answers else "Multiple Choice Test"
    html_parts.append(f"<h1>{title}</h1>")
    html_parts.append(f"<div class='meta'><strong>{version_text}</strong>")
    if subject:
        html_parts.append(f" | Fach: {subject}")
    if grade:
        html_parts.append(f" | Klasse: {grade}")
    html_parts.append(f" | Anzahl Fragen: {len(questions)}</div>")

    # Student instructions only appear on the question sheet.
    if not include_answers:
        html_parts.append("<div class='instructions'>")
        html_parts.append("<strong>Anleitung:</strong> Kreuze bei jeder Frage die richtige Antwort an. ")
        html_parts.append("Es ist immer nur eine Antwort richtig.")
        html_parts.append("</div>")

    # Questions: each option gets a checkbox; on the solution sheet the correct
    # option is highlighted and its checkbox checked.
    for idx, q in enumerate(questions, 1):
        html_parts.append("<div class='question-block'>")
        html_parts.append(f"<div class='question-number'>Frage {idx}</div>")
        html_parts.append(f"<div class='question-text'>{q.get('question', '')}</div>")

        html_parts.append("<div class='options'>")
        correct_answer = q.get("correct_answer", "")

        for opt in q.get("options", []):
            opt_id = opt.get("id", "")
            is_correct = opt_id == correct_answer

            opt_class = "option"
            checkbox_class = "option-checkbox"
            if include_answers and is_correct:
                opt_class += " option-correct"
                checkbox_class += " checked"

            html_parts.append(f"<div class='{opt_class}'>")
            html_parts.append(f"<div class='{checkbox_class}'></div>")
            html_parts.append(f"<span class='option-label'>{opt_id})</span>")
            html_parts.append(f"<span class='option-text'>{opt.get('text', '')}</span>")
            html_parts.append("</div>")

        html_parts.append("</div>")

        # Explanation only on the solution sheet.
        if include_answers and q.get("explanation"):
            html_parts.append(f"<div class='explanation'><strong>Erklärung:</strong> {q.get('explanation')}</div>")

        html_parts.append("</div>")

    # Compact answer key grid — solution sheet only.
    if include_answers:
        html_parts.append("<div class='answer-key'>")
        html_parts.append("<div class='answer-key-title'>Lösungsschlüssel</div>")
        html_parts.append("<div class='answer-key-grid'>")
        for idx, q in enumerate(questions, 1):
            html_parts.append("<div class='answer-key-item'>")
            html_parts.append(f"<span class='answer-key-q'>{idx}.</span> ")
            html_parts.append(f"<span class='answer-key-a'>{q.get('correct_answer', '')}</span>")
            html_parts.append("</div>")
        html_parts.append("</div>")
        html_parts.append("</div>")

    html_parts.append("</body></html>")

    return "\n".join(html_parts)
|
||||
|
||||
|
||||
def generate_print_version_worksheet(analysis_path: Path) -> str:
    """
    Generate a print-optimized HTML version of the worksheet.

    Properties:
    - Large, easily readable type (16pt)
    - Black-and-white / grayscale friendly
    - Clear structure for printing
    - No interactive elements (apart from an on-screen print button that is
      hidden when printing)

    Args:
        analysis_path: Path to the *_analyse.json file

    Returns:
        HTML string for direct delivery

    Raises:
        FileNotFoundError: If analysis_path does not exist.
        RuntimeError: If the analysis file is not valid JSON.
    """
    if not analysis_path.exists():
        raise FileNotFoundError(f"Analysedatei nicht gefunden: {analysis_path}")

    try:
        data = json.loads(analysis_path.read_text(encoding="utf-8"))
    except json.JSONDecodeError as e:
        raise RuntimeError(f"Analyse-Datei enthält kein gültiges JSON: {analysis_path}\n{e}") from e

    # `or` fallbacks also normalize explicit null/empty values from the JSON.
    title = data.get("title") or "Arbeitsblatt"
    subject = data.get("subject") or ""
    grade_level = data.get("grade_level") or ""
    instructions = data.get("instructions") or ""
    tasks = data.get("tasks", []) or []
    canonical_text = data.get("canonical_text") or ""
    printed_blocks = data.get("printed_blocks") or []

    # Static document head: A4 page setup, print-safe monochrome styling,
    # gap underlines, ruled answer lines, and a screen-only print button.
    html_parts = []
    html_parts.append("""<!DOCTYPE html>
<html lang="de">
<head>
<meta charset="UTF-8">
<title>""" + title + """</title>
<style>
@page {
    size: A4;
    margin: 20mm;
}
@media print {
    body {
        font-size: 14pt !important;
        -webkit-print-color-adjust: exact;
        print-color-adjust: exact;
    }
    .no-print { display: none !important; }
    .page-break { page-break-before: always; }
}
* { box-sizing: border-box; }
body {
    font-family: Arial, "Helvetica Neue", sans-serif;
    max-width: 800px;
    margin: 0 auto;
    padding: 30px;
    line-height: 1.7;
    font-size: 16px;
    color: #000;
    background: #fff;
}
h1 {
    font-size: 28px;
    margin: 0 0 8px 0;
    padding-bottom: 8px;
    border-bottom: 3px solid #000;
}
h2 {
    font-size: 20px;
    margin: 28px 0 12px 0;
    padding-bottom: 4px;
    border-bottom: 1px solid #666;
}
.meta {
    font-size: 14px;
    color: #333;
    margin-bottom: 20px;
    padding: 8px 0;
}
.meta span {
    margin-right: 20px;
}
.instructions {
    margin: 20px 0;
    padding: 16px;
    border: 2px solid #333;
    background: #f5f5f5;
    font-size: 15px;
}
.instructions-label {
    font-weight: bold;
    margin-bottom: 8px;
}
.text-section {
    margin: 24px 0;
}
.text-block {
    margin-bottom: 16px;
    text-align: justify;
}
.text-block-title {
    font-weight: bold;
    font-size: 17px;
    margin-bottom: 8px;
}
.task-section {
    margin-top: 32px;
}
.task {
    margin-bottom: 24px;
    padding: 16px;
    border: 1px solid #999;
    background: #fafafa;
}
.task-header {
    font-weight: bold;
    font-size: 16px;
    margin-bottom: 12px;
    padding-bottom: 8px;
    border-bottom: 1px dashed #666;
}
.task-content {
    font-size: 15px;
}
.gap-line {
    display: inline-block;
    border-bottom: 2px solid #000;
    min-width: 100px;
    margin: 0 6px;
}
.answer-lines {
    margin-top: 16px;
}
.answer-line {
    border-bottom: 1px solid #333;
    height: 36px;
    margin-bottom: 4px;
}
.footer {
    margin-top: 40px;
    padding-top: 16px;
    border-top: 1px solid #ccc;
    font-size: 11px;
    color: #666;
    text-align: center;
}
/* Print button - hidden when printing */
.print-button {
    position: fixed;
    top: 20px;
    right: 20px;
    padding: 12px 24px;
    background: #333;
    color: #fff;
    border: none;
    border-radius: 6px;
    cursor: pointer;
    font-size: 14px;
}
.print-button:hover {
    background: #555;
}
</style>
</head>
<body>
<button class="print-button no-print" onclick="window.print()">🖨️ Drucken</button>
""")

    # Title
    html_parts.append(f"<h1>{title}</h1>")

    # Metadata line (subject / grade), omitted entirely when both are empty.
    meta_parts = []
    if subject:
        meta_parts.append(f"<span><strong>Fach:</strong> {subject}</span>")
    if grade_level:
        meta_parts.append(f"<span><strong>Klasse:</strong> {grade_level}</span>")
    if meta_parts:
        html_parts.append(f"<div class='meta'>{''.join(meta_parts)}</div>")

    # Work instructions box.
    if instructions:
        html_parts.append("<div class='instructions'>")
        html_parts.append("<div class='instructions-label'>Arbeitsanweisung:</div>")
        html_parts.append(f"<div>{instructions}</div>")
        html_parts.append("</div>")

    # Main text: prefer the structured printed_blocks; fall back to splitting
    # canonical_text into paragraphs on blank lines.
    if printed_blocks:
        html_parts.append("<section class='text-section'>")
        for block in printed_blocks:
            role = (block.get("role") or "body").lower()
            text = (block.get("text") or "").strip()
            if not text:
                continue
            if role == "title":
                html_parts.append(f"<div class='text-block'><div class='text-block-title'>{text}</div></div>")
            else:
                html_parts.append(f"<div class='text-block'>{text}</div>")
        html_parts.append("</section>")
    elif canonical_text:
        html_parts.append("<section class='text-section'>")
        paragraphs = [
            p.strip()
            for p in canonical_text.replace("\r\n", "\n").split("\n\n")
            if p.strip()
        ]
        for p in paragraphs:
            html_parts.append(f"<div class='text-block'>{p}</div>")
        html_parts.append("</section>")

    # Tasks
    if tasks:
        html_parts.append("<section class='task-section'>")
        html_parts.append("<h2>Aufgaben</h2>")

        for idx, task in enumerate(tasks, start=1):
            t_type = task.get("type") or "Aufgabe"
            desc = task.get("description") or ""
            text_with_gaps = task.get("text_with_gaps")

            html_parts.append("<div class='task'>")

            # Task header: map machine type to a German label; unknown types
            # fall through unchanged.
            type_label = {
                "fill_in_blank": "Lückentext",
                "multiple_choice": "Multiple Choice",
                "free_text": "Freitext",
                "matching": "Zuordnung",
                "labeling": "Beschriftung",
                "calculation": "Rechnung",
                "other": "Aufgabe"
            }.get(t_type, t_type)

            html_parts.append(f"<div class='task-header'>Aufgabe {idx}: {type_label}</div>")

            if desc:
                html_parts.append(f"<div class='task-content'>{desc}</div>")

            if text_with_gaps:
                # Render each "___" gap marker as a printable underline.
                rendered = text_with_gaps.replace("___", "<span class='gap-line'> </span>")
                html_parts.append(f"<div class='task-content' style='margin-top:12px;'>{rendered}</div>")

            # Ruled answer lines for free-text tasks, and as a fallback for
            # tasks with no content at all.
            if t_type in ["free_text", "other"] or (not text_with_gaps and not desc):
                html_parts.append("<div class='answer-lines'>")
                for _ in range(3):
                    html_parts.append("<div class='answer-line'></div>")
                html_parts.append("</div>")

            html_parts.append("</div>")

        html_parts.append("</section>")

    # Footer
    html_parts.append("<div class='footer'>")
    html_parts.append("Dieses Arbeitsblatt wurde automatisch aus einem Scan rekonstruiert.")
    html_parts.append("</div>")

    html_parts.append("</body></html>")

    return "\n".join(html_parts)
|
||||
333
backend/ai_processing/qa_generator.py
Normal file
333
backend/ai_processing/qa_generator.py
Normal file
@@ -0,0 +1,333 @@
|
||||
"""
|
||||
AI Processing - Q&A Generator.
|
||||
|
||||
Generiert Frage-Antwort-Paare mit Leitner-System-Vorbereitung.
|
||||
"""
|
||||
|
||||
from pathlib import Path
|
||||
import json
|
||||
import os
|
||||
import requests
|
||||
import logging
|
||||
|
||||
from .core import (
|
||||
get_openai_api_key,
|
||||
get_vision_api,
|
||||
BEREINIGT_DIR,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _generate_qa_with_openai(analysis_data: dict, num_questions: int = 8) -> dict:
    """
    Generate question/answer pairs from the worksheet analysis via OpenAI.

    Key didactic requirements:
    - Questions are based almost verbatim on the existing material
    - Only minimal rephrasing is allowed
    - Keywords / technical terms are marked as important
    - Difficulty matches the original (grade_level)

    Args:
        analysis_data: The analysis JSON of the worksheet
        num_questions: Number of questions to generate (default: 8)

    Returns:
        Dict with qa_items and metadata. When the analysis contains no usable
        text, returns empty qa_items with an error note in metadata.

    Raises:
        RuntimeError: If the API response is malformed or not valid JSON.
        requests.HTTPError: If the API returns an error status.
    """
    api_key = get_openai_api_key()

    # Extract the relevant content fields.
    title = analysis_data.get("title") or "Arbeitsblatt"
    subject = analysis_data.get("subject") or "Allgemein"
    grade_level = analysis_data.get("grade_level") or "unbekannt"
    canonical_text = analysis_data.get("canonical_text") or ""
    printed_blocks = analysis_data.get("printed_blocks") or []
    tasks = analysis_data.get("tasks") or []

    # Assemble the text content; skip blocks whose text duplicates an
    # already-collected part.
    content_parts = []
    if canonical_text:
        content_parts.append(canonical_text)
    for block in printed_blocks:
        text = block.get("text", "").strip()
        if text and text not in content_parts:
            content_parts.append(text)

    # Add task texts.
    for task in tasks:
        desc = task.get("description", "").strip()
        text = task.get("text_with_gaps", "").strip()
        if desc:
            content_parts.append(f"Aufgabe: {desc}")
        if text:
            content_parts.append(text)

    worksheet_content = "\n\n".join(content_parts)

    if not worksheet_content.strip():
        logger.warning("Kein Textinhalt für Q&A-Generierung gefunden")
        return {"qa_items": [], "metadata": {"error": "Kein Textinhalt gefunden"}}

    url = "https://api.openai.com/v1/chat/completions"
    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}

    system_prompt = f"""Du bist ein erfahrener Pädagoge, der Frage-Antwort-Paare für Schüler erstellt.

WICHTIGE REGELN:

1. INHALTE NUR AUS DEM TEXT:
- Verwende FAST WÖRTLICH den vorhandenen Stoff
- Du darfst nur minimal umformulieren (z.B. "Beschreibe..." → "Erkläre in eigenen Worten...")
- KEINE neuen Fakten oder Inhalte einführen!
- Alles muss aus dem gegebenen Text ableitbar sein

2. SCHWIERIGKEITSGRAD:
- Niveau muss exakt "{grade_level}" entsprechen
- Fragen altersgerecht formulieren

3. SCHLÜSSELWÖRTER MARKIEREN:
- Identifiziere wichtige Fachbegriffe als "key_terms"
- Diese Begriffe sind besonders wichtig für die Wiederholung
- Beispiele: Netzhaut, Linse, Pupille (beim Thema Auge)

4. FRAGETYPEN:
- Wissensfragen: "Was ist...?", "Nenne..."
- Verständnisfragen: "Erkläre...", "Beschreibe..."
- Anwendungsfragen: "Warum...?", "Was passiert, wenn...?"

5. ANTWORT-FORMAT:
- Kurze, präzise Antworten (1-3 Sätze)
- Die Antwort muss direkt aus dem Text stammen

6. AUSGABE: Nur gültiges JSON, kein Markdown."""

    user_prompt = f"""Erstelle {num_questions} Frage-Antwort-Paare aus diesem Arbeitsblatt:

TITEL: {title}
FACH: {subject}
KLASSENSTUFE: {grade_level}

TEXT:
{worksheet_content}

Gib das Ergebnis als JSON zurück:

{{
"qa_items": [
{{
"id": "qa1",
"question": "Die Frage hier (fast wörtlich aus dem Text)",
"answer": "Die korrekte Antwort (direkt aus dem Text)",
"question_type": "knowledge" | "understanding" | "application",
"key_terms": ["wichtiger Begriff 1", "wichtiger Begriff 2"],
"difficulty": 1-3,
"source_hint": "Kurzer Hinweis, wo im Text die Antwort steht",
"leitner_box": 0
}}
],
"metadata": {{
"subject": "{subject}",
"grade_level": "{grade_level}",
"source_title": "{title}",
"total_questions": {num_questions},
"key_terms_summary": ["alle", "wichtigen", "Fachbegriffe", "gesammelt"]
}}
}}

WICHTIG:
- Alle Antworten müssen aus dem Text ableitbar sein!
- "leitner_box": 0 bedeutet "neu" (noch nicht gelernt)
- "difficulty": 1=leicht, 2=mittel, 3=schwer (passend zu {grade_level})
- "key_terms" sind die wichtigsten Wörter, die der Schüler lernen soll"""

    payload = {
        "model": "gpt-4o-mini",
        # json_object mode forces the model to emit parseable JSON.
        "response_format": {"type": "json_object"},
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        "max_tokens": 3000,
        "temperature": 0.5,
    }

    # FIX: a timeout prevents the request from hanging indefinitely when the
    # API is unreachable or stalls mid-response.
    response = requests.post(url, headers=headers, json=payload, timeout=120)
    response.raise_for_status()
    data = response.json()

    try:
        content = data["choices"][0]["message"]["content"]
        qa_data = json.loads(content)
    # FIX: also catch IndexError (empty "choices" list) and chain the original
    # exception so the root cause stays visible in tracebacks.
    except (KeyError, IndexError, json.JSONDecodeError) as e:
        raise RuntimeError(f"Fehler bei Q&A-Generierung: {e}") from e

    # Initialize Leitner-box bookkeeping on every item the model omitted it
    # for; setdefault leaves model-supplied values untouched.
    for item in qa_data.get("qa_items", []):
        item.setdefault("leitner_box", 0)  # 0=new, 1=learned, 2=consolidated
        item.setdefault("correct_count", 0)
        item.setdefault("incorrect_count", 0)
        item.setdefault("last_seen", None)
        item.setdefault("next_review", None)

    return qa_data
|
||||
|
||||
|
||||
def _generate_qa_with_claude(analysis_data: dict, num_questions: int = 8) -> dict:
    """
    Generate question-answer pairs with the Claude API.

    Args:
        analysis_data: Parsed worksheet analysis (keys used: title, subject,
            grade_level, canonical_text, printed_blocks, tasks).
        num_questions: Number of Q&A pairs to request from the model.

    Returns:
        Dict with "qa_items" (each initialized with Leitner-box /
        spaced-repetition fields) and "metadata". Returns an empty item
        list when the analysis contains no usable text.

    Raises:
        RuntimeError: If ANTHROPIC_API_KEY is unset, the model returns an
            empty response, or the response is not valid JSON.
    """
    import anthropic

    api_key = os.getenv("ANTHROPIC_API_KEY")
    if not api_key:
        raise RuntimeError("ANTHROPIC_API_KEY ist nicht gesetzt.")

    client = anthropic.Anthropic(api_key=api_key)

    # Extract the relevant worksheet content from the analysis.
    title = analysis_data.get("title") or "Arbeitsblatt"
    subject = analysis_data.get("subject") or "Allgemein"
    grade_level = analysis_data.get("grade_level") or "unbekannt"
    canonical_text = analysis_data.get("canonical_text") or ""
    printed_blocks = analysis_data.get("printed_blocks") or []
    tasks = analysis_data.get("tasks") or []

    content_parts = []
    if canonical_text:
        content_parts.append(canonical_text)
    for block in printed_blocks:
        text = block.get("text", "").strip()
        # Skip duplicates (e.g. text already covered by canonical_text).
        if text and text not in content_parts:
            content_parts.append(text)
    for task in tasks:
        desc = task.get("description", "").strip()
        if desc:
            content_parts.append(f"Aufgabe: {desc}")

    worksheet_content = "\n\n".join(content_parts)

    if not worksheet_content.strip():
        return {"qa_items": [], "metadata": {"error": "Kein Textinhalt gefunden"}}

    prompt = f"""Erstelle {num_questions} Frage-Antwort-Paare aus diesem Arbeitsblatt.

WICHTIGE REGELN:
1. Verwende FAST WÖRTLICH den vorhandenen Stoff - KEINE neuen Fakten!
2. Schwierigkeitsgrad: exakt "{grade_level}"
3. Markiere wichtige Fachbegriffe als "key_terms"

TITEL: {title}
FACH: {subject}
KLASSENSTUFE: {grade_level}

TEXT:
{worksheet_content}

Antworte NUR mit diesem JSON:
{{
"qa_items": [
{{
"id": "qa1",
"question": "Frage (fast wörtlich aus Text)",
"answer": "Antwort (direkt aus Text)",
"question_type": "knowledge",
"key_terms": ["Begriff1", "Begriff2"],
"difficulty": 1,
"source_hint": "Wo im Text",
"leitner_box": 0
}}
],
"metadata": {{
"subject": "{subject}",
"grade_level": "{grade_level}",
"source_title": "{title}",
"total_questions": {num_questions},
"key_terms_summary": ["alle", "Fachbegriffe"]
}}
}}"""

    message = client.messages.create(
        model="claude-3-5-sonnet-20241022",
        max_tokens=3000,
        messages=[{"role": "user", "content": prompt}],
    )

    # Guard against an empty response: indexing message.content[0] would
    # otherwise raise a bare IndexError instead of a meaningful error.
    if not message.content:
        raise RuntimeError("Claude hat eine leere Antwort geliefert.")
    content = message.content[0].text

    try:
        # Strip optional Markdown code fences around the JSON payload.
        if "```json" in content:
            content = content.split("```json")[1].split("```")[0]
        elif "```" in content:
            content = content.split("```")[1].split("```")[0]
        qa_data = json.loads(content.strip())
    except json.JSONDecodeError as e:
        raise RuntimeError(f"Claude hat ungültiges JSON geliefert: {e}") from e

    # Initialize Leitner-box fields for spaced repetition on every item.
    for item in qa_data.get("qa_items", []):
        item.setdefault("leitner_box", 0)  # 0 = new (not yet learned)
        item.setdefault("correct_count", 0)
        item.setdefault("incorrect_count", 0)
        item.setdefault("last_seen", None)
        item.setdefault("next_review", None)

    return qa_data
|
||||
|
||||
|
||||
def generate_qa_from_analysis(analysis_path: Path, num_questions: int = 8) -> Path:
    """
    Generate question-answer pairs from an analysis JSON file.

    The Q&A pairs are:
    - Created almost verbatim from the original text
    - Prepared for repetition via the Leitner-box system
    - Tagged with key terms for consolidation

    Args:
        analysis_path: Path to the *_analyse.json file.
        num_questions: Number of questions to generate.

    Returns:
        Path to the generated *_qa.json file.

    Raises:
        FileNotFoundError: If the analysis file does not exist.
        RuntimeError: If the analysis file contains invalid JSON.
    """
    if not analysis_path.exists():
        raise FileNotFoundError(f"Analysedatei nicht gefunden: {analysis_path}")

    try:
        analysis_data = json.loads(analysis_path.read_text(encoding="utf-8"))
    except json.JSONDecodeError as e:
        raise RuntimeError(f"Ungültige Analyse-JSON: {e}") from e

    logger.info(f"Generiere Q&A-Paare für: {analysis_path.name}")

    vision_api = get_vision_api()

    # Generate Q&A with the configured vision API; fall back to OpenAI
    # when the Claude path fails for any reason.
    if vision_api == "claude":
        try:
            qa_data = _generate_qa_with_claude(analysis_data, num_questions)
        except Exception as e:
            logger.warning(f"Claude Q&A-Generierung fehlgeschlagen, nutze OpenAI: {e}")
            qa_data = _generate_qa_with_openai(analysis_data, num_questions)
    else:
        qa_data = _generate_qa_with_openai(analysis_data, num_questions)

    # Persist the Q&A data. Strip only a trailing "_analyse" marker:
    # str.replace would remove every occurrence inside the stem and could
    # mangle file names that contain "_analyse" elsewhere.
    stem = analysis_path.stem
    if stem.endswith("_analyse"):
        stem = stem[: -len("_analyse")]
    out_path = BEREINIGT_DIR / f"{stem}_qa.json"
    out_path.write_text(json.dumps(qa_data, ensure_ascii=False, indent=2), encoding="utf-8")

    logger.info(f"Q&A-Paare gespeichert: {out_path.name}")
    return out_path
|
||||
Reference in New Issue
Block a user