[split-required] Split 700-870 LOC files across all services

backend-lehrer (11 files): - llm_gateway/routes/schools.py (867 → 5), recording_api.py (848 → 6) - messenger_api.py (840 → 5), print_generator.py (824 → 5) - unit_analytics_api.py (751 → 5), classroom/routes/context.py (726 → 4) - llm_gateway/routes/edu_search_seeds.py (710 → 4) klausur-service (12 files): - ocr_labeling_api.py (845 → 4), metrics_db.py (833 → 4) - legal_corpus_api.py (790 → 4), page_crop.py (758 → 3) - mail/ai_service.py (747 → 4), github_crawler.py (767 → 3) - trocr_service.py (730 → 4), full_compliance_pipeline.py (723 → 4) - dsfa_rag_api.py (715 → 4), ocr_pipeline_auto.py (705 → 4) website (6 pages): - audit-checklist (867 → 8), content (806 → 6) - screen-flow (790 → 4), scraper (789 → 5) - zeugnisse (776 → 5), modules (745 → 4) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-25 08:01:18 +02:00
parent b6983ab1dc
commit 34da9f4cda
106 changed files with 16500 additions and 16947 deletions
--- a/backend-lehrer/ai_processing/print_cloze.py
+++ b/backend-lehrer/ai_processing/print_cloze.py
@@ -0,0 +1,193 @@
+"""
+AI Processing - Print Version Generator: Cloze (Lueckentext).
+
+Generates printable HTML for cloze/fill-in-the-blank worksheets.
+"""
+
+from pathlib import Path
+import json
+import random
+import logging
+
+from .core import BEREINIGT_DIR
+
+logger = logging.getLogger(__name__)
+
+
+def generate_print_version_cloze(cloze_path: Path, include_answers: bool = False) -> Path:
+    """
+    Generate a printable HTML version of a cloze (fill-in-the-blank) worksheet.
+
+    Every value read from the JSON file is HTML-escaped before it is embedded,
+    so characters such as "<" or "&" in the data cannot break the markup.
+
+    Args:
+        cloze_path: Path to the *_cloze.json file.
+        include_answers: True renders the solution sheet (gaps filled in);
+            False renders the question sheet followed by a shuffled word bank.
+
+    Returns:
+        Path of the HTML file written into BEREINIGT_DIR.
+
+    Raises:
+        FileNotFoundError: If cloze_path does not exist.
+    """
+    import html  # local import so this block does not rely on the module header
+
+    def esc(value) -> str:
+        """Return value as text that is safe to place inside HTML."""
+        return html.escape(str(value))
+
+    if not cloze_path.exists():
+        raise FileNotFoundError(f"Cloze-Datei nicht gefunden: {cloze_path}")
+
+    cloze_data = json.loads(cloze_path.read_text(encoding="utf-8"))
+    items = cloze_data.get("cloze_items", [])
+    metadata = cloze_data.get("metadata", {})
+
+    title = esc(metadata.get("source_title", "Arbeitsblatt"))
+    subject = metadata.get("subject", "")
+    grade = metadata.get("grade_level", "")
+    total_gaps = metadata.get("total_gaps", 0)
+
+    html_parts = []
+    html_parts.append("""<!DOCTYPE html>
+<html lang="de">
+<head>
+<meta charset="UTF-8">
+<title>""" + title + """ - Lueckentext</title>
+<style>
+  @media print {
+    .no-print { display: none; }
+    .page-break { page-break-before: always; }
+  }
+  body {
+    font-family: Arial, sans-serif;
+    max-width: 800px;
+    margin: 40px auto;
+    padding: 20px;
+    line-height: 1.8;
+  }
+  h1 { font-size: 24px; margin-bottom: 8px; }
+  .meta { color: #666; margin-bottom: 24px; }
+  .cloze-item {
+    margin-bottom: 24px;
+    padding: 16px;
+    background: #f9f9f9;
+    border-radius: 8px;
+  }
+  .cloze-number {
+    font-weight: bold;
+    color: #333;
+    margin-bottom: 8px;
+  }
+  .cloze-sentence {
+    font-size: 16px;
+    line-height: 2;
+  }
+  .gap {
+    display: inline-block;
+    min-width: 80px;
+    border-bottom: 2px solid #333;
+    margin: 0 4px;
+    text-align: center;
+  }
+  .gap-filled {
+    display: inline-block;
+    padding: 2px 8px;
+    background: #e8f5e9;
+    border: 1px solid #4caf50;
+    border-radius: 4px;
+    font-weight: bold;
+  }
+  .translation {
+    margin-top: 12px;
+    padding: 8px;
+    background: #e3f2fd;
+    border-left: 3px solid #2196f3;
+    font-size: 14px;
+    color: #555;
+  }
+  .translation-label {
+    font-size: 12px;
+    color: #777;
+    margin-bottom: 4px;
+  }
+  .word-bank {
+    margin-top: 32px;
+    padding: 16px;
+    background: #fff3e0;
+    border-radius: 8px;
+  }
+  .word-bank-title {
+    font-weight: bold;
+    margin-bottom: 12px;
+  }
+  .word {
+    display: inline-block;
+    padding: 4px 12px;
+    margin: 4px;
+    background: white;
+    border: 1px solid #ddd;
+    border-radius: 4px;
+  }
+</style>
+</head>
+<body>
+""")
+
+    # Header
+    version_text = "Loesungsblatt" if include_answers else "Lueckentext"
+    html_parts.append(f"<h1>{title} - {version_text}</h1>")
+    meta_parts = []
+    if subject:
+        meta_parts.append(f"Fach: {esc(subject)}")
+    if grade:
+        meta_parts.append(f"Klasse: {esc(grade)}")
+    meta_parts.append(f"Luecken gesamt: {esc(total_gaps)}")
+    html_parts.append(f"<div class='meta'>{' | '.join(meta_parts)}</div>")
+
+    all_words = []  # escaped gap words collected for the word bank
+    for idx, item in enumerate(items, 1):
+        html_parts.append("<div class='cloze-item'>")
+        html_parts.append(f"<div class='cloze-number'>{idx}.</div>")
+
+        gap_words = [esc(gap.get("word", "")) for gap in item.get("gaps", [])]
+        # Escape before substituting so only the spans added below are markup.
+        sentence = esc(item.get("sentence_with_gaps", ""))
+        if include_answers:
+            # Solution sheet: fill the placeholders left to right.
+            for word in gap_words:
+                sentence = sentence.replace("___", f"<span class='gap-filled'>{word}</span>", 1)
+        else:
+            # Question sheet: every placeholder becomes a blank line.
+            sentence = sentence.replace("___", "<span class='gap'>&nbsp;</span>")
+            all_words.extend(gap_words)
+        html_parts.append(f"<div class='cloze-sentence'>{sentence}</div>")
+
+        translation = item.get("translation", {})
+        if translation:
+            lang_name = translation.get("language_name", "Uebersetzung")
+            full_sentence = translation.get("full_sentence", "")
+            if full_sentence:
+                html_parts.append("<div class='translation'>")
+                html_parts.append(f"<div class='translation-label'>{esc(lang_name)}:</div>")
+                html_parts.append(esc(full_sentence))
+                html_parts.append("</div>")
+        html_parts.append("</div>")
+
+    # Word bank (question sheet only)
+    if not include_answers and all_words:
+        random.shuffle(all_words)
+        html_parts.append("<div class='word-bank'>")
+        html_parts.append("<div class='word-bank-title'>Wortbank (diese Woerter fehlen):</div>")
+        html_parts.extend(f"<span class='word'>{word}</span>" for word in all_words)
+        html_parts.append("</div>")
+    html_parts.append("</body></html>")
+
+    suffix = "_cloze_solutions.html" if include_answers else "_cloze_print.html"
+    out_name = cloze_path.stem.replace("_cloze", "") + suffix
+    out_path = BEREINIGT_DIR / out_name
+    out_path.write_text("\n".join(html_parts), encoding="utf-8")
+    logger.info("Cloze Print-Version gespeichert: %s", out_path.name)
+    return out_path