"""
AI Processing - HTML Generator.
Baut saubere HTML-Arbeitsblätter aus Analyse-JSON.
"""
import html
import json
import logging
from pathlib import Path
from typing import Optional

from .core import BEREINIGT_DIR
logger = logging.getLogger(__name__)
# Marker used in ``text_with_gaps`` for a blank the pupil has to fill in.
_GAP_MARKER = "___"

# Minimal print-friendly stylesheet using an open-source font stack.
_PAGE_CSS = (
    "body { font-family: 'Inter', 'Noto Sans', system-ui, sans-serif; "
    "line-height: 1.5; margin: 0; } "
    ".page { max-width: 48em; margin: 0 auto; padding: 2em; } "
    ".meta { color: #555; margin-bottom: 1em; } "
    ".instructions { margin: 1em 0; } "
    ".task { margin: 1em 0; } "
    ".task-title { font-weight: 600; } "
    ".gap { display: inline-block; min-width: 6em; "
    "border-bottom: 1px solid #000; }"
)


def _esc(value) -> str:
    """Return *value* converted to text and HTML-escaped (quotes included)."""
    return html.escape(str(value), quote=True)


def _entries(value, text_key: str) -> list:
    """Return the dict entries of *value*; bare strings become ``{text_key: s}``.

    Anything that is not a list yields an empty list, and list items that are
    neither dicts nor strings are dropped, so malformed analysis data cannot
    crash the renderer.
    """
    if not isinstance(value, list):
        return []
    result = []
    for item in value:
        if isinstance(item, dict):
            result.append(item)
        elif isinstance(item, str):
            result.append({text_key: item})
    return result


def _render_text_section(printed_blocks, canonical_text) -> list:
    """Render the printed main text as a list of HTML lines."""
    rendered = []
    for block in _entries(printed_blocks, "text"):
        role = str(block.get("role") or "body").lower()
        text = str(block.get("text") or "").strip()
        if not text:
            continue
        # The page title already uses <h1>, so in-text titles are <h2>.
        tag = "h2" if role == "title" else "p"
        rendered.append(f"<{tag}>{_esc(text)}</{tag}>")

    if not rendered and canonical_text:
        # Fallback: split canonical_text into paragraphs on blank lines.
        normalised = str(canonical_text).replace("\r\n", "\n")
        rendered = [
            f"<p>{_esc(chunk.strip())}</p>"
            for chunk in normalised.split("\n\n")
            if chunk.strip()
        ]

    return ["<div class='text'>", *rendered, "</div>"]


def _render_tasks(tasks) -> list:
    """Render the task list as HTML lines; returns ``[]`` when there are no tasks."""
    task_items = _entries(tasks, "description")
    if not task_items:
        return []

    lines = ["<h2>Aufgaben</h2>", "<div class='task-list'>"]
    for idx, task in enumerate(task_items, start=1):
        t_type = task.get("type") or "other"
        desc = task.get("description") or ""
        text_with_gaps = task.get("text_with_gaps")

        lines.append("<div class='task'>")
        lines.append(
            f"<div class='task-title'>Aufgabe {idx} ({_esc(t_type)}): {_esc(desc)}</div>"
        )
        if text_with_gaps:
            # Escape first, then turn each gap marker into a blank line;
            # escaping never touches underscores, so the markers survive.
            body = _esc(text_with_gaps).replace(
                _GAP_MARKER, "<span class='gap'></span>"
            )
            lines.append(f"<div class='task-body'>{body}</div>")
        lines.append("</div>")
    lines.append("</div>")  # .task-list
    return lines


def build_clean_html_from_analysis(
    analysis_path: Path, output_dir: Optional[Path] = None
) -> Path:
    """Build a clean HTML worksheet from a ``*_analyse.json`` file.

    Only the printed text (``printed_blocks``, falling back to
    ``canonical_text``) is rendered. Handwritten entries and struck-through
    words are not copied into the main text. Every value taken from the
    analysis file is HTML-escaped before it is inserted into the page.

    Args:
        analysis_path: Path to the analysis JSON file.
        output_dir: Directory to write the HTML file into. Defaults to
            ``BEREINIGT_DIR``. The directory is created if it does not exist.

    Returns:
        Path of the written ``<stem without "_analyse">_clean.html`` file.

    Raises:
        FileNotFoundError: If ``analysis_path`` does not exist.
        RuntimeError: If the file is not valid JSON or its root is not an
            object.
    """
    analysis_path = Path(analysis_path)
    if not analysis_path.exists():
        raise FileNotFoundError(f"Analysedatei nicht gefunden: {analysis_path}")

    try:
        data = json.loads(analysis_path.read_text(encoding="utf-8"))
    except json.JSONDecodeError as e:
        raise RuntimeError(
            f"Analyse-Datei enthält kein gültiges JSON: {analysis_path}\n{e}"
        ) from e
    if not isinstance(data, dict):
        raise RuntimeError(
            f"Analyse-Datei enthält kein JSON-Objekt: {analysis_path}"
        )

    title = data.get("title") or "Arbeitsblatt"
    subject = data.get("subject") or ""
    grade_level = data.get("grade_level") or ""
    instructions = data.get("instructions") or ""
    struck = data.get("struck_through_words") or []

    html_parts = [
        "<!DOCTYPE html>",
        "<html lang='de'>",
        "<head>",
        "<meta charset='utf-8'>",
        f"<title>{_esc(title)}</title>",
        f"<style>{_PAGE_CSS}</style>",
        "</head>",
        "<body>",
        "<div class='page'>",
        # Header area
        f"<h1>{_esc(title)}</h1>",
    ]

    meta_bits = []
    if subject:
        meta_bits.append(f"Fach: {_esc(subject)}")
    if grade_level:
        meta_bits.append(f"Klassenstufe: {_esc(grade_level)}")
    if meta_bits:
        html_parts.append(f"<div class='meta'>{' | '.join(meta_bits)}</div>")

    if instructions:
        html_parts.append(
            "<div class='instructions'>"
            f"<strong>Arbeitsanweisung:</strong> {_esc(instructions)}"
            "</div>"
        )

    # Main text / printed blocks
    html_parts.extend(
        _render_text_section(
            data.get("printed_blocks"), data.get("canonical_text") or ""
        )
    )

    # Task area
    html_parts.extend(_render_tasks(data.get("tasks")))

    # Invisible note recording whether struck-through words were left out.
    if struck:
        html_parts.append(
            "<!-- Hinweis: Durchgestrichene Wörter wurden nicht übernommen. -->"
        )
    else:
        html_parts.append(
            "<!-- Hinweis: Keine durchgestrichenen Wörter erkannt. -->"
        )

    html_parts.append("</div>")  # .page
    html_parts.append("</body>")
    html_parts.append("</html>")
    html_content = "\n".join(html_parts)

    out_name = analysis_path.stem.replace("_analyse", "") + "_clean.html"
    # BEREINIGT_DIR is only looked up when no explicit directory was given.
    target_dir = BEREINIGT_DIR if output_dir is None else Path(output_dir)
    target_dir.mkdir(parents=True, exist_ok=True)
    out_path = target_dir / out_name
    out_path.write_text(html_content, encoding="utf-8")
    return out_path