fix: Restore all files lost during destructive rebase
A previous `git pull --rebase origin main` dropped 177 local commits,
losing 3400+ files across admin-v2, backend, studio-v2, website,
klausur-service, and many other services. The partial restore attempt
(660295e2) only recovered some files.
This commit restores all missing files from pre-rebase ref 98933f5e
while preserving post-rebase additions (night-scheduler, night-mode UI,
NightModeWidget dashboard integration).
Restored features include:
- AI Module Sidebar (FAB), OCR Labeling, OCR Compare
- GPU Dashboard, RAG Pipeline, Magic Help
- Klausur-Korrektur (8 files), Abitur-Archiv (5+ files)
- Companion, Zeugnisse-Crawler, Screen Flow
- Full backend, studio-v2, website, klausur-service
- All compliance SDKs, agent-core, voice-service
- CI/CD configs, documentation, scripts
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
218
backend/ai_processor/vision/html_builder.py
Normal file
218
backend/ai_processor/vision/html_builder.py
Normal file
@@ -0,0 +1,218 @@
|
||||
"""
|
||||
AI Processor - HTML Builder
|
||||
|
||||
Build clean HTML worksheets from analysis data.
|
||||
"""
|
||||
|
||||
from pathlib import Path
|
||||
import json
|
||||
import logging
|
||||
|
||||
from ..config import BEREINIGT_DIR
|
||||
|
||||
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _escape_html(value: object) -> str:
    """Return ``value`` converted to text that is safe to embed in HTML."""
    # Function-scope import keeps this block self-contained.
    from html import escape

    return escape(str(value), quote=True)


def build_clean_html_from_analysis(analysis_path: Path) -> Path:
    """
    Build a clean HTML worksheet from an analysis JSON file.

    Features:
    - Focus on printed text (canonical_text / printed_blocks)
    - Handwritten entries and crossed-out words are NOT included
    - Uses open-source font stack (Inter / Noto Sans)
    - Every value taken from the analysis is HTML-escaped before it is
      embedded, so markup characters in recognised text are shown literally
      instead of being interpreted by the browser

    Args:
        analysis_path: Path to *_analyse.json file

    Returns:
        Path to the generated HTML file (written into BEREINIGT_DIR)

    Raises:
        FileNotFoundError: If ``analysis_path`` does not exist.
        RuntimeError: If the file does not contain valid JSON, or if its
            top-level value is not a JSON object.
    """
    if not analysis_path.exists():
        raise FileNotFoundError(f"Analysedatei nicht gefunden: {analysis_path}")

    try:
        data = json.loads(analysis_path.read_text(encoding="utf-8"))
    except json.JSONDecodeError as exc:
        raise RuntimeError(
            f"Analyse-Datei enthaelt kein gueltiges JSON: {analysis_path}\n{exc}"
        ) from exc
    if not isinstance(data, dict):
        # A list/number/string root would otherwise fail later with an
        # unrelated AttributeError on ``data.get``.
        raise RuntimeError(f"Analyse-Datei enthaelt kein JSON-Objekt: {analysis_path}")

    # Scalar fields: missing/None/empty values fall back to a default, then
    # everything is escaped once here so the templates below stay simple.
    title = _escape_html(data.get("title") or "Arbeitsblatt")
    subject = _escape_html(data.get("subject") or "")
    grade_level = _escape_html(data.get("grade_level") or "")
    instructions = _escape_html(data.get("instructions") or "")
    canonical_text = str(data.get("canonical_text") or "")
    struck = data.get("struck_through_words") or []

    # Only JSON objects can be rendered as blocks/tasks; anything else in
    # these lists is ignored rather than crashing on ``.get``.
    printed_blocks = [b for b in (data.get("printed_blocks") or []) if isinstance(b, dict)]
    tasks = [t for t in (data.get("tasks") or []) if isinstance(t, dict)]

    html_parts = [
        "<!DOCTYPE html>",
        "<html lang='de'>",
        "<head>",
        "<meta charset='UTF-8'>",
        f"<title>{title}</title>",
        _get_html_styles(),
        "</head>",
        "<body>",
        "<div class='page'>",
        # Header section
        f"<h1>{title}</h1>",
    ]

    meta_bits = []
    if subject:
        meta_bits.append(f"Fach: {subject}")
    if grade_level:
        meta_bits.append(f"Klassenstufe: {grade_level}")
    if meta_bits:
        meta_line = " | ".join(meta_bits)
        html_parts.append(f"<div class='meta'>{meta_line}</div>")

    if instructions:
        html_parts.append(
            f"<div class='instructions'><strong>Arbeitsanweisung:</strong> {instructions}</div>"
        )

    # Main text / printed blocks
    html_parts.append("<section class='text-blocks'>")

    if printed_blocks:
        for block in printed_blocks:
            text = str(block.get("text") or "").strip()
            if not text:
                continue
            role = str(block.get("role") or "body").lower()
            safe_text = _escape_html(text)
            html_parts.append("<div class='text-block'>")
            if role == "title":
                html_parts.append(f"<div class='text-block-title'>{safe_text}</div>")
            else:
                html_parts.append(f"<div>{safe_text}</div>")
            html_parts.append("</div>")
    elif canonical_text:
        # Fallback: split canonical_text into paragraphs on blank lines
        for raw in canonical_text.replace("\r\n", "\n").split("\n\n"):
            paragraph = raw.strip()
            if paragraph:
                html_parts.append(f"<div class='text-block'>{_escape_html(paragraph)}</div>")

    html_parts.append("</section>")

    # Tasks section
    if tasks:
        html_parts.append("<h2>Aufgaben</h2>")
        html_parts.append("<div class='task-list'>")

        for idx, task in enumerate(tasks, start=1):
            t_type = _escape_html(task.get("type") or "other")
            desc = _escape_html(task.get("description") or "")
            text_with_gaps = task.get("text_with_gaps")

            html_parts.append("<div class='task'>")
            html_parts.append(
                f"<div class='task-title'>Aufgabe {idx} ({t_type}): {desc}</div>"
            )

            if text_with_gaps:
                # Escape first, then insert the gap markup: escaping leaves
                # underscores untouched, and the inserted <span> must stay raw.
                rendered = _escape_html(text_with_gaps).replace(
                    "___", "<span class='gap-line'> </span>"
                )
                html_parts.append(f"<div>{rendered}</div>")
            html_parts.append("</div>")

        html_parts.append("</div>")

    # Footer note
    if struck:
        html_parts.append(
            "<div class='footnote'>Hinweis: Einige im Original durchgestrichene Woerter wurden "
            "von der KI erkannt und NICHT in dieses saubere Arbeitsblatt uebernommen.</div>"
        )
    else:
        html_parts.append(
            "<div class='footnote'>Dieses Arbeitsblatt wurde automatisch aus einem Scan rekonstruiert "
            "und von handschriftlichen Eintragungen bereinigt.</div>"
        )

    html_parts.append("</div>")  # .page
    html_parts.append("</body></html>")

    html_content = "\n".join(html_parts)
    out_name = analysis_path.stem.replace("_analyse", "") + "_clean.html"
    out_path = BEREINIGT_DIR / out_name
    out_path.write_text(html_content, encoding="utf-8")
    return out_path
|
||||
|
||||
|
||||
def _get_html_styles() -> str:
|
||||
"""Get CSS styles for clean HTML output."""
|
||||
return """
|
||||
<style>
|
||||
:root {
|
||||
--font-main: "Inter", "Noto Sans", system-ui, -apple-system, BlinkMacSystemFont, sans-serif;
|
||||
}
|
||||
* { box-sizing: border-box; }
|
||||
body {
|
||||
font-family: var(--font-main);
|
||||
margin: 32px;
|
||||
line-height: 1.5;
|
||||
font-size: 14px;
|
||||
color: #111827;
|
||||
}
|
||||
.page {
|
||||
max-width: 800px;
|
||||
margin: 0 auto;
|
||||
}
|
||||
h1 {
|
||||
font-size: 24px;
|
||||
margin-bottom: 4px;
|
||||
}
|
||||
h2 {
|
||||
font-size: 18px;
|
||||
margin-top: 24px;
|
||||
}
|
||||
.meta {
|
||||
font-size: 12px;
|
||||
color: #6b7280;
|
||||
margin-bottom: 16px;
|
||||
}
|
||||
.instructions {
|
||||
margin-bottom: 20px;
|
||||
padding: 8px 10px;
|
||||
border-radius: 8px;
|
||||
background: #eff6ff;
|
||||
border: 1px solid #bfdbfe;
|
||||
font-size: 13px;
|
||||
}
|
||||
.text-blocks {
|
||||
margin-bottom: 24px;
|
||||
}
|
||||
.text-block {
|
||||
margin-bottom: 8px;
|
||||
}
|
||||
.text-block-title {
|
||||
font-weight: 600;
|
||||
margin-bottom: 4px;
|
||||
}
|
||||
.task-list {
|
||||
margin-top: 8px;
|
||||
}
|
||||
.task {
|
||||
margin-bottom: 14px;
|
||||
padding-bottom: 8px;
|
||||
border-bottom: 1px dashed #e5e7eb;
|
||||
}
|
||||
.task-title {
|
||||
font-weight: 600;
|
||||
margin-bottom: 4px;
|
||||
}
|
||||
.gap-line {
|
||||
display: inline-block;
|
||||
border-bottom: 1px solid #000;
|
||||
min-width: 80px;
|
||||
margin: 0 4px;
|
||||
}
|
||||
.footnote {
|
||||
margin-top: 24px;
|
||||
font-size: 11px;
|
||||
color: #9ca3af;
|
||||
}
|
||||
</style>
|
||||
"""
|
||||
Reference in New Issue
Block a user