feat: HTML email report with hints + fix duplicate Social Media sections

1. Email report now renders as styled HTML (matching frontend design): - Progress bars (green=completeness, blue=correctness) - Hierarchical L1→L2 check display - Red hint boxes under failed checks explaining what to fix - Matched text evidence for passed checks 2. Section splitter deduplicates: two "Social Media" headings on the same page are merged into one section instead of creating duplicates. 3. Extracted report builder to agent_doc_check_report.py (175 LOC) to keep routes file under 500 LOC (386 LOC). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-07 15:13:00 +02:00
parent 56892cf7dc
commit a3287cd5e6
2 changed files with 211 additions and 67 deletions
@@ -0,0 +1,175 @@
 """
 HTML email report builder for document checks.
 Generates a styled HTML report similar to the frontend ChecklistView,
 including L1/L2 check hierarchy, progress bars, and actionable hints.
 """
 from __future__ import annotations
 from html import escape
 from typing import TYPE_CHECKING
 if TYPE_CHECKING:
    from .agent_doc_check_routes import CheckItem, DocCheckResult
 def _bar(pct: int, color: str) -> str:
    """Render an inline progress bar followed by its percentage label.

    ``color`` selects one of the named tones ("green", "yellow", "red",
    "blue"); any other value falls back to the blue tone. ``pct`` is
    interpolated as-is into both the CSS width and the visible label.
    """
    palette = {
        "green": "#22c55e",
        "yellow": "#eab308",
        "red": "#ef4444",
        "blue": "#60a5fa",
    }
    tone = palette[color] if color in palette else "#60a5fa"
    # Grey track that clips the coloured fill to rounded corners.
    track_open = (
        '<div style="display:inline-block;width:120px;height:8px;background:#e5e7eb;'
        'border-radius:4px;overflow:hidden;vertical-align:middle;margin-right:8px">'
    )
    fill = f'<div style="width:{pct}%;height:100%;background:{tone};border-radius:4px"></div>'
    caption = f'<span style="font-size:13px;font-weight:600;color:{tone}">{pct}%</span>'
    return "".join((track_open, fill, "</div>", caption))
 def _icon(passed: bool, skipped: bool = False) -> str:
    """Return the status glyph for a check as a coloured ``<span>``.

    A skipped check yields a grey dash regardless of ``passed``; otherwise
    a green check mark is returned for a pass and a red cross for a failure.
    """
    if not skipped:
        tone, glyph = ("#22c55e", "&#10003;") if passed else ("#ef4444", "&#10007;")
        return f'<span style="color:{tone};font-weight:bold">{glyph}</span>'
    return '<span style="color:#d1d5db">&mdash;</span>'
 def _hint_box(hint: str) -> str:
    """Render the red hint box shown underneath a failed check.

    The hint text is HTML-escaped so characters such as ``<``, ``>`` and
    ``&`` are displayed literally instead of being interpreted as markup
    by the mail client.
    """
    return (
        '<div style="font-size:11px;color:#dc2626;margin:2px 0 4px 20px;'
        'padding:4px 8px;background:#fef2f2;border-radius:4px;'
        f'border-left:3px solid #fca5a5">{escape(str(hint))}</div>'
    )
 def build_html_report(
    results: list[DocCheckResult],
    cookie_result: dict | None,
 ) -> str:
    """Assemble the complete HTML e-mail body for a document check run.

    The output starts with a headline and a summary line (number of
    documents and how many of them reached 100 % completeness), followed
    by one card per entry in ``results`` and, when ``cookie_result`` is
    truthy, a cookie banner card. Fragments are joined with newlines.
    """
    total = len(results)
    complete = len([doc for doc in results if doc.completeness_pct == 100])
    fragments: list[str] = []
    fragments.append(
        '<div style="font-family:-apple-system,BlinkMacSystemFont,sans-serif;'
        'max-width:700px;margin:0 auto">'
    )
    fragments.append('<h2 style="margin-bottom:4px">Dokumenten-Pruefung</h2>')
    fragments.append(
        '<p style="color:#6b7280;margin-top:0">'
        f'{total} Dokumente, {complete} vollstaendig</p>'
    )
    for doc in results:
        _render_document(fragments, doc)
    if cookie_result:
        _render_cookie_banner(fragments, cookie_result)
    fragments.append('</div>')
    return "\n".join(fragments)
 def _render_document(html: list[str], r: DocCheckResult) -> None:
    """Append the report card for one checked document to ``html``.

    The card has a header (status badge, document label, pass counters and
    progress bars) and a body that lists every level-1 check together with
    its level-2 children, or the error message when ``r.error`` is set.

    The document label and the error text are HTML-escaped before being
    embedded: section labels are assembled from page headings and contain
    characters such as ``>``, and error text may contain arbitrary
    characters.
    """
    pct = r.completeness_pct
    cpct = r.correctness_pct
    bar_color = "green" if pct >= 80 else "yellow" if pct >= 50 else "red"
    status_label = "OK" if pct == 100 else "LUECKENHAFT" if pct >= 50 else "MANGELHAFT"
    if r.error:
        # An error overrides whatever the percentages would suggest.
        status_label = "FEHLER"
    label = escape(str(r.label))
    l1_checks = [c for c in r.checks if c.level == 1]
    # Group level-2 checks under the id of their level-1 parent.
    l2_by_parent: dict[str, list[CheckItem]] = {}
    for c in r.checks:
        if c.level == 2 and c.parent:
            l2_by_parent.setdefault(c.parent, []).append(c)
    l1_passed = sum(1 for c in l1_checks if c.passed)
    # Skipped level-2 checks count neither as passed nor as failed.
    l2_active = [c for c in r.checks if c.level == 2 and not c.skipped]
    l2_passed = sum(1 for c in l2_active if c.passed)
    # Header
    html.append(
        f'<div style="border:1px solid #e5e7eb;border-radius:8px;margin-bottom:12px;overflow:hidden">'
        f'<div style="padding:12px 16px;background:#f9fafb">'
        f'<div style="display:flex;justify-content:space-between;align-items:center"><div>'
        f'<span style="font-size:11px;background:#f3f4f6;padding:2px 8px;border-radius:4px;'
        f'color:#4b5563;font-weight:500;margin-right:8px">{status_label}</span>'
        f'<strong style="font-size:14px">{label}</strong>'
        f'<div style="font-size:12px;color:#6b7280;margin-top:2px">'
        f'{l1_passed}/{len(l1_checks)} Pflichtangaben'
    )
    if l2_active:
        html.append(f', {l2_passed}/{len(l2_active)} Detailpruefungen')
    html.append(f'</div></div><div style="text-align:right">{_bar(pct, bar_color)}')
    # The correctness bar is only shown when there is a non-zero value and
    # at least one level-2 check actually ran.
    if cpct and l2_active:
        html.append(f'<br>{_bar(cpct, "blue")}')
    html.append('</div></div></div>')
    # Body
    if r.error:
        html.append(
            f'<div style="padding:12px 16px;color:#991b1b">{escape(str(r.error))}</div>'
        )
    else:
        html.append('<div style="padding:8px 16px 12px">')
        for c in l1_checks:
            _render_l1_check(html, c, l2_by_parent.get(c.id, []))
        if r.word_count:
            html.append(
                f'<div style="font-size:11px;color:#9ca3af;margin-top:8px;'
                f'padding-top:8px;border-top:1px solid #e5e7eb">'
                f'{r.word_count} Woerter analysiert</div>'
            )
        html.append('</div>')
    # Close the outer card container opened in the header.
    html.append('</div>')
 def _render_l1_check(
    html: list[str], c: CheckItem, children: list[CheckItem],
 ) -> None:
    """Append one level-1 check row and its level-2 children to ``html``.

    The row shows a pass/fail icon, the check label (HTML-escaped) and, if
    any non-skipped children exist, a ``(passed/total)`` counter. A failed
    check with a hint additionally gets a hint box. Skipped children are
    neither counted nor rendered.
    """
    active_children = [ch for ch in children if not ch.skipped]
    l2_passed = sum(1 for ch in active_children if ch.passed)
    style = "color:#991b1b;font-weight:600" if not c.passed else "color:#374151"
    html.append(
        f'<div style="padding:3px 0">{_icon(c.passed)} '
        f'<span style="font-size:13px;{style}">{escape(str(c.label))}</span>'
    )
    if active_children:
        html.append(
            f' <span style="color:#9ca3af;font-size:11px">({l2_passed}/{len(active_children)})</span>'
        )
    if not c.passed and c.hint:
        html.append(_hint_box(c.hint))
    html.append('</div>')
    for ch in active_children:
        _render_l2_check(html, ch)
 def _render_l2_check(html: list[str], ch: CheckItem) -> None:
    """Append one indented level-2 check row to ``html``.

    A passed check with ``matched_text`` shows the first 80 characters of
    that text as evidence; a failed check with a hint shows a hint box.
    The label and the evidence snippet are HTML-escaped, because the
    snippet is text matched from the checked document and may contain
    markup characters.
    """
    style = "color:#dc2626;font-weight:500" if not ch.passed else "color:#6b7280"
    html.append(
        f'<div style="padding:2px 0 2px 24px;border-left:2px solid #e5e7eb;margin-left:8px">'
        f'{_icon(ch.passed)} '
        f'<span style="font-size:12px;{style}">{escape(str(ch.label))}</span>'
    )
    if ch.passed and ch.matched_text:
        # Truncate first, then escape, so an entity is never cut in half.
        snippet = escape(str(ch.matched_text)[:80])
        html.append(
            f'<div style="font-size:10px;color:#9ca3af;font-family:monospace;'
            f'margin-left:20px;overflow:hidden;text-overflow:ellipsis;'
            f'white-space:nowrap">"...{snippet}..."</div>'
        )
    if not ch.passed and ch.hint:
        html.append(_hint_box(ch.hint))
    html.append('</div>')
 def _render_cookie_banner(html: list[str], cookie_result: dict) -> None:
    """Append the cookie banner card to ``html``.

    Reads ``banner_detected``, ``banner_provider`` and
    ``banner_checks["violations"]`` from ``cookie_result``. At most the
    first ten violations are listed, each truncated to 80 characters; when
    there are none, a green "no violations" line is shown instead.

    All values taken from ``cookie_result`` are HTML-escaped. Keys that are
    present but hold ``None`` are treated like missing keys rather than
    raising.
    """
    detected = escape(str(cookie_result.get("banner_detected", False)))
    provider = escape(str(cookie_result.get("banner_provider", "unbekannt")))
    html.append(
        '<div style="border:1px solid #e5e7eb;border-radius:8px;'
        'padding:12px 16px;margin-bottom:12px">'
        '<strong>Cookie-Banner Pruefung</strong><br>'
        f'Banner erkannt: {detected}<br>'
        f'Anbieter: {provider}'
    )
    # ``or`` guards against keys that exist with a None value.
    violations = (cookie_result.get("banner_checks") or {}).get("violations") or []
    if violations:
        for v in violations[:10]:
            # Truncate first, then escape, so an entity is never cut in half.
            text = str(v.get("text") or "")[:80]
            html.append(f'<br>{_icon(False)} {escape(text)}')
    else:
        html.append('<br><span style="color:#22c55e">Keine Verstoesse erkannt.</span>')
    html.append('</div>')
@@ -141,7 +141,7 @@ async def _run_doc_check(check_id: str, req: DocCheckRequest):
        email_result = send_email(
            recipient=req.recipient,
            subject=f"[DOKUMENTEN-PRUEFUNG] {len(results)} Dokumente geprueft",
-            body_html=f"<pre>{summary}</pre>",
+            body_html=summary,
        )
        response = DocCheckResponse(
@@ -284,40 +284,49 @@ def _split_into_sections(text: str, parent_label: str, url: str) -> list[dict]:
    Detects sections like 'Cookies', 'Social Media', 'Dienste von Drittanbietern'
    and classifies each by document type for separate checking.
    Deduplicates: if the same doc_type appears twice, texts are merged.
    """
    import re as _re
-    sections = []
+    sections: list[dict] = []
    seen_types: dict[str, int] = {}  # doc_type -> index in sections
    # Split by lines that look like headings (short, followed by longer content)
    lines = text.split("\n")
    current_heading = ""
-    current_text = []
+    current_text: list[str] = []
    def _save_section(heading: str, text_lines: list[str]) -> None:
        sec_text = "\n".join(text_lines)
        if len(sec_text.split()) < 100:
            return
        sec_type = _classify_section(heading)
        if not sec_type:
            return
        # Merge duplicate doc_types (e.g. two "Social Media" headings)
        if sec_type in seen_types:
            idx = seen_types[sec_type]
            sections[idx]["text"] += "\n\n" + sec_text
            sections[idx]["word_count"] = len(sections[idx]["text"].split())
        else:
            seen_types[sec_type] = len(sections)
            sections.append({
                "title": f"{parent_label} > {heading}",
                "text": sec_text,
                "doc_type": sec_type,
                "word_count": len(sec_text.split()),
            })
    for line in lines:
        stripped = line.strip()
        # Detect heading: short line (< 80 chars), not empty, followed by content
        is_heading = (
            5 < len(stripped) < 80
            and not stripped.endswith(".")
            and not stripped.endswith(",")
            and stripped[0].isupper()
        )
        # Skip-headings should NOT start a new section — their text
        # belongs to the previous section (e.g. "Risikoabwägung" inside DSFA)
        is_skip = is_heading and stripped.lower().strip() in SKIP_HEADINGS
-        if is_heading and not is_skip and current_heading and len("\n".join(current_text)) > 200:
+        if is_heading and not is_skip and current_heading:
-            # Save previous section
+            _save_section(current_heading, current_text)
            sec_text = "\n".join(current_text)
            sec_type = _classify_section(current_heading)
            if sec_type and sec_type != "skip":
                sections.append({
                    "title": f"{parent_label} > {current_heading}",
                    "text": sec_text,
                    "doc_type": sec_type,
                    "word_count": len(sec_text.split()),
                })
        if is_heading and not is_skip:
            current_heading = stripped
@@ -326,16 +335,8 @@ def _split_into_sections(text: str, parent_label: str, url: str) -> list[dict]:
            current_text.append(line)
    # Last section
-    if current_heading and len("\n".join(current_text)) > 200:
+    if current_heading:
-        sec_text = "\n".join(current_text)
+        _save_section(current_heading, current_text)
        sec_type = _classify_section(current_heading)
        if sec_type and sec_type != "skip":
            sections.append({
                "title": f"{parent_label} > {current_heading}",
                "text": sec_text,
                "doc_type": sec_type,
                "word_count": len(sec_text.split()),
            })
    return sections
@@ -347,6 +348,10 @@ SKIP_HEADINGS = {
    "risikoabwaegung und datenschutzfolgenabschaetzung",
 }
 # Document types for which repeated sections on the same page are meant to be
 # treated as one sub-document (e.g. two "Social Media" headings).
 # NOTE(review): the _split_into_sections shown alongside this constant merges
 # every doc_type through its local seen_types map and does not appear to read
 # this set — confirm whether it is used anywhere.
 _DEDUP_TYPES = {"social_media", "cookie", "dsfa", "widerruf", "impressum"}
 def _classify_section(heading: str) -> str | None:
    """Classify a section heading into a document type."""
@@ -377,41 +382,5 @@ async def _check_cookie_banner(url: str) -> dict | None:
 def _build_report(results: list[DocCheckResult], cookie_result: dict | None) -> str:
-    """Build email report."""
+    from .agent_doc_check_report import build_html_report
-    parts = [
+    return build_html_report(results, cookie_result)
        "DOKUMENTEN-PRUEFUNG",
        f"Dokumente geprueft: {len(results)}",
        "",
    ]
    for r in results:
        status = "OK" if r.completeness_pct == 100 else "LUECKENHAFT" if r.completeness_pct >= 50 else "MANGELHAFT"
        if r.error:
            status = "FEHLER"
        detail = f", Korrektheit {r.correctness_pct}%" if r.correctness_pct else ""
        parts.append(f"[{status}] {r.label} ({r.completeness_pct}%{detail}, {r.word_count} Woerter)")
        for check in r.checks:
            if check.skipped:
                continue
            icon = "+" if check.passed else "!!"
            indent = "    " if check.level == 2 else "  "
            parts.append(f"{indent}[{icon}] {check.label}")
        if r.error:
            parts.append(f"  FEHLER: {r.error}")
        parts.append("")
    if cookie_result:
        parts.extend([
            "Cookie-Banner Pruefung:",
            f"  Banner erkannt: {cookie_result.get('banner_detected', False)}",
            f"  Anbieter: {cookie_result.get('banner_provider', 'unbekannt')}",
        ])
        violations = cookie_result.get("banner_checks", {}).get("violations", [])
        if violations:
            for v in violations[:10]:
                parts.append(f"  [!!] {v.get('text', '')[:80]}")
        else:
            parts.append("  Keine Verstoesse erkannt.")
    return "\n".join(parts)