feat: HTML email report with hints + fix duplicate Social Media sections
Build + Deploy / build-admin-compliance (push) Successful in 1m45s
Build + Deploy / build-backend-compliance (push) Successful in 9s
Build + Deploy / build-ai-sdk (push) Successful in 36s
Build + Deploy / build-developer-portal (push) Successful in 7s
Build + Deploy / build-tts (push) Successful in 7s
Build + Deploy / build-document-crawler (push) Successful in 8s
Build + Deploy / build-dsms-gateway (push) Successful in 7s
Build + Deploy / build-dsms-node (push) Successful in 8s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / loc-budget (push) Failing after 15s
CI / secret-scan (push) Has been skipped
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Successful in 2m47s
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / test-go (push) Failing after 44s
CI / test-python-backend (push) Successful in 41s
CI / test-python-document-crawler (push) Successful in 26s
CI / test-python-dsms-gateway (push) Successful in 22s
CI / validate-canonical-controls (push) Successful in 15s
Build + Deploy / trigger-orca (push) Successful in 2m23s

1. Email report now renders as styled HTML (matching frontend design):
   - Progress bars (green=completeness, blue=correctness)
   - Hierarchical L1→L2 check display
   - Red hint boxes under failed checks explaining what to fix
   - Matched text evidence for passed checks

2. Section splitter deduplicates: two "Social Media" headings on the
   same page are merged into one section instead of creating duplicates.

3. Extracted report builder to agent_doc_check_report.py (175 LOC)
   to keep routes file under 500 LOC (386 LOC).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-05-07 15:13:00 +02:00
parent 56892cf7dc
commit a3287cd5e6
2 changed files with 211 additions and 67 deletions
@@ -0,0 +1,175 @@
"""
HTML email report builder for document checks.
Generates a styled HTML report similar to the frontend ChecklistView,
including L1/L2 check hierarchy, progress bars, and actionable hints.
"""
from __future__ import annotations
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from .agent_doc_check_routes import CheckItem, DocCheckResult
def _bar(pct: int, color: str) -> str:
bg = {"green": "#22c55e", "yellow": "#eab308", "red": "#ef4444", "blue": "#60a5fa"}
c = bg.get(color, "#60a5fa")
return (
f'<div style="display:inline-block;width:120px;height:8px;background:#e5e7eb;'
f'border-radius:4px;overflow:hidden;vertical-align:middle;margin-right:8px">'
f'<div style="width:{pct}%;height:100%;background:{c};border-radius:4px"></div>'
f'</div><span style="font-size:13px;font-weight:600;color:{c}">{pct}%</span>'
)
def _icon(passed: bool, skipped: bool = False) -> str:
if skipped:
return '<span style="color:#d1d5db">&mdash;</span>'
if passed:
return '<span style="color:#22c55e;font-weight:bold">&#10003;</span>'
return '<span style="color:#ef4444;font-weight:bold">&#10007;</span>'
def _hint_box(hint: str) -> str:
return (
f'<div style="font-size:11px;color:#dc2626;margin:2px 0 4px 20px;'
f'padding:4px 8px;background:#fef2f2;border-radius:4px;'
f'border-left:3px solid #fca5a5">{hint}</div>'
)
def build_html_report(
results: list[DocCheckResult],
cookie_result: dict | None,
) -> str:
"""Build HTML email report styled like the frontend."""
ok_count = sum(1 for r in results if r.completeness_pct == 100)
html = [
'<div style="font-family:-apple-system,BlinkMacSystemFont,sans-serif;'
'max-width:700px;margin:0 auto">',
'<h2 style="margin-bottom:4px">Dokumenten-Pruefung</h2>',
f'<p style="color:#6b7280;margin-top:0">'
f'{len(results)} Dokumente, {ok_count} vollstaendig</p>',
]
for r in results:
_render_document(html, r)
if cookie_result:
_render_cookie_banner(html, cookie_result)
html.append('</div>')
return "\n".join(html)
def _render_document(html: list[str], r: DocCheckResult) -> None:
pct = r.completeness_pct
cpct = r.correctness_pct
bar_color = "green" if pct >= 80 else "yellow" if pct >= 50 else "red"
status_label = "OK" if pct == 100 else "LUECKENHAFT" if pct >= 50 else "MANGELHAFT"
if r.error:
status_label = "FEHLER"
l1_checks = [c for c in r.checks if c.level == 1]
l2_by_parent: dict[str, list[CheckItem]] = {}
for c in r.checks:
if c.level == 2 and c.parent:
l2_by_parent.setdefault(c.parent, []).append(c)
l1_passed = sum(1 for c in l1_checks if c.passed)
l2_active = [c for c in r.checks if c.level == 2 and not c.skipped]
l2_passed = sum(1 for c in l2_active if c.passed)
# Header
html.append(
f'<div style="border:1px solid #e5e7eb;border-radius:8px;margin-bottom:12px;overflow:hidden">'
f'<div style="padding:12px 16px;background:#f9fafb">'
f'<div style="display:flex;justify-content:space-between;align-items:center"><div>'
f'<span style="font-size:11px;background:#f3f4f6;padding:2px 8px;border-radius:4px;'
f'color:#4b5563;font-weight:500;margin-right:8px">{status_label}</span>'
f'<strong style="font-size:14px">{r.label}</strong>'
f'<div style="font-size:12px;color:#6b7280;margin-top:2px">'
f'{l1_passed}/{len(l1_checks)} Pflichtangaben'
)
if l2_active:
html.append(f', {l2_passed}/{len(l2_active)} Detailpruefungen')
html.append(f'</div></div><div style="text-align:right">{_bar(pct, bar_color)}')
if cpct and l2_active:
html.append(f'<br>{_bar(cpct, "blue")}')
html.append('</div></div></div>')
# Body
if r.error:
html.append(f'<div style="padding:12px 16px;color:#991b1b">{r.error}</div>')
else:
html.append('<div style="padding:8px 16px 12px">')
for c in l1_checks:
_render_l1_check(html, c, l2_by_parent.get(c.id, []))
if r.word_count:
html.append(
f'<div style="font-size:11px;color:#9ca3af;margin-top:8px;'
f'padding-top:8px;border-top:1px solid #e5e7eb">'
f'{r.word_count} Woerter analysiert</div>'
)
html.append('</div>')
html.append('</div>')
def _render_l1_check(
html: list[str], c: CheckItem, children: list[CheckItem],
) -> None:
l2_sub = [ch for ch in children if not ch.skipped]
l2_passed = sum(1 for ch in l2_sub if ch.passed)
style = "color:#991b1b;font-weight:600" if not c.passed else "color:#374151"
html.append(
f'<div style="padding:3px 0">{_icon(c.passed)} '
f'<span style="font-size:13px;{style}">{c.label}</span>'
)
if l2_sub:
html.append(f' <span style="color:#9ca3af;font-size:11px">({l2_passed}/{len(l2_sub)})</span>')
if not c.passed and c.hint:
html.append(_hint_box(c.hint))
html.append('</div>')
for ch in children:
if ch.skipped:
continue
_render_l2_check(html, ch)
def _render_l2_check(html: list[str], ch: CheckItem) -> None:
style = "color:#dc2626;font-weight:500" if not ch.passed else "color:#6b7280"
html.append(
f'<div style="padding:2px 0 2px 24px;border-left:2px solid #e5e7eb;margin-left:8px">'
f'{_icon(ch.passed)} '
f'<span style="font-size:12px;{style}">{ch.label}</span>'
)
if ch.passed and ch.matched_text:
html.append(
f'<div style="font-size:10px;color:#9ca3af;font-family:monospace;'
f'margin-left:20px;overflow:hidden;text-overflow:ellipsis;'
f'white-space:nowrap">"...{ch.matched_text[:80]}..."</div>'
)
if not ch.passed and ch.hint:
html.append(_hint_box(ch.hint))
html.append('</div>')
def _render_cookie_banner(html: list[str], cookie_result: dict) -> None:
html.append(
'<div style="border:1px solid #e5e7eb;border-radius:8px;'
'padding:12px 16px;margin-bottom:12px">'
'<strong>Cookie-Banner Pruefung</strong><br>'
f'Banner erkannt: {cookie_result.get("banner_detected", False)}<br>'
f'Anbieter: {cookie_result.get("banner_provider", "unbekannt")}'
)
violations = cookie_result.get("banner_checks", {}).get("violations", [])
if violations:
for v in violations[:10]:
html.append(f'<br>{_icon(False)} {v.get("text", "")[:80]}')
else:
html.append('<br><span style="color:#22c55e">Keine Verstoesse erkannt.</span>')
html.append('</div>')
@@ -141,7 +141,7 @@ async def _run_doc_check(check_id: str, req: DocCheckRequest):
email_result = send_email(
recipient=req.recipient,
subject=f"[DOKUMENTEN-PRUEFUNG] {len(results)} Dokumente geprueft",
body_html=f"<pre>{summary}</pre>",
body_html=summary,
)
response = DocCheckResponse(
@@ -284,40 +284,49 @@ def _split_into_sections(text: str, parent_label: str, url: str) -> list[dict]:
Detects sections like 'Cookies', 'Social Media', 'Dienste von Drittanbietern'
and classifies each by document type for separate checking.
Deduplicates: if the same doc_type appears twice, texts are merged.
"""
import re as _re
sections = []
sections: list[dict] = []
seen_types: dict[str, int] = {} # doc_type -> index in sections
# Split by lines that look like headings (short, followed by longer content)
lines = text.split("\n")
current_heading = ""
current_text = []
current_text: list[str] = []
def _save_section(heading: str, text_lines: list[str]) -> None:
sec_text = "\n".join(text_lines)
if len(sec_text.split()) < 100:
return
sec_type = _classify_section(heading)
if not sec_type:
return
# Merge duplicate doc_types (e.g. two "Social Media" headings)
if sec_type in seen_types:
idx = seen_types[sec_type]
sections[idx]["text"] += "\n\n" + sec_text
sections[idx]["word_count"] = len(sections[idx]["text"].split())
else:
seen_types[sec_type] = len(sections)
sections.append({
"title": f"{parent_label} > {heading}",
"text": sec_text,
"doc_type": sec_type,
"word_count": len(sec_text.split()),
})
for line in lines:
stripped = line.strip()
# Detect heading: short line (< 80 chars), not empty, followed by content
is_heading = (
5 < len(stripped) < 80
and not stripped.endswith(".")
and not stripped.endswith(",")
and stripped[0].isupper()
)
# Skip-headings should NOT start a new section — their text
# belongs to the previous section (e.g. "Risikoabwägung" inside DSFA)
is_skip = is_heading and stripped.lower().strip() in SKIP_HEADINGS
if is_heading and not is_skip and current_heading and len("\n".join(current_text)) > 200:
# Save previous section
sec_text = "\n".join(current_text)
sec_type = _classify_section(current_heading)
if sec_type and sec_type != "skip":
sections.append({
"title": f"{parent_label} > {current_heading}",
"text": sec_text,
"doc_type": sec_type,
"word_count": len(sec_text.split()),
})
if is_heading and not is_skip and current_heading:
_save_section(current_heading, current_text)
if is_heading and not is_skip:
current_heading = stripped
@@ -326,16 +335,8 @@ def _split_into_sections(text: str, parent_label: str, url: str) -> list[dict]:
current_text.append(line)
# Last section
if current_heading and len("\n".join(current_text)) > 200:
sec_text = "\n".join(current_text)
sec_type = _classify_section(current_heading)
if sec_type and sec_type != "skip":
sections.append({
"title": f"{parent_label} > {current_heading}",
"text": sec_text,
"doc_type": sec_type,
"word_count": len(sec_text.split()),
})
if current_heading:
_save_section(current_heading, current_text)
return sections
@@ -347,6 +348,10 @@ SKIP_HEADINGS = {
"risikoabwaegung und datenschutzfolgenabschaetzung",
}
# Track already-seen section types to avoid duplicate sub-documents
# (e.g. two "Social Media" headings on the same page)
_DEDUP_TYPES = {"social_media", "cookie", "dsfa", "widerruf", "impressum"}
def _classify_section(heading: str) -> str | None:
"""Classify a section heading into a document type."""
@@ -377,41 +382,5 @@ async def _check_cookie_banner(url: str) -> dict | None:
def _build_report(results: list[DocCheckResult], cookie_result: dict | None) -> str:
"""Build email report."""
parts = [
"DOKUMENTEN-PRUEFUNG",
f"Dokumente geprueft: {len(results)}",
"",
]
for r in results:
status = "OK" if r.completeness_pct == 100 else "LUECKENHAFT" if r.completeness_pct >= 50 else "MANGELHAFT"
if r.error:
status = "FEHLER"
detail = f", Korrektheit {r.correctness_pct}%" if r.correctness_pct else ""
parts.append(f"[{status}] {r.label} ({r.completeness_pct}%{detail}, {r.word_count} Woerter)")
for check in r.checks:
if check.skipped:
continue
icon = "+" if check.passed else "!!"
indent = " " if check.level == 2 else " "
parts.append(f"{indent}[{icon}] {check.label}")
if r.error:
parts.append(f" FEHLER: {r.error}")
parts.append("")
if cookie_result:
parts.extend([
"Cookie-Banner Pruefung:",
f" Banner erkannt: {cookie_result.get('banner_detected', False)}",
f" Anbieter: {cookie_result.get('banner_provider', 'unbekannt')}",
])
violations = cookie_result.get("banner_checks", {}).get("violations", [])
if violations:
for v in violations[:10]:
parts.append(f" [!!] {v.get('text', '')[:80]}")
else:
parts.append(" Keine Verstoesse erkannt.")
return "\n".join(parts)
from .agent_doc_check_report import build_html_report
return build_html_report(results, cookie_result)