feat: HTML email report with hints + fix duplicate Social Media sections
Build + Deploy / build-admin-compliance (push) Successful in 1m45s
Build + Deploy / build-backend-compliance (push) Successful in 9s
Build + Deploy / build-ai-sdk (push) Successful in 36s
Build + Deploy / build-developer-portal (push) Successful in 7s
Build + Deploy / build-tts (push) Successful in 7s
Build + Deploy / build-document-crawler (push) Successful in 8s
Build + Deploy / build-dsms-gateway (push) Successful in 7s
Build + Deploy / build-dsms-node (push) Successful in 8s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / loc-budget (push) Failing after 15s
CI / secret-scan (push) Has been skipped
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Successful in 2m47s
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / test-go (push) Failing after 44s
CI / test-python-backend (push) Successful in 41s
CI / test-python-document-crawler (push) Successful in 26s
CI / test-python-dsms-gateway (push) Successful in 22s
CI / validate-canonical-controls (push) Successful in 15s
Build + Deploy / trigger-orca (push) Successful in 2m23s
Build + Deploy / build-admin-compliance (push) Successful in 1m45s
Build + Deploy / build-backend-compliance (push) Successful in 9s
Build + Deploy / build-ai-sdk (push) Successful in 36s
Build + Deploy / build-developer-portal (push) Successful in 7s
Build + Deploy / build-tts (push) Successful in 7s
Build + Deploy / build-document-crawler (push) Successful in 8s
Build + Deploy / build-dsms-gateway (push) Successful in 7s
Build + Deploy / build-dsms-node (push) Successful in 8s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / loc-budget (push) Failing after 15s
CI / secret-scan (push) Has been skipped
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Successful in 2m47s
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / test-go (push) Failing after 44s
CI / test-python-backend (push) Successful in 41s
CI / test-python-document-crawler (push) Successful in 26s
CI / test-python-dsms-gateway (push) Successful in 22s
CI / validate-canonical-controls (push) Successful in 15s
Build + Deploy / trigger-orca (push) Successful in 2m23s
1. Email report now renders as styled HTML (matching frontend design): - Progress bars (green=completeness, blue=correctness) - Hierarchical L1→L2 check display - Red hint boxes under failed checks explaining what to fix - Matched text evidence for passed checks 2. Section splitter deduplicates: two "Social Media" headings on the same page are merged into one section instead of creating duplicates. 3. Extracted report builder to agent_doc_check_report.py (175 LOC) to keep routes file under 500 LOC (386 LOC). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,175 @@
|
||||
"""
|
||||
HTML email report builder for document checks.
|
||||
|
||||
Generates a styled HTML report similar to the frontend ChecklistView,
|
||||
including L1/L2 check hierarchy, progress bars, and actionable hints.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from .agent_doc_check_routes import CheckItem, DocCheckResult
|
||||
|
||||
|
||||
def _bar(pct: int, color: str) -> str:
|
||||
bg = {"green": "#22c55e", "yellow": "#eab308", "red": "#ef4444", "blue": "#60a5fa"}
|
||||
c = bg.get(color, "#60a5fa")
|
||||
return (
|
||||
f'<div style="display:inline-block;width:120px;height:8px;background:#e5e7eb;'
|
||||
f'border-radius:4px;overflow:hidden;vertical-align:middle;margin-right:8px">'
|
||||
f'<div style="width:{pct}%;height:100%;background:{c};border-radius:4px"></div>'
|
||||
f'</div><span style="font-size:13px;font-weight:600;color:{c}">{pct}%</span>'
|
||||
)
|
||||
|
||||
|
||||
def _icon(passed: bool, skipped: bool = False) -> str:
|
||||
if skipped:
|
||||
return '<span style="color:#d1d5db">—</span>'
|
||||
if passed:
|
||||
return '<span style="color:#22c55e;font-weight:bold">✓</span>'
|
||||
return '<span style="color:#ef4444;font-weight:bold">✗</span>'
|
||||
|
||||
|
||||
def _hint_box(hint: str) -> str:
|
||||
return (
|
||||
f'<div style="font-size:11px;color:#dc2626;margin:2px 0 4px 20px;'
|
||||
f'padding:4px 8px;background:#fef2f2;border-radius:4px;'
|
||||
f'border-left:3px solid #fca5a5">{hint}</div>'
|
||||
)
|
||||
|
||||
|
||||
def build_html_report(
    results: list[DocCheckResult],
    cookie_result: dict | None,
) -> str:
    """Assemble the complete HTML e-mail body for a document-check run.

    The report consists of a headline, a one-line summary (number of
    documents and how many of them reached 100 % completeness), one card per
    document and, when a non-empty cookie-banner result is supplied, a
    closing cookie-banner card.

    Args:
        results: Per-document check results, rendered in the given order.
        cookie_result: Cookie-banner check result, or ``None`` to omit it.

    Returns:
        All HTML fragments joined by newlines.
    """
    total_docs = len(results)
    complete_docs = len([doc for doc in results if doc.completeness_pct == 100])

    parts: list[str] = []
    parts.append(
        '<div style="font-family:-apple-system,BlinkMacSystemFont,sans-serif;'
        'max-width:700px;margin:0 auto">'
    )
    parts.append('<h2 style="margin-bottom:4px">Dokumenten-Pruefung</h2>')
    parts.append(
        '<p style="color:#6b7280;margin-top:0">'
        f'{total_docs} Dokumente, {complete_docs} vollstaendig</p>'
    )

    for doc in results:
        _render_document(parts, doc)

    if cookie_result:
        _render_cookie_banner(parts, cookie_result)

    parts.append('</div>')
    return "\n".join(parts)
|
||||
|
||||
|
||||
def _render_document(html: list[str], r: DocCheckResult) -> None:
    """Append the HTML card for a single checked document to ``html``.

    The card has a header (status badge, document label, passed/total counts
    and progress bars) and a body that shows either the error message or the
    level-1 checks with their level-2 children plus the analysed word count.

    Document label and error message are HTML-escaped before insertion so
    that characters such as ``<`` or ``&`` (common in exception texts) are
    displayed literally instead of being parsed as markup.

    Args:
        html: Output list of HTML fragments; extended in place.
        r: Check result to render. Reads ``completeness_pct``,
            ``correctness_pct``, ``error``, ``checks``, ``label`` and
            ``word_count``.
    """
    # Imported locally: the ``html`` parameter shadows the module of that name.
    from html import escape

    pct = r.completeness_pct
    cpct = r.correctness_pct
    bar_color = "green" if pct >= 80 else "yellow" if pct >= 50 else "red"

    # An error overrides any percentage-based status.
    if r.error:
        status_label = "FEHLER"
    elif pct == 100:
        status_label = "OK"
    elif pct >= 50:
        status_label = "LUECKENHAFT"
    else:
        status_label = "MANGELHAFT"

    l1_checks = [c for c in r.checks if c.level == 1]
    l2_by_parent: dict[str, list[CheckItem]] = {}
    for c in r.checks:
        if c.level == 2 and c.parent:
            l2_by_parent.setdefault(c.parent, []).append(c)

    l1_passed = sum(1 for c in l1_checks if c.passed)
    l2_active = [c for c in r.checks if c.level == 2 and not c.skipped]
    l2_passed = sum(1 for c in l2_active if c.passed)

    # Build the counts line as ONE string. As a separate list fragment the
    # ", x/y Detailpruefungen" suffix would be newline-joined to the previous
    # fragment and render with a stray space before the comma.
    counts = f'{l1_passed}/{len(l1_checks)} Pflichtangaben'
    if l2_active:
        counts += f', {l2_passed}/{len(l2_active)} Detailpruefungen'

    # Header
    html.append(
        f'<div style="border:1px solid #e5e7eb;border-radius:8px;margin-bottom:12px;overflow:hidden">'
        f'<div style="padding:12px 16px;background:#f9fafb">'
        f'<div style="display:flex;justify-content:space-between;align-items:center"><div>'
        f'<span style="font-size:11px;background:#f3f4f6;padding:2px 8px;border-radius:4px;'
        f'color:#4b5563;font-weight:500;margin-right:8px">{status_label}</span>'
        f'<strong style="font-size:14px">{escape(str(r.label), quote=False)}</strong>'
        f'<div style="font-size:12px;color:#6b7280;margin-top:2px">'
        f'{counts}'
    )
    html.append(f'</div></div><div style="text-align:right">{_bar(pct, bar_color)}')
    # The correctness bar is only shown when it is non-zero and there are
    # active level-2 checks.
    if cpct and l2_active:
        html.append(f'<br>{_bar(cpct, "blue")}')
    html.append('</div></div></div>')

    # Body
    if r.error:
        html.append(
            f'<div style="padding:12px 16px;color:#991b1b">'
            f'{escape(str(r.error), quote=False)}</div>'
        )
    else:
        html.append('<div style="padding:8px 16px 12px">')
        for c in l1_checks:
            _render_l1_check(html, c, l2_by_parent.get(c.id, []))
        if r.word_count:
            html.append(
                f'<div style="font-size:11px;color:#9ca3af;margin-top:8px;'
                f'padding-top:8px;border-top:1px solid #e5e7eb">'
                f'{r.word_count} Woerter analysiert</div>'
            )
        html.append('</div>')
    # Closes the outer card <div> opened in the header.
    html.append('</div>')
|
||||
|
||||
|
||||
def _render_l1_check(
    html: list[str], c: CheckItem, children: list[CheckItem],
) -> None:
    """Append one level-1 check row and its non-skipped level-2 children.

    The row shows a pass/fail icon, the check label (HTML-escaped), the
    passed/total count of its active children when there are any, and a hint
    box when the check failed and carries a hint.

    Args:
        html: Output list of HTML fragments; extended in place.
        c: The level-1 check to render.
        children: Level-2 checks belonging to ``c``; skipped ones are ignored.
    """
    # Imported locally: the ``html`` parameter shadows the module of that name.
    from html import escape

    l2_sub = [ch for ch in children if not ch.skipped]
    l2_passed = sum(1 for ch in l2_sub if ch.passed)

    style = "color:#374151" if c.passed else "color:#991b1b;font-weight:600"
    html.append(
        f'<div style="padding:3px 0">{_icon(c.passed)} '
        f'<span style="font-size:13px;{style}">{escape(str(c.label), quote=False)}</span>'
    )
    if l2_sub:
        html.append(f' <span style="color:#9ca3af;font-size:11px">({l2_passed}/{len(l2_sub)})</span>')
    if not c.passed and c.hint:
        html.append(_hint_box(c.hint))
    html.append('</div>')

    # Reuse the already filtered list instead of re-testing ``skipped``.
    for ch in l2_sub:
        _render_l2_check(html, ch)
|
||||
|
||||
|
||||
def _render_l2_check(html: list[str], ch: CheckItem) -> None:
    """Append one indented level-2 check row to ``html``.

    A passed check with matched text shows the first 80 characters of that
    text as evidence; a failed check with a hint shows a hint box. Label and
    matched text are HTML-escaped, since the matched text is taken from the
    checked document and may contain arbitrary characters.

    Args:
        html: Output list of HTML fragments; extended in place.
        ch: The level-2 check to render.
    """
    # Imported locally: the ``html`` parameter shadows the module of that name.
    from html import escape

    style = "color:#6b7280" if ch.passed else "color:#dc2626;font-weight:500"
    html.append(
        f'<div style="padding:2px 0 2px 24px;border-left:2px solid #e5e7eb;margin-left:8px">'
        f'{_icon(ch.passed)} '
        f'<span style="font-size:12px;{style}">{escape(str(ch.label), quote=False)}</span>'
    )
    if ch.passed and ch.matched_text:
        # Truncate first, then escape, so an entity is never cut in half.
        evidence = escape(str(ch.matched_text)[:80], quote=False)
        html.append(
            f'<div style="font-size:10px;color:#9ca3af;font-family:monospace;'
            f'margin-left:20px;overflow:hidden;text-overflow:ellipsis;'
            f'white-space:nowrap">"...{evidence}..."</div>'
        )
    if not ch.passed and ch.hint:
        html.append(_hint_box(ch.hint))
    html.append('</div>')
|
||||
|
||||
|
||||
def _render_cookie_banner(html: list[str], cookie_result: dict) -> None:
    """Append the cookie-banner summary card to ``html``.

    Shows whether a banner was detected, the provider name and up to ten
    violations (each truncated to 80 characters), or a green "no violations"
    note. Provider and violation texts are HTML-escaped. Keys that are
    present but hold ``None`` are treated like missing keys instead of
    raising.

    Args:
        html: Output list of HTML fragments; extended in place.
        cookie_result: Result dict; reads ``banner_detected``,
            ``banner_provider`` and ``banner_checks["violations"]``.
    """
    # Imported locally: the ``html`` parameter shadows the module of that name.
    from html import escape

    provider = escape(str(cookie_result.get("banner_provider", "unbekannt")), quote=False)
    html.append(
        '<div style="border:1px solid #e5e7eb;border-radius:8px;'
        'padding:12px 16px;margin-bottom:12px">'
        '<strong>Cookie-Banner Pruefung</strong><br>'
        f'Banner erkannt: {cookie_result.get("banner_detected", False)}<br>'
        f'Anbieter: {provider}'
    )
    # ``or`` also covers keys that exist with an explicit ``None`` value.
    banner_checks = cookie_result.get("banner_checks") or {}
    violations = banner_checks.get("violations") or []
    if violations:
        for v in violations[:10]:
            # Truncate first, then escape, so an entity is never cut in half.
            text = escape(str(v.get("text") or "")[:80], quote=False)
            html.append(f'<br>{_icon(False)} {text}')
    else:
        html.append('<br><span style="color:#22c55e">Keine Verstoesse erkannt.</span>')
    html.append('</div>')
|
||||
@@ -141,7 +141,7 @@ async def _run_doc_check(check_id: str, req: DocCheckRequest):
|
||||
email_result = send_email(
|
||||
recipient=req.recipient,
|
||||
subject=f"[DOKUMENTEN-PRUEFUNG] {len(results)} Dokumente geprueft",
|
||||
body_html=f"<pre>{summary}</pre>",
|
||||
body_html=summary,
|
||||
)
|
||||
|
||||
response = DocCheckResponse(
|
||||
@@ -284,40 +284,49 @@ def _split_into_sections(text: str, parent_label: str, url: str) -> list[dict]:
|
||||
|
||||
Detects sections like 'Cookies', 'Social Media', 'Dienste von Drittanbietern'
|
||||
and classifies each by document type for separate checking.
|
||||
Deduplicates: if the same doc_type appears twice, texts are merged.
|
||||
"""
|
||||
import re as _re
|
||||
sections = []
|
||||
sections: list[dict] = []
|
||||
seen_types: dict[str, int] = {} # doc_type -> index in sections
|
||||
|
||||
# Split by lines that look like headings (short, followed by longer content)
|
||||
lines = text.split("\n")
|
||||
current_heading = ""
|
||||
current_text = []
|
||||
current_text: list[str] = []
|
||||
|
||||
def _save_section(heading: str, text_lines: list[str]) -> None:
|
||||
sec_text = "\n".join(text_lines)
|
||||
if len(sec_text.split()) < 100:
|
||||
return
|
||||
sec_type = _classify_section(heading)
|
||||
if not sec_type:
|
||||
return
|
||||
# Merge duplicate doc_types (e.g. two "Social Media" headings)
|
||||
if sec_type in seen_types:
|
||||
idx = seen_types[sec_type]
|
||||
sections[idx]["text"] += "\n\n" + sec_text
|
||||
sections[idx]["word_count"] = len(sections[idx]["text"].split())
|
||||
else:
|
||||
seen_types[sec_type] = len(sections)
|
||||
sections.append({
|
||||
"title": f"{parent_label} > {heading}",
|
||||
"text": sec_text,
|
||||
"doc_type": sec_type,
|
||||
"word_count": len(sec_text.split()),
|
||||
})
|
||||
|
||||
for line in lines:
|
||||
stripped = line.strip()
|
||||
# Detect heading: short line (< 80 chars), not empty, followed by content
|
||||
is_heading = (
|
||||
5 < len(stripped) < 80
|
||||
and not stripped.endswith(".")
|
||||
and not stripped.endswith(",")
|
||||
and stripped[0].isupper()
|
||||
)
|
||||
|
||||
# Skip-headings should NOT start a new section — their text
|
||||
# belongs to the previous section (e.g. "Risikoabwägung" inside DSFA)
|
||||
is_skip = is_heading and stripped.lower().strip() in SKIP_HEADINGS
|
||||
|
||||
if is_heading and not is_skip and current_heading and len("\n".join(current_text)) > 200:
|
||||
# Save previous section
|
||||
sec_text = "\n".join(current_text)
|
||||
sec_type = _classify_section(current_heading)
|
||||
if sec_type and sec_type != "skip":
|
||||
sections.append({
|
||||
"title": f"{parent_label} > {current_heading}",
|
||||
"text": sec_text,
|
||||
"doc_type": sec_type,
|
||||
"word_count": len(sec_text.split()),
|
||||
})
|
||||
if is_heading and not is_skip and current_heading:
|
||||
_save_section(current_heading, current_text)
|
||||
|
||||
if is_heading and not is_skip:
|
||||
current_heading = stripped
|
||||
@@ -326,16 +335,8 @@ def _split_into_sections(text: str, parent_label: str, url: str) -> list[dict]:
|
||||
current_text.append(line)
|
||||
|
||||
# Last section
|
||||
if current_heading and len("\n".join(current_text)) > 200:
|
||||
sec_text = "\n".join(current_text)
|
||||
sec_type = _classify_section(current_heading)
|
||||
if sec_type and sec_type != "skip":
|
||||
sections.append({
|
||||
"title": f"{parent_label} > {current_heading}",
|
||||
"text": sec_text,
|
||||
"doc_type": sec_type,
|
||||
"word_count": len(sec_text.split()),
|
||||
})
|
||||
if current_heading:
|
||||
_save_section(current_heading, current_text)
|
||||
|
||||
return sections
|
||||
|
||||
@@ -347,6 +348,10 @@ SKIP_HEADINGS = {
|
||||
"risikoabwaegung und datenschutzfolgenabschaetzung",
|
||||
}
|
||||
|
||||
# Track already-seen section types to avoid duplicate sub-documents
|
||||
# (e.g. two "Social Media" headings on the same page)
|
||||
_DEDUP_TYPES = {"social_media", "cookie", "dsfa", "widerruf", "impressum"}
|
||||
|
||||
|
||||
def _classify_section(heading: str) -> str | None:
|
||||
"""Classify a section heading into a document type."""
|
||||
@@ -377,41 +382,5 @@ async def _check_cookie_banner(url: str) -> dict | None:
|
||||
|
||||
|
||||
def _build_report(results: list[DocCheckResult], cookie_result: dict | None) -> str:
|
||||
"""Build email report."""
|
||||
parts = [
|
||||
"DOKUMENTEN-PRUEFUNG",
|
||||
f"Dokumente geprueft: {len(results)}",
|
||||
"",
|
||||
]
|
||||
for r in results:
|
||||
status = "OK" if r.completeness_pct == 100 else "LUECKENHAFT" if r.completeness_pct >= 50 else "MANGELHAFT"
|
||||
if r.error:
|
||||
status = "FEHLER"
|
||||
detail = f", Korrektheit {r.correctness_pct}%" if r.correctness_pct else ""
|
||||
parts.append(f"[{status}] {r.label} ({r.completeness_pct}%{detail}, {r.word_count} Woerter)")
|
||||
|
||||
for check in r.checks:
|
||||
if check.skipped:
|
||||
continue
|
||||
icon = "+" if check.passed else "!!"
|
||||
indent = " " if check.level == 2 else " "
|
||||
parts.append(f"{indent}[{icon}] {check.label}")
|
||||
|
||||
if r.error:
|
||||
parts.append(f" FEHLER: {r.error}")
|
||||
parts.append("")
|
||||
|
||||
if cookie_result:
|
||||
parts.extend([
|
||||
"Cookie-Banner Pruefung:",
|
||||
f" Banner erkannt: {cookie_result.get('banner_detected', False)}",
|
||||
f" Anbieter: {cookie_result.get('banner_provider', 'unbekannt')}",
|
||||
])
|
||||
violations = cookie_result.get("banner_checks", {}).get("violations", [])
|
||||
if violations:
|
||||
for v in violations[:10]:
|
||||
parts.append(f" [!!] {v.get('text', '')[:80]}")
|
||||
else:
|
||||
parts.append(" Keine Verstoesse erkannt.")
|
||||
|
||||
return "\n".join(parts)
|
||||
from .agent_doc_check_report import build_html_report
|
||||
return build_html_report(results, cookie_result)
|
||||
|
||||
Reference in New Issue
Block a user