cb5dad1a2f
CI / detect-changes (push) Successful in 10s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Has been skipped
CI / test-python-backend (push) Successful in 45s
CI / secret-scan (push) Has been skipped
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / validate-canonical-controls (push) Successful in 20s
CI / loc-budget (push) Failing after 17s
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / test-go (push) Has been skipped
CI / iace-gt-coverage (push) Has been skipped
CI / test-python-document-crawler (push) Has been skipped
CI / test-python-dsms-gateway (push) Has been skipped
Drei zusammenhaengende Fixes fuer den VW-Befund (6 Vendors statt 100+): A — audit_quality_checks.py: drei systemische Vorbehalte die IMMER prominent gezeigt werden: * banner_detected=False trotz Cookie-Doc → HIGH 'CMP-Tool ungeladen' * cookie_doc >= 30k chars aber cmp_vendors < 15 → HIGH/MEDIUM 'Vendor-Liste auffaellig kurz fuer Doc-Groesse' * submitted URL aber 0/Mini-Text → MEDIUM 'URL nicht ladbar' Rote Audit-Vorbehalt-Box ueber dem GF-1-Pager. GF-Summary sagt 'Audit unvollstaendig' statt faelschlich 'Keine kritischen Themen'. gf_one_pager nimmt audit_quality_findings in top_findings auf (BEVOR andere Findings). B — cookies_table_parser laeuft jetzt auch auf gecrawltem Cookie-Doc- Text (nicht nur bei User-Paste). Wenn der dsi-discovery-Response Tab/ Pipe-getrennte Tabellen-Reihen liefert, parsen wir sie deterministisch. D — consent-tester/dsi-discovery extrahiert jetzt zusaetzlich zum Text die <table>-Elemente aus dem DOM als list[str] (Tab-getrennt pro Zeile, mind. 2 Zellen, mind. 3 Zeilen, max 10 Tabellen pro Doc). Backend schleust diese als 'html_table'-cmp_payload ein und jagt sie zuerst durch cookies_table_parser → 100% deterministische Vendor-Extraktion ohne LLM. VW-Erwartung: aus der 65k-Cookie-Tabelle werden jetzt 30-50 Vendors deterministisch geparst statt 6 vom LLM-Cascade. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
326 lines
12 KiB
Python
326 lines
12 KiB
Python
"""
|
|
P82 — GF one-pager (short summary for the managing director, "Geschaeftsfuehrer").

A compact 5-7 bullet summary placed at the very top of the mail. Management
will otherwise not read the 124k-character full audit. The tone is factual,
never alarmist (Memory: feedback_breakpilot_tonalitaet).

Covers:
- Compliance score + comparison value (if a previous run exists)
- Top-3 prioritised topics (HIGH or critical MEDIUM)
- Effort estimate (4-8 weeks) + who does what (DSB / IT / Marketing)
- Realistic risk note (without the "4% of global revenue" threat)

Rendered BEFORE the critical findings and the executive summary.
|
|
"""
|
|
|
|
from __future__ import annotations

import html
import logging
from typing import Any
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
# User-facing (German) display names for finding areas. Keys are the
# lower-cased area identifiers looked up by _normalize_finding; the
# "library_mismatch" entry is also used directly by _collect_top_findings.
_AREA_LABEL = dict(
    banner="Cookie-Banner",
    cookie="Cookie-Richtlinie",
    dse="Datenschutzerklaerung",
    impressum="Impressum",
    agb="AGB",
    library_mismatch="Cookie-Klassifikation",
    vendor="Vendor-Liste / VVT",
    consent="Einwilligung",
    rights="Betroffenenrechte",
)
|
|
|
|
|
|
def _normalize_finding(item: dict) -> dict:
    """Map a raw finding dict onto the common one-pager finding shape.

    Args:
        item: Raw finding. The label is read from the first truthy value of
            ``label`` / ``title`` / ``check`` / ``name``, the severity from
            ``severity`` / ``level`` and the area from ``area`` /
            ``doc_type`` / ``category``.

    Returns:
        A dict with the keys ``severity`` (``HIGH`` / ``MEDIUM`` / ``LOW``),
        ``label`` (at most 200 characters), ``area`` (display name) and
        ``owner``; an empty dict when the finding has no usable label.
    """

    def first_set(*keys):
        # Same semantics as ``item.get(a) or item.get(b) or ... or ""``.
        for key in keys:
            value = item.get(key)
            if value:
                return value
        return ""

    text = first_set("label", "title", "check", "name").strip()
    if not text:
        return {}

    # Anything that is not a known severity is treated as MEDIUM.
    severity = str(first_set("severity", "level")).upper()
    if severity not in {"HIGH", "MEDIUM", "LOW"}:
        severity = "MEDIUM"

    area_key = first_set("area", "doc_type", "category").lower()
    if area_key in _AREA_LABEL:
        area_name = _AREA_LABEL[area_key]
    else:
        # Unknown areas are prettified ("some_area" -> "Some Area").
        area_name = area_key.replace("_", " ").title() or "Allgemein"

    responsible = item.get("owner")
    if not responsible:
        # The owner heuristic sees the full (untruncated) label.
        responsible = _guess_owner(text, area_key)

    return {
        "severity": severity,
        "label": text[:200],
        "area": area_name,
        "owner": responsible,
    }
|
|
|
|
|
|
def _guess_owner(label: str, area: str) -> str:
    """Heuristically pick the most likely contact for a finding.

    The lower-cased label is scanned for keyword substrings; the first rule
    (in the order listed below) with a matching keyword decides. If no rule
    matches, the area is used as a fallback.

    Args:
        label: Finding label.
        area: Raw area key of the finding (e.g. ``"banner"``).

    Returns:
        The suggested owner as a user-facing string.
    """
    # Order matters: earlier rules take precedence over later ones.
    rules = (
        (("banner", "cookie", "consent", "einwilligung", "tracking"),
         "DSB + Marketing/CMP-Admin"),
        (("vendor", "avv", "auftragsverarbeitung", "drittland", "schrems"),
         "DSB + Einkauf/Legal"),
        (("impressum", "agb", "widerruf", "kontakt"),
         "Legal + Web-Team"),
        (("dsfa", "dsr", "loeschfrist", "art. 15", "auskunft",
          "betroffenenrecht"),
         "DSB"),
        (("tom", "verschluesselung", "backup", "incident", "logging"),
         "IT-Security + DSB"),
    )
    text = label.lower()
    for keywords, owner in rules:
        for keyword in keywords:
            if keyword in text:
                return owner
    return "DSB + Marketing" if area in ("banner", "cookie") else "DSB"
|
|
|
|
|
|
def _audit_quality_entry(aq: dict) -> dict:
    """Bring one audit-quality caveat into the common finding shape."""
    severity = str(aq.get("severity") or "").upper()
    if severity not in ("HIGH", "MEDIUM", "LOW"):
        # Missing/unknown severities keep the HIGH default of this source.
        severity = "HIGH"
    return {
        "severity": severity,
        # Coerce to str so the dedup step can always call .lower() on it.
        "label": str(aq.get("label") or "").strip() or "Audit-Vorbehalt",
        "area": aq.get("area") or "Audit-Qualitaet",
        "owner": aq.get("owner") or "DSB + Web-Team",
    }


def _collect_top_findings(
    banner_result: dict | None,
    scorecard: dict | None,
    library_mismatch_findings: list[dict] | None,
    audit_quality_findings: list[dict] | None = None,
    limit: int = 5,
) -> list[dict]:
    """Gather, rank and de-duplicate the findings shown in the one-pager.

    Sources, in collection order:
      0. audit-quality caveats (all severities),
      1. findings of every phase in ``banner_result["phases"]``,
      2. library-mismatch findings with severity HIGH,
      3. scorecard entries (``failed``, else ``items``) that normalise to HIGH.

    The combined list is sorted by severity (HIGH, MEDIUM, LOW) with a stable
    sort, so entries of equal severity keep their collection order. Entries
    whose lower-cased label shares the first 80 characters with an earlier
    entry are dropped.

    Args:
        banner_result: Banner deep-check result, or ``None``.
        scorecard: Scorecard dict, or ``None``.
        library_mismatch_findings: Cookie classification mismatches, or ``None``.
        audit_quality_findings: Caveats about the audit run itself, or ``None``.
        limit: Maximum number of findings to return; values <= 0 yield ``[]``.

    Returns:
        At most ``limit`` dicts with the keys ``severity``, ``label``,
        ``area`` and ``owner``.
    """
    # Previously a limit of 0 still returned one finding because the limit
    # was only checked after appending.
    if limit <= 0:
        return []

    out: list[dict] = []

    # 0) Audit-quality caveats first: they question the audit itself.
    #    Normalised so that lowercase severities sort/count correctly and a
    #    None label cannot crash the dedup below.
    # NOTE(review): a MEDIUM caveat still sorts after HIGH findings from the
    # other sources and may be cut by `limit` — confirm this is intended.
    for aq in (audit_quality_findings or []):
        if isinstance(aq, dict):
            out.append(_audit_quality_entry(aq))

    # 1) Banner deep-check findings.
    if banner_result:
        for ph in (banner_result.get("phases") or {}).values():
            if not isinstance(ph, dict):
                continue
            for f in (ph.get("findings") or []):
                if not isinstance(f, dict):
                    continue
                n = _normalize_finding({**f, "area": "banner"})
                if n:
                    out.append(n)

    # 2) Library mismatches, HIGH only (e.g. marketing cookies declared as
    #    essential). Severity is compared case-insensitively.
    for mm in (library_mismatch_findings or []):
        if not isinstance(mm, dict):
            continue
        if str(mm.get("severity") or "").upper() != "HIGH":
            continue
        out.append({
            "severity": "HIGH",
            "label": f'Cookie "{mm.get("cookie","?")}" als '
                     f'{mm.get("declared_category","?")} deklariert, '
                     f'tatsaechlicher Zweck typischerweise '
                     f'{mm.get("library_category","?")}',
            "area": _AREA_LABEL["library_mismatch"],
            "owner": "DSB + Marketing/CMP-Admin",
        })

    # 3) Scorecard failures (MC audit), HIGH only.
    if scorecard:
        for entry in (scorecard.get("failed") or scorecard.get("items") or []):
            if not isinstance(entry, dict):
                continue
            n = _normalize_finding(entry)
            if n and n["severity"] == "HIGH":
                out.append(n)

    # Rank by severity; list.sort is stable, so source order is the tie-break.
    order = {"HIGH": 0, "MEDIUM": 1, "LOW": 2}
    out.sort(key=lambda f: order.get(f["severity"], 3))

    # De-duplicate on the first 80 characters of the lower-cased label.
    seen: set[str] = set()
    dedup: list[dict] = []
    for f in out:
        key = f["label"].lower()[:80]
        if key in seen:
            continue
        seen.add(key)
        dedup.append(f)
        if len(dedup) >= limit:
            break
    return dedup
|
|
|
|
|
|
def _score_color(score: float | int | None) -> str:
    """Return the hex colour used to render a compliance score.

    Grey for a missing or non-numeric score, green from 80, amber from 60,
    red for everything else.
    """
    neutral = "#64748b"
    if score is None:
        return neutral
    try:
        value = float(score)
    except (TypeError, ValueError):
        return neutral

    # Thresholds are checked from highest to lowest; first hit wins.
    for threshold, colour in ((80, "#16a34a"), (60, "#ca8a04")):
        if value >= threshold:
            return colour
    return "#dc2626"
|
|
|
|
|
|
def _delta_html(curr: float | None, prev: float | None) -> str:
    """Render the score change against the previous run as an HTML span.

    Returns an empty string when either value is missing or not numeric, an
    "unchanged" note when the absolute difference is below 0.5 points, and
    otherwise a coloured arrow with the difference to one decimal place.
    """
    if curr is not None and prev is not None:
        try:
            diff = float(curr) - float(prev)
        except (TypeError, ValueError):
            return ""
    else:
        return ""

    magnitude = abs(diff)
    if magnitude < 0.5:
        return (
            ' <span style="color:#64748b;font-size:11px">'
            '(unveraendert ggue. letztem Lauf)</span>'
        )

    # Green/up for an improvement, red/down otherwise.
    if diff > 0:
        arrow, color = "↑", "#16a34a"
    else:
        arrow, color = "↓", "#dc2626"
    return (
        f' <span style="color:{color};font-size:11px">'
        f'{arrow} {magnitude:.1f} Punkte ggue. letztem Lauf</span>'
    )
|
|
|
|
|
|
def _scorecard_score(scorecard: dict | None) -> Any:
    """Return ``compliance_score`` (preferred) or ``score``; 0 is a valid score."""
    if not scorecard:
        return None
    for key in ("compliance_score", "score"):
        value = scorecard.get(key)
        # Only None / "" count as "missing" — a plain `or` chain would also
        # discard a legitimate score of 0.
        if value is not None and value != "":
            return value
    return None


def _context_line_html(scan_context: dict | None) -> str:
    """Render the optional "Klassifizierung" line, or "" if nothing is set."""
    if not scan_context:
        return ""
    bits: list[str] = []
    if scan_context.get("industry"):
        bits.append(str(scan_context["industry"]))
    if scan_context.get("business_model"):
        bits.append(str(scan_context["business_model"]).upper())
    if scan_context.get("employee_count"):
        bits.append(f'{scan_context["employee_count"]} MA')
    if not bits:
        return ""
    return (
        '<div style="font-size:11px;color:#64748b;margin-bottom:6px">'
        f'Klassifizierung: {html.escape(" · ".join(bits))}'
        '</div>'
    )


def build_gf_one_pager_html(
    site_name: str,
    scorecard: dict | None = None,
    previous_scorecard: dict | None = None,
    banner_result: dict | None = None,
    library_mismatch_findings: list[dict] | None = None,
    scan_context: dict | None = None,
    audit_quality_findings: list[dict] | None = None,
) -> str:
    """Build the management one-pager as an HTML fragment.

    The fragment shows the compliance score (with the change against the
    previous run, if available), up to six prioritised findings and a closing
    note. If there are no findings, a single status bullet is rendered.

    Args:
        site_name: Name of the audited site; "—" is shown when empty.
        scorecard: Current scorecard; the score is read from
            ``compliance_score`` or ``score``.
        previous_scorecard: Scorecard of the previous run, used for the delta.
        banner_result: Banner deep-check result passed to the finding
            collection.
        library_mismatch_findings: Cookie classification mismatches.
        scan_context: Optional ``industry`` / ``business_model`` /
            ``employee_count`` values for the classification line.
        audit_quality_findings: Caveats about the audit itself. Any non-empty
            value switches the closing note (and the empty-state bullet) to
            the "audit incomplete" wording.

    Returns:
        The HTML fragment as a string. All caller-supplied text (site name,
        finding area/label/owner, scan-context values) is HTML-escaped.
    """
    score = _scorecard_score(scorecard)
    prev_score = _scorecard_score(previous_scorecard)

    top = _collect_top_findings(
        banner_result=banner_result,
        scorecard=scorecard,
        library_mismatch_findings=library_mismatch_findings,
        audit_quality_findings=audit_quality_findings,
        limit=6,
    )
    audit_warn = bool(audit_quality_findings)

    n_high = sum(1 for f in top if f["severity"] == "HIGH")
    n_med = sum(1 for f in top if f["severity"] == "MEDIUM")

    # A non-numeric score renders as "—" instead of raising, matching the
    # tolerance of _score_color and _delta_html.
    try:
        score_str = f'{float(score):.0f}/100' if score is not None else "—"
    except (TypeError, ValueError):
        score_str = "—"
    score_color = _score_color(score)

    ctx_line = _context_line_html(scan_context)

    sev_pill = {
        "HIGH": '<span style="background:#fee2e2;color:#991b1b;'
                'padding:1px 6px;border-radius:8px;font-size:10px;'
                'font-weight:600">HOCH</span>',
        "MEDIUM": '<span style="background:#fef3c7;color:#92400e;'
                  'padding:1px 6px;border-radius:8px;font-size:10px;'
                  'font-weight:600">MITTEL</span>',
        "LOW": '<span style="background:#dbeafe;color:#1e40af;'
               'padding:1px 6px;border-radius:8px;font-size:10px;'
               'font-weight:600">NIEDRIG</span>',
    }

    # Optional dependency: without it the bullets simply carry no pill.
    # Deliberately broad so a broken optional module never blocks the mail.
    try:
        from compliance.services.finding_confidence import confidence_pill_html
    except Exception:
        logger.debug(
            "finding_confidence unavailable; confidence pills omitted",
            exc_info=True,
        )

        def confidence_pill_html(_label: str) -> str:
            return ""

    bullets: list[str] = []
    for f in top:
        # Finding text can originate from crawled third-party content
        # (e.g. cookie names), so it is escaped before going into the mail.
        # confidence_pill_html receives the raw label and returns HTML.
        bullets.append(
            '<li style="margin-bottom:4px;font-size:12px;line-height:1.45">'
            f'{sev_pill.get(f["severity"], "")} '
            f'<strong>{html.escape(str(f["area"]))}:</strong> '
            f'{html.escape(str(f["label"]))}'
            f'{confidence_pill_html(f["label"])} '
            '<span style="color:#64748b">— typisch zustaendig: '
            f'{html.escape(str(f["owner"]))}</span></li>'
        )

    if not bullets:
        if audit_warn:
            bullets.append(
                '<li style="margin-bottom:4px;font-size:12px;color:#991b1b">'
                '<strong>Audit selbst war unvollstaendig</strong> — siehe '
                'roten Audit-Vorbehalt-Block weiter unten. Eine pauschale '
                '"alles ok"-Aussage ist auf Basis dieser Datenlage nicht '
                'moeglich.</li>'
            )
        else:
            bullets.append(
                '<li style="margin-bottom:4px;font-size:12px;color:#475569">'
                'Keine kritischen Themen erkannt — der Audit-Lauf hat fuer '
                'die geprueften Dokumente keine HIGH-Findings produziert. '
                'Details im weiteren Verlauf der Mail.</li>'
            )

    if audit_warn:
        closing_note = (
            '<strong style="color:#991b1b">Wichtig — Audit unvollstaendig:'
            '</strong> An mindestens einer Stelle ist unser Crawler an '
            'Grenzen gestossen (siehe roter Audit-Vorbehalt-Block weiter '
            'unten). Diese Bereiche sollten manuell oder im Copy-Paste-Modus '
            'nachgereicht werden, bevor eine belastbare Compliance-Aussage '
            'getroffen wird.'
        )
    else:
        closing_note = (
            '<strong>Realistische Einordnung:</strong> Wir analysieren das '
            'Aussenbild Ihrer Website automatisiert — einzelne Findings '
            'koennen durch interne Dokumentation bereits abgedeckt sein. '
            'Empfohlenes Vorgehen: priorisierte Punkte mit DSB / Marketing / '
            'IT in einem Termin durchsprechen (4-8 Wochen sind ein '
            'realistischer Zeitrahmen fuer die Umsetzung). Eine pauschale '
            'Bussgeld-Erwartung leiten wir aus diesem Audit nicht ab.'
        )

    safe_site = html.escape(str(site_name or "—"))

    return "".join((
        # Outer card.
        '<div style="font-family:-apple-system,BlinkMacSystemFont,sans-serif;'
        'max-width:760px;margin:0 auto 16px;padding:18px 20px;'
        'background:#f8fafc;border:1px solid #cbd5e1;border-radius:8px">',
        # Heading + optional classification line.
        '<div style="font-size:11px;color:#475569;text-transform:uppercase;'
        'letter-spacing:1.4px;margin-bottom:4px;font-weight:600">'
        f'Kurzfassung fuer die Geschaeftsfuehrung — {safe_site}'
        '</div>',
        ctx_line,
        # Score row with delta and severity counters.
        '<div style="display:flex;align-items:baseline;gap:14px;'
        'margin:8px 0 14px;flex-wrap:wrap">'
        f'<div style="font-size:28px;font-weight:700;color:{score_color}">'
        f'{score_str}</div>'
        '<div style="font-size:11px;color:#64748b">'
        f'Compliance-Score{_delta_html(score, prev_score)}</div>'
        '<div style="margin-left:auto;font-size:11px;color:#475569">'
        f'<strong>{n_high}</strong> hoch · '
        f'<strong>{n_med}</strong> mittel'
        '</div></div>',
        # Findings list.
        '<div style="font-size:11px;color:#475569;margin-bottom:6px;'
        'font-weight:600;text-transform:uppercase;letter-spacing:1px">'
        'Was kurzfristig angegangen werden sollte'
        '</div>'
        '<ul style="margin:0 0 12px 18px;padding:0">',
        "".join(bullets),
        '</ul>',
        # Closing note.
        '<div style="font-size:11px;color:#475569;line-height:1.5;'
        'padding:8px 10px;background:#fff;border:1px solid #e2e8f0;'
        'border-radius:4px">',
        closing_note,
        '</div>'
        '</div>',
    ))
|