feat(audit): P86 Branchen-Benchmark + P35/P77/P78 Textsignale

P86 — industry_benchmark.py: zieht alle Snapshots mit derselben scan_context.industry, berechnet Median + Percentile, rendert 'Sie 42% — Automotive-Median 58% (Stichprobe: 12)'. Min Sample 3. P35 — banner_text 'Speichern' ohne 'Ablehnen' = MEDIUM. Mehrdeutiges Label nach EDPB 03/2022 Deceptive-Design-Guidelines. P77 — DSE mit prominenter Cookie-Sektion (Vendor-Hints: Speicherdauer, Anbieter, Datenkategorie) ersetzt die Forderung nach separater Cookie-Richtlinie. Positives Signal statt False-Positive. P78 — Art. 26-Klausel im DSE-Text erkannt → positives Signal 'JC-Konstrukt dokumentiert'. Vermeidet False-Positive bei Konzern-Schwester-Kooperationen. Alle in Mail eingehaengt: Branchen-Block nach GF-1-Pager, Signale-Block nach Konsistenz-Check. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-21 16:43:15 +02:00
parent df8832c521
commit 30e43afba6
3 changed files with 372 additions and 2 deletions
@@ -0,0 +1,117 @@
+"""
+P86 — Branchen-Benchmark.
+
+Vergleicht den eigenen Compliance-Score mit dem Branchen-Median aus
+allen bisherigen Snapshots derselben industry (P79 scan_context).
+Liefert: "Sie 42% — Automotive-Median 58% (Stichprobe: 12 Sites)".
+
+Wird in der Mail-Composition direkt unter dem Score im GF-1-Pager
+gerendert. Mindest-Stichprobe = 3 vergleichbare Snapshots, sonst skip.
+
+Heuristik fuer Score-Extraktion aus banner_result (in dieser Reihenfolge):
+- banner_result.compliance_score ODER
+- banner_result.completeness_pct ODER
+- banner_result.correctness_pct ODER
+- 100 - len(banner_checks.violations) * 5 als Fallback (mindestens 0).
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+from typing import Any
+
+from sqlalchemy import text
+from sqlalchemy.orm import Session
+
+logger = logging.getLogger(__name__)
+
+_MIN_SAMPLE = 3
+
+
+def _extract_score(banner_result: dict | None) -> float | None:
+    """Derive a numeric compliance score from a snapshot's ``banner_result``.
+
+    Lookup order:
+      1. The first of ``compliance_score``, ``completeness_pct`` and
+         ``correctness_pct`` that holds a real, finite number.
+      2. Fallback: ``100 - 5 * len(banner_checks["violations"])``, floored
+         at 0. The fallback is only used when ``banner_checks`` is present
+         as a dict.
+
+    Args:
+        banner_result: Decoded ``banner_result`` payload, or None.
+
+    Returns:
+        The score as a float, or None when the payload is not a dict or
+        carries no usable score information.
+    """
+    import math  # local import keeps the block self-contained
+
+    if not isinstance(banner_result, dict):
+        return None
+    for key in ("compliance_score", "completeness_pct", "correctness_pct"):
+        v = banner_result.get(key)
+        # bool is a subclass of int; True/False is a flag, not a score.
+        if isinstance(v, bool) or not isinstance(v, (int, float)):
+            continue
+        # json.loads accepts NaN/Infinity; such values would poison the
+        # sort and the median downstream, so try the next key instead.
+        if math.isfinite(v):
+            return float(v)
+    bc = banner_result.get("banner_checks")
+    if not isinstance(bc, dict):
+        # No check data at all means "unknown", not "zero violations".
+        # Returning 100.0 here would inflate the industry median.
+        return None
+    viols = bc.get("violations") or []
+    if isinstance(viols, list):
+        return max(0.0, 100.0 - len(viols) * 5)
+    return None
+
+
+def compute_benchmark(
+    db: Session,
+    industry: str,
+    current_score: float | None,
+    current_check_id: str,
+) -> dict | None:
+    """Compare ``current_score`` with stored snapshots of the same industry.
+
+    Reads ``banner_result`` from up to 50 of the most recently created
+    snapshots whose ``scan_context->>'industry'`` equals ``industry``,
+    skipping the snapshot with ``check_id == current_check_id``, and
+    extracts one score per row via ``_extract_score``.
+
+    Args:
+        db: Session used to run the query.
+        industry: Industry label to match; a falsy value disables the
+            benchmark.
+        current_score: Score of the check being reported, or None.
+        current_check_id: ``check_id`` of the current check, excluded from
+            the comparison sample.
+
+    Returns:
+        None if ``industry`` is falsy, ``current_score`` is None, or fewer
+        than ``_MIN_SAMPLE`` rows yield a score. Otherwise a dict with
+        ``industry``, ``current`` and ``median`` (both rounded to one
+        decimal), ``sample_size`` and ``percentile`` (share of sampled
+        scores strictly below ``current_score``, as a rounded percentage).
+    """
+    if not industry or current_score is None:
+        return None
+    # Snapshots with the same industry in scan_context.
+    # NOTE(review): rows are snapshots, not necessarily distinct sites --
+    # confirm whether one site can contribute several rows.
+    rows = db.execute(text(
+        """
+        SELECT banner_result FROM compliance.compliance_check_snapshots
+        WHERE check_id != :cid
+          AND scan_context IS NOT NULL
+          AND scan_context->>'industry' = :ind
+        ORDER BY created_at DESC
+        LIMIT 50
+        """
+    ), {"cid": current_check_id, "ind": industry}).fetchall()
+    scores: list[float] = []
+    for r in rows:
+        br = r[0]
+        if isinstance(br, str):
+            # The column may come back as raw JSON text; decode it first.
+            try:
+                br = json.loads(br)
+            except ValueError as exc:  # includes json.JSONDecodeError
+                # Best effort: one corrupt row must not break the mail,
+                # but it should leave a trace instead of vanishing.
+                logger.debug(
+                    "benchmark: skipping undecodable banner_result: %s", exc
+                )
+                continue
+        s = _extract_score(br)
+        if s is not None:
+            scores.append(s)
+    if len(scores) < _MIN_SAMPLE:
+        return None
+    scores.sort()
+    n = len(scores)
+    # Odd n: middle element; even n: mean of the two middle elements.
+    median = scores[n // 2] if n % 2 else (scores[n // 2 - 1] + scores[n // 2]) / 2
+    pct_lower = round(sum(1 for s in scores if s < current_score) / n * 100)
+    return {
+        "industry":  industry,
+        "current":   round(current_score, 1),
+        "median":    round(median, 1),
+        "sample_size": n,
+        "percentile": pct_lower,  # 80 = better than 80% of the sample
+    }
+
+
+def build_benchmark_html(bench: dict) -> str:
+    """Render the industry-benchmark block as an inline-styled HTML ``<div>``.
+
+    Args:
+        bench: Dict with the keys ``industry``, ``current``, ``median``,
+            ``percentile`` and ``sample_size``. A falsy value yields an
+            empty string.
+
+    Returns:
+        The HTML fragment, or ``""`` when ``bench`` is falsy. The verdict
+        text and colour depend on ``current - median``: at least +5 is
+        "above" (green), at most -5 is "below" (red), anything else is
+        "roughly on" the median (amber).
+
+    Raises:
+        KeyError: If ``bench`` is non-empty but lacks a required key.
+    """
+    from html import escape  # local import keeps the block self-contained
+
+    if not bench:
+        return ""
+    delta = bench["current"] - bench["median"]
+    if delta >= 5:
+        color = "#16a34a"
+        verdict = "ueber dem Branchen-Median"
+    elif delta <= -5:
+        color = "#dc2626"
+        verdict = "unter dem Branchen-Median"
+    else:
+        color = "#ca8a04"
+        verdict = "etwa auf Branchen-Median"
+    # The industry label is free text from stored scan data; escape it so
+    # it cannot inject markup into the outgoing mail.
+    industry = escape(str(bench["industry"]))
+    return (
+        '<div style="font-family:-apple-system,BlinkMacSystemFont,sans-serif;'
+        'max-width:760px;margin:0 auto 12px;padding:8px 14px;'
+        'background:#f0f9ff;border:1px solid #bfdbfe;border-radius:6px;'
+        'font-size:11px;color:#1e293b">'
+        f'<strong>Branchen-Vergleich ({industry}):</strong> '
+        f'Ihr Score <strong>{bench["current"]:.1f}</strong> '
+        f'<span style="color:{color}">({verdict}, '
+        f'Median {bench["median"]:.1f})</span>. '
+        f'<span style="color:#64748b">Sie sind besser als '
+        f'{bench["percentile"]}% der bisher von uns geprueften '
+        f'{bench["sample_size"]} Sites in dieser Branche.</span>'
+        '</div>'
+    )