30e43afba6
CI / secret-scan (push) Has been skipped
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / detect-changes (push) Successful in 11s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / loc-budget (push) Failing after 19s
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Has been skipped
CI / test-go (push) Has been skipped
CI / iace-gt-coverage (push) Has been skipped
CI / test-python-backend (push) Successful in 41s
CI / test-python-document-crawler (push) Has been skipped
CI / test-python-dsms-gateway (push) Has been skipped
CI / validate-canonical-controls (push) Successful in 15s
P86 — industry_benchmark.py: zieht alle Snapshots mit derselben scan_context.industry, berechnet Median + Percentile, rendert 'Sie 42% — Automotive-Median 58% (Stichprobe: 12)'. Min Sample 3. P35 — banner_text 'Speichern' ohne 'Ablehnen' = MEDIUM. Mehrdeutiges Label nach EDPB 03/2022 Deceptive-Design-Guidelines. P77 — DSE mit prominenter Cookie-Sektion (Vendor-Hints: Speicherdauer, Anbieter, Datenkategorie) ersetzt die Forderung nach separater Cookie-Richtlinie. Positives Signal statt False-Positive. P78 — Art. 26-Klausel im DSE-Text erkannt → positives Signal 'JC-Konstrukt dokumentiert'. Vermeidet False-Positive bei Konzern-Schwester-Kooperationen. Alle in Mail eingehaengt: Branchen-Block nach GF-1-Pager, Signale-Block nach Konsistenz-Check. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
118 lines
3.7 KiB
Python
118 lines
3.7 KiB
Python
"""
|
|
P86 — Branchen-Benchmark.
|
|
|
|
Vergleicht den eigenen Compliance-Score mit dem Branchen-Median aus
|
|
allen bisherigen Snapshots derselben industry (P79 scan_context).
|
|
Liefert: "Sie 42% — Automotive-Median 58% (Stichprobe: 12 Sites)".
|
|
|
|
Wird in der Mail-Composition direkt unter dem Score im GF-1-Pager
|
|
gerendert. Mindest-Stichprobe = 3 vergleichbare Snapshots, sonst skip.
|
|
|
|
Heuristik fuer Score-Extraktion aus banner_result:
|
|
- banner_result.compliance_score ODER
- banner_result.completeness_pct ODER
|
|
- banner_result.correctness_pct ODER
|
|
- 100 - len(banner_checks.violations) * 5 als Fallback.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import logging
|
|
from typing import Any
|
|
|
|
from sqlalchemy import text
|
|
from sqlalchemy.orm import Session
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
# Minimum number of comparable peer scores required before a benchmark is
# reported; compute_benchmark returns None when fewer are available.
_MIN_SAMPLE = 3
|
|
|
|
|
|
def _extract_score(banner_result: dict | None) -> float | None:
|
|
if not isinstance(banner_result, dict):
|
|
return None
|
|
for key in ("compliance_score", "completeness_pct", "correctness_pct"):
|
|
v = banner_result.get(key)
|
|
if isinstance(v, (int, float)):
|
|
return float(v)
|
|
bc = banner_result.get("banner_checks") or {}
|
|
if isinstance(bc, dict):
|
|
viols = bc.get("violations") or []
|
|
if isinstance(viols, list):
|
|
return max(0.0, 100.0 - len(viols) * 5)
|
|
return None
|
|
|
|
|
|
def compute_benchmark(
    db: Session,
    industry: str,
    current_score: float | None,
    current_check_id: str,
) -> dict | None:
    """Compare ``current_score`` against other snapshots of the same industry.

    Loads the ``banner_result`` of up to 50 most recently created snapshots
    whose ``scan_context->>'industry'`` equals ``industry`` (excluding the
    snapshot identified by ``current_check_id``), extracts a score from each
    via ``_extract_score`` and summarises them.

    Args:
        db: SQLAlchemy session used to run the query.
        industry: Industry label to match; an empty value skips the benchmark.
        current_score: Score of the current check; ``None`` skips the
            benchmark.
        current_check_id: ``check_id`` of the current snapshot, excluded from
            the peer sample.

    Returns:
        ``None`` when the inputs are missing or fewer than ``_MIN_SAMPLE``
        peer scores could be extracted. Otherwise a dict with the keys
        ``industry``, ``current`` and ``median`` (both rounded to one
        decimal), ``sample_size`` and ``percentile`` (rounded percentage of
        peer scores strictly below ``current_score``).
    """
    if not industry or current_score is None:
        return None

    # Snapshots that share the same industry in scan_context.
    rows = db.execute(
        text(
            """
            SELECT banner_result FROM compliance.compliance_check_snapshots
            WHERE check_id != :cid
            AND scan_context IS NOT NULL
            AND scan_context->>'industry' = :ind
            ORDER BY created_at DESC
            LIMIT 50
            """
        ),
        {"cid": current_check_id, "ind": industry},
    ).fetchall()

    scores: list[float] = []
    for row in rows:
        banner_result = row[0]
        # The column may arrive as a JSON string rather than a decoded object.
        if isinstance(banner_result, str):
            try:
                banner_result = json.loads(banner_result)
            except (ValueError, RecursionError):
                # json.JSONDecodeError is a ValueError. A single corrupt row
                # must not abort the benchmark, but it should be traceable.
                logger.debug(
                    "Skipping snapshot with undecodable banner_result "
                    "(industry=%s)",
                    industry,
                )
                continue
        score = _extract_score(banner_result)
        if score is not None:
            scores.append(score)

    sample_size = len(scores)
    if sample_size < _MIN_SAMPLE:
        return None

    scores.sort()
    middle = sample_size // 2
    if sample_size % 2:
        median = scores[middle]
    else:
        median = (scores[middle - 1] + scores[middle]) / 2

    below = sum(1 for score in scores if score < current_score)
    percentile = round(below / sample_size * 100)

    return {
        "industry": industry,
        "current": round(current_score, 1),
        "median": round(median, 1),
        "sample_size": sample_size,
        "percentile": percentile,  # 80 = better than 80% of the industry
    }
|
|
|
|
|
|
def build_benchmark_html(bench: dict | None) -> str:
    """Render the industry benchmark as a small inline-styled HTML block.

    Args:
        bench: Benchmark dict with the keys ``industry``, ``current``,
            ``median``, ``percentile`` and ``sample_size``, or a falsy value
            when no benchmark is available.

    Returns:
        An HTML ``<div>`` string, or ``""`` when ``bench`` is falsy. The
        verdict and its colour depend on ``current - median``: green at +5 or
        more, red at -5 or less, amber in between.
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    if not bench:
        return ""

    delta = bench["current"] - bench["median"]
    if delta >= 5:
        color = "#16a34a"
        verdict = "ueber dem Branchen-Median"
    elif delta <= -5:
        color = "#dc2626"
        verdict = "unter dem Branchen-Median"
    else:
        color = "#ca8a04"
        verdict = "etwa auf Branchen-Median"

    # The industry label is free text; escape it so it cannot inject markup
    # into the rendered HTML.
    industry = escape(str(bench["industry"]))

    return (
        '<div style="font-family:-apple-system,BlinkMacSystemFont,sans-serif;'
        'max-width:760px;margin:0 auto 12px;padding:8px 14px;'
        'background:#f0f9ff;border:1px solid #bfdbfe;border-radius:6px;'
        'font-size:11px;color:#1e293b">'
        f'<strong>Branchen-Vergleich ({industry}):</strong> '
        f'Ihr Score <strong>{bench["current"]:.1f}</strong> '
        f'<span style="color:{color}">({verdict}, '
        f'Median {bench["median"]:.1f})</span>. '
        f'<span style="color:#64748b">Sie sind besser als '
        f'{bench["percentile"]}% der bisher von uns geprueften '
        f'{bench["sample_size"]} Sites in dieser Branche.</span>'
        '</div>'
    )
|