feat(audit): P107 Branchen-Benchmark-Cockpit fuer Big-4-Demos
CI / secret-scan (push) Has been skipped
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / validate-canonical-controls (push) Successful in 17s
CI / loc-budget (push) Failing after 18s
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Successful in 3m5s
CI / test-go (push) Failing after 54s
CI / iace-gt-coverage (push) Successful in 27s
CI / test-python-backend (push) Successful in 47s
CI / detect-changes (push) Successful in 13s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / test-python-document-crawler (push) Has been skipped
CI / test-python-dsms-gateway (push) Has been skipped
CI / secret-scan (push) Has been skipped
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / validate-canonical-controls (push) Successful in 17s
CI / loc-budget (push) Failing after 18s
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Successful in 3m5s
CI / test-go (push) Failing after 54s
CI / iace-gt-coverage (push) Successful in 27s
CI / test-python-backend (push) Successful in 47s
CI / detect-changes (push) Successful in 13s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / test-python-document-crawler (push) Has been skipped
CI / test-python-dsms-gateway (push) Has been skipped
benchmark_extractor.py — extract_kpis() liefert 18 KPIs pro Snapshot: * vendors_total, vendors_us, vendors_non_eu (mit % je Vendor-Land) * source_breakdown (llm/library/flat_pattern/table_paste/html_table_dom) * max/avg cookies_per_vendor (Konzentrations-Mass) * cookies_in_browser, cookies_detailed_count, cookie_doc_chars * banner_detected, banner_provider, banner_violations * compliance_score, data_quality_pct (wie viele unserer Datenquellen haben Inhalt) * saving_low/high_eur (Heuristik: (vendors - 10) × 1k-5k) anonymize_kpis() ersetzt site_label durch 'OEM 1/2/3' (Industry-Prefix Map: automotive→OEM, banking→Bank, chemistry→Chem, luftfahrt→Airline). GET /api/compliance/agent/admin/benchmark?industry=automotive&sites= VW,BMW,Mercedes&anonymized=true — liefert kpis + summary (n_sites, avg_vendors, total_saving_high). Admin-Page /sdk/benchmark: * Filter-Leiste: Industry-Dropdown, Sites-Input + 5 Preset-Gruppen (Automotive OEMs / Zulieferer, Chemie DAX, Luftfahrt, Banking DAX) * Anonymize-Toggle prominent * 5 Summary-KPI-Karten oben * Vergleichstabelle 13 Spalten (Score, Vendors, US%, Drittland%, Cookies-Browser, Cookie-Doc-kB, Banner ✓/✗, Provider, Verstoesse, Saving €/Jahr, Daten-Qualitaet, Captured-Time) * Red-/Amber-/Green-Indikatoren bei US%/Score/Drittland * Big-4-Hinweis-Footer Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,266 @@
|
|||||||
|
'use client'
|
||||||
|
|
||||||
|
/**
|
||||||
|
* P107 — Branchen-Benchmark-Cockpit.
|
||||||
|
*
|
||||||
|
* Multi-Site-Vergleich auf einen Blick. Anonymize-Toggle für Big-4-
|
||||||
|
* Wirtschaftspruefer-Demos.
|
||||||
|
*
|
||||||
|
* URL: /sdk/benchmark
|
||||||
|
*/
|
||||||
|
|
||||||
|
import React, { useState, useEffect } from 'react'
|
||||||
|
|
||||||
|
/**
 * One row of the benchmark table: the KPIs the backend extracts from a single
 * compliance-check snapshot (`kpis[]` in the `/admin/benchmark` response).
 */
interface Kpi {
  /** Identifier of the audit run the snapshot belongs to. */
  check_id: string
  /** Display name of the site; replaced by e.g. "OEM 1" when anonymized. */
  site_label: string
  /** Domain of the site; replaced by "site-N.example" when anonymized. */
  site_domain: string
  /** ISO timestamp of the snapshot, or null when the snapshot has no creation time. */
  captured_at: string | null
  /** Industry tag taken from the scan context; empty string when unset. */
  industry: string
  /** Number of vendors found for the site. */
  vendors_total: number
  /** Number of vendors with a US country marker. */
  vendors_us: number
  /** Number of vendors whose country is in the backend's non-EU list. */
  vendors_non_eu: number
  /** `vendors_us` as a percentage of `vendors_total` (one decimal). */
  us_pct: number
  /** `vendors_non_eu` as a percentage of `vendors_total` (one decimal). */
  non_eu_pct: number
  /** Vendor count per detection source (llm, library, flat_pattern, ...). */
  source_breakdown: Record<string, number>
  /** Largest number of cookies attributed to a single vendor. */
  max_cookies_per_vendor: number
  /** Mean number of cookies per vendor (one decimal). */
  avg_cookies_per_vendor: number
  /** Cookies present in the browser after accepting the banner. */
  cookies_in_browser: number
  /** Number of entries in the detailed cookie list. */
  cookies_detailed_count: number
  /** Character length of the cookie policy document text. */
  cookie_doc_chars: number
  /** Character length of all captured document texts combined. */
  doc_text_chars_total: number
  /** Whether a consent banner was detected. */
  banner_detected: boolean
  /** Name of the detected banner provider; empty string when unknown. */
  banner_provider: string
  /** Number of banner violations found. */
  banner_violations: number
  /** Compliance score, or null when the snapshot carries none. */
  compliance_score: number | null
  /** Lower bound of the estimated yearly saving in EUR (heuristic). */
  saving_low_eur: number
  /** Upper bound of the estimated yearly saving in EUR (heuristic). */
  saving_high_eur: number
  /** Percentage of expected data sources that actually contain data. */
  data_quality_pct: number
}
|
||||||
|
|
||||||
|
/**
 * Aggregated figures over all rows of one benchmark response
 * (`summary` in the `/admin/benchmark` payload).
 */
interface Summary {
  /** Number of KPI rows the aggregates were computed from. */
  n_sites: number

  /** Mean of `vendors_total` across rows. */
  avg_vendors: number
  /** Mean of `us_pct` across rows. */
  avg_us_pct: number
  /** Mean of `non_eu_pct` across rows. */
  avg_non_eu_pct: number
  /** Mean of `cookies_in_browser` across rows. */
  avg_cookies_browser: number
  /** Mean of `compliance_score` across rows that carry a numeric score. */
  avg_score: number

  /** Largest `vendors_total` of any row. */
  max_vendors: number
  /** Largest `saving_high_eur` of any row. */
  max_saving_high: number

  /** Sum of `saving_low_eur` over all rows. */
  total_saving_low: number
  /** Sum of `saving_high_eur` over all rows. */
  total_saving_high: number
}
|
||||||
|
|
||||||
|
/**
 * Options of the industry dropdown. `id` is sent as the `industry` query
 * parameter; the empty id means "no industry filter".
 */
const INDUSTRIES: { id: string; label: string }[] = [
  { id: '', label: 'Alle Branchen' },
  { id: 'automotive', label: 'Automotive (OEM)' },
  { id: 'banking', label: 'Banking / Finance' },
  { id: 'chemistry', label: 'Chemie / Pharma' },
  { id: 'luftfahrt', label: 'Luftfahrt' },
  { id: 'ecommerce', label: 'E-Commerce' },
  { id: 'saas', label: 'SaaS / Software' },
]
|
||||||
|
|
||||||
|
/**
 * One-click presets for the sites input. `sites` is the comma-separated
 * site-label list that is copied verbatim into the input field.
 */
const PRESET_GROUPS: { id: string; label: string; sites: string }[] = [
  {
    id: 'automotive_oem',
    label: 'Automotive OEMs',
    sites: 'Volkswagen,BMW,Mercedes-Benz,SEAT,AUDI',
  },
  {
    id: 'automotive_supl',
    label: 'Automotive Zulieferer',
    sites: 'ZF Friedrichshafen,Robert Bosch,Continental',
  },
  {
    id: 'chemie',
    label: 'Chemie (DAX)',
    sites: 'BASF,Bayer,Henkel,Linde',
  },
  {
    id: 'luftfahrt',
    label: 'Luftfahrt',
    sites: 'Lufthansa,Eurowings,Condor',
  },
  {
    id: 'banking',
    label: 'Banking (DAX)',
    sites: 'Deutsche Bank,Commerzbank,DZ Bank,KfW',
  },
]
|
||||||
|
|
||||||
|
/**
 * Admin page that compares the benchmark KPIs of several audited sites.
 *
 * Holds the filter state (industry, site list, anonymize flag), loads the
 * benchmark payload once on mount and again on every click on
 * "Aktualisieren", and renders the summary cards plus the comparison table.
 */
export default function BenchmarkPage() {
  const [industry, setIndustry] = useState('')
  const [sites, setSites] = useState('')
  const [anonymized, setAnonymized] = useState(false)
  const [data, setData] = useState<{ kpis: Kpi[]; summary: Summary } | null>(null)
  const [loading, setLoading] = useState(false)
  const [error, setError] = useState<string | null>(null)

  /** Requests the benchmark for the current filter state and stores the result. */
  const fetchData = async () => {
    setLoading(true)
    setError(null)
    try {
      // NOTE(review): the commit message documents the endpoint as
      // /api/compliance/agent/admin/benchmark — confirm this path is routed
      // (proxy/rewrite) to the same handler.
      const url = new URL('/api/compliance/admin/benchmark', window.location.origin)
      if (industry) url.searchParams.set('industry', industry)
      if (sites) url.searchParams.set('sites', sites)
      if (anonymized) url.searchParams.set('anonymized', 'true')
      const r = await fetch(url.toString())
      if (!r.ok) throw new Error(`HTTP ${r.status}`)
      setData(await r.json())
    } catch (e: unknown) {
      // Anything can be thrown, so narrow before reading `.message`.
      setError(e instanceof Error && e.message ? e.message : String(e))
    } finally {
      setLoading(false)
    }
  }

  // Initial load only; later loads are triggered explicitly via the button.
  useEffect(() => { fetchData() }, [])

  const rows = data?.kpis ?? []
  // The backend answers with an empty summary object when no snapshot
  // matched, so only show the cards when the aggregates are actually present.
  const summary = data?.summary && data.summary.n_sites > 0 ? data.summary : null

  return (
    <div className="p-6 max-w-7xl mx-auto">
      <header className="mb-6">
        <h1 className="text-2xl font-bold text-gray-900">
          Branchen-Benchmark-Cockpit
        </h1>
        <p className="text-sm text-gray-600 mt-1">
          DAX-Konzern-Vergleich auf Basis aller bisher gepruefter Sites.
          Mit Anonymize-Toggle fuer Wirtschaftspruefer-Demos.
        </p>
      </header>

      {/* Filter bar */}
      <div className="bg-white border border-gray-200 rounded-lg p-4 mb-4 flex flex-wrap gap-3 items-end">
        <div>
          <label className="block text-xs font-medium text-gray-700 mb-1">Branche</label>
          <select value={industry} onChange={e => setIndustry(e.target.value)}
            className="px-3 py-2 border rounded text-sm">
            {INDUSTRIES.map(i => <option key={i.id} value={i.id}>{i.label}</option>)}
          </select>
        </div>
        <div className="flex-1 min-w-[300px]">
          <label className="block text-xs font-medium text-gray-700 mb-1">
            Sites (komma-getrennt) oder Preset wählen
          </label>
          <input value={sites} onChange={e => setSites(e.target.value)}
            placeholder="Volkswagen,BMW,Mercedes-Benz"
            className="w-full px-3 py-2 border rounded text-sm font-mono" />
          <div className="flex flex-wrap gap-1 mt-1">
            {PRESET_GROUPS.map(p => (
              <button key={p.id} type="button" onClick={() => setSites(p.sites)}
                className="px-2 py-0.5 text-[10px] bg-gray-100 hover:bg-gray-200 rounded">
                {p.label}
              </button>
            ))}
          </div>
        </div>
        <label className="flex items-center gap-2 text-sm cursor-pointer">
          <input type="checkbox" checked={anonymized}
            onChange={e => setAnonymized(e.target.checked)}
            className="rounded" />
          <span><strong>Anonymisieren</strong> (OEM 1/2/3 statt Hersteller-Namen)</span>
        </label>
        <button type="button" onClick={fetchData} disabled={loading}
          className="px-4 py-2 bg-purple-600 text-white rounded font-medium hover:bg-purple-700 disabled:opacity-50">
          {loading ? 'Lade…' : 'Aktualisieren'}
        </button>
      </div>

      {error && (
        <div className="bg-red-50 border border-red-200 text-red-700 rounded p-3 text-sm mb-4">
          Fehler: {error}
        </div>
      )}

      {/* Summary KPIs */}
      {summary && (
        <div className="grid grid-cols-2 md:grid-cols-5 gap-2 mb-4">
          <Kpi label="Sites im Vergleich" value={summary.n_sites} />
          <Kpi label="⌀ Vendors" value={summary.avg_vendors} />
          <Kpi label="⌀ US-Anteil" value={`${summary.avg_us_pct}%`}
            tone={summary.avg_us_pct > 60 ? 'warn' : 'ok'} />
          <Kpi label="⌀ Score" value={summary.avg_score || '—'} />
          <Kpi label="Saving-Potenzial (Σ)" value={`${Math.round(summary.total_saving_high / 1000)}k €`}
            tone="ok" />
        </div>
      )}

      {/* Comparison table */}
      {rows.length > 0 ? (
        <div className="bg-white border border-gray-200 rounded-lg overflow-x-auto">
          <table className="w-full text-xs">
            <thead className="bg-gray-50 text-gray-700">
              <tr>
                <th className="text-left px-3 py-2 sticky left-0 bg-gray-50">Site</th>
                <th className="text-right px-2 py-2">Score</th>
                <th className="text-right px-2 py-2">Vendors</th>
                <th className="text-right px-2 py-2">US%</th>
                <th className="text-right px-2 py-2">Drittland%</th>
                <th className="text-right px-2 py-2">Cookies Browser</th>
                <th className="text-right px-2 py-2">Cookie-Doc kB</th>
                <th className="text-center px-2 py-2">Banner</th>
                <th className="text-left px-2 py-2">Provider</th>
                <th className="text-right px-2 py-2">Banner-Verstöße</th>
                <th className="text-right px-2 py-2">Saving € Jahr</th>
                <th className="text-right px-2 py-2">Daten-Qualität</th>
                <th className="text-left px-2 py-2">Captured</th>
              </tr>
            </thead>
            <tbody>
              {rows.map((k, i) => (
                <tr key={`${k.check_id}-${i}`} className={`border-t hover:bg-gray-50 ${i % 2 ? 'bg-gray-50/30' : ''}`}>
                  <td className="px-3 py-2 font-semibold sticky left-0 bg-inherit">
                    {k.site_label}
                    <div className="text-[9px] text-gray-400 font-mono">{k.check_id}</div>
                  </td>
                  {/* Only a missing score is greyed out; a real score of 0 is rated red. */}
                  <td className={`px-2 py-2 text-right ${
                    k.compliance_score == null ? 'text-gray-400' :
                    k.compliance_score >= 80 ? 'text-green-700' :
                    k.compliance_score >= 60 ? 'text-amber-700' : 'text-red-700'
                  }`}>
                    {k.compliance_score ?? '—'}
                  </td>
                  <td className="px-2 py-2 text-right font-mono">{k.vendors_total}</td>
                  <td className={`px-2 py-2 text-right ${k.us_pct > 60 ? 'text-red-700 font-semibold' : ''}`}>
                    {k.us_pct}%
                  </td>
                  <td className={`px-2 py-2 text-right ${k.non_eu_pct > 70 ? 'text-red-700' : ''}`}>
                    {k.non_eu_pct}%
                  </td>
                  <td className="px-2 py-2 text-right font-mono">{k.cookies_in_browser}</td>
                  <td className="px-2 py-2 text-right text-gray-500">
                    {Math.round(k.cookie_doc_chars / 1000)}k
                  </td>
                  <td className="px-2 py-2 text-center">{k.banner_detected ? '✓' : '✗'}</td>
                  <td className="px-2 py-2 text-gray-600">{k.banner_provider || '—'}</td>
                  <td className={`px-2 py-2 text-right ${k.banner_violations ? 'text-red-700' : 'text-gray-400'}`}>
                    {k.banner_violations || 0}
                  </td>
                  <td className="px-2 py-2 text-right text-green-700 font-mono">
                    {k.saving_high_eur ? `${(k.saving_high_eur / 1000).toFixed(0)}k` : '—'}
                  </td>
                  <td className={`px-2 py-2 text-right ${
                    k.data_quality_pct >= 70 ? 'text-green-700' :
                    k.data_quality_pct >= 40 ? 'text-amber-700' : 'text-red-700'
                  }`}>
                    {k.data_quality_pct}%
                  </td>
                  <td className="px-2 py-2 text-[10px] text-gray-500">
                    {k.captured_at?.substring(0, 16).replace('T', ' ')}
                  </td>
                </tr>
              ))}
            </tbody>
          </table>
        </div>
      ) : !loading && (
        <div className="bg-gray-50 border border-gray-200 rounded-lg p-8 text-center text-gray-500">
          Keine Snapshots gefunden — Filter anpassen oder einen Audit-Lauf starten.
        </div>
      )}

      <div className="mt-4 text-xs text-gray-500">
        <strong>Big-4-Hinweis:</strong> Mit Anonymize-Toggle koennen wir den
        kompletten Branchen-Cut zeigen ohne Hersteller-Namen zu nennen
        (z.B. "OEM 3 hat 78% US-Vendor-Anteil"). Damit ist die Daten-
        Hoheit bei BreakPilot und Big 4 sieht den Mehrwert ohne dass
        Wettbewerber-Vergleiche extern werden.
      </div>
    </div>
  )
}
|
||||||
|
|
||||||
|
/**
 * Small summary card: an uppercase caption with a prominent value below it.
 *
 * @param label Caption shown above the value.
 * @param value Content of the card; anything React can render.
 * @param tone  Colour scheme of the card; defaults to the neutral scheme.
 */
function Kpi({ label, value, tone = 'neutral' }: {
  label: string; value: React.ReactNode; tone?: 'ok' | 'warn' | 'bad' | 'neutral'
}) {
  // Keyed by the tone union so a missing or misspelled tone fails to compile.
  const colors: Record<'ok' | 'warn' | 'bad' | 'neutral', string> = {
    ok: 'text-green-700 bg-green-50 border-green-200',
    warn: 'text-amber-700 bg-amber-50 border-amber-200',
    bad: 'text-red-700 bg-red-50 border-red-200',
    neutral: 'text-gray-700 bg-white border-gray-200',
  }
  return (
    <div className={`border rounded p-3 ${colors[tone]}`}>
      <div className="text-[10px] uppercase tracking-wider opacity-70">{label}</div>
      <div className="text-xl font-bold mt-1">{value}</div>
    </div>
  )
}
|
||||||
@@ -207,6 +207,42 @@ async def get_snapshot(snapshot_id: str):
|
|||||||
db.close()
|
db.close()
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/admin/benchmark")
async def benchmark(
    industry: str = "",
    sites: str = "",
    anonymized: bool = False,
    limit: int = 50,
):
    """P107 - industry benchmark cockpit endpoint.

    Args:
        industry: Industry id such as 'automotive' or 'banking'; empty means
            no industry filter.
        sites: Comma-separated list of site labels; empty means no site filter.
        anonymized: When true, site labels and domains are replaced by neutral
            placeholders before the response is built.
        limit: Maximum number of snapshots to load; clamped to 1..200.

    Returns:
        Dict with the effective industry, the anonymized flag, the list of
        site labels, the KPI rows and the aggregated summary.
    """
    from database import SessionLocal
    from compliance.services.benchmark_extractor import (
        load_snapshots_for_benchmark, anonymize_kpis,
        build_benchmark_summary,
    )

    # A negative value would be passed straight into SQL LIMIT and fail the
    # query; a very large one would load every snapshot including its JSON
    # payloads. Keep the request inside a sane window instead.
    limit = max(1, min(limit, 200))

    site_list = [s.strip() for s in sites.split(",") if s.strip()] if sites else None
    db = SessionLocal()
    try:
        kpis = load_snapshots_for_benchmark(
            db, industry=industry or None, sites=site_list, limit=limit,
        )
    finally:
        # Release the session even when loading fails.
        db.close()
    if anonymized:
        kpis = anonymize_kpis(kpis, industry=industry)
    return {
        "industry": industry or "all",
        "anonymized": anonymized,
        "sites": [k.get("site_label") for k in kpis],
        "kpis": kpis,
        "summary": build_benchmark_summary(kpis),
    }
|
||||||
|
|
||||||
|
|
||||||
@router.post("/admin/tcf-ingest")
|
@router.post("/admin/tcf-ingest")
|
||||||
async def tcf_ingest():
|
async def tcf_ingest():
|
||||||
"""P105 — IAB TCF Vendor-Liste ingestieren / refreshen.
|
"""P105 — IAB TCF Vendor-Liste ingestieren / refreshen.
|
||||||
|
|||||||
@@ -0,0 +1,265 @@
|
|||||||
|
"""
|
||||||
|
P107 — Branchen-Benchmark-KPIs pro Snapshot.
|
||||||
|
|
||||||
|
Extrahiert aus einem compliance_check_snapshot 18 KPIs die fuer den
|
||||||
|
Multi-Site-Vergleich relevant sind. Wird vom /admin/benchmark Endpoint
|
||||||
|
genutzt um Vergleichstabellen zu rendern.
|
||||||
|
|
||||||
|
USP: keine andere Compliance-Software gibt einem Wirtschaftspruefer
|
||||||
|
einen so granularen Branchen-Querschnitt. Bei DAX-Konzernen ist das
|
||||||
|
ein echtes Verkaufs-Asset (Big 4 koennen es ihren Kunden als
|
||||||
|
'wir sehen die ganze Branche' verkaufen).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from sqlalchemy import text as sa_text
|
||||||
|
from sqlalchemy.orm import Session
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
_US_COUNTRIES = {"US", "USA", "United States"}
|
||||||
|
_NON_EU = {"US", "CN", "RU", "IN", "JP", "BR", "AU", "CA", "KR",
|
||||||
|
"MX", "ZA", "TR", "SG", "TW", "HK"}
|
||||||
|
|
||||||
|
|
||||||
|
def _safe_int(v: Any, default: int = 0) -> int:
|
||||||
|
try:
|
||||||
|
return int(v)
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
return default
|
||||||
|
|
||||||
|
|
||||||
|
def _country_from_vendor(v: dict) -> str:
|
||||||
|
c = (v.get("country") or "").strip().upper()
|
||||||
|
if c:
|
||||||
|
return c
|
||||||
|
# Aus vendor_country wenn vorhanden (TCF-Authority Eintraege)
|
||||||
|
return ""
|
||||||
|
|
||||||
|
|
||||||
|
def extract_kpis(snapshot: dict) -> dict:
    """Derive the benchmark KPIs from one snapshot row.

    Args:
        snapshot: Dict with the keys id, check_id, site_label, site_domain,
            created_at, banner_result, cmp_vendors, doc_entries and
            scan_context. Missing or ``None`` values are treated as empty.

    Returns:
        Flat dict with header fields (check_id, site_label, site_domain,
        captured_at, industry) plus vendor, cookie, banner, score, saving and
        data-quality KPIs.
    """
    br = snapshot.get("banner_result") or {}
    cv = snapshot.get("cmp_vendors") or []
    de = snapshot.get("doc_entries") or []
    sc = snapshot.get("scan_context") or {}

    # Cookies present in the browser after the banner was accepted.
    phases = br.get("phases") or {}
    after_accept = phases.get("after_accept") or {}
    cookies_in_browser = len(after_accept.get("cookies") or [])
    cd = br.get("cookies_detailed") or []

    # Document text lengths: all documents, and the first cookie document.
    doc_text_total = sum(len(d.get("text") or "") for d in de)
    cookie_doc_len = next(
        (len(d.get("text") or "") for d in de if d.get("doc_type") == "cookie"), 0,
    )

    # Vendor breakdown. Percentages are relative to all vendors, including
    # those without a country marker; max(1, ...) avoids division by zero.
    n_vendors = len(cv)
    countries = [c for c in (_country_from_vendor(v) for v in cv) if c]
    n_us = sum(1 for c in countries if c in _US_COUNTRIES)
    n_non_eu = sum(1 for c in countries if c in _NON_EU)
    us_pct = round(n_us / max(1, n_vendors) * 100, 1)
    non_eu_pct = round(n_non_eu / max(1, n_vendors) * 100, 1)

    # Vendor source mix: a vendor may carry several ';'-separated sources and
    # is counted once per source; blank sources are counted under "?".
    by_src: dict[str, int] = {}
    for v in cv:
        for s in (v.get("source") or "?").split(";"):
            s = s.strip() or "?"
            by_src[s] = by_src.get(s, 0) + 1

    # Cookies per vendor (concentration).
    cookie_counts = [len(v.get("cookies") or []) for v in cv]
    max_cookies_per_vendor = max(cookie_counts) if cookie_counts else 0
    avg_cookies_per_vendor = (
        round(sum(cookie_counts) / len(cookie_counts), 1) if cookie_counts else 0
    )

    # Banner checks.
    bc = br.get("banner_checks") or {}
    n_banner_violations = len(bc.get("violations") or [])
    banner_detected = bool(br.get("banner_detected"))

    # Compliance score (best effort): fall back to completeness_pct only when
    # no score is present at all, so a genuine score of 0 is kept.
    score = br.get("compliance_score")
    if score is None:
        score = br.get("completeness_pct")

    # Estimated saving (licence consolidation, heuristic): every vendor above
    # the assumed baseline of 10 counts as 1,000 EUR (low) to 5,000 EUR (high)
    # per year.
    median_vendors = 10
    excess_vendors = max(0, n_vendors - median_vendors)
    saving_low = excess_vendors * 1000
    saving_high = excess_vendors * 5000

    # created_at is normally a datetime; tolerate an already serialised value.
    created = snapshot.get("created_at")
    if not created:
        captured_at = None
    elif hasattr(created, "isoformat"):
        captured_at = created.isoformat()
    else:
        captured_at = str(created)

    return {
        # Header
        "check_id": snapshot.get("check_id"),
        "site_label": snapshot.get("site_label"),
        "site_domain": snapshot.get("site_domain"),
        "captured_at": captured_at,
        "industry": sc.get("industry") or "",
        # Vendor KPIs
        "vendors_total": n_vendors,
        "vendors_us": n_us,
        "vendors_non_eu": n_non_eu,
        "us_pct": us_pct,
        "non_eu_pct": non_eu_pct,
        "source_breakdown": by_src,
        "max_cookies_per_vendor": max_cookies_per_vendor,
        "avg_cookies_per_vendor": avg_cookies_per_vendor,
        # Cookie KPIs
        "cookies_in_browser": cookies_in_browser,
        "cookies_detailed_count": len(cd),
        "cookie_doc_chars": cookie_doc_len,
        "doc_text_chars_total": doc_text_total,
        # Banner
        "banner_detected": banner_detected,
        "banner_provider": br.get("banner_provider") or "",
        "banner_violations": n_banner_violations,
        # Compliance / score
        "compliance_score": score,
        # Saving (heuristic)
        "saving_low_eur": saving_low,
        "saving_high_eur": saving_high,
        # Capture quality: share of expected data sources that contain data
        "data_quality_pct": _quality_pct(snapshot),
    }
|
||||||
|
|
||||||
|
|
||||||
|
def _quality_pct(snapshot: dict) -> int:
|
||||||
|
"""Wieviel Prozent der erwarteten Datenquellen haben Inhalt?"""
|
||||||
|
br = snapshot.get("banner_result") or {}
|
||||||
|
cv = snapshot.get("cmp_vendors") or []
|
||||||
|
de = snapshot.get("doc_entries") or []
|
||||||
|
cd = br.get("cookies_detailed") or []
|
||||||
|
aa = (br.get("phases") or {}).get("after_accept") or {}
|
||||||
|
|
||||||
|
checks = [
|
||||||
|
br.get("banner_detected") is True,
|
||||||
|
len(cv) > 0,
|
||||||
|
len(de) > 0,
|
||||||
|
len(cd) > 0,
|
||||||
|
len(aa.get("cookies") or []) > 0,
|
||||||
|
any((d.get("text") or "") for d in de),
|
||||||
|
br.get("compliance_score") is not None or br.get("completeness_pct") is not None,
|
||||||
|
]
|
||||||
|
return round(sum(1 for x in checks if x) / len(checks) * 100)
|
||||||
|
|
||||||
|
|
||||||
|
def load_snapshots_for_benchmark(
    db: Session,
    industry: str | None = None,
    sites: list[str] | None = None,
    limit: int = 50,
) -> list[dict]:
    """Load the newest snapshots and return their extracted KPIs.

    Args:
        db: Open SQLAlchemy session used to run the query.
        industry: When set, only snapshots whose scan_context carries exactly
            this industry are returned.
        sites: When set, only snapshots whose site_label is in this list are
            returned.
        limit: Maximum number of snapshot rows to load.

    Returns:
        One KPI dict per snapshot row (see ``extract_kpis``), newest first.
    """
    import json

    def _decode(value: Any, expected: type) -> Any:
        """Return ``value`` as an instance of ``expected``, else ``None``.

        JSON columns may arrive already decoded or as text, depending on the
        driver. Undecodable or wrongly shaped values become ``None`` so the
        KPI extraction sees "no data" instead of a raw string.
        """
        if isinstance(value, (str, bytes, bytearray)):
            try:
                value = json.loads(value)
            except (TypeError, ValueError):
                logger.warning("benchmark: ignoring snapshot column with invalid JSON")
                return None
        return value if isinstance(value, expected) else None

    # Only fixed SQL fragments are concatenated; all values are bound params.
    where = []
    params: dict[str, Any] = {"lim": limit}
    if industry:
        where.append("(scan_context->>'industry') = :ind")
        params["ind"] = industry
    if sites:
        where.append("site_label = ANY(:sites)")
        params["sites"] = sites
    where_sql = " AND ".join(where) if where else "TRUE"

    sql = (
        "SELECT id::text, check_id, site_label, site_domain, created_at, "
        " banner_result, cmp_vendors, doc_entries, scan_context "
        "FROM compliance.compliance_check_snapshots "
        f"WHERE {where_sql} "
        "ORDER BY created_at DESC LIMIT :lim"
    )

    rows = db.execute(sa_text(sql), params).fetchall()
    out: list[dict] = []
    for r in rows:
        snap = {
            "id": r[0],
            "check_id": r[1],
            "site_label": r[2],
            "site_domain": r[3],
            "created_at": r[4],
            "banner_result": _decode(r[5], dict),
            "cmp_vendors": _decode(r[6], list) or [],
            "doc_entries": _decode(r[7], list) or [],
            "scan_context": _decode(r[8], dict) or {},
        }
        out.append(extract_kpis(snap))
    return out
|
||||||
|
|
||||||
|
|
||||||
|
def anonymize_kpis(kpis: list[dict], industry: str = "") -> list[dict]:
    """Replace site labels and domains with neutral placeholders.

    Labels become '<prefix> 1', '<prefix> 2', ... where the prefix depends on
    the industry (automotive -> OEM, banking -> Bank, chemistry -> Chem, ...)
    and falls back to 'Site'. Domains become 'site-<n>.example' with the same
    number. Rows sharing a site_label share one placeholder.

    Args:
        kpis: KPI dicts as produced by ``extract_kpis``; they are not mutated.
        industry: Industry id used to pick the prefix (case-insensitive);
            empty or ``None`` selects the generic prefix.

    Returns:
        Copies of the KPI dicts, sorted by their original site_label.
    """
    prefix_map = {
        "automotive": "OEM",
        "banking": "Bank",
        "chemistry": "Chem",
        "luftfahrt": "Airline",
        "saas": "SaaS",
        "ecommerce": "Shop",
    }
    pfx = prefix_map.get((industry or "").lower(), "Site")

    # Numbers follow the alphabetical order of the real labels so the mapping
    # is deterministic across requests.
    # NOTE(review): every other field, including check_id, is passed through
    # unchanged - confirm none of them identifies the site.
    index_by_label: dict[str, int] = {}
    out = []
    for k in sorted(kpis, key=lambda x: (x.get("site_label") or "")):
        sl = k.get("site_label") or ""
        if sl not in index_by_label:
            index_by_label[sl] = len(index_by_label) + 1
        idx = index_by_label[sl]
        anon_k = dict(k)
        anon_k["site_label"] = f"{pfx} {idx}"
        # Use the label's own number instead of a running counter, so label
        # and domain cannot drift apart.
        anon_k["site_domain"] = f"site-{idx}.example"
        out.append(anon_k)
    return out
|
||||||
|
|
||||||
|
|
||||||
|
def build_benchmark_summary(kpis: list[dict]) -> dict:
    """Aggregate the KPI rows of one benchmark into summary figures.

    Returns an empty dict when ``kpis`` is empty. Averages and maxima only
    consider rows whose value for the field is an int or float; they are 0
    when no row qualifies. Averages are rounded to one decimal.
    """
    if len(kpis) == 0:
        return {}

    def _numbers(field: str) -> list:
        """Collect the numeric values of ``field`` across all rows."""
        collected = []
        for row in kpis:
            value = row.get(field)
            if isinstance(value, (int, float)):
                collected.append(value)
        return collected

    def _mean(field: str) -> float:
        numbers = _numbers(field)
        if not numbers:
            return 0
        return round(sum(numbers) / len(numbers), 1)

    def _largest(field: str):
        numbers = _numbers(field)
        if not numbers:
            return 0
        return max(numbers)

    def _total(field: str):
        return sum(row.get(field) or 0 for row in kpis)

    summary = {"n_sites": len(kpis)}
    summary["avg_vendors"] = _mean("vendors_total")
    summary["avg_us_pct"] = _mean("us_pct")
    summary["avg_non_eu_pct"] = _mean("non_eu_pct")
    summary["avg_cookies_browser"] = _mean("cookies_in_browser")
    summary["avg_score"] = _mean("compliance_score")
    summary["max_vendors"] = _largest("vendors_total")
    summary["max_saving_high"] = _largest("saving_high_eur")
    summary["total_saving_low"] = _total("saving_low_eur")
    summary["total_saving_high"] = _total("saving_high_eur")
    return summary
|
||||||
Reference in New Issue
Block a user