feat(vvt): recipient-type classification + 3-section VVT table
Per user request: BMW (and others) put their own services AND external
vendors in the same cookie-policy widget. The VVT-Tabelle now groups
them by Art. 30(1)(d) DSGVO recipient category so the DSB can act on
the right buckets:
- INTERNAL — owner processing for itself ('BMW AG — XYZ')
- GROUP_COMPANY — same brand family, different legal entity ('BMW Bank')
- PROCESSOR — Auftragsverarbeiter, AVV-pflichtig (Adobe, Akamai)
- CONTROLLER — independent / joint controller (Meta Pixel, Google
Ads, LinkedIn — they run their own profiles)
- AUTHORITY — government bodies (rare in cookies)
- OTHER — fallback
New module vendor_classifier.py:
- owner_from_url(url) — derive site-owner token (bmw.de -> 'BMW',
mercedes-benz.de -> 'Mercedes-Benz')
- classify(name, category, owner) — strict 5-tier heuristic (AUTHORITY is never assigned by the heuristic):
* INTERNAL: vendor name first-token is '<Owner>' / '<Owner> AG' /
'<Owner> SE' / '<Owner> GmbH' / '<Owner> AG & Co. KG'
* GROUP_COMPANY: starts with '<Owner> ' but isn't '<Owner> AG'
* CONTROLLER: matches a known joint-controller list (Meta, Google
Ads, YouTube, LinkedIn Insight, TikTok, Pinterest, Taboola,
Outbrain, Criteo, Twitter, Reddit, ...)
* PROCESSOR: legal-form suffix in name (GmbH, AG, Inc., A/S,
B.V., S.A., Ltd., LLC, ...)
* OTHER: anything else
vendor_extractor.extract_vendors_from_payloads now takes owner_name:
- Passes it through to classify() for every extracted vendor record
- The route derives owner_name via _company_name_from_url(doc_entries)
- LLM-extracted vendors are classified the same way (so V3 fallback
also produces tagged records)
agent_doc_check_extras.build_vvt_table_html rewritten:
- Buckets vendors by recipient_type
- Renders one section per non-empty bucket, in canonical order
(RECIPIENT_TYPE_SECTIONS), each with section header + count + bad
count + nested table
- Within each section: sorted by compliance_score ascending
- Response JSON cmp_vendors includes recipient_type so the frontend
can later import per-category into the VVT module
Expected BMW result: ~60 INTERNAL rows (BMW AG own services),
~25 PROCESSOR rows (Adobe, Adform, Akamai, AWS, ...), ~5 CONTROLLER
rows (Meta Pixel, Google, LinkedIn, Pinterest, Outbrain, Taboola).
This commit is contained in:
@@ -237,58 +237,32 @@ def _category_label(kat: str) -> str:
|
||||
def build_vvt_table_html(vendors: list[dict]) -> str:
|
||||
"""Render the per-vendor VVT-style table for the email report.
|
||||
|
||||
One row per vendor. Columns: Name | Kategorie | Sitz | Cookies |
|
||||
Opt-Out (Status) | Privacy (Status) | Compliance-Score.
|
||||
Splits vendors into 3-4 sections by recipient_type (Art. 30(1)(d)
|
||||
DSGVO):
|
||||
|
||||
Vendors are expected to come from vendor_extractor.extract_vendors_from_payloads
|
||||
and have already been scored by cookie_link_validator.score_vendors.
|
||||
1. INTERNAL — own departments / own systems
|
||||
2. GROUP_COMPANY — parent/subsidiary (if any)
|
||||
3. PROCESSOR — Auftragsverarbeiter (AVV-pflichtig)
|
||||
4. CONTROLLER — joint/independent controllers (Meta, Google,
|
||||
LinkedIn — they build own profiles)
|
||||
5. AUTHORITY / OTHER — rest
|
||||
|
||||
Within each section: rows sorted by compliance_score ascending so
|
||||
the weakest entries surface first.
|
||||
"""
|
||||
if not vendors:
|
||||
return ""
|
||||
|
||||
vendors = sorted(vendors, key=lambda v: v.get("compliance_score", 0))
|
||||
rows: list[str] = []
|
||||
# Import here to avoid pulling backend service deps at module load
|
||||
from compliance.services.vendor_classifier import RECIPIENT_TYPE_SECTIONS
|
||||
|
||||
# Bucket vendors by recipient_type
|
||||
by_type: dict[str, list[dict]] = {}
|
||||
for v in vendors:
|
||||
name = v.get("name") or "Unbekannt"
|
||||
category = _category_label(v.get("category", ""))
|
||||
country = v.get("country") or "—"
|
||||
cookies = v.get("cookies") or []
|
||||
n_cookies = len(cookies)
|
||||
score = int(v.get("compliance_score", 0))
|
||||
flags = v.get("compliance_flags") or []
|
||||
|
||||
opt_status = _link_status_badge(
|
||||
v.get("opt_out_url"), v.get("opt_out_ok"),
|
||||
v.get("opt_out_status"),
|
||||
)
|
||||
privacy_status = _link_status_badge(
|
||||
v.get("privacy_policy_url"), v.get("privacy_ok"),
|
||||
v.get("privacy_status"),
|
||||
)
|
||||
|
||||
score_color = ("#16a34a" if score >= 80 else
|
||||
"#d97706" if score >= 50 else "#dc2626")
|
||||
flag_str = ""
|
||||
if flags:
|
||||
flag_str = (
|
||||
f'<div style="font-size:10px;color:#94a3b8;margin-top:2px">'
|
||||
f'{", ".join(flags[:4])}</div>'
|
||||
)
|
||||
rows.append(
|
||||
f'<tr style="border-top:1px solid #e2e8f0">'
|
||||
f'<td style="padding:6px 8px;color:#1e293b;font-size:11px">'
|
||||
f'{name}{flag_str}</td>'
|
||||
f'<td style="padding:6px 8px;color:#475569;font-size:11px">{category}</td>'
|
||||
f'<td style="padding:6px 8px;color:#475569;font-size:11px">{country}</td>'
|
||||
f'<td style="padding:6px 8px;text-align:center;color:#475569;font-size:11px">'
|
||||
f'{n_cookies}</td>'
|
||||
f'<td style="padding:6px 8px;text-align:center">{opt_status}</td>'
|
||||
f'<td style="padding:6px 8px;text-align:center">{privacy_status}</td>'
|
||||
f'<td style="padding:6px 8px;text-align:right;font-weight:600;'
|
||||
f'color:{score_color};font-size:11px">{score}%</td>'
|
||||
f'</tr>'
|
||||
)
|
||||
rt = (v.get("recipient_type") or "OTHER").upper()
|
||||
by_type.setdefault(rt, []).append(v)
|
||||
|
||||
# Top summary
|
||||
n_total = len(vendors)
|
||||
n_critical = sum(1 for v in vendors if v.get("compliance_score", 0) < 50)
|
||||
summary = (
|
||||
@@ -297,15 +271,40 @@ def build_vvt_table_html(vendors: list[dict]) -> str:
|
||||
if n_critical else " — alle ueber 50%")
|
||||
)
|
||||
|
||||
return (
|
||||
out: list[str] = [
|
||||
'<div style="font-family:-apple-system,BlinkMacSystemFont,sans-serif;'
|
||||
'max-width:760px;margin:0 auto 16px;padding:12px 16px;'
|
||||
'background:#fafafa;border:1px solid #e5e7eb;border-radius:8px">'
|
||||
'background:#fafafa;border:1px solid #e5e7eb;border-radius:8px">',
|
||||
'<h3 style="margin:0 0 4px;font-size:14px;color:#334155">'
|
||||
'VVT-Vorschlag: Drittanbieter aus Cookie-Richtlinie</h3>'
|
||||
'VVT-Vorschlag: Drittanbieter aus Cookie-Richtlinie</h3>',
|
||||
f'<p style="margin:0 0 10px;font-size:11px;color:#6b7280">{summary}. '
|
||||
'Sortiert nach Compliance-Score (niedrig zuerst — diese Eintraege '
|
||||
'pruefen).</p>'
|
||||
'Gruppiert nach Empfaengerkategorie (Art. 30(1)(d) DSGVO), innerhalb '
|
||||
'jeder Gruppe nach Compliance-Score sortiert.</p>',
|
||||
]
|
||||
|
||||
for rtype, section_label in RECIPIENT_TYPE_SECTIONS:
|
||||
rows = by_type.get(rtype) or []
|
||||
if not rows:
|
||||
continue
|
||||
rows = sorted(rows, key=lambda v: v.get("compliance_score", 0))
|
||||
n = len(rows)
|
||||
n_bad = sum(1 for v in rows if v.get("compliance_score", 0) < 50)
|
||||
bad_hint = (f' <span style="color:#dc2626">({n_bad} unter 50%)</span>'
|
||||
if n_bad else "")
|
||||
out.append(
|
||||
f'<h4 style="margin:14px 0 4px;font-size:12px;color:#1e293b;'
|
||||
f'border-top:1px solid #e2e8f0;padding-top:8px">'
|
||||
f'{section_label} <span style="color:#94a3b8;font-weight:400">'
|
||||
f'({n}){bad_hint}</span></h4>'
|
||||
)
|
||||
out.append(_render_vendor_section(rows))
|
||||
|
||||
out.append('</div>')
|
||||
return "".join(out)
|
||||
|
||||
|
||||
def _render_vendor_section(rows: list[dict]) -> str:
|
||||
body: list[str] = [
|
||||
'<table style="width:100%;border-collapse:collapse;font-size:11px">'
|
||||
'<thead><tr style="background:#f1f5f9;color:#475569;text-align:left">'
|
||||
'<th style="padding:5px 8px">Name</th>'
|
||||
@@ -315,9 +314,50 @@ def build_vvt_table_html(vendors: list[dict]) -> str:
|
||||
'<th style="padding:5px 8px;text-align:center">Opt-Out</th>'
|
||||
'<th style="padding:5px 8px;text-align:center">Privacy</th>'
|
||||
'<th style="padding:5px 8px;text-align:right">Score</th>'
|
||||
'</tr></thead><tbody>'
|
||||
+ "".join(rows)
|
||||
+ '</tbody></table></div>'
|
||||
'</tr></thead><tbody>',
|
||||
]
|
||||
for v in rows:
|
||||
body.append(_render_vendor_row_full(v))
|
||||
body.append('</tbody></table>')
|
||||
return "".join(body)
|
||||
|
||||
|
||||
def _render_vendor_row_full(v: dict) -> str:
    """Render one vendor record as a ``<tr>`` element of the VVT table.

    Columns, in order: name (with up to four compliance flags shown
    underneath), category label, country, cookie count, opt-out link
    badge, privacy-policy link badge, compliance score in percent.

    Args:
        v: Vendor record. Keys read here: ``name``, ``category``,
            ``country``, ``cookies``, ``compliance_score``,
            ``compliance_flags``, ``opt_out_url``, ``opt_out_ok``,
            ``opt_out_status``, ``privacy_policy_url``, ``privacy_ok``,
            ``privacy_status``. Every key is optional.

    Returns:
        The HTML markup for a single table row.
    """
    # Function-scope import so the block does not rely on a module-level
    # import that may not exist.
    from html import escape

    # Name, country and flags are free text taken from the vendor record;
    # escape them so markup inside a vendor name cannot break or inject
    # into the rendered report.
    name = escape(str(v.get("name") or "Unbekannt"))
    country = escape(str(v.get("country") or "—"))
    # NOTE(review): _category_label is defined elsewhere; its output is
    # inserted as-is because it may already be HTML-safe — confirm.
    category = _category_label(v.get("category", ""))
    n_cookies = len(v.get("cookies") or [])
    # "or 0" also covers a key that is present but set to None, which
    # int() would otherwise reject with a TypeError.
    score = int(v.get("compliance_score") or 0)
    flags = v.get("compliance_flags") or []

    # The badge helper returns ready-made markup, so it is not escaped.
    opt_status = _link_status_badge(
        v.get("opt_out_url"), v.get("opt_out_ok"), v.get("opt_out_status"),
    )
    privacy_status = _link_status_badge(
        v.get("privacy_policy_url"), v.get("privacy_ok"),
        v.get("privacy_status"),
    )

    # Green from 80, amber from 50, red below.
    if score >= 80:
        score_color = "#16a34a"
    elif score >= 50:
        score_color = "#d97706"
    else:
        score_color = "#dc2626"

    flag_str = ""
    if flags:
        # Only the first four flags are shown to keep the cell compact.
        shown_flags = ", ".join(escape(str(flag)) for flag in flags[:4])
        flag_str = (
            f'<div style="font-size:10px;color:#94a3b8;margin-top:2px">'
            f'{shown_flags}</div>'
        )

    return (
        f'<tr style="border-top:1px solid #e2e8f0">'
        f'<td style="padding:6px 8px;color:#1e293b;font-size:11px">'
        f'{name}{flag_str}</td>'
        f'<td style="padding:6px 8px;color:#475569;font-size:11px">{category}</td>'
        f'<td style="padding:6px 8px;color:#475569;font-size:11px">{country}</td>'
        f'<td style="padding:6px 8px;text-align:center;color:#475569;font-size:11px">'
        f'{n_cookies}</td>'
        f'<td style="padding:6px 8px;text-align:center">{opt_status}</td>'
        f'<td style="padding:6px 8px;text-align:center">{privacy_status}</td>'
        f'<td style="padding:6px 8px;text-align:right;font-weight:600;'
        f'color:{score_color};font-size:11px">{score}%</td>'
        f'</tr>'
    )
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user