"""
Aggregator: doc-check results + cmp_vendors + redundancy_report
-> uniform finding records for unified_findings_store.

Only DERIVED/normalised findings are stored (see memory
'feedback_oem_data_legal.md'): no raw CMP cookie texts, no 1:1 mirroring of
third-party vendor lists — only our own risk/status assessment.

Hook:
    from compliance.services.unified_findings_collector import collect
    from compliance.services.unified_findings_store import record_findings
    findings = collect(check_id, results, cmp_vendors, redundancy_report, doc_texts)
    record_findings(check_id, findings)
"""

from __future__ import annotations

import logging
from typing import Any

logger = logging.getLogger(__name__)

# Fallback severity per source_type, used when a check carries no severity.
_SEVERITY_DEFAULT = {
    "mc": "MEDIUM",
    "pflichtangabe": "MEDIUM",
    "vendor": "MEDIUM",
    "redundanz": "LOW",
}

# Known cmp_vendor flags -> (default severity, short label).
# The flag string itself is also used as the action-recipe lookup key.
_VENDOR_FLAG_SEVERITY = {
    "no_cookies_listed": ("HIGH", "Cookie-Auflistung fehlt"),
    "no_country": ("MEDIUM", "Sitzland des Anbieters fehlt"),
    "no_privacy_url": ("HIGH", "Datenschutzerklaerung des Anbieters fehlt"),
    "broken_privacy_url": ("HIGH", "Datenschutz-URL nicht erreichbar"),
    "no_opt_out_url": ("MEDIUM", "Widerspruchs-/Opt-Out-Link fehlt"),
    "broken_opt_out": ("MEDIUM", "Opt-Out-Link nicht erreichbar"),
    "no_name": ("HIGH", "Anbieter-Name fehlt"),
    "no_purpose": ("HIGH", "Verarbeitungszweck fehlt"),
    "cookies_no_expiry": ("LOW", "Cookie-Speicherdauer fehlt"),
    "cookies_no_names": ("LOW", "Cookie-Namen fehlen"),
}


def _safe_recipe(key: str) -> dict:
    """Look up an action recipe for *key*; return ``{}`` when unavailable.

    The recipes module is optional and therefore imported lazily. A missing
    module is silent; any other failure is logged at debug level so a broken
    lookup stays diagnosable without interrupting collection.
    """
    try:
        from compliance.services.finding_action_recipes import recipe_for

        found = recipe_for(key)
        return dict(found) if found else {}
    except ImportError:
        return {}
    except Exception:
        logger.debug("recipe lookup failed for key %r", key, exc_info=True)
        return {}


def _safe_anchor(label: str, doc_text: str, doc_id: str) -> dict:
    """Best-effort anchor lookup for *label* in *doc_text*; ``{}`` on any failure.

    The locator module is imported lazily. The result is normalised through
    ``dict(...)`` (as in ``_safe_recipe``) so callers can always use ``.get``.
    """
    if not label or not doc_text:
        return {}
    try:
        from compliance.services.doc_anchor_locator import locate_anchor

        found = locate_anchor(label, doc_text, doc_id)
        return dict(found) if found else {}
    except ImportError:
        return {}
    except Exception:
        logger.debug("anchor lookup failed for label %r", label, exc_info=True)
        return {}


def _from_doc_check(
    check_id: str,
    r: Any,
    doc_text: str,
) -> list[dict]:
    """Convert one DocCheckResult into unified-finding rows.

    A result whose error starts with "Nicht anwendbar" yields one INFO/"na"
    row, any other error yields one HIGH/"failed" row, otherwise one row is
    produced per entry in ``r.checks``. ``check_id`` is accepted for signature
    parity with the other converters and is not used here.
    """
    if r.error and r.error.startswith("Nicht anwendbar"):
        return [{
            "source_type": "pflichtangabe",
            "doc_type": r.doc_type,
            "severity": "INFO",
            "status": "na",
            "regulation": "",
            "label": f"{r.label}: {r.error}",
            "hint": r.error,
            "action_recipe": {},
            "payload": {"scenario": r.scenario},
        }]
    if r.error:
        return [{
            "source_type": "pflichtangabe",
            "doc_type": r.doc_type,
            "severity": "HIGH",
            "status": "failed",
            "regulation": "",
            "label": f"{r.label}: Dokument nicht erreichbar",
            "hint": r.error[:400],
            "action_recipe": {},
            "payload": {},
        }]

    out: list[dict] = []
    for c in (r.checks or []):
        # Check ids prefixed "mc-" are reported under source_type "mc".
        source = "mc" if (c.id or "").startswith("mc-") else "pflichtangabe"
        if c.passed:
            status = "passed"
        elif c.skipped:
            status = "skipped"
        else:
            status = "failed"
        severity = (c.severity or _SEVERITY_DEFAULT[source]).upper()

        # Anchor + recipe only for failed checks — passed/skipped rows stay lean.
        recipe: dict = {}
        anchor: dict = {}
        if status == "failed":
            # Try the label first, then the check id.
            # NOTE(review): presumably recipe_for matches label substrings as
            # well as exact keys — confirm against finding_action_recipes.
            recipe = _safe_recipe(c.label or "") or _safe_recipe(c.id or "")
            anchor = _safe_anchor(c.label or "", doc_text, r.doc_type)

        out.append({
            "source_type": source,
            "doc_type": r.doc_type,
            "severity": severity,
            "status": status,
            "regulation": c.regulation or "",
            "label": c.label or "",
            "hint": c.hint or "",
            "action_recipe": recipe,
            "anchor_excerpt": (anchor.get("anchor_phrase") or "")[:800],
            "anchor_conf": _conf_to_score(anchor),
            "payload": {
                "mc_id": c.id,
                "level": c.level,
                "parent": c.parent,
                "matched_text": (c.matched_text or "")[:300],
                "article": c.article or "",
                "anchor_method": anchor.get("method"),
                "anchor_position": anchor.get("position_hint"),
            },
        })
    return out


def _conf_to_score(anchor: dict) -> float:
    """Return ``anchor["score"]`` as float; 0.0 when missing or not numeric."""
    if not anchor:
        return 0.0
    try:
        return float(anchor.get("score") or 0.0)
    except (TypeError, ValueError):
        return 0.0


def _from_vendors(check_id: str, vendors: list[dict]) -> list[dict]:
    """Turn every compliance flag of every vendor into one finding row.

    Malformed input is skipped per entry (non-dict vendors, non-string flags)
    so one bad record does not discard the findings of all other vendors.
    ``check_id`` is accepted for signature parity and is not used here.
    """
    out: list[dict] = []
    for v in vendors or []:
        if not isinstance(v, dict):
            logger.warning(
                "collect: skipping malformed vendor entry (%s)", type(v).__name__
            )
            continue
        name = v.get("name") or v.get("vendor_name") or "Unbekannter Anbieter"
        country = v.get("country") or ""
        risk = v.get("compliance_risk") or {}
        if not isinstance(risk, dict):
            risk = {}
        for flag in (v.get("compliance_flags") or v.get("flags") or []):
            if not isinstance(flag, str):
                logger.warning(
                    "collect: skipping non-string vendor flag %r for %s", flag, name
                )
                continue
            known = _VENDOR_FLAG_SEVERITY.get(flag)
            if known:
                sev, label = known
            else:
                # Unknown flag: low severity, label derived from the flag name.
                sev, label = "LOW", flag.replace("_", " ").title()
            out.append({
                "source_type": "vendor",
                "doc_type": "-",
                "severity": sev,
                "status": "failed",
                "regulation": "DSGVO",
                "label": f"{name} — {label}",
                "hint": _vendor_hint(flag, name),
                "action_recipe": _safe_recipe(flag),
                "vendor_name": name,
                "category": str(v.get("category") or "")[:64],
                "payload": {
                    "flag": flag,
                    "country": country,
                    "compliance_score": v.get("compliance_score"),
                    "category": v.get("category"),
                    "risk_label": risk.get("label"),
                    "high_risk_cookies": risk.get("high_risk_cookie_count"),
                    "schrems_ii_cookies": risk.get("schrems_ii_affected_cookies"),
                },
            })
    return out


def _vendor_hint(flag: str, name: str) -> str:
    """Return the explanatory hint text for a vendor *flag*.

    Flags with a dedicated text use it. Flags that are known to
    ``_VENDOR_FLAG_SEVERITY`` but have no dedicated text fall back to the
    table's label; completely unknown flags yield ``"Flag: <flag>"``.
    """
    hints = {
        "no_cookies_listed":
            f"Bei '{name}' sind keine Cookies dokumentiert — DSK-Orientierungshilfe "
            "verlangt Name + Zweck + Speicherdauer pro Cookie.",
        "no_country":
            f"Sitzland von '{name}' fehlt — bei Drittland-Anbieter "
            "Art. 44 ff. DSGVO erforderlich.",
        "no_privacy_url":
            f"Link zur Datenschutzerklaerung von '{name}' fehlt — Art. 13 Abs. 1 lit. e.",
        "broken_privacy_url":
            f"Privacy-URL von '{name}' nicht erreichbar (404/Timeout).",
        "no_opt_out_url":
            f"Opt-Out/Widerspruchs-Link fuer '{name}' fehlt — Art. 21 DSGVO.",
        "broken_opt_out":
            f"Opt-Out-Link von '{name}' nicht erreichbar.",
        "no_name":
            "Anbieter ohne Name erfasst — Art. 13 Abs. 1 lit. a.",
        "no_purpose":
            f"Verarbeitungszweck fuer '{name}' fehlt — Art. 13 Abs. 1 lit. c.",
    }
    hint = hints.get(flag)
    if hint is not None:
        return hint
    known = _VENDOR_FLAG_SEVERITY.get(flag)
    if known:
        return f"{known[1]} bei '{name}'."
    return f"Flag: {flag}"


def _saving_range(value: Any) -> tuple[Any, Any]:
    """Normalise an estimated-saving value to a ``(low, high)`` pair.

    Sequences with two or more items use their first two items, a single
    item or a bare number is used for both bounds, anything else is (0, 0).
    """
    if isinstance(value, (list, tuple)):
        if len(value) >= 2:
            return value[0], value[1]
        if len(value) == 1:
            return value[0], value[0]
        return 0, 0
    if isinstance(value, (int, float)) and not isinstance(value, bool) and value:
        return value, value
    return 0, 0


def _vendor_display_name(vendor: Any) -> str:
    """Return a printable name for a redundancy vendor entry (string or dict)."""
    if isinstance(vendor, str):
        return vendor
    if isinstance(vendor, dict):
        return str(vendor.get("name") or vendor.get("vendor_name") or vendor)
    return str(vendor)


def _from_redundancies(check_id: str, report: dict | None) -> list[dict]:
    """Each redundancy category -> finding row (status='info', sev='LOW').

    Malformed entries are skipped individually and the saving estimate is
    normalised, so one odd record does not discard the remaining findings.
    ``check_id`` is accepted for signature parity and is not used here.
    """
    if not report:
        return []
    out: list[dict] = []
    for r in (report.get("redundancies") or []):
        if not isinstance(r, dict):
            logger.warning(
                "collect: skipping malformed redundancy entry (%s)", type(r).__name__
            )
            continue
        cat = r.get("category_label") or r.get("category") or "Unbekannt"
        vendors = r.get("vendors") or []
        if not isinstance(vendors, (list, tuple)):
            vendors = []
        low, high = _saving_range(r.get("estimated_saving_year_eur"))

        # The hint lists at most six vendor names and counts the rest.
        hint = f"Anbieter: {', '.join(_vendor_display_name(v) for v in vendors[:6])}"
        if len(vendors) > 6:
            hint += f" (+{len(vendors)-6} weitere)"
        if r.get("suggested_eu_tool"):
            hint += f" · EU-Empfehlung: {r['suggested_eu_tool']}"

        out.append({
            "source_type": "redundanz",
            "doc_type": "-",
            "severity": "LOW",
            "status": "info",
            "regulation": "Cost-Optimization",
            "label": f"Mehrfach-Anbieter in '{cat}' ({len(vendors)} Tools)",
            "hint": hint,
            "action_recipe": {
                "what": "Konsolidierung auf 1 Tool pro Kategorie pruefen.",
                "why": (r.get("consolidation_hint")
                        or "Mehrfach-Lizenzen + Vertrags-Overhead reduzieren."),
                "fix_text": "Migrations-Plan zu einem Anbieter erarbeiten; "
                            "Vertraege ueberlappend kuendigen.",
            },
            "category": cat,
            "payload": {
                "vendors": vendors[:20],
                "saving_year_eur_low": low,
                "saving_year_eur_high": high,
                "suggested_eu_tool": r.get("suggested_eu_tool"),
                "caveats": (r.get("caveats") or [])[:4],
            },
        })
    return out


def collect(
    check_id: str,
    results: list[Any],
    cmp_vendors: list[dict] | None,
    redundancy_report: dict | None,
    doc_texts: dict[str, str] | None = None,
) -> list[dict]:
    """Bundle all finding sources into one list ready for record_findings().

    Args:
        check_id: Identifier of the check run; only used for logging here.
        results: Doc-check result objects (converted by ``_from_doc_check``).
        cmp_vendors: Vendor dicts carrying ``compliance_flags``/``flags``.
        redundancy_report: Dict with a ``redundancies`` list, or ``None``.
        doc_texts: Optional map doc_type -> document text for anchor lookup.

    Returns:
        Finding rows with source_type "mc", "pflichtangabe", "vendor" or
        "redundanz". A failing source is logged as a warning and skipped;
        this function does not raise for bad input data.
    """
    out: list[dict] = []
    texts = doc_texts or {}
    for r in (results or []):
        try:
            out.extend(_from_doc_check(check_id, r, texts.get(r.doc_type, "")))
        except Exception as e:
            logger.warning(
                "collect: doc result %s failed: %s",
                getattr(r, "doc_type", "?"), e,
                exc_info=True,
            )
    try:
        out.extend(_from_vendors(check_id, cmp_vendors or []))
    except Exception as e:
        logger.warning("collect: vendors failed: %s", e, exc_info=True)
    try:
        out.extend(_from_redundancies(check_id, redundancy_report))
    except Exception as e:
        logger.warning("collect: redundancies failed: %s", e, exc_info=True)
    logger.info("collect: check=%s total_findings=%d", check_id, len(out))
    return out