Files
breakpilot-core/control-pipeline/services/document_scope_resolver.py
Benjamin Admin 3ffa3f5793 feat(control-pipeline): add Document Compliance Engine — scope detection + document requirements
New service: document_scope_resolver.py with 28 document rules covering:
- Base (impressum, privacy_policy)
- Tracking (cookie_banner, cookie_policy)
- E-Commerce (AGB, withdrawal, shipping, pricing, payment)
- Digital (digital_content_terms, no_withdrawal_notice)
- SaaS (ToS, service_description, DPA, SLA)
- AI (transparency_notice, automated_decisions)
- Hardware (warranty, return, CE, safety)
- Environmental (WEEE, battery disposal)
- Marketplace (seller terms, ranking transparency)
- Subscription (cancellation terms)

API: POST /v1/document-compliance/required
Input: company flags + jurisdiction → Output: required documents + assessment

Includes confidence scoring, escalation detection (e.g. ecommerce
without distance_selling flag), and reasoning. 19 tests covering all
business model combinations including B2B-only exclusions.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-24 08:39:55 +02:00

484 lines
16 KiB
Python

"""
Document Scope Resolver — determines which legal documents are required
based on company flags (website, ecommerce, SaaS, hardware, AI, etc.).
Deterministic, no LLM needed. Complements the Applicability Engine
(which handles Controls) with a document-level compliance layer.
Usage:
    result = resolve_required_documents(
        flags={"has_website": True, "has_ecommerce": True, "distance_selling": True},
        jurisdiction="DE",
    )
    # result["required_documents"] → list of dicts, one per mandatory document
    # result["assessment"] → confidence, escalation, reasoning
"""
from __future__ import annotations
import logging
from dataclasses import dataclass, field, asdict
from typing import Any, Optional
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Document Requirement Rules
# ---------------------------------------------------------------------------
# Rule table: maps a document type key to the conditions under which that
# document applies. Keys read by resolve_required_documents():
#   label            human-readable (German) document name
#   required_if_any  the rule fires when at least one of these flags is active
#   not_required_if  optional; any active flag listed here suppresses the rule
#   jurisdiction     codes the rule applies to ("EU" also covers DE and AT)
#   legal_basis      legal citation, passed through to the result unchanged
#   mandatory        True -> listed as required, False -> listed as recommended
#   category         grouping label used in the assessment reasoning
#   note             optional free-text hint, passed through unchanged
DOCUMENT_RULES: dict[str, dict[str, Any]] = {
    # === ALWAYS when a website exists ===
    "impressum": {
        "label": "Impressum / Anbieterkennzeichnung",
        "required_if_any": ["has_website"],
        "jurisdiction": ["DE", "AT"],
        "legal_basis": "TMG § 5 / MedienG § 24",
        "mandatory": True,
        "category": "base",
    },
    "privacy_policy": {
        "label": "Datenschutzerklaerung",
        "required_if_any": ["has_website", "has_user_accounts"],
        "jurisdiction": ["DE", "AT", "EU"],
        "legal_basis": "DSGVO Art. 13/14",
        "mandatory": True,
        "category": "base",
    },
    # === When tracking / cookies are used ===
    "cookie_banner": {
        "label": "Cookie-Banner mit Einwilligungsmanagement",
        "required_if_any": ["uses_tracking", "uses_cookies_marketing"],
        "not_required_if": ["strictly_necessary_cookies_only"],
        "jurisdiction": ["DE", "EU"],
        "legal_basis": "TTDSG § 25, ePrivacy-RL 2002/58/EG",
        "mandatory": True,
        "category": "tracking",
    },
    "cookie_policy": {
        "label": "Cookie-Richtlinie / Cookie-Details",
        "required_if_any": ["uses_tracking", "uses_cookies_marketing"],
        "jurisdiction": ["DE", "EU"],
        "legal_basis": "TTDSG § 25",
        "mandatory": True,
        "category": "tracking",
        "note": "Kann Teil der Datenschutzerklaerung sein",
    },
    # === E-commerce ===
    "terms_and_conditions": {
        "label": "Allgemeine Geschaeftsbedingungen (AGB)",
        "required_if_any": ["has_ecommerce", "has_saas"],
        "jurisdiction": ["DE", "AT"],
        "legal_basis": "BGB §§ 305ff",
        "mandatory": True,
        "category": "ecommerce",
    },
    "agb_checkout_summary": {
        "label": "Vertragswesentliche Informationen im Checkout",
        "required_if_any": ["has_checkout"],
        "jurisdiction": ["DE", "EU"],
        "legal_basis": "Consumer Rights Directive Art. 8(2), BGB § 312j",
        "mandatory": True,
        "category": "ecommerce",
        "note": "Preis, Laufzeit, Kuendigungsbedingungen VOR Bestellung sichtbar",
    },
    "withdrawal_policy": {
        "label": "Widerrufsbelehrung",
        "required_if_any": ["distance_selling"],
        "not_required_if": ["b2b_only"],
        "jurisdiction": ["DE", "EU"],
        "legal_basis": "Consumer Rights Directive Art. 9-16, BGB § 355ff",
        "mandatory": True,
        "category": "ecommerce",
    },
    "pricing_transparency": {
        "label": "Preisangaben / Preistransparenz",
        "required_if_any": ["has_ecommerce"],
        "jurisdiction": ["DE", "EU"],
        "legal_basis": "PAngV, Consumer Rights Directive Art. 6(1)(e)",
        "mandatory": True,
        "category": "ecommerce",
    },
    "shipping_information": {
        "label": "Versand- und Lieferinformationen",
        "required_if_any": ["sells_physical_products"],
        "jurisdiction": ["DE", "EU"],
        "legal_basis": "Consumer Rights Directive Art. 6(1)(g)",
        "mandatory": True,
        "category": "ecommerce",
    },
    "payment_terms": {
        "label": "Zahlungsbedingungen und akzeptierte Zahlungsmittel",
        "required_if_any": ["has_ecommerce"],
        "jurisdiction": ["DE", "EU"],
        "legal_basis": "Consumer Rights Directive Art. 6(1)(g), BGB § 312d",
        "mandatory": True,
        "category": "ecommerce",
    },
    # === Digital sales ===
    "digital_content_terms": {
        "label": "Vertragsbedingungen fuer digitale Inhalte",
        "required_if_any": ["sells_digital_products"],
        "jurisdiction": ["DE", "EU"],
        "legal_basis": "Consumer Rights Directive Art. 5(2), BGB § 327ff",
        "mandatory": True,
        "category": "digital",
    },
    "no_withdrawal_notice": {
        "label": "Hinweis auf Widerrufsverzicht bei sofortiger Ausfuehrung",
        "required_if_any": ["sells_digital_products"],
        "jurisdiction": ["DE", "EU"],
        "legal_basis": "Consumer Rights Directive Art. 16(m), BGB § 356(5)",
        "mandatory": True,
        "category": "digital",
        "note": "Checkbox: Zustimmung zur sofortigen Ausfuehrung + Kenntnis Widerrufsverlust",
    },
    # === SaaS ===
    "terms_of_service": {
        "label": "Nutzungsbedingungen / Terms of Service",
        "required_if_any": ["has_saas"],
        "jurisdiction": ["DE", "EU"],
        "legal_basis": "BGB, DSGVO",
        "mandatory": True,
        "category": "saas",
    },
    "service_description": {
        "label": "Leistungsbeschreibung",
        "required_if_any": ["has_saas"],
        "jurisdiction": ["DE", "EU"],
        "legal_basis": "Consumer Rights Directive Art. 6(1)(a), BGB § 312d",
        "mandatory": True,
        "category": "saas",
    },
    "data_processing_agreement": {
        "label": "Auftragsverarbeitungsvertrag (AVV/DPA)",
        "required_if_any": ["has_saas"],
        "jurisdiction": ["DE", "EU"],
        "legal_basis": "DSGVO Art. 28",
        "mandatory": True,
        "category": "saas",
        "note": "Pflicht wenn personenbezogene Daten im Auftrag verarbeitet werden",
    },
    # Not mandatory: ends up in the "recommended" list of the resolver output.
    "sla": {
        "label": "Service Level Agreement (SLA)",
        "required_if_any": ["has_saas"],
        "not_required_if": ["b2c_only"],
        "jurisdiction": ["DE", "EU"],
        "legal_basis": "Vertragsrecht",
        "mandatory": False,
        "category": "saas",
        "note": "Empfohlen fuer B2B SaaS",
    },
    # Not mandatory: ends up in the "recommended" list of the resolver output.
    "acceptable_use_policy": {
        "label": "Acceptable Use Policy / Nutzungsrichtlinie",
        "required_if_any": ["has_saas", "operates_marketplace"],
        "jurisdiction": ["DE", "EU"],
        "legal_basis": "Vertragsrecht, DSA (EU) 2022/2065",
        "mandatory": False,
        "category": "saas",
    },
    # === AI ===
    "ai_transparency_notice": {
        "label": "KI-Transparenzhinweis",
        "required_if_any": ["uses_ai"],
        "jurisdiction": ["EU"],
        "legal_basis": "AI Act Art. 52",
        "mandatory": True,
        "category": "ai",
    },
    "automated_decision_explanation": {
        "label": "Erklaerung automatisierter Entscheidungen",
        "required_if_any": ["automated_decisions"],
        "jurisdiction": ["DE", "EU"],
        "legal_basis": "DSGVO Art. 22",
        "mandatory": True,
        "category": "ai",
    },
    # === Hardware ===
    "warranty_information": {
        "label": "Gewaehrleistungs- und Garantieinformationen",
        "required_if_any": ["sells_physical_products"],
        "jurisdiction": ["DE", "EU"],
        "legal_basis": "BGB § 437ff, Warenkauf-RL (EU) 2019/771",
        "mandatory": True,
        "category": "hardware",
    },
    "return_policy": {
        "label": "Rueckgabe- und Ruecksendebedingungen",
        "required_if_any": ["distance_selling"],
        "not_required_if": ["b2b_only"],
        "jurisdiction": ["DE", "EU"],
        "legal_basis": "Consumer Rights Directive Art. 9-16",
        "mandatory": True,
        "category": "hardware",
    },
    "ce_conformity_declaration": {
        "label": "EU-Konformitaetserklaerung (CE)",
        "required_if_any": ["sells_regulated_products"],
        "jurisdiction": ["EU"],
        "legal_basis": "Maschinenverordnung (EU) 2023/1230, Blue Guide",
        "mandatory": True,
        "category": "hardware",
    },
    "product_safety_instructions": {
        "label": "Sicherheitshinweise und Bedienungsanleitung",
        "required_if_any": ["sells_regulated_products"],
        "jurisdiction": ["EU"],
        "legal_basis": "Produktsicherheitsverordnung (EU) 2023/988",
        "mandatory": True,
        "category": "hardware",
    },
    # === Environmental / battery ===
    "weee_information": {
        "label": "WEEE-Registrierung und Entsorgungshinweise",
        "required_if_any": ["sells_electronics"],
        "jurisdiction": ["DE", "EU"],
        "legal_basis": "WEEE-RL 2012/19/EU, ElektroG",
        "mandatory": True,
        "category": "environmental",
    },
    "battery_disposal_information": {
        "label": "Batterie-Entsorgungshinweise",
        "required_if_any": ["contains_battery"],
        "jurisdiction": ["DE", "EU"],
        "legal_basis": "Batterieverordnung (EU) 2023/1542",
        "mandatory": True,
        "category": "environmental",
    },
    # === Marketplace ===
    "marketplace_seller_terms": {
        "label": "Haendler-AGB / Plattform-Teilnahmebedingungen",
        "required_if_any": ["operates_marketplace"],
        "jurisdiction": ["DE", "EU"],
        "legal_basis": "P2B-VO (EU) 2019/1150",
        "mandatory": True,
        "category": "marketplace",
    },
    "marketplace_ranking_transparency": {
        "label": "Transparenz zu Ranking- und Listungskriterien",
        "required_if_any": ["operates_marketplace"],
        "jurisdiction": ["DE", "EU"],
        "legal_basis": "P2B-VO (EU) 2019/1150 Art. 5",
        "mandatory": True,
        "category": "marketplace",
    },
    # === Subscription ===
    "subscription_cancellation_terms": {
        "label": "Kuendigungsbedingungen bei Abonnements",
        "required_if_any": ["subscription_model"],
        "jurisdiction": ["DE", "EU"],
        "legal_basis": "BGB § 309 Nr. 9, Faire-Verbrauchervertraege-RL",
        "mandatory": True,
        "category": "subscription",
    },
}
# Signals that indicate the company is B2C (not B2B-only)
# NOTE(review): not referenced anywhere in the visible part of this module —
# confirm whether another module uses it or whether it is dead code.
_B2C_INDICATORS = {"distance_selling", "has_checkout", "sells_digital_products"}
# ---------------------------------------------------------------------------
# Resolver
# ---------------------------------------------------------------------------
@dataclass
class DocumentRequirement:
    """One document that a rule in ``DOCUMENT_RULES`` asks for.

    Attributes:
        document_type: Key of the rule that produced this entry.
        label: Human-readable document name taken from the rule.
        mandatory: ``True`` for required documents, ``False`` for
            documents that are only recommended.
        legal_basis: Legal citation taken from the rule.
        category: Grouping label taken from the rule.
        reason: Text naming the active flags that triggered the rule.
        note: Optional free-text hint from the rule, ``None`` if absent.
    """

    # Fields without defaults come first; their order is part of the
    # positional constructor signature and must not change.
    document_type: str
    label: str
    mandatory: bool
    legal_basis: str
    category: str
    reason: str
    # Only field with a default, therefore last.
    note: Optional[str] = None
@dataclass
class DocumentAssessment:
    """Quality verdict attached to a resolved document list.

    Attributes:
        confidence: Score describing how well the supplied flags support
            the derived document list; defaults to ``1.0``.
        escalation_flag: ``True`` when the result should be reviewed.
        escalation_reason: Text explaining the escalation, ``None`` when
            nothing was escalated.
        reasoning: Human-readable summary of the assessment.
        warnings: Individual hint messages collected during assessment.
    """

    confidence: float = 1.0
    escalation_flag: bool = False
    escalation_reason: Optional[str] = None
    reasoning: str = ""
    # default_factory gives every instance its own list instead of a shared one.
    warnings: list = field(default_factory=list)
def resolve_required_documents(
    flags: dict[str, bool],
    jurisdiction: str = "DE",
) -> dict[str, Any]:
    """Determine which legal documents are required based on company flags.

    Every rule in ``DOCUMENT_RULES`` is evaluated in definition order. A rule
    applies when its jurisdiction matches, at least one of its
    ``required_if_any`` flags is active, and none of its ``not_required_if``
    flags is active.

    Args:
        flags: Scope signals (e.g. ``has_website``, ``has_ecommerce``,
            ``uses_ai``). Only values that are literally ``True`` count as
            active. ``None`` is treated like an empty mapping.
        jurisdiction: Country code (``DE``, ``AT`` or ``EU``). Matched
            case-insensitively and with surrounding whitespace ignored, so
            ``"de"`` behaves like ``"DE"``.

    Returns:
        dict with ``required_documents`` and ``recommended_documents`` (lists
        of dicts), ``total_required``, ``total_recommended``, the normalised
        ``jurisdiction``, the sorted ``active_flags`` and the ``assessment``
        as a dict.
    """
    # A code such as "de" or " DE " used to match no rule at all and silently
    # produced an empty requirement list; normalise it before comparing.
    # Non-string values are passed through untouched (they match nothing).
    code = jurisdiction.strip().upper() if isinstance(jurisdiction, str) else jurisdiction
    # DE and AT are part of the EU, so EU-level rules apply to them as well.
    covered_by_eu_rules = code in ("DE", "AT")

    flag_map = flags or {}
    # Strict identity check: truthy non-bool values (1, "yes") do not count.
    active_signals = {name for name, value in flag_map.items() if value is True}

    required = []
    recommended = []
    for doc_type, rule in DOCUMENT_RULES.items():
        rule_jurisdictions = rule.get("jurisdiction", ["DE", "EU"])
        if code not in rule_jurisdictions and not (
            covered_by_eu_rules and "EU" in rule_jurisdictions
        ):
            continue

        # The rule needs at least one active trigger ...
        triggers = rule.get("required_if_any", [])
        matched_signals = [sig for sig in triggers if sig in active_signals]
        if not matched_signals:
            continue

        # ... and must not be suppressed by an active exclusion flag.
        exclusions = rule.get("not_required_if", [])
        if any(sig in active_signals for sig in exclusions):
            continue

        doc = DocumentRequirement(
            document_type=doc_type,
            label=rule.get("label", doc_type),
            mandatory=rule.get("mandatory", True),
            legal_basis=rule.get("legal_basis", ""),
            category=rule.get("category", "other"),
            reason=f"Aktiv wegen: {', '.join(matched_signals)}",
            note=rule.get("note"),
        )
        # Non-mandatory rules are reported separately as recommendations.
        if doc.mandatory:
            required.append(doc)
        else:
            recommended.append(doc)

    assessment = _assess_documents(flag_map, active_signals, required, recommended)
    return {
        "required_documents": [asdict(d) for d in required],
        "recommended_documents": [asdict(d) for d in recommended],
        "total_required": len(required),
        "total_recommended": len(recommended),
        "jurisdiction": code,
        "active_flags": sorted(active_signals),
        "assessment": asdict(assessment),
    }
def _assess_documents(
    flags: dict[str, bool],
    active_signals: set[str],
    required: list[DocumentRequirement],
    recommended: list[DocumentRequirement],
) -> DocumentAssessment:
    """Score how trustworthy a resolved document list is.

    Args:
        flags: Raw flag mapping as supplied by the caller. Not read by the
            current implementation; kept so the signature stays stable.
        active_signals: Names of the flags that are active.
        required: Mandatory documents that were resolved.
        recommended: Optional documents that were resolved.

    Returns:
        A ``DocumentAssessment`` with confidence (two decimals, at most 0.75
        when an escalation fired), escalation details, collected warnings
        and a summary sentence.
    """
    has_website = "has_website" in active_signals
    signal_count = len(active_signals)
    required_count = len(required)

    hints = []
    score = 0.0

    # --- Confidence: each answered question adds a fixed share -------------
    # Website flag (+0.20)
    if has_website:
        score += 0.20
    else:
        hints.append("has_website nicht gesetzt — Basis-Dokumente koennten fehlen")

    # Any e-commerce related flag (+0.20); only warn when a website exists.
    if not active_signals.isdisjoint(
        {"has_ecommerce", "has_checkout", "distance_selling", "b2b_only"}
    ):
        score += 0.20
    elif has_website:
        hints.append("Keine E-Commerce-Flags — unklar ob Webshop vorhanden")

    # Any cookie/tracking flag (+0.15)
    if not active_signals.isdisjoint(
        {"uses_tracking", "uses_cookies_marketing", "strictly_necessary_cookies_only"}
    ):
        score += 0.15
    else:
        hints.append("Keine Cookie/Tracking-Flags — Cookie-Banner-Pflicht unklar")

    # Any product flag (+0.15); no warning when absent.
    if not active_signals.isdisjoint(
        {"sells_physical_products", "sells_digital_products", "sells_regulated_products"}
    ):
        score += 0.15

    # Overall number of active flags (+0.15 / +0.10)
    if signal_count >= 4:
        score += 0.15
    elif signal_count >= 2:
        score += 0.10

    # Number of mandatory documents found (+0.15 / +0.05)
    if required_count >= 3:
        score += 0.15
    elif required_count >= 1:
        score += 0.05

    confidence = round(min(score, 1.0), 2)

    # --- Escalation: situations that need a human decision ------------------
    escalations = []
    if "has_ecommerce" in active_signals and active_signals.isdisjoint(
        {"distance_selling", "b2b_only"}
    ):
        escalations.append(
            "E-Commerce aktiv aber distance_selling/b2b_only nicht spezifiziert — "
            "Widerrufsrecht-Pflicht unklar"
        )
    if (
        "operates_marketplace" in active_signals
        and "operates_payment_service" not in active_signals
    ):
        escalations.append(
            "Marketplace aktiv — Pruefung ob eigene Zahlungsabwicklung oder externer PSP"
        )
    if signal_count < 2:
        escalations.append("Zu wenige Flags fuer belastbare Dokumenten-Ableitung")

    escalated = bool(escalations)
    escalation_text = " | ".join(escalations) if escalated else None
    if escalated:
        # An escalated result is never reported with more than 0.75 confidence.
        confidence = min(confidence, 0.75)

    # --- Reasoning: one sentence per aspect ---------------------------------
    sentences = [
        f"{signal_count} Flags aktiv: {', '.join(sorted(active_signals))}",
        f"{required_count} Pflichtdokumente, {len(recommended)} empfohlen",
    ]
    category_names = sorted({doc.category for doc in required})
    if category_names:
        sentences.append(f"Kategorien: {', '.join(category_names)}")
    if hints:
        sentences.append(f"Hinweise: {'; '.join(hints)}")
    if escalated:
        sentences.append(f"ESKALATION: {escalation_text}")

    return DocumentAssessment(
        confidence=confidence,
        escalation_flag=escalated,
        escalation_reason=escalation_text,
        reasoning=". ".join(sentences) + ".",
        warnings=hints,
    )