Files
breakpilot-compliance/backend-compliance/tests/test_widerrufsbelehrung_reachability_check.py
T
Benjamin Admin e8ff75cbfe feat: Backlog 1-5 — soft-hints, chatbot-discovery, API-payload, LLM-Agent
5 Backlog-Items aus dem Multi-Site-Briefing in einem Sprint:

1. B13 B2C-Soft-Hints — Versicherungs/Tarif/Buchungs-Marker
   _B2C_WEAK erweitert um "Reiseversicherung", "Tarifrechner",
   "Online-Antrag", "Flug buchen", "Stromtarif" etc.
   Fängt Allianz-Reise-Chatbot (vorher False-Negative).

2. Chatbot-Policy-Discovery (chatbot_policy_discovery.py)
   Probt 14 Standard-Slugs (privacypolicychatbot, chatbot-datenschutz,
   ai-policy, ki-datenschutz, ...) × 5 Lang-Prefixe auf jeder
   submitted Origin. Successful >300-Wort-Findings werden in
   doc_texts['dse'] gemerged. Audit-Trail über
   doc_entries[dse].chatbot_policy_sources.
   Hebt Westfield-iAdvize-Lücke.

3. API-Response-Payload erweitert
   phase_f_persist.response um extra_findings, audit_walk und
   html_blocks erweitert. B-Wiring-Output (B1, B3-B18) ist nicht
   mehr nur im Mail-HTML versteckt — externe Aufrufer sehen jeden
   Finding. Schema additiv, legacy clients ignorieren neue Felder.

4. Plausibility-LLM Empty-Response-Fix
   Resilienz-Strategie A→B→C→D:
   A) format='json' (strict, default)
   B) format='' (loose, _try_extract_json mit ```json-fence + prose-
      wrap-Unterstützung)
   C) Split-Batch-Recursion (vorhanden)
   D) Give up, leeres dict (callers behandeln als skipped)
   Plus _post_llm() als isolierter LLM-Call-Helper, fängt
   Network-Errors ab.

5. Specialist-Agents Phase 2 LLM (MVP) — Impressum-Agent
   impressum_agent_llm.py: qwen3:30b-a3b mit § 5 TMG System-Prompt,
   business_scope-hints aus profile_dict. Output identisches Schema
   wie pattern-agent für ein Merge ohne API-Bruch.
   _b18_wiring.py orchestriert beide Agents + dedupliziert nach
   field_id, rendert lila V2-Block mit KB/LLM-Tags pro Finding.
   Pattern-first im Dedup (deterministisch + stable).

Tests: 107/107 grün (7 Test-Suites + chatbot-discovery + b18).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-06-07 18:41:54 +02:00

149 lines
5.3 KiB
Python

"""Tests for B13 Widerrufsbelehrung-Reachability-Check."""
from compliance.services.widerrufsbelehrung_reachability_check import (
_detect_b2c_scope,
check_widerrufsbelehrung_reachability,
)
def _state(widerruf_entry=None, home_text="", doc_entries=None,
           footer_text=""):
    """Build a minimal state dict for the tests in this module.

    Args:
        widerruf_entry: Optional mapping with the fields of a Widerruf
            document entry. When it is not ``None`` (an empty mapping
            counts), it is appended to the entries as
            ``{"doc_type": "widerruf", **widerruf_entry}``.
        home_text: Value stored under the ``"home_text"`` key.
        doc_entries: Optional iterable of pre-existing document entries.
            It is copied into a new list, so the caller's object is never
            mutated.
        footer_text: Value stored under the ``"footer_text"`` key.

    Returns:
        A dict with the keys ``"doc_entries"``, ``"home_text"`` and
        ``"footer_text"``.
    """
    # Copy first so the append below never leaks into the caller's list.
    entries = list(doc_entries or [])
    # Identity check rather than truthiness: an empty dict still yields an
    # entry that carries only the doc_type marker.
    if widerruf_entry is not None:
        entries.append({"doc_type": "widerruf", **widerruf_entry})
    return {
        "doc_entries": entries,
        "home_text": home_text,
        "footer_text": footer_text,
    }
class TestDetectB2CScope:
    """Expected scope labels returned by ``_detect_b2c_scope``.

    Each test builds a state via ``_state`` with a specific ``home_text``
    and asserts the first element of the returned ``(scope, hits)`` pair.
    """

    def test_strong_b2c_warenkorb(self):
        """A shopping-cart phrase yields ``b2c_strong`` and a matching hit."""
        s = _state(home_text="Legen Sie das Produkt in den Warenkorb.")
        scope, hits = _detect_b2c_scope(s)
        assert scope == "b2c_strong"
        assert any("warenkorb" in h for h in hits)

    def test_b2b_only_overrides_b2c(self):
        """An explicit B2B-only statement wins over a cart mention."""
        s = _state(home_text="Wir verkaufen ausschließlich an Unternehmer. "
                             "Warenkorb für Großkunden.")
        scope, _ = _detect_b2c_scope(s)
        assert scope == "b2b_only"

    def test_weak_signals_two_promote_to_likely(self):
        """Several weak markers together yield ``b2c_likely``."""
        s = _state(home_text="Unser Shop bietet Wallbox-Produkte mit "
                             "Rechnung zur Bestellung.")
        scope, _ = _detect_b2c_scope(s)
        assert scope == "b2c_likely"

    def test_single_weak_signal_stays_unknown(self):
        """One weak marker alone is expected to stay ``unknown``."""
        s = _state(home_text="Wir bieten einen Shop.")
        scope, _ = _detect_b2c_scope(s)
        assert scope == "unknown"

    def test_versicherung_combo_promotes_to_likely(self):
        """Insurance/tariff wording is expected to yield ``b2c_likely``."""
        s = _state(home_text="Reiseversicherung jetzt online "
                             "abschließen. Tarifrechner verfügbar.")
        scope, _ = _detect_b2c_scope(s)
        assert scope == "b2c_likely"

    def test_buchung_combo_promotes_to_likely(self):
        """Booking wording is expected to yield ``b2c_likely``."""
        s = _state(home_text="Flug buchen oder Hotel reservieren.")
        scope, _ = _detect_b2c_scope(s)
        assert scope == "b2c_likely"

    def test_empty_state(self):
        """A state without any text is expected to be ``unknown``."""
        s = _state()
        scope, _ = _detect_b2c_scope(s)
        assert scope == "unknown"
class TestCheck:
    """Expected findings of ``check_widerrufsbelehrung_reachability``.

    Each test passes a state built by ``_state`` and asserts either an
    empty result list or the fields of the single finding returned.
    """

    def test_no_widerruf_entry_no_finding(self):
        """Without a widerruf entry the check returns no findings."""
        out = check_widerrufsbelehrung_reachability(_state())
        assert out == []

    def test_widerruf_reachable_no_finding(self):
        """An entry with URL and text produces no finding."""
        out = check_widerrufsbelehrung_reachability(_state(
            widerruf_entry={
                "url": "https://shop.de/widerruf",
                "text": "Belehrung " * 100,
                "discovery_attempted": True,
            },
            home_text="Warenkorb / zur Kasse.",
        ))
        assert out == []

    def test_unreachable_plus_b2c_strong_high_finding(self):
        """Empty entry plus strong B2C text yields one HIGH finding."""
        out = check_widerrufsbelehrung_reachability(_state(
            widerruf_entry={
                "url": "", "text": "",
                "discovery_attempted": True,
                "rejected_url": "https://shop.de/widerruf",
            },
            home_text="In den Warenkorb. Lieferzeit 2 Tage. Preis inkl. MwSt.",
        ))
        assert len(out) == 1
        f = out[0]
        assert f["check_id"] == "WIDERRUF-REACH-001"
        assert f["severity"] == "HIGH"
        assert f["b2c_scope"] == "b2c_strong"
        assert "Art. 246a" in f["norm"]
        # The rejected URL is expected to surface in the evidence text.
        assert "shop.de/widerruf" in f["evidence"]

    def test_unreachable_plus_b2c_likely_medium(self):
        """Empty entry plus likely-B2C text yields one MEDIUM finding."""
        out = check_widerrufsbelehrung_reachability(_state(
            widerruf_entry={
                "url": "", "text": "",
                "discovery_attempted": True,
            },
            home_text="Shop bietet Wallbox und Tarif buchen.",
        ))
        assert len(out) == 1
        assert out[0]["severity"] == "MEDIUM"
        assert out[0]["b2c_scope"] == "b2c_likely"

    def test_unreachable_plus_b2b_only_no_finding(self):
        """Empty entry on a B2B-only site produces no finding."""
        out = check_widerrufsbelehrung_reachability(_state(
            widerruf_entry={
                "url": "", "text": "",
                "discovery_attempted": True,
            },
            home_text="B2B only — kein Verkauf an Verbraucher.",
        ))
        assert out == []

    def test_unreachable_plus_unknown_scope_no_finding(self):
        """Empty entry with no shop signals produces no finding."""
        # Pure agency / B2B-services without clear shop signals — silent.
        out = check_widerrufsbelehrung_reachability(_state(
            widerruf_entry={
                "url": "", "text": "",
                "discovery_attempted": True,
            },
            home_text="Wir sind eine Beratungsagentur für Mittelstand.",
        ))
        assert out == []

    def test_discovery_not_attempted_no_finding(self):
        """No finding when ``discovery_attempted`` is False."""
        # Avoid false positives when discovery had no homepage to crawl.
        out = check_widerrufsbelehrung_reachability(_state(
            widerruf_entry={
                "url": "", "text": "",
                "discovery_attempted": False,
            },
            home_text="In den Warenkorb. Preis inkl. MwSt.",
        ))
        assert out == []

    def test_footer_widerruf_link_suppresses_finding(self):
        """A widerruf link in the footer text suppresses the finding."""
        out = check_widerrufsbelehrung_reachability(_state(
            widerruf_entry={
                "url": "", "text": "",
                "discovery_attempted": True,
            },
            home_text="Warenkorb. Lieferzeit. Preis inkl. MwSt.",
            footer_text='<a href="/widerruf">Widerruf</a>',
        ))
        assert out == []