e8ff75cbfe
5 Backlog-Items aus dem Multi-Site-Briefing in einem Sprint:
1. B13 B2C-Soft-Hints — Versicherungs/Tarif/Buchungs-Marker
_B2C_WEAK erweitert um "Reiseversicherung", "Tarifrechner",
"Online-Antrag", "Flug buchen", "Stromtarif" etc.
Fängt Allianz-Reise-Chatbot (vorher False-Negative).
2. Chatbot-Policy-Discovery (chatbot_policy_discovery.py)
Probt 14 Standard-Slugs (privacypolicychatbot, chatbot-datenschutz,
ai-policy, ki-datenschutz, ...) × 5 Lang-Prefixe auf jeder
submitted Origin. Successful >300-Wort-Findings werden in
doc_texts['dse'] gemerged. Audit-Trail über
doc_entries[dse].chatbot_policy_sources.
Behebt die Westfield-iAdvize-Lücke.
3. API-Response-Payload erweitert
phase_f_persist.response um extra_findings, audit_walk und
html_blocks erweitert. B-Wiring-Output (B1, B3-B18) ist nicht
mehr nur im Mail-HTML versteckt — externe Aufrufer sehen jedes
Finding. Schema additiv, legacy clients ignorieren neue Felder.
4. Plausibility-LLM Empty-Response-Fix
Resilienz-Strategie A→B→C→D:
A) format='json' (strict, default)
B) format='' (loose, _try_extract_json mit ```json-fence + prose-
wrap-Unterstützung)
C) Split-Batch-Recursion (vorhanden)
D) Give up, leeres dict (callers behandeln als skipped)
Plus _post_llm() als isolierter LLM-Call-Helper, fängt
Network-Errors ab.
5. Specialist-Agents Phase 2 LLM (MVP) — Impressum-Agent
impressum_agent_llm.py: qwen3:30b-a3b mit § 5 TMG System-Prompt,
business_scope-hints aus profile_dict. Output identisches Schema
wie pattern-agent für ein Merge ohne API-Bruch.
_b18_wiring.py orchestriert beide Agents + dedupliziert nach
field_id, rendert lila V2-Block mit KB/LLM-Tags pro Finding.
Pattern-first im Dedup (deterministisch + stable).
Tests: 107/107 grün (7 Test-Suites + chatbot-discovery + b18).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
149 lines
5.3 KiB
Python
149 lines
5.3 KiB
Python
"""Tests for B13 Widerrufsbelehrung-Reachability-Check."""
|
|
|
|
from compliance.services.widerrufsbelehrung_reachability_check import (
|
|
_detect_b2c_scope,
|
|
check_widerrufsbelehrung_reachability,
|
|
)
|
|
|
|
|
|
def _state(widerruf_entry=None, home_text="", doc_entries=None,
|
|
footer_text=""):
|
|
entries = list(doc_entries or [])
|
|
if widerruf_entry is not None:
|
|
entries.append({"doc_type": "widerruf", **widerruf_entry})
|
|
return {
|
|
"doc_entries": entries,
|
|
"home_text": home_text,
|
|
"footer_text": footer_text,
|
|
}
|
|
|
|
|
|
class TestDetectB2CScope:
    """Tests for the scope label returned by ``_detect_b2c_scope``.

    Each test builds a state via ``_state`` with a specific ``home_text``
    and checks the first element of the returned ``(scope, hits)`` pair.
    """

    def test_strong_b2c_warenkorb(self):
        """A "Warenkorb" mention is expected to yield ``b2c_strong``."""
        s = _state(home_text="Legen Sie das Produkt in den Warenkorb.")
        scope, hits = _detect_b2c_scope(s)
        assert scope == "b2c_strong"
        # At least one reported hit must mention the marker itself.
        assert any("warenkorb" in h for h in hits)

    def test_b2b_only_overrides_b2c(self):
        """An explicit B2B-only statement wins over a shop marker."""
        s = _state(home_text="Wir verkaufen ausschließlich an Unternehmer. "
                             "Warenkorb für Großkunden.")
        scope, _ = _detect_b2c_scope(s)
        assert scope == "b2b_only"

    def test_weak_signals_two_promote_to_likely(self):
        """Several weak markers together are expected to give ``b2c_likely``."""
        s = _state(home_text="Unser Shop bietet Wallbox-Produkte mit "
                             "Rechnung zur Bestellung.")
        scope, _ = _detect_b2c_scope(s)
        assert scope == "b2c_likely"

    def test_single_weak_signal_stays_unknown(self):
        """One weak marker alone must not change the scope from ``unknown``."""
        s = _state(home_text="Wir bieten einen Shop.")
        scope, _ = _detect_b2c_scope(s)
        assert scope == "unknown"

    def test_versicherung_combo_promotes_to_likely(self):
        """Insurance and tariff-calculator wording is expected to give ``b2c_likely``."""
        s = _state(home_text="Reiseversicherung jetzt online "
                             "abschließen. Tarifrechner verfügbar.")
        scope, _ = _detect_b2c_scope(s)
        assert scope == "b2c_likely"

    def test_buchung_combo_promotes_to_likely(self):
        """Booking wording (flight / hotel) is expected to give ``b2c_likely``."""
        s = _state(home_text="Flug buchen oder Hotel reservieren.")
        scope, _ = _detect_b2c_scope(s)
        assert scope == "b2c_likely"

    def test_empty_state(self):
        """A state without any text is expected to be ``unknown``."""
        s = _state()
        scope, _ = _detect_b2c_scope(s)
        assert scope == "unknown"
|
|
|
|
|
|
class TestCheck:
    """Tests for ``check_widerrufsbelehrung_reachability``.

    Each test builds a state via ``_state`` and asserts on the list of
    findings the check returns (empty list means "no finding").
    """

    def test_no_widerruf_entry_no_finding(self):
        """Without a widerruf entry the check is expected to stay silent."""
        out = check_widerrufsbelehrung_reachability(_state())
        assert out == []

    def test_widerruf_reachable_no_finding(self):
        """A widerruf entry with URL and text produces no finding."""
        out = check_widerrufsbelehrung_reachability(_state(
            widerruf_entry={
                "url": "https://shop.de/widerruf",
                "text": "Belehrung " * 100,
                "discovery_attempted": True,
            },
            home_text="Warenkorb / zur Kasse.",
        ))
        assert out == []

    def test_unreachable_plus_b2c_strong_high_finding(self):
        """Empty widerruf entry on a strong-B2C page yields one HIGH finding."""
        out = check_widerrufsbelehrung_reachability(_state(
            widerruf_entry={
                "url": "", "text": "",
                "discovery_attempted": True,
                "rejected_url": "https://shop.de/widerruf",
            },
            home_text="In den Warenkorb. Lieferzeit 2 Tage. Preis inkl. MwSt.",
        ))
        assert len(out) == 1
        f = out[0]
        assert f["check_id"] == "WIDERRUF-REACH-001"
        assert f["severity"] == "HIGH"
        assert f["b2c_scope"] == "b2c_strong"
        assert "Art. 246a" in f["norm"]
        # The rejected URL must be surfaced in the evidence.
        assert "shop.de/widerruf" in f["evidence"]

    def test_unreachable_plus_b2c_likely_medium(self):
        """Empty widerruf entry on a likely-B2C page yields one MEDIUM finding."""
        out = check_widerrufsbelehrung_reachability(_state(
            widerruf_entry={
                "url": "", "text": "",
                "discovery_attempted": True,
            },
            home_text="Shop bietet Wallbox und Tarif buchen.",
        ))
        assert len(out) == 1
        assert out[0]["severity"] == "MEDIUM"
        assert out[0]["b2c_scope"] == "b2c_likely"

    def test_unreachable_plus_b2b_only_no_finding(self):
        """Empty widerruf entry on a B2B-only page produces no finding."""
        out = check_widerrufsbelehrung_reachability(_state(
            widerruf_entry={
                "url": "", "text": "",
                "discovery_attempted": True,
            },
            home_text="B2B only — kein Verkauf an Verbraucher.",
        ))
        assert out == []

    def test_unreachable_plus_unknown_scope_no_finding(self):
        """Empty widerruf entry with no shop markers produces no finding."""
        # Pure agency / B2B-services without clear shop signals — silent.
        out = check_widerrufsbelehrung_reachability(_state(
            widerruf_entry={
                "url": "", "text": "",
                "discovery_attempted": True,
            },
            home_text="Wir sind eine Beratungsagentur für Mittelstand.",
        ))
        assert out == []

    def test_discovery_not_attempted_no_finding(self):
        """No finding when ``discovery_attempted`` is False, even on a shop page."""
        # Avoid false positives when discovery had no homepage to crawl.
        out = check_widerrufsbelehrung_reachability(_state(
            widerruf_entry={
                "url": "", "text": "",
                "discovery_attempted": False,
            },
            home_text="In den Warenkorb. Preis inkl. MwSt.",
        ))
        assert out == []

    def test_footer_widerruf_link_suppresses_finding(self):
        """A widerruf link in the footer text suppresses the finding."""
        out = check_widerrufsbelehrung_reachability(_state(
            widerruf_entry={
                "url": "", "text": "",
                "discovery_attempted": True,
            },
            home_text="Warenkorb. Lieferzeit. Preis inkl. MwSt.",
            footer_text='<a href="/widerruf">Widerruf</a>',
        ))
        assert out == []
|