"""Tests für Impressum-Agent v3 (Sprint 1.12). Mockt rag_document_checker damit Tests offline laufen + prüft die Layer-0-Boost-Logik isoliert. """ from __future__ import annotations import asyncio import pytest from compliance.services.specialist_agents import ( AgentInput, ImpressumAgent, Severity, ) from compliance.services.specialist_agents.impressum.regex_boost import ( BOOST_KEYWORDS, boost_matches_db_mc, compute_regex_boosts, ) TESLA_TEXT = ( "Tesla Germany GmbH\nLudwig-Prandtl-Strasse 25-29\n12526 Berlin\n" "E-Mail: info@tesla.com\n" "Telefon: +49 89 1250 16 800\n" "Management: Elon Musk\n" "Handelsregister: HRB 218904 B Charlottenburg\n" "USt-IdNr: DE123456789\n" ) def _run(coro): return asyncio.get_event_loop().run_until_complete(coro) def test_compute_regex_boosts_detects_basic_fields(): hits = compute_regex_boosts(TESLA_TEXT, business_scope=set()) # Tesla hat klassische Pflichtangaben assert "kontakt_email" in hits assert "kontakt_telefon" in hits assert "handelsregister" in hits assert "ust_id" in hits assert "vertretungsberechtigte" in hits # "Management" # KFZ-Auto-Detect → aufsichtsbehoerde wäre relevant aber kein # Pattern getroffen (KBA nicht genannt) def test_compute_regex_boosts_short_text_empty(): assert compute_regex_boosts("x", business_scope=set()) == set() def test_boost_matches_db_mc_finds_telefon(): boosts = {"kontakt_telefon"} pass_crit = [ "Telefonnummer angeben", "Erreichbar per Telefon und E-Mail", ] matched = boost_matches_db_mc(boosts, pass_crit) assert matched == "kontakt_telefon" def test_boost_matches_db_mc_returns_none_when_unrelated(): boosts = {"kontakt_telefon"} pass_crit = [ "Cookie-Banner muss zentriert sein", ] assert boost_matches_db_mc(boosts, pass_crit) is None def test_boost_matches_db_mc_uses_fail_criteria(): """Wörter aus fail_criteria sollen die Zuordnung mit unterstützen.""" boosts = {"name_anbieter"} pass_crit = ["Sichtbar"] fail_crit = ["Keine Postadresse angegeben", "Adresse fehlt"] matched = boost_matches_db_mc(boosts, pass_crit, fail_crit) assert matched == "name_anbieter" def test_boost_matches_db_mc_eto_address_case(): """Konkreter ETO-Fall: AUTH-1954-A07 'Postadresse + Geschäftssitz'.""" boosts = {"name_anbieter"} pass_crit = [ "Vollständige Postadresse (Straße, Hausnummer, PLZ, Ort, Land)", "Oder: Eindeutige Angabe des Geschäftssitzes", "Adresse ist aktuell und korrekt", ] matched = boost_matches_db_mc(boosts, pass_crit) assert matched == "name_anbieter" def test_boost_keywords_cover_all_field_ids(): """Jedes mcs.py field_id muss in BOOST_KEYWORDS ein Eintrag haben.""" from compliance.services.specialist_agents.impressum.mcs import MCS for mc in MCS: assert mc.field_id in BOOST_KEYWORDS, ( f"BOOST_KEYWORDS missing for {mc.field_id}" ) @pytest.fixture def mock_v3(monkeypatch): """Mockt run_v3_pipeline mit deterministischen Fake-Results.""" async def _fake_pipeline(text, scope, db_url=""): results = [ {"control_id": "AUTH-1954-A04", "passed": True, "label": "Anbieterkennzeichnung dokumentiert", "severity": "HIGH", "regulation": "TMG", "article": "§ 5", "hint": "", "matched_text": "Tesla Germany GmbH", "source": "keyword_match"}, {"control_id": "DATA-2786-A04", "passed": False, "label": "Freiwilligkeit der TDDDG-Einwilligungen", "severity": "MEDIUM", "regulation": "TDDDG", "article": "§ 25", "hint": "Bitte Freiwilligkeit dokumentieren", "matched_text": "", "source": ""}, ] telemetry = { "layer_0_field_hits": 5, "layer_0_field_ids": ["kontakt_email", "kontakt_telefon", "handelsregister", "ust_id", "vertretungsberechtigte"], "layer_1_pass": 1, "layer_1_fail": 1, "layer_0_boost_overrides": 0, "total_mcs": 2, } return results, telemetry monkeypatch.setattr( "compliance.services.specialist_agents.impressum.agent.run_v3_pipeline", _fake_pipeline, ) async def _no_validator(*a, **kw): return {} monkeypatch.setattr( "compliance.services.specialist_agents.impressum.agent.validate_present", _no_validator, ) def test_agent_uses_db_mcs(mock_v3): agent = ImpressumAgent() out = _run(agent.evaluate(AgentInput(doc_type="impressum", text=TESLA_TEXT))) db_mc_findings = [f for f in out.findings if f.check_id.startswith("DBMC-")] assert len(db_mc_findings) == 1 assert db_mc_findings[0].check_id == "DBMC-DATA-2786-A04" assert db_mc_findings[0].severity == Severity.MEDIUM.value assert "TDDDG" in db_mc_findings[0].norm def test_agent_emits_boost_coverage(mock_v3): agent = ImpressumAgent() out = _run(agent.evaluate(AgentInput(doc_type="impressum", text=TESLA_TEXT))) # 2 DB-MCs + 12 Pattern-Boost-Slots = 14 coverage entries assert out.mc_total >= 14 boost_ok = [c for c in out.mc_coverage if c.mc_id.startswith("IMP-MC-") and c.status == "ok"] assert len(boost_ok) == 5 # 5 boost_ids im fake def test_agent_notes_telemetry(mock_v3): agent = ImpressumAgent() out = _run(agent.evaluate(AgentInput(doc_type="impressum", text=TESLA_TEXT))) assert "v3-pipeline" in out.notes assert "Pattern-Boosts" in out.notes def test_short_text_skipped(): agent = ImpressumAgent() out = _run(agent.evaluate(AgentInput(doc_type="impressum", text="x"))) assert all(c.status == "skipped" for c in out.mc_coverage) assert not out.findings def test_agent_version_is_three(): agent = ImpressumAgent() assert agent.agent_version == "3.0"