"""Ground-Truth-Vergleich: lässt jedes Impressum durch den Agenten laufen und vergleicht Output gegen expected_findings / expected_clean. Hauptzweck: Pattern-Lücken sofort sichtbar machen sobald sie auftauchen. """ from __future__ import annotations import asyncio import pytest from compliance.services.specialist_agents import AgentInput, ImpressumAgent from tests.fixtures.impressum_groundtruth import ( ALL_GROUND_TRUTH, CORE_IMPRESSUM_MCS, ) def _run(coro): return asyncio.get_event_loop().run_until_complete(coro) @pytest.fixture(autouse=True) def _agent_offline(monkeypatch): """GT-Tests offline + deterministisch: LLM-Eskalation aus, MC-Laden ueber die gemockte Main-Tool-Engine (CORE_IMPRESSUM_MCS), Embedding aus. Der Agent delegiert jetzt ans Main Tool — daher `_load_controls` mocken.""" import copy async def _no_cascade(*a, **kw): return None, [] monkeypatch.setattr( "compliance.services.specialist_agents._semantic_validator.cascade", _no_cascade, ) async def _fake_load(doc_type, db_url, limit, business_scope=None): return copy.deepcopy(CORE_IMPRESSUM_MCS) monkeypatch.setattr( "compliance.services.rag_document_checker._load_controls", _fake_load, ) async def _no_embed(*a, **kw): return None async def _no_match(*a, **kw): return set() monkeypatch.setattr( "compliance.services.mc_embedding_matcher.ensure_mc_embeddings", _no_embed, raising=False, ) monkeypatch.setattr( "compliance.services.mc_embedding_matcher.embedding_match", _no_match, raising=False, ) @pytest.mark.parametrize("gt", ALL_GROUND_TRUTH, ids=lambda g: g.name) def test_no_false_positives_on_expected_clean(gt): """Felder die laut GT da sind dürfen keine Findings produzieren.""" agent = ImpressumAgent() out = _run(agent.evaluate(AgentInput( doc_type="impressum", text=gt.text, business_scope=list(gt.business_scope), ))) fp_field_ids = { f.field_id for f in out.findings if f.field_id in gt.expected_clean } assert not fp_field_ids, ( f"{gt.name}: FALSE-POSITIVE Findings für " f"explizit erwartete Felder: {sorted(fp_field_ids)}. " f"Alle Findings: " f"{sorted({f.field_id for f in out.findings})}." ) @pytest.mark.parametrize("gt", ALL_GROUND_TRUTH, ids=lambda g: g.name) def test_high_findings_have_norm_and_action(gt): """Falls Findings da sind, müssen sie norm + action enthalten.""" agent = ImpressumAgent() out = _run(agent.evaluate(AgentInput( doc_type="impressum", text=gt.text, business_scope=list(gt.business_scope), ))) for f in out.findings: assert f.norm, f"{gt.name}: Finding {f.check_id} ohne norm" assert f.action, f"{gt.name}: Finding {f.check_id} ohne action" def test_eto_no_findings_at_all(): """ETO-Impressum ist vollständig — 0 Findings erwartet.""" agent = ImpressumAgent() gt = next(g for g in ALL_GROUND_TRUTH if "ETO" in g.name) out = _run(agent.evaluate(AgentInput( doc_type="impressum", text=gt.text, business_scope=list(gt.business_scope), ))) assert not out.findings, ( f"ETO sollte 0 Findings haben, hat aber: " f"{[f.field_id for f in out.findings]}" ) def test_bmw_passes_full_check(): """BMW-Impressum hat alle Pflichtangaben — 0 Findings.""" agent = ImpressumAgent() gt = next(g for g in ALL_GROUND_TRUTH if "BMW" in g.name) out = _run(agent.evaluate(AgentInput( doc_type="impressum", text=gt.text, business_scope=list(gt.business_scope), ))) assert not out.findings, ( f"BMW sollte 0 Findings haben, hat aber: " f"{[f.field_id for f in out.findings]}" ) def test_hectronic_passes_with_editorial_scope(): """Hectronic nennt § 18 MStV → kein Finding bei editorial-scope.""" agent = ImpressumAgent() gt = next(g for g in ALL_GROUND_TRUTH if "Hectronic" in g.name) out = _run(agent.evaluate(AgentInput( doc_type="impressum", text=gt.text, business_scope=list(gt.business_scope), ))) field_ids = {f.field_id for f in out.findings} assert "verantwortlicher_redaktion" not in field_ids, ( f"Hectronic nennt § 18 MStV — sollte kein Finding sein. " f"Got: {sorted(field_ids)}" )