"""Tests für den Semantic-Validator-Layer.""" from __future__ import annotations import asyncio import pytest from compliance.services.specialist_agents import AgentInput, ImpressumAgent from compliance.services.specialist_agents._semantic_validator import ( STANDARD_LABELS, build_rename_action, standard_label, validate_present, ) def _run(coro): return asyncio.get_event_loop().run_until_complete(coro) def test_standard_labels_cover_impressum_fields(): """Alle Impressum-Pflichtangaben müssen ein Standard-Label haben.""" for fid in ( "kontakt_telefon", "kontakt_email", "vertretungsberechtigte", "handelsregister", "ust_id", "name_anbieter", ): assert fid in STANDARD_LABELS, f"missing standard label: {fid}" def test_build_rename_action_includes_old_and_new(): a = build_rename_action("kontakt_telefon", "Telefonnr.") assert "Telefonnr." in a assert "Telefon" in a assert "Best-Practice" in a or "Umbenennung" in a def test_standard_label_falls_back_to_field_id(): assert standard_label("kontakt_telefon") == "Telefon" assert standard_label("ghost_field") == "ghost_field" def test_validate_present_short_text_returns_empty(): out = _run(validate_present( "x", [("kontakt_telefon", "Telefon")], )) assert out == {} def test_validate_present_no_fields_returns_empty(): out = _run(validate_present("Long impressum text" * 100, [])) assert out == {} def test_semantic_demotion_high_to_low(monkeypatch): """Wenn LLM bestätigt dass Pflichtangabe da ist: HIGH→LOW. Test-Setup: Impressum-Text OHNE jegliche Telefon-Markierung (Pattern matched nicht). LLM-Mock behauptet aber 'Funkanschluss' wäre ein abweichendes Label für die Telefonnummer. """ from compliance.services.specialist_agents._escalation import ( EscalationResult, SourceType, ) from compliance.services.specialist_agents._base import EscalationLog async def _fake_cascade(sys_prompt, user_prompt, expect_json=True, skip_ovh=False): # Nur auf den SVL-Prompt reagieren if "FEHLENDE PFLICHTANGABEN" not in user_prompt: return None, [] log = EscalationLog( stage=SourceType.LLM_LOCAL, model="qwen2.5:7b", duration_ms=42, success=True, ) res = EscalationResult( content='{"results":[]}', stage=SourceType.LLM_LOCAL, model="qwen2.5:7b", log=log, parsed={"results": [{ "field_id": "kontakt_telefon", "found": True, "label_used": "Funkanschluss", "evidence": "Funkanschluss 0761/123456", "confidence": 0.9, }]}, ) return res, [log] monkeypatch.setattr( "compliance.services.specialist_agents._semantic_validator.cascade", _fake_cascade, ) monkeypatch.setattr( "compliance.services.specialist_agents.impressum.agent.cascade", _fake_cascade, ) # Text OHNE Telefon-Label → MC matched nicht → HIGH-Finding text = ( "Beispiel GmbH\nMusterstr. 1\n12345 Berlin\n" "E-Mail: x@y.de\nFunkanschluss 0761/123456\n" "Geschäftsführer: Max Mustermann\n" "Handelsregister Berlin HRB 12345\n" "USt-IdNr: DE123456789" ) agent = ImpressumAgent() out = _run(agent.evaluate(AgentInput(doc_type="impressum", text=text))) telefon_findings = [f for f in out.findings if f.field_id == "kontakt_telefon"] assert telefon_findings, "expected MC-miss → finding" f = telefon_findings[0] # Erwartet: SVL hat demoted zu LOW assert f.severity == "LOW", ( f"Erwartet: LOW nach semantic-demote, got: {f.severity}. " f"Finding: {f}" ) assert f.severity_reason == "label_mismatch" assert "Funkanschluss" in f.action assert "Telefon" in f.action