# breakpilot-compliance/backend-compliance/tests/test_impressum_groundtruth.py

"""Ground-Truth-Vergleich: lässt jedes Impressum durch den Agenten
laufen und vergleicht Output gegen expected_findings / expected_clean.

Hauptzweck: Pattern-Lücken sofort sichtbar machen sobald sie auftauchen.
"""

from __future__ import annotations

import asyncio

import pytest

from compliance.services.specialist_agents import AgentInput, ImpressumAgent
from tests.fixtures.impressum_groundtruth import ALL_GROUND_TRUTH


def _run(coro):
    """Run *coro* to completion on a private event loop and return its result.

    A fresh loop is created and closed for every call instead of relying on
    ``asyncio.get_event_loop()``, which is deprecated when no loop is current
    and raises ``RuntimeError`` in non-main threads and on newer Python
    versions. The process-wide "current" loop is deliberately left untouched
    so that other code relying on it is not affected.
    """
    loop = asyncio.new_event_loop()
    try:
        return loop.run_until_complete(coro)
    finally:
        # Always release the loop's resources, even when the coroutine raises.
        loop.close()


@pytest.fixture(autouse=True)
def _no_llm(monkeypatch):
    """Disable LLM escalation for the ground-truth tests.

    These tests exercise the MC patterns, not LLM output, so the ``cascade``
    attribute of the Impressum agent module is swapped for a stub.
    """
    async def _cascade_stub(*args, **kwargs):
        # Accept any call signature and always return ``(None, [])``.
        return None, []

    target = "compliance.services.specialist_agents.impressum.agent.cascade"
    monkeypatch.setattr(target, _cascade_stub)


@pytest.mark.parametrize("gt", ALL_GROUND_TRUTH, ids=lambda g: g.name)
def test_no_false_positives_on_expected_clean(gt):
    """Fields the ground truth lists as present must not produce findings."""
    agent = ImpressumAgent()
    request = AgentInput(
        doc_type="impressum",
        text=gt.text,
        business_scope=list(gt.business_scope),
    )
    out = _run(agent.evaluate(request))

    # All field ids the agent reported, then the subset that the ground
    # truth explicitly marks as clean (i.e. the false positives).
    reported_ids = {finding.field_id for finding in out.findings}
    fp_field_ids = {fid for fid in reported_ids if fid in gt.expected_clean}

    assert not fp_field_ids, (
        f"{gt.name}: FALSE-POSITIVE Findings für "
        f"explizit erwartete Felder: {sorted(fp_field_ids)}. "
        f"Alle Findings: "
        f"{sorted(reported_ids)}."
    )


@pytest.mark.parametrize("gt", ALL_GROUND_TRUTH, ids=lambda g: g.name)
def test_high_findings_have_norm_and_action(gt):
    """Every finding that is produced must carry both a norm and an action.

    NOTE(review): despite the "high" in the name, this checks all findings
    returned by the agent; no severity filter is applied.
    """
    agent = ImpressumAgent()
    request = AgentInput(
        doc_type="impressum",
        text=gt.text,
        business_scope=list(gt.business_scope),
    )
    result = _run(agent.evaluate(request))

    for finding in result.findings:
        assert finding.norm, (
            f"{gt.name}: Finding {finding.check_id} ohne norm"
        )
        assert finding.action, (
            f"{gt.name}: Finding {finding.check_id} ohne action"
        )


def test_eto_no_findings_at_all():
    """The ETO Impressum is complete, so zero findings are expected."""
    agent = ImpressumAgent()
    # Look the fixture up with a default so that a renamed or removed entry
    # fails with a readable message instead of a bare StopIteration.
    gt = next((g for g in ALL_GROUND_TRUTH if "ETO" in g.name), None)
    assert gt is not None, "No ground-truth fixture with 'ETO' in its name"
    out = _run(agent.evaluate(AgentInput(
        doc_type="impressum",
        text=gt.text,
        business_scope=list(gt.business_scope),
    )))
    assert not out.findings, (
        f"ETO sollte 0 Findings haben, hat aber: "
        f"{[f.field_id for f in out.findings]}"
    )


def test_bmw_passes_full_check():
    """The BMW Impressum has all mandatory details, so zero findings."""
    agent = ImpressumAgent()
    # Look the fixture up with a default so that a renamed or removed entry
    # fails with a readable message instead of a bare StopIteration.
    gt = next((g for g in ALL_GROUND_TRUTH if "BMW" in g.name), None)
    assert gt is not None, "No ground-truth fixture with 'BMW' in its name"
    out = _run(agent.evaluate(AgentInput(
        doc_type="impressum",
        text=gt.text,
        business_scope=list(gt.business_scope),
    )))
    assert not out.findings, (
        f"BMW sollte 0 Findings haben, hat aber: "
        f"{[f.field_id for f in out.findings]}"
    )


def test_hectronic_passes_with_editorial_scope():
    """Hectronic cites § 18 MStV, so no editorial-responsibility finding.

    Only the ``verantwortlicher_redaktion`` field is checked; other findings
    are tolerated by this test.
    """
    agent = ImpressumAgent()
    # Look the fixture up with a default so that a renamed or removed entry
    # fails with a readable message instead of a bare StopIteration.
    gt = next((g for g in ALL_GROUND_TRUTH if "Hectronic" in g.name), None)
    assert gt is not None, (
        "No ground-truth fixture with 'Hectronic' in its name"
    )
    out = _run(agent.evaluate(AgentInput(
        doc_type="impressum",
        text=gt.text,
        business_scope=list(gt.business_scope),
    )))
    field_ids = {f.field_id for f in out.findings}
    assert "verantwortlicher_redaktion" not in field_ids, (
        f"Hectronic nennt § 18 MStV — sollte kein Finding sein. "
        f"Got: {sorted(field_ids)}"
    )