# breakpilot-compliance/backend-compliance/tests/test_b18_impressum_agent.py

"""Tests for B18 Impressum-Specialist-Agent (Pattern + LLM)."""

import asyncio
from unittest.mock import AsyncMock, MagicMock, patch

from compliance.api.agent_check._b18_wiring import _render, run_b18
from compliance.services.specialist_agents.impressum_agent_llm import (
    _parse_response,
)


# Impressum fixture that lists company name, postal address, commercial
# register entry (Handelsregister), VAT ID (USt-IdNr), managing director
# (Geschäftsführer), phone number and e-mail address.
_GOOD_IMPRESSUM = """
Acme GmbH
Musterstraße 1
10115 Berlin

Handelsregister: HRB 12345 Berlin
USt-IdNr: DE123456789
Geschäftsführer: Max Mustermann

Telefon: +49 30 12345
E-Mail: info@acme.example
"""

# Impressum fixture that only carries company name, address and an e-mail
# contact; it contains no register entry, VAT ID, managing director or phone
# number. The trailing welcome sentences are filler prose — presumably padding
# so the text is not skipped as too short; confirm against run_b18's threshold.
_BAD_IMPRESSUM = (
    "Acme GmbH, Musterstraße 1, 10115 Berlin. "
    "Kontakt: info@acme.example. "
    "Wir freuen uns ueber Ihren Besuch auf unserer Website "
    "und ueber Ihr Interesse an unserem Unternehmen und unseren "
    "Produkten. Bitte beachten Sie auch unsere weiteren Hinweise."
)


class TestParseResponse:
    """Exercise ``_parse_response`` with clean, wrapped and unusable replies."""

    def test_pure_json(self):
        """A bare JSON object yields one finding with its field_id intact."""
        raw = '{"findings":[{"field_id":"foo","severity":"HIGH"}]}'
        findings = _parse_response(raw)
        assert len(findings) == 1
        assert findings[0]["field_id"] == "foo"

    def test_markdown_fenced_json(self):
        """JSON inside a ```json fence still yields its single finding."""
        raw = '```json\n{"findings":[{"field_id":"x"}]}\n```'
        findings = _parse_response(raw)
        assert len(findings) == 1

    def test_prose_wrapped(self):
        """JSON embedded between prose fragments still yields its finding."""
        raw = 'Hier ist die Analyse: {"findings":[{"field_id":"y"}]} Ende.'
        findings = _parse_response(raw)
        assert len(findings) == 1

    def test_empty(self):
        """An empty reply produces an empty list."""
        parsed = _parse_response("")
        assert parsed == []

    def test_garbage(self):
        """A reply without any JSON produces an empty list."""
        parsed = _parse_response("not json at all")
        assert parsed == []


class TestRunB18Wiring:
    """Checks of ``run_b18`` state handling, with the LLM evaluator stubbed."""

    @staticmethod
    def _run_with_llm(impressum, llm_findings):
        """Run ``run_b18`` on *impressum* while ``evaluate_llm`` is patched.

        The patched evaluator returns copies of *llm_findings*. Returns the
        state dict after the run so callers can inspect what was written.
        """
        async def fake_llm(text, scope):
            # Hand out fresh dicts on every call, as an inline literal would.
            return [dict(finding) for finding in llm_findings]

        state = {"doc_texts": {"impressum": impressum},
                 "profile_dict": {}}
        with patch(
            "compliance.api.agent_check._b18_wiring.evaluate_llm",
            new=fake_llm,
        ):
            asyncio.run(run_b18(state))
        return state

    def test_short_impressum_skipped(self):
        """A very short impressum text must not produce rendered output."""
        state = {"doc_texts": {"impressum": "tiny"}}
        asyncio.run(run_b18(state))
        assert "impressum_agent_html" not in state

    def test_no_impressum_skipped(self):
        """A missing impressum must neither raise nor produce rendered output."""
        state = {"doc_texts": {}}
        asyncio.run(run_b18(state))
        assert "impressum_agent_html" not in state

    def test_merges_pattern_and_llm(self):
        """The stubbed LLM finding must show up in ``extra_findings``."""
        # Pattern-agent will likely find no gaps in _GOOD_IMPRESSUM, so the
        # stubbed LLM contributes a fake additional finding.
        state = self._run_with_llm(_GOOD_IMPRESSUM, [{
            "check_id": "IMPRESSUM-AGENT-LLM-DPO",
            "agent": "impressum_agent_v2_llm",
            "field_id": "dpo",
            "severity": "MEDIUM",
            "title": "DSB-Verweis fehlt",
            "norm": "§ 5 TMG / DDG (LLM)",
            "evidence": "kein Hinweis auf DSB",
            "action": "DSB im Impressum verlinken",
        }])
        assert "impressum_agent_html" in state
        extras = state.get("extra_findings") or []
        # Fall back to "" so a finding without check_id fails the assertion
        # cleanly instead of raising TypeError on `"..." in None`.
        ids = [f.get("check_id") or "" for f in extras]
        assert any("LLM-DPO" in i for i in ids)

    def test_dedup_pattern_vs_llm_same_field(self):
        """Only one ``ust_id`` finding may remain after the merge."""
        # Pattern agent is expected to report ust_id for _BAD_IMPRESSUM; the
        # stubbed LLM also returns ust_id — only one should survive the dedup.
        state = self._run_with_llm(_BAD_IMPRESSUM, [{
            "check_id": "IMPRESSUM-AGENT-LLM-UST_ID",
            "agent": "impressum_agent_v2_llm",
            "field_id": "ust_id",
            "severity": "HIGH",
            "title": "duplicate ust_id finding",
            "norm": "§ 5 TMG",
            "evidence": "—",
            "action": "—",
        }])
        ust_findings = [
            f for f in state.get("extra_findings") or []
            if (f.get("field_id") or "").lower() == "ust_id"
        ]
        assert len(ust_findings) == 1


class TestRender:
    """Checks of the HTML produced by ``_render``."""

    def test_render_with_two_findings(self):
        """One pattern finding plus one LLM finding yield both tags and counts."""
        pattern_finding = {
            "check_id": "X", "title": "A", "severity": "HIGH",
            "agent": "impressum_agent_v1", "norm": "n", "action": "do",
        }
        llm_finding = {
            "check_id": "Y", "title": "B", "severity": "MEDIUM",
            "agent": "impressum_agent_v2_llm", "norm": "n", "action": "do",
        }
        html = _render(
            [pattern_finding, llm_finding],
            [pattern_finding],
            [llm_finding],
        )
        expected_fragments = (
            "KB",                # pattern tag
            "LLM",               # llm tag
            "Pattern-Match: 1",
            "LLM-Analyse: 1",
        )
        for fragment in expected_fragments:
            assert fragment in html