feat(agent): strukturierte Ergebnis-Tabs — Impressum (Phase 1)

Der Compliance-Check legt zusätzlich einen strukturierten v3-AgentOutput pro Thema in result.agent_outputs ab (additiv; B18-HTML + Firehose-Mail bleiben unangetastet).

Frontend: standardisiertes Ergebnis-Tab statt Firehose — Impressum-Tab (AgentResultTab) + "Alle Checks (roh)" (ChecklistView).

- backend: _agent_outputs.py ruft den registrierten v3-ImpressumAgent, gewired in _orchestrator nach B18, surfaced via _phase_f_persist.
- frontend: AgentResultView (aus AgentSlotCard extrahiert, DRY), AgentResultTab, ComplianceResultTabs; ComplianceCheckTab 490->391 Zeilen.
- Tests: backend 2 passed, frontend 2 passed; tsc 0 neue Fehler; check-loc 0.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-10 18:32:06 +02:00
parent 3aa49f9553
commit e21984e0ad
11 changed files with 622 additions and 192 deletions
@@ -0,0 +1,86 @@
+"""Run registered v3 specialist agents and surface their structured
+AgentOutput per topic for the standardized result tabs.
+
+Additive to the legacy B-wiring HTML (`_b18_wiring`): this does NOT
+replace it — it puts a clean, typed `AgentOutput` into
+`state["agent_outputs"][topic]`, which `_phase_f_persist` forwards into
+the API result so the frontend can render a per-topic tab.
+
+Phase 1 ships only impressum; the topic map extends to cookie / vendor /
+… as those agents get wired (same contract, no code change here beyond
+the map). Once the tabs are the source of truth, B18's v1 path retires.
+"""
+
+from __future__ import annotations
+
+import logging
+
+from compliance.services.specialist_agents import REGISTRY, AgentInput
+
+logger = logging.getLogger(__name__)
+
+# topic key (matches state["doc_texts"]) -> registered agent_id
+_TOPIC_AGENTS: dict[str, str] = {
+    "impressum": "impressum",
+}
+
+_MIN_TEXT = 100
+
+
+def _derive_scope(profile_dict: dict) -> list[str]:
+    """Business-scope aus dem erkannten Profil — identisch zu B18, damit
+    der Tab denselben Scope sieht wie die bestehende Auswertung. Das
+    Rechtsform-Gate kommt in einer späteren Phase (eigene Klassifizierung)."""
+    scope: set[str] = set()
+    if profile_dict.get("has_online_shop"):
+        scope.add("ecommerce")
+    if profile_dict.get("is_regulated_profession"):
+        scope.add("regulated_profession")
+    if profile_dict.get("industry") in ("insurance", "Finance", "finance"):
+        scope.add("insurance")
+    return sorted(scope)
+
+
+async def run_agent_outputs(state: dict) -> None:
+    """Für jedes Topic mit registriertem v3-Agent + ausreichend Text:
+    Agent laufen lassen und den strukturierten AgentOutput ablegen."""
+    doc_texts = state.get("doc_texts") or {}
+    profile_dict = state.get("profile_dict") or {}
+    req = state.get("req")
+    company_name = (
+        (getattr(req, "company_name", None) or "")
+        or (state.get("extracted_profile") or {}).get("company_name", "")
+        or state.get("site_name", "")
+    )
+    origin_domain = (
+        getattr(req, "origin_domain", None) or ""
+    ) or state.get("domain", "")
+    scope = _derive_scope(profile_dict)
+
+    outputs: dict[str, dict] = state.get("agent_outputs") or {}
+    for topic, agent_id in _TOPIC_AGENTS.items():
+        text = (doc_texts.get(topic) or "").strip()
+        if len(text) < _MIN_TEXT:
+            continue
+        agent = REGISTRY.get(agent_id)
+        if agent is None:
+            logger.warning("agent_outputs: agent '%s' not registered", agent_id)
+            continue
+        try:
+            out = await agent.evaluate(AgentInput(
+                doc_type=topic,
+                text=text,
+                business_scope=scope,
+                company_name=company_name,
+                origin_domain=origin_domain,
+            ))
+            outputs[topic] = out.model_dump(mode="json")
+            logger.info(
+                "agent_outputs[%s]: %d findings, confidence %.2f",
+                topic, len(out.findings), out.confidence,
+            )
+        except Exception as e:  # noqa: BLE001 — best-effort, never break the run
+            logger.warning("agent_outputs[%s] failed: %s", topic, e)
+
+    if outputs:
+        state["agent_outputs"] = outputs
@@ -16,6 +16,7 @@ from __future__ import annotations

 import logging

+from ._agent_outputs import run_agent_outputs
 from ._b1_wiring import run_b1
 from ._b3_wiring import run_b3
 from ._b4_wiring import run_b4
@@ -95,6 +96,9 @@ async def run_compliance_check(check_id: str, req) -> None:
        run_b16(state)  # Footer-Label-vs-URL-Slug-Drift
        await run_b17(state)  # Audit-Walk-Video (Beweis-Aufzeichnung)
        await run_b18(state)  # Impressum-Specialist-Agent (Pattern+LLM)
+        # Strukturierter v3-AgentOutput pro Thema → standardisierte
+        # Ergebnis-Tabs im Frontend (additiv zu B18-HTML).
+        await run_agent_outputs(state)
        run_b19(state)  # Cookie-Coherence (Salesforce-as-essential)
        await run_b20(state)  # Legacy-URL-Discovery (Sitemap+Wayback)
        run_b22(state)  # Cross-Domain-Legal-Doc-Hosting (Elli/LogPay)
@@ -93,6 +93,10 @@ def run_phase_f(state: dict) -> None:
            "legacy_urls": state.get("legacy_url_html", ""),
        },
        "legacy_url_inventory": state.get("legacy_url_inventory") or None,
+        # Strukturierter v3-AgentOutput pro Thema (impressum, …) für die
+        # standardisierten Ergebnis-Tabs im Frontend. Additiv; legacy
+        # clients ignorieren unbekannte Felder.
+        "agent_outputs": state.get("agent_outputs") or {},
    }

    _compliance_check_jobs[check_id]["status"] = "completed"
@@ -0,0 +1,80 @@
+"""Phase 1: der Compliance-Check legt einen strukturierten v3-AgentOutput
+pro Thema in state['agent_outputs'][topic] ab (für die Ergebnis-Tabs).
+
+Offline + deterministisch: die einzige LLM-Stelle im registrierten
+ImpressumAgent ist `validate_present` (Semantic-Validator) — gemockt.
+"""
+
+from __future__ import annotations
+
+import asyncio
+
+import pytest
+
+# Impressum mit Name+Anschrift+Geschäftsführer, aber OHNE Email, Telefon,
+# Handelsregister, USt-IdNr → erzwingt Findings (alle mit norm + action).
+IMPRESSUM_TEXT = (
+    "Angaben gemäß § 5 TMG\n\n"
+    "Musterfirma GmbH\n"
+    "Musterstraße 1\n"
+    "12345 Berlin\n\n"
+    "Vertreten durch den Geschäftsführer: Max Mustermann\n\n"
+    "Wir betreiben einen Online-Shop für Musterprodukte aller Art. "
+    "Weitere Informationen finden Sie auf unserer Website.\n"
+)
+
+
+@pytest.fixture(autouse=True)
+def _llm_offline(monkeypatch):
+    """Semantic-Validator (LLM) neutralisieren → rein deterministischer Lauf."""
+    async def _no_validate(*_a, **_kw):
+        return {}
+    monkeypatch.setattr(
+        "compliance.services.specialist_agents.impressum.agent.validate_present",
+        _no_validate,
+        raising=False,
+    )
+
+
+def test_run_agent_outputs_populates_structured_impressum():
+    from compliance.api.agent_check._agent_outputs import run_agent_outputs
+
+    state = {
+        "doc_texts": {"impressum": IMPRESSUM_TEXT},
+        "profile_dict": {"has_online_shop": True},
+        "req": None,
+        "extracted_profile": {"company_name": "Musterfirma GmbH"},
+        "site_name": "musterfirma.de",
+        "domain": "musterfirma.de",
+    }
+    asyncio.run(run_agent_outputs(state))
+
+    out = (state.get("agent_outputs") or {}).get("impressum")
+    assert out is not None, "impressum AgentOutput muss im Ergebnis liegen"
+    assert out["agent"] == "impressum"
+    assert isinstance(out["findings"], list)
+    # Unvollständiges Impressum → mind. ein Finding, jedes mit Abstellmaßnahme
+    assert out["findings"], "erwarte Findings für ein unvollständiges Impressum"
+    assert all(f.get("action") for f in out["findings"]), \
+        "jedes Finding trägt eine Abstellmaßnahme (action)"
+    # Auditfest: Rechtsgrundlage + Quelle je Finding
+    assert all(f.get("norm") for f in out["findings"])
+    assert all(f.get("sources") for f in out["findings"])
+    # Aggregat-Felder fürs Speedometer vorberechnet
+    assert out["mc_total"] >= 1
+    # Linter-sauber: keine verbotenen Disclaimer-Begriffe im Output
+    blob = str(out).lower()
+    for term in ("rechtssicher", "garantiert", "gesetzeskonform"):
+        assert term not in blob
+
+
+def test_run_agent_outputs_skips_short_text():
+    from compliance.api.agent_check._agent_outputs import run_agent_outputs
+
+    state = {
+        "doc_texts": {"impressum": "zu kurz"},
+        "profile_dict": {},
+        "req": None,
+    }
+    asyncio.run(run_agent_outputs(state))
+    assert not (state.get("agent_outputs") or {}).get("impressum")