feat(agent): strukturierte Ergebnis-Tabs — Impressum (Phase 1)

Der Compliance-Check legt zusätzlich einen strukturierten v3-AgentOutput pro Thema in result.agent_outputs ab (additiv; B18-HTML + Firehose-Mail bleiben unangetastet). Frontend: standardisiertes Ergebnis-Tab statt Firehose — Impressum-Tab (AgentResultTab) + "Alle Checks (roh)" (ChecklistView). - backend: _agent_outputs.py ruft den registrierten v3-ImpressumAgent, gewired in _orchestrator nach B18, surfaced via _phase_f_persist. - frontend: AgentResultView (aus AgentSlotCard extrahiert, DRY), AgentResultTab, ComplianceResultTabs; ComplianceCheckTab 490->391 Zeilen. - Tests: backend 2 passed, frontend 2 passed; tsc 0 neue Fehler; check-loc 0. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-10 18:32:06 +02:00
parent 3aa49f9553
commit e21984e0ad
11 changed files with 622 additions and 192 deletions
@@ -0,0 +1,86 @@
+"""Run registered v3 specialist agents and surface their structured
+AgentOutput per topic for the standardized result tabs.
+
+Additive to the legacy B-wiring HTML (`_b18_wiring`): this does NOT
+replace it — it puts a clean, typed `AgentOutput` into
+`state["agent_outputs"][topic]`, which `_phase_f_persist` forwards into
+the API result so the frontend can render a per-topic tab.
+
+Phase 1 ships only impressum; the topic map extends to cookie / vendor /
+… as those agents get wired (same contract, no code change here beyond
+the map). Once the tabs are the source of truth, B18's v1 path retires.
+"""
+
+from __future__ import annotations
+
+import logging
+
+from compliance.services.specialist_agents import REGISTRY, AgentInput
+
+logger = logging.getLogger(__name__)
+
+# Maps a topic key (the key used in state["doc_texts"]) to the agent id looked up in REGISTRY.
+_TOPIC_AGENTS: dict[str, str] = {
+    "impressum": "impressum",
+}
+
+_MIN_TEXT = 100  # topics whose stripped text is shorter than this many characters are skipped
+
+
+def _derive_scope(profile_dict: dict) -> list[str]:
+    """Derive the sorted business-scope tags from the detected profile. Kept
+    identical to B18 so the tab sees the same scope as the existing evaluation;
+    the legal-form gate follows in a later phase (separate classification)."""
+    scope: set[str] = set()
+    if profile_dict.get("has_online_shop"):
+        scope.add("ecommerce")
+    if profile_dict.get("is_regulated_profession"):
+        scope.add("regulated_profession")
+    if profile_dict.get("industry") in ("insurance", "Finance", "finance"):
+        scope.add("insurance")  # finance profiles receive the same tag as insurance
+    return sorted(scope)
+
+
+async def run_agent_outputs(state: dict) -> None:
+    """For every topic with a registered v3 agent and enough text, run the
+    agent and store its JSON-dumped AgentOutput in state["agent_outputs"]."""
+    doc_texts = state.get("doc_texts") or {}
+    profile_dict = state.get("profile_dict") or {}
+    req = state.get("req")
+    company_name = (  # first non-empty of: request, extracted profile, site name
+        (getattr(req, "company_name", None) or "")
+        or (state.get("extracted_profile") or {}).get("company_name", "")
+        or state.get("site_name", "")
+    )
+    origin_domain = (  # request value first, then the domain stored in state
+        getattr(req, "origin_domain", None) or ""
+    ) or state.get("domain", "")
+    scope = _derive_scope(profile_dict)
+
+    outputs: dict[str, dict] = state.get("agent_outputs") or {}  # extend existing results, if any
+    for topic, agent_id in _TOPIC_AGENTS.items():
+        text = (doc_texts.get(topic) or "").strip()
+        if len(text) < _MIN_TEXT:
+            continue  # missing or too-short document text: nothing to evaluate
+        agent = REGISTRY.get(agent_id)
+        if agent is None:
+            logger.warning("agent_outputs: agent '%s' not registered", agent_id)
+            continue
+        try:
+            out = await agent.evaluate(AgentInput(
+                doc_type=topic,
+                text=text,
+                business_scope=scope,
+                company_name=company_name,
+                origin_domain=origin_domain,
+            ))
+            outputs[topic] = out.model_dump(mode="json")
+            logger.info(
+                "agent_outputs[%s]: %d findings, confidence %.2f",
+                topic, len(out.findings), out.confidence,
+            )
+        except Exception as e:  # noqa: BLE001 — best-effort, never break the run
+            logger.warning("agent_outputs[%s] failed: %s", topic, e)
+
+    if outputs:  # only write the key when at least one output exists
+        state["agent_outputs"] = outputs
@@ -16,6 +16,7 @@ from __future__ import annotations

 import logging

+from ._agent_outputs import run_agent_outputs
 from ._b1_wiring import run_b1
 from ._b3_wiring import run_b3
 from ._b4_wiring import run_b4
@@ -95,6 +96,9 @@ async def run_compliance_check(check_id: str, req) -> None:
        run_b16(state)  # Footer-Label-vs-URL-Slug-Drift
        await run_b17(state)  # Audit-Walk-Video (Beweis-Aufzeichnung)
        await run_b18(state)  # Impressum-Specialist-Agent (Pattern+LLM)
+        # Strukturierter v3-AgentOutput pro Thema → standardisierte
+        # Ergebnis-Tabs im Frontend (additiv zu B18-HTML).
+        await run_agent_outputs(state)
        run_b19(state)  # Cookie-Coherence (Salesforce-as-essential)
        await run_b20(state)  # Legacy-URL-Discovery (Sitemap+Wayback)
        run_b22(state)  # Cross-Domain-Legal-Doc-Hosting (Elli/LogPay)
@@ -93,6 +93,10 @@ def run_phase_f(state: dict) -> None:
            "legacy_urls": state.get("legacy_url_html", ""),
        },
        "legacy_url_inventory": state.get("legacy_url_inventory") or None,
+        # Strukturierter v3-AgentOutput pro Thema (impressum, …) für die
+        # standardisierten Ergebnis-Tabs im Frontend. Additiv; legacy
+        # clients ignorieren unbekannte Felder.
+        "agent_outputs": state.get("agent_outputs") or {},
    }

    _compliance_check_jobs[check_id]["status"] = "completed"