feat(agent): strukturierte Ergebnis-Tabs — Impressum (Phase 1)

Der Compliance-Check legt zusätzlich einen strukturierten v3-AgentOutput
pro Thema in result.agent_outputs ab (additiv; B18-HTML + Firehose-Mail
bleiben unangetastet). Frontend: standardisiertes Ergebnis-Tab statt
Firehose — Impressum-Tab (AgentResultTab) + "Alle Checks (roh)" (ChecklistView).

- backend: _agent_outputs.py ruft den registrierten v3-ImpressumAgent,
  gewired in _orchestrator nach B18, surfaced via _phase_f_persist.
- frontend: AgentResultView (aus AgentSlotCard extrahiert, DRY),
  AgentResultTab, ComplianceResultTabs; ComplianceCheckTab 490->391 Zeilen.
- Tests: backend 2 passed, frontend 2 passed; tsc 0 neue Fehler; check-loc 0.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-06-10 18:32:06 +02:00
parent 3aa49f9553
commit e21984e0ad
11 changed files with 622 additions and 192 deletions
@@ -0,0 +1,86 @@
"""Run registered v3 specialist agents and surface their structured
AgentOutput per topic for the standardized result tabs.
Additive to the legacy B-wiring HTML (`_b18_wiring`): this does NOT
replace it — it puts a clean, typed `AgentOutput` into
`state["agent_outputs"][topic]`, which `_phase_f_persist` forwards into
the API result so the frontend can render a per-topic tab.
Phase 1 ships only impressum; the topic map extends to cookie / vendor /
… as those agents get wired (same contract, no code change here beyond
the map). Once the tabs are the source of truth, B18's v1 path retires.
"""
from __future__ import annotations
import logging
from compliance.services.specialist_agents import REGISTRY, AgentInput
logger = logging.getLogger(__name__)
# topic key (matches state["doc_texts"]) -> registered agent_id
_TOPIC_AGENTS: dict[str, str] = {
"impressum": "impressum",
}
_MIN_TEXT = 100
def _derive_scope(profile_dict: dict) -> list[str]:
"""Business-scope aus dem erkannten Profil — identisch zu B18, damit
der Tab denselben Scope sieht wie die bestehende Auswertung. Das
Rechtsform-Gate kommt in einer späteren Phase (eigene Klassifizierung)."""
scope: set[str] = set()
if profile_dict.get("has_online_shop"):
scope.add("ecommerce")
if profile_dict.get("is_regulated_profession"):
scope.add("regulated_profession")
if profile_dict.get("industry") in ("insurance", "Finance", "finance"):
scope.add("insurance")
return sorted(scope)
async def run_agent_outputs(state: dict) -> None:
    """Run each registered v3 agent that has enough text and store its output.

    For every topic in ``_TOPIC_AGENTS`` whose stripped text in
    ``state["doc_texts"]`` is at least ``_MIN_TEXT`` characters long, the
    agent is looked up in ``REGISTRY``, awaited via ``evaluate`` and its
    result is stored as a JSON-mode dump in ``state["agent_outputs"][topic]``.

    The step is best-effort: an unregistered agent or a failing evaluation
    is logged as a warning and skipped instead of aborting the loop.

    Args:
        state: Mutable check state. Reads ``doc_texts``, ``profile_dict``,
            ``req``, ``extracted_profile``, ``site_name``, ``domain`` and
            ``agent_outputs``; (re)assigns ``agent_outputs`` only when the
            resulting mapping is non-empty.
    """
    doc_texts = state.get("doc_texts") or {}
    profile_dict = state.get("profile_dict") or {}
    req = state.get("req")

    # Each fallback chain ends in "" so that a key which is present but set
    # to None can never hand None to AgentInput.
    company_name = (
        getattr(req, "company_name", None)
        or (state.get("extracted_profile") or {}).get("company_name")
        or state.get("site_name")
        or ""
    )
    origin_domain = (
        getattr(req, "origin_domain", None) or state.get("domain") or ""
    )
    scope = _derive_scope(profile_dict)

    # Reuse an existing mapping so previously stored topics are kept.
    outputs: dict[str, dict] = state.get("agent_outputs") or {}

    for topic, agent_id in _TOPIC_AGENTS.items():
        text = (doc_texts.get(topic) or "").strip()
        if len(text) < _MIN_TEXT:
            # Too little text to evaluate; skip this topic.
            continue

        agent = REGISTRY.get(agent_id)
        if agent is None:
            logger.warning("agent_outputs: agent '%s' not registered", agent_id)
            continue

        try:
            out = await agent.evaluate(AgentInput(
                doc_type=topic,
                text=text,
                business_scope=scope,
                company_name=company_name,
                origin_domain=origin_domain,
            ))
            outputs[topic] = out.model_dump(mode="json")
            logger.info(
                "agent_outputs[%s]: %d findings, confidence %.2f",
                topic, len(out.findings), out.confidence,
            )
        except Exception as e:  # noqa: BLE001 — best-effort, never break the run
            # Keep the traceback in the log so swallowed failures stay debuggable.
            logger.warning(
                "agent_outputs[%s] failed: %s", topic, e, exc_info=True,
            )

    if outputs:
        state["agent_outputs"] = outputs
@@ -16,6 +16,7 @@ from __future__ import annotations
import logging
from ._agent_outputs import run_agent_outputs
from ._b1_wiring import run_b1
from ._b3_wiring import run_b3
from ._b4_wiring import run_b4
@@ -95,6 +96,9 @@ async def run_compliance_check(check_id: str, req) -> None:
run_b16(state) # Footer-Label-vs-URL-Slug-Drift
await run_b17(state) # Audit-Walk-Video (Beweis-Aufzeichnung)
await run_b18(state) # Impressum-Specialist-Agent (Pattern+LLM)
# Strukturierter v3-AgentOutput pro Thema → standardisierte
# Ergebnis-Tabs im Frontend (additiv zu B18-HTML).
await run_agent_outputs(state)
run_b19(state) # Cookie-Coherence (Salesforce-as-essential)
await run_b20(state) # Legacy-URL-Discovery (Sitemap+Wayback)
run_b22(state) # Cross-Domain-Legal-Doc-Hosting (Elli/LogPay)
@@ -93,6 +93,10 @@ def run_phase_f(state: dict) -> None:
"legacy_urls": state.get("legacy_url_html", ""),
},
"legacy_url_inventory": state.get("legacy_url_inventory") or None,
# Strukturierter v3-AgentOutput pro Thema (impressum, …) für die
# standardisierten Ergebnis-Tabs im Frontend. Additiv; legacy
# clients ignorieren unbekannte Felder.
"agent_outputs": state.get("agent_outputs") or {},
}
_compliance_check_jobs[check_id]["status"] = "completed"
@@ -0,0 +1,80 @@
"""Phase 1: der Compliance-Check legt einen strukturierten v3-AgentOutput
pro Thema in state['agent_outputs'][topic] ab (für die Ergebnis-Tabs).
Offline + deterministisch: die einzige LLM-Stelle im registrierten
ImpressumAgent ist `validate_present` (Semantic-Validator) — gemockt.
"""
from __future__ import annotations
import asyncio
import pytest
# Imprint fixture containing name, postal address and managing director but
# deliberately NO e-mail, phone, commercial-register entry or VAT ID. The
# gaps are meant to make the agent emit findings (each expected to carry a
# norm and an action, as asserted by the test that uses this text).
IMPRESSUM_TEXT = "\n".join([
    "Angaben gemäß § 5 TMG",
    "",
    "Musterfirma GmbH",
    "Musterstraße 1",
    "12345 Berlin",
    "",
    "Vertreten durch den Geschäftsführer: Max Mustermann",
    "",
    "Wir betreiben einen Online-Shop für Musterprodukte aller Art. "
    "Weitere Informationen finden Sie auf unserer Website.",
    "",  # yields the trailing newline
])
@pytest.fixture(autouse=True)
def _llm_offline(monkeypatch):
    """Replace the semantic validator with a stub for every test in this module.

    The stub is an async callable that accepts any arguments and returns an
    empty dict. ``raising=False`` means the patch is applied even when the
    target attribute does not currently exist.
    """
    target = (
        "compliance.services.specialist_agents.impressum.agent.validate_present"
    )

    async def _stub_validate(*_args, **_kwargs):
        return {}

    monkeypatch.setattr(target, _stub_validate, raising=False)
def test_run_agent_outputs_populates_structured_impressum():
    """An incomplete imprint yields a structured, fully annotated output."""
    from compliance.api.agent_check._agent_outputs import run_agent_outputs

    check_state = dict(
        doc_texts={"impressum": IMPRESSUM_TEXT},
        profile_dict={"has_online_shop": True},
        req=None,
        extracted_profile={"company_name": "Musterfirma GmbH"},
        site_name="musterfirma.de",
        domain="musterfirma.de",
    )
    asyncio.run(run_agent_outputs(check_state))

    result = (check_state.get("agent_outputs") or {}).get("impressum")
    assert result is not None, "impressum AgentOutput muss im Ergebnis liegen"
    assert result["agent"] == "impressum"

    findings = result["findings"]
    assert isinstance(findings, list)

    # Incomplete imprint -> at least one finding, each with a remediation.
    assert findings, "erwarte Findings für ein unvollständiges Impressum"
    assert all(item.get("action") for item in findings), \
        "jedes Finding trägt eine Abstellmaßnahme (action)"

    # Audit-proof: legal basis and source on every finding.
    assert all(item.get("norm") for item in findings)
    assert all(item.get("sources") for item in findings)

    # Aggregate field for the speedometer is precomputed.
    assert result["mc_total"] >= 1

    # Lint-clean: no banned disclaimer terms anywhere in the output.
    serialized = str(result).lower()
    for banned in ("rechtssicher", "garantiert", "gesetzeskonform"):
        assert banned not in serialized
def test_run_agent_outputs_skips_short_text():
    """A text shorter than the minimum length produces no impressum output."""
    from compliance.api.agent_check._agent_outputs import run_agent_outputs

    check_state = dict(
        doc_texts={"impressum": "zu kurz"},
        profile_dict={},
        req=None,
    )
    asyncio.run(run_agent_outputs(check_state))

    stored = check_state.get("agent_outputs") or {}
    assert not stored.get("impressum")