feat(agb): wire validated routed AGB engine into live check path

Consolidate the AGB C-lean engine (71% FP -> ~0, validated vs 7-company Opus GT) onto the canonical checker library and into the live check path. - AGBAgent.evaluate now runs routed C-lean: keyword (L1/L2) -> business- model gate -> per-item decision_method routing (embedding/reference/llm via services/checkers/) -> severity re-tiering (LOW -> recommendation), honoring context.skip_llm. - New agb/_pipeline.py orchestrates the routing; agent.py stays thin. - Remove the 3 AGB-local checker duplicates (_reference_check, _embedding_rescue, _llm_judge); services/checkers/ is now canonical. - Wire "agb" into _agent_outputs._TOPIC_AGENTS so the live check emits a validated AGB tab (was snapshot-only). - Run topic agents concurrently (asyncio.gather) + emit each tab via SSE as it finishes -> progressive results, no wait on the slowest agent. - Tests: checker units (mocked), routed agent (gate/rescue/re-tier), topic wiring; existing AGB tests made offline-safe. dev-only, no deploy. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-21 10:40:08 +02:00
parent 9d79cf1576
commit 32e45f0797
10 changed files with 341 additions and 195 deletions
@@ -13,6 +13,7 @@ the map). Once the tabs are the source of truth, B18's v1 path retires.

 from __future__ import annotations

+import asyncio
 import logging

 from compliance.services.specialist_agents import REGISTRY, AgentInput
@@ -27,6 +28,7 @@ logger = logging.getLogger(__name__)
 # topic key (matches state["doc_texts"]) -> registered agent_id
 _TOPIC_AGENTS: dict[str, str] = {
    "impressum": "impressum",
+    "agb": "agb",  # v2: AGBAgent mit decision_method-Routing (71% FP -> ~0)
 }

 _MIN_TEXT = 100
@@ -112,14 +114,17 @@ async def run_agent_outputs(state: dict) -> None:
    )

    outputs: dict[str, dict] = state.get("agent_outputs") or {}
-    for topic, agent_id in _TOPIC_AGENTS.items():
+
+    async def _run_one(topic: str, agent_id: str):
+        """Einen Topic-Agent laufen lassen + sein Tab-Event sofort emittieren
+        (Zwischenbefund). Fängt eigene Fehler → ein Agent reißt den Run nicht ab."""
        text = (doc_texts.get(topic) or "").strip()
        if len(text) < _MIN_TEXT:
-            continue
+            return None
        agent = REGISTRY.get(agent_id)
        if agent is None:
            logger.warning("agent_outputs: agent '%s' not registered", agent_id)
-            continue
+            return None
        try:
            out = await agent.evaluate(AgentInput(
                doc_type=topic,
@@ -128,15 +133,25 @@ async def run_agent_outputs(state: dict) -> None:
                company_name=company_name,
                origin_domain=origin_domain,
            ))
-            outputs[topic] = out.model_dump(mode="json")
-            emit(check_id, {"type": "topic", "topic": topic,
-                            "output": outputs[topic]})
+            dump = out.model_dump(mode="json")
+            emit(check_id, {"type": "topic", "topic": topic, "output": dump})
            logger.info(
                "agent_outputs[%s]: %d findings, confidence %.2f",
                topic, len(out.findings), out.confidence,
            )
+            return topic, dump
        except Exception as e:  # noqa: BLE001 — best-effort, never break the run
            logger.warning("agent_outputs[%s] failed: %s", topic, e)
+            return None
+
+    # Topic-Agenten laufen NEBENLÄUFIG (ihre Embedding-/LLM-Waits überlappen) und
+    # füllen ihren Tab via SSE, sobald sie fertig sind — kein Warten aufs Schlusslicht.
+    results = await asyncio.gather(
+        *(_run_one(topic, agent_id) for topic, agent_id in _TOPIC_AGENTS.items())
+    )
+    for r in results:
+        if r:
+            outputs[r[0]] = r[1]

    if outputs:
        state["agent_outputs"] = outputs