e8ff75cbfe
5 Backlog-Items aus dem Multi-Site-Briefing in einem Sprint:
1. B13 B2C-Soft-Hints — Versicherungs/Tarif/Buchungs-Marker
_B2C_WEAK erweitert um "Reiseversicherung", "Tarifrechner",
"Online-Antrag", "Flug buchen", "Stromtarif" etc.
Fängt Allianz-Reise-Chatbot (vorher False-Negative).
2. Chatbot-Policy-Discovery (chatbot_policy_discovery.py)
Probt 14 Standard-Slugs (privacypolicychatbot, chatbot-datenschutz,
ai-policy, ki-datenschutz, ...) × 5 Lang-Prefixe auf jeder
submitted Origin. Successful >300-Wort-Findings werden in
doc_texts['dse'] gemerged. Audit-Trail über
doc_entries[dse].chatbot_policy_sources.
Hebt Westfield-iAdvize-Lücke.
3. API-Response-Payload erweitert
phase_f_persist.response um extra_findings, audit_walk und
html_blocks erweitert. B-Wiring-Output (B1, B3-B18) ist nicht
mehr nur im Mail-HTML versteckt — externe Aufrufer sehen jeden
Finding. Schema additiv, legacy clients ignorieren neue Felder.
4. Plausibility-LLM Empty-Response-Fix
Resilienz-Strategie A→B→C→D:
A) format='json' (strict, default)
B) format='' (loose, _try_extract_json mit ```json-fence + prose-
wrap-Unterstützung)
C) Split-Batch-Recursion (vorhanden)
D) Give up, leeres dict (callers behandeln als skipped)
Plus _post_llm() als isolierter LLM-Call-Helper, der
Network-Errors abfängt.
5. Specialist-Agents Phase 2 LLM (MVP) — Impressum-Agent
impressum_agent_llm.py: qwen3:30b-a3b mit § 5 TMG System-Prompt,
business_scope-hints aus profile_dict. Output identisches Schema
wie pattern-agent für ein Merge ohne API-Bruch.
_b18_wiring.py orchestriert beide Agents + dedupliziert nach
field_id, rendert lila V2-Block mit KB/LLM-Tags pro Finding.
Pattern-first im Dedup (deterministisch + stable).
Tests: 107/107 grün (7 Test-Suites + chatbot-discovery + b18).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
131 lines
4.7 KiB
Python
131 lines
4.7 KiB
Python
"""B18 wiring — Specialist-Agents Phase 2 (Impressum LLM).
|
|
|
|
Calls the LLM agent (impressum_agent_llm.evaluate_llm), merges its
result with the pattern-match agent's findings and deduplicates by
field_id.
Renders a V2 HTML block (impressum_agent_html).
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import html
|
|
import logging
|
|
import os
|
|
|
|
from compliance.services.specialist_agents.impressum_agent import (
|
|
PFLICHTANGABEN, evaluate as evaluate_pattern,
|
|
)
|
|
from compliance.services.specialist_agents.impressum_agent_llm import (
|
|
evaluate_llm,
|
|
)
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
_DISABLED = os.environ.get("IMPRESSUM_AGENT_DISABLED", "").lower() in (
|
|
"1", "true", "yes",
|
|
)
|
|
|
|
|
|
async def run_b18(state: dict) -> None:
    """Run both Impressum agents and record their merged findings in ``state``.

    Reads ``state["doc_texts"]["impressum"]`` and ``state["profile_dict"]``.
    Returns without touching ``state`` when the step is disabled via
    ``_DISABLED``, when the stripped Impressum text is shorter than 100
    characters, or when neither agent reports a finding.

    Otherwise the merged findings are appended to ``state["extra_findings"]``
    and the rendered HTML block is stored in ``state["impressum_agent_html"]``.

    Args:
        state: Mutable pipeline state dict; updated in place.
    """
    if _DISABLED:
        return
    doc_texts = state.get("doc_texts") or {}
    imp = (doc_texts.get("impressum") or "").strip()
    if len(imp) < 100:
        return

    # Infer the business scope from the profile, if one is present.
    profile_dict = state.get("profile_dict") or {}
    scope: set[str] = set()
    if profile_dict.get("has_online_shop"):
        scope.add("ecommerce")
    if profile_dict.get("is_regulated_profession"):
        scope.add("regulated_profession")
    # Compare case-insensitively so "Insurance" / "FINANCE" are treated the
    # same as the lowercase spellings; non-string values never match.
    industry = profile_dict.get("industry")
    if (isinstance(industry, str)
            and industry.strip().lower() in ("insurance", "finance")):
        scope.add("insurance")

    # Tolerate an agent returning None instead of an empty list.
    pattern_findings = list(evaluate_pattern(imp, scope) or [])
    try:
        llm_findings = list(await evaluate_llm(imp, scope) or [])
    except Exception:
        # A failing LLM call must not discard the deterministic pattern
        # findings: log it and continue with the pattern results only.
        logger.exception("B18 impressum-agent: LLM evaluation failed")
        llm_findings = []

    # Dedup: both agents can report the same field_id. Pattern findings are
    # iterated first, so they win over an LLM finding for the same field.
    seen_keys: set[str] = set()
    merged: list[dict] = []
    for finding in pattern_findings + llm_findings:
        if not isinstance(finding, dict):
            continue
        # Coerce to str before normalising: a non-string field_id would
        # otherwise raise on .lower().
        key = str(finding.get("field_id") or "").strip().lower()
        if key:
            if key in seen_keys:
                continue
            seen_keys.add(key)
        # Findings without a field_id cannot be deduplicated and are kept.
        merged.append(finding)

    if not merged:
        return

    extras = state.get("extra_findings") or []
    extras.extend(merged)
    state["extra_findings"] = extras
    state["impressum_agent_html"] = _render(merged, pattern_findings,
                                            llm_findings)
    logger.info(
        "B18 impressum-agent: pattern=%d llm=%d merged=%d",
        len(pattern_findings), len(llm_findings), len(merged),
    )
|
|
|
|
|
|
def _render(merged: list[dict], pattern: list[dict],
|
|
llm: list[dict]) -> str:
|
|
cards = []
|
|
for f in merged:
|
|
sev = (f.get("severity") or "").upper()
|
|
color = "#dc2626" if sev == "HIGH" else (
|
|
"#f59e0b" if sev == "MEDIUM" else "#64748b"
|
|
)
|
|
agent_tag = f.get("agent") or ""
|
|
tag_html = ""
|
|
if agent_tag:
|
|
short = "LLM" if "llm" in agent_tag.lower() else "KB"
|
|
bg = "#dbeafe" if short == "LLM" else "#f1f5f9"
|
|
col = "#1e40af" if short == "LLM" else "#475569"
|
|
tag_html = (
|
|
f"<span style='display:inline-block;background:{bg};"
|
|
f"color:{col};font-size:10px;padding:1px 6px;"
|
|
f"border-radius:999px;margin-left:6px;'>{short}</span>"
|
|
)
|
|
evidence_html = ""
|
|
if f.get("evidence"):
|
|
evidence_html = (
|
|
"<div style='font-size:12px;color:#475569;margin-top:6px;'>"
|
|
f"<em>{html.escape(f['evidence'])}</em></div>"
|
|
)
|
|
cards.append(
|
|
f"<div style='margin:12px 0;padding:14px;background:#fff;"
|
|
f"border-left:3px solid {color};border-radius:4px;'>"
|
|
f"<div style='font-weight:600;color:{color};font-size:14px;'>"
|
|
f"{sev} · {html.escape(f.get('check_id') or '')}{tag_html}</div>"
|
|
f"<div style='font-size:14px;margin-top:4px;'>"
|
|
f"<strong>{html.escape(f.get('title') or '')}</strong></div>"
|
|
f"<div style='font-size:12px;color:#64748b;margin-top:2px;'>"
|
|
f"{html.escape(f.get('norm') or '')}</div>"
|
|
f"{evidence_html}"
|
|
f"<div style='font-size:13px;margin-top:8px;background:#dcfce7;"
|
|
f"padding:8px 10px;border-radius:4px;'>"
|
|
f"<strong>→ Empfehlung:</strong> "
|
|
f"{html.escape(f.get('action') or '')}</div>"
|
|
"</div>"
|
|
)
|
|
return (
|
|
"<div style='margin:24px 0;padding:16px;border-left:4px solid #8b5cf6;"
|
|
"background:#faf5ff;border-radius:4px;'>"
|
|
"<h2 style='margin:0 0 8px;color:#5b21b6;font-size:16px;'>"
|
|
"🤖 Impressum-Specialist-Agent (Pattern-KB + LLM)"
|
|
"</h2>"
|
|
f"<p style='margin:0 0 8px;font-size:12px;color:#475569;'>"
|
|
f"Pattern-Match: {len(pattern)} · LLM-Analyse: {len(llm)} · "
|
|
f"dedupliziert: {len(merged)}</p>"
|
|
+ "".join(cards) +
|
|
"</div>"
|
|
)
|