feat: Backlog 1-5 — soft-hints, chatbot-discovery, API-payload, LLM-Agent
5 Backlog-Items aus dem Multi-Site-Briefing in einem Sprint:
1. B13 B2C-Soft-Hints — Versicherungs/Tarif/Buchungs-Marker
_B2C_WEAK erweitert um "Reiseversicherung", "Tarifrechner",
"Online-Antrag", "Flug buchen", "Stromtarif" etc.
Fängt Allianz-Reise-Chatbot (vorher False-Negative).
2. Chatbot-Policy-Discovery (chatbot_policy_discovery.py)
Probt 14 Standard-Slugs (privacypolicychatbot, chatbot-datenschutz,
ai-policy, ki-datenschutz, ...) × 5 Lang-Prefixe auf jeder
submitted Origin. Successful >300-Wort-Findings werden in
doc_texts['dse'] gemerged. Audit-Trail über
doc_entries[dse].chatbot_policy_sources.
Hebt Westfield-iAdvize-Lücke.
3. API-Response-Payload erweitert
phase_f_persist.response um extra_findings, audit_walk und
html_blocks erweitert. B-Wiring-Output (B1, B3-B18) ist nicht
mehr nur im Mail-HTML versteckt — externe Aufrufer sehen jedes
Finding. Schema additiv, Legacy-Clients ignorieren neue Felder.
4. Plausibility-LLM Empty-Response-Fix
Resilienz-Strategie A→B→C→D:
A) format='json' (strict, default)
B) format='' (loose, _try_extract_json mit ```json-fence + prose-
wrap-Unterstützung)
C) Split-Batch-Recursion (vorhanden)
D) Give up, leeres dict (callers behandeln als skipped)
Plus _post_llm() als isolierter LLM-Call-Helper, fängt
Network-Errors ab.
5. Specialist-Agents Phase 2 LLM (MVP) — Impressum-Agent
impressum_agent_llm.py: qwen3:30b-a3b mit § 5 TMG System-Prompt,
business_scope-hints aus profile_dict. Output identisches Schema
wie pattern-agent für ein Merge ohne API-Bruch.
_b18_wiring.py orchestriert beide Agents, dedupliziert nach
field_id und rendert einen lila V2-Block mit KB/LLM-Tags pro Finding.
Pattern-first im Dedup (deterministisch + stable).
Tests: 107/107 grün (7 Test-Suites + chatbot-discovery + b18).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -132,54 +132,102 @@ def _build_user_prompt(items: list[dict], doc_title: str,
|
||||
)
|
||||
|
||||
|
||||
async def _post_llm(body: dict) -> str:
    """Perform a single chat request against the LLM endpoint.

    POSTs ``body`` as JSON to ``{OLLAMA_URL}/api/chat`` and returns the
    ``message.content`` field of the JSON reply.

    Args:
        body: Request payload, sent unchanged as the JSON body.

    Returns:
        The reply's content, or ``""`` when the content is missing or
        empty, or when anything in the request/decoding path raises.
        Failures are logged as warnings and never propagated, so the
        caller can decide on a fallback strategy.
    """
    text = ""
    try:
        async with httpx.AsyncClient(timeout=TIMEOUT) as client:
            response = await client.post(f"{OLLAMA_URL}/api/chat", json=body)
            response.raise_for_status()
            # "message" may be absent or null; fall back to an empty mapping.
            message = response.json().get("message") or {}
            text = message.get("content", "") or ""
    except Exception as exc:
        # Deliberately broad: any failure degrades to "no content".
        logger.warning("plausibility LLM call failed: %s", exc)
    return text
|
||||
|
||||
|
||||
def _try_extract_json(content: str) -> dict | None:
|
||||
"""Extract a JSON object from free-form LLM output. Handles
|
||||
markdown-fenced and prose-wrapped responses."""
|
||||
if not content:
|
||||
return None
|
||||
s = content.strip()
|
||||
# Strip ```json … ``` fences
|
||||
if s.startswith("```"):
|
||||
s = s.strip("`")
|
||||
if s.lower().startswith("json"):
|
||||
s = s[4:]
|
||||
s = s.strip()
|
||||
# Heuristic: cut from first { to last }
|
||||
first = s.find("{")
|
||||
last = s.rfind("}")
|
||||
if first >= 0 and last > first:
|
||||
s = s[first:last + 1]
|
||||
try:
|
||||
return json.loads(s)
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
async def _ask_llm_batch(items: list[dict], doc_title: str,
|
||||
doc_excerpt: str) -> dict[str, dict]:
|
||||
"""Send a batch of up to BATCH_SIZE findings to the LLM."""
|
||||
body = {
|
||||
"""Send a batch of up to BATCH_SIZE findings to the LLM.
|
||||
|
||||
Resilience strategy (P125 fix for empty-response bug):
|
||||
A. format='json' (strict) — current default
|
||||
B. If A returns empty: format='' (loose), extract JSON manually
|
||||
C. If B also empty AND batch >2: split batch + recurse
|
||||
D. Else: give up, return {} (callers stamp llm_skipped=true)
|
||||
"""
|
||||
user_prompt = _build_user_prompt(items, doc_title, doc_excerpt)
|
||||
base_body = {
|
||||
"model": MODEL,
|
||||
"messages": [
|
||||
{"role": "system", "content": _SYSTEM_PROMPT},
|
||||
{"role": "user", "content": _build_user_prompt(
|
||||
items, doc_title, doc_excerpt,
|
||||
)},
|
||||
{"role": "user", "content": user_prompt},
|
||||
],
|
||||
"format": "json",
|
||||
"stream": False,
|
||||
"options": {"temperature": 0.0, "seed": 42, "num_predict": 1500},
|
||||
}
|
||||
out: dict[str, dict] = {}
|
||||
input_ids = [it["id"] for it in items]
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=TIMEOUT) as c:
|
||||
r = await c.post(f"{OLLAMA_URL}/api/chat", json=body)
|
||||
r.raise_for_status()
|
||||
content = (r.json().get("message") or {}).get("content", "")
|
||||
if not content:
|
||||
# Single retry with smaller batch — qwen3 sometimes
|
||||
# rejects ≥6-item prompts under format='json'.
|
||||
if len(items) > 2:
|
||||
half = len(items) // 2
|
||||
logger.info(
|
||||
"plausibility empty → retry split %d → %dx2",
|
||||
len(items), half,
|
||||
)
|
||||
first = await _ask_llm_batch(
|
||||
items[:half], doc_title, doc_excerpt,
|
||||
)
|
||||
second = await _ask_llm_batch(
|
||||
items[half:], doc_title, doc_excerpt,
|
||||
)
|
||||
out.update(first)
|
||||
out.update(second)
|
||||
return out
|
||||
logger.warning("plausibility LLM returned empty content")
|
||||
# Strategy A: format='json'
|
||||
content = await _post_llm({**base_body, "format": "json"})
|
||||
if not content:
|
||||
# Strategy B: format-free, parse-on-our-side
|
||||
logger.info(
|
||||
"plausibility A→empty, trying B (format-free) batch=%d",
|
||||
len(items),
|
||||
)
|
||||
content = await _post_llm(base_body)
|
||||
|
||||
if not content:
|
||||
# Strategy C: split + recurse
|
||||
if len(items) > 2:
|
||||
half = len(items) // 2
|
||||
logger.info(
|
||||
"plausibility A+B empty → split %d → %dx2",
|
||||
len(items), half,
|
||||
)
|
||||
first = await _ask_llm_batch(
|
||||
items[:half], doc_title, doc_excerpt,
|
||||
)
|
||||
second = await _ask_llm_batch(
|
||||
items[half:], doc_title, doc_excerpt,
|
||||
)
|
||||
out.update(first)
|
||||
out.update(second)
|
||||
return out
|
||||
try:
|
||||
data = json.loads(content)
|
||||
except json.JSONDecodeError as je:
|
||||
# Strategy D: give up
|
||||
logger.warning(
|
||||
"plausibility gave up after A+B for batch=%d", len(items),
|
||||
)
|
||||
return out
|
||||
data = _try_extract_json(content)
|
||||
if data is None:
|
||||
logger.warning(
|
||||
"plausibility LLM JSON parse failed: %s; raw=%s",
|
||||
je, content[:300],
|
||||
"plausibility LLM JSON parse failed (after fallback); "
|
||||
"raw=%s", content[:300],
|
||||
)
|
||||
return out
|
||||
llm_findings = data.get("findings") or []
|
||||
|
||||
Reference in New Issue
Block a user