fix(compliance-check): batch LLM verification + increase poll timeout

- LLM verify now sends ALL failed checks in one batched call instead of one
  Ollama call per check (80+ calls → 1 per document)
- Increase frontend poll timeout from 6 min to 15 min

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-12 11:49:30 +02:00
parent a127dd971b
commit ce77cde309
2 changed files with 63 additions and 53 deletions
@@ -202,9 +202,9 @@ export function ComplianceCheckTab() {
      setActiveCheckId(check_id)
      localStorage.setItem(STORAGE_KEY_CHECK_ID, check_id)
-      // Poll for results
+      // Poll for results (max 15 min = 300 polls x 3s)
      let attempts = 0
-      while (attempts < 120) {
+      while (attempts < 300) {
        await new Promise(r => setTimeout(r, 3000))
        const pollRes = await fetch(`/api/sdk/v1/agent/compliance-check?check_id=${check_id}`)
        if (!pollRes.ok) { attempts++; continue }
@@ -235,9 +235,9 @@ export function ComplianceCheckTab() {
        }
        attempts++
      }
-      if (attempts >= 120) {
+      if (attempts >= 300) {
        localStorage.removeItem(STORAGE_KEY_CHECK_ID); setActiveCheckId('')
-        throw new Error('Zeitlimit ueberschritten')
+        throw new Error('Zeitlimit ueberschritten (15 Min)')
      }
    } catch (e) {
      setError(e instanceof Error ? e.message : 'Unbekannter Fehler')
@@ -24,32 +24,25 @@ async def verify_failed_checks(
    failed_checks: list[dict],
    doc_title: str,
 ) -> dict[str, dict]:
-    """Verify regex FAIL results using LLM.
+    """Verify regex FAIL results using LLM — single batched call.
-    For each failed check, asks the LLM a binary YES/NO question.
+    Sends ALL failed checks in one LLM prompt instead of one call per check.
    Returns a dict mapping check_id -> {"overturned": bool, "evidence": str}.
    Only checks with a "hint" field are verified (hints contain the
    natural-language question the LLM can answer).
    """
    results: dict[str, dict] = {}
-    if not failed_checks:
+    checks_with_hints = [c for c in failed_checks if c.get("hint")]
    if not checks_with_hints:
        return results
    # Truncate text to fit context window
    text_excerpt = text[:8000]
-    for check in failed_checks:
+    try:
-        check_id = check.get("id", "")
+        batch_results = await _ask_llm_batch(
-        label = check.get("label", "")
+            text_excerpt, checks_with_hints, doc_title,
-        hint = check.get("hint", "")
+        )
-
+        for check_id, answer in batch_results.items():
        if not hint:
            continue
        try:
            answer = await _ask_llm(text_excerpt, label, hint, doc_title)
            overturned = answer.get("found", False)
            results[check_id] = {
                "overturned": overturned,
@@ -58,71 +51,88 @@ async def verify_failed_checks(
            if overturned:
                logger.info(
                    "LLM overturned regex FAIL for '%s' in '%s': %s",
-                    label, doc_title, answer.get("evidence", "")[:80],
+                    check_id, doc_title, answer.get("evidence", "")[:80],
                )
-        except Exception as e:
+    except Exception as e:
-            logger.warning("LLM verify failed for '%s': %s", label, e)
+        logger.warning("LLM batch verify failed for '%s': %s", doc_title, e)
    return results
-async def _ask_llm(
+async def _ask_llm_batch(
-    text: str, check_label: str, hint: str, doc_title: str,
+    text: str, checks: list[dict], doc_title: str,
-) -> dict:
+) -> dict[str, dict]:
-    """Ask the LLM a binary verification question."""
+    """Ask the LLM to verify ALL failed checks in a single call."""
-    prompt = f"""/no_think
+    checklist_lines = []
-Pruefe ob der folgende Dokumenttext die Anforderung erfuellt.
+    for i, c in enumerate(checks, 1):
        checklist_lines.append(
            f'{i}. ID="{c["id"]}" | {c["label"]} | {c.get("hint", "")[:120]}'
        )
    checklist_str = "\n".join(checklist_lines)
-ANFORDERUNG: {check_label}
+    prompt = f"""/no_think
-DETAILS: {hint}
+Pruefe ob der Dokumenttext die folgenden Anforderungen erfuellt.
 DOKUMENT: "{doc_title}"
 ANFORDERUNGEN:
 {checklist_str}
 TEXT:
 {text}
-Antworte NUR mit einem JSON-Objekt (keine Erklaerung):
+Antworte NUR mit einem JSON-Array (keine Erklaerung). Fuer jede Anforderung:
-{{"found": true/false, "evidence": "Zitat aus dem Text das die Anforderung belegt (max 100 Zeichen), oder leer wenn nicht gefunden"}}
+[{{"id": "check-id", "found": true/false, "evidence": "Kurzes Zitat (max 80 Zeichen) oder leer"}}]
 """
-    async with httpx.AsyncClient(timeout=TIMEOUT) as client:
+    async with httpx.AsyncClient(timeout=90.0) as client:
        resp = await client.post(
            f"{OLLAMA_URL}/api/generate",
            json={
                "model": OLLAMA_MODEL,
                "prompt": prompt,
                "stream": False,
-                "options": {"temperature": 0.0, "num_predict": 200},
+                "options": {"temperature": 0.0, "num_predict": 2000},
            },
        )
        resp.raise_for_status()
        raw = resp.json().get("response", "")
-    return _parse_llm_response(raw)
+    return _parse_batch_response(raw, checks)
-def _parse_llm_response(raw: str) -> dict:
+def _parse_batch_response(raw: str, checks: list[dict]) -> dict[str, dict]:
-    """Parse LLM JSON response with fallback extraction."""
+    """Parse batch LLM JSON array response."""
    import json
    import re
-    # Try direct JSON parse
+    results: dict[str, dict] = {}
    raw = raw.strip()
-    # Extract JSON from markdown code blocks
+
-    m = re.search(r"```(?:json)?\s*(\{.*?\})\s*```", raw, re.DOTALL)
+    # Extract JSON array from markdown code blocks
    m = re.search(r"```(?:json)?\s*(\[.*?\])\s*```", raw, re.DOTALL)
    if m:
        raw = m.group(1)
-    # Or just find the JSON object
+    else:
-    m = re.search(r"\{[^}]*\"found\"[^}]*\}", raw, re.DOTALL)
+        m = re.search(r"\[.*\]", raw, re.DOTALL)
-    if m:
+        if m:
-        raw = m.group(0)
+            raw = m.group(0)
    try:
-        data = json.loads(raw)
+        items = json.loads(raw)
-        return {
+        if isinstance(items, list):
-            "found": bool(data.get("found", False)),
+            for item in items:
-            "evidence": str(data.get("evidence", ""))[:150],
+                cid = item.get("id", "")
-        }
+                if cid:
                    results[cid] = {
                        "found": bool(item.get("found", False)),
                        "evidence": str(item.get("evidence", ""))[:150],
                    }
    except (json.JSONDecodeError, ValueError):
-        # Fallback: look for "found": true/false
+        # Fallback: extract individual JSON objects
-        found = '"found": true' in raw.lower() or '"found":true' in raw.lower()
+        for m in re.finditer(r'\{[^}]*"id"\s*:\s*"([^"]+)"[^}]*"found"\s*:\s*(true|false)[^}]*\}', raw, re.DOTALL):
-        return {"found": found, "evidence": ""}
+            cid = m.group(1)
            found = m.group(2) == "true"
            results[cid] = {"found": found, "evidence": ""}
    logger.info("LLM batch: %d/%d checks parsed", len(results), len(checks))
    return results