fix(compliance-check): batch LLM verification + increase poll timeout
Build + Deploy / build-admin-compliance (push) Successful in 1m52s
Build + Deploy / build-backend-compliance (push) Successful in 18s
Build + Deploy / build-ai-sdk (push) Successful in 11s
Build + Deploy / build-developer-portal (push) Successful in 11s
Build + Deploy / build-tts (push) Successful in 12s
Build + Deploy / build-document-crawler (push) Successful in 14s
Build + Deploy / build-dsms-gateway (push) Successful in 10s
Build + Deploy / build-dsms-node (push) Successful in 12s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / loc-budget (push) Failing after 15s
CI / secret-scan (push) Has been skipped
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Successful in 2m35s
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / test-go (push) Successful in 42s
CI / test-python-backend (push) Successful in 37s
CI / test-python-document-crawler (push) Successful in 25s
CI / test-python-dsms-gateway (push) Successful in 21s
CI / validate-canonical-controls (push) Successful in 16s
Build + Deploy / trigger-orca (push) Successful in 2m24s
Build + Deploy / build-admin-compliance (push) Successful in 1m52s
Build + Deploy / build-backend-compliance (push) Successful in 18s
Build + Deploy / build-ai-sdk (push) Successful in 11s
Build + Deploy / build-developer-portal (push) Successful in 11s
Build + Deploy / build-tts (push) Successful in 12s
Build + Deploy / build-document-crawler (push) Successful in 14s
Build + Deploy / build-dsms-gateway (push) Successful in 10s
Build + Deploy / build-dsms-node (push) Successful in 12s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / loc-budget (push) Failing after 15s
CI / secret-scan (push) Has been skipped
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Successful in 2m35s
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / test-go (push) Successful in 42s
CI / test-python-backend (push) Successful in 37s
CI / test-python-document-crawler (push) Successful in 25s
CI / test-python-dsms-gateway (push) Successful in 21s
CI / validate-canonical-controls (push) Successful in 16s
Build + Deploy / trigger-orca (push) Successful in 2m24s
- LLM verify now sends ALL failed checks in one batched call instead of one Ollama call per check (80+ calls → 1 per document)
- Increase frontend poll timeout from 6 min to 15 min

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -202,9 +202,9 @@ export function ComplianceCheckTab() {
|
|||||||
setActiveCheckId(check_id)
|
setActiveCheckId(check_id)
|
||||||
localStorage.setItem(STORAGE_KEY_CHECK_ID, check_id)
|
localStorage.setItem(STORAGE_KEY_CHECK_ID, check_id)
|
||||||
|
|
||||||
// Poll for results
|
// Poll for results (max 15 min = 300 polls x 3s)
|
||||||
let attempts = 0
|
let attempts = 0
|
||||||
while (attempts < 120) {
|
while (attempts < 300) {
|
||||||
await new Promise(r => setTimeout(r, 3000))
|
await new Promise(r => setTimeout(r, 3000))
|
||||||
const pollRes = await fetch(`/api/sdk/v1/agent/compliance-check?check_id=${check_id}`)
|
const pollRes = await fetch(`/api/sdk/v1/agent/compliance-check?check_id=${check_id}`)
|
||||||
if (!pollRes.ok) { attempts++; continue }
|
if (!pollRes.ok) { attempts++; continue }
|
||||||
@@ -235,9 +235,9 @@ export function ComplianceCheckTab() {
|
|||||||
}
|
}
|
||||||
attempts++
|
attempts++
|
||||||
}
|
}
|
||||||
if (attempts >= 120) {
|
if (attempts >= 300) {
|
||||||
localStorage.removeItem(STORAGE_KEY_CHECK_ID); setActiveCheckId('')
|
localStorage.removeItem(STORAGE_KEY_CHECK_ID); setActiveCheckId('')
|
||||||
throw new Error('Zeitlimit ueberschritten')
|
throw new Error('Zeitlimit ueberschritten (15 Min)')
|
||||||
}
|
}
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
setError(e instanceof Error ? e.message : 'Unbekannter Fehler')
|
setError(e instanceof Error ? e.message : 'Unbekannter Fehler')
|
||||||
|
|||||||
@@ -24,32 +24,25 @@ async def verify_failed_checks(
|
|||||||
failed_checks: list[dict],
|
failed_checks: list[dict],
|
||||||
doc_title: str,
|
doc_title: str,
|
||||||
) -> dict[str, dict]:
|
) -> dict[str, dict]:
|
||||||
"""Verify regex FAIL results using LLM.
|
"""Verify regex FAIL results using LLM — single batched call.
|
||||||
|
|
||||||
For each failed check, asks the LLM a binary YES/NO question.
|
Sends ALL failed checks in one LLM prompt instead of one call per check.
|
||||||
Returns a dict mapping check_id -> {"overturned": bool, "evidence": str}.
|
Returns a dict mapping check_id -> {"overturned": bool, "evidence": str}.
|
||||||
|
|
||||||
Only checks with a "hint" field are verified (hints contain the
|
|
||||||
natural-language question the LLM can answer).
|
|
||||||
"""
|
"""
|
||||||
results: dict[str, dict] = {}
|
results: dict[str, dict] = {}
|
||||||
|
|
||||||
if not failed_checks:
|
checks_with_hints = [c for c in failed_checks if c.get("hint")]
|
||||||
|
if not checks_with_hints:
|
||||||
return results
|
return results
|
||||||
|
|
||||||
# Truncate text to fit context window
|
# Truncate text to fit context window
|
||||||
text_excerpt = text[:8000]
|
text_excerpt = text[:8000]
|
||||||
|
|
||||||
for check in failed_checks:
|
try:
|
||||||
check_id = check.get("id", "")
|
batch_results = await _ask_llm_batch(
|
||||||
label = check.get("label", "")
|
text_excerpt, checks_with_hints, doc_title,
|
||||||
hint = check.get("hint", "")
|
)
|
||||||
|
for check_id, answer in batch_results.items():
|
||||||
if not hint:
|
|
||||||
continue
|
|
||||||
|
|
||||||
try:
|
|
||||||
answer = await _ask_llm(text_excerpt, label, hint, doc_title)
|
|
||||||
overturned = answer.get("found", False)
|
overturned = answer.get("found", False)
|
||||||
results[check_id] = {
|
results[check_id] = {
|
||||||
"overturned": overturned,
|
"overturned": overturned,
|
||||||
@@ -58,71 +51,88 @@ async def verify_failed_checks(
|
|||||||
if overturned:
|
if overturned:
|
||||||
logger.info(
|
logger.info(
|
||||||
"LLM overturned regex FAIL for '%s' in '%s': %s",
|
"LLM overturned regex FAIL for '%s' in '%s': %s",
|
||||||
label, doc_title, answer.get("evidence", "")[:80],
|
check_id, doc_title, answer.get("evidence", "")[:80],
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning("LLM verify failed for '%s': %s", label, e)
|
logger.warning("LLM batch verify failed for '%s': %s", doc_title, e)
|
||||||
|
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
|
||||||
async def _ask_llm(
|
async def _ask_llm_batch(
|
||||||
text: str, check_label: str, hint: str, doc_title: str,
|
text: str, checks: list[dict], doc_title: str,
|
||||||
) -> dict:
|
) -> dict[str, dict]:
|
||||||
"""Ask the LLM a binary verification question."""
|
"""Ask the LLM to verify ALL failed checks in a single call."""
|
||||||
prompt = f"""/no_think
|
checklist_lines = []
|
||||||
Pruefe ob der folgende Dokumenttext die Anforderung erfuellt.
|
for i, c in enumerate(checks, 1):
|
||||||
|
checklist_lines.append(
|
||||||
|
f'{i}. ID="{c["id"]}" | {c["label"]} | {c.get("hint", "")[:120]}'
|
||||||
|
)
|
||||||
|
checklist_str = "\n".join(checklist_lines)
|
||||||
|
|
||||||
ANFORDERUNG: {check_label}
|
prompt = f"""/no_think
|
||||||
DETAILS: {hint}
|
Pruefe ob der Dokumenttext die folgenden Anforderungen erfuellt.
|
||||||
DOKUMENT: "{doc_title}"
|
DOKUMENT: "{doc_title}"
|
||||||
|
|
||||||
|
ANFORDERUNGEN:
|
||||||
|
{checklist_str}
|
||||||
|
|
||||||
TEXT:
|
TEXT:
|
||||||
{text}
|
{text}
|
||||||
|
|
||||||
Antworte NUR mit einem JSON-Objekt (keine Erklaerung):
|
Antworte NUR mit einem JSON-Array (keine Erklaerung). Fuer jede Anforderung:
|
||||||
{{"found": true/false, "evidence": "Zitat aus dem Text das die Anforderung belegt (max 100 Zeichen), oder leer wenn nicht gefunden"}}
|
[{{"id": "check-id", "found": true/false, "evidence": "Kurzes Zitat (max 80 Zeichen) oder leer"}}]
|
||||||
"""
|
"""
|
||||||
|
|
||||||
async with httpx.AsyncClient(timeout=TIMEOUT) as client:
|
async with httpx.AsyncClient(timeout=90.0) as client:
|
||||||
resp = await client.post(
|
resp = await client.post(
|
||||||
f"{OLLAMA_URL}/api/generate",
|
f"{OLLAMA_URL}/api/generate",
|
||||||
json={
|
json={
|
||||||
"model": OLLAMA_MODEL,
|
"model": OLLAMA_MODEL,
|
||||||
"prompt": prompt,
|
"prompt": prompt,
|
||||||
"stream": False,
|
"stream": False,
|
||||||
"options": {"temperature": 0.0, "num_predict": 200},
|
"options": {"temperature": 0.0, "num_predict": 2000},
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
resp.raise_for_status()
|
resp.raise_for_status()
|
||||||
raw = resp.json().get("response", "")
|
raw = resp.json().get("response", "")
|
||||||
|
|
||||||
return _parse_llm_response(raw)
|
return _parse_batch_response(raw, checks)
|
||||||
|
|
||||||
|
|
||||||
def _parse_llm_response(raw: str) -> dict:
|
def _parse_batch_response(raw: str, checks: list[dict]) -> dict[str, dict]:
|
||||||
"""Parse LLM JSON response with fallback extraction."""
|
"""Parse batch LLM JSON array response."""
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
|
|
||||||
# Try direct JSON parse
|
results: dict[str, dict] = {}
|
||||||
raw = raw.strip()
|
raw = raw.strip()
|
||||||
# Extract JSON from markdown code blocks
|
|
||||||
m = re.search(r"```(?:json)?\s*(\{.*?\})\s*```", raw, re.DOTALL)
|
# Extract JSON array from markdown code blocks
|
||||||
|
m = re.search(r"```(?:json)?\s*(\[.*?\])\s*```", raw, re.DOTALL)
|
||||||
if m:
|
if m:
|
||||||
raw = m.group(1)
|
raw = m.group(1)
|
||||||
# Or just find the JSON object
|
else:
|
||||||
m = re.search(r"\{[^}]*\"found\"[^}]*\}", raw, re.DOTALL)
|
m = re.search(r"\[.*\]", raw, re.DOTALL)
|
||||||
if m:
|
if m:
|
||||||
raw = m.group(0)
|
raw = m.group(0)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
data = json.loads(raw)
|
items = json.loads(raw)
|
||||||
return {
|
if isinstance(items, list):
|
||||||
"found": bool(data.get("found", False)),
|
for item in items:
|
||||||
"evidence": str(data.get("evidence", ""))[:150],
|
cid = item.get("id", "")
|
||||||
}
|
if cid:
|
||||||
|
results[cid] = {
|
||||||
|
"found": bool(item.get("found", False)),
|
||||||
|
"evidence": str(item.get("evidence", ""))[:150],
|
||||||
|
}
|
||||||
except (json.JSONDecodeError, ValueError):
|
except (json.JSONDecodeError, ValueError):
|
||||||
# Fallback: look for "found": true/false
|
# Fallback: extract individual JSON objects
|
||||||
found = '"found": true' in raw.lower() or '"found":true' in raw.lower()
|
for m in re.finditer(r'\{[^}]*"id"\s*:\s*"([^"]+)"[^}]*"found"\s*:\s*(true|false)[^}]*\}', raw, re.DOTALL):
|
||||||
return {"found": found, "evidence": ""}
|
cid = m.group(1)
|
||||||
|
found = m.group(2) == "true"
|
||||||
|
results[cid] = {"found": found, "evidence": ""}
|
||||||
|
|
||||||
|
logger.info("LLM batch: %d/%d checks parsed", len(results), len(checks))
|
||||||
|
return results
|
||||||
|
|||||||
Reference in New Issue
Block a user