f51671737a
Build + Deploy / build-admin-compliance (push) Failing after 48s
Build + Deploy / build-backend-compliance (push) Successful in 9s
Build + Deploy / build-ai-sdk (push) Successful in 8s
CI / loc-budget (push) Failing after 17s
CI / secret-scan (push) Has been skipped
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Failing after 2m3s
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / test-python-backend (push) Successful in 40s
Build + Deploy / build-developer-portal (push) Successful in 9s
Build + Deploy / build-tts (push) Successful in 7s
Build + Deploy / build-document-crawler (push) Successful in 8s
Build + Deploy / build-dsms-gateway (push) Successful in 7s
Build + Deploy / build-dsms-node (push) Successful in 7s
CI / branch-name (push) Has been skipped
Build + Deploy / trigger-orca (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / test-go (push) Failing after 45s
CI / test-python-document-crawler (push) Successful in 34s
CI / test-python-dsms-gateway (push) Successful in 27s
CI / validate-canonical-controls (push) Successful in 15s
1. LLM model: qwen3:32b → qwen3.5:35b-a3b (the model actually available on the Mac Mini)
2. Section splitter: headings MUST be preceded by a blank line.
   This prevents cookie table entries ("Funktionale Cookies",
   "Session Cookies") from being treated as headings and splitting
   the cookie section (see the sketch below).
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
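
A minimal sketch of the heading rule from point 2, assuming a markdown-style "#"-prefixed heading convention; the function name, regex, and splitting logic here are illustrative guesses, not the actual splitter code:

import re

# A line counts as a section heading only if the line above it is blank
# (or it is the first line of the document). Cookie table entries such as
# "Funktionale Cookies" sit directly under other text, so they no longer
# start a new section.
HEADING = re.compile(r"^#{1,6}\s+\S")

def split_sections(text: str) -> list[str]:
    lines = text.splitlines()
    sections: list[list[str]] = [[]]
    for i, line in enumerate(lines):
        preceded_by_blank = i == 0 or not lines[i - 1].strip()
        if HEADING.match(line) and preceded_by_blank:
            sections.append([line])  # blank line above: genuine heading
        else:
            sections[-1].append(line)  # otherwise stay in current section
    joined = ["\n".join(s).strip() for s in sections]
    return [s for s in joined if s]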
129 lines
3.7 KiB
Python
"""
|
|
LLM verification for regex check results.
|
|
|
|
When a regex check FAILs, the LLM re-checks the original text
|
|
to confirm or overturn the finding. This eliminates false positives
|
|
caused by regex limitations (unusual formatting, synonyms, etc.).
|
|
|
|
Uses the self-hosted Ollama endpoint (Qwen) for fast local inference.
|
|
"""
|
|
|
|
import logging
|
|
import os
|
|
import httpx
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
OLLAMA_URL = os.getenv("OLLAMA_URL", "http://host.docker.internal:11434")
|
|
OLLAMA_MODEL = os.getenv("OLLAMA_VERIFY_MODEL", "qwen3.5:35b-a3b")
|
|
TIMEOUT = 30.0
|
|
|
|
|
|
async def verify_failed_checks(
|
|
text: str,
|
|
failed_checks: list[dict],
|
|
doc_title: str,
|
|
) -> dict[str, dict]:
|
|
"""Verify regex FAIL results using LLM.
|
|
|
|
For each failed check, asks the LLM a binary YES/NO question.
|
|
Returns a dict mapping check_id -> {"overturned": bool, "evidence": str}.
|
|
|
|
Only checks with a "hint" field are verified (hints contain the
|
|
natural-language question the LLM can answer).
|
|
"""
|
|
results: dict[str, dict] = {}
|
|
|
|
if not failed_checks:
|
|
return results
|
|
|
|
# Truncate text to fit context window
|
|
text_excerpt = text[:8000]
|
|
|
|
for check in failed_checks:
|
|
check_id = check.get("id", "")
|
|
label = check.get("label", "")
|
|
hint = check.get("hint", "")
|
|
|
|
if not hint:
|
|
continue
|
|
|
|
try:
|
|
answer = await _ask_llm(text_excerpt, label, hint, doc_title)
|
|
overturned = answer.get("found", False)
|
|
results[check_id] = {
|
|
"overturned": overturned,
|
|
"evidence": answer.get("evidence", ""),
|
|
}
|
|
if overturned:
|
|
logger.info(
|
|
"LLM overturned regex FAIL for '%s' in '%s': %s",
|
|
label, doc_title, answer.get("evidence", "")[:80],
|
|
)
|
|
except Exception as e:
|
|
logger.warning("LLM verify failed for '%s': %s", label, e)
|
|
|
|
return results
|
|
|
|
|
|
async def _ask_llm(
|
|
text: str, check_label: str, hint: str, doc_title: str,
|
|
) -> dict:
|
|
"""Ask the LLM a binary verification question."""
|
|
prompt = f"""/no_think
|
|
Pruefe ob der folgende Dokumenttext die Anforderung erfuellt.
|
|
|
|
ANFORDERUNG: {check_label}
|
|
DETAILS: {hint}
|
|
DOKUMENT: "{doc_title}"
|
|
|
|
TEXT:
|
|
{text}
|
|
|
|
Antworte NUR mit einem JSON-Objekt (keine Erklaerung):
|
|
{{"found": true/false, "evidence": "Zitat aus dem Text das die Anforderung belegt (max 100 Zeichen), oder leer wenn nicht gefunden"}}
|
|
"""
|
|
|
|
async with httpx.AsyncClient(timeout=TIMEOUT) as client:
|
|
resp = await client.post(
|
|
f"{OLLAMA_URL}/api/generate",
|
|
json={
|
|
"model": OLLAMA_MODEL,
|
|
"prompt": prompt,
|
|
"stream": False,
|
|
"options": {"temperature": 0.0, "num_predict": 200},
|
|
},
|
|
)
|
|
resp.raise_for_status()
|
|
raw = resp.json().get("response", "")
|
|
|
|
return _parse_llm_response(raw)
|
|
|
|
|
|
def _parse_llm_response(raw: str) -> dict:
|
|
"""Parse LLM JSON response with fallback extraction."""
|
|
import json
|
|
import re
|
|
|
|
# Try direct JSON parse
|
|
raw = raw.strip()
|
|
# Extract JSON from markdown code blocks
|
|
m = re.search(r"```(?:json)?\s*(\{.*?\})\s*```", raw, re.DOTALL)
|
|
if m:
|
|
raw = m.group(1)
|
|
# Or just find the JSON object
|
|
m = re.search(r"\{[^}]*\"found\"[^}]*\}", raw, re.DOTALL)
|
|
if m:
|
|
raw = m.group(0)
|
|
|
|
try:
|
|
data = json.loads(raw)
|
|
return {
|
|
"found": bool(data.get("found", False)),
|
|
"evidence": str(data.get("evidence", ""))[:150],
|
|
}
|
|
except (json.JSONDecodeError, ValueError):
|
|
# Fallback: look for "found": true/false
|
|
found = '"found": true' in raw.lower() or '"found":true' in raw.lower()
|
|
return {"found": found, "evidence": ""}
|
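
For orientation, a minimal usage sketch; the check dict, document text, and title are invented for illustration, and a real caller would await verify_failed_checks from inside the async service rather than through asyncio.run:

import asyncio

# Invented example input: a regex check that FAILed, carrying the
# natural-language "hint" question the LLM re-evaluates.
failed = [{
    "id": "cookie_lifetime",
    "label": "Cookie storage duration stated",
    "hint": "Does the text state how long cookies are stored?",
}]

async def main() -> None:
    # Requires a reachable Ollama endpoint; override OLLAMA_URL and
    # OLLAMA_VERIFY_MODEL via the environment if the defaults do not match.
    text = "... session cookies are deleted after 24 hours ..."
    verdicts = await verify_failed_checks(text, failed, doc_title="Cookie Policy")
    for check_id, verdict in verdicts.items():
        status = "overturned" if verdict["overturned"] else "FAIL confirmed"
        print(f"{check_id}: {status} | {verdict['evidence']}")

asyncio.run(main())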