fix: Robust JSON parsing for LLM responses — handles unquoted keys, fallback extraction
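The LLM may return {fulfilled: true} (unquoted keys) instead of valid JSON {"fulfilled": true}.
The parser now quotes unquoted keys, converts Python-style True/False to JSON true/false,
and falls back to text-based boolean extraction when no JSON object is found or JSON parsing still fails.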
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -213,14 +213,43 @@ async def _verify_control_with_llm(
|
|||||||
# Strip think tags if present
|
# Strip think tags if present
|
||||||
raw = re.sub(r"<think>.*?</think>", "", raw, flags=re.DOTALL).strip()
|
raw = re.sub(r"<think>.*?</think>", "", raw, flags=re.DOTALL).strip()
|
||||||
|
|
||||||
# Parse JSON response
|
# Parse JSON response — handle LLM quirks
|
||||||
import json
|
import json
|
||||||
# Find JSON in response
|
# Find JSON in response
|
||||||
json_match = re.search(r"\{[^{}]+\}", raw)
|
json_match = re.search(r"\{[^{}]+\}", raw)
|
||||||
if not json_match:
|
if not json_match:
|
||||||
return None
|
# Fallback: try to extract fulfilled/evidence from raw text
|
||||||
|
fulfilled = "true" in raw.lower()[:100] or "yes" in raw.lower()[:100] or "erfüllt" in raw.lower()[:100]
|
||||||
|
return {
|
||||||
|
"id": f"rag-{hash(control_text) % 10000}",
|
||||||
|
"label": f"{regulation}: {control_text[:80]}...",
|
||||||
|
"passed": fulfilled,
|
||||||
|
"severity": "LOW" if fulfilled else "MEDIUM",
|
||||||
|
"matched_text": raw[:100] if fulfilled else "",
|
||||||
|
"issue": "" if fulfilled else raw[:100],
|
||||||
|
"control_text": control_text[:200],
|
||||||
|
"regulation": regulation,
|
||||||
|
}
|
||||||
|
|
||||||
result = json.loads(json_match.group())
|
json_str = json_match.group()
|
||||||
|
# Fix common LLM JSON issues
|
||||||
|
json_str = re.sub(r'(?<=[{,])\s*(\w+)\s*:', r' "\1":', json_str) # Unquoted keys
|
||||||
|
json_str = json_str.replace("True", "true").replace("False", "false")
|
||||||
|
try:
|
||||||
|
result = json.loads(json_str)
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
# Last resort: extract boolean from raw text
|
||||||
|
fulfilled = "true" in json_str.lower() or "fulfilled" in raw.lower()[:200]
|
||||||
|
return {
|
||||||
|
"id": f"rag-{hash(control_text) % 10000}",
|
||||||
|
"label": f"{regulation}: {control_text[:80]}...",
|
||||||
|
"passed": fulfilled,
|
||||||
|
"severity": "LOW" if fulfilled else "MEDIUM",
|
||||||
|
"matched_text": "",
|
||||||
|
"issue": "",
|
||||||
|
"control_text": control_text[:200],
|
||||||
|
"regulation": regulation,
|
||||||
|
}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"id": f"rag-{hash(control_text) % 10000}",
|
"id": f"rag-{hash(control_text) % 10000}",
|
||||||
|
|||||||
Reference in New Issue
Block a user