fix(cra): prod hang-guard /readiness machinery + robuster Datenblatt-JSON-Parse
CI / detect-changes (push) Successful in 19s
CI / guardrail-integrity (push) Has been skipped
CI / branch-name (push) Has been skipped
CI / secret-scan (push) Has been skipped
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / build-sha-integrity (push) Successful in 10s
CI / validate-canonical-controls (push) Successful in 9s
CI / loc-budget (push) Successful in 22s
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Has been skipped
CI / test-go (push) Has been skipped
CI / iace-gt-coverage (push) Has been skipped
CI / test-python-backend (push) Successful in 32s
CI / test-python-document-crawler (push) Has been skipped
CI / test-python-dsms-gateway (push) Has been skipped

#1 _machinery_obligations: SET statement_timeout=4s + run_in_threadpool — auf
   prod hing die Maschinen-Query ~30s (langsame/unindizierte DB nach DB-Swap)
   und blockierte den async-Worker. Jetzt: bei Langsamkeit graceful 'keine
   Maschinen-Pflichten' statt Hang. (Fehlender prod-Index = Controls/DB-Session.)
#2 parse_grenzen_json: tolerant gegenüber ```json-Fences / Prosa-umschlossenem JSON
   (gehostete Modelle wie OVH ignorieren z.T. response_format) → Datenblatt-
   Extraktion liefert auch über den OVH-Fallback Felder.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-06-17 07:39:39 +02:00
parent 9e2655bfef
commit fda94afd5f
3 changed files with 40 additions and 4 deletions
@@ -13,6 +13,8 @@ from typing import Dict, List, Optional
from fastapi import APIRouter, Depends, HTTPException from fastapi import APIRouter, Depends, HTTPException
from pydantic import BaseModel from pydantic import BaseModel
from sqlalchemy import text
from starlette.concurrency import run_in_threadpool
from compliance.services.cra_finding_mapper import assess_findings_payload from compliance.services.cra_finding_mapper import assess_findings_payload
from compliance.services.cra_applicability import ( from compliance.services.cra_applicability import (
@@ -237,6 +239,10 @@ def _machinery_obligations(limit_per: int = 4) -> list:
out = [] out = []
db = SessionLocal() db = SessionLocal()
try: try:
# Bound the query: on a slow/unindexed prod DB this used to hang ~30s and
# block the worker. Cap at 4s → on timeout the queries raise, we degrade
# to "no machinery obligations" (best-effort enrichment, not core).
db.execute(text("SET statement_timeout = '4000'"))
svc = UseCaseControlsService(db) svc = UseCaseControlsService(db)
for sub_topic, bucket in _MACHINERY_SUBTOPICS: for sub_topic, bucket in _MACHINERY_SUBTOPICS:
try: try:
@@ -291,7 +297,7 @@ async def readiness(body: ReadinessRequest):
# — keep them in their OWN section, not mixed into the Code/Process/Document # — keep them in their OWN section, not mixed into the Code/Process/Document
# cyber buckets (machine safety != cybersecurity). # cyber buckets (machine safety != cybersecurity).
if body.is_machinery or machine_integrator: if body.is_machinery or machine_integrator:
machinery = _machinery_obligations() machinery = await run_in_threadpool(_machinery_obligations)
if machinery: if machinery:
regulations.append("Maschinen-VO 2023/1230") regulations.append("Maschinen-VO 2023/1230")
machinery_guideline = [item for _bucket, item in machinery] machinery_guideline = [item for _bucket, item in machinery]
@@ -91,11 +91,33 @@ def _system_prompt() -> str:
) )
def _coerce_json(raw: str):
    """Tolerantly decode an LLM reply that should contain one JSON object.

    Some hosted models (e.g. OVH) ignore ``response_format`` and wrap the
    object in ```json fences or surrounding prose. Decoding is attempted in
    this order:

    1. the whole reply as-is;
    2. the first decodable JSON object inside each fenced segment;
    3. the first decodable JSON object anywhere in the reply.

    Args:
        raw: The model reply. ``None``, empty text and non-text values are
            tolerated and yield ``None`` instead of raising.

    Returns:
        The decoded JSON value (any JSON type when the reply is clean JSON,
        otherwise a ``dict``), or ``None`` when nothing decodable is found.
    """
    if isinstance(raw, (bytes, bytearray)):
        # json.loads accepts bytes directly; only fall back to text scanning
        # when that fails (ValueError covers JSON and Unicode decode errors).
        try:
            return json.loads(raw)
        except ValueError:
            raw = bytes(raw).decode("utf-8", errors="replace")
    if not isinstance(raw, str):
        # Anything that is not text cannot be scanned for an embedded object.
        return None

    text = raw.strip()
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        pass

    # Odd-indexed pieces of a split on ``` are the fenced segments; the whole
    # reply is the last resort so unfenced / prose-wrapped objects still work.
    candidates = text.split("```")[1::2] + [text]
    decoder = json.JSONDecoder()
    for candidate in candidates:
        start = candidate.find("{")
        while start != -1:
            try:
                # raw_decode stops at the end of the object, so trailing prose
                # or a second object does not break the parse.
                value, _end = decoder.raw_decode(candidate, start)
            except json.JSONDecodeError:
                # A stray "{" in prose: try the next opening brace.
                start = candidate.find("{", start + 1)
            else:
                return value
    return None
def parse_grenzen_json(raw: str) -> dict: def parse_grenzen_json(raw: str) -> dict:
"""Parse the LLM response into {key: {value, source}} for known keys only.""" """Parse the LLM response into {key: {value, source}} for known keys only."""
try: data = _coerce_json(raw)
data = json.loads(raw) if not isinstance(data, dict):
except (json.JSONDecodeError, TypeError):
return {} return {}
fields = data.get("fields") if isinstance(data, dict) else None fields = data.get("fields") if isinstance(data, dict) else None
if not isinstance(fields, dict): if not isinstance(fields, dict):
@@ -48,6 +48,14 @@ class TestParse:
assert parse_grenzen_json("not json") == {} assert parse_grenzen_json("not json") == {}
assert parse_grenzen_json("") == {} assert parse_grenzen_json("") == {}
def test_fenced_json(self):
    """A reply wrapped in a ```json fence still yields the extracted field."""
    fenced_reply = '```json\n{"fields": {"manufacturer": {"value": "OWIS", "source": "x"}}}\n```'
    parsed = parse_grenzen_json(fenced_reply)
    assert parsed["manufacturer"]["value"] == "OWIS"
def test_prose_wrapped_json(self):
    """A JSON object surrounded by free-text prose is still parsed."""
    wrapped_reply = 'Hier das Ergebnis:\n{"fields": {"machine_type": {"value": "Steuerung"}}}\nDanke.'
    parsed = parse_grenzen_json(wrapped_reply)
    assert parsed["machine_type"]["value"] == "Steuerung"
class TestFollowups: class TestFollowups:
def test_empty_limits_asks_all_essentials(self): def test_empty_limits_asks_all_essentials(self):