38a347a82a
CI / detect-changes (push) Successful in 7s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / secret-scan (push) Has been skipped
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / build-sha-integrity (push) Successful in 9s
CI / validate-canonical-controls (push) Successful in 12s
CI / loc-budget (push) Successful in 24s
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Successful in 3m11s
CI / test-go (push) Has been skipped
CI / iace-gt-coverage (push) Has been skipped
CI / test-python-backend (push) Successful in 24s
CI / test-python-document-crawler (push) Has been skipped
CI / test-python-dsms-gateway (push) Has been skipped
AGB v2 (decision_method routing, 71%FP->~0) + DSE v3 (4-layer, recovered from container) + Architektur-Tab into /sdk/agent live path. Incl CI robustness (detect-changes.sh + PR-head checkout) + security (hardcoded Qdrant key removed, gitleaks allowlist). Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
74 lines
2.8 KiB
Python
74 lines
2.8 KiB
Python
"""CONTENT/CONTRACTUAL-Pruefer / decision_method=LLM.
|
|
|
|
present/absent ueber die LLM-Kaskade (`call_with_cascade`; prod: OVH-120b zuerst).
|
|
Retrieval = GANZE Paragraph-Abschnitte zum Topic (nicht Top-k-Chunks — das war in
|
|
der AGB-Validierung der Schluessel). KEIN DEFECT — Korrektheits-/Defekt-Pruefung
|
|
ist ein separater Modus. present=None bei Fehler (fail-safe: Aufrufer behaelt
|
|
Keyword-Ergebnis). (Validiert an AGB delivery/warranty.)
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import logging
|
|
import re
|
|
|
|
from .base import CheckResult, ControlSpec, DocContext, VerificationMethod
|
|
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Zero-width split point in front of every line that opens a numbered section:
# optional leading whitespace, an optional "§", digits, then "." or ")" and one
# whitespace character (e.g. "1. ", "2) ", "§ 3. ").
# NOTE(review): a heading such as "§ 3 Haftung" (no "." / ")" after the number)
# is not matched by this pattern — confirm that this is intended.
_SECTION = re.compile(r"(?m)(?=^\s*(?:§\s*)?\d+[\.\)]\s)")

# System prompt, sent verbatim (German). It asks for a JSON-only answer with the
# keys "verdict" (ERFUELLT or FEHLT), "zitat" and "begruendung".
_SYS = (
    "Du bist deutscher Compliance-Rechtsexperte. "
    "Entscheide, ob die genannte Pflicht in den vorgelegten Abschnitten "
    "vorhanden ist. NUR die Abschnitte zaehlen. "
    'Antworte NUR JSON: {"verdict":"ERFUELLT|FEHLT",'
    '"zitat":"woertlich oder leer","begruendung":"1 Satz"}.'
)
|
|
|
|
|
|
def _sections(text: str) -> list[str]:
    """Split *text* at the module-level ``_SECTION`` pattern.

    Each piece is stripped of surrounding whitespace; pieces that are empty
    after stripping are dropped.

    Args:
        text: Full document text.

    Returns:
        The non-empty, stripped pieces in document order.
    """
    trimmed = (piece.strip() for piece in _SECTION.split(text))
    return [piece for piece in trimmed if piece]
|
|
|
|
|
|
def _parse(txt: str) -> dict:
    """Extract the first JSON object from an LLM reply.

    The reply may be bare JSON, wrapped in a Markdown code fence (optionally
    tagged ``json``), or surrounded by free text. Decoding starts at the first
    ``{`` and stops at the end of that object, so trailing commentary — even
    commentary that itself contains braces — does not break parsing.

    Args:
        txt: Raw reply text; ``None`` or an empty string counts as "no JSON".

    Returns:
        The decoded JSON object as a dict.

    Raises:
        ValueError: If the reply contains no ``{`` or the object starting
            there cannot be decoded (``json.JSONDecodeError`` is a
            ``ValueError`` subclass).
    """
    out = (txt or "").strip()
    if out.startswith("```"):
        # Keep only the content of the first fenced block and drop a
        # leading "json" language tag.
        out = out.split("```", 2)[1]
        out = out[4:] if out.startswith("json") else out
    start = out.find("{")
    if start < 0:
        # Scalars and arrays are not acceptable answers: callers use .get().
        raise ValueError("no JSON object in LLM reply")
    # raw_decode stops at the end of the first complete value, so anything
    # after the closing brace is ignored instead of causing "Extra data".
    obj, _end = json.JSONDecoder().raw_decode(out, start)
    return obj
|
|
|
|
|
|
class LLMChecker:
    """Presence checker that asks the LLM cascade whether an obligation is covered.

    The document text is split into sections with ``_sections``; up to six
    sections are sent to ``call_with_cascade`` together with the control's
    question, and the JSON verdict is mapped onto a ``CheckResult``.
    """

    verification_method = VerificationMethod.CONTENT

    async def check(self, ctrl: ControlSpec, doc: DocContext) -> CheckResult:
        """Decide present/absent for one control on one document.

        Args:
            ctrl: Control to check. Reads ``topic_regex``, ``question``,
                ``label`` and ``control_id``.
            doc: Document context. Reads ``text``.

        Returns:
            A ``CheckResult`` whose ``present`` is ``True`` for the verdict
            ``ERFUELLT``, ``False`` for ``FEHLT`` and ``None`` when the text
            is shorter than 50 characters, the verdict is unrecognised, or
            any step fails (the failure is logged and ``source`` is
            ``"error"``).
        """
        text = doc.text or ""
        if len(text) < 50:
            # Too little text to judge; no LLM call is made.
            return CheckResult(present=None, source="llm")
        try:
            # Section selection happens inside the try so that an invalid
            # ``topic_regex`` (re.error) is reported as present=None instead
            # of propagating to the caller.
            secs = _sections(text)
            rel = secs[:6]
            if ctrl.topic_regex:
                # Prefer sections that mention the topic (case-insensitive);
                # keep the first six sections when none matches.
                rel = [s for s in secs if re.search(ctrl.topic_regex, s, re.I)][:6] or rel
            question = ctrl.question or f"Ist die Pflicht '{ctrl.label}' im Text vorhanden?"

            # Imported inside the try: an ImportError is handled by the
            # fail-safe handler below as well.
            from compliance.services.llm_cascade import call_with_cascade

            r = await call_with_cascade(
                _SYS,
                json.dumps({"frage": question, "abschnitte": rel}, ensure_ascii=False),
                min_confidence=0.6,
                max_tokens=500,
            )
            obj = _parse(r.get("text"))

            # Tolerate case and surrounding whitespace in the verdict token.
            raw_verdict = obj.get("verdict")
            verdict = raw_verdict.strip().upper() if isinstance(raw_verdict, str) else None
            # Coerce to str so a non-string quote cannot discard a valid verdict.
            zitat = str(obj.get("zitat") or "")[:120]

            if verdict not in ("ERFUELLT", "FEHLT"):
                return CheckResult(present=None, evidence=zitat, source=r.get("source", "?"))
            return CheckResult(
                present=verdict == "ERFUELLT",
                evidence=zitat,
                confidence=float(r.get("confidence") or 0.0),
                source=r.get("source", "llm"),
            )
        except Exception as e:
            # Deliberately broad: any failure yields present=None rather
            # than an exception.
            logger.info("llm checker fail %s: %s", ctrl.control_id, str(e)[:80])
            return CheckResult(present=None, source="error")
|