feat(multi-layer): complete Multi-Layer Control Architecture (Phases 1-8 + Pass 0)
Some checks failed
CI/CD / go-lint (push) Has been skipped
CI/CD / python-lint (push) Has been skipped
CI/CD / nodejs-lint (push) Has been skipped
CI/CD / test-go-ai-compliance (push) Failing after 47s
CI/CD / test-python-backend-compliance (push) Successful in 33s
CI/CD / test-python-document-crawler (push) Successful in 24s
CI/CD / test-python-dsms-gateway (push) Successful in 18s
CI/CD / validate-canonical-controls (push) Successful in 11s
CI/CD / Deploy (push) Has been skipped
Some checks failed
CI/CD / go-lint (push) Has been skipped
CI/CD / python-lint (push) Has been skipped
CI/CD / nodejs-lint (push) Has been skipped
CI/CD / test-go-ai-compliance (push) Failing after 47s
CI/CD / test-python-backend-compliance (push) Successful in 33s
CI/CD / test-python-document-crawler (push) Successful in 24s
CI/CD / test-python-dsms-gateway (push) Successful in 18s
CI/CD / validate-canonical-controls (push) Successful in 11s
CI/CD / Deploy (push) Has been skipped
Implements the full Multi-Layer Control Architecture for migrating ~25,000 Rich Controls into atomic, deduplicated Master Controls with full traceability. Architecture: Legal Source → Obligation → Control Pattern → Master Control → Customer Instance New services: - ObligationExtractor: 3-tier extraction (exact → embedding → LLM) - PatternMatcher: 2-tier matching (keyword + embedding + domain-bonus) - ControlComposer: Pattern + Obligation → Master Control - PipelineAdapter: Pipeline integration + Migration Passes 1-5 - DecompositionPass: Pass 0a/0b — Rich Control → atomic Controls - CrosswalkRoutes: 15 API endpoints under /v1/canonical/ New DB schema: - Migration 060: obligation_extractions, control_patterns, crosswalk_matrix - Migration 061: obligation_candidates, parent_control_uuid tracking Pattern Library: 50 YAML patterns (30 core + 20 IT-security) Go SDK: Pattern loader with YAML validation and indexing Documentation: MkDocs updated with full architecture overview 500 Python tests passing across all components. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
562
backend-compliance/compliance/services/obligation_extractor.py
Normal file
562
backend-compliance/compliance/services/obligation_extractor.py
Normal file
@@ -0,0 +1,562 @@
|
||||
"""Obligation Extractor — 3-Tier Chunk-to-Obligation Linking.
|
||||
|
||||
Maps RAG chunks to obligations from the v2 obligation framework using
|
||||
three tiers (fastest first):
|
||||
|
||||
Tier 1: EXACT MATCH — regulation_code + article → obligation_id (~40%)
|
||||
Tier 2: EMBEDDING — chunk text vs. obligation descriptions (~30%)
|
||||
Tier 3: LLM EXTRACT — local Ollama extracts obligation text (~25%)
|
||||
|
||||
Part of the Multi-Layer Control Architecture (Phase 4 of 8).
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
import httpx
|
||||
|
||||
logger = logging.getLogger(__name__)

# Service endpoints and model settings, all overridable via environment.
EMBEDDING_URL = os.getenv("EMBEDDING_URL", "http://embedding-service:8087")
OLLAMA_URL = os.getenv("OLLAMA_URL", "http://host.docker.internal:11434")
OLLAMA_MODEL = os.getenv("CONTROL_GEN_OLLAMA_MODEL", "qwen3.5:35b-a3b")
LLM_TIMEOUT = float(os.getenv("CONTROL_GEN_LLM_TIMEOUT", "180"))

# Embedding similarity thresholds for Tier 2.
# Scores >= MATCH are accepted as an embedding match; CANDIDATE marks the
# weaker near-miss band.
EMBEDDING_MATCH_THRESHOLD = 0.80
EMBEDDING_CANDIDATE_THRESHOLD = 0.60

# ---------------------------------------------------------------------------
# Regulation code mapping: RAG chunk codes → obligation file regulation IDs
# Keys are lowercase chunk codes (CELEX-style "eu_YYYY_NNNN" or short names);
# values are the obligation framework regulation IDs.
# ---------------------------------------------------------------------------

_REGULATION_CODE_TO_ID = {
    # DSGVO / GDPR (EU 2016/679)
    "eu_2016_679": "dsgvo",
    "dsgvo": "dsgvo",
    "gdpr": "dsgvo",
    # AI Act (EU 2024/1689)
    "eu_2024_1689": "ai_act",
    "ai_act": "ai_act",
    "aiact": "ai_act",
    # NIS2 (EU 2022/2555); BSIG is the German implementation
    "eu_2022_2555": "nis2",
    "nis2": "nis2",
    "bsig": "nis2",
    # BDSG
    "bdsg": "bdsg",
    # TTDSG
    "ttdsg": "ttdsg",
    # DSA (EU 2022/2065)
    "eu_2022_2065": "dsa",
    "dsa": "dsa",
    # Data Act (EU 2023/2854)
    "eu_2023_2854": "data_act",
    "data_act": "data_act",
    # EU Machinery Regulation (EU 2023/1230)
    "eu_2023_1230": "eu_machinery",
    "eu_machinery": "eu_machinery",
    # DORA (EU 2022/2554)
    "eu_2022_2554": "dora",
    "dora": "dora",
}
|
||||
|
||||
|
||||
@dataclass
class ObligationMatch:
    """Result of obligation extraction.

    ``method`` records which extraction tier produced the match; a default
    instance (``method="none"``, ``confidence=0.0``) represents "no match".
    """

    obligation_id: Optional[str] = None
    obligation_title: Optional[str] = None
    obligation_text: Optional[str] = None
    method: str = "none"  # exact_match | embedding_match | llm_extracted | inferred
    confidence: float = 0.0
    regulation_id: Optional[str] = None  # e.g. "dsgvo"

    def to_dict(self) -> dict:
        """Serialize all fields into a plain, JSON-friendly dict."""
        return dict(
            obligation_id=self.obligation_id,
            obligation_title=self.obligation_title,
            obligation_text=self.obligation_text,
            method=self.method,
            confidence=self.confidence,
            regulation_id=self.regulation_id,
        )
|
||||
|
||||
|
||||
@dataclass
class _ObligationEntry:
    """Internal representation of a loaded obligation."""

    id: str  # obligation ID, e.g. "DSGVO-OBL-001"
    title: str
    description: str
    regulation_id: str  # obligation framework regulation ID, e.g. "dsgvo"
    articles: list[str] = field(default_factory=list)  # normalized: ["art. 30", "§ 38"]
    embedding: list[float] = field(default_factory=list)  # empty until computed
|
||||
|
||||
|
||||
class ObligationExtractor:
    """3-Tier obligation extraction from RAG chunks.

    Tiers, tried fastest-first:

        Tier 1: EXACT MATCH — regulation_code + article → obligation_id
        Tier 2: EMBEDDING   — chunk text vs. obligation descriptions
        Tier 3: LLM EXTRACT — local Ollama extracts the obligation text

    Usage::

        extractor = ObligationExtractor()
        await extractor.initialize()  # loads obligations + embeddings

        match = await extractor.extract(
            chunk_text="...",
            regulation_code="eu_2016_679",
            article="Art. 30",
            paragraph="Abs. 1",
        )
    """

    def __init__(self):
        # "dsgvo/art. 30" → ["DSGVO-OBL-001", ...]
        self._article_lookup: dict[str, list[str]] = {}
        # obligation id → entry
        self._obligations: dict[str, _ObligationEntry] = {}
        # Parallel lists: _obligation_embeddings[i] belongs to _obligation_ids[i].
        self._obligation_embeddings: list[list[float]] = []
        self._obligation_ids: list[str] = []
        self._initialized = False

    async def initialize(self) -> None:
        """Load all obligations from v2 JSON files and compute embeddings.

        Idempotent — after a successful run, repeated calls are no-ops.
        """
        if self._initialized:
            return

        self._load_obligations()
        await self._compute_embeddings()
        self._initialized = True
        logger.info(
            "ObligationExtractor initialized: %d obligations, %d article lookups, %d embeddings",
            len(self._obligations),
            len(self._article_lookup),
            sum(1 for e in self._obligation_embeddings if e),
        )

    async def extract(
        self,
        chunk_text: str,
        regulation_code: str,
        article: Optional[str] = None,
        paragraph: Optional[str] = None,
    ) -> ObligationMatch:
        """Extract obligation from a chunk using the 3-tier strategy.

        Args:
            chunk_text: The RAG chunk text.
            regulation_code: Source regulation code, e.g. "eu_2016_679".
            article: Optional article reference, e.g. "Art. 30".
            paragraph: Accepted for API completeness but not used for lookup —
                article normalization strips paragraph references anyway.

        Returns:
            An ObligationMatch; its ``method`` field records which tier hit.
            Tier 3 always produces a result (possibly with confidence 0.0).
        """
        if not self._initialized:
            await self.initialize()

        reg_id = _normalize_regulation(regulation_code)

        # Tier 1: Exact match via article lookup
        if article:
            match = self._tier1_exact(reg_id, article)
            if match:
                return match

        # Tier 2: Embedding similarity
        match = await self._tier2_embedding(chunk_text, reg_id)
        if match:
            return match

        # Tier 3: LLM extraction (always returns a match object)
        return await self._tier3_llm(chunk_text, regulation_code, article)

    # -----------------------------------------------------------------------
    # Tier 1: Exact Match
    # -----------------------------------------------------------------------

    def _tier1_exact(self, reg_id: Optional[str], article: str) -> Optional[ObligationMatch]:
        """Look up an obligation by regulation + normalized article reference."""
        if not reg_id:
            return None

        norm_article = _normalize_article(article)
        key = f"{reg_id}/{norm_article}"

        obl_ids = self._article_lookup.get(key)
        if not obl_ids:
            return None

        # Take the first match (highest priority)
        obl_id = obl_ids[0]
        entry = self._obligations.get(obl_id)
        if not entry:
            return None

        return ObligationMatch(
            obligation_id=entry.id,
            obligation_title=entry.title,
            obligation_text=entry.description,
            method="exact_match",
            confidence=1.0,
            regulation_id=reg_id,
        )

    # -----------------------------------------------------------------------
    # Tier 2: Embedding Match
    # -----------------------------------------------------------------------

    async def _tier2_embedding(
        self, chunk_text: str, reg_id: Optional[str]
    ) -> Optional[ObligationMatch]:
        """Find the nearest obligation by embedding cosine similarity.

        A +0.05 domain bonus biases selection toward same-regulation
        obligations; the bonus is removed again before comparing against
        EMBEDDING_MATCH_THRESHOLD so the threshold applies to the raw score.
        """
        if not self._obligation_embeddings:
            return None

        chunk_embedding = await _get_embedding(chunk_text[:2000])
        if not chunk_embedding:
            return None

        best_idx = -1
        best_score = 0.0

        for i, obl_emb in enumerate(self._obligation_embeddings):
            if not obl_emb:
                continue
            # Prefer same-regulation matches
            obl_id = self._obligation_ids[i]
            entry = self._obligations.get(obl_id)
            score = _cosine_sim(chunk_embedding, obl_emb)

            # Domain bonus: +0.05 if same regulation
            if entry and reg_id and entry.regulation_id == reg_id:
                score += 0.05

            if score > best_score:
                best_score = score
                best_idx = i

        if best_idx < 0:
            return None

        # Remove domain bonus for threshold comparison
        raw_score = best_score
        obl_id = self._obligation_ids[best_idx]
        entry = self._obligations.get(obl_id)
        if entry and reg_id and entry.regulation_id == reg_id:
            raw_score -= 0.05

        if raw_score >= EMBEDDING_MATCH_THRESHOLD:
            return ObligationMatch(
                obligation_id=entry.id if entry else obl_id,
                obligation_title=entry.title if entry else None,
                obligation_text=entry.description if entry else None,
                method="embedding_match",
                confidence=round(min(raw_score, 1.0), 3),
                regulation_id=entry.regulation_id if entry else reg_id,
            )

        return None

    # -----------------------------------------------------------------------
    # Tier 3: LLM Extraction
    # -----------------------------------------------------------------------

    async def _tier3_llm(
        self, chunk_text: str, regulation_code: str, article: Optional[str]
    ) -> ObligationMatch:
        """Use the local LLM to extract the obligation from the chunk.

        Always returns an ObligationMatch: confidence 0.60 on a successful
        extraction, 0.0 when the LLM call fails or returns nothing.
        """
        prompt = f"""Analysiere den folgenden Gesetzestext und extrahiere die zentrale rechtliche Pflicht.

Text:
{chunk_text[:3000]}

Quelle: {regulation_code} {article or ''}

Antworte NUR als JSON:
{{
  "obligation_text": "Die zentrale Pflicht in einem Satz",
  "actor": "Wer muss handeln (z.B. Verantwortlicher, Auftragsverarbeiter)",
  "action": "Was muss getan werden",
  "normative_strength": "muss|soll|kann"
}}"""

        system_prompt = (
            "Du bist ein Rechtsexperte fuer EU-Datenschutz- und Digitalrecht. "
            "Extrahiere die zentrale rechtliche Pflicht aus Gesetzestexten. "
            "Antworte ausschliesslich als JSON."
        )

        result_text = await _llm_ollama(prompt, system_prompt)
        if not result_text:
            return ObligationMatch(
                method="llm_extracted",
                confidence=0.0,
                regulation_id=_normalize_regulation(regulation_code),
            )

        parsed = _parse_json(result_text)
        # Fall back to the raw response head if the LLM ignored the JSON format.
        obligation_text = parsed.get("obligation_text", result_text[:500])

        return ObligationMatch(
            obligation_id=None,
            obligation_title=None,
            obligation_text=obligation_text,
            method="llm_extracted",
            confidence=0.60,
            regulation_id=_normalize_regulation(regulation_code),
        )

    # -----------------------------------------------------------------------
    # Initialization helpers
    # -----------------------------------------------------------------------

    def _load_obligations(self) -> None:
        """Load all obligation files listed in the v2 framework manifest."""
        v2_dir = _find_obligations_dir()
        if not v2_dir:
            logger.warning("Obligations v2 directory not found — Tier 1 disabled")
            return

        manifest_path = v2_dir / "_manifest.json"
        if not manifest_path.exists():
            logger.warning("Manifest not found at %s", manifest_path)
            return

        # Explicit encoding: obligation files are UTF-8 JSON regardless of the
        # host locale (German legal text contains non-ASCII characters).
        with open(manifest_path, encoding="utf-8") as f:
            manifest = json.load(f)

        for reg_info in manifest.get("regulations", []):
            reg_id = reg_info["id"]
            reg_file = v2_dir / reg_info["file"]
            if not reg_file.exists():
                logger.warning("Regulation file not found: %s", reg_file)
                continue

            with open(reg_file, encoding="utf-8") as f:
                data = json.load(f)

            for obl in data.get("obligations", []):
                obl_id = obl["id"]
                entry = _ObligationEntry(
                    id=obl_id,
                    title=obl.get("title", ""),
                    description=obl.get("description", ""),
                    regulation_id=reg_id,
                )

                # Build article lookup from legal_basis
                for basis in obl.get("legal_basis", []):
                    article_raw = basis.get("article", "")
                    if article_raw:
                        norm_art = _normalize_article(article_raw)
                        key = f"{reg_id}/{norm_art}"
                        self._article_lookup.setdefault(key, []).append(obl_id)
                        entry.articles.append(norm_art)

                self._obligations[obl_id] = entry

        logger.info(
            "Loaded %d obligations from %d regulations",
            len(self._obligations),
            len(manifest.get("regulations", [])),
        )

    async def _compute_embeddings(self) -> None:
        """Compute embeddings for all obligation title+description texts."""
        if not self._obligations:
            return

        self._obligation_ids = list(self._obligations.keys())
        texts = [
            f"{self._obligations[oid].title}: {self._obligations[oid].description}"
            for oid in self._obligation_ids
        ]

        logger.info("Computing embeddings for %d obligations...", len(texts))
        self._obligation_embeddings = await _get_embeddings_batch(texts)
        valid = sum(1 for e in self._obligation_embeddings if e)
        logger.info("Got %d/%d valid embeddings", valid, len(texts))

    # -----------------------------------------------------------------------
    # Stats
    # -----------------------------------------------------------------------

    def stats(self) -> dict:
        """Return initialization statistics for monitoring/debugging."""
        return {
            "total_obligations": len(self._obligations),
            "article_lookups": len(self._article_lookup),
            "embeddings_valid": sum(1 for e in self._obligation_embeddings if e),
            "regulations": list(
                {e.regulation_id for e in self._obligations.values()}
            ),
            "initialized": self._initialized,
        }
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Module-level helpers (reusable by other modules)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _normalize_regulation(regulation_code: str) -> Optional[str]:
    """Map a RAG regulation_code to the obligation framework regulation ID.

    Tries an exact lookup first, then prefix matching so CELEX-style variants
    (e.g. "eu_2016_679_de") still resolve to their regulation family. The
    prefix set is derived from the canonical ``_REGULATION_CODE_TO_ID`` keys
    instead of a duplicated hard-coded list, so new regulations only need to
    be added in one place.

    Returns:
        The regulation ID (e.g. "dsgvo") or None if unknown/empty.
    """
    if not regulation_code:
        return None
    code = regulation_code.lower().strip()

    # Direct lookup
    mapped = _REGULATION_CODE_TO_ID.get(code)
    if mapped:
        return mapped

    # Prefix matching for CELEX-style families ("eu_YYYY_NNNN...")
    for prefix, reg_id in _REGULATION_CODE_TO_ID.items():
        if prefix.startswith("eu_") and code.startswith(prefix):
            return reg_id

    return None
|
||||
|
||||
|
||||
def _normalize_article(article: str) -> str:
|
||||
"""Normalize article references for consistent lookup.
|
||||
|
||||
Examples:
|
||||
"Art. 30" → "art. 30"
|
||||
"§ 38 BDSG" → "§ 38"
|
||||
"Article 10" → "art. 10"
|
||||
"Art. 30 Abs. 1" → "art. 30"
|
||||
"Artikel 35" → "art. 35"
|
||||
"""
|
||||
if not article:
|
||||
return ""
|
||||
s = article.strip()
|
||||
|
||||
# Remove trailing law name: "§ 38 BDSG" → "§ 38"
|
||||
s = re.sub(r"\s+(DSGVO|BDSG|TTDSG|DSA|NIS2|DORA|AI.?Act)\s*$", "", s, flags=re.IGNORECASE)
|
||||
|
||||
# Remove paragraph references: "Art. 30 Abs. 1" → "Art. 30"
|
||||
s = re.sub(r"\s+(Abs|Absatz|para|paragraph|lit|Satz)\.?\s+.*$", "", s, flags=re.IGNORECASE)
|
||||
|
||||
# Normalize "Article" / "Artikel" → "Art."
|
||||
s = re.sub(r"^(Article|Artikel)\s+", "Art. ", s, flags=re.IGNORECASE)
|
||||
|
||||
return s.lower().strip()
|
||||
|
||||
|
||||
def _cosine_sim(a: list[float], b: list[float]) -> float:
|
||||
"""Compute cosine similarity between two vectors."""
|
||||
if not a or not b or len(a) != len(b):
|
||||
return 0.0
|
||||
dot = sum(x * y for x, y in zip(a, b))
|
||||
norm_a = sum(x * x for x in a) ** 0.5
|
||||
norm_b = sum(x * x for x in b) ** 0.5
|
||||
if norm_a == 0 or norm_b == 0:
|
||||
return 0.0
|
||||
return dot / (norm_a * norm_b)
|
||||
|
||||
|
||||
def _find_obligations_dir() -> Optional[Path]:
|
||||
"""Locate the obligations v2 directory."""
|
||||
candidates = [
|
||||
Path(__file__).resolve().parent.parent.parent.parent
|
||||
/ "ai-compliance-sdk" / "policies" / "obligations" / "v2",
|
||||
Path("/app/ai-compliance-sdk/policies/obligations/v2"),
|
||||
Path("ai-compliance-sdk/policies/obligations/v2"),
|
||||
]
|
||||
for p in candidates:
|
||||
if p.is_dir() and (p / "_manifest.json").exists():
|
||||
return p
|
||||
return None
|
||||
|
||||
|
||||
async def _get_embedding(text: str) -> list[float]:
    """Get the embedding vector for a single text.

    Best-effort: returns an empty list on any failure so callers can fall
    back to the next extraction tier.

    Args:
        text: Text to embed (callers truncate before passing).

    Returns:
        The embedding vector, or [] if the service call failed.
    """
    try:
        async with httpx.AsyncClient(timeout=10.0) as client:
            resp = await client.post(
                f"{EMBEDDING_URL}/embed",
                json={"texts": [text]},
            )
            resp.raise_for_status()
            embeddings = resp.json().get("embeddings", [])
            return embeddings[0] if embeddings else []
    except Exception as e:
        # Log instead of swallowing silently — consistent with
        # _get_embeddings_batch, which reports its failures.
        logger.warning("Embedding request failed: %s", e)
        return []
|
||||
|
||||
|
||||
async def _get_embeddings_batch(
    texts: list[str], batch_size: int = 32
) -> list[list[float]]:
    """Get embeddings for multiple texts in batches.

    A failed batch contributes one empty list per text, so the result always
    aligns 1:1 with *texts*.

    Args:
        texts: Texts to embed.
        batch_size: Number of texts sent per request.

    Returns:
        One embedding (possibly [] on failure) per input text.
    """
    all_embeddings: list[list[float]] = []
    # Reuse a single client for all batches instead of opening a new
    # connection per batch.
    async with httpx.AsyncClient(timeout=30.0) as client:
        for i in range(0, len(texts), batch_size):
            batch = texts[i : i + batch_size]
            try:
                resp = await client.post(
                    f"{EMBEDDING_URL}/embed",
                    json={"texts": batch},
                )
                resp.raise_for_status()
                embeddings = resp.json().get("embeddings", [])
                all_embeddings.extend(embeddings)
            except Exception as e:
                logger.warning("Batch embedding failed for %d texts: %s", len(batch), e)
                all_embeddings.extend([[] for _ in batch])
    return all_embeddings
|
||||
|
||||
|
||||
async def _llm_ollama(prompt: str, system_prompt: Optional[str] = None) -> str:
    """Call the local Ollama chat endpoint.

    Returns the assistant message content, or "" on any failure (HTTP error,
    timeout, malformed response).
    """
    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    messages.append({"role": "user", "content": prompt})

    payload = {
        "model": OLLAMA_MODEL,
        "messages": messages,
        "stream": False,
        "options": {"num_predict": 512},
        "think": False,
    }

    try:
        async with httpx.AsyncClient(timeout=LLM_TIMEOUT) as client:
            resp = await client.post(f"{OLLAMA_URL}/api/chat", json=payload)
            if resp.status_code != 200:
                logger.error(
                    "Ollama chat failed %d: %s", resp.status_code, resp.text[:300]
                )
                return ""
            return resp.json().get("message", {}).get("content", "")
    except Exception as e:
        logger.warning("Ollama call failed: %s", e)
        return ""
|
||||
|
||||
|
||||
def _parse_json(text: str) -> dict:
|
||||
"""Extract JSON from LLM response text."""
|
||||
# Try direct parse
|
||||
try:
|
||||
return json.loads(text)
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
|
||||
# Try extracting JSON block
|
||||
match = re.search(r"\{[^{}]*\}", text, re.DOTALL)
|
||||
if match:
|
||||
try:
|
||||
return json.loads(match.group())
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
|
||||
return {}
|
||||
Reference in New Issue
Block a user