diff --git a/admin-compliance/app/sdk/cra/_components/DatasheetExtract.tsx b/admin-compliance/app/sdk/cra/_components/DatasheetExtract.tsx index afd98aa5..2519456d 100644 --- a/admin-compliance/app/sdk/cra/_components/DatasheetExtract.tsx +++ b/admin-compliance/app/sdk/cra/_components/DatasheetExtract.tsx @@ -8,6 +8,7 @@ interface ExtractResult { limits: Record provenance: Record detected: { interfaces: string[]; units: string[] } + llm_status?: string filled: string[] missing: string[] followup: Followup[] @@ -73,6 +74,12 @@ export function DatasheetExtract() { {res && (
+ {res.llm_status === 'unavailable' && ( +
+ KI-Extraktion gerade nicht verfügbar (lokales Modell lädt oder offline). Unten stehen nur + deterministisch erkannte Werte — bitte „Grenzen extrahieren" erneut klicken oder Felder manuell ergänzen. +
+ )} {(res.detected.interfaces.length > 0 || res.detected.units.length > 0) && (
Deterministisch erkannt:{' '} diff --git a/backend-compliance/compliance/services/cra_datasheet_extractor.py b/backend-compliance/compliance/services/cra_datasheet_extractor.py index 2af71a10..917c7524 100644 --- a/backend-compliance/compliance/services/cra_datasheet_extractor.py +++ b/backend-compliance/compliance/services/cra_datasheet_extractor.py @@ -10,8 +10,15 @@ Pure + testable: detect_signals / parse_grenzen_json / compute_followups. The async extract_grenzen() wraps the LLM call (llm_cascade, same as vendor extractor). """ import json +import logging +import os import re -from typing import Optional + +logger = logging.getLogger(__name__) + +# Datasheet extraction uses the local 35B (same model as the Compliance Advisor) — +# higher-quality semantic mapping than the default cascade model. Env-overridable. +_DATASHEET_MODEL = os.getenv("CRA_DATASHEET_MODEL", "qwen3.5:35b-a3b") # IACE Grenzen field keys (must match admin LimitsFormData). label + whether it # is essential for a usable risk assessment (=> asked as follow-up if empty). @@ -150,6 +157,7 @@ async def extract_grenzen(text: str, max_chars: int = 20000) -> dict: signals = detect_signals(text or "") limits: dict = {} provenance: dict = {} + llm_status = "skipped" # skipped | ok | empty | unavailable excerpt = (text or "")[:max_chars] if len(excerpt) >= 200: try: @@ -157,20 +165,25 @@ async def extract_grenzen(text: str, max_chars: int = 20000) -> dict: res = await call_with_cascade( system=_system_prompt(), user=f"Datenblatt-Text:\n\n{excerpt}", - min_confidence=0.5, max_tokens=4000, + min_confidence=0.5, max_tokens=4000, model=_DATASHEET_MODEL, ) parsed = parse_grenzen_json(res.get("text", "") if isinstance(res, dict) else "") for key, entry in parsed.items(): limits[key] = entry["value"] provenance[key] = entry.get("source", "") - except Exception: - pass # extraction is best-effort; fall back to detector + follow-ups + llm_status = "ok" if parsed else "empty" + except Exception as e: + # best-effort: keep the deterministic facts, but surface the failure so + # a cold-start/timeout doesn't masquerade as "nothing on the datasheet". + logger.warning("datasheet LLM extraction failed: %s (%s)", e, type(e).__name__) + llm_status = "unavailable" _merge_detected(limits, provenance, signals) return { "limits": limits, "provenance": provenance, "detected": signals, + "llm_status": llm_status, "filled": sorted(limits.keys()), "missing": [k for k in _FIELD_KEYS if not (limits.get(k) or "").strip()], "followup": compute_followups(limits), diff --git a/backend-compliance/compliance/services/llm_cascade.py b/backend-compliance/compliance/services/llm_cascade.py index 6fd9d190..a5eba899 100644 --- a/backend-compliance/compliance/services/llm_cascade.py +++ b/backend-compliance/compliance/services/llm_cascade.py @@ -104,9 +104,10 @@ def _heuristic_confidence(response_text: str, input_len: int) -> float: async def _call_ollama(system: str, user: str, max_tokens: int = 6000, - timeout: float = 90.0) -> str: + timeout: float = 90.0, + model: str = "") -> str: base = os.getenv("OLLAMA_URL", "http://host.docker.internal:11434") - model = os.getenv("CMP_LLM_MODEL", "qwen3:30b-a3b") + model = model or os.getenv("CMP_LLM_MODEL", "qwen3:30b-a3b") payload = { "model": model, "stream": False, "format": "json", "messages": [{"role": "system", "content": system}, @@ -188,10 +189,11 @@ async def call_with_cascade( user: str, min_confidence: float = 0.6, max_tokens: int = 6000, + model: str = "", ) -> dict: """Returns {'text': str, 'confidence': float, 'source': str, - 'cached': bool}.""" - key = _cache_key(system, user) + 'cached': bool}. `model` overrides the local Tier-1 (Ollama) model only.""" + key = _cache_key(system, user, model) cached = _cache_get(key) if cached: cached["cached"] = True @@ -211,7 +213,7 @@ async def call_with_cascade( "or ANTHROPIC_API_KEY to enable fallbacks." ) # Tier 1: Qwen lokal - text = await _call_ollama(system, user, max_tokens=max_tokens) + text = await _call_ollama(system, user, max_tokens=max_tokens, model=model) conf = _heuristic_confidence(text, input_len) if text and conf >= min_confidence: out = {"text": text, "confidence": conf,