feat(cra): Datenblatt-Extraktion auf lokales 35B + llm_status-Fix
llm_cascade additiv modellfähig (optionaler model-Param, Cache-Key kennt model_hint → keine Kollision; Default unverändert für alle anderen Nutzer). Datenblatt-Extraktor nutzt jetzt qwen3.5:35b-a3b (CRA_DATASHEET_MODEL, gleiches Modell wie der Compliance Advisor) für bessere semantische Zuordnung. Plus llm_status (skipped|ok|empty|unavailable) + Logging statt stillem except; Frontend zeigt bei 'unavailable' einen Hinweis statt leerer Felder (wichtig auf prod ohne lokales Ollama → Cascade-Fallback bzw. Hinweis). Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -8,6 +8,7 @@ interface ExtractResult {
|
|||||||
limits: Record<string, string>
|
limits: Record<string, string>
|
||||||
provenance: Record<string, string>
|
provenance: Record<string, string>
|
||||||
detected: { interfaces: string[]; units: string[] }
|
detected: { interfaces: string[]; units: string[] }
|
||||||
|
llm_status?: string
|
||||||
filled: string[]
|
filled: string[]
|
||||||
missing: string[]
|
missing: string[]
|
||||||
followup: Followup[]
|
followup: Followup[]
|
||||||
@@ -73,6 +74,12 @@ export function DatasheetExtract() {
|
|||||||
|
|
||||||
{res && (
|
{res && (
|
||||||
<div className="mt-5 space-y-4">
|
<div className="mt-5 space-y-4">
|
||||||
|
{res.llm_status === 'unavailable' && (
|
||||||
|
<div className="rounded-lg border border-amber-300 bg-amber-50 dark:bg-amber-900/20 text-amber-900 dark:text-amber-200 p-3 text-xs">
|
||||||
|
KI-Extraktion gerade nicht verfügbar (lokales Modell lädt oder offline). Unten stehen nur
|
||||||
|
deterministisch erkannte Werte — bitte „Grenzen extrahieren" erneut klicken oder Felder manuell ergänzen.
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
{(res.detected.interfaces.length > 0 || res.detected.units.length > 0) && (
|
{(res.detected.interfaces.length > 0 || res.detected.units.length > 0) && (
|
||||||
<div className="text-xs text-gray-600 dark:text-gray-300">
|
<div className="text-xs text-gray-600 dark:text-gray-300">
|
||||||
<span className="font-medium">Deterministisch erkannt:</span>{' '}
|
<span className="font-medium">Deterministisch erkannt:</span>{' '}
|
||||||
|
|||||||
@@ -10,8 +10,15 @@ Pure + testable: detect_signals / parse_grenzen_json / compute_followups. The
|
|||||||
async extract_grenzen() wraps the LLM call (llm_cascade, same as vendor extractor).
|
async extract_grenzen() wraps the LLM call (llm_cascade, same as vendor extractor).
|
||||||
"""
|
"""
|
||||||
import json
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
import re
|
import re
|
||||||
from typing import Optional
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Datasheet extraction uses the local 35B (same model as the Compliance Advisor) —
|
||||||
|
# higher-quality semantic mapping than the default cascade model. Env-overridable.
|
||||||
|
_DATASHEET_MODEL = os.getenv("CRA_DATASHEET_MODEL", "qwen3.5:35b-a3b")
|
||||||
|
|
||||||
# IACE Grenzen field keys (must match admin LimitsFormData). label + whether it
|
# IACE Grenzen field keys (must match admin LimitsFormData). label + whether it
|
||||||
# is essential for a usable risk assessment (=> asked as follow-up if empty).
|
# is essential for a usable risk assessment (=> asked as follow-up if empty).
|
||||||
@@ -150,6 +157,7 @@ async def extract_grenzen(text: str, max_chars: int = 20000) -> dict:
|
|||||||
signals = detect_signals(text or "")
|
signals = detect_signals(text or "")
|
||||||
limits: dict = {}
|
limits: dict = {}
|
||||||
provenance: dict = {}
|
provenance: dict = {}
|
||||||
|
llm_status = "skipped" # skipped | ok | empty | unavailable
|
||||||
excerpt = (text or "")[:max_chars]
|
excerpt = (text or "")[:max_chars]
|
||||||
if len(excerpt) >= 200:
|
if len(excerpt) >= 200:
|
||||||
try:
|
try:
|
||||||
@@ -157,20 +165,25 @@ async def extract_grenzen(text: str, max_chars: int = 20000) -> dict:
|
|||||||
res = await call_with_cascade(
|
res = await call_with_cascade(
|
||||||
system=_system_prompt(),
|
system=_system_prompt(),
|
||||||
user=f"Datenblatt-Text:\n\n{excerpt}",
|
user=f"Datenblatt-Text:\n\n{excerpt}",
|
||||||
min_confidence=0.5, max_tokens=4000,
|
min_confidence=0.5, max_tokens=4000, model=_DATASHEET_MODEL,
|
||||||
)
|
)
|
||||||
parsed = parse_grenzen_json(res.get("text", "") if isinstance(res, dict) else "")
|
parsed = parse_grenzen_json(res.get("text", "") if isinstance(res, dict) else "")
|
||||||
for key, entry in parsed.items():
|
for key, entry in parsed.items():
|
||||||
limits[key] = entry["value"]
|
limits[key] = entry["value"]
|
||||||
provenance[key] = entry.get("source", "")
|
provenance[key] = entry.get("source", "")
|
||||||
except Exception:
|
llm_status = "ok" if parsed else "empty"
|
||||||
pass # extraction is best-effort; fall back to detector + follow-ups
|
except Exception as e:
|
||||||
|
# best-effort: keep the deterministic facts, but surface the failure so
|
||||||
|
# a cold-start/timeout doesn't masquerade as "nothing on the datasheet".
|
||||||
|
logger.warning("datasheet LLM extraction failed: %s (%s)", e, type(e).__name__)
|
||||||
|
llm_status = "unavailable"
|
||||||
|
|
||||||
_merge_detected(limits, provenance, signals)
|
_merge_detected(limits, provenance, signals)
|
||||||
return {
|
return {
|
||||||
"limits": limits,
|
"limits": limits,
|
||||||
"provenance": provenance,
|
"provenance": provenance,
|
||||||
"detected": signals,
|
"detected": signals,
|
||||||
|
"llm_status": llm_status,
|
||||||
"filled": sorted(limits.keys()),
|
"filled": sorted(limits.keys()),
|
||||||
"missing": [k for k in _FIELD_KEYS if not (limits.get(k) or "").strip()],
|
"missing": [k for k in _FIELD_KEYS if not (limits.get(k) or "").strip()],
|
||||||
"followup": compute_followups(limits),
|
"followup": compute_followups(limits),
|
||||||
|
|||||||
@@ -104,9 +104,10 @@ def _heuristic_confidence(response_text: str, input_len: int) -> float:
|
|||||||
|
|
||||||
async def _call_ollama(system: str, user: str,
|
async def _call_ollama(system: str, user: str,
|
||||||
max_tokens: int = 6000,
|
max_tokens: int = 6000,
|
||||||
timeout: float = 90.0) -> str:
|
timeout: float = 90.0,
|
||||||
|
model: str = "") -> str:
|
||||||
base = os.getenv("OLLAMA_URL", "http://host.docker.internal:11434")
|
base = os.getenv("OLLAMA_URL", "http://host.docker.internal:11434")
|
||||||
model = os.getenv("CMP_LLM_MODEL", "qwen3:30b-a3b")
|
model = model or os.getenv("CMP_LLM_MODEL", "qwen3:30b-a3b")
|
||||||
payload = {
|
payload = {
|
||||||
"model": model, "stream": False, "format": "json",
|
"model": model, "stream": False, "format": "json",
|
||||||
"messages": [{"role": "system", "content": system},
|
"messages": [{"role": "system", "content": system},
|
||||||
@@ -188,10 +189,11 @@ async def call_with_cascade(
|
|||||||
user: str,
|
user: str,
|
||||||
min_confidence: float = 0.6,
|
min_confidence: float = 0.6,
|
||||||
max_tokens: int = 6000,
|
max_tokens: int = 6000,
|
||||||
|
model: str = "",
|
||||||
) -> dict:
|
) -> dict:
|
||||||
"""Returns {'text': str, 'confidence': float, 'source': str,
|
"""Returns {'text': str, 'confidence': float, 'source': str,
|
||||||
'cached': bool}."""
|
'cached': bool}. `model` overrides the local Tier-1 (Ollama) model only."""
|
||||||
key = _cache_key(system, user)
|
key = _cache_key(system, user, model)
|
||||||
cached = _cache_get(key)
|
cached = _cache_get(key)
|
||||||
if cached:
|
if cached:
|
||||||
cached["cached"] = True
|
cached["cached"] = True
|
||||||
@@ -211,7 +213,7 @@ async def call_with_cascade(
|
|||||||
"or ANTHROPIC_API_KEY to enable fallbacks."
|
"or ANTHROPIC_API_KEY to enable fallbacks."
|
||||||
)
|
)
|
||||||
# Tier 1: Qwen lokal
|
# Tier 1: Qwen lokal
|
||||||
text = await _call_ollama(system, user, max_tokens=max_tokens)
|
text = await _call_ollama(system, user, max_tokens=max_tokens, model=model)
|
||||||
conf = _heuristic_confidence(text, input_len)
|
conf = _heuristic_confidence(text, input_len)
|
||||||
if text and conf >= min_confidence:
|
if text and conf >= min_confidence:
|
||||||
out = {"text": text, "confidence": conf,
|
out = {"text": text, "confidence": conf,
|
||||||
|
|||||||
Reference in New Issue
Block a user