feat(cra): Datenblatt→Grenzen-Extraktor (hybrid, lokales 35B)
Hybrid-Extraktion Datenblatt → IACE Grenzen (ISO 12100): deterministischer Detektor (Schnittstellen/Einheiten per Regex) + lokales 35B via llm_cascade (Qwen-lokal-first) für die semantische Zuordnung auf die echten LimitsFormData-Keys. Nichts erfinden: Feld nicht im Text → leer + Quellen-Zitat je Feld. Essenzielle ISO-12100-Felder, die leer bleiben → gezielte Rückfragen (foreseeable_misuses, person_groups, qualification, temporal_limits …). Endpoint POST /api/v1/cra/extract-datasheet. 13 Tests grün (reine Teile). Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,78 @@
|
||||
"""Datasheet -> Grenzen extraction (deterministic + parser parts)."""
|
||||
from compliance.services.cra_datasheet_extractor import (
|
||||
detect_signals, parse_grenzen_json, compute_followups, _merge_detected, _ESSENTIAL,
|
||||
)
|
||||
|
||||
# Sample datasheet excerpt used as input for the detection tests: it names
# several interfaces and a supply voltage.
OWIS = (
    "PS 90+ Universelle Positioniersteuerung, bis 9 Achsen. "
    "Schnittstellen: Ethernet, USB, RS232, optional Anybus (Modbus/TCP). "
    "Versorgung 24 V. "
    "SDK fuer C/C++/C#/LabView."
)
|
||||
|
||||
|
||||
class TestDetectSignals:
    """Checks on the dict returned by ``detect_signals``.

    The tests read the ``"interfaces"`` and ``"units"`` entries of the result.
    """

    def test_interfaces_detected(self):
        """Every interface named in the sample text is reported."""
        signals = detect_signals(OWIS)
        expected_tokens = ("Ethernet", "USB", "RS232", "Modbus", "Anybus")
        for expected in expected_tokens:
            # The token doubles as the failure message so the missing one is obvious.
            assert expected in signals["interfaces"], expected

    def test_units_detected(self):
        """At least one reported unit mentions both ``24`` and ``V``."""
        signals = detect_signals(OWIS)
        voltage_hits = [
            unit for unit in signals["units"] if "24" in unit and "V" in unit
        ]
        assert voltage_hits

    def test_empty_text(self):
        """An empty input yields empty interface and unit lists."""
        signals = detect_signals("")
        assert signals["interfaces"] == []
        assert signals["units"] == []

    def test_no_duplicate_rs232_variants(self):
        """``RS232`` and ``RS-232`` in one text produce a single RS entry."""
        signals = detect_signals("RS232 and RS-232 ports")
        rs_count = sum(
            1 for name in signals["interfaces"] if name.lower().startswith("rs")
        )
        assert rs_count == 1
|
||||
|
||||
|
||||
class TestParse:
    """Checks on ``parse_grenzen_json`` for well-formed and malformed input."""

    def test_parses_fields_wrapper(self):
        """A ``fields`` wrapper is unpacked; entries with an empty value are dropped."""
        payload = (
            '{"fields": {'
            '"machine_designation": {"value": "PS 90+", "source": "PS 90+ ..."}, '
            '"intended_purpose": {"value": "", "source": ""}'
            '}}'
        )
        parsed = parse_grenzen_json(payload)
        designation = parsed["machine_designation"]
        assert designation["value"] == "PS 90+"
        assert "intended_purpose" not in parsed  # empty dropped

    def test_unknown_keys_ignored(self):
        """A key the parser does not know produces no output entry."""
        payload = '{"fields": {"nonsense": {"value": "x"}}}'
        assert parse_grenzen_json(payload) == {}

    def test_string_entry_tolerated(self):
        """A bare string entry is accepted and exposed under ``"value"``."""
        parsed = parse_grenzen_json('{"fields": {"manufacturer": "OWIS"}}')
        manufacturer = parsed["manufacturer"]
        assert manufacturer["value"] == "OWIS"

    def test_bad_json(self):
        """Non-JSON text and the empty string both yield an empty dict."""
        for broken in ("not json", ""):
            assert parse_grenzen_json(broken) == {}
|
||||
|
||||
|
||||
class TestFollowups:
    """Checks on which follow-up questions ``compute_followups`` produces."""

    @staticmethod
    def _asked_keys(followups):
        """Collect the ``"key"`` of every follow-up entry into a set."""
        return {entry["key"] for entry in followups}

    def test_empty_limits_asks_all_essentials(self):
        """With no limits given, exactly the essential keys are asked, each with a question."""
        followups = compute_followups({})
        assert self._asked_keys(followups) == _ESSENTIAL
        for entry in followups:
            assert entry["question"]

    def test_filled_essential_not_asked(self):
        """An essential field that has content is not asked again."""
        followups = compute_followups({"intended_purpose": "Positionieren"})
        assert "intended_purpose" not in self._asked_keys(followups)

    def test_blank_string_still_asked(self):
        """A whitespace-only value counts as missing and is still asked."""
        followups = compute_followups({"intended_purpose": " "})
        assert "intended_purpose" in self._asked_keys(followups)
|
||||
|
||||
|
||||
class TestMergeDetected:
    """Checks on ``_merge_detected``, which the tests expect to fill the passed dicts in place."""

    def test_backfills_electrical_interfaces_excluding_usb(self):
        """Detected interfaces fill the empty field, USB is left out, provenance is recorded."""
        limits = {}
        provenance = {}
        detected = {"interfaces": ["Ethernet", "Modbus", "USB"]}

        _merge_detected(limits, provenance, detected)

        assert "Ethernet" in limits["electrical_interfaces"]
        assert "USB" not in limits["electrical_interfaces"]
        assert provenance["electrical_interfaces"].startswith("deterministisch")

    def test_does_not_overwrite_llm_value(self):
        """A value already present in ``limits`` survives the merge unchanged."""
        llm_value = "PROFINET (vom LLM)"
        limits = {"electrical_interfaces": llm_value}

        _merge_detected(limits, {}, {"interfaces": ["Ethernet"]})

        assert limits["electrical_interfaces"] == llm_value
|
||||
Reference in New Issue
Block a user