cfdc5fe277
Hybrid-Extraktion Datenblatt → IACE Grenzen (ISO 12100): deterministischer Detektor (Schnittstellen/Einheiten per Regex) + lokales 35B via llm_cascade (Qwen-lokal-first) für die semantische Zuordnung auf die echten LimitsFormData-Keys. Nichts erfinden: Feld nicht im Text → leer + Quellen-Zitat je Feld. Essenzielle ISO-12100-Felder, die leer bleiben → gezielte Rückfragen (foreseeable_misuses, person_groups, qualification, temporal_limits …). Endpoint POST /api/v1/cra/extract-datasheet. 13 Tests grün (reine Teile). Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
79 lines
3.0 KiB
Python
79 lines
3.0 KiB
Python
"""Datasheet -> Grenzen extraction (deterministic + parser parts)."""
|
|
from compliance.services.cra_datasheet_extractor import (
|
|
detect_signals, parse_grenzen_json, compute_followups, _merge_detected, _ESSENTIAL,
|
|
)
|
|
|
|
# Sample datasheet excerpt used as input for the detect_signals tests.
# It names several interfaces (Ethernet, USB, RS232, Anybus, Modbus) and one
# supply voltage (24 V). The text is runtime data and must stay verbatim.
OWIS = (
    "PS 90+ Universelle Positioniersteuerung, bis 9 Achsen. Schnittstellen: Ethernet, "
    "USB, RS232, optional Anybus (Modbus/TCP). Versorgung 24 V. SDK fuer C/C++/C#/LabView."
)
|
|
|
|
|
|
class TestDetectSignals:
    """Tests for ``detect_signals``: interface and unit detection in datasheet text."""

    def test_interfaces_detected(self):
        """Each interface named in the OWIS excerpt is listed under ``interfaces``."""
        signals = detect_signals(OWIS)
        for tok in ("Ethernet", "USB", "RS232", "Modbus", "Anybus"):
            # The token doubles as the assertion message so a failure names it.
            assert tok in signals["interfaces"], tok

    def test_units_detected(self):
        """At least one ``units`` entry contains both ``24`` and ``V``."""
        signals = detect_signals(OWIS)
        assert any("24" in u and "V" in u for u in signals["units"])

    def test_empty_text(self):
        """Empty input produces empty ``interfaces`` and ``units`` lists."""
        signals = detect_signals("")
        # Separate asserts so a failure report points at the offending key.
        assert signals["interfaces"] == []
        assert signals["units"] == []

    def test_no_duplicate_rs232_variants(self):
        """``RS232`` and ``RS-232`` in one text yield a single ``rs``-prefixed entry."""
        signals = detect_signals("RS232 and RS-232 ports")
        rs = [i for i in signals["interfaces"] if i.lower().startswith("rs")]
        assert len(rs) == 1
|
|
|
|
|
|
class TestParse:
    """Tests for ``parse_grenzen_json``: turning a raw JSON string into a field mapping."""

    def test_parses_fields_wrapper(self):
        """Entries under ``fields`` are returned; entries with an empty value are dropped."""
        # Adjacent literals concatenate to one JSON document; wrapped only for line length.
        raw = (
            '{"fields": {'
            '"machine_designation": {"value": "PS 90+", "source": "PS 90+ ..."}, '
            '"intended_purpose": {"value": "", "source": ""}'
            "}}"
        )
        out = parse_grenzen_json(raw)
        assert out["machine_designation"]["value"] == "PS 90+"
        assert "intended_purpose" not in out  # empty dropped

    def test_unknown_keys_ignored(self):
        """A field key the parser does not recognise is left out of the result."""
        out = parse_grenzen_json('{"fields": {"nonsense": {"value": "x"}}}')
        assert out == {}

    def test_string_entry_tolerated(self):
        """A bare string entry is accepted and exposed under ``value``."""
        out = parse_grenzen_json('{"fields": {"manufacturer": "OWIS"}}')
        assert out["manufacturer"]["value"] == "OWIS"

    def test_bad_json(self):
        """Non-JSON and empty input both produce an empty mapping."""
        assert parse_grenzen_json("not json") == {}
        assert parse_grenzen_json("") == {}
|
|
|
|
|
|
class TestFollowups:
    """Tests for ``compute_followups``: follow-up questions for unfilled essential fields."""

    def test_empty_limits_asks_all_essentials(self):
        """With no limits given, the follow-up keys equal ``_ESSENTIAL`` and each has a question."""
        fu = compute_followups({})
        assert {f["key"] for f in fu} == _ESSENTIAL
        assert all(f["question"] for f in fu)

    def test_filled_essential_not_asked(self):
        """A field that already has a value produces no follow-up."""
        fu = compute_followups({"intended_purpose": "Positionieren"})
        assert "intended_purpose" not in {f["key"] for f in fu}

    def test_blank_string_still_asked(self):
        """A whitespace-only value is treated as missing, so the field is still asked."""
        fu = compute_followups({"intended_purpose": " "})
        assert "intended_purpose" in {f["key"] for f in fu}
|
|
|
|
|
|
class TestMergeDetected:
    """Tests for ``_merge_detected``: back-filling limits from detected signals."""

    def test_backfills_electrical_interfaces_excluding_usb(self):
        """Detected interfaces fill ``electrical_interfaces`` in place, without USB."""
        # Both dicts are passed empty and inspected afterwards: the call mutates them.
        limits, prov = {}, {}
        _merge_detected(limits, prov, {"interfaces": ["Ethernet", "Modbus", "USB"]})
        assert "Ethernet" in limits["electrical_interfaces"]
        assert "USB" not in limits["electrical_interfaces"]
        # The provenance entry is expected to start with "deterministisch".
        assert prov["electrical_interfaces"].startswith("deterministisch")

    def test_does_not_overwrite_llm_value(self):
        """An already present ``electrical_interfaces`` value is left untouched."""
        limits = {"electrical_interfaces": "PROFINET (vom LLM)"}
        _merge_detected(limits, {}, {"interfaces": ["Ethernet"]})
        assert limits["electrical_interfaces"] == "PROFINET (vom LLM)"
|