Files
breakpilot-compliance/backend-compliance/tests/test_cra_datasheet_extractor.py
T
Benjamin Admin fda94afd5f
CI / detect-changes (push) Successful in 19s
CI / guardrail-integrity (push) Has been skipped
CI / branch-name (push) Has been skipped
CI / secret-scan (push) Has been skipped
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / build-sha-integrity (push) Successful in 10s
CI / validate-canonical-controls (push) Successful in 9s
CI / loc-budget (push) Successful in 22s
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Has been skipped
CI / test-go (push) Has been skipped
CI / iace-gt-coverage (push) Has been skipped
CI / test-python-backend (push) Successful in 32s
CI / test-python-document-crawler (push) Has been skipped
CI / test-python-dsms-gateway (push) Has been skipped
fix(cra): prod hang-guard /readiness machinery + robuster Datenblatt-JSON-Parse
#1 _machinery_obligations: SET statement_timeout=4s + run_in_threadpool — auf
   prod hing die Maschinen-Query ~30s (langsame/unindizierte DB nach DB-Swap)
   und blockierte den async-Worker. Jetzt: bei Langsamkeit graceful 'keine
   Maschinen-Pflichten' statt Hang. (Fehlender prod-Index = Controls/DB-Session.)
#2 parse_grenzen_json: tolerant ggue. ```json-Fences / Prosa-umschlossenem JSON
   (gehostete Modelle wie OVH ignorieren z.T. response_format) → Datenblatt-
   Extraktion liefert auch ueber den OVH-Fallback Felder.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-17 07:39:39 +02:00

87 lines
3.4 KiB
Python

"""Datasheet -> Grenzen extraction (deterministic + parser parts)."""
from compliance.services.cra_datasheet_extractor import (
detect_signals, parse_grenzen_json, compute_followups, _merge_detected, _ESSENTIAL,
)
# Sample datasheet text (PS 90+ positioning controller) used as input by the
# signal-detection tests; split at sentence boundaries for readability.
OWIS = (
    "PS 90+ Universelle Positioniersteuerung, bis 9 Achsen. "
    "Schnittstellen: Ethernet, USB, RS232, optional Anybus (Modbus/TCP). "
    "Versorgung 24 V. SDK fuer C/C++/C#/LabView."
)
class TestDetectSignals:
    """Checks on the ``interfaces`` and ``units`` lists returned by ``detect_signals``."""

    def test_interfaces_detected(self):
        """Each interface token named in the sample text shows up in the result."""
        found = detect_signals(OWIS)["interfaces"]
        for expected in ("Ethernet", "USB", "RS232", "Modbus", "Anybus"):
            assert expected in found, expected

    def test_units_detected(self):
        """At least one detected unit mentions both "24" and "V"."""
        units = detect_signals(OWIS)["units"]
        hit = next((u for u in units if "24" in u and "V" in u), None)
        assert hit is not None

    def test_empty_text(self):
        """Empty input produces empty interface and unit lists."""
        signals = detect_signals("")
        assert signals["interfaces"] == []
        assert signals["units"] == []

    def test_no_duplicate_rs232_variants(self):
        """"RS232" and "RS-232" in one text yield a single "rs..." entry."""
        interfaces = detect_signals("RS232 and RS-232 ports")["interfaces"]
        rs_count = sum(1 for name in interfaces if name.lower().startswith("rs"))
        assert rs_count == 1
class TestParse:
    """Checks on ``parse_grenzen_json`` for clean, malformed and wrapped model output."""

    def test_parses_fields_wrapper(self):
        """Entries under "fields" are returned; entries with an empty value are dropped."""
        payload = (
            '{"fields": {'
            '"machine_designation": {"value": "PS 90+", "source": "PS 90+ ..."}, '
            '"intended_purpose": {"value": "", "source": ""}'
            '}}'
        )
        parsed = parse_grenzen_json(payload)
        assert parsed["machine_designation"]["value"] == "PS 90+"
        # The empty-valued entry must not appear in the result.
        assert "intended_purpose" not in parsed

    def test_unknown_keys_ignored(self):
        """A key the parser does not know yields an empty result."""
        parsed = parse_grenzen_json('{"fields": {"nonsense": {"value": "x"}}}')
        assert parsed == {}

    def test_string_entry_tolerated(self):
        """A bare string entry is exposed under the "value" key."""
        parsed = parse_grenzen_json('{"fields": {"manufacturer": "OWIS"}}')
        assert parsed["manufacturer"]["value"] == "OWIS"

    def test_bad_json(self):
        """Non-JSON text and the empty string both parse to an empty dict."""
        for broken in ("not json", ""):
            assert parse_grenzen_json(broken) == {}

    def test_fenced_json(self):
        """JSON wrapped in a Markdown ```json fence is still parsed."""
        fenced = (
            '```json\n'
            '{"fields": {"manufacturer": {"value": "OWIS", "source": "x"}}}\n'
            '```'
        )
        parsed = parse_grenzen_json(fenced)
        assert parsed["manufacturer"]["value"] == "OWIS"

    def test_prose_wrapped_json(self):
        """JSON surrounded by free text before and after is still parsed."""
        wrapped = (
            'Hier das Ergebnis:\n'
            '{"fields": {"machine_type": {"value": "Steuerung"}}}\n'
            'Danke.'
        )
        parsed = parse_grenzen_json(wrapped)
        assert parsed["machine_type"]["value"] == "Steuerung"
class TestFollowups:
    """Checks on which follow-up questions ``compute_followups`` emits."""

    def test_empty_limits_asks_all_essentials(self):
        """With no limits given, the asked keys equal ``_ESSENTIAL`` and each has a question."""
        followups = compute_followups({})
        asked = {item["key"] for item in followups}
        assert asked == _ESSENTIAL
        assert all(item["question"] for item in followups)

    def test_filled_essential_not_asked(self):
        """A filled-in field is not asked about again."""
        followups = compute_followups({"intended_purpose": "Positionieren"})
        asked = {item["key"] for item in followups}
        assert "intended_purpose" not in asked

    def test_blank_string_still_asked(self):
        """A whitespace-only value does not count as filled."""
        followups = compute_followups({"intended_purpose": " "})
        asked = {item["key"] for item in followups}
        assert "intended_purpose" in asked
class TestMergeDetected:
    """Checks on how ``_merge_detected`` folds detected signals into the limits dict."""

    def test_backfills_electrical_interfaces_excluding_usb(self):
        """Detected interfaces fill an absent field, USB is left out, provenance is recorded."""
        limits = {}
        provenance = {}
        detected = {"interfaces": ["Ethernet", "Modbus", "USB"]}
        _merge_detected(limits, provenance, detected)
        merged = limits["electrical_interfaces"]
        assert "Ethernet" in merged
        assert "USB" not in merged
        assert provenance["electrical_interfaces"].startswith("deterministisch")

    def test_does_not_overwrite_llm_value(self):
        """A value already present in limits is left unchanged."""
        llm_value = "PROFINET (vom LLM)"
        limits = {"electrical_interfaces": llm_value}
        _merge_detected(limits, {}, {"interfaces": ["Ethernet"]})
        assert limits["electrical_interfaces"] == llm_value