Files
breakpilot-compliance/backend-compliance/compliance/reasoning/taxonomy_claims.py
T
Benjamin Admin 1607c89459 feat(reasoning): Regulatory Reasoning Engine MVP (scope/obligations/implementation/interpretation)
Deterministic reasoning layer ON TOP of the Legal Knowledge Graph (obligation
registry) and the Compliance Execution Graph (control mapping/evidence). Answers
which regulations apply to a concrete product, which obligations follow, whether
the customer's implementation covers them, and whether a customer interpretation
is too narrow/broad/plausible.

- ProductProfile with tri-state facts (Optional[bool]=None => uncertain, never
  false security); safe predicate evaluator (no eval).
- 6 regulation triggers (CRA/MaschinenVO/RED/EMV/DataAct/NIS2) with missing-fact
  prompts; 24 obligation scope rules.
- CRA obligation_ids RE-USED verbatim from the registry (93 ids) — never re-minted
  (control_uuid trap); Machine/Data-Act flagged proposed=True.
- required_evidence constrained to the framework-agnostic shared evidence catalog;
  capabilities echo the planned Obligation->Capability layer.
- Overlap groups (CRA<->MaschinenVO cyber-safety) + evidence-for-multiple (USP).
- 4 endpoints POST /reasoning/{scope,obligations,implementation-assessment,
  interpretation-assessment}; thin handlers, registered in api/__init__.py.
- 22 tests (5 machine-builder scenarios + 10 acceptance questions). No DB
  migration, no RAG, no new controls.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-25 19:30:53 +02:00

105 lines
4.8 KiB
Python

"""Deterministic taxonomy for normalising free-text customer claims.
Capability names echo the planned Obligation -> Capability layer of the
Compliance Execution Graph (memory `project_compliance_graph.md`), so the
reasoning layer's claim capabilities line up with the registry's capabilities.
Matching is lowercase substring matching — deterministic, no LLM, no RAG.
"""
from __future__ import annotations

import unicodedata
from typing import Dict, List, Optional

# Capability -> trigger substrings (German + English).
#
# Every keyword is written in lowercase because the claim text is lowercased
# before the substring search. German keywords that contain an umlaut are
# listed twice -- once with the umlaut and once in ASCII transliteration
# (ae/oe/ue) -- so that both spellings are recognised.
#
# The dict order is the order in which matched capabilities are reported.
CAPABILITY_KEYWORDS: Dict[str, List[str]] = {
    "software_bill_of_materials": [
        "sbom", "stückliste", "stueckliste", "bill of materials", "komponentenliste",
    ],
    "secure_updates": ["update", "patch", "aktualisier", "release", "rollout"],
    "software_integrity": [
        "signier", "signatur", "signed", "integrität", "integritaet", "hash",
    ],
    "vulnerability_management": [
        "schwachstelle", "vulnerab", "cve", "schwachstellenmanagement", "vuln",
    ],
    "coordinated_disclosure": [
        "disclosure", "offenlegung", "security.txt", "responsible disclosure",
    ],
    "incident_reporting": [
        "incident", "vorfall", "behörde", "behoerde", "csirt", "meldepflicht",
        "an die behörde",
    ],
    "authentication": [
        "authentifizier", "login", "passwort", "password", "mfa", "2fa", "anmeldung",
    ],
    "secure_by_default": [
        "härtung", "haertung", "hardening", "default", "standardkonfig",
        "sichere konfiguration", "angriffsfläche", "angriffsflaeche",
    ],
    # NOTE(review): "log " carries a trailing space, so it does not fire inside
    # "login" (presumably deliberate), but it also misses "log" at the very end
    # of a claim or directly before punctuation.
    "security_logging": [
        "logging", "log ", "logs", "protokoll", "audit-trail", "ereignisprotokoll",
    ],
    # NOTE(review): short tokens match anywhere inside a word; "ssl" also occurs
    # in e.g. "verlässlich". Confirm this is acceptable.
    "secure_communication": [
        "verschlüssel", "verschluessel", "encryption", "tls", "vpn", "ssl",
    ],
    "risk_assessment": [
        "risikoanalyse", "risikobeurteil", "risk assessment", "gefährdungsbeurteil",
        "gefaehrdungsbeurteil", "bedrohungsanalyse", "threat model",
    ],
    "technical_documentation": [
        "dokumentation", "technische unterlagen", "betriebsanleitung", "handbuch",
        "documentation",
    ],
    "conformity_assessment": [
        "konformität", "konformitaet", "conformity", "baumuster", "ce-kenn",
    ],
    "functional_safety": [
        "performance level", "sil ", "iso 13849", "funktionale sicherheit",
        "safety control",
    ],
    "data_access_provision": [
        # "datenportabilitaet" mirrors the ASCII transliteration every other
        # umlaut keyword in this table already has.
        "datenzugang", "data access", "datenportabilität", "datenportabilitaet",
        "datenexport", "data export",
    ],
}
# Maps each capability to the wider compliance topics it relates to (the
# `related_topics` of the spec). The keys mirror those of CAPABILITY_KEYWORDS;
# the order of each list is the order in which its topics are reported.
CAPABILITY_TOPICS: Dict[str, List[str]] = {
    "software_bill_of_materials": [
        "component_transparency",
        "supply_chain",
        "vulnerability_management",
    ],
    "secure_updates": [
        "secure_updates",
        "vulnerability_remediation",
        "release_management",
    ],
    "software_integrity": [
        "secure_updates",
        "supply_chain",
        "tamper_protection",
    ],
    "vulnerability_management": [
        "vulnerability_handling",
        "monitoring",
        "patch_management",
    ],
    "coordinated_disclosure": [
        "vulnerability_handling",
        "transparency",
    ],
    "incident_reporting": [
        "incident_handling",
        "authority_notification",
    ],
    "authentication": [
        "access_control",
        "identity",
    ],
    "secure_by_default": [
        "hardening",
        "attack_surface",
        "configuration",
    ],
    "security_logging": [
        "monitoring",
        "forensics",
        "incident_handling",
    ],
    "secure_communication": [
        "confidentiality",
        "integrity",
        "remote_access",
    ],
    "risk_assessment": [
        "risk_management",
        "secure_by_design",
    ],
    "technical_documentation": [
        "documentation",
        "conformity",
    ],
    "conformity_assessment": [
        "conformity",
        "ce_marking",
    ],
    "functional_safety": [
        "machine_safety",
        "control_systems",
    ],
    "data_access_provision": [
        "data_sharing",
        "portability",
    ],
}
# Qualifier -> substrings that signal a weak or incomplete implementation.
#
# Keywords are lowercase because the claim text is lowercased before the
# substring search. Umlaut keywords are paired with their ASCII transliteration
# (ae/oe/ue) so that both spellings are recognised.
QUALIFIER_KEYWORDS: Dict[str, List[str]] = {
    "reactive": [
        "wenn kunden", "wenn ein kunde", "nach meldung", "auf anfrage", "auf nachfrage",
        "nur wenn", "reaktiv", "wenn fehler", "when customers", "on request",
        "when reported", "ad hoc", "ad-hoc", "bei bedarf",
    ],
    "manual": ["manuell", "von hand", "manual", "händisch", "haendisch"],
    "planned": [
        "geplant", "in planung", "wollen wir", "planen wir", "noch nicht",
        # ASCII transliterations added to match the convention used for
        # "händisch"/"haendisch" above.
        "zukünftig", "zukuenftig", "künftig", "kuenftig",
    ],
    # NOTE(review): "keine" is very broad -- it also fires on claims such as
    # "keine offenen Schwachstellen". Confirm this is intended.
    "absent": [
        "haben wir nicht", "gibt es nicht", "nicht vorhanden", "keinen prozess", "keine",
    ],
}
def _fold(value: str) -> str:
    """Return *value* NFC-normalised and lowercased for substring comparison."""
    # NFC composes "u" + combining diaeresis into the single code point "ü", so
    # a claim pasted in decomposed form still matches the umlaut keywords.
    # str.lower() is used on purpose instead of casefold(): casefold() rewrites
    # "ß" to "ss", which would make "ausschließlich" hit the "ssl" keyword.
    return unicodedata.normalize("NFC", value).lower()


def _matching_keys(text: str, table: Dict[str, List[str]]) -> List[str]:
    """Return the keys of *table* that have at least one keyword inside *text*."""
    if not text:
        # Empty (or missing) claim text cannot contain any keyword.
        return []
    haystack = _fold(text)
    return [
        key
        for key, keywords in table.items()
        if any(_fold(keyword) in haystack for keyword in keywords)
    ]


def match_capabilities(
    text: str, *, keywords: Optional[Dict[str, List[str]]] = None
) -> List[str]:
    """Return the capabilities whose trigger substrings occur in *text*.

    Matching is a case-insensitive substring search; results follow the
    iteration order of the keyword table.

    Args:
        text: Free-text customer claim. Empty or ``None`` yields ``[]``.
        keywords: Optional capability -> substrings table to use instead of
            ``CAPABILITY_KEYWORDS``.

    Returns:
        The matched capability names, each at most once.
    """
    return _matching_keys(text, CAPABILITY_KEYWORDS if keywords is None else keywords)


def match_qualifiers(
    text: str, *, keywords: Optional[Dict[str, List[str]]] = None
) -> List[str]:
    """Return the qualifiers whose trigger substrings occur in *text*.

    Matching is a case-insensitive substring search; results follow the
    iteration order of the keyword table.

    Args:
        text: Free-text customer claim. Empty or ``None`` yields ``[]``.
        keywords: Optional qualifier -> substrings table to use instead of
            ``QUALIFIER_KEYWORDS``.

    Returns:
        The matched qualifier names, each at most once.
    """
    return _matching_keys(text, QUALIFIER_KEYWORDS if keywords is None else keywords)
def topics_for(
    capabilities: List[str], *, topics: Optional[Dict[str, List[str]]] = None
) -> List[str]:
    """Return the de-duplicated topics touched by *capabilities*.

    Topics are collected in the order of *capabilities* and, within one
    capability, in the order they are listed in the mapping. The first
    occurrence of a topic wins; capabilities missing from the mapping
    contribute nothing.

    Args:
        capabilities: Capability names. Empty or ``None`` yields ``[]``.
        topics: Optional capability -> topics mapping to use instead of
            ``CAPABILITY_TOPICS``.

    Returns:
        The related topics without repeats, in first-seen order.
    """
    if not capabilities:
        return []
    mapping = CAPABILITY_TOPICS if topics is None else topics
    # dict.fromkeys keeps first-seen insertion order while dropping repeats,
    # replacing the linear "not in list" scan per topic.
    first_seen = dict.fromkeys(
        topic
        for capability in capabilities
        for topic in mapping.get(capability, ())
    )
    return list(first_seen)