"""Deterministic taxonomy for normalising free-text customer claims.

Capability names echo the planned Obligation -> Capability layer of the
Compliance Execution Graph (memory `project_compliance_graph.md`), so the
reasoning layer's claim capabilities line up with the registry's capabilities.

Matching is lowercase substring matching — deterministic, no LLM, no RAG.
"""

from __future__ import annotations

from typing import Dict, List

# capability -> trigger substrings (German + English), matched lowercase.
#
# Conventions used by the entries below:
#   * Every keyword is written in lowercase, because it is compared against
#     the lowercased input text.
#   * Many entries are word stems ("aktualisier", "signier", "vulnerab") so
#     that inflected forms match through plain substring containment.
#   * Every German keyword containing an umlaut is paired with its ASCII
#     transliteration (ä -> ae, ö -> oe, ü -> ue) so both spellings trigger.
#   * "log " and "sil " end in a space, so they only match when a space
#     follows the word in the text.
#     NOTE(review): presumably this avoids hits inside longer words such as
#     "login"; it also means the word is missed at the very end of the text
#     or directly before punctuation — confirm this trade-off is intended.
CAPABILITY_KEYWORDS: Dict[str, List[str]] = {
    "software_bill_of_materials": [
        "sbom",
        "stückliste",
        "stueckliste",
        "bill of materials",
        "komponentenliste",
    ],
    "secure_updates": ["update", "patch", "aktualisier", "release", "rollout"],
    "software_integrity": [
        "signier",
        "signatur",
        "signed",
        "integrität",
        "integritaet",
        "hash",
    ],
    "vulnerability_management": [
        "schwachstelle",
        "vulnerab",
        "cve",
        "schwachstellenmanagement",
        "vuln",
    ],
    "coordinated_disclosure": [
        "disclosure",
        "offenlegung",
        "security.txt",
        "responsible disclosure",
    ],
    "incident_reporting": [
        "incident",
        "vorfall",
        "behörde",
        "behoerde",
        "csirt",
        "meldepflicht",
        "an die behörde",
    ],
    "authentication": [
        "authentifizier",
        # English stem: covers "authentication", "authenticate(d)"; without it
        # the capability's own English name would not trigger it.
        "authenticat",
        "login",
        "passwort",
        "password",
        "mfa",
        "2fa",
        "anmeldung",
    ],
    "secure_by_default": [
        "härtung",
        "haertung",
        "hardening",
        "default",
        "standardkonfig",
        "sichere konfiguration",
        "angriffsfläche",
        "angriffsflaeche",
    ],
    "security_logging": [
        "logging",
        "log ",
        "logs",
        "protokoll",
        "audit-trail",
        "ereignisprotokoll",
    ],
    "secure_communication": [
        "verschlüssel",
        "verschluessel",
        "encryption",
        "tls",
        "vpn",
        "ssl",
    ],
    "risk_assessment": [
        "risikoanalyse",
        "risikobeurteil",
        "risk assessment",
        "gefährdungsbeurteil",
        "gefaehrdungsbeurteil",
        "bedrohungsanalyse",
        "threat model",
    ],
    "technical_documentation": [
        "dokumentation",
        "technische unterlagen",
        "betriebsanleitung",
        "handbuch",
        "documentation",
    ],
    "conformity_assessment": [
        "konformität",
        "konformitaet",
        "conformity",
        "baumuster",
        "ce-kenn",
    ],
    "functional_safety": [
        "performance level",
        "sil ",
        "iso 13849",
        "funktionale sicherheit",
        "safety control",
    ],
    "data_access_provision": [
        "datenzugang",
        "data access",
        "datenportabilität",
        # ASCII transliteration, consistent with every other umlaut keyword.
        "datenportabilitaet",
        "datenexport",
        "data export",
    ],
}

# capability -> broader compliance topics it touches (spec related_topics).
# The key set is the same as in CAPABILITY_KEYWORDS; keep the two tables in
# sync when adding or renaming a capability.
CAPABILITY_TOPICS: Dict[str, List[str]] = {
    "software_bill_of_materials": [
        "component_transparency",
        "supply_chain",
        "vulnerability_management",
    ],
    "secure_updates": ["secure_updates", "vulnerability_remediation", "release_management"],
    "software_integrity": ["secure_updates", "supply_chain", "tamper_protection"],
    "vulnerability_management": ["vulnerability_handling", "monitoring", "patch_management"],
    "coordinated_disclosure": ["vulnerability_handling", "transparency"],
    "incident_reporting": ["incident_handling", "authority_notification"],
    "authentication": ["access_control", "identity"],
    "secure_by_default": ["hardening", "attack_surface", "configuration"],
    "security_logging": ["monitoring", "forensics", "incident_handling"],
    "secure_communication": ["confidentiality", "integrity", "remote_access"],
    "risk_assessment": ["risk_management", "secure_by_design"],
    "technical_documentation": ["documentation", "conformity"],
    "conformity_assessment": ["conformity", "ce_marking"],
    "functional_safety": ["machine_safety", "control_systems"],
    "data_access_provision": ["data_sharing", "portability"],
}

# qualifier -> substrings that signal a weak/incomplete implementation.
# Qualifier keywords are lowercase; every keyword containing an umlaut is
# paired with its ASCII transliteration (ä -> ae, ü -> ue) so that both
# spellings of the same word trigger the qualifier.
QUALIFIER_KEYWORDS: Dict[str, List[str]] = {
    "reactive": [
        "wenn kunden",
        "wenn ein kunde",
        "nach meldung",
        "auf anfrage",
        "auf nachfrage",
        "nur wenn",
        "reaktiv",
        "wenn fehler",
        "when customers",
        "on request",
        "when reported",
        "ad hoc",
        "ad-hoc",
        "bei bedarf",
    ],
    "manual": ["manuell", "von hand", "manual", "händisch", "haendisch"],
    "planned": [
        "geplant",
        "in planung",
        "wollen wir",
        "planen wir",
        "noch nicht",
        "zukünftig",
        "zukuenftig",
        "künftig",
        "kuenftig",
    ],
    "absent": [
        "haben wir nicht",
        "gibt es nicht",
        "nicht vorhanden",
        "keinen prozess",
        "keine",
    ],
}


def _normalise(text: str) -> str:
    """Return *text* in the form the keyword tables are written in.

    The text is NFC-normalised and lowercased. NFC matters for German input:
    a decomposed umlaut ("a" followed by U+0308) is a different code point
    sequence than the precomposed "ä" used in the keyword literals, so
    without normalisation such text would never match an umlaut keyword.
    """
    # Function-scope import so this helper does not depend on the module's
    # top-of-file import block.
    import unicodedata

    return unicodedata.normalize("NFC", text).lower()


def _matching_keys(text: str, table: Dict[str, List[str]]) -> List[str]:
    """Return the keys of *table* having at least one keyword inside *text*.

    Keys are returned in the table's insertion order; each key appears at
    most once no matter how many of its keywords occur.
    """
    haystack = _normalise(text)
    return [
        key
        for key, needles in table.items()
        if any(needle in haystack for needle in needles)
    ]


def match_capabilities(text: str) -> List[str]:
    """Return the capabilities whose trigger keywords occur in *text*.

    Matching is case-insensitive substring matching against
    CAPABILITY_KEYWORDS; the result follows that table's key order and is
    empty when nothing matches.
    """
    return _matching_keys(text, CAPABILITY_KEYWORDS)


def match_qualifiers(text: str) -> List[str]:
    """Return the qualifiers whose trigger keywords occur in *text*.

    Matching is case-insensitive substring matching against
    QUALIFIER_KEYWORDS; the result follows that table's key order and is
    empty when nothing matches.
    """
    return _matching_keys(text, QUALIFIER_KEYWORDS)


def topics_for(capabilities: List[str]) -> List[str]:
    """Return the de-duplicated topics touched by *capabilities*.

    Topics are looked up in CAPABILITY_TOPICS and returned in first-seen
    order. Capabilities that are not in the table contribute nothing.
    """
    # dict.fromkeys keeps the first occurrence of each topic and preserves
    # insertion order, which replaces the manual "if t not in out" scan.
    return list(
        dict.fromkeys(
            topic
            for cap in capabilities
            for topic in CAPABILITY_TOPICS.get(cap, [])
        )
    )