From 48ca0a6bef5157dc4690eb0b811df87cfbf8b4fc Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Mon, 23 Mar 2026 12:11:55 +0100 Subject: [PATCH] feat: Framework Decomposition Engine + Composite Detection for Pass 0b Adds a routing layer between Pass 0a and Pass 0b that classifies obligations into atomic/compound/framework_container. Framework-container obligations (e.g. "CCM-Praktiken fuer AIS") are decomposed into concrete sub-obligations via an internal framework registry before Pass 0b composition. - New: framework_decomposition.py with routing, matching, decomposition - New: Framework registry (NIST SP 800-53, OWASP ASVS, CSA CCM) as JSON - New: Composite detection flags on atomic controls (is_composite, atomicity) - New: gen_meta fields: framework_ref, framework_domain, decomposition_source - Integration: _route_and_compose() in run_pass0b() deterministic path - 248 tests (198 decomposition + 50 framework), all passing Co-Authored-By: Claude Opus 4.6 --- .../compliance/data/frameworks/__init__.py | 0 .../compliance/data/frameworks/csa_ccm.json | 443 +++++++++++ .../data/frameworks/nist_sp800_53.json | 514 +++++++++++++ .../data/frameworks/owasp_asvs.json | 353 +++++++++ .../compliance/services/decomposition_pass.py | 167 +++- .../services/framework_decomposition.py | 714 ++++++++++++++++++ .../tests/test_decomposition_pass.py | 118 +++ .../tests/test_framework_decomposition.py | 453 +++++++++++ 8 files changed, 2744 insertions(+), 18 deletions(-) create mode 100644 backend-compliance/compliance/data/frameworks/__init__.py create mode 100644 backend-compliance/compliance/data/frameworks/csa_ccm.json create mode 100644 backend-compliance/compliance/data/frameworks/nist_sp800_53.json create mode 100644 backend-compliance/compliance/data/frameworks/owasp_asvs.json create mode 100644 backend-compliance/compliance/services/framework_decomposition.py create mode 100644 backend-compliance/tests/test_framework_decomposition.py diff --git 
a/backend-compliance/compliance/data/frameworks/__init__.py b/backend-compliance/compliance/data/frameworks/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/backend-compliance/compliance/data/frameworks/csa_ccm.json b/backend-compliance/compliance/data/frameworks/csa_ccm.json new file mode 100644 index 0000000..c98b372 --- /dev/null +++ b/backend-compliance/compliance/data/frameworks/csa_ccm.json @@ -0,0 +1,443 @@ +{ + "framework_id": "CSA_CCM", + "display_name": "Cloud Security Alliance CCM v4", + "license": { + "type": "restricted", + "rag_allowed": false, + "use_as_metadata": true, + "note": "Abstrahierte Struktur — keine Originaltexte uebernommen" + }, + "domains": [ + { + "domain_id": "AIS", + "title": "Application and Interface Security", + "aliases": ["ais", "application and interface security", "anwendungssicherheit", "schnittstellensicherheit"], + "keywords": ["application", "anwendung", "interface", "schnittstelle", "api", "web", "eingabevalidierung"], + "subcontrols": [ + { + "subcontrol_id": "AIS-01", + "title": "Application Security Policy", + "statement": "Sicherheitsrichtlinien fuer Anwendungsentwicklung und Schnittstellenmanagement muessen definiert und angewendet werden.", + "keywords": ["policy", "richtlinie", "entwicklung"], + "action_hint": "document", + "object_hint": "Anwendungssicherheitsrichtlinie", + "object_class": "policy" + }, + { + "subcontrol_id": "AIS-02", + "title": "Application Security Design", + "statement": "Sicherheitsanforderungen muessen in den Entwurf jeder Anwendung integriert werden.", + "keywords": ["design", "entwurf", "security by design"], + "action_hint": "implement", + "object_hint": "Sicherheitsanforderungen im Anwendungsentwurf", + "object_class": "process" + }, + { + "subcontrol_id": "AIS-03", + "title": "Application Security Testing", + "statement": "Anwendungen muessen vor dem Deployment und regelmaessig auf Sicherheitsschwachstellen getestet werden.", + "keywords": ["testing", "test", "sast", 
"dast", "penetration"], + "action_hint": "test", + "object_hint": "Anwendungssicherheitstests", + "object_class": "process" + }, + { + "subcontrol_id": "AIS-04", + "title": "Secure Development Practices", + "statement": "Sichere Entwicklungspraktiken (Code Review, Pair Programming, SAST) muessen fuer alle Entwicklungsprojekte gelten.", + "keywords": ["development", "entwicklung", "code review", "sast", "praktiken"], + "action_hint": "implement", + "object_hint": "Sichere Entwicklungspraktiken", + "object_class": "process" + }, + { + "subcontrol_id": "AIS-05", + "title": "API Security", + "statement": "APIs muessen authentifiziert, autorisiert und gegen Missbrauch geschuetzt werden.", + "keywords": ["api", "schnittstelle", "authentifizierung", "rate limiting"], + "action_hint": "implement", + "object_hint": "API-Sicherheitskontrollen", + "object_class": "interface" + }, + { + "subcontrol_id": "AIS-06", + "title": "Automated Application Security Testing", + "statement": "Automatisierte Sicherheitstests muessen in die CI/CD-Pipeline integriert werden.", + "keywords": ["automatisiert", "ci/cd", "pipeline", "sast", "dast"], + "action_hint": "configure", + "object_hint": "Automatisierte Sicherheitstests in CI/CD", + "object_class": "configuration" + } + ] + }, + { + "domain_id": "BCR", + "title": "Business Continuity and Resilience", + "aliases": ["bcr", "business continuity", "resilience", "geschaeftskontinuitaet", "resilienz"], + "keywords": ["continuity", "kontinuitaet", "resilience", "resilienz", "disaster", "recovery", "backup"], + "subcontrols": [ + { + "subcontrol_id": "BCR-01", + "title": "Business Continuity Planning", + "statement": "Ein Geschaeftskontinuitaetsplan muss erstellt, dokumentiert und regelmaessig getestet werden.", + "keywords": ["plan", "kontinuitaet", "geschaeft"], + "action_hint": "document", + "object_hint": "Geschaeftskontinuitaetsplan", + "object_class": "policy" + }, + { + "subcontrol_id": "BCR-02", + "title": "Risk Assessment for BCM", + 
"statement": "Risikobewertungen muessen fuer geschaeftskritische Prozesse durchgefuehrt werden.", + "keywords": ["risiko", "bewertung", "kritisch"], + "action_hint": "assess", + "object_hint": "BCM-Risikobewertung", + "object_class": "risk_artifact" + }, + { + "subcontrol_id": "BCR-03", + "title": "Backup and Recovery", + "statement": "Datensicherungen muessen regelmaessig erstellt und Wiederherstellungstests durchgefuehrt werden.", + "keywords": ["backup", "sicherung", "wiederherstellung", "recovery"], + "action_hint": "maintain", + "object_hint": "Datensicherung und Wiederherstellung", + "object_class": "technical_control" + }, + { + "subcontrol_id": "BCR-04", + "title": "Disaster Recovery Planning", + "statement": "Ein Disaster-Recovery-Plan muss dokumentiert und jaehrlich getestet werden.", + "keywords": ["disaster", "recovery", "katastrophe"], + "action_hint": "document", + "object_hint": "Disaster-Recovery-Plan", + "object_class": "policy" + } + ] + }, + { + "domain_id": "CCC", + "title": "Change Control and Configuration Management", + "aliases": ["ccc", "change control", "configuration management", "aenderungsmanagement", "konfigurationsmanagement"], + "keywords": ["change", "aenderung", "konfiguration", "configuration", "release", "deployment"], + "subcontrols": [ + { + "subcontrol_id": "CCC-01", + "title": "Change Management Policy", + "statement": "Ein Aenderungsmanagement-Prozess muss definiert und fuer alle Aenderungen angewendet werden.", + "keywords": ["policy", "richtlinie", "aenderung"], + "action_hint": "document", + "object_hint": "Aenderungsmanagement-Richtlinie", + "object_class": "policy" + }, + { + "subcontrol_id": "CCC-02", + "title": "Change Testing", + "statement": "Aenderungen muessen vor der Produktivsetzung getestet und genehmigt werden.", + "keywords": ["test", "genehmigung", "approval"], + "action_hint": "test", + "object_hint": "Aenderungstests", + "object_class": "process" + }, + { + "subcontrol_id": "CCC-03", + "title": 
"Configuration Baseline", + "statement": "Basiskonfigurationen fuer alle Systeme muessen definiert und dokumentiert werden.", + "keywords": ["baseline", "basis", "standard"], + "action_hint": "define", + "object_hint": "Konfigurationsbaseline", + "object_class": "configuration" + } + ] + }, + { + "domain_id": "CEK", + "title": "Cryptography, Encryption and Key Management", + "aliases": ["cek", "cryptography", "encryption", "key management", "kryptographie", "verschluesselung", "schluesselverwaltung"], + "keywords": ["kryptographie", "verschluesselung", "schluessel", "key", "encryption", "certificate", "zertifikat"], + "subcontrols": [ + { + "subcontrol_id": "CEK-01", + "title": "Encryption Policy", + "statement": "Verschluesselungsrichtlinien muessen definiert werden, die Algorithmen, Schluessellaengen und Einsatzbereiche festlegen.", + "keywords": ["policy", "richtlinie", "algorithmus"], + "action_hint": "document", + "object_hint": "Verschluesselungsrichtlinie", + "object_class": "policy" + }, + { + "subcontrol_id": "CEK-02", + "title": "Key Management", + "statement": "Kryptographische Schluessel muessen ueber ihren Lebenszyklus sicher verwaltet werden.", + "keywords": ["key", "schluessel", "management", "lebenszyklus"], + "action_hint": "maintain", + "object_hint": "Schluesselverwaltung", + "object_class": "cryptographic_control" + }, + { + "subcontrol_id": "CEK-03", + "title": "Data Encryption", + "statement": "Sensible Daten muessen bei Speicherung und Uebertragung verschluesselt werden.", + "keywords": ["data", "daten", "speicherung", "uebertragung"], + "action_hint": "encrypt", + "object_hint": "Datenverschluesselung", + "object_class": "cryptographic_control" + } + ] + }, + { + "domain_id": "DSP", + "title": "Data Security and Privacy", + "aliases": ["dsp", "data security", "privacy", "datensicherheit", "datenschutz"], + "keywords": ["datenschutz", "datensicherheit", "privacy", "data security", "pii", "personenbezogen", "dsgvo"], + "subcontrols": [ + { + 
"subcontrol_id": "DSP-01", + "title": "Data Classification", + "statement": "Daten muessen nach Sensibilitaet klassifiziert und entsprechend geschuetzt werden.", + "keywords": ["klassifizierung", "sensibilitaet", "classification"], + "action_hint": "define", + "object_hint": "Datenklassifizierung", + "object_class": "data" + }, + { + "subcontrol_id": "DSP-02", + "title": "Data Inventory", + "statement": "Ein Dateninventar muss gefuehrt werden, das alle Verarbeitungen personenbezogener Daten dokumentiert.", + "keywords": ["inventar", "verzeichnis", "verarbeitung", "vvt"], + "action_hint": "maintain", + "object_hint": "Dateninventar", + "object_class": "register" + }, + { + "subcontrol_id": "DSP-03", + "title": "Data Retention and Deletion", + "statement": "Aufbewahrungsfristen muessen definiert und Daten nach Ablauf sicher geloescht werden.", + "keywords": ["retention", "aufbewahrung", "loeschung", "frist"], + "action_hint": "delete", + "object_hint": "Datenloeschung nach Frist", + "object_class": "data" + }, + { + "subcontrol_id": "DSP-04", + "title": "Privacy Impact Assessment", + "statement": "Datenschutz-Folgenabschaetzungen muessen fuer risikoreiche Verarbeitungen durchgefuehrt werden.", + "keywords": ["dsfa", "pia", "folgenabschaetzung", "impact"], + "action_hint": "assess", + "object_hint": "Datenschutz-Folgenabschaetzung", + "object_class": "risk_artifact" + }, + { + "subcontrol_id": "DSP-05", + "title": "Data Subject Rights", + "statement": "Verfahren zur Bearbeitung von Betroffenenrechten muessen implementiert werden.", + "keywords": ["betroffenenrechte", "auskunft", "loeschung", "data subject"], + "action_hint": "implement", + "object_hint": "Betroffenenrechte-Verfahren", + "object_class": "process" + } + ] + }, + { + "domain_id": "GRC", + "title": "Governance, Risk and Compliance", + "aliases": ["grc", "governance", "risk", "compliance", "risikomanagement"], + "keywords": ["governance", "risiko", "compliance", "management", "policy", "richtlinie"], + 
"subcontrols": [ + { + "subcontrol_id": "GRC-01", + "title": "Information Security Program", + "statement": "Ein umfassendes Informationssicherheitsprogramm muss etabliert und aufrechterhalten werden.", + "keywords": ["programm", "sicherheit", "information"], + "action_hint": "maintain", + "object_hint": "Informationssicherheitsprogramm", + "object_class": "policy" + }, + { + "subcontrol_id": "GRC-02", + "title": "Risk Management Program", + "statement": "Ein Risikomanagement-Programm muss implementiert werden, das Identifikation, Bewertung und Behandlung umfasst.", + "keywords": ["risiko", "management", "bewertung", "behandlung"], + "action_hint": "implement", + "object_hint": "Risikomanagement-Programm", + "object_class": "process" + }, + { + "subcontrol_id": "GRC-03", + "title": "Compliance Monitoring", + "statement": "Die Einhaltung regulatorischer und vertraglicher Anforderungen muss ueberwacht werden.", + "keywords": ["compliance", "einhaltung", "regulatorisch", "ueberwachung"], + "action_hint": "monitor", + "object_hint": "Compliance-Ueberwachung", + "object_class": "process" + } + ] + }, + { + "domain_id": "IAM", + "title": "Identity and Access Management", + "aliases": ["iam", "identity", "access management", "identitaetsmanagement", "zugriffsverwaltung"], + "keywords": ["identitaet", "zugriff", "identity", "access", "authentifizierung", "autorisierung", "sso"], + "subcontrols": [ + { + "subcontrol_id": "IAM-01", + "title": "Identity and Access Policy", + "statement": "Identitaets- und Zugriffsmanagement-Richtlinien muessen definiert werden.", + "keywords": ["policy", "richtlinie"], + "action_hint": "document", + "object_hint": "IAM-Richtlinie", + "object_class": "policy" + }, + { + "subcontrol_id": "IAM-02", + "title": "Strong Authentication", + "statement": "Starke Authentifizierung (MFA) muss fuer administrative und sicherheitskritische Zugriffe gefordert werden.", + "keywords": ["mfa", "stark", "authentifizierung", "admin"], + "action_hint": 
"implement", + "object_hint": "Starke Authentifizierung", + "object_class": "technical_control" + }, + { + "subcontrol_id": "IAM-03", + "title": "Identity Lifecycle Management", + "statement": "Identitaeten muessen ueber ihren gesamten Lebenszyklus verwaltet werden.", + "keywords": ["lifecycle", "lebenszyklus", "onboarding", "offboarding"], + "action_hint": "maintain", + "object_hint": "Identitaets-Lebenszyklus", + "object_class": "account" + }, + { + "subcontrol_id": "IAM-04", + "title": "Access Review", + "statement": "Zugriffsrechte muessen regelmaessig ueberprueft und ueberschuessige Rechte entzogen werden.", + "keywords": ["review", "ueberpruefen", "rechte", "rezertifizierung"], + "action_hint": "review", + "object_hint": "Zugriffsrechte-Review", + "object_class": "access_control" + } + ] + }, + { + "domain_id": "LOG", + "title": "Logging and Monitoring", + "aliases": ["log", "logging", "monitoring", "protokollierung", "ueberwachung"], + "keywords": ["logging", "monitoring", "protokollierung", "ueberwachung", "siem", "alarm"], + "subcontrols": [ + { + "subcontrol_id": "LOG-01", + "title": "Logging Policy", + "statement": "Protokollierungs-Richtlinien muessen definiert werden, die Umfang und Aufbewahrung festlegen.", + "keywords": ["policy", "richtlinie", "umfang", "aufbewahrung"], + "action_hint": "document", + "object_hint": "Protokollierungsrichtlinie", + "object_class": "policy" + }, + { + "subcontrol_id": "LOG-02", + "title": "Security Event Logging", + "statement": "Sicherheitsrelevante Ereignisse muessen erfasst und zentral gespeichert werden.", + "keywords": ["event", "ereignis", "sicherheit", "zentral"], + "action_hint": "configure", + "object_hint": "Sicherheits-Event-Logging", + "object_class": "configuration" + }, + { + "subcontrol_id": "LOG-03", + "title": "Monitoring and Alerting", + "statement": "Sicherheitsrelevante Logs muessen ueberwacht und bei Anomalien Alarme ausgeloest werden.", + "keywords": ["monitoring", "alerting", "alarm", 
"anomalie"], + "action_hint": "monitor", + "object_hint": "Log-Ueberwachung und Alarmierung", + "object_class": "technical_control" + } + ] + }, + { + "domain_id": "SEF", + "title": "Security Incident Management", + "aliases": ["sef", "security incident", "incident management", "vorfallmanagement", "sicherheitsvorfall"], + "keywords": ["vorfall", "incident", "sicherheitsvorfall", "reaktion", "response", "meldung"], + "subcontrols": [ + { + "subcontrol_id": "SEF-01", + "title": "Incident Management Policy", + "statement": "Ein Vorfallmanagement-Prozess muss definiert, dokumentiert und getestet werden.", + "keywords": ["policy", "richtlinie", "prozess"], + "action_hint": "document", + "object_hint": "Vorfallmanagement-Richtlinie", + "object_class": "policy" + }, + { + "subcontrol_id": "SEF-02", + "title": "Incident Response Team", + "statement": "Ein Incident-Response-Team muss benannt und geschult werden.", + "keywords": ["team", "response", "schulung"], + "action_hint": "define", + "object_hint": "Incident-Response-Team", + "object_class": "role" + }, + { + "subcontrol_id": "SEF-03", + "title": "Incident Reporting", + "statement": "Sicherheitsvorfaelle muessen innerhalb definierter Fristen an zustaendige Stellen gemeldet werden.", + "keywords": ["reporting", "meldung", "frist", "behoerde"], + "action_hint": "report", + "object_hint": "Vorfallmeldung", + "object_class": "incident" + }, + { + "subcontrol_id": "SEF-04", + "title": "Incident Lessons Learned", + "statement": "Nach jedem Vorfall muss eine Nachbereitung mit Lessons Learned durchgefuehrt werden.", + "keywords": ["lessons learned", "nachbereitung", "verbesserung"], + "action_hint": "review", + "object_hint": "Vorfall-Nachbereitung", + "object_class": "record" + } + ] + }, + { + "domain_id": "TVM", + "title": "Threat and Vulnerability Management", + "aliases": ["tvm", "threat", "vulnerability", "schwachstelle", "bedrohung", "schwachstellenmanagement"], + "keywords": ["schwachstelle", "vulnerability", 
"threat", "bedrohung", "patch", "scan"], + "subcontrols": [ + { + "subcontrol_id": "TVM-01", + "title": "Vulnerability Management Policy", + "statement": "Schwachstellenmanagement-Richtlinien muessen definiert und umgesetzt werden.", + "keywords": ["policy", "richtlinie"], + "action_hint": "document", + "object_hint": "Schwachstellenmanagement-Richtlinie", + "object_class": "policy" + }, + { + "subcontrol_id": "TVM-02", + "title": "Vulnerability Scanning", + "statement": "Systeme muessen regelmaessig auf Schwachstellen gescannt werden.", + "keywords": ["scan", "scanning", "regelmaessig"], + "action_hint": "test", + "object_hint": "Schwachstellenscan", + "object_class": "system" + }, + { + "subcontrol_id": "TVM-03", + "title": "Vulnerability Remediation", + "statement": "Erkannte Schwachstellen muessen priorisiert und innerhalb definierter Fristen behoben werden.", + "keywords": ["remediation", "behebung", "frist", "priorisierung"], + "action_hint": "remediate", + "object_hint": "Schwachstellenbehebung", + "object_class": "system" + }, + { + "subcontrol_id": "TVM-04", + "title": "Penetration Testing", + "statement": "Regelmaessige Penetrationstests muessen durchgefuehrt werden.", + "keywords": ["penetration", "pentest", "test"], + "action_hint": "test", + "object_hint": "Penetrationstest", + "object_class": "system" + } + ] + } + ] +} diff --git a/backend-compliance/compliance/data/frameworks/nist_sp800_53.json b/backend-compliance/compliance/data/frameworks/nist_sp800_53.json new file mode 100644 index 0000000..1d7f724 --- /dev/null +++ b/backend-compliance/compliance/data/frameworks/nist_sp800_53.json @@ -0,0 +1,514 @@ +{ + "framework_id": "NIST_SP800_53", + "display_name": "NIST SP 800-53 Rev. 
5", + "license": { + "type": "public_domain", + "rag_allowed": true, + "use_as_metadata": true + }, + "domains": [ + { + "domain_id": "AC", + "title": "Access Control", + "aliases": ["access control", "zugriffskontrolle", "zugriffssteuerung"], + "keywords": ["access", "zugriff", "berechtigung", "authorization", "autorisierung"], + "subcontrols": [ + { + "subcontrol_id": "AC-1", + "title": "Access Control Policy and Procedures", + "statement": "Zugriffskontrollrichtlinien und -verfahren muessen definiert, dokumentiert und regelmaessig ueberprueft werden.", + "keywords": ["policy", "richtlinie", "verfahren", "procedures"], + "action_hint": "document", + "object_hint": "Zugriffskontrollrichtlinie", + "object_class": "policy" + }, + { + "subcontrol_id": "AC-2", + "title": "Account Management", + "statement": "Benutzerkonten muessen ueber ihren gesamten Lebenszyklus verwaltet werden: Erstellung, Aktivierung, Aenderung, Deaktivierung und Loeschung.", + "keywords": ["account", "konto", "benutzer", "lifecycle", "lebenszyklus"], + "action_hint": "maintain", + "object_hint": "Benutzerkontenverwaltung", + "object_class": "account" + }, + { + "subcontrol_id": "AC-3", + "title": "Access Enforcement", + "statement": "Der Zugriff auf Systemressourcen muss gemaess der definierten Zugriffskontrollrichtlinie durchgesetzt werden.", + "keywords": ["enforcement", "durchsetzung", "ressourcen", "system"], + "action_hint": "restrict_access", + "object_hint": "Zugriffsdurchsetzung", + "object_class": "access_control" + }, + { + "subcontrol_id": "AC-5", + "title": "Separation of Duties", + "statement": "Aufgabentrennung muss definiert und durchgesetzt werden, um Interessenkonflikte und Missbrauch zu verhindern.", + "keywords": ["separation", "trennung", "duties", "aufgaben", "funktionstrennung"], + "action_hint": "define", + "object_hint": "Aufgabentrennung", + "object_class": "role" + }, + { + "subcontrol_id": "AC-6", + "title": "Least Privilege", + "statement": "Zugriffsrechte muessen 
nach dem Prinzip der minimalen Rechte vergeben werden.", + "keywords": ["least privilege", "minimal", "rechte", "privileg"], + "action_hint": "restrict_access", + "object_hint": "Minimale Rechtevergabe", + "object_class": "access_control" + }, + { + "subcontrol_id": "AC-7", + "title": "Unsuccessful Logon Attempts", + "statement": "Fehlgeschlagene Anmeldeversuche muessen begrenzt und ueberwacht werden.", + "keywords": ["logon", "anmeldung", "fehlgeschlagen", "sperre", "lockout"], + "action_hint": "monitor", + "object_hint": "Anmeldeversuchsueberwachung", + "object_class": "technical_control" + }, + { + "subcontrol_id": "AC-17", + "title": "Remote Access", + "statement": "Fernzugriff muss autorisiert, ueberwacht und verschluesselt werden.", + "keywords": ["remote", "fern", "vpn", "fernzugriff"], + "action_hint": "configure", + "object_hint": "Fernzugriffskonfiguration", + "object_class": "technical_control" + } + ] + }, + { + "domain_id": "AU", + "title": "Audit and Accountability", + "aliases": ["audit", "protokollierung", "accountability", "rechenschaftspflicht"], + "keywords": ["audit", "log", "protokoll", "nachvollziehbarkeit", "logging"], + "subcontrols": [ + { + "subcontrol_id": "AU-1", + "title": "Audit Policy and Procedures", + "statement": "Audit- und Protokollierungsrichtlinien muessen definiert und regelmaessig ueberprueft werden.", + "keywords": ["policy", "richtlinie", "audit"], + "action_hint": "document", + "object_hint": "Auditrichtlinie", + "object_class": "policy" + }, + { + "subcontrol_id": "AU-2", + "title": "Event Logging", + "statement": "Sicherheitsrelevante Ereignisse muessen identifiziert und protokolliert werden.", + "keywords": ["event", "ereignis", "logging", "protokollierung"], + "action_hint": "configure", + "object_hint": "Ereignisprotokollierung", + "object_class": "configuration" + }, + { + "subcontrol_id": "AU-3", + "title": "Content of Audit Records", + "statement": "Audit-Eintraege muessen ausreichende Informationen enthalten: 
Zeitstempel, Quelle, Ergebnis, Identitaet.", + "keywords": ["content", "inhalt", "record", "eintrag"], + "action_hint": "define", + "object_hint": "Audit-Eintragsformat", + "object_class": "record" + }, + { + "subcontrol_id": "AU-6", + "title": "Audit Record Review and Reporting", + "statement": "Audit-Eintraege muessen regelmaessig ueberprueft und bei Anomalien berichtet werden.", + "keywords": ["review", "ueberpruefen", "reporting", "anomalie"], + "action_hint": "review", + "object_hint": "Audit-Ueberpruefung", + "object_class": "record" + }, + { + "subcontrol_id": "AU-9", + "title": "Protection of Audit Information", + "statement": "Audit-Daten muessen vor unbefugtem Zugriff, Aenderung und Loeschung geschuetzt werden.", + "keywords": ["schutz", "protection", "integritaet", "integrity"], + "action_hint": "implement", + "object_hint": "Audit-Datenschutz", + "object_class": "technical_control" + } + ] + }, + { + "domain_id": "AT", + "title": "Awareness and Training", + "aliases": ["awareness", "training", "schulung", "sensibilisierung"], + "keywords": ["training", "schulung", "awareness", "sensibilisierung", "weiterbildung"], + "subcontrols": [ + { + "subcontrol_id": "AT-1", + "title": "Policy and Procedures", + "statement": "Schulungs- und Sensibilisierungsrichtlinien muessen definiert und regelmaessig aktualisiert werden.", + "keywords": ["policy", "richtlinie"], + "action_hint": "document", + "object_hint": "Schulungsrichtlinie", + "object_class": "policy" + }, + { + "subcontrol_id": "AT-2", + "title": "Literacy Training and Awareness", + "statement": "Alle Mitarbeiter muessen regelmaessig Sicherheitsschulungen erhalten.", + "keywords": ["mitarbeiter", "schulung", "sicherheit"], + "action_hint": "train", + "object_hint": "Sicherheitsschulung", + "object_class": "training" + }, + { + "subcontrol_id": "AT-3", + "title": "Role-Based Training", + "statement": "Rollenbasierte Sicherheitsschulungen muessen fuer Mitarbeiter mit besonderen Sicherheitsaufgaben 
durchgefuehrt werden.", + "keywords": ["rollenbasiert", "role-based", "speziell"], + "action_hint": "train", + "object_hint": "Rollenbasierte Sicherheitsschulung", + "object_class": "training" + } + ] + }, + { + "domain_id": "CM", + "title": "Configuration Management", + "aliases": ["configuration management", "konfigurationsmanagement", "konfiguration"], + "keywords": ["konfiguration", "configuration", "baseline", "haertung", "hardening"], + "subcontrols": [ + { + "subcontrol_id": "CM-1", + "title": "Policy and Procedures", + "statement": "Konfigurationsmanagement-Richtlinien muessen dokumentiert und gepflegt werden.", + "keywords": ["policy", "richtlinie"], + "action_hint": "document", + "object_hint": "Konfigurationsmanagement-Richtlinie", + "object_class": "policy" + }, + { + "subcontrol_id": "CM-2", + "title": "Baseline Configuration", + "statement": "Basiskonfigurationen fuer Systeme muessen definiert, dokumentiert und gepflegt werden.", + "keywords": ["baseline", "basis", "standard"], + "action_hint": "define", + "object_hint": "Basiskonfiguration", + "object_class": "configuration" + }, + { + "subcontrol_id": "CM-6", + "title": "Configuration Settings", + "statement": "Sicherheitsrelevante Konfigurationseinstellungen muessen definiert und durchgesetzt werden.", + "keywords": ["settings", "einstellungen", "sicherheit"], + "action_hint": "configure", + "object_hint": "Sicherheitskonfiguration", + "object_class": "configuration" + }, + { + "subcontrol_id": "CM-7", + "title": "Least Functionality", + "statement": "Systeme muessen so konfiguriert werden, dass nur notwendige Funktionen aktiv sind.", + "keywords": ["least functionality", "minimal", "dienste", "ports"], + "action_hint": "configure", + "object_hint": "Minimalkonfiguration", + "object_class": "configuration" + }, + { + "subcontrol_id": "CM-8", + "title": "System Component Inventory", + "statement": "Ein Inventar aller Systemkomponenten muss gefuehrt und aktuell gehalten werden.", + "keywords": 
["inventar", "inventory", "komponenten", "assets"], + "action_hint": "maintain", + "object_hint": "Systemkomponenten-Inventar", + "object_class": "register" + } + ] + }, + { + "domain_id": "IA", + "title": "Identification and Authentication", + "aliases": ["identification", "authentication", "identifikation", "authentifizierung"], + "keywords": ["authentifizierung", "identifikation", "identity", "passwort", "mfa", "credential"], + "subcontrols": [ + { + "subcontrol_id": "IA-1", + "title": "Policy and Procedures", + "statement": "Identifikations- und Authentifizierungsrichtlinien muessen dokumentiert und regelmaessig ueberprueft werden.", + "keywords": ["policy", "richtlinie"], + "action_hint": "document", + "object_hint": "Authentifizierungsrichtlinie", + "object_class": "policy" + }, + { + "subcontrol_id": "IA-2", + "title": "Identification and Authentication", + "statement": "Benutzer und Geraete muessen eindeutig identifiziert und authentifiziert werden.", + "keywords": ["benutzer", "geraete", "identifizierung"], + "action_hint": "implement", + "object_hint": "Benutzerauthentifizierung", + "object_class": "technical_control" + }, + { + "subcontrol_id": "IA-2(1)", + "title": "Multi-Factor Authentication", + "statement": "Multi-Faktor-Authentifizierung muss fuer privilegierte Konten implementiert werden.", + "keywords": ["mfa", "multi-faktor", "zwei-faktor", "2fa"], + "action_hint": "implement", + "object_hint": "Multi-Faktor-Authentifizierung", + "object_class": "technical_control" + }, + { + "subcontrol_id": "IA-5", + "title": "Authenticator Management", + "statement": "Authentifizierungsmittel (Passwoerter, Token, Zertifikate) muessen sicher verwaltet werden.", + "keywords": ["passwort", "token", "zertifikat", "credential"], + "action_hint": "maintain", + "object_hint": "Authentifizierungsmittel-Verwaltung", + "object_class": "technical_control" + } + ] + }, + { + "domain_id": "IR", + "title": "Incident Response", + "aliases": ["incident response", 
"vorfallbehandlung", "vorfallreaktion", "incident management"], + "keywords": ["vorfall", "incident", "reaktion", "response", "breach", "sicherheitsvorfall"], + "subcontrols": [ + { + "subcontrol_id": "IR-1", + "title": "Policy and Procedures", + "statement": "Vorfallreaktionsrichtlinien und -verfahren muessen definiert und regelmaessig aktualisiert werden.", + "keywords": ["policy", "richtlinie", "verfahren"], + "action_hint": "document", + "object_hint": "Vorfallreaktionsrichtlinie", + "object_class": "policy" + }, + { + "subcontrol_id": "IR-2", + "title": "Incident Response Training", + "statement": "Mitarbeiter muessen regelmaessig in der Vorfallreaktion geschult werden.", + "keywords": ["training", "schulung"], + "action_hint": "train", + "object_hint": "Vorfallreaktionsschulung", + "object_class": "training" + }, + { + "subcontrol_id": "IR-4", + "title": "Incident Handling", + "statement": "Ein strukturierter Prozess fuer die Vorfallbehandlung muss implementiert werden: Erkennung, Analyse, Eindaemmung, Behebung.", + "keywords": ["handling", "behandlung", "erkennung", "eindaemmung"], + "action_hint": "implement", + "object_hint": "Vorfallbehandlungsprozess", + "object_class": "process" + }, + { + "subcontrol_id": "IR-5", + "title": "Incident Monitoring", + "statement": "Sicherheitsvorfaelle muessen kontinuierlich ueberwacht und verfolgt werden.", + "keywords": ["monitoring", "ueberwachung", "tracking"], + "action_hint": "monitor", + "object_hint": "Vorfallsueberwachung", + "object_class": "incident" + }, + { + "subcontrol_id": "IR-6", + "title": "Incident Reporting", + "statement": "Sicherheitsvorfaelle muessen innerhalb definierter Fristen an die zustaendigen Stellen gemeldet werden.", + "keywords": ["reporting", "meldung", "melden", "frist"], + "action_hint": "report", + "object_hint": "Vorfallmeldung", + "object_class": "incident" + }, + { + "subcontrol_id": "IR-8", + "title": "Incident Response Plan", + "statement": "Ein Vorfallreaktionsplan muss 
dokumentiert und regelmaessig getestet werden.", + "keywords": ["plan", "dokumentation", "test"], + "action_hint": "document", + "object_hint": "Vorfallreaktionsplan", + "object_class": "policy" + } + ] + }, + { + "domain_id": "RA", + "title": "Risk Assessment", + "aliases": ["risk assessment", "risikobewertung", "risikoanalyse"], + "keywords": ["risiko", "risk", "bewertung", "assessment", "analyse", "bedrohung", "threat"], + "subcontrols": [ + { + "subcontrol_id": "RA-1", + "title": "Policy and Procedures", + "statement": "Risikobewertungsrichtlinien muessen dokumentiert und regelmaessig aktualisiert werden.", + "keywords": ["policy", "richtlinie"], + "action_hint": "document", + "object_hint": "Risikobewertungsrichtlinie", + "object_class": "policy" + }, + { + "subcontrol_id": "RA-3", + "title": "Risk Assessment", + "statement": "Regelmaessige Risikobewertungen muessen durchgefuehrt und dokumentiert werden.", + "keywords": ["bewertung", "assessment", "regelmaessig"], + "action_hint": "assess", + "object_hint": "Risikobewertung", + "object_class": "risk_artifact" + }, + { + "subcontrol_id": "RA-5", + "title": "Vulnerability Monitoring and Scanning", + "statement": "Systeme muessen regelmaessig auf Schwachstellen gescannt und ueberwacht werden.", + "keywords": ["vulnerability", "schwachstelle", "scan", "monitoring"], + "action_hint": "monitor", + "object_hint": "Schwachstellenueberwachung", + "object_class": "system" + } + ] + }, + { + "domain_id": "SC", + "title": "System and Communications Protection", + "aliases": ["system protection", "communications protection", "kommunikationsschutz", "systemschutz"], + "keywords": ["verschluesselung", "encryption", "tls", "netzwerk", "network", "kommunikation", "firewall"], + "subcontrols": [ + { + "subcontrol_id": "SC-1", + "title": "Policy and Procedures", + "statement": "System- und Kommunikationsschutzrichtlinien muessen dokumentiert und aktuell gehalten werden.", + "keywords": ["policy", "richtlinie"], + "action_hint": 
"document", + "object_hint": "Kommunikationsschutzrichtlinie", + "object_class": "policy" + }, + { + "subcontrol_id": "SC-7", + "title": "Boundary Protection", + "statement": "Netzwerkgrenzen muessen durch Firewall-Regeln und Zugangskontrollen geschuetzt werden.", + "keywords": ["boundary", "grenze", "firewall", "netzwerk"], + "action_hint": "implement", + "object_hint": "Netzwerkgrenzschutz", + "object_class": "technical_control" + }, + { + "subcontrol_id": "SC-8", + "title": "Transmission Confidentiality and Integrity", + "statement": "Daten muessen bei der Uebertragung durch Verschluesselung geschuetzt werden.", + "keywords": ["transmission", "uebertragung", "verschluesselung", "tls"], + "action_hint": "encrypt", + "object_hint": "Uebertragungsverschluesselung", + "object_class": "cryptographic_control" + }, + { + "subcontrol_id": "SC-12", + "title": "Cryptographic Key Establishment and Management", + "statement": "Kryptographische Schluessel muessen sicher erzeugt, verteilt, gespeichert und widerrufen werden.", + "keywords": ["key", "schluessel", "kryptographie", "management"], + "action_hint": "maintain", + "object_hint": "Schluesselverwaltung", + "object_class": "cryptographic_control" + }, + { + "subcontrol_id": "SC-13", + "title": "Cryptographic Protection", + "statement": "Kryptographische Mechanismen muessen gemaess anerkannten Standards implementiert werden.", + "keywords": ["kryptographie", "verschluesselung", "standard"], + "action_hint": "implement", + "object_hint": "Kryptographischer Schutz", + "object_class": "cryptographic_control" + } + ] + }, + { + "domain_id": "SI", + "title": "System and Information Integrity", + "aliases": ["system integrity", "information integrity", "systemintegritaet", "informationsintegritaet"], + "keywords": ["integritaet", "integrity", "malware", "patch", "flaw", "schwachstelle"], + "subcontrols": [ + { + "subcontrol_id": "SI-1", + "title": "Policy and Procedures", + "statement": "System- und 
Informationsintegritaetsrichtlinien muessen dokumentiert und regelmaessig ueberprueft werden.", + "keywords": ["policy", "richtlinie"], + "action_hint": "document", + "object_hint": "Integritaetsrichtlinie", + "object_class": "policy" + }, + { + "subcontrol_id": "SI-2", + "title": "Flaw Remediation", + "statement": "Bekannte Schwachstellen muessen innerhalb definierter Fristen behoben werden.", + "keywords": ["flaw", "schwachstelle", "patch", "behebung", "remediation"], + "action_hint": "remediate", + "object_hint": "Schwachstellenbehebung", + "object_class": "system" + }, + { + "subcontrol_id": "SI-3", + "title": "Malicious Code Protection", + "statement": "Systeme muessen vor Schadsoftware geschuetzt werden durch Erkennung und Abwehrmechanismen.", + "keywords": ["malware", "schadsoftware", "antivirus", "erkennung"], + "action_hint": "implement", + "object_hint": "Schadsoftwareschutz", + "object_class": "technical_control" + }, + { + "subcontrol_id": "SI-4", + "title": "System Monitoring", + "statement": "Systeme muessen kontinuierlich auf Sicherheitsereignisse und Anomalien ueberwacht werden.", + "keywords": ["monitoring", "ueberwachung", "anomalie", "siem"], + "action_hint": "monitor", + "object_hint": "Systemueberwachung", + "object_class": "system" + }, + { + "subcontrol_id": "SI-5", + "title": "Security Alerts and Advisories", + "statement": "Sicherheitswarnungen muessen empfangen, bewertet und darauf reagiert werden.", + "keywords": ["alert", "warnung", "advisory", "cve"], + "action_hint": "monitor", + "object_hint": "Sicherheitswarnungen", + "object_class": "incident" + } + ] + }, + { + "domain_id": "SA", + "title": "System and Services Acquisition", + "aliases": ["system acquisition", "services acquisition", "systembeschaffung", "secure development"], + "keywords": ["beschaffung", "acquisition", "entwicklung", "development", "lieferkette", "supply chain"], + "subcontrols": [ + { + "subcontrol_id": "SA-1", + "title": "Policy and Procedures", + "statement": 
"Beschaffungsrichtlinien mit Sicherheitsanforderungen muessen dokumentiert werden.", + "keywords": ["policy", "richtlinie", "beschaffung"], + "action_hint": "document", + "object_hint": "Beschaffungsrichtlinie", + "object_class": "policy" + }, + { + "subcontrol_id": "SA-8", + "title": "Security and Privacy Engineering Principles", + "statement": "Sicherheits- und Datenschutzprinzipien muessen in die Systementwicklung integriert werden.", + "keywords": ["engineering", "development", "prinzipien", "design"], + "action_hint": "implement", + "object_hint": "Security-by-Design-Prinzipien", + "object_class": "process" + }, + { + "subcontrol_id": "SA-11", + "title": "Developer Testing and Evaluation", + "statement": "Entwickler muessen Sicherheitstests und Code-Reviews durchfuehren.", + "keywords": ["testing", "test", "code review", "evaluation"], + "action_hint": "test", + "object_hint": "Entwickler-Sicherheitstests", + "object_class": "process" + }, + { + "subcontrol_id": "SA-12", + "title": "Supply Chain Protection", + "statement": "Lieferkettenrisiken muessen bewertet und Schutzmassnahmen implementiert werden.", + "keywords": ["supply chain", "lieferkette", "third party", "drittanbieter"], + "action_hint": "assess", + "object_hint": "Lieferkettenrisikobewertung", + "object_class": "risk_artifact" + } + ] + } + ] +} diff --git a/backend-compliance/compliance/data/frameworks/owasp_asvs.json b/backend-compliance/compliance/data/frameworks/owasp_asvs.json new file mode 100644 index 0000000..a2182b0 --- /dev/null +++ b/backend-compliance/compliance/data/frameworks/owasp_asvs.json @@ -0,0 +1,353 @@ +{ + "framework_id": "OWASP_ASVS", + "display_name": "OWASP Application Security Verification Standard 4.0", + "license": { + "type": "cc_by_sa_4", + "rag_allowed": true, + "use_as_metadata": true + }, + "domains": [ + { + "domain_id": "V1", + "title": "Architecture, Design and Threat Modeling", + "aliases": ["architecture", "architektur", "design", "threat modeling", 
"bedrohungsmodellierung"], + "keywords": ["architektur", "design", "threat model", "bedrohung", "modellierung"], + "subcontrols": [ + { + "subcontrol_id": "V1.1", + "title": "Secure Software Development Lifecycle", + "statement": "Ein sicherer Softwareentwicklungs-Lebenszyklus (SSDLC) muss definiert und angewendet werden.", + "keywords": ["sdlc", "lifecycle", "lebenszyklus", "entwicklung"], + "action_hint": "implement", + "object_hint": "Sicherer Entwicklungs-Lebenszyklus", + "object_class": "process" + }, + { + "subcontrol_id": "V1.2", + "title": "Authentication Architecture", + "statement": "Die Authentifizierungsarchitektur muss dokumentiert und regelmaessig ueberprueft werden.", + "keywords": ["authentication", "authentifizierung", "architektur"], + "action_hint": "document", + "object_hint": "Authentifizierungsarchitektur", + "object_class": "policy" + }, + { + "subcontrol_id": "V1.4", + "title": "Access Control Architecture", + "statement": "Die Zugriffskontrollarchitektur muss dokumentiert und zentral durchgesetzt werden.", + "keywords": ["access control", "zugriffskontrolle", "architektur"], + "action_hint": "document", + "object_hint": "Zugriffskontrollarchitektur", + "object_class": "policy" + }, + { + "subcontrol_id": "V1.5", + "title": "Input and Output Architecture", + "statement": "Eingabe- und Ausgabevalidierung muss architektonisch verankert und durchgaengig angewendet werden.", + "keywords": ["input", "output", "eingabe", "ausgabe", "validierung"], + "action_hint": "implement", + "object_hint": "Ein-/Ausgabevalidierung", + "object_class": "technical_control" + }, + { + "subcontrol_id": "V1.6", + "title": "Cryptographic Architecture", + "statement": "Kryptographische Mechanismen muessen architektonisch definiert und standardisiert sein.", + "keywords": ["crypto", "kryptographie", "verschluesselung"], + "action_hint": "define", + "object_hint": "Kryptographie-Architektur", + "object_class": "cryptographic_control" + } + ] + }, + { + "domain_id": 
"V2", + "title": "Authentication", + "aliases": ["authentication", "authentifizierung", "anmeldung", "login"], + "keywords": ["authentication", "authentifizierung", "passwort", "login", "anmeldung", "credential"], + "subcontrols": [ + { + "subcontrol_id": "V2.1", + "title": "Password Security", + "statement": "Passwortrichtlinien muessen Mindestlaenge, Komplexitaet und Sperrmechanismen definieren.", + "keywords": ["passwort", "password", "laenge", "komplexitaet"], + "action_hint": "define", + "object_hint": "Passwortrichtlinie", + "object_class": "policy" + }, + { + "subcontrol_id": "V2.2", + "title": "General Authenticator Security", + "statement": "Authentifizierungsmittel muessen sicher gespeichert und uebertragen werden.", + "keywords": ["authenticator", "credential", "speicherung"], + "action_hint": "implement", + "object_hint": "Sichere Credential-Verwaltung", + "object_class": "technical_control" + }, + { + "subcontrol_id": "V2.7", + "title": "Out-of-Band Verification", + "statement": "Out-of-Band-Verifikationsmechanismen muessen sicher implementiert werden.", + "keywords": ["oob", "out-of-band", "sms", "push"], + "action_hint": "implement", + "object_hint": "Out-of-Band-Verifikation", + "object_class": "technical_control" + }, + { + "subcontrol_id": "V2.8", + "title": "Multi-Factor Authentication", + "statement": "Multi-Faktor-Authentifizierung muss fuer sicherheitskritische Funktionen verfuegbar sein.", + "keywords": ["mfa", "multi-faktor", "totp", "fido"], + "action_hint": "implement", + "object_hint": "Multi-Faktor-Authentifizierung", + "object_class": "technical_control" + } + ] + }, + { + "domain_id": "V3", + "title": "Session Management", + "aliases": ["session", "sitzung", "session management", "sitzungsverwaltung"], + "keywords": ["session", "sitzung", "token", "cookie", "timeout"], + "subcontrols": [ + { + "subcontrol_id": "V3.1", + "title": "Session Management Security", + "statement": "Sitzungstoken muessen sicher erzeugt, uebertragen und 
invalidiert werden.", + "keywords": ["token", "sitzung", "sicherheit"], + "action_hint": "implement", + "object_hint": "Sichere Sitzungsverwaltung", + "object_class": "technical_control" + }, + { + "subcontrol_id": "V3.3", + "title": "Session Termination", + "statement": "Sitzungen muessen nach Inaktivitaet und bei Abmeldung zuverlaessig beendet werden.", + "keywords": ["termination", "timeout", "abmeldung", "beenden"], + "action_hint": "configure", + "object_hint": "Sitzungstimeout", + "object_class": "configuration" + }, + { + "subcontrol_id": "V3.5", + "title": "Token-Based Session Management", + "statement": "Tokenbasierte Sitzungsmechanismen muessen gegen Diebstahl und Replay geschuetzt sein.", + "keywords": ["jwt", "token", "replay", "diebstahl"], + "action_hint": "implement", + "object_hint": "Token-Schutz", + "object_class": "technical_control" + } + ] + }, + { + "domain_id": "V5", + "title": "Validation, Sanitization and Encoding", + "aliases": ["validation", "validierung", "sanitization", "encoding", "eingabevalidierung"], + "keywords": ["validierung", "sanitization", "encoding", "xss", "injection", "eingabe"], + "subcontrols": [ + { + "subcontrol_id": "V5.1", + "title": "Input Validation", + "statement": "Alle Eingabedaten muessen serverseitig validiert werden.", + "keywords": ["input", "eingabe", "validierung", "serverseitig"], + "action_hint": "implement", + "object_hint": "Eingabevalidierung", + "object_class": "technical_control" + }, + { + "subcontrol_id": "V5.2", + "title": "Sanitization and Sandboxing", + "statement": "Eingaben muessen bereinigt und in sicherer Umgebung verarbeitet werden.", + "keywords": ["sanitization", "bereinigung", "sandbox"], + "action_hint": "implement", + "object_hint": "Eingabebereinigung", + "object_class": "technical_control" + }, + { + "subcontrol_id": "V5.3", + "title": "Output Encoding and Injection Prevention", + "statement": "Ausgaben muessen kontextabhaengig kodiert werden, um Injection-Angriffe zu verhindern.", + 
"keywords": ["output", "encoding", "injection", "xss", "sql"], + "action_hint": "implement", + "object_hint": "Ausgabe-Encoding", + "object_class": "technical_control" + } + ] + }, + { + "domain_id": "V6", + "title": "Stored Cryptography", + "aliases": ["cryptography", "kryptographie", "verschluesselung", "stored cryptography"], + "keywords": ["kryptographie", "verschluesselung", "hashing", "schluessel", "key management"], + "subcontrols": [ + { + "subcontrol_id": "V6.1", + "title": "Data Classification", + "statement": "Daten muessen klassifiziert und entsprechend ihrer Schutzklasse behandelt werden.", + "keywords": ["klassifizierung", "classification", "schutzklasse"], + "action_hint": "define", + "object_hint": "Datenklassifizierung", + "object_class": "data" + }, + { + "subcontrol_id": "V6.2", + "title": "Algorithms", + "statement": "Nur zugelassene und aktuelle kryptographische Algorithmen duerfen verwendet werden.", + "keywords": ["algorithmus", "algorithm", "aes", "rsa"], + "action_hint": "configure", + "object_hint": "Kryptographische Algorithmen", + "object_class": "cryptographic_control" + }, + { + "subcontrol_id": "V6.4", + "title": "Secret Management", + "statement": "Geheimnisse (Schluessel, Passwoerter, Tokens) muessen in einem Secret-Management-System verwaltet werden.", + "keywords": ["secret", "geheimnis", "vault", "key management"], + "action_hint": "maintain", + "object_hint": "Secret-Management", + "object_class": "cryptographic_control" + } + ] + }, + { + "domain_id": "V8", + "title": "Data Protection", + "aliases": ["data protection", "datenschutz", "datenverarbeitung"], + "keywords": ["datenschutz", "data protection", "pii", "personenbezogen", "privacy"], + "subcontrols": [ + { + "subcontrol_id": "V8.1", + "title": "General Data Protection", + "statement": "Personenbezogene Daten muessen gemaess Datenschutzanforderungen geschuetzt werden.", + "keywords": ["personenbezogen", "pii", "datenschutz"], + "action_hint": "implement", + "object_hint": 
"Datenschutzmassnahmen", + "object_class": "data" + }, + { + "subcontrol_id": "V8.2", + "title": "Client-Side Data Protection", + "statement": "Clientseitig gespeicherte sensible Daten muessen geschuetzt und minimiert werden.", + "keywords": ["client", "browser", "localstorage", "cookie"], + "action_hint": "implement", + "object_hint": "Clientseitiger Datenschutz", + "object_class": "technical_control" + }, + { + "subcontrol_id": "V8.3", + "title": "Sensitive Private Data", + "statement": "Sensible Daten muessen bei Speicherung und Verarbeitung besonders geschuetzt werden.", + "keywords": ["sensibel", "vertraulich", "speicherung"], + "action_hint": "encrypt", + "object_hint": "Verschluesselung sensibler Daten", + "object_class": "data" + } + ] + }, + { + "domain_id": "V9", + "title": "Communication", + "aliases": ["communication", "kommunikation", "tls", "transport"], + "keywords": ["tls", "ssl", "https", "transport", "kommunikation", "verschluesselung"], + "subcontrols": [ + { + "subcontrol_id": "V9.1", + "title": "Client Communication Security", + "statement": "Alle Client-Server-Kommunikation muss ueber TLS verschluesselt werden.", + "keywords": ["tls", "https", "client", "server"], + "action_hint": "encrypt", + "object_hint": "TLS-Transportverschluesselung", + "object_class": "cryptographic_control" + }, + { + "subcontrol_id": "V9.2", + "title": "Server Communication Security", + "statement": "Server-zu-Server-Kommunikation muss authentifiziert und verschluesselt erfolgen.", + "keywords": ["server", "mtls", "backend"], + "action_hint": "encrypt", + "object_hint": "Server-Kommunikationsverschluesselung", + "object_class": "cryptographic_control" + } + ] + }, + { + "domain_id": "V13", + "title": "API and Web Service", + "aliases": ["api", "web service", "rest", "graphql", "webservice"], + "keywords": ["api", "rest", "graphql", "webservice", "endpoint", "schnittstelle"], + "subcontrols": [ + { + "subcontrol_id": "V13.1", + "title": "Generic Web Service Security", 
+ "statement": "Web-Services muessen gegen gaengige Angriffe abgesichert werden.", + "keywords": ["web service", "sicherheit", "angriff"], + "action_hint": "implement", + "object_hint": "Web-Service-Absicherung", + "object_class": "interface" + }, + { + "subcontrol_id": "V13.2", + "title": "RESTful Web Service", + "statement": "REST-APIs muessen Input-Validierung, Rate Limiting und sichere Authentifizierung implementieren.", + "keywords": ["rest", "api", "rate limiting", "input"], + "action_hint": "implement", + "object_hint": "REST-API-Absicherung", + "object_class": "interface" + }, + { + "subcontrol_id": "V13.4", + "title": "GraphQL and Web Services", + "statement": "GraphQL-Endpoints muessen gegen Query-Complexity-Angriffe und Introspection geschuetzt werden.", + "keywords": ["graphql", "query", "complexity", "introspection"], + "action_hint": "configure", + "object_hint": "GraphQL-Absicherung", + "object_class": "interface" + } + ] + }, + { + "domain_id": "V14", + "title": "Configuration", + "aliases": ["configuration", "konfiguration", "hardening", "haertung"], + "keywords": ["konfiguration", "hardening", "haertung", "header", "deployment"], + "subcontrols": [ + { + "subcontrol_id": "V14.1", + "title": "Build and Deploy", + "statement": "Build- und Deployment-Prozesse muessen sicher konfiguriert und reproduzierbar sein.", + "keywords": ["build", "deploy", "ci/cd", "pipeline"], + "action_hint": "configure", + "object_hint": "Sichere Build-Pipeline", + "object_class": "configuration" + }, + { + "subcontrol_id": "V14.2", + "title": "Dependency Management", + "statement": "Abhaengigkeiten muessen auf Schwachstellen geprueft und aktuell gehalten werden.", + "keywords": ["dependency", "abhaengigkeit", "sca", "sbom"], + "action_hint": "maintain", + "object_hint": "Abhaengigkeitsverwaltung", + "object_class": "system" + }, + { + "subcontrol_id": "V14.3", + "title": "Unintended Security Disclosure", + "statement": "Fehlermeldungen und Debug-Informationen duerfen keine 
sicherheitsrelevanten Details preisgeben.", + "keywords": ["disclosure", "fehlermeldung", "debug", "information leakage"], + "action_hint": "configure", + "object_hint": "Fehlerbehandlung", + "object_class": "configuration" + }, + { + "subcontrol_id": "V14.4", + "title": "HTTP Security Headers", + "statement": "HTTP-Sicherheitsheader muessen korrekt konfiguriert sein.", + "keywords": ["header", "csp", "hsts", "x-frame"], + "action_hint": "configure", + "object_hint": "HTTP-Sicherheitsheader", + "object_class": "configuration" + } + ] + } + ] +} diff --git a/backend-compliance/compliance/services/decomposition_pass.py b/backend-compliance/compliance/services/decomposition_pass.py index afb557d..6b53c85 100644 --- a/backend-compliance/compliance/services/decomposition_pass.py +++ b/backend-compliance/compliance/services/decomposition_pass.py @@ -1493,7 +1493,37 @@ def _normalize_object(object_raw: str) -> str: return obj[:80] or "unknown" -# ── 7b. Output Validator (Negativregeln) ───────────────────────────────── +# ── 7b. Framework / Composite Detection ────────────────────────────────── + +_FRAMEWORK_KEYWORDS: list[str] = [ + "praktiken", "kontrollen gemäß", "maßnahmen gemäß", "anforderungen aus", + "anforderungen gemäß", "gemäß .+ umzusetzen", "framework", "standard", + "controls for", "practices for", "requirements from", +] + +_COMPOSITE_OBJECT_KEYWORDS: list[str] = [ + "ccm", "nist", "iso 27001", "iso 27002", "owasp", "bsi", + "cis controls", "cobit", "sox", "pci dss", "hitrust", + "soc 2", "soc2", "enisa", "kritis", +] + +_COMPOSITE_RE = re.compile( + "|".join(_FRAMEWORK_KEYWORDS + _COMPOSITE_OBJECT_KEYWORDS), + re.IGNORECASE, +) + + +def _is_composite_obligation(obligation_text: str, object_: str) -> bool: + """Detect framework-level / composite obligations that are NOT atomic. + + Returns True if the obligation references a framework domain, standard, + or set of practices rather than a single auditable requirement. 
+ """ + combined = f"{obligation_text} {object_}" + return bool(_COMPOSITE_RE.search(combined)) + + +# ── 7c. Output Validator (Negativregeln) ───────────────────────────────── def _validate_atomic_control( atomic: "AtomicControlCandidate", @@ -1544,6 +1574,9 @@ def _validate_atomic_control( if object_class == "general": issues.append("WARN: object_class is 'general' (unclassified)") + if getattr(atomic, "_is_composite", False): + issues.append("WARN: composite/framework obligation — requires further decomposition") + for issue in issues: if issue.startswith("ERROR:"): logger.warning("Validation: %s — title=%s", issue, atomic.title[:60]) @@ -1703,6 +1736,12 @@ def _compose_deterministic( atomic._deadline_hours = deadline_hours # type: ignore[attr-defined] atomic._frequency = frequency # type: ignore[attr-defined] + # ── Composite / Framework detection ─────────────────────── + is_composite = _is_composite_obligation(obligation_text, object_) + atomic._is_composite = is_composite # type: ignore[attr-defined] + atomic._atomicity = "composite" if is_composite else "atomic" # type: ignore[attr-defined] + atomic._requires_decomposition = is_composite # type: ignore[attr-defined] + # ── Validate (log issues, never reject) ─────────────────── validation_issues = _validate_atomic_control(atomic, action_type, object_class) atomic._validation_issues = validation_issues # type: ignore[attr-defined] @@ -2403,23 +2442,7 @@ class DecompositionPass: else: # Deterministic engine — no LLM required for obl in batch: - sub_actions = _split_compound_action(obl["action"]) - for sub_action in sub_actions: - atomic = _compose_deterministic( - obligation_text=obl["obligation_text"], - action=sub_action, - object_=obl["object"], - parent_title=obl["parent_title"], - parent_severity=obl["parent_severity"], - parent_category=obl["parent_category"], - is_test=obl["is_test"], - is_reporting=obl["is_reporting"], - trigger_type=obl.get("trigger_type"), - condition=obl.get("condition"), - ) - 
await self._process_pass0b_control( - obl, {}, stats, atomic=atomic, - ) + await self._route_and_compose(obl, stats) # Commit after each successful sub-batch self.db.commit() @@ -2435,6 +2458,107 @@ class DecompositionPass: logger.info("Pass 0b: %s", stats) return stats + async def _route_and_compose( + self, obl: dict, stats: dict, + ) -> None: + """Route an obligation through the framework detection layer, + then compose atomic controls. + + Routing types: + - atomic: compose directly via _compose_deterministic + - compound: split compound verbs, compose each + - framework_container: decompose via framework registry, + then compose each sub-obligation + """ + from compliance.services.framework_decomposition import ( + classify_routing, + decompose_framework_container, + ) + + routing = classify_routing( + obligation_text=obl["obligation_text"], + action_raw=obl["action"], + object_raw=obl["object"], + condition_raw=obl.get("condition"), + ) + + if routing.routing_type == "framework_container" and routing.framework_ref: + # Decompose framework container into sub-obligations + result = decompose_framework_container( + obligation_candidate_id=obl["candidate_id"], + parent_control_id=obl["parent_control_id"], + obligation_text=obl["obligation_text"], + framework_ref=routing.framework_ref, + framework_domain=routing.framework_domain, + ) + stats.setdefault("framework_decomposed", 0) + stats.setdefault("framework_sub_obligations", 0) + + if result.release_state == "decomposed" and result.decomposed_obligations: + stats["framework_decomposed"] += 1 + stats["framework_sub_obligations"] += len(result.decomposed_obligations) + logger.info( + "Framework decomposition: %s → %s/%s → %d sub-obligations", + obl["candidate_id"], routing.framework_ref, + routing.framework_domain, len(result.decomposed_obligations), + ) + # Compose each sub-obligation + for d_obl in result.decomposed_obligations: + sub_obl = { + **obl, + "obligation_text": d_obl.obligation_text, + "action": 
d_obl.action_raw, + "object": d_obl.object_raw, + } + sub_actions = _split_compound_action(sub_obl["action"]) + for sub_action in sub_actions: + atomic = _compose_deterministic( + obligation_text=sub_obl["obligation_text"], + action=sub_action, + object_=sub_obl["object"], + parent_title=obl["parent_title"], + parent_severity=obl["parent_severity"], + parent_category=obl["parent_category"], + is_test=obl["is_test"], + is_reporting=obl["is_reporting"], + trigger_type=obl.get("trigger_type"), + condition=obl.get("condition"), + ) + # Enrich gen_meta with framework info + atomic._framework_ref = routing.framework_ref # type: ignore[attr-defined] + atomic._framework_domain = routing.framework_domain # type: ignore[attr-defined] + atomic._framework_subcontrol_id = d_obl.subcontrol_id # type: ignore[attr-defined] + atomic._decomposition_source = "framework_decomposition" # type: ignore[attr-defined] + await self._process_pass0b_control( + obl, {}, stats, atomic=atomic, + ) + return + else: + # Unmatched framework — fall through to normal composition + logger.warning( + "Framework decomposition unmatched: %s — %s", + obl["candidate_id"], result.issues, + ) + + # Atomic or compound or unmatched framework: normal composition + sub_actions = _split_compound_action(obl["action"]) + for sub_action in sub_actions: + atomic = _compose_deterministic( + obligation_text=obl["obligation_text"], + action=sub_action, + object_=obl["object"], + parent_title=obl["parent_title"], + parent_severity=obl["parent_severity"], + parent_category=obl["parent_category"], + is_test=obl["is_test"], + is_reporting=obl["is_reporting"], + trigger_type=obl.get("trigger_type"), + condition=obl.get("condition"), + ) + await self._process_pass0b_control( + obl, {}, stats, atomic=atomic, + ) + async def _process_pass0b_control( self, obl: dict, parsed: dict, stats: dict, atomic: Optional[AtomicControlCandidate] = None, @@ -2855,6 +2979,13 @@ class DecompositionPass: "deadline_hours": getattr(atomic, 
"_deadline_hours", None), "frequency": getattr(atomic, "_frequency", None), "validation_issues": getattr(atomic, "_validation_issues", []), + "is_composite": getattr(atomic, "_is_composite", False), + "atomicity": getattr(atomic, "_atomicity", "atomic"), + "requires_decomposition": getattr(atomic, "_requires_decomposition", False), + "framework_ref": getattr(atomic, "_framework_ref", None), + "framework_domain": getattr(atomic, "_framework_domain", None), + "framework_subcontrol_id": getattr(atomic, "_framework_subcontrol_id", None), + "decomposition_source": getattr(atomic, "_decomposition_source", "direct"), }), "framework_id": "14b1bdd2-abc7-4a43-adae-14471ee5c7cf", }, diff --git a/backend-compliance/compliance/services/framework_decomposition.py b/backend-compliance/compliance/services/framework_decomposition.py new file mode 100644 index 0000000..40010d2 --- /dev/null +++ b/backend-compliance/compliance/services/framework_decomposition.py @@ -0,0 +1,714 @@ +"""Framework Decomposition Engine — decomposes framework-container obligations. + +Sits between Pass 0a (obligation extraction) and Pass 0b (atomic control +composition). Detects obligations that reference a framework domain (e.g. +"CCM-Praktiken fuer AIS") and decomposes them into concrete sub-obligations +using an internal framework registry. + +Three routing types: + atomic → pass through to Pass 0b unchanged + compound → split compound verbs, then Pass 0b + framework_container → decompose via registry, then Pass 0b + +The registry is a set of JSON files under compliance/data/frameworks/. 
+""" + +import json +import logging +import os +import re +import uuid +from dataclasses import dataclass, field +from pathlib import Path +from typing import Optional + +logger = logging.getLogger(__name__) + +# --------------------------------------------------------------------------- +# Registry loading +# --------------------------------------------------------------------------- + +_REGISTRY_DIR = Path(__file__).resolve().parent.parent / "data" / "frameworks" +_REGISTRY: dict[str, dict] = {} # framework_id → framework dict + + +def _load_registry() -> dict[str, dict]: + """Load all framework JSON files from the registry directory.""" + registry: dict[str, dict] = {} + if not _REGISTRY_DIR.is_dir(): + logger.warning("Framework registry dir not found: %s", _REGISTRY_DIR) + return registry + + for fpath in sorted(_REGISTRY_DIR.glob("*.json")): + try: + with open(fpath, encoding="utf-8") as f: + fw = json.load(f) + fw_id = fw.get("framework_id", fpath.stem) + registry[fw_id] = fw + logger.info( + "Loaded framework: %s (%d domains)", + fw_id, + len(fw.get("domains", [])), + ) + except Exception: + logger.exception("Failed to load framework file: %s", fpath) + return registry + + +def get_registry() -> dict[str, dict]: + """Return the global framework registry (lazy-loaded).""" + global _REGISTRY + if not _REGISTRY: + _REGISTRY = _load_registry() + return _REGISTRY + + +def reload_registry() -> dict[str, dict]: + """Force-reload the framework registry from disk.""" + global _REGISTRY + _REGISTRY = _load_registry() + return _REGISTRY + + +# --------------------------------------------------------------------------- +# Framework alias index (built from registry) +# --------------------------------------------------------------------------- + +def _build_alias_index(registry: dict[str, dict]) -> dict[str, str]: + """Build a lowercase alias → framework_id lookup.""" + idx: dict[str, str] = {} + for fw_id, fw in registry.items(): + # Framework-level aliases + 
idx[fw_id.lower()] = fw_id + name = fw.get("display_name", "") + if name: + idx[name.lower()] = fw_id + # Common short forms + for part in fw_id.lower().replace("_", " ").split(): + if len(part) >= 3: + idx[part] = fw_id + return idx + + +# --------------------------------------------------------------------------- +# Routing — classify obligation type +# --------------------------------------------------------------------------- + +# Extended patterns for framework detection (beyond the simple _COMPOSITE_RE +# in decomposition_pass.py — here we also capture the framework name) +_FRAMEWORK_PATTERN = re.compile( + r"(?:praktiken|kontrollen|ma(?:ss|ß)nahmen|anforderungen|vorgaben|controls|practices|measures|requirements)" + r"\s+(?:f(?:ue|ü)r|aus|gem(?:ae|ä)(?:ss|ß)|nach|from|of|for|per)\s+" + r"(.+?)(?:\s+(?:m(?:ue|ü)ssen|sollen|sind|werden|implementieren|umsetzen|einf(?:ue|ü)hren)|\.|,|$)", + re.IGNORECASE, +) + +# Direct framework name references +_DIRECT_FRAMEWORK_RE = re.compile( + r"\b(?:CSA\s*CCM|NIST\s*(?:SP\s*)?800-53|OWASP\s*(?:ASVS|SAMM|Top\s*10)" + r"|CIS\s*Controls|BSI\s*(?:IT-)?Grundschutz|ENISA|ISO\s*2700[12]" + r"|COBIT|SOX|PCI\s*DSS|HITRUST|SOC\s*2|KRITIS)\b", + re.IGNORECASE, +) + +# Compound verb patterns (multiple main verbs) +_COMPOUND_VERB_RE = re.compile( + r"\b(?:und|sowie|als\s+auch|or|and)\b", + re.IGNORECASE, +) + +# No-split phrases that look compound but aren't +_NO_SPLIT_PHRASES = [ + "pflegen und aufrechterhalten", + "dokumentieren und pflegen", + "definieren und dokumentieren", + "erstellen und freigeben", + "pruefen und genehmigen", + "identifizieren und bewerten", + "erkennen und melden", + "define and maintain", + "create and maintain", + "establish and maintain", + "monitor and review", + "detect and respond", +] + + +@dataclass +class RoutingResult: + """Result of obligation routing classification.""" + routing_type: str # atomic | compound | framework_container | unknown_review + framework_ref: Optional[str] = None + 
framework_domain: Optional[str] = None + domain_title: Optional[str] = None + confidence: float = 0.0 + reason: str = "" + + +def classify_routing( + obligation_text: str, + action_raw: str, + object_raw: str, + condition_raw: Optional[str] = None, +) -> RoutingResult: + """Classify an obligation into atomic / compound / framework_container.""" + combined = f"{obligation_text} {object_raw}".lower() + + # --- Step 1: Framework container detection --- + fw_result = _detect_framework(obligation_text, object_raw) + if fw_result.routing_type == "framework_container": + return fw_result + + # --- Step 2: Compound verb detection --- + if _is_compound_obligation(action_raw, obligation_text): + return RoutingResult( + routing_type="compound", + confidence=0.7, + reason="multiple_main_verbs", + ) + + # --- Step 3: Default = atomic --- + return RoutingResult( + routing_type="atomic", + confidence=0.9, + reason="single_action_single_object", + ) + + +def _detect_framework( + obligation_text: str, object_raw: str, +) -> RoutingResult: + """Detect if obligation references a framework domain.""" + combined = f"{obligation_text} {object_raw}" + registry = get_registry() + alias_idx = _build_alias_index(registry) + + # Strategy 1: direct framework name match + m = _DIRECT_FRAMEWORK_RE.search(combined) + if m: + fw_name = m.group(0).strip() + fw_id = _resolve_framework_id(fw_name, alias_idx, registry) + if fw_id: + domain_id, domain_title = _match_domain( + combined, registry[fw_id], + ) + return RoutingResult( + routing_type="framework_container", + framework_ref=fw_id, + framework_domain=domain_id, + domain_title=domain_title, + confidence=0.95 if domain_id else 0.75, + reason=f"direct_framework_match:{fw_name}", + ) + else: + # Framework name recognized but not in registry + return RoutingResult( + routing_type="framework_container", + framework_ref=None, + framework_domain=None, + confidence=0.6, + reason=f"direct_framework_match_no_registry:{fw_name}", + ) + + # Strategy 2: 
pattern match ("Praktiken fuer X") + m2 = _FRAMEWORK_PATTERN.search(combined) + if m2: + ref_text = m2.group(1).strip() + fw_id, domain_id, domain_title = _resolve_from_ref_text( + ref_text, registry, alias_idx, + ) + if fw_id: + return RoutingResult( + routing_type="framework_container", + framework_ref=fw_id, + framework_domain=domain_id, + domain_title=domain_title, + confidence=0.85 if domain_id else 0.65, + reason=f"pattern_match:{ref_text}", + ) + + # Strategy 3: keyword-heavy object + if _has_framework_keywords(object_raw): + return RoutingResult( + routing_type="framework_container", + framework_ref=None, + framework_domain=None, + confidence=0.5, + reason="framework_keywords_in_object", + ) + + return RoutingResult(routing_type="atomic", confidence=0.0) + + +def _resolve_framework_id( + name: str, + alias_idx: dict[str, str], + registry: dict[str, dict], +) -> Optional[str]: + """Resolve a framework name to its registry ID.""" + normalized = re.sub(r"\s+", " ", name.strip().lower()) + # Direct alias match + if normalized in alias_idx: + return alias_idx[normalized] + # Try compact form (strip spaces, hyphens, underscores) + compact = re.sub(r"[\s_\-]+", "", normalized) + for alias, fw_id in alias_idx.items(): + if re.sub(r"[\s_\-]+", "", alias) == compact: + return fw_id + # Substring match in display names + for fw_id, fw in registry.items(): + display = fw.get("display_name", "").lower() + if normalized in display or display in normalized: + return fw_id + # Partial match: check if normalized contains any alias (for multi-word refs) + for alias, fw_id in alias_idx.items(): + if len(alias) >= 4 and alias in normalized: + return fw_id + return None + + +def _match_domain( + text: str, framework: dict, +) -> tuple[Optional[str], Optional[str]]: + """Match a domain within a framework from text references.""" + text_lower = text.lower() + best_id: Optional[str] = None + best_title: Optional[str] = None + best_score = 0 + + for domain in 
framework.get("domains", []): + score = 0 + domain_id = domain["domain_id"] + title = domain.get("title", "") + + # Exact domain ID match (e.g. "AIS") + if re.search(rf"\b{re.escape(domain_id)}\b", text, re.IGNORECASE): + score += 10 + + # Full title match + if title.lower() in text_lower: + score += 8 + + # Alias match + for alias in domain.get("aliases", []): + if alias.lower() in text_lower: + score += 6 + break + + # Keyword overlap + kw_hits = sum( + 1 for kw in domain.get("keywords", []) + if kw.lower() in text_lower + ) + score += kw_hits + + if score > best_score: + best_score = score + best_id = domain_id + best_title = title + + if best_score >= 3: + return best_id, best_title + return None, None + + +def _resolve_from_ref_text( + ref_text: str, + registry: dict[str, dict], + alias_idx: dict[str, str], +) -> tuple[Optional[str], Optional[str], Optional[str]]: + """Resolve framework + domain from a reference text like 'AIS' or 'Application Security'.""" + ref_lower = ref_text.lower() + + for fw_id, fw in registry.items(): + for domain in fw.get("domains", []): + # Check domain ID + if domain["domain_id"].lower() in ref_lower: + return fw_id, domain["domain_id"], domain.get("title") + # Check title + if domain.get("title", "").lower() in ref_lower: + return fw_id, domain["domain_id"], domain.get("title") + # Check aliases + for alias in domain.get("aliases", []): + if alias.lower() in ref_lower or ref_lower in alias.lower(): + return fw_id, domain["domain_id"], domain.get("title") + + return None, None, None + + +_FRAMEWORK_KW_SET = { + "praktiken", "kontrollen", "massnahmen", "maßnahmen", + "anforderungen", "vorgaben", "framework", "standard", + "baseline", "katalog", "domain", "family", "category", + "practices", "controls", "measures", "requirements", +} + + +def _has_framework_keywords(text: str) -> bool: + """Check if text contains framework-indicator keywords.""" + words = set(re.findall(r"[a-zäöüß]+", text.lower())) + return len(words & 
def decompose_framework_container(
    obligation_candidate_id: str,
    parent_control_id: str,
    obligation_text: str,
    framework_ref: Optional[str],
    framework_domain: Optional[str],
    actor: str = "organization",
) -> FrameworkDecompositionResult:
    """Decompose a framework-container obligation into concrete sub-obligations.

    Steps:
        1. Resolve framework from registry
        2. Resolve domain within framework
        3. Select relevant subcontrols (keyword filter or full domain)
        4. Generate decomposed obligations

    The registry is only read: subcontrol entries are copied before they
    are annotated, and keyword lists are copied into the results, so
    repeated calls cannot leak state into the shared registry data.

    Args:
        obligation_candidate_id: ID of the container obligation; used as
            prefix for the generated sub-obligation IDs.
        parent_control_id: ID of the control the obligation belongs to.
        obligation_text: Text of the container obligation.
        framework_ref: Registry ID of the framework, if already known.
        framework_domain: Domain ID inside the framework, if already known.
        actor: Actor assigned to every generated sub-obligation.

    Returns:
        A ``FrameworkDecompositionResult``.  ``release_state`` is
        ``"unmatched"`` (with an ``ERROR:`` issue and no obligations) when
        the framework cannot be resolved, or when no domain is resolved
        and the cross-domain keyword filter selects nothing; otherwise it
        is ``"decomposed"``.
    """
    container_id = f"FWC-{uuid.uuid4().hex[:8]}"
    registry = get_registry()
    issues: list[str] = []

    def _unmatched() -> FrameworkDecompositionResult:
        # Reads framework_ref / framework_domain at call time, i.e. after
        # any rebinding done by the resolution steps below.
        return FrameworkDecompositionResult(
            framework_container_id=container_id,
            source_obligation_candidate_id=obligation_candidate_id,
            framework_ref=framework_ref,
            framework_domain=framework_domain,
            domain_title=None,
            matched_subcontrols=[],
            decomposition_confidence=0.0,
            release_state="unmatched",
            decomposed_obligations=[],
            issues=issues,
        )

    # Step 1: Resolve framework
    fw = None
    if framework_ref and framework_ref in registry:
        fw = registry[framework_ref]
    else:
        # Try to find by name in text
        fw, framework_ref = _find_framework_in_text(obligation_text, registry)

    if not fw:
        issues.append("ERROR: framework_not_matched")
        return _unmatched()

    # Step 2: Resolve domain
    domain_data = None
    domain_title = None
    if framework_domain:
        for d in fw.get("domains", []):
            if d["domain_id"].lower() == framework_domain.lower():
                domain_data = d
                domain_title = d.get("title")
                break
    if not domain_data:
        # Try matching from text
        domain_id, domain_title = _match_domain(obligation_text, fw)
        if domain_id:
            for d in fw.get("domains", []):
                if d["domain_id"] == domain_id:
                    domain_data = d
                    framework_domain = domain_id
                    break

    if not domain_data:
        issues.append("WARN: domain_not_matched — using all domains")
        # Fall back to all subcontrols across all domains.  Each entry is
        # a shallow copy: tagging it with its domain must not modify the
        # registry's own dicts.
        all_subcontrols = []
        for d in fw.get("domains", []):
            for sc in d.get("subcontrols", []):
                all_subcontrols.append({**sc, "_domain_id": d["domain_id"]})
        subcontrols = _select_subcontrols(obligation_text, all_subcontrols)
        if not subcontrols:
            issues.append("ERROR: no_subcontrols_matched")
            return _unmatched()
    else:
        # Step 3: Select subcontrols from domain
        raw_subcontrols = domain_data.get("subcontrols", [])
        subcontrols = _select_subcontrols(obligation_text, raw_subcontrols)
        if not subcontrols:
            # Full domain decomposition
            subcontrols = raw_subcontrols

    # Quality check: too many subcontrols
    if len(subcontrols) > 25:
        issues.append(f"WARN: {len(subcontrols)} subcontrols — may be too broad")

    # Step 4: Generate decomposed obligations
    display_name = fw.get("display_name", framework_ref or "Unknown")
    decomposed: list[DecomposedObligation] = []
    matched_ids: list[str] = []

    for sc in subcontrols:
        sc_id = sc.get("subcontrol_id", "")
        matched_ids.append(sc_id)

        statement = sc.get("statement", "")
        action_hint = sc.get("action_hint", "")
        object_hint = sc.get("object_hint", "")

        # Quality warnings
        if not action_hint:
            issues.append(f"WARN: {sc_id} missing action_hint")
        if not object_hint:
            issues.append(f"WARN: {sc_id} missing object_hint")

        obl_id = f"{obligation_candidate_id}-{sc_id}"

        decomposed.append(DecomposedObligation(
            obligation_candidate_id=obl_id,
            parent_control_id=parent_control_id,
            parent_framework_container_id=container_id,
            source_ref_law=display_name,
            source_ref_article=sc_id,
            obligation_text=statement,
            actor=actor,
            # Missing hints are inferred from the statement text.
            action_raw=action_hint or _infer_action(statement),
            object_raw=object_hint or _infer_object(statement),
            routing_type="atomic",
            release_state="decomposed",
            subcontrol_id=sc_id,
            action_hint=action_hint,
            object_hint=object_hint,
            object_class=sc.get("object_class", ""),
            # Copy so callers editing the result cannot alter the registry.
            keywords=list(sc.get("keywords", [])),
        ))

    # Check if decomposed are identical to container
    for d in decomposed:
        if d.obligation_text.strip() == obligation_text.strip():
            issues.append(f"WARN: {d.subcontrol_id} identical to container text")

    confidence = _compute_decomposition_confidence(
        framework_ref, framework_domain, domain_data, len(subcontrols), issues,
    )

    return FrameworkDecompositionResult(
        framework_container_id=container_id,
        source_obligation_candidate_id=obligation_candidate_id,
        framework_ref=framework_ref,
        framework_domain=framework_domain,
        domain_title=domain_title,
        matched_subcontrols=matched_ids,
        decomposition_confidence=confidence,
        release_state="decomposed",
        decomposed_obligations=decomposed,
        issues=issues,
    )
+ """ + text_lower = obligation_text.lower() + scored: list[tuple[int, dict]] = [] + + for sc in subcontrols: + score = 0 + for kw in sc.get("keywords", []): + if kw.lower() in text_lower: + score += 1 + # Title match + title = sc.get("title", "").lower() + if title and title in text_lower: + score += 3 + # Object hint in text + obj = sc.get("object_hint", "").lower() + if obj and obj in text_lower: + score += 2 + + if score > 0: + scored.append((score, sc)) + + if not scored: + return [] + + # Only return those with meaningful overlap (score >= 2) + scored.sort(key=lambda x: x[0], reverse=True) + return [sc for score, sc in scored if score >= 2] + + +def _infer_action(statement: str) -> str: + """Infer a basic action verb from a statement.""" + s = statement.lower() + if any(w in s for w in ["definiert", "definieren", "define"]): + return "definieren" + if any(w in s for w in ["implementiert", "implementieren", "implement"]): + return "implementieren" + if any(w in s for w in ["dokumentiert", "dokumentieren", "document"]): + return "dokumentieren" + if any(w in s for w in ["ueberwacht", "ueberwachen", "monitor"]): + return "ueberwachen" + if any(w in s for w in ["getestet", "testen", "test"]): + return "testen" + if any(w in s for w in ["geschuetzt", "schuetzen", "protect"]): + return "implementieren" + if any(w in s for w in ["verwaltet", "verwalten", "manage"]): + return "pflegen" + if any(w in s for w in ["gemeldet", "melden", "report"]): + return "melden" + return "implementieren" + + +def _infer_object(statement: str) -> str: + """Infer the primary object from a statement (first noun phrase).""" + # Simple heuristic: take the text after "muessen"/"muss" up to the verb + m = re.search( + r"(?:muessen|muss|m(?:ü|ue)ssen)\s+(.+?)(?:\s+werden|\s+sein|\.|,|$)", + statement, + re.IGNORECASE, + ) + if m: + return m.group(1).strip()[:80] + # Fallback: first 80 chars + return statement[:80] if statement else "" + + +def _compute_decomposition_confidence( + 
def registry_stats() -> dict:
    """Summarize the loaded registry.

    Returns a dict with the number of frameworks, one detail entry per
    framework (ID, display name, domain count, subcontrol count) and the
    domain / subcontrol totals over all frameworks.
    """
    loaded = get_registry()

    per_framework = []
    for framework_id, framework in loaded.items():
        framework_domains = framework.get("domains", [])
        subcontrol_count = 0
        for domain in framework_domains:
            subcontrol_count += len(domain.get("subcontrols", []))
        per_framework.append({
            "framework_id": framework_id,
            "display_name": framework.get("display_name", ""),
            "domains": len(framework_domains),
            "subcontrols": subcontrol_count,
        })

    return {
        "frameworks": len(loaded),
        "details": per_framework,
        "total_domains": sum(entry["domains"] for entry in per_framework),
        "total_subcontrols": sum(entry["subcontrols"] for entry in per_framework),
    }
"implement", "policy") assert not any("raw infinitive" in i for i in issues) + def test_composite_obligation_warns(self): + """Composite obligations produce a WARN in validation.""" + ac = AtomicControlCandidate( + title="CCM-Praktiken", objective="x", + test_procedure=["tp"], evidence=["ev"], + ) + ac._is_composite = True # type: ignore[attr-defined] + issues = _validate_atomic_control(ac, "implement", "policy") + assert any("composite" in i for i in issues) + + def test_non_composite_no_warn(self): + """Non-composite obligations do NOT produce composite WARN.""" + ac = AtomicControlCandidate( + title="MFA", objective="x", + test_procedure=["tp"], evidence=["ev"], + ) + ac._is_composite = False # type: ignore[attr-defined] + issues = _validate_atomic_control(ac, "implement", "technical_control") + assert not any("composite" in i for i in issues) + + +# --------------------------------------------------------------------------- +# COMPOSITE / FRAMEWORK DETECTION TESTS +# --------------------------------------------------------------------------- + + +class TestCompositeDetection: + """Tests for _is_composite_obligation().""" + + def test_ccm_praktiken_detected(self): + """'CCM-Praktiken für AIS implementieren' is composite.""" + assert _is_composite_obligation( + "CCM-Praktiken für AIS implementieren", "CCM-Praktiken" + ) + + def test_kontrollen_gemaess_nist(self): + """'Kontrollen gemäß NIST umsetzen' is composite.""" + assert _is_composite_obligation( + "Kontrollen gemäß NIST SP 800-53 umsetzen", "Kontrollen" + ) + + def test_iso_27001_referenced(self): + """ISO 27001 reference in object triggers composite.""" + assert _is_composite_obligation( + "Maßnahmen umsetzen", "ISO 27001 Anhang A" + ) + + def test_owasp_framework(self): + """OWASP reference triggers composite.""" + assert _is_composite_obligation( + "OWASP Top 10 Maßnahmen implementieren", "Sicherheitsmaßnahmen" + ) + + def test_bsi_grundschutz(self): + """BSI reference triggers composite.""" + assert 
_is_composite_obligation( + "BSI-Grundschutz-Kompendium anwenden", "IT-Grundschutz" + ) + + def test_anforderungen_gemaess(self): + """'Anforderungen gemäß X' is composite.""" + assert _is_composite_obligation( + "Anforderungen gemäß EU AI Act umsetzen", "Anforderungen" + ) + + def test_simple_mfa_not_composite(self): + """'MFA implementieren' is atomic, not composite.""" + assert not _is_composite_obligation( + "Multi-Faktor-Authentifizierung implementieren", "MFA" + ) + + def test_simple_policy_not_composite(self): + """'Sicherheitsrichtlinie dokumentieren' is atomic.""" + assert not _is_composite_obligation( + "Eine Sicherheitsrichtlinie dokumentieren und pflegen", + "Sicherheitsrichtlinie", + ) + + def test_encryption_not_composite(self): + """'Daten verschlüsseln' is atomic.""" + assert not _is_composite_obligation( + "Personenbezogene Daten bei der Übertragung verschlüsseln", + "Personenbezogene Daten", + ) + + def test_composite_flags_on_atomic(self): + """_compose_deterministic sets composite flags on the atomic.""" + atomic = _compose_deterministic( + obligation_text="CCM-Praktiken für AIS implementieren", + action="implementieren", + object_="CCM-Praktiken", + parent_title="AI System Controls", + parent_severity="high", + parent_category="security", + is_test=False, + is_reporting=False, + ) + assert atomic._is_composite is True # type: ignore[attr-defined] + assert atomic._atomicity == "composite" # type: ignore[attr-defined] + assert atomic._requires_decomposition is True # type: ignore[attr-defined] + + def test_non_composite_flags_on_atomic(self): + """_compose_deterministic sets atomic flags for non-composite.""" + atomic = _compose_deterministic( + obligation_text="MFA implementieren", + action="implementieren", + object_="MFA", + parent_title="Access Control", + parent_severity="high", + parent_category="security", + is_test=False, + is_reporting=False, + ) + assert atomic._is_composite is False # type: ignore[attr-defined] + assert 
atomic._atomicity == "atomic" # type: ignore[attr-defined] + assert atomic._requires_decomposition is False # type: ignore[attr-defined] + # --------------------------------------------------------------------------- # PROMPT BUILDER TESTS diff --git a/backend-compliance/tests/test_framework_decomposition.py b/backend-compliance/tests/test_framework_decomposition.py new file mode 100644 index 0000000..301538d --- /dev/null +++ b/backend-compliance/tests/test_framework_decomposition.py @@ -0,0 +1,453 @@ +"""Tests for Framework Decomposition Engine. + +Covers: +- Registry loading +- Routing classification (atomic / compound / framework_container) +- Framework + domain matching +- Subcontrol selection +- Decomposition into sub-obligations +- Quality rules (warnings, errors) +- Inference helpers +""" + +import pytest + +from compliance.services.framework_decomposition import ( + classify_routing, + decompose_framework_container, + get_registry, + registry_stats, + reload_registry, + DecomposedObligation, + FrameworkDecompositionResult, + RoutingResult, + _detect_framework, + _has_framework_keywords, + _infer_action, + _infer_object, + _is_compound_obligation, + _match_domain, + _select_subcontrols, +) + + +# --------------------------------------------------------------------------- +# REGISTRY TESTS +# --------------------------------------------------------------------------- + + +class TestRegistryLoading: + + def test_registry_loads_successfully(self): + reg = get_registry() + assert len(reg) >= 3 + + def test_nist_in_registry(self): + reg = get_registry() + assert "NIST_SP800_53" in reg + + def test_owasp_asvs_in_registry(self): + reg = get_registry() + assert "OWASP_ASVS" in reg + + def test_csa_ccm_in_registry(self): + reg = get_registry() + assert "CSA_CCM" in reg + + def test_nist_has_domains(self): + reg = get_registry() + nist = reg["NIST_SP800_53"] + assert len(nist["domains"]) >= 5 + + def test_nist_ac_has_subcontrols(self): + reg = get_registry() + nist = 
reg["NIST_SP800_53"] + ac = next(d for d in nist["domains"] if d["domain_id"] == "AC") + assert len(ac["subcontrols"]) >= 5 + + def test_registry_stats(self): + stats = registry_stats() + assert stats["frameworks"] >= 3 + assert stats["total_domains"] >= 10 + assert stats["total_subcontrols"] >= 30 + + def test_reload_registry(self): + reg = reload_registry() + assert len(reg) >= 3 + + +# --------------------------------------------------------------------------- +# ROUTING TESTS +# --------------------------------------------------------------------------- + + +class TestClassifyRouting: + + def test_atomic_simple_obligation(self): + result = classify_routing( + obligation_text="Multi-Faktor-Authentifizierung muss implementiert werden", + action_raw="implementieren", + object_raw="MFA", + ) + assert result.routing_type == "atomic" + + def test_framework_container_ccm_ais(self): + result = classify_routing( + obligation_text="Die CCM-Praktiken fuer Application and Interface Security (AIS) muessen implementiert werden", + action_raw="implementieren", + object_raw="CCM-Praktiken fuer AIS", + ) + assert result.routing_type == "framework_container" + assert result.framework_ref == "CSA_CCM" + assert result.framework_domain == "AIS" + + def test_framework_container_nist_800_53(self): + result = classify_routing( + obligation_text="Kontrollen gemaess NIST SP 800-53 umsetzen", + action_raw="umsetzen", + object_raw="Kontrollen gemaess NIST SP 800-53", + ) + assert result.routing_type == "framework_container" + assert result.framework_ref == "NIST_SP800_53" + + def test_framework_container_owasp_asvs(self): + result = classify_routing( + obligation_text="OWASP ASVS Anforderungen muessen implementiert werden", + action_raw="implementieren", + object_raw="OWASP ASVS Anforderungen", + ) + assert result.routing_type == "framework_container" + assert result.framework_ref == "OWASP_ASVS" + + def test_compound_obligation(self): + result = classify_routing( + 
obligation_text="Richtlinie erstellen und Schulungen durchfuehren", + action_raw="erstellen und durchfuehren", + object_raw="Richtlinie", + ) + assert result.routing_type == "compound" + + def test_no_split_phrase_not_compound(self): + result = classify_routing( + obligation_text="Richtlinie dokumentieren und pflegen", + action_raw="dokumentieren und pflegen", + object_raw="Richtlinie", + ) + assert result.routing_type == "atomic" + + def test_framework_keywords_in_object(self): + result = classify_routing( + obligation_text="Massnahmen umsetzen", + action_raw="umsetzen", + object_raw="Framework-Praktiken und Kontrollen", + ) + assert result.routing_type == "framework_container" + + def test_bsi_grundschutz_detected(self): + result = classify_routing( + obligation_text="BSI IT-Grundschutz Massnahmen umsetzen", + action_raw="umsetzen", + object_raw="BSI IT-Grundschutz Massnahmen", + ) + assert result.routing_type == "framework_container" + + +# --------------------------------------------------------------------------- +# FRAMEWORK DETECTION TESTS +# --------------------------------------------------------------------------- + + +class TestFrameworkDetection: + + def test_detect_csa_ccm_with_domain(self): + result = _detect_framework( + "CCM-Praktiken fuer AIS implementieren", + "CCM-Praktiken", + ) + assert result.routing_type == "framework_container" + assert result.framework_ref == "CSA_CCM" + assert result.framework_domain == "AIS" + + def test_detect_nist_without_domain(self): + result = _detect_framework( + "NIST SP 800-53 Kontrollen implementieren", + "Kontrollen", + ) + assert result.routing_type == "framework_container" + assert result.framework_ref == "NIST_SP800_53" + + def test_no_framework_in_simple_text(self): + result = _detect_framework( + "Passwortrichtlinie dokumentieren", + "Passwortrichtlinie", + ) + assert result.routing_type == "atomic" + + def test_csa_ccm_iam_domain(self): + result = _detect_framework( + "CSA CCM Identity and Access 
class TestDomainMatching:
    """Domain resolution inside one framework via ``_match_domain``."""

    def test_match_ais_by_id(self):
        csa_ccm = get_registry()["CSA_CCM"]
        matched, _title = _match_domain("AIS-Kontrollen implementieren", csa_ccm)
        assert matched == "AIS"

    def test_match_by_full_title(self):
        csa_ccm = get_registry()["CSA_CCM"]
        text = "Application and Interface Security Massnahmen"
        matched, _title = _match_domain(text, csa_ccm)
        assert matched == "AIS"

    def test_match_nist_incident_response(self):
        nist = get_registry()["NIST_SP800_53"]
        text = "Vorfallreaktionsverfahren gemaess NIST IR"
        matched, _title = _match_domain(text, nist)
        assert matched == "IR"

    def test_no_match_generic_text(self):
        nist = get_registry()["NIST_SP800_53"]
        matched, _title = _match_domain("etwas Allgemeines", nist)
        assert matched is None
"object_hint": ""}, + ] + selected = _select_subcontrols("Passwort aendern", subcontrols) + assert selected == [] + + def test_title_match_boosts_score(self): + subcontrols = [ + {"subcontrol_id": "SC-1", "title": "Password Security", "keywords": ["passwort"], "object_hint": ""}, + {"subcontrol_id": "SC-2", "title": "Network Security", "keywords": ["netzwerk"], "object_hint": ""}, + ] + selected = _select_subcontrols("Password Security muss implementiert werden", subcontrols) + assert len(selected) >= 1 + assert selected[0]["subcontrol_id"] == "SC-1" + + +# --------------------------------------------------------------------------- +# DECOMPOSITION TESTS +# --------------------------------------------------------------------------- + + +class TestDecomposeFrameworkContainer: + + def test_decompose_ccm_ais(self): + result = decompose_framework_container( + obligation_candidate_id="OBL-001", + parent_control_id="COMP-001", + obligation_text="Die CCM-Praktiken fuer AIS muessen implementiert werden", + framework_ref="CSA_CCM", + framework_domain="AIS", + ) + assert result.release_state == "decomposed" + assert result.framework_ref == "CSA_CCM" + assert result.framework_domain == "AIS" + assert len(result.decomposed_obligations) >= 3 + assert len(result.matched_subcontrols) >= 3 + + def test_decomposed_obligations_have_ids(self): + result = decompose_framework_container( + obligation_candidate_id="OBL-001", + parent_control_id="COMP-001", + obligation_text="CCM-Praktiken fuer AIS", + framework_ref="CSA_CCM", + framework_domain="AIS", + ) + for d in result.decomposed_obligations: + assert d.obligation_candidate_id.startswith("OBL-001-AIS-") + assert d.parent_control_id == "COMP-001" + assert d.source_ref_law == "Cloud Security Alliance CCM v4" + assert d.routing_type == "atomic" + assert d.release_state == "decomposed" + + def test_decomposed_have_action_and_object(self): + result = decompose_framework_container( + obligation_candidate_id="OBL-002", + 
parent_control_id="COMP-002", + obligation_text="CSA CCM AIS Massnahmen implementieren", + framework_ref="CSA_CCM", + framework_domain="AIS", + ) + for d in result.decomposed_obligations: + assert d.action_raw, f"{d.subcontrol_id} missing action_raw" + assert d.object_raw, f"{d.subcontrol_id} missing object_raw" + + def test_unknown_framework_returns_unmatched(self): + result = decompose_framework_container( + obligation_candidate_id="OBL-003", + parent_control_id="COMP-003", + obligation_text="XYZ-Framework Controls", + framework_ref="NONEXISTENT", + framework_domain="ABC", + ) + assert result.release_state == "unmatched" + assert any("framework_not_matched" in i for i in result.issues) + assert len(result.decomposed_obligations) == 0 + + def test_unknown_domain_falls_back_to_full(self): + result = decompose_framework_container( + obligation_candidate_id="OBL-004", + parent_control_id="COMP-004", + obligation_text="CSA CCM Kontrollen implementieren", + framework_ref="CSA_CCM", + framework_domain=None, + ) + # Should still decompose (falls back to keyword match or all domains) + assert result.release_state in ("decomposed", "unmatched") + + def test_nist_incident_response_decomposition(self): + result = decompose_framework_container( + obligation_candidate_id="OBL-010", + parent_control_id="COMP-010", + obligation_text="NIST SP 800-53 Vorfallreaktionsmassnahmen implementieren", + framework_ref="NIST_SP800_53", + framework_domain="IR", + ) + assert result.release_state == "decomposed" + assert len(result.decomposed_obligations) >= 3 + sc_ids = [d.subcontrol_id for d in result.decomposed_obligations] + assert any("IR-" in sc for sc in sc_ids) + + def test_confidence_high_with_full_match(self): + result = decompose_framework_container( + obligation_candidate_id="OBL-005", + parent_control_id="COMP-005", + obligation_text="CSA CCM AIS", + framework_ref="CSA_CCM", + framework_domain="AIS", + ) + assert result.decomposition_confidence >= 0.7 + + def 
test_confidence_low_without_framework(self): + result = decompose_framework_container( + obligation_candidate_id="OBL-006", + parent_control_id="COMP-006", + obligation_text="Unbekannte Massnahmen", + framework_ref=None, + framework_domain=None, + ) + assert result.decomposition_confidence <= 0.3 + + +# --------------------------------------------------------------------------- +# COMPOUND DETECTION TESTS +# --------------------------------------------------------------------------- + + +class TestCompoundDetection: + + def test_compound_verb(self): + assert _is_compound_obligation( + "erstellen und schulen", + "Richtlinie erstellen und Schulungen durchfuehren", + ) + + def test_no_split_phrase(self): + assert not _is_compound_obligation( + "dokumentieren und pflegen", + "Richtlinie dokumentieren und pflegen", + ) + + def test_no_split_define_and_maintain(self): + assert not _is_compound_obligation( + "define and maintain", + "Define and maintain a security policy", + ) + + def test_single_verb_not_compound(self): + assert not _is_compound_obligation( + "implementieren", + "MFA implementieren", + ) + + def test_empty_action_not_compound(self): + assert not _is_compound_obligation("", "something") + + +# --------------------------------------------------------------------------- +# FRAMEWORK KEYWORD TESTS +# --------------------------------------------------------------------------- + + +class TestFrameworkKeywords: + + def test_two_keywords_detected(self): + assert _has_framework_keywords("Framework-Praktiken implementieren") + + def test_single_keyword_not_enough(self): + assert not _has_framework_keywords("Praktiken implementieren") + + def test_no_keywords(self): + assert not _has_framework_keywords("MFA einrichten") + + +# --------------------------------------------------------------------------- +# INFERENCE HELPER TESTS +# --------------------------------------------------------------------------- + + +class TestInferAction: + + def 
class TestInferObject:
    """Object inference from subcontrol statements via ``_infer_object``."""

    def test_infer_from_muessen_pattern(self):
        inferred = _infer_object("Zugriffsrechte muessen ueberprueft werden")
        accepted = ("ueberprueft", "Zugriffsrechte")
        assert any(token in inferred for token in accepted)

    def test_infer_fallback(self):
        inferred = _infer_object("Einfacher Satz ohne Modalverb")
        assert len(inferred) > 0