feat(canonical-controls): Canonical Control Library — rechtssichere Security Controls
All checks were successful
CI/CD / go-lint (push) Has been skipped
CI/CD / python-lint (push) Has been skipped
CI/CD / nodejs-lint (push) Has been skipped
CI/CD / test-go-ai-compliance (push) Successful in 40s
CI/CD / test-python-backend-compliance (push) Successful in 41s
CI/CD / test-python-document-crawler (push) Successful in 26s
CI/CD / test-python-dsms-gateway (push) Successful in 23s
CI/CD / validate-canonical-controls (push) Successful in 18s
CI/CD / deploy-hetzner (push) Successful in 2m26s

Eigenstaendig formulierte Security Controls mit unabhaengiger Taxonomie
und Open-Source-Verankerung (OWASP, NIST, ENISA). Keine BSI-Nomenklatur.

- Migration 044: 5 DB-Tabellen (frameworks, controls, sources, licenses, mappings)
- 10 Seed Controls mit 39 Open-Source-Referenzen
- License Gate: Quellen-Berechtigungspruefung (analysis/excerpt/embeddings/product)
- Too-Close-Detektor: 5 Metriken (exact-phrase, token-overlap, ngram, embedding, LCS)
- REST API: 8 Endpoints unter /v1/canonical/
- Go Loader mit Multi-Index (ID, domain, severity, framework)
- Frontend: Control Library Browser + Provenance Wiki
- CI/CD: validate-controls.py Job (schema, no-leak, open-anchors)
- 67 Tests (8 Go + 59 Python), alle PASS
- MkDocs Dokumentation

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-03-12 19:55:06 +01:00
parent 8442115e7c
commit 050f353192
20 changed files with 3935 additions and 0 deletions

View File

@@ -0,0 +1,222 @@
#!/usr/bin/env python3
"""
Canonical Control Library — CI/CD Validator
Checks:
1. Schema Validation — JSON against defined structure
2. License Gate — Every mapping reference fulfils allowed_usages
3. No-Leak Scanner — Regex against forbidden locator patterns (e.g. O.Auth_*, O.Netz_*)
4. Provenance Integrity — Every referenced source_id exists in seed data
5. Open Anchor Check — Every control has >= 1 open anchor
Usage:
python scripts/validate-controls.py
"""
import json
import re
import sys
from pathlib import Path
# ---------------------------------------------------------------------------
# Paths
# ---------------------------------------------------------------------------
REPO_ROOT = Path(__file__).resolve().parent.parent
CONTROLS_FILE = REPO_ROOT / "ai-compliance-sdk" / "policies" / "canonical_controls_v1.json"
MIGRATION_FILE = REPO_ROOT / "backend-compliance" / "migrations" / "044_canonical_control_library.sql"
# ---------------------------------------------------------------------------
# Forbidden patterns (BSI proprietary nomenclature — must NOT appear in controls)
# ---------------------------------------------------------------------------
FORBIDDEN_PATTERNS = [
re.compile(r"O\.[A-Za-z]+_[0-9]+"), # BSI objective IDs: O.Auth_1, O.Netz_3
re.compile(r"TR-03161"), # Direct TR reference in control text
re.compile(r"BSI-TR-"), # Direct BSI-TR reference
re.compile(r"Anforderung\s+[A-Z]\.\d+"), # BSI requirement format
]
# Fields that are product-facing and must not contain forbidden patterns
PRODUCT_FIELDS = ["objective", "rationale", "title", "requirements", "test_procedure"]
# ---------------------------------------------------------------------------
# Known open sources (from migration seed)
# ---------------------------------------------------------------------------
KNOWN_OPEN_SOURCES = {
"OWASP_ASVS", "OWASP_MASVS", "OWASP_TOP10",
"NIST_SP800_53", "NIST_SP800_63B",
"ENISA_GOOD_PRACTICES", "CIS_CONTROLS",
}
KNOWN_ALL_SOURCES = KNOWN_OPEN_SOURCES | {
"BSI_TR03161_1", "BSI_TR03161_2", "BSI_TR03161_3",
}
# ---------------------------------------------------------------------------
# Validators
# ---------------------------------------------------------------------------
errors: list[str] = []
warnings: list[str] = []
def error(msg: str) -> None:
errors.append(msg)
def warn(msg: str) -> None:
warnings.append(msg)
def check_schema(data: dict) -> None:
"""Validate JSON structure."""
required_top = ["version", "schema", "framework", "total_controls", "domains", "controls"]
for key in required_top:
if key not in data:
error(f"[SCHEMA] Missing top-level key: {key}")
required_control = [
"control_id", "title", "domain", "severity", "objective",
"rationale", "scope", "requirements", "test_procedure",
"evidence", "open_anchors",
]
control_id_pattern = re.compile(r"^[A-Z]{2,6}-[0-9]{3}$")
valid_severities = {"low", "medium", "high", "critical"}
for ctrl in data.get("controls", []):
cid = ctrl.get("control_id", "???")
for key in required_control:
if key not in ctrl:
error(f"[SCHEMA] Control {cid}: missing field '{key}'")
if not control_id_pattern.match(cid):
error(f"[SCHEMA] Control {cid}: ID does not match ^[A-Z]{{2,6}}-[0-9]{{3}}$")
sev = ctrl.get("severity", "")
if sev not in valid_severities:
error(f"[SCHEMA] Control {cid}: invalid severity '{sev}'")
if ctrl.get("risk_score") is not None:
rs = ctrl["risk_score"]
if not (0 <= rs <= 10):
error(f"[SCHEMA] Control {cid}: risk_score {rs} out of range [0, 10]")
domain_ids = {d["id"] for d in data.get("domains", [])}
for ctrl in data.get("controls", []):
cid = ctrl.get("control_id", "???")
if ctrl.get("domain") not in domain_ids:
error(f"[SCHEMA] Control {cid}: domain '{ctrl.get('domain')}' not in domains list")
def check_no_leak(data: dict) -> None:
"""Ensure no BSI-proprietary nomenclature leaks into product-facing fields."""
for ctrl in data.get("controls", []):
cid = ctrl.get("control_id", "???")
for field_name in PRODUCT_FIELDS:
values = ctrl.get(field_name, "")
if isinstance(values, list):
texts = values
else:
texts = [values]
for text_val in texts:
if not isinstance(text_val, str):
continue
for pattern in FORBIDDEN_PATTERNS:
match = pattern.search(text_val)
if match:
error(
f"[NO-LEAK] Control {cid}.{field_name}: "
f"forbidden pattern '{match.group()}' found"
)
def check_open_anchors(data: dict) -> None:
"""Every control must have at least 1 open anchor."""
for ctrl in data.get("controls", []):
cid = ctrl.get("control_id", "???")
anchors = ctrl.get("open_anchors", [])
if len(anchors) < 1:
error(f"[ANCHOR] Control {cid}: no open anchors — every control needs >= 1")
# Check anchor structure
for i, anchor in enumerate(anchors):
for key in ["framework", "ref", "url"]:
if key not in anchor or not anchor[key]:
error(f"[ANCHOR] Control {cid}: open_anchor[{i}] missing '{key}'")
def check_independent_taxonomy(data: dict) -> None:
"""Verify controls use independent taxonomy, not BSI structure."""
bsi_domain_patterns = [
re.compile(r"^O\.", re.IGNORECASE), # BSI objective prefix
]
for ctrl in data.get("controls", []):
cid = ctrl.get("control_id", "???")
for pattern in bsi_domain_patterns:
if pattern.match(cid):
error(f"[TAXONOMY] Control {cid}: uses BSI-style ID prefix")
def check_evidence_fields(data: dict) -> None:
"""Validate evidence items have required fields."""
for ctrl in data.get("controls", []):
cid = ctrl.get("control_id", "???")
for i, ev in enumerate(ctrl.get("evidence", [])):
if not isinstance(ev, dict):
error(f"[EVIDENCE] Control {cid}: evidence[{i}] is not an object")
continue
for key in ["type", "description"]:
if key not in ev or not ev[key]:
error(f"[EVIDENCE] Control {cid}: evidence[{i}] missing '{key}'")
# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
def main() -> int:
print(f"Validating: {CONTROLS_FILE}")
print()
if not CONTROLS_FILE.exists():
print(f"ERROR: File not found: {CONTROLS_FILE}")
return 1
with open(CONTROLS_FILE) as f:
data = json.load(f)
check_schema(data)
check_no_leak(data)
check_open_anchors(data)
check_independent_taxonomy(data)
check_evidence_fields(data)
total_controls = len(data.get("controls", []))
total_anchors = sum(len(c.get("open_anchors", [])) for c in data.get("controls", []))
print(f"Controls: {total_controls}")
print(f"Open Anchors: {total_anchors}")
print()
if warnings:
print(f"WARNINGS ({len(warnings)}):")
for w in warnings:
print(f"{w}")
print()
if errors:
print(f"ERRORS ({len(errors)}):")
for e in errors:
print(f"{e}")
print()
print("VALIDATION FAILED")
return 1
print("ALL CHECKS PASSED")
return 0
if __name__ == "__main__":
sys.exit(main())