All checks were successful
CI/CD / go-lint (push) Has been skipped
CI/CD / python-lint (push) Has been skipped
CI/CD / nodejs-lint (push) Has been skipped
CI/CD / test-go-ai-compliance (push) Successful in 40s
CI/CD / test-python-backend-compliance (push) Successful in 41s
CI/CD / test-python-document-crawler (push) Successful in 26s
CI/CD / test-python-dsms-gateway (push) Successful in 23s
CI/CD / validate-canonical-controls (push) Successful in 18s
CI/CD / deploy-hetzner (push) Successful in 2m26s
Eigenstaendig formulierte Security Controls mit unabhaengiger Taxonomie und Open-Source-Verankerung (OWASP, NIST, ENISA). Keine BSI-Nomenklatur. - Migration 044: 5 DB-Tabellen (frameworks, controls, sources, licenses, mappings) - 10 Seed Controls mit 39 Open-Source-Referenzen - License Gate: Quellen-Berechtigungspruefung (analysis/excerpt/embeddings/product) - Too-Close-Detektor: 5 Metriken (exact-phrase, token-overlap, ngram, embedding, LCS) - REST API: 8 Endpoints unter /v1/canonical/ - Go Loader mit Multi-Index (ID, domain, severity, framework) - Frontend: Control Library Browser + Provenance Wiki - CI/CD: validate-controls.py Job (schema, no-leak, open-anchors) - 67 Tests (8 Go + 59 Python), alle PASS - MkDocs Dokumentation Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
223 lines
7.7 KiB
Python
223 lines
7.7 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Canonical Control Library — CI/CD Validator
|
|
|
|
Checks:
|
|
1. Schema Validation — JSON against defined structure
|
|
2. License Gate — Every mapping reference fulfils allowed_usages
|
|
3. No-Leak Scanner — Regex against forbidden locator patterns (e.g. O.Auth_*, O.Netz_*)
|
|
4. Provenance Integrity — Every referenced source_id exists in seed data
|
|
5. Open Anchor Check — Every control has >= 1 open anchor
|
|
|
|
Usage:
|
|
python scripts/validate-controls.py
|
|
"""
|
|
|
|
import json
|
|
import re
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Paths
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# The repository root is two levels above this file (script dir -> repo root).
_SCRIPT_DIR = Path(__file__).resolve().parent
REPO_ROOT = _SCRIPT_DIR.parent

# JSON document that the checks in this script validate.
CONTROLS_FILE = REPO_ROOT.joinpath(
    "ai-compliance-sdk",
    "policies",
    "canonical_controls_v1.json",
)

# SQL migration belonging to the control library.
MIGRATION_FILE = REPO_ROOT.joinpath(
    "backend-compliance",
    "migrations",
    "044_canonical_control_library.sql",
)
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Forbidden patterns (BSI proprietary nomenclature — must NOT appear in controls)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Regex sources for BSI-specific wording. check_no_leak() reports any match
# inside one of the PRODUCT_FIELDS of a control.
_FORBIDDEN_REGEX_SOURCES = (
    r"O\.[A-Za-z]+_[0-9]+",        # BSI objective IDs: O.Auth_1, O.Netz_3
    r"TR-03161",                   # Direct TR reference in control text
    r"BSI-TR-",                    # Direct BSI-TR reference
    r"Anforderung\s+[A-Z]\.\d+",   # BSI requirement format
)
FORBIDDEN_PATTERNS = [re.compile(source) for source in _FORBIDDEN_REGEX_SOURCES]

# Fields that are product-facing and must not contain forbidden patterns
PRODUCT_FIELDS = [
    "objective",
    "rationale",
    "title",
    "requirements",
    "test_procedure",
]
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Known open sources (from migration seed)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Source identifiers treated as open.
KNOWN_OPEN_SOURCES = {
    "OWASP_ASVS",
    "OWASP_MASVS",
    "OWASP_TOP10",
    "NIST_SP800_53",
    "NIST_SP800_63B",
    "ENISA_GOOD_PRACTICES",
    "CIS_CONTROLS",
}

# Every known source identifier: the open ones plus the BSI TR-03161 parts.
KNOWN_ALL_SOURCES = KNOWN_OPEN_SOURCES.union(
    {
        "BSI_TR03161_1",
        "BSI_TR03161_2",
        "BSI_TR03161_3",
    }
)
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Validators
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Module-level collectors. The check_* functions append findings through
# error()/warn(); main() prints both lists and fails the run when `errors`
# is non-empty.
errors: list[str] = []
warnings: list[str] = []


def error(msg: str) -> None:
    """Record ``msg`` as a validation error."""
    errors.append(msg)


def warn(msg: str) -> None:
    """Record ``msg`` as a validation warning."""
    warnings.append(msg)
|
|
|
|
|
|
def check_schema(data: dict) -> None:
    """Validate the document's top-level keys and the shape of every control.

    Findings are recorded through ``error()`` (and ``warn()`` for the
    ``total_controls`` count); nothing is returned. Wrongly typed values are
    reported as ``[SCHEMA]`` errors instead of raising, so one malformed
    entry cannot abort the run and hide the findings for all other controls.

    Args:
        data: The parsed controls JSON document (a JSON object).
    """
    required_top = ["version", "schema", "framework", "total_controls", "domains", "controls"]
    for key in required_top:
        if key not in data:
            error(f"[SCHEMA] Missing top-level key: {key}")

    required_control = [
        "control_id", "title", "domain", "severity", "objective",
        "rationale", "scope", "requirements", "test_procedure",
        "evidence", "open_anchors",
    ]
    control_id_pattern = re.compile(r"^[A-Z]{2,6}-[0-9]{3}$")
    valid_severities = {"low", "medium", "high", "critical"}

    raw_controls = data.get("controls", [])
    if isinstance(raw_controls, list):
        controls = raw_controls
        declared_total = data.get("total_controls")
        # Non-fatal: a stale counter should be visible but must not break CI.
        if "controls" in data and declared_total is not None and declared_total != len(controls):
            warn(
                f"[SCHEMA] total_controls is {declared_total} but the 'controls' "
                f"list has {len(controls)} entries"
            )
    else:
        error("[SCHEMA] 'controls' must be a list")
        controls = []

    for index, ctrl in enumerate(controls):
        if not isinstance(ctrl, dict):
            error(f"[SCHEMA] controls[{index}] is not an object")
            continue

        cid = ctrl.get("control_id", "???")
        for key in required_control:
            if key not in ctrl:
                error(f"[SCHEMA] Control {cid}: missing field '{key}'")

        # fullmatch: with match(), "$" would also accept a trailing newline.
        if not isinstance(cid, str) or not control_id_pattern.fullmatch(cid):
            error(f"[SCHEMA] Control {cid}: ID does not match ^[A-Z]{{2,6}}-[0-9]{{3}}$")

        sev = ctrl.get("severity", "")
        # The isinstance guard keeps unhashable JSON values (list/dict) out of the set lookup.
        if not isinstance(sev, str) or sev not in valid_severities:
            error(f"[SCHEMA] Control {cid}: invalid severity '{sev}'")

        rs = ctrl.get("risk_score")
        if rs is not None:
            # bool is a subclass of int; JSON true/false is not a score.
            if isinstance(rs, bool) or not isinstance(rs, (int, float)):
                error(f"[SCHEMA] Control {cid}: risk_score {rs!r} is not a number")
            elif not (0 <= rs <= 10):
                error(f"[SCHEMA] Control {cid}: risk_score {rs} out of range [0, 10]")

    raw_domains = data.get("domains", [])
    if not isinstance(raw_domains, list):
        error("[SCHEMA] 'domains' must be a list")
        raw_domains = []

    domain_ids = set()
    for index, dom in enumerate(raw_domains):
        if not isinstance(dom, dict) or "id" not in dom:
            error(f"[SCHEMA] domains[{index}]: missing 'id'")
            continue
        dom_id = dom["id"]
        # list and dict are the only unhashable values json.load can produce.
        if isinstance(dom_id, (list, dict)):
            error(f"[SCHEMA] domains[{index}]: 'id' must be a scalar value")
            continue
        domain_ids.add(dom_id)

    # Second pass, so domain findings are listed after the per-control findings.
    for ctrl in controls:
        if not isinstance(ctrl, dict):
            continue  # already reported in the first pass
        cid = ctrl.get("control_id", "???")
        domain = ctrl.get("domain")
        if isinstance(domain, (list, dict)) or domain not in domain_ids:
            error(f"[SCHEMA] Control {cid}: domain '{ctrl.get('domain')}' not in domains list")
|
|
|
|
|
|
def _iter_strings(value):
    """Yield every string inside ``value``, descending into lists and dict values."""
    if isinstance(value, str):
        yield value
    elif isinstance(value, list):
        for item in value:
            yield from _iter_strings(item)
    elif isinstance(value, dict):
        for item in value.values():
            yield from _iter_strings(item)


def check_no_leak(data: dict) -> None:
    """Ensure no BSI-proprietary nomenclature leaks into product-facing fields.

    For every control, each field named in ``PRODUCT_FIELDS`` is searched
    with every regex in ``FORBIDDEN_PATTERNS``; the first hit of a pattern in
    a string is recorded through ``error()``. Strings nested inside lists or
    objects are scanned as well, so a forbidden reference cannot slip through
    by sitting one level deeper than a flat list of strings.

    Args:
        data: The parsed controls JSON document.
    """
    controls = data.get("controls", [])
    if not isinstance(controls, list):
        return  # nothing that can be scanned

    for ctrl in controls:
        if not isinstance(ctrl, dict):
            continue  # no fields to scan on a non-object entry
        cid = ctrl.get("control_id", "???")
        for field_name in PRODUCT_FIELDS:
            for text_val in _iter_strings(ctrl.get(field_name, "")):
                for pattern in FORBIDDEN_PATTERNS:
                    match = pattern.search(text_val)
                    if match:
                        error(
                            f"[NO-LEAK] Control {cid}.{field_name}: "
                            f"forbidden pattern '{match.group()}' found"
                        )
|
|
|
|
|
|
def check_open_anchors(data: dict) -> None:
    """Require at least one well-formed open anchor on every control.

    An anchor is well-formed when it is an object with non-empty
    ``framework``, ``ref`` and ``url`` values. Findings are recorded through
    ``error()``. A missing, ``null`` or otherwise non-list ``open_anchors``
    value and non-object anchors are reported instead of raising.

    Args:
        data: The parsed controls JSON document.
    """
    controls = data.get("controls", [])
    if not isinstance(controls, list):
        return  # nothing that can be inspected

    for ctrl in controls:
        if not isinstance(ctrl, dict):
            continue
        cid = ctrl.get("control_id", "???")

        anchors = ctrl.get("open_anchors", [])
        if not isinstance(anchors, list):
            error(f"[ANCHOR] Control {cid}: 'open_anchors' must be a list")
            continue
        if not anchors:
            error(f"[ANCHOR] Control {cid}: no open anchors — every control needs >= 1")

        # Check anchor structure
        for i, anchor in enumerate(anchors):
            if not isinstance(anchor, dict):
                error(f"[ANCHOR] Control {cid}: open_anchor[{i}] is not an object")
                continue
            for key in ("framework", "ref", "url"):
                # Absent and empty values are both treated as missing.
                if not anchor.get(key):
                    error(f"[ANCHOR] Control {cid}: open_anchor[{i}] missing '{key}'")
|
|
|
|
|
|
def check_independent_taxonomy(data: dict) -> None:
    """Verify that control IDs do not use the BSI-style ``O.`` prefix.

    Each control's ``control_id`` is matched case-insensitively against the
    prefix pattern; a hit is recorded through ``error()``. Entries that are
    not objects, or whose ID is not a string, are skipped here instead of
    raising.

    Args:
        data: The parsed controls JSON document.
    """
    bsi_id_patterns = [
        re.compile(r"^O\.", re.IGNORECASE),  # BSI objective prefix
    ]

    controls = data.get("controls", [])
    if not isinstance(controls, list):
        return  # nothing that can be inspected

    for ctrl in controls:
        if not isinstance(ctrl, dict):
            continue
        cid = ctrl.get("control_id", "???")
        if not isinstance(cid, str):
            continue  # a regex can only be applied to text
        for pattern in bsi_id_patterns:
            if pattern.match(cid):
                error(f"[TAXONOMY] Control {cid}: uses BSI-style ID prefix")
|
|
|
|
|
|
def check_evidence_fields(data: dict) -> None:
    """Validate that every evidence item has a ``type`` and a ``description``.

    Findings are recorded through ``error()``. An ``evidence`` value that is
    not a list (for example ``null``) is reported once for the control
    instead of raising or being iterated element by element.

    Args:
        data: The parsed controls JSON document.
    """
    controls = data.get("controls", [])
    if not isinstance(controls, list):
        return  # nothing that can be inspected

    for ctrl in controls:
        if not isinstance(ctrl, dict):
            continue
        cid = ctrl.get("control_id", "???")

        evidence = ctrl.get("evidence", [])
        if not isinstance(evidence, list):
            error(f"[EVIDENCE] Control {cid}: 'evidence' must be a list")
            continue

        for i, ev in enumerate(evidence):
            if not isinstance(ev, dict):
                error(f"[EVIDENCE] Control {cid}: evidence[{i}] is not an object")
                continue
            for key in ("type", "description"):
                # Absent and empty values are both treated as missing.
                if not ev.get(key):
                    error(f"[EVIDENCE] Control {cid}: evidence[{i}] missing '{key}'")
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Main
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def main() -> int:
    """Load the controls file, run every check and print a report.

    Runs ``check_schema``, ``check_no_leak``, ``check_open_anchors``,
    ``check_independent_taxonomy`` and ``check_evidence_fields`` on the
    parsed document, then prints the collected warnings and errors.

    Returns:
        0 when no error was recorded; 1 when at least one error was recorded
        or the controls file is missing, unreadable, not valid JSON, or not
        a JSON object at the top level.
    """
    # Start clean so a second call in the same process does not re-report
    # findings collected by an earlier call.
    errors.clear()
    warnings.clear()

    print(f"Validating: {CONTROLS_FILE}")
    print()

    if not CONTROLS_FILE.exists():
        print(f"ERROR: File not found: {CONTROLS_FILE}")
        return 1

    try:
        # Explicit encoding: the platform default is locale-dependent.
        with open(CONTROLS_FILE, encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError) as exc:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        print(f"ERROR: Cannot read {CONTROLS_FILE}: {exc}")
        return 1

    if not isinstance(data, dict):
        print(f"ERROR: Top-level JSON value in {CONTROLS_FILE} must be an object")
        return 1

    check_schema(data)
    check_no_leak(data)
    check_open_anchors(data)
    check_independent_taxonomy(data)
    check_evidence_fields(data)

    # Summary counts must not crash on malformed entries.
    controls = data.get("controls", [])
    if not isinstance(controls, list):
        controls = []
    total_controls = len(controls)
    total_anchors = sum(
        len(c["open_anchors"])
        for c in controls
        if isinstance(c, dict) and isinstance(c.get("open_anchors"), list)
    )

    print(f"Controls: {total_controls}")
    print(f"Open Anchors: {total_anchors}")
    print()

    if warnings:
        print(f"WARNINGS ({len(warnings)}):")
        for w in warnings:
            print(f" ⚠ {w}")
        print()

    if errors:
        print(f"ERRORS ({len(errors)}):")
        for e in errors:
            print(f" ✗ {e}")
        print()
        print("VALIDATION FAILED")
        return 1

    print("ALL CHECKS PASSED")
    return 0
|
|
|
|
|
|
# Script entry point: the process exit status is whatever main() returns.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)
|