feat(canonical-controls): Canonical Control Library — rechtssichere Security Controls
All checks were successful
CI/CD / go-lint (push) Has been skipped
CI/CD / python-lint (push) Has been skipped
CI/CD / nodejs-lint (push) Has been skipped
CI/CD / test-go-ai-compliance (push) Successful in 40s
CI/CD / test-python-backend-compliance (push) Successful in 41s
CI/CD / test-python-document-crawler (push) Successful in 26s
CI/CD / test-python-dsms-gateway (push) Successful in 23s
CI/CD / validate-canonical-controls (push) Successful in 18s
CI/CD / deploy-hetzner (push) Successful in 2m26s

Eigenstaendig formulierte Security Controls mit unabhaengiger Taxonomie
und Open-Source-Verankerung (OWASP, NIST, ENISA). Keine BSI-Nomenklatur.

- Migration 044: 5 DB-Tabellen (frameworks, controls, sources, licenses, mappings)
- 10 Seed Controls mit 39 Open-Source-Referenzen
- License Gate: Quellen-Berechtigungspruefung (analysis/excerpt/embeddings/product)
- Too-Close-Detektor: 5 Metriken (exact-phrase, token-overlap, ngram, embedding, LCS)
- REST API: 8 Endpoints unter /v1/canonical/
- Go Loader mit Multi-Index (ID, domain, severity, framework)
- Frontend: Control Library Browser + Provenance Wiki
- CI/CD: validate-controls.py Job (schema, no-leak, open-anchors)
- 67 Tests (8 Go + 59 Python), alle PASS
- MkDocs Dokumentation

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-03-12 19:55:06 +01:00
parent 8442115e7c
commit 050f353192
20 changed files with 3935 additions and 0 deletions

View File

@@ -0,0 +1,225 @@
"""Tests for Canonical Control Library routes (canonical_control_routes.py)."""
import pytest
from unittest.mock import MagicMock, patch
from datetime import datetime, timezone
from compliance.api.canonical_control_routes import (
FrameworkResponse,
ControlResponse,
SimilarityCheckRequest,
SimilarityCheckResponse,
_control_row,
)
class TestFrameworkResponse:
    """Construction checks for the FrameworkResponse model."""

    def test_basic_creation(self):
        """Values passed for required fields can be read back unchanged."""
        stamp = "2026-03-12T00:00:00+00:00"
        payload = {
            "id": "uuid-1",
            "framework_id": "bp_security_v1",
            "name": "BreakPilot Security Controls",
            "version": "1.0",
            "release_state": "draft",
            "created_at": stamp,
            "updated_at": stamp,
        }
        framework = FrameworkResponse(**payload)
        assert framework.framework_id == "bp_security_v1"
        assert framework.version == "1.0"

    def test_optional_fields(self):
        """Fields omitted from the constructor are expected to be None."""
        stamp = "2026-03-12T00:00:00+00:00"
        payload = {
            "id": "uuid-1",
            "framework_id": "test",
            "name": "Test",
            "version": "1.0",
            "release_state": "draft",
            "created_at": stamp,
            "updated_at": stamp,
        }
        framework = FrameworkResponse(**payload)
        for optional in ("description", "owner", "policy_version"):
            assert getattr(framework, optional) is None
class TestControlResponse:
    """Construction checks for the ControlResponse model."""

    def test_full_control(self):
        """A fully populated control exposes its ID, severity and anchors."""
        stamp = "2026-03-12T00:00:00+00:00"
        anchor = {
            "framework": "OWASP ASVS",
            "ref": "V2.8",
            "url": "https://owasp.org",
        }
        payload = {
            "id": "uuid-1",
            "framework_id": "uuid-fw",
            "control_id": "AUTH-001",
            "title": "Multi-Factor Authentication",
            "objective": "Require MFA for privileged access.",
            "rationale": "Passwords alone are insufficient.",
            "scope": {"platforms": ["web"]},
            "requirements": ["MFA for admin accounts"],
            "test_procedure": ["Test admin login without MFA"],
            "evidence": [{"type": "config", "description": "MFA config"}],
            "severity": "high",
            "open_anchors": [anchor],
            "release_state": "draft",
            "tags": ["mfa", "auth"],
            "created_at": stamp,
            "updated_at": stamp,
        }
        control = ControlResponse(**payload)
        assert control.control_id == "AUTH-001"
        assert control.severity == "high"
        assert len(control.open_anchors) == 1

    def test_optional_numeric_fields(self):
        """Scoring fields left out of the constructor are expected to be None."""
        stamp = "2026-03-12T00:00:00+00:00"
        payload = {
            "id": "uuid-1",
            "framework_id": "uuid-fw",
            "control_id": "NET-001",
            "title": "TLS",
            "objective": "Encrypt traffic.",
            "rationale": "Prevent eavesdropping.",
            "scope": {},
            "requirements": [],
            "test_procedure": [],
            "evidence": [],
            "severity": "high",
            "open_anchors": [],
            "release_state": "draft",
            "tags": [],
            "created_at": stamp,
            "updated_at": stamp,
        }
        control = ControlResponse(**payload)
        for optional in ("risk_score", "implementation_effort", "evidence_confidence"):
            assert getattr(control, optional) is None
class TestSimilarityCheckRequest:
    """Construction checks for the SimilarityCheckRequest model."""

    def test_valid_request(self):
        """Both texts are stored exactly as supplied."""
        source = "Die Anwendung muss MFA implementieren."
        candidate = "Multi-factor authentication is required."
        request = SimilarityCheckRequest(
            source_text=source,
            candidate_text=candidate,
        )
        assert request.source_text == "Die Anwendung muss MFA implementieren."
        assert request.candidate_text == "Multi-factor authentication is required."

    def test_empty_strings(self):
        """Empty texts are accepted by the model."""
        request = SimilarityCheckRequest(source_text="", candidate_text="")
        assert request.source_text == ""
class TestSimilarityCheckResponse:
    """Construction checks for the SimilarityCheckResponse model."""

    def test_pass_status(self):
        """A response whose metrics all pass reports the PASS status."""
        metric_names = (
            "max_exact_run",
            "token_overlap",
            "ngram_jaccard",
            "embedding_cosine",
            "lcs_ratio",
        )
        verdicts = dict.fromkeys(metric_names, "PASS")
        response = SimilarityCheckResponse(
            max_exact_run=2,
            token_overlap=0.05,
            ngram_jaccard=0.03,
            embedding_cosine=0.45,
            lcs_ratio=0.12,
            status="PASS",
            details=verdicts,
        )
        assert response.status == "PASS"

    def test_fail_status(self):
        """A response whose metrics all fail reports the FAIL status."""
        metric_names = (
            "max_exact_run",
            "token_overlap",
            "ngram_jaccard",
            "embedding_cosine",
            "lcs_ratio",
        )
        verdicts = dict.fromkeys(metric_names, "FAIL")
        response = SimilarityCheckResponse(
            max_exact_run=15,
            token_overlap=0.35,
            ngram_jaccard=0.20,
            embedding_cosine=0.95,
            lcs_ratio=0.55,
            status="FAIL",
            details=verdicts,
        )
        assert response.status == "FAIL"
class TestControlRowConversion:
    """Checks for the _control_row helper using mocked database rows."""

    def _make_row(self, **overrides):
        """Build a MagicMock row with sensible defaults.

        Keyword arguments replace (or add to) the default column values.
        """
        timestamp = datetime.now(timezone.utc)
        columns = {
            "id": "uuid-ctrl-1",
            "framework_id": "uuid-fw-1",
            "control_id": "AUTH-001",
            "title": "Multi-Factor Authentication",
            "objective": "Require MFA.",
            "rationale": "Passwords insufficient.",
            "scope": {"platforms": ["web", "mobile"]},
            "requirements": ["Req 1", "Req 2"],
            "test_procedure": ["Test 1"],
            "evidence": [{"type": "config", "description": "MFA config"}],
            "severity": "high",
            "risk_score": 8.5,
            "implementation_effort": "m",
            "evidence_confidence": 0.85,
            "open_anchors": [
                {"framework": "OWASP ASVS", "ref": "V2.8", "url": "https://owasp.org"},
            ],
            "release_state": "draft",
            "tags": ["mfa"],
            "created_at": timestamp,
            "updated_at": timestamp,
        }
        row = MagicMock()
        # Overrides win over defaults; every column becomes a plain attribute.
        row.configure_mock(**{**columns, **overrides})
        return row

    def test_basic_conversion(self):
        """Scalar columns are carried over into the result mapping."""
        converted = _control_row(self._make_row())
        expected = {
            "control_id": "AUTH-001",
            "severity": "high",
            "risk_score": 8.5,
            "implementation_effort": "m",
            "evidence_confidence": 0.85,
        }
        for key, value in expected.items():
            assert converted[key] == value
        assert len(converted["open_anchors"]) == 1

    def test_null_numeric_fields(self):
        """NULL scoring columns are expected to stay None in the result."""
        nullable = ("risk_score", "evidence_confidence", "implementation_effort")
        row = self._make_row(**dict.fromkeys(nullable))
        converted = _control_row(row)
        for key in nullable:
            assert converted[key] is None

    def test_empty_tags(self):
        """A NULL tags column is expected to come back as an empty list."""
        converted = _control_row(self._make_row(tags=None))
        assert converted["tags"] == []

    def test_empty_tags_list(self):
        """An empty tags list is expected to stay an empty list."""
        converted = _control_row(self._make_row(tags=[]))
        assert converted["tags"] == []

    def test_timestamp_format(self):
        """The rendered created_at contains both the date and the time."""
        fixed = datetime(2026, 3, 12, 10, 30, 0, tzinfo=timezone.utc)
        converted = _control_row(self._make_row(created_at=fixed, updated_at=fixed))
        rendered = converted["created_at"]
        assert "2026-03-12" in rendered
        assert "10:30" in rendered

    def test_none_timestamps(self):
        """Missing timestamps are expected to be returned as None."""
        converted = _control_row(self._make_row(created_at=None, updated_at=None))
        for key in ("created_at", "updated_at"):
            assert converted[key] is None

View File

@@ -0,0 +1,161 @@
"""Tests for License Gate service (license_gate.py)."""
import pytest
from unittest.mock import MagicMock, patch
from collections import namedtuple
from compliance.services.license_gate import (
check_source_allowed,
get_license_matrix,
get_source_permissions,
USAGE_COLUMN_MAP,
)
class TestUsageColumnMap:
    """Checks for the usage-type to column-name mapping."""

    def test_all_usage_types_mapped(self):
        """Exactly the four known usage types are present as keys."""
        known_usages = {"analysis", "store_excerpt", "ship_embeddings", "ship_in_product"}
        assert set(USAGE_COLUMN_MAP) == known_usages

    def test_column_names(self):
        """Each usage type maps to its corresponding allowed_* column."""
        expected_columns = {
            "analysis": "allowed_analysis",
            "store_excerpt": "allowed_store_excerpt",
            "ship_embeddings": "allowed_ship_embeddings",
            "ship_in_product": "allowed_ship_in_product",
        }
        for usage, column in expected_columns.items():
            assert USAGE_COLUMN_MAP[usage] == column
class TestCheckSourceAllowed:
    """Checks for check_source_allowed() against a mocked DB session."""

    def _mock_db(self, return_value):
        """Return a mocked session whose query yields one flag row.

        Passing None makes fetchone() return None (no matching row);
        any other value is wrapped in a one-element tuple.
        """
        fetched = None if return_value is None else (return_value,)
        query_result = MagicMock()
        query_result.fetchone.return_value = fetched
        session = MagicMock()
        session.execute.return_value = query_result
        return session

    def test_allowed_analysis(self):
        """A True flag permits analysis usage."""
        session = self._mock_db(True)
        assert check_source_allowed(session, "OWASP_ASVS", "analysis") is True

    def test_denied_ship_in_product(self):
        """A False flag denies shipping in the product."""
        session = self._mock_db(False)
        assert check_source_allowed(session, "BSI_TR03161_1", "ship_in_product") is False

    def test_unknown_source(self):
        """A source without a row is denied."""
        session = self._mock_db(None)
        assert check_source_allowed(session, "NONEXISTENT", "analysis") is False

    def test_unknown_usage_type(self):
        """An unmapped usage type is denied without querying the database."""
        session = MagicMock()
        assert check_source_allowed(session, "OWASP_ASVS", "invalid_type") is False
        # DB should not be called for invalid usage type
        session.execute.assert_not_called()

    def test_allowed_store_excerpt(self):
        """A True flag permits storing excerpts."""
        session = self._mock_db(True)
        assert check_source_allowed(session, "OWASP_ASVS", "store_excerpt") is True

    def test_denied_store_excerpt(self):
        """A False flag denies storing excerpts."""
        session = self._mock_db(False)
        assert check_source_allowed(session, "BSI_TR03161_1", "store_excerpt") is False
class TestGetLicenseMatrix:
    """Checks for get_license_matrix() against a mocked DB session."""

    def test_returns_list(self):
        """Each fetched row becomes one mapping, in the fetched order."""
        LicRow = namedtuple("LicRow", [
            "license_id", "name", "terms_url", "commercial_use",
            "ai_training_restriction", "tdm_allowed_under_44b",
            "deletion_required", "notes",
        ])
        open_license = LicRow(
            license_id="OWASP_CC_BY_SA",
            name="CC BY-SA 4.0",
            terms_url="https://example.com",
            commercial_use="allowed",
            ai_training_restriction=None,
            tdm_allowed_under_44b="yes",
            deletion_required=False,
            notes="Open source",
        )
        restricted_license = LicRow(
            license_id="BSI_TOS_2025",
            name="BSI ToS",
            terms_url="https://bsi.bund.de",
            commercial_use="restricted",
            ai_training_restriction="unclear",
            tdm_allowed_under_44b="yes",
            deletion_required=True,
            notes="Commercial restricted",
        )
        session = MagicMock()
        session.execute.return_value.fetchall.return_value = [
            open_license,
            restricted_license,
        ]

        matrix = get_license_matrix(session)

        assert len(matrix) == 2
        first, second = matrix[0], matrix[1]
        assert first["license_id"] == "OWASP_CC_BY_SA"
        assert first["commercial_use"] == "allowed"
        assert first["deletion_required"] is False
        assert second["license_id"] == "BSI_TOS_2025"
        assert second["commercial_use"] == "restricted"
        assert second["deletion_required"] is True

    def test_empty_result(self):
        """No fetched rows yields an empty list."""
        session = MagicMock()
        session.execute.return_value.fetchall.return_value = []
        assert get_license_matrix(session) == []
class TestGetSourcePermissions:
    """Checks for get_source_permissions() against a mocked DB session."""

    # Row shape shared by both tests: source columns followed by the
    # license_name / commercial_use columns.
    _SrcRow = namedtuple("SrcRow", [
        "source_id", "title", "publisher", "url", "version_label",
        "language", "license_id", "allowed_analysis", "allowed_store_excerpt",
        "allowed_ship_embeddings", "allowed_ship_in_product",
        "vault_retention_days", "vault_access_tier",
        "license_name", "commercial_use",
    ])

    @staticmethod
    def _permissions_for(rows):
        """Call get_source_permissions() on a session whose fetchall() yields rows."""
        session = MagicMock()
        session.execute.return_value.fetchall.return_value = rows
        return get_source_permissions(session)

    def test_returns_list_with_join(self):
        """An open source row exposes both source and license columns."""
        owasp = self._SrcRow(
            "OWASP_ASVS", "OWASP ASVS", "OWASP Foundation",
            "https://owasp.org", "4.0.3", "en", "OWASP_CC_BY_SA",
            True, True, True, True, 30, "public",
            "CC BY-SA 4.0", "allowed",
        )
        permissions = self._permissions_for([owasp])
        assert len(permissions) == 1
        entry = permissions[0]
        assert entry["source_id"] == "OWASP_ASVS"
        assert entry["allowed_analysis"] is True
        assert entry["allowed_ship_in_product"] is True
        assert entry["license_name"] == "CC BY-SA 4.0"
        assert entry["commercial_use"] == "allowed"

    def test_restricted_source(self):
        """A restricted source allows analysis only."""
        bsi = self._SrcRow(
            "BSI_TR03161_1", "BSI TR-03161 Teil 1", "BSI",
            "https://bsi.bund.de", "1.0", "de", "BSI_TOS_2025",
            True, False, False, False, 30, "restricted",
            "BSI Nutzungsbedingungen", "restricted",
        )
        entry = self._permissions_for([bsi])[0]
        assert entry["allowed_analysis"] is True
        for denied in (
            "allowed_store_excerpt",
            "allowed_ship_embeddings",
            "allowed_ship_in_product",
        ):
            assert entry[denied] is False

View File

@@ -0,0 +1,142 @@
"""Tests for the CI/CD control validator script."""
import json
import subprocess
import sys
from pathlib import Path
import pytest
# Third parent of this test file's resolved path; used as the base for the
# paths below and as the working directory for the validator subprocess.
REPO_ROOT = Path(__file__).resolve().parent.parent.parent
# Validator script that the subprocess-based tests execute.
VALIDATOR = REPO_ROOT / "scripts" / "validate-controls.py"
# Controls JSON document loaded by the structure tests.
CONTROLS_FILE = REPO_ROOT / "ai-compliance-sdk" / "policies" / "canonical_controls_v1.json"
class TestValidatorScript:
    """Integration tests that run validate-controls.py as a subprocess."""

    # Upper bound (seconds) for a single validator run, so that a hung
    # script fails the test with TimeoutExpired instead of blocking the
    # whole suite indefinitely.
    _TIMEOUT_SECONDS = 120

    def _run_validator(self):
        """Run the validator from the repository root.

        Returns:
            The subprocess.CompletedProcess with captured text output.

        Raises:
            subprocess.TimeoutExpired: if the script exceeds the timeout.
        """
        return subprocess.run(
            [sys.executable, str(VALIDATOR)],
            capture_output=True,
            text=True,
            cwd=str(REPO_ROOT),
            timeout=self._TIMEOUT_SECONDS,
            # The exit code is asserted explicitly by the tests.
            check=False,
        )

    def test_validator_passes_on_valid_controls(self):
        """The validator exits with 0 and prints its success banner."""
        result = self._run_validator()
        assert result.returncode == 0, f"Validator failed:\n{result.stdout}\n{result.stderr}"
        assert "ALL CHECKS PASSED" in result.stdout

    def test_validator_reports_control_count(self):
        """The validator summary reports the control and anchor counts."""
        result = self._run_validator()
        # The expected count is pinned to the current controls file.
        assert "Controls: 10" in result.stdout, (
            f"Unexpected validator output:\n{result.stdout}\n{result.stderr}"
        )
        assert "Open Anchors:" in result.stdout
class TestControlsJsonStructure:
    """Direct validation of the canonical controls JSON file structure."""

    @pytest.fixture
    def controls_data(self):
        """Load and return the parsed controls JSON document."""
        # JSON is UTF-8; do not rely on the platform's locale encoding,
        # which would break on non-ASCII text on non-UTF-8 systems.
        with open(CONTROLS_FILE, encoding="utf-8") as f:
            return json.load(f)

    def test_top_level_keys(self, controls_data):
        """The document carries all expected top-level sections."""
        assert "version" in controls_data
        assert "schema" in controls_data
        assert "framework" in controls_data
        assert "domains" in controls_data
        assert "controls" in controls_data

    def test_framework_metadata(self, controls_data):
        """The framework section identifies bp_security_v1 at version 1.0."""
        fw = controls_data["framework"]
        assert fw["id"] == "bp_security_v1"
        assert fw["version"] == "1.0"

    def test_all_controls_have_open_anchors(self, controls_data):
        """Every control lists at least one open anchor."""
        for ctrl in controls_data["controls"]:
            anchors = ctrl.get("open_anchors", [])
            assert len(anchors) >= 1, (
                f"Control {ctrl['control_id']} has no open anchors"
            )

    def test_no_bsi_nomenclature_in_controls(self, controls_data):
        """Ensure no BSI-proprietary IDs leak into product-facing fields."""
        import re
        bsi_pattern = re.compile(r"O\.[A-Za-z]+_[0-9]+")
        for ctrl in controls_data["controls"]:
            for field in ["objective", "rationale", "title"]:
                text = ctrl.get(field, "")
                match = bsi_pattern.search(text)
                assert match is None, (
                    f"Control {ctrl['control_id']}.{field} contains BSI pattern: {match.group()}"
                )

    def test_control_id_format(self, controls_data):
        """Control IDs are 2-6 uppercase letters, a dash, then three digits."""
        import re
        pattern = re.compile(r"^[A-Z]{2,6}-[0-9]{3}$")
        for ctrl in controls_data["controls"]:
            assert pattern.match(ctrl["control_id"]), (
                f"Invalid control_id format: {ctrl['control_id']}"
            )

    def test_valid_severities(self, controls_data):
        """Severity is one of low, medium, high or critical."""
        valid = {"low", "medium", "high", "critical"}
        for ctrl in controls_data["controls"]:
            assert ctrl["severity"] in valid, (
                f"Control {ctrl['control_id']} has invalid severity: {ctrl['severity']}"
            )

    def test_domains_referenced_by_controls(self, controls_data):
        """Every control's domain is declared in the domains section."""
        domain_ids = {d["id"] for d in controls_data["domains"]}
        for ctrl in controls_data["controls"]:
            assert ctrl["domain"] in domain_ids, (
                f"Control {ctrl['control_id']} references unknown domain: {ctrl['domain']}"
            )

    def test_open_anchor_structure(self, controls_data):
        """Each anchor has framework, ref and an HTTPS url."""
        for ctrl in controls_data["controls"]:
            for i, anchor in enumerate(ctrl.get("open_anchors", [])):
                assert "framework" in anchor, (
                    f"Control {ctrl['control_id']}: anchor[{i}] missing 'framework'"
                )
                assert "ref" in anchor, (
                    f"Control {ctrl['control_id']}: anchor[{i}] missing 'ref'"
                )
                assert "url" in anchor, (
                    f"Control {ctrl['control_id']}: anchor[{i}] missing 'url'"
                )
                assert anchor["url"].startswith("https://"), (
                    f"Control {ctrl['control_id']}: anchor[{i}] URL not HTTPS"
                )

    def test_evidence_structure(self, controls_data):
        """Each evidence entry has a type and a description."""
        for ctrl in controls_data["controls"]:
            for i, ev in enumerate(ctrl.get("evidence", [])):
                assert "type" in ev, (
                    f"Control {ctrl['control_id']}: evidence[{i}] missing 'type'"
                )
                assert "description" in ev, (
                    f"Control {ctrl['control_id']}: evidence[{i}] missing 'description'"
                )

    def test_risk_scores_in_range(self, controls_data):
        """Risk scores, when present, lie within 0..10 inclusive."""
        for ctrl in controls_data["controls"]:
            if ctrl.get("risk_score") is not None:
                assert 0 <= ctrl["risk_score"] <= 10, (
                    f"Control {ctrl['control_id']}: risk_score {ctrl['risk_score']} out of range"
                )

    def test_total_controls_matches(self, controls_data):
        """The declared total equals the number of listed controls."""
        assert controls_data["total_controls"] == len(controls_data["controls"])

    def test_independent_taxonomy_no_tr_reference(self, controls_data):
        """Verify controls don't reference BSI TR documents in product text."""
        import re
        tr_pattern = re.compile(r"TR-03161|BSI-TR-")
        for ctrl in controls_data["controls"]:
            for field in ["objective", "rationale", "title"]:
                text = ctrl.get(field, "")
                match = tr_pattern.search(text)
                assert match is None, (
                    f"Control {ctrl['control_id']}.{field} references BSI TR: {match.group()}"
                )