feat(canonical-controls): Canonical Control Library — rechtssichere Security Controls

Eigenstaendig formulierte Security Controls mit unabhaengiger Taxonomie und Open-Source-Verankerung (OWASP, NIST, ENISA). Keine BSI-Nomenklatur.

- Migration 044: 5 DB-Tabellen (frameworks, controls, sources, licenses, mappings)
- 10 Seed Controls mit 39 Open-Source-Referenzen
- License Gate: Quellen-Berechtigungspruefung (analysis/excerpt/embeddings/product)
- Too-Close-Detektor: 5 Metriken (exact-phrase, token-overlap, ngram, embedding, LCS)
- REST API: 8 Endpoints unter /v1/canonical/
- Go Loader mit Multi-Index (ID, domain, severity, framework)
- Frontend: Control Library Browser + Provenance Wiki
- CI/CD: validate-controls.py Job (schema, no-leak, open-anchors)
- 67 Tests (8 Go + 59 Python), alle PASS
- MkDocs Dokumentation

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-12 19:55:06 +01:00
parent 8442115e7c
commit 050f353192
20 changed files with 3935 additions and 0 deletions
@@ -0,0 +1,225 @@
+"""Tests for Canonical Control Library routes (canonical_control_routes.py)."""
+
+import pytest
+from unittest.mock import MagicMock, patch
+from datetime import datetime, timezone
+
+from compliance.api.canonical_control_routes import (
+    FrameworkResponse,
+    ControlResponse,
+    SimilarityCheckRequest,
+    SimilarityCheckResponse,
+    _control_row,
+)
+
+
+class TestFrameworkResponse:
+    """Tests for FrameworkResponse model."""
+
+    def test_basic_creation(self):
+        resp = FrameworkResponse(
+            id="uuid-1",
+            framework_id="bp_security_v1",
+            name="BreakPilot Security Controls",
+            version="1.0",
+            release_state="draft",
+            created_at="2026-03-12T00:00:00+00:00",
+            updated_at="2026-03-12T00:00:00+00:00",
+        )
+        assert resp.framework_id == "bp_security_v1"
+        assert resp.version == "1.0"
+
+    def test_optional_fields(self):
+        resp = FrameworkResponse(
+            id="uuid-1",
+            framework_id="test",
+            name="Test",
+            version="1.0",
+            release_state="draft",
+            created_at="2026-03-12T00:00:00+00:00",
+            updated_at="2026-03-12T00:00:00+00:00",
+        )
+        assert resp.description is None
+        assert resp.owner is None
+        assert resp.policy_version is None
+
+
+class TestControlResponse:
+    """Tests for ControlResponse model."""
+
+    def test_full_control(self):
+        resp = ControlResponse(
+            id="uuid-1",
+            framework_id="uuid-fw",
+            control_id="AUTH-001",
+            title="Multi-Factor Authentication",
+            objective="Require MFA for privileged access.",
+            rationale="Passwords alone are insufficient.",
+            scope={"platforms": ["web"]},
+            requirements=["MFA for admin accounts"],
+            test_procedure=["Test admin login without MFA"],
+            evidence=[{"type": "config", "description": "MFA config"}],
+            severity="high",
+            open_anchors=[{"framework": "OWASP ASVS", "ref": "V2.8", "url": "https://owasp.org"}],
+            release_state="draft",
+            tags=["mfa", "auth"],
+            created_at="2026-03-12T00:00:00+00:00",
+            updated_at="2026-03-12T00:00:00+00:00",
+        )
+        assert resp.control_id == "AUTH-001"
+        assert resp.severity == "high"
+        assert len(resp.open_anchors) == 1
+
+    def test_optional_numeric_fields(self):
+        resp = ControlResponse(
+            id="uuid-1",
+            framework_id="uuid-fw",
+            control_id="NET-001",
+            title="TLS",
+            objective="Encrypt traffic.",
+            rationale="Prevent eavesdropping.",
+            scope={},
+            requirements=[],
+            test_procedure=[],
+            evidence=[],
+            severity="high",
+            open_anchors=[],
+            release_state="draft",
+            tags=[],
+            created_at="2026-03-12T00:00:00+00:00",
+            updated_at="2026-03-12T00:00:00+00:00",
+        )
+        assert resp.risk_score is None
+        assert resp.implementation_effort is None
+        assert resp.evidence_confidence is None
+
+
+class TestSimilarityCheckRequest:
+    """Tests for SimilarityCheckRequest model."""
+
+    def test_valid_request(self):
+        req = SimilarityCheckRequest(
+            source_text="Die Anwendung muss MFA implementieren.",
+            candidate_text="Multi-factor authentication is required.",
+        )
+        assert req.source_text == "Die Anwendung muss MFA implementieren."
+        assert req.candidate_text == "Multi-factor authentication is required."
+
+    def test_empty_strings(self):
+        req = SimilarityCheckRequest(source_text="", candidate_text="")
+        assert req.source_text == ""
+
+
+class TestSimilarityCheckResponse:
+    """Tests for SimilarityCheckResponse model."""
+
+    def test_pass_status(self):
+        resp = SimilarityCheckResponse(
+            max_exact_run=2,
+            token_overlap=0.05,
+            ngram_jaccard=0.03,
+            embedding_cosine=0.45,
+            lcs_ratio=0.12,
+            status="PASS",
+            details={
+                "max_exact_run": "PASS",
+                "token_overlap": "PASS",
+                "ngram_jaccard": "PASS",
+                "embedding_cosine": "PASS",
+                "lcs_ratio": "PASS",
+            },
+        )
+        assert resp.status == "PASS"
+
+    def test_fail_status(self):
+        resp = SimilarityCheckResponse(
+            max_exact_run=15,
+            token_overlap=0.35,
+            ngram_jaccard=0.20,
+            embedding_cosine=0.95,
+            lcs_ratio=0.55,
+            status="FAIL",
+            details={
+                "max_exact_run": "FAIL",
+                "token_overlap": "FAIL",
+                "ngram_jaccard": "FAIL",
+                "embedding_cosine": "FAIL",
+                "lcs_ratio": "FAIL",
+            },
+        )
+        assert resp.status == "FAIL"
+
+
+class TestControlRowConversion:
+    """Tests for _control_row helper."""
+
+    def _make_row(self, **overrides):
+        now = datetime.now(timezone.utc)
+        defaults = {
+            "id": "uuid-ctrl-1",
+            "framework_id": "uuid-fw-1",
+            "control_id": "AUTH-001",
+            "title": "Multi-Factor Authentication",
+            "objective": "Require MFA.",
+            "rationale": "Passwords insufficient.",
+            "scope": {"platforms": ["web", "mobile"]},
+            "requirements": ["Req 1", "Req 2"],
+            "test_procedure": ["Test 1"],
+            "evidence": [{"type": "config", "description": "MFA config"}],
+            "severity": "high",
+            "risk_score": 8.5,
+            "implementation_effort": "m",
+            "evidence_confidence": 0.85,
+            "open_anchors": [
+                {"framework": "OWASP ASVS", "ref": "V2.8", "url": "https://owasp.org"},
+            ],
+            "release_state": "draft",
+            "tags": ["mfa"],
+            "created_at": now,
+            "updated_at": now,
+        }
+        defaults.update(overrides)
+        mock = MagicMock()
+        for key, value in defaults.items():
+            setattr(mock, key, value)
+        return mock
+
+    def test_basic_conversion(self):
+        row = self._make_row()
+        result = _control_row(row)
+        assert result["control_id"] == "AUTH-001"
+        assert result["severity"] == "high"
+        assert result["risk_score"] == 8.5
+        assert result["implementation_effort"] == "m"
+        assert result["evidence_confidence"] == 0.85
+        assert len(result["open_anchors"]) == 1
+
+    def test_null_numeric_fields(self):
+        row = self._make_row(risk_score=None, evidence_confidence=None, implementation_effort=None)
+        result = _control_row(row)
+        assert result["risk_score"] is None
+        assert result["evidence_confidence"] is None
+        assert result["implementation_effort"] is None
+
+    def test_empty_tags(self):
+        row = self._make_row(tags=None)
+        result = _control_row(row)
+        assert result["tags"] == []
+
+    def test_empty_tags_list(self):
+        row = self._make_row(tags=[])
+        result = _control_row(row)
+        assert result["tags"] == []
+
+    def test_timestamp_format(self):
+        now = datetime(2026, 3, 12, 10, 30, 0, tzinfo=timezone.utc)
+        row = self._make_row(created_at=now, updated_at=now)
+        result = _control_row(row)
+        assert "2026-03-12" in result["created_at"]
+        assert "10:30" in result["created_at"]
+
+    def test_none_timestamps(self):
+        row = self._make_row(created_at=None, updated_at=None)
+        result = _control_row(row)
+        assert result["created_at"] is None
+        assert result["updated_at"] is None
@@ -0,0 +1,161 @@
+"""Tests for License Gate service (license_gate.py)."""
+
+import pytest
+from unittest.mock import MagicMock, patch
+from collections import namedtuple
+
+from compliance.services.license_gate import (
+    check_source_allowed,
+    get_license_matrix,
+    get_source_permissions,
+    USAGE_COLUMN_MAP,
+)
+
+
+class TestUsageColumnMap:
+    """Test the usage type to column mapping."""
+
+    def test_all_usage_types_mapped(self):
+        expected = {"analysis", "store_excerpt", "ship_embeddings", "ship_in_product"}
+        assert set(USAGE_COLUMN_MAP.keys()) == expected
+
+    def test_column_names(self):
+        assert USAGE_COLUMN_MAP["analysis"] == "allowed_analysis"
+        assert USAGE_COLUMN_MAP["store_excerpt"] == "allowed_store_excerpt"
+        assert USAGE_COLUMN_MAP["ship_embeddings"] == "allowed_ship_embeddings"
+        assert USAGE_COLUMN_MAP["ship_in_product"] == "allowed_ship_in_product"
+
+
+class TestCheckSourceAllowed:
+    """Tests for check_source_allowed()."""
+
+    def _mock_db(self, return_value):
+        db = MagicMock()
+        mock_result = MagicMock()
+        if return_value is None:
+            mock_result.fetchone.return_value = None
+        else:
+            mock_result.fetchone.return_value = (return_value,)
+        db.execute.return_value = mock_result
+        return db
+
+    def test_allowed_analysis(self):
+        db = self._mock_db(True)
+        assert check_source_allowed(db, "OWASP_ASVS", "analysis") is True
+
+    def test_denied_ship_in_product(self):
+        db = self._mock_db(False)
+        assert check_source_allowed(db, "BSI_TR03161_1", "ship_in_product") is False
+
+    def test_unknown_source(self):
+        db = self._mock_db(None)
+        assert check_source_allowed(db, "NONEXISTENT", "analysis") is False
+
+    def test_unknown_usage_type(self):
+        db = MagicMock()
+        assert check_source_allowed(db, "OWASP_ASVS", "invalid_type") is False
+        # DB should not be called for invalid usage type
+        db.execute.assert_not_called()
+
+    def test_allowed_store_excerpt(self):
+        db = self._mock_db(True)
+        assert check_source_allowed(db, "OWASP_ASVS", "store_excerpt") is True
+
+    def test_denied_store_excerpt(self):
+        db = self._mock_db(False)
+        assert check_source_allowed(db, "BSI_TR03161_1", "store_excerpt") is False
+
+
+class TestGetLicenseMatrix:
+    """Tests for get_license_matrix()."""
+
+    def test_returns_list(self):
+        LicRow = namedtuple("LicRow", [
+            "license_id", "name", "terms_url", "commercial_use",
+            "ai_training_restriction", "tdm_allowed_under_44b",
+            "deletion_required", "notes",
+        ])
+        rows = [
+            LicRow("OWASP_CC_BY_SA", "CC BY-SA 4.0", "https://example.com",
+                   "allowed", None, "yes", False, "Open source"),
+            LicRow("BSI_TOS_2025", "BSI ToS", "https://bsi.bund.de",
+                   "restricted", "unclear", "yes", True, "Commercial restricted"),
+        ]
+
+        db = MagicMock()
+        db.execute.return_value.fetchall.return_value = rows
+        result = get_license_matrix(db)
+
+        assert len(result) == 2
+        assert result[0]["license_id"] == "OWASP_CC_BY_SA"
+        assert result[0]["commercial_use"] == "allowed"
+        assert result[0]["deletion_required"] is False
+        assert result[1]["license_id"] == "BSI_TOS_2025"
+        assert result[1]["commercial_use"] == "restricted"
+        assert result[1]["deletion_required"] is True
+
+    def test_empty_result(self):
+        db = MagicMock()
+        db.execute.return_value.fetchall.return_value = []
+        result = get_license_matrix(db)
+        assert result == []
+
+
+class TestGetSourcePermissions:
+    """Tests for get_source_permissions()."""
+
+    def test_returns_list_with_join(self):
+        SrcRow = namedtuple("SrcRow", [
+            "source_id", "title", "publisher", "url", "version_label",
+            "language", "license_id", "allowed_analysis", "allowed_store_excerpt",
+            "allowed_ship_embeddings", "allowed_ship_in_product",
+            "vault_retention_days", "vault_access_tier",
+            "license_name", "commercial_use",
+        ])
+        rows = [
+            SrcRow(
+                "OWASP_ASVS", "OWASP ASVS", "OWASP Foundation",
+                "https://owasp.org", "4.0.3", "en", "OWASP_CC_BY_SA",
+                True, True, True, True, 30, "public",
+                "CC BY-SA 4.0", "allowed",
+            ),
+        ]
+
+        db = MagicMock()
+        db.execute.return_value.fetchall.return_value = rows
+        result = get_source_permissions(db)
+
+        assert len(result) == 1
+        src = result[0]
+        assert src["source_id"] == "OWASP_ASVS"
+        assert src["allowed_analysis"] is True
+        assert src["allowed_ship_in_product"] is True
+        assert src["license_name"] == "CC BY-SA 4.0"
+        assert src["commercial_use"] == "allowed"
+
+    def test_restricted_source(self):
+        SrcRow = namedtuple("SrcRow", [
+            "source_id", "title", "publisher", "url", "version_label",
+            "language", "license_id", "allowed_analysis", "allowed_store_excerpt",
+            "allowed_ship_embeddings", "allowed_ship_in_product",
+            "vault_retention_days", "vault_access_tier",
+            "license_name", "commercial_use",
+        ])
+        rows = [
+            SrcRow(
+                "BSI_TR03161_1", "BSI TR-03161 Teil 1", "BSI",
+                "https://bsi.bund.de", "1.0", "de", "BSI_TOS_2025",
+                True, False, False, False, 30, "restricted",
+                "BSI Nutzungsbedingungen", "restricted",
+            ),
+        ]
+
+        db = MagicMock()
+        db.execute.return_value.fetchall.return_value = rows
+        result = get_source_permissions(db)
+
+        src = result[0]
+        assert src["allowed_analysis"] is True
+        assert src["allowed_store_excerpt"] is False
+        assert src["allowed_ship_embeddings"] is False
+        assert src["allowed_ship_in_product"] is False
@@ -0,0 +1,142 @@
+"""Tests for the CI/CD control validator script."""
+
+import json
+import subprocess
+import sys
+from pathlib import Path
+
+import pytest
+
+# Repository root: three directory levels above this test file.
+REPO_ROOT = Path(__file__).resolve().parent.parent.parent
+# Validator script that the integration tests run as a subprocess.
+VALIDATOR = REPO_ROOT / "scripts" / "validate-controls.py"
+# Canonical controls JSON file loaded by the structure tests.
+CONTROLS_FILE = REPO_ROOT / "ai-compliance-sdk" / "policies" / "canonical_controls_v1.json"
+
+
+class TestValidatorScript:
+    """Integration tests for validate-controls.py."""
+
+    def test_validator_passes_on_valid_controls(self):
+        result = subprocess.run(
+            [sys.executable, str(VALIDATOR)],
+            capture_output=True, text=True, cwd=str(REPO_ROOT),
+        )
+        assert result.returncode == 0, f"Validator failed:\n{result.stdout}\n{result.stderr}"
+        assert "ALL CHECKS PASSED" in result.stdout
+
+    def test_validator_reports_control_count(self):
+        result = subprocess.run(
+            [sys.executable, str(VALIDATOR)],
+            capture_output=True, text=True, cwd=str(REPO_ROOT),
+        )
+        assert "Controls:     10" in result.stdout
+        assert "Open Anchors:" in result.stdout
+
+
+class TestControlsJsonStructure:
+    """Direct validation of the JSON file structure."""
+
+    @pytest.fixture
+    def controls_data(self):
+        with open(CONTROLS_FILE) as f:
+            return json.load(f)
+
+    def test_top_level_keys(self, controls_data):
+        assert "version" in controls_data
+        assert "schema" in controls_data
+        assert "framework" in controls_data
+        assert "domains" in controls_data
+        assert "controls" in controls_data
+
+    def test_framework_metadata(self, controls_data):
+        fw = controls_data["framework"]
+        assert fw["id"] == "bp_security_v1"
+        assert fw["version"] == "1.0"
+
+    def test_all_controls_have_open_anchors(self, controls_data):
+        for ctrl in controls_data["controls"]:
+            anchors = ctrl.get("open_anchors", [])
+            assert len(anchors) >= 1, (
+                f"Control {ctrl['control_id']} has no open anchors"
+            )
+
+    def test_no_bsi_nomenclature_in_controls(self, controls_data):
+        """Ensure no BSI-proprietary IDs leak into product-facing fields."""
+        import re
+        bsi_pattern = re.compile(r"O\.[A-Za-z]+_[0-9]+")
+        for ctrl in controls_data["controls"]:
+            for field in ["objective", "rationale", "title"]:
+                text = ctrl.get(field, "")
+                match = bsi_pattern.search(text)
+                assert match is None, (
+                    f"Control {ctrl['control_id']}.{field} contains BSI pattern: {match.group()}"
+                )
+
+    def test_control_id_format(self, controls_data):
+        import re
+        pattern = re.compile(r"^[A-Z]{2,6}-[0-9]{3}$")
+        for ctrl in controls_data["controls"]:
+            assert pattern.match(ctrl["control_id"]), (
+                f"Invalid control_id format: {ctrl['control_id']}"
+            )
+
+    def test_valid_severities(self, controls_data):
+        valid = {"low", "medium", "high", "critical"}
+        for ctrl in controls_data["controls"]:
+            assert ctrl["severity"] in valid, (
+                f"Control {ctrl['control_id']} has invalid severity: {ctrl['severity']}"
+            )
+
+    def test_domains_referenced_by_controls(self, controls_data):
+        domain_ids = {d["id"] for d in controls_data["domains"]}
+        for ctrl in controls_data["controls"]:
+            assert ctrl["domain"] in domain_ids, (
+                f"Control {ctrl['control_id']} references unknown domain: {ctrl['domain']}"
+            )
+
+    def test_open_anchor_structure(self, controls_data):
+        for ctrl in controls_data["controls"]:
+            for i, anchor in enumerate(ctrl.get("open_anchors", [])):
+                assert "framework" in anchor, (
+                    f"Control {ctrl['control_id']}: anchor[{i}] missing 'framework'"
+                )
+                assert "ref" in anchor, (
+                    f"Control {ctrl['control_id']}: anchor[{i}] missing 'ref'"
+                )
+                assert "url" in anchor, (
+                    f"Control {ctrl['control_id']}: anchor[{i}] missing 'url'"
+                )
+                assert anchor["url"].startswith("https://"), (
+                    f"Control {ctrl['control_id']}: anchor[{i}] URL not HTTPS"
+                )
+
+    def test_evidence_structure(self, controls_data):
+        for ctrl in controls_data["controls"]:
+            for i, ev in enumerate(ctrl.get("evidence", [])):
+                assert "type" in ev, (
+                    f"Control {ctrl['control_id']}: evidence[{i}] missing 'type'"
+                )
+                assert "description" in ev, (
+                    f"Control {ctrl['control_id']}: evidence[{i}] missing 'description'"
+                )
+
+    def test_risk_scores_in_range(self, controls_data):
+        for ctrl in controls_data["controls"]:
+            if ctrl.get("risk_score") is not None:
+                assert 0 <= ctrl["risk_score"] <= 10, (
+                    f"Control {ctrl['control_id']}: risk_score {ctrl['risk_score']} out of range"
+                )
+
+    def test_total_controls_matches(self, controls_data):
+        assert controls_data["total_controls"] == len(controls_data["controls"])
+
+    def test_independent_taxonomy_no_tr_reference(self, controls_data):
+        """Verify controls don't reference BSI TR documents in product text."""
+        import re
+        tr_pattern = re.compile(r"TR-03161|BSI-TR-")
+        for ctrl in controls_data["controls"]:
+            for field in ["objective", "rationale", "title"]:
+                text = ctrl.get(field, "")
+                match = tr_pattern.search(text)
+                assert match is None, (
+                    f"Control {ctrl['control_id']}.{field} references BSI TR: {match.group()}"
+                )