"""Golden test suite: pytest implementation of golden_controls.yaml.

Tests pre-LLM classification (evidence, container, framework detection),
title quality rules, and negative obligation handling via control_ontology.
"""

import os
import sys

import pytest
import yaml

# Ensure control-pipeline is in the path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))

# Imported after the sys.path tweak above on purpose: the package is resolved
# relative to the parent directory of this file.
from services.control_ontology import (  # noqa: E402
    build_canonical_key,
    classify_action,
    classify_obligation,
)

# ---------------------------------------------------------------------------
# Load YAML once
# ---------------------------------------------------------------------------

GOLDEN_PATH = os.path.join(os.path.dirname(__file__), "golden_controls.yaml")

# Decode explicitly as UTF-8: the golden obligations are German text, and
# relying on the platform's default encoding can mangle umlauts on systems
# whose locale is not UTF-8.
with open(GOLDEN_PATH, encoding="utf-8") as f:
    _GOLDEN = yaml.safe_load(f)

# Top-level sections of the golden file used throughout this module.
TESTS = _GOLDEN["tests"]
QUALITY_GATES = _GOLDEN["global_quality_gates"]


def _tests_by_category(cat: str) -> list:
    """Return every entry of ``TESTS`` whose ``category`` equals *cat*.

    The result is used as the parameter list for ``pytest.mark.parametrize``;
    an unknown category yields an empty list.
    """
    return [t for t in TESTS if t["category"] == cat]


# ============================================================================
# D. Container Detection (5 tests)
# ============================================================================


class TestContainerDetection:
    """GT-CONTAINER-001..005: composite obligations must be routed as composite."""

    @pytest.mark.parametrize(
        "case",
        _tests_by_category("container_control_detection"),
        ids=lambda c: c["id"],
    )
    def test_container_routed_composite(self, case):
        """Check the routing returned by ``classify_obligation`` for one case.

        The expected routing comes from ``case["expected"]["routing_type"]``
        and falls back to ``"composite"`` when the golden case omits it.
        """
        inp = case["input"]
        # Second argument is passed as an empty string in every golden test;
        # its meaning is defined by classify_obligation (not visible here).
        result = classify_obligation(inp, "")
        expected_routing = case["expected"].get("routing_type", "composite")
        assert result["routing"] == expected_routing, (
            f"{case['id']}: expected routing={expected_routing}, "
            f"got {result['routing']} for: {inp}"
        )
# ============================================================================
# E. Framework Decomposition (5 tests)
# ============================================================================


class TestFrameworkDetection:
    """GT-FRAMEWORK-001..005: framework references must be detected."""

    @pytest.mark.parametrize(
        "case",
        _tests_by_category("framework_decomposition"),
        ids=lambda golden: golden["id"],
    )
    def test_framework_routed(self, case):
        """Compare the classifier's routing with the golden expectation.

        Falls back to ``"framework_container"`` when the case does not state
        a ``routing_type``.
        """
        text = case["input"]
        outcome = classify_obligation(text, "")
        expectations = case["expected"]
        wanted = expectations.get("routing_type", "framework_container")
        assert outcome["routing"] == wanted, (
            f"{case['id']}: expected routing={wanted}, "
            f"got {outcome['routing']} for: {text}"
        )


# ============================================================================
# F. Evidence Leakage (5 tests)
# ============================================================================


class TestEvidenceDetection:
    """GT-EVIDENCE-001..005: evidence obligations must not become controls."""

    @pytest.mark.parametrize(
        "case",
        _tests_by_category("evidence_not_control"),
        ids=lambda golden: golden["id"],
    )
    def test_evidence_detected(self, case):
        """Every case in this category must be routed as ``"evidence"``."""
        text = case["input"]
        outcome = classify_obligation(text, "")
        assert outcome["routing"] == "evidence", (
            f"{case['id']}: expected routing=evidence, "
            f"got {outcome['routing']} for: {text}"
        )
# ============================================================================
# C. Negative Obligation Handling (5 tests)
# ============================================================================


class TestNegativeObligations:
    """GT-NEG-001..005: negative patterns produce correct action_type."""

    @pytest.mark.parametrize(
        "case",
        _tests_by_category("negative_obligation_handling"),
        ids=lambda c: c["id"],
    )
    def test_negative_action_type(self, case):
        """Check ``classify_action`` against the case's expected action type.

        Cases without an ``action_type`` expectation are skipped rather than
        failed, since there is nothing to compare against.
        """
        inp = case["input"]
        expected_action = case["expected"].get("action_type")
        if not expected_action:
            pytest.skip("No expected action_type specified")
        result = classify_action(inp)
        assert result == expected_action, (
            f"{case['id']}: expected action_type={expected_action}, "
            f"got {result} for: {inp}"
        )


# ============================================================================
# H. Title Quality (structural tests — no LLM needed)
# ============================================================================


class TestTitleQuality:
    """GT-TITLE-001..005: structural title rules."""

    def test_gt_title_001_no_truncated_endings(self):
        """Truncated titles are forbidden globally."""
        assert QUALITY_GATES["truncated_titles_allowed"] is False

    def test_gt_title_005_composite_not_atomic(self):
        """'Token-Schutz muss umgesetzt werden' is a composite, not atomic."""
        # Use a default so a missing golden entry fails with a readable
        # message instead of a bare StopIteration from next().
        case = next((t for t in TESTS if t["id"] == "GT-TITLE-005"), None)
        assert case is not None, (
            "GT-TITLE-005 not found in golden_controls.yaml"
        )
        result = classify_obligation(case["input"], "")
        assert result["routing"] == "composite", (
            f"GT-TITLE-005: 'Token-Schutz' should be composite, got {result['routing']}"
        )
# ============================================================================
# B. Compound Action Split (structural — classify_action only)
# ============================================================================


class TestCompoundActionSplit:
    """Each part of a compound obligation must map to a recognizable action."""

    def test_gt_split_001_define_and_enforce(self):
        """The 'definieren' part of 'definieren und durchsetzen' yields define."""
        assert classify_action("Maximale Payload-Größen definieren") == "define"

    def test_gt_split_001_enforce(self):
        """The 'durchsetzen' part is classified as enforce."""
        assert classify_action("Payload-Größen technisch durchsetzen") == "enforce"

    def test_gt_split_003_identify(self):
        """'identifizieren' is classified as identify."""
        assert classify_action("Schwachstellen identifizieren") == "identify"

    def test_gt_split_003_assess(self):
        """'bewerten' is classified as assess."""
        assert classify_action("Schwachstellen bewerten") == "assess"

    def test_gt_split_003_monitor(self):
        """'überwachen' is classified as monitor."""
        assert classify_action("Schwachstellen überwachen") == "monitor"


# ============================================================================
# A. Duplicate Explosion (merge_key structure tests)
# ============================================================================


class TestMergeKeyStructure:
    """Verify canonical key format: action_type:object:phase:scope."""

    def test_canonical_key_format(self):
        """All four components appear in order, joined by colons."""
        components = {
            "action_type": "implement",
            "normalized_object": "api_rate_limiting",
            "phase": "implementation",
            "asset_scope": "api_endpoints",
        }
        expected_key = "implement:api_rate_limiting:implementation:api_endpoints"
        assert build_canonical_key(**components) == expected_key

    def test_canonical_key_no_empty_parts(self):
        """A key built from action and object alone starts with those two parts."""
        # NOTE(review): only the prefix is verified here. The test name
        # suggests empty segments should be rejected — confirm what
        # build_canonical_key emits when phase/scope are omitted.
        partial_key = build_canonical_key(
            action_type="define",
            normalized_object="payload_limits",
        )
        assert partial_key.startswith("define:payload_limits")

    def test_canonical_key_colon_separated(self):
        """Four positional components produce exactly four colon-separated parts."""
        full_key = build_canonical_key("test", "obj", "phase", "scope")
        assert len(full_key.split(":")) == 4
# ============================================================================
# G. Scope Dimension (structural — these need dedup to fully verify)
# ============================================================================


class TestScopeDimension:
    """Structural checks: different actor scopes should classify as atomic."""

    def test_gt_scope_001_employee_atomic(self):
        """Employee-scoped confidentiality obligation routes as atomic."""
        routing = classify_obligation(
            "Mitarbeiter müssen Vertraulichkeit wahren.", ""
        )["routing"]
        assert routing == "atomic"

    def test_gt_scope_001_subcontractor_atomic(self):
        """Subcontractor-scoped confidentiality obligation routes as atomic."""
        routing = classify_obligation(
            "Unterauftragnehmer müssen Vertraulichkeit wahren.", ""
        )["routing"]
        assert routing == "atomic"

    def test_gt_scope_005_admin_mfa_atomic(self):
        """MFA obligation for privileged accounts routes as atomic."""
        routing = classify_obligation(
            "Privilegierte Accounts müssen MFA verwenden.", ""
        )["routing"]
        assert routing == "atomic"

    def test_gt_scope_005_all_users_mfa_atomic(self):
        """MFA obligation for all users routes as atomic."""
        routing = classify_obligation(
            "Alle Nutzer müssen MFA verwenden.", ""
        )["routing"]
        assert routing == "atomic"


# ============================================================================
# Quality gate assertions
# ============================================================================


class TestQualityGates:
    """Verify global quality gate values from YAML."""

    def test_max_controls_per_obligation(self):
        """The per-obligation control cap in the golden file is 6."""
        cap = QUALITY_GATES["max_controls_per_single_obligation"]
        assert cap == 6

    def test_no_evidence_as_control(self):
        """The evidence-as-atomic-control gate is set to False."""
        allowed = QUALITY_GATES["evidence_as_atomic_control_allowed"]
        assert allowed is False

    def test_no_framework_container_as_atomic(self):
        """The framework-container-as-atomic gate is set to False."""
        allowed = QUALITY_GATES["framework_container_as_atomic_allowed"]
        assert allowed is False

    def test_no_composite_as_atomic(self):
        """The composite-control-as-atomic gate is set to False."""
        allowed = QUALITY_GATES["composite_control_as_atomic_allowed"]
        assert allowed is False