""" Master Control Quality Tests. Regression tests to ensure MC assignment quality stays above 90%. Uses golden dataset of manually verified controls. """ import os import yaml import pytest from sqlalchemy import create_engine, text DB_URL = os.getenv( "DATABASE_URL", "postgresql://breakpilot:breakpilot123@postgres:5432/breakpilot_db", ) _engine = None def get_engine(): global _engine if _engine is None: _engine = create_engine( DB_URL, connect_args={"options": "-c search_path=compliance,public"}, ) return _engine def load_golden(): path = os.path.join(os.path.dirname(__file__), "golden_mc_assignments.yaml") with open(path) as f: return yaml.safe_load(f) # ── Golden Dataset Tests ── class TestGoldenMCAssignments: """Each golden control must be in the correct MC.""" @pytest.fixture(autouse=True) def setup(self): self.golden = load_golden() self.engine = get_engine() def test_golden_controls_correctly_assigned(self): """All golden controls must be in an MC matching their expected topic prefix.""" errors = [] with self.engine.connect() as c: for gc in self.golden["golden_controls"]: row = c.execute(text(""" SELECT mc.canonical_name FROM master_controls mc JOIN master_control_members mcm ON mcm.master_control_uuid = mc.id JOIN canonical_controls cc ON cc.id = mcm.control_uuid WHERE cc.control_id = :cid LIMIT 1 """), {"cid": gc["control_id"]}).fetchone() if row is None: errors.append(f"{gc['control_id']}: not found in any MC") elif not row[0].startswith(gc["expected_topic_prefix"]): errors.append( f"{gc['control_id']}: expected {gc['expected_topic_prefix']}*, " f"got {row[0]}" ) if errors: pytest.fail( f"{len(errors)} golden controls misassigned:\n" + "\n".join(f" - {e}" for e in errors) ) # ── Structural Quality Tests ── class TestMCStructuralQuality: """Structural invariants for Master Controls.""" @pytest.fixture(autouse=True) def setup(self): self.golden = load_golden() self.thresholds = self.golden["quality_thresholds"] self.engine = get_engine() def test_minimum_master_controls(self): """Must have at least 10K Master Controls.""" with self.engine.connect() as c: count = c.execute( text("SELECT count(*) FROM master_controls") ).scalar() assert count >= self.thresholds["min_master_controls"], ( f"Only {count} MCs, expected >= {self.thresholds['min_master_controls']}" ) def test_max_controls_per_mc(self): """No MC should have more than 300 controls.""" with self.engine.connect() as c: max_mc = c.execute( text("SELECT max(total_controls) FROM master_controls") ).scalar() assert max_mc <= self.thresholds["max_controls_per_mc"], ( f"Max MC has {max_mc} controls, limit is {self.thresholds['max_controls_per_mc']}" ) def test_no_empty_master_controls(self): """Every MC must have at least 1 member.""" with self.engine.connect() as c: empty = c.execute(text(""" SELECT count(*) FROM master_controls WHERE total_controls = 0 """)).scalar() assert empty == 0, f"{empty} empty MCs found" def test_all_members_reference_valid_controls(self): """Every MC member must reference an existing control.""" with self.engine.connect() as c: orphans = c.execute(text(""" SELECT count(*) FROM master_control_members mcm LEFT JOIN canonical_controls cc ON cc.id = mcm.control_uuid WHERE cc.id IS NULL """)).scalar() assert orphans == 0, f"{orphans} orphan members found" # ── Doc Check Controls Tests ── class TestDocCheckControls: """Validate doc_check_controls table.""" @pytest.fixture(autouse=True) def setup(self): self.engine = get_engine() def test_doc_check_controls_exist(self): """Must have doc_check_controls.""" with self.engine.connect() as c: count = c.execute( text("SELECT count(*) FROM doc_check_controls") ).scalar() assert count > 100, f"Only {count} doc_check_controls" def test_all_doc_types_covered(self): """All 8 document types must have controls.""" expected = {"dse", "cookie", "impressum", "widerruf", "agb", "dsfa", "avv", "loeschkonzept"} with self.engine.connect() as c: rows = c.execute(text( "SELECT DISTINCT doc_type FROM doc_check_controls" )).fetchall() actual = {r[0] for r in rows} missing = expected - actual assert not missing, f"Missing doc types: {missing}" def test_check_questions_not_empty(self): """Every doc_check_control must have a check_question.""" with self.engine.connect() as c: empty = c.execute(text(""" SELECT count(*) FROM doc_check_controls WHERE check_question IS NULL OR check_question = '' """)).scalar() assert empty == 0, f"{empty} controls without check_question"