"""Tests for V1 Control Enrichment (Eigenentwicklung matching).""" import sys sys.path.insert(0, ".") import pytest from unittest.mock import AsyncMock, MagicMock, patch from compliance.services.v1_enrichment import ( enrich_v1_matches, get_v1_matches, count_v1_controls, ) class TestV1EnrichmentDryRun: """Dry-run mode should return statistics without touching DB.""" @pytest.mark.asyncio async def test_dry_run_returns_stats(self): mock_v1 = [ MagicMock( id="uuid-v1-1", control_id="ACC-013", title="Zugriffskontrolle", objective="Zugriff einschraenken", category="access", ), MagicMock( id="uuid-v1-2", control_id="SEC-005", title="Verschluesselung", objective="Daten verschluesseln", category="encryption", ), ] mock_count = MagicMock(cnt=863) with patch("compliance.services.v1_enrichment.SessionLocal") as mock_session: db = MagicMock() mock_session.return_value.__enter__ = MagicMock(return_value=db) mock_session.return_value.__exit__ = MagicMock(return_value=False) # First call: v1 controls, second call: count db.execute.return_value.fetchall.return_value = mock_v1 db.execute.return_value.fetchone.return_value = mock_count result = await enrich_v1_matches(dry_run=True, batch_size=100, offset=0) assert result["dry_run"] is True assert result["total_v1"] == 863 assert len(result["sample_controls"]) == 2 assert result["sample_controls"][0]["control_id"] == "ACC-013" class TestV1EnrichmentExecution: """Execution mode should find matches and insert them.""" @pytest.mark.asyncio async def test_processes_and_inserts_matches(self): mock_v1 = [ MagicMock( id="uuid-v1-1", control_id="ACC-013", title="Zugriffskontrolle", objective="Zugriff auf Systeme einschraenken", category="access", ), ] mock_count = MagicMock(cnt=1) # Atomic control found in Qdrant (has parent, no source_citation) mock_atomic_row = MagicMock( id="uuid-atomic-1", control_id="SEC-042-A01", title="Verschluesselung (atomar)", source_citation=None, # Atomic controls don't have source_citation parent_control_uuid="uuid-reg-1", severity="high", category="encryption", ) # Parent control (has source_citation) mock_parent_row = MagicMock( id="uuid-reg-1", control_id="SEC-042", title="Verschluesselung personenbezogener Daten", source_citation={"source": "DSGVO (EU) 2016/679", "article": "Art. 32"}, parent_control_uuid=None, severity="high", category="encryption", ) mock_qdrant_results = [ { "score": 0.89, "payload": { "control_uuid": "uuid-atomic-1", "control_id": "SEC-042-A01", "title": "Verschluesselung (atomar)", }, }, { "score": 0.65, # Below threshold "payload": { "control_uuid": "uuid-reg-2", "control_id": "SEC-100", }, }, ] with patch("compliance.services.v1_enrichment.SessionLocal") as mock_session: db = MagicMock() mock_session.return_value.__enter__ = MagicMock(return_value=db) mock_session.return_value.__exit__ = MagicMock(return_value=False) # Route queries to correct mock data def side_effect_execute(query, params=None): result = MagicMock() query_str = str(query) result.fetchall.return_value = mock_v1 if "COUNT" in query_str: result.fetchone.return_value = mock_count elif "source_citation IS NOT NULL" in query_str: # Parent lookup result.fetchone.return_value = mock_parent_row elif "c.id = CAST" in query_str or "canonical_controls c" in query_str: # Direct atomic control lookup result.fetchone.return_value = mock_atomic_row else: result.fetchone.return_value = mock_count return result db.execute.side_effect = side_effect_execute with patch("compliance.services.v1_enrichment.get_embedding") as mock_embed, \ patch("compliance.services.v1_enrichment.qdrant_search_cross_regulation") as mock_qdrant: mock_embed.return_value = [0.1] * 1024 mock_qdrant.return_value = mock_qdrant_results result = await enrich_v1_matches(dry_run=False, batch_size=100, offset=0) assert result["dry_run"] is False assert result["processed"] == 1 assert result["matches_inserted"] == 1 assert len(result["sample_matches"]) == 1 assert result["sample_matches"][0]["matched_control_id"] == "SEC-042" assert result["sample_matches"][0]["similarity_score"] == 0.89 @pytest.mark.asyncio async def test_empty_batch_returns_done(self): mock_count = MagicMock(cnt=863) with patch("compliance.services.v1_enrichment.SessionLocal") as mock_session: db = MagicMock() mock_session.return_value.__enter__ = MagicMock(return_value=db) mock_session.return_value.__exit__ = MagicMock(return_value=False) db.execute.return_value.fetchall.return_value = [] db.execute.return_value.fetchone.return_value = mock_count result = await enrich_v1_matches(dry_run=False, batch_size=100, offset=9999) assert result["processed"] == 0 assert "alle v1 Controls verarbeitet" in result["message"] class TestV1MatchesEndpoint: """Test the matches retrieval.""" @pytest.mark.asyncio async def test_returns_matches(self): mock_rows = [ MagicMock( matched_control_id="SEC-042", matched_title="Verschluesselung", matched_objective="Daten verschluesseln", matched_severity="high", matched_category="encryption", matched_source="DSGVO (EU) 2016/679", matched_article="Art. 32", matched_source_citation={"source": "DSGVO (EU) 2016/679"}, similarity_score=0.89, match_rank=1, match_method="embedding", ), ] with patch("compliance.services.v1_enrichment.SessionLocal") as mock_session: db = MagicMock() mock_session.return_value.__enter__ = MagicMock(return_value=db) mock_session.return_value.__exit__ = MagicMock(return_value=False) db.execute.return_value.fetchall.return_value = mock_rows result = await get_v1_matches("uuid-v1-1") assert len(result) == 1 assert result[0]["matched_control_id"] == "SEC-042" assert result[0]["similarity_score"] == 0.89 assert result[0]["matched_source"] == "DSGVO (EU) 2016/679" @pytest.mark.asyncio async def test_empty_matches(self): with patch("compliance.services.v1_enrichment.SessionLocal") as mock_session: db = MagicMock() mock_session.return_value.__enter__ = MagicMock(return_value=db) mock_session.return_value.__exit__ = MagicMock(return_value=False) db.execute.return_value.fetchall.return_value = [] result = await get_v1_matches("uuid-nonexistent") assert result == [] class TestEigenentwicklungDetection: """Verify the Eigenentwicklung detection query.""" @pytest.mark.asyncio async def test_count_v1_controls(self): mock_count = MagicMock(cnt=863) with patch("compliance.services.v1_enrichment.SessionLocal") as mock_session: db = MagicMock() mock_session.return_value.__enter__ = MagicMock(return_value=db) mock_session.return_value.__exit__ = MagicMock(return_value=False) db.execute.return_value.fetchone.return_value = mock_count result = await count_v1_controls() assert result == 863 # Verify the query includes all conditions call_args = db.execute.call_args[0][0] query_str = str(call_args) assert "generation_strategy = 'ungrouped'" in query_str assert "source_citation IS NULL" in query_str assert "parent_control_uuid IS NULL" in query_str