Files
breakpilot-compliance/backend-compliance/tests/test_v1_enrichment.py
Benjamin Admin 81c9ce5de3
All checks were successful
CI/CD / go-lint (push) Has been skipped
CI/CD / python-lint (push) Has been skipped
CI/CD / nodejs-lint (push) Has been skipped
CI/CD / test-go-ai-compliance (push) Successful in 33s
CI/CD / test-python-backend-compliance (push) Successful in 30s
CI/CD / test-python-document-crawler (push) Successful in 21s
CI/CD / test-python-dsms-gateway (push) Successful in 16s
CI/CD / validate-canonical-controls (push) Successful in 9s
CI/CD / Deploy (push) Successful in 1s
fix: V1 Enrichment — Qdrant Collection + Parent-Resolution fuer regulatorische Matches
Die atomic_controls_dedup Collection (51k Punkte) enthaelt nur atomare
Controls ohne source_citation. Jetzt wird der Parent-Control aufgeloest,
der die Rechtsgrundlage traegt. Deduplizierung nach Parent-UUID verhindert
mehrfache Eintraege fuer die gleiche Regulation.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-26 10:52:41 +01:00

235 lines
8.8 KiB
Python

"""Tests for V1 Control Enrichment (Eigenentwicklung matching)."""
import sys
sys.path.insert(0, ".")
import pytest
from unittest.mock import AsyncMock, MagicMock, patch
from compliance.services.v1_enrichment import (
enrich_v1_matches,
get_v1_matches,
count_v1_controls,
)
class TestV1EnrichmentDryRun:
    """Dry-run mode is expected to report statistics only."""

    @pytest.mark.asyncio
    async def test_dry_run_returns_stats(self):
        """A dry run reports the total v1 count and the fetched sample controls."""
        access_row = MagicMock(
            id="uuid-v1-1",
            control_id="ACC-013",
            title="Zugriffskontrolle",
            objective="Zugriff einschraenken",
            category="access",
        )
        encryption_row = MagicMock(
            id="uuid-v1-2",
            control_id="SEC-005",
            title="Verschluesselung",
            objective="Daten verschluesseln",
            category="encryption",
        )
        total_row = MagicMock(cnt=863)

        with patch("compliance.services.v1_enrichment.SessionLocal") as session_factory:
            db = MagicMock()
            # SessionLocal() is presumably used as a context manager by the
            # service; entering it hands out the mocked session.
            session_cm = session_factory.return_value
            session_cm.__enter__.return_value = db
            session_cm.__exit__.return_value = False

            # Every db.execute() call returns the same result object, so it
            # serves both the row listing (fetchall) and the count (fetchone).
            executed = db.execute.return_value
            executed.fetchall.return_value = [access_row, encryption_row]
            executed.fetchone.return_value = total_row

            result = await enrich_v1_matches(dry_run=True, batch_size=100, offset=0)

        assert result["dry_run"] is True
        assert result["total_v1"] == 863
        samples = result["sample_controls"]
        assert len(samples) == 2
        assert samples[0]["control_id"] == "ACC-013"
class TestV1EnrichmentExecution:
    """Execution mode is expected to find matches and insert them."""

    @pytest.mark.asyncio
    async def test_processes_and_inserts_matches(self):
        """One v1 control with one strong hit yields one match on the parent control."""
        v1_rows = [
            MagicMock(
                id="uuid-v1-1",
                control_id="ACC-013",
                title="Zugriffskontrolle",
                objective="Zugriff auf Systeme einschraenken",
                category="access",
            ),
        ]
        count_row = MagicMock(cnt=1)

        # Atomic control referenced by the vector hit: it points at a parent
        # and carries no source_citation of its own.
        atomic_row = MagicMock(
            id="uuid-atomic-1",
            control_id="SEC-042-A01",
            title="Verschluesselung (atomar)",
            source_citation=None,
            parent_control_uuid="uuid-reg-1",
            severity="high",
            category="encryption",
        )

        # Parent control: this is the row that carries the source_citation.
        parent_row = MagicMock(
            id="uuid-reg-1",
            control_id="SEC-042",
            title="Verschluesselung personenbezogener Daten",
            source_citation={"source": "DSGVO (EU) 2016/679", "article": "Art. 32"},
            parent_control_uuid=None,
            severity="high",
            category="encryption",
        )

        strong_hit = {
            "score": 0.89,
            "payload": {
                "control_uuid": "uuid-atomic-1",
                "control_id": "SEC-042-A01",
                "title": "Verschluesselung (atomar)",
            },
        }
        # Expected to fall below the service's match threshold (the threshold
        # itself is defined in the service and not visible from this test).
        weak_hit = {
            "score": 0.65,
            "payload": {
                "control_uuid": "uuid-reg-2",
                "control_id": "SEC-100",
            },
        }

        def side_effect_execute(query, params=None):
            """Pick the fetchone() row from text fragments of the executed query."""
            sql = str(query)
            # Branch order matters: the parent lookup is tested before the
            # broader canonical_controls fragment.
            if "COUNT" in sql:
                row = count_row
            elif "source_citation IS NOT NULL" in sql:
                row = parent_row
            elif any(marker in sql for marker in ("c.id = CAST", "canonical_controls c")):
                row = atomic_row
            else:
                row = count_row

            outcome = MagicMock()
            outcome.fetchall.return_value = v1_rows
            outcome.fetchone.return_value = row
            return outcome

        with patch("compliance.services.v1_enrichment.SessionLocal") as session_factory, \
                patch("compliance.services.v1_enrichment.get_embedding") as embed, \
                patch("compliance.services.v1_enrichment.qdrant_search_cross_regulation") as search:
            db = MagicMock()
            session_cm = session_factory.return_value
            session_cm.__enter__.return_value = db
            session_cm.__exit__.return_value = False
            db.execute.side_effect = side_effect_execute

            embed.return_value = [0.1] * 1024
            search.return_value = [strong_hit, weak_hit]

            result = await enrich_v1_matches(dry_run=False, batch_size=100, offset=0)

        assert result["dry_run"] is False
        assert result["processed"] == 1
        assert result["matches_inserted"] == 1
        matches = result["sample_matches"]
        assert len(matches) == 1
        # The reported match is the parent (SEC-042), not the atomic hit.
        assert matches[0]["matched_control_id"] == "SEC-042"
        assert matches[0]["similarity_score"] == 0.89

    @pytest.mark.asyncio
    async def test_empty_batch_returns_done(self):
        """An offset that yields no rows reports zero processed plus a done message."""
        total_row = MagicMock(cnt=863)

        with patch("compliance.services.v1_enrichment.SessionLocal") as session_factory:
            db = MagicMock()
            session_cm = session_factory.return_value
            session_cm.__enter__.return_value = db
            session_cm.__exit__.return_value = False

            executed = db.execute.return_value
            executed.fetchall.return_value = []
            executed.fetchone.return_value = total_row

            result = await enrich_v1_matches(dry_run=False, batch_size=100, offset=9999)

        assert result["processed"] == 0
        assert "alle v1 Controls verarbeitet" in result["message"]
class TestV1MatchesEndpoint:
    """Tests for retrieving stored matches via get_v1_matches."""

    @pytest.mark.asyncio
    async def test_returns_matches(self):
        """A single stored row comes back as one dict exposing the matched fields."""
        stored_match = MagicMock(
            matched_control_id="SEC-042",
            matched_title="Verschluesselung",
            matched_objective="Daten verschluesseln",
            matched_severity="high",
            matched_category="encryption",
            matched_source="DSGVO (EU) 2016/679",
            matched_article="Art. 32",
            matched_source_citation={"source": "DSGVO (EU) 2016/679"},
            similarity_score=0.89,
            match_rank=1,
            match_method="embedding",
        )

        with patch("compliance.services.v1_enrichment.SessionLocal") as session_factory:
            db = MagicMock()
            session_cm = session_factory.return_value
            session_cm.__enter__.return_value = db
            session_cm.__exit__.return_value = False
            db.execute.return_value.fetchall.return_value = [stored_match]

            result = await get_v1_matches("uuid-v1-1")

        assert len(result) == 1
        first = result[0]
        assert first["matched_control_id"] == "SEC-042"
        assert first["similarity_score"] == 0.89
        assert first["matched_source"] == "DSGVO (EU) 2016/679"

    @pytest.mark.asyncio
    async def test_empty_matches(self):
        """No stored rows for the given UUID results in an empty list."""
        with patch("compliance.services.v1_enrichment.SessionLocal") as session_factory:
            db = MagicMock()
            session_cm = session_factory.return_value
            session_cm.__enter__.return_value = db
            session_cm.__exit__.return_value = False
            db.execute.return_value.fetchall.return_value = []

            result = await get_v1_matches("uuid-nonexistent")

        assert result == []
class TestEigenentwicklungDetection:
    """Checks the query used to detect Eigenentwicklung (v1) controls."""

    @pytest.mark.asyncio
    async def test_count_v1_controls(self):
        """count_v1_controls returns the cnt value and filters on all three conditions."""
        total_row = MagicMock(cnt=863)

        with patch("compliance.services.v1_enrichment.SessionLocal") as session_factory:
            db = MagicMock()
            session_cm = session_factory.return_value
            session_cm.__enter__.return_value = db
            session_cm.__exit__.return_value = False
            db.execute.return_value.fetchone.return_value = total_row

            result = await count_v1_controls()

        assert result == 863

        # Inspect the first positional argument of the most recent execute()
        # call and make sure every filter condition is present in its text.
        sql = str(db.execute.call_args.args[0])
        expected_fragments = (
            "generation_strategy = 'ungrouped'",
            "source_citation IS NULL",
            "parent_control_uuid IS NULL",
        )
        for fragment in expected_fragments:
            assert fragment in sql