All checks were successful
CI/CD / go-lint (push) Has been skipped
CI/CD / python-lint (push) Has been skipped
CI/CD / nodejs-lint (push) Has been skipped
CI/CD / test-go-ai-compliance (push) Successful in 33s
CI/CD / test-python-backend-compliance (push) Successful in 30s
CI/CD / test-python-document-crawler (push) Successful in 21s
CI/CD / test-python-dsms-gateway (push) Successful in 16s
CI/CD / validate-canonical-controls (push) Successful in 9s
CI/CD / Deploy (push) Successful in 1s
Die Collection atomic_controls_dedup (51k Punkte) enthaelt nur atomare Controls ohne source_citation. Deshalb wird jetzt der Parent-Control aufgeloest, der die Rechtsgrundlage traegt. Die Deduplizierung nach Parent-UUID verhindert mehrfache Eintraege fuer dieselbe Regulation. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
235 lines
8.8 KiB
Python
235 lines
8.8 KiB
Python
"""Tests for V1 Control Enrichment (Eigenentwicklung matching)."""
|
|
import sys
|
|
sys.path.insert(0, ".")
|
|
|
|
import pytest
|
|
from unittest.mock import AsyncMock, MagicMock, patch
|
|
|
|
from compliance.services.v1_enrichment import (
|
|
enrich_v1_matches,
|
|
get_v1_matches,
|
|
count_v1_controls,
|
|
)
|
|
|
|
|
|
class TestV1EnrichmentDryRun:
    """Dry-run behaviour of ``enrich_v1_matches``.

    ``SessionLocal`` is patched, so the test only inspects the statistics
    dictionary returned by the call. It does not itself assert that no
    write reached the (mocked) database.
    """

    @pytest.mark.asyncio
    async def test_dry_run_returns_stats(self):
        """A dry run echoes the flag, the total count and the sampled batch."""
        access_control = MagicMock(
            id="uuid-v1-1",
            control_id="ACC-013",
            title="Zugriffskontrolle",
            objective="Zugriff einschraenken",
            category="access",
        )
        encryption_control = MagicMock(
            id="uuid-v1-2",
            control_id="SEC-005",
            title="Verschluesselung",
            objective="Daten verschluesseln",
            category="encryption",
        )
        v1_rows = [access_control, encryption_control]
        count_row = MagicMock(cnt=863)

        with patch("compliance.services.v1_enrichment.SessionLocal") as session_factory:
            db = MagicMock()
            session_cm = session_factory.return_value
            session_cm.__enter__.return_value = db
            session_cm.__exit__.return_value = False

            # Every execute() call shares one cursor mock: fetchall() serves
            # the v1 batch, fetchone() serves the count row.
            cursor = db.execute.return_value
            cursor.fetchall.return_value = v1_rows
            cursor.fetchone.return_value = count_row

            result = await enrich_v1_matches(dry_run=True, batch_size=100, offset=0)

        assert result["dry_run"] is True
        assert result["total_v1"] == 863

        samples = result["sample_controls"]
        assert len(samples) == 2
        assert samples[0]["control_id"] == "ACC-013"
|
|
|
|
|
|
class TestV1EnrichmentExecution:
    """Execution (non-dry-run) behaviour of ``enrich_v1_matches``.

    The database session, the embedding call and the Qdrant search are all
    replaced by mocks.
    """

    @pytest.mark.asyncio
    async def test_processes_and_inserts_matches(self):
        """One v1 control with one strong Qdrant hit produces one match.

        The strong hit references an atomic control without
        ``source_citation``; the reported match is expected to carry the
        parent's control id (``SEC-042``) and the hit's score.
        """
        v1_rows = [
            MagicMock(
                id="uuid-v1-1",
                control_id="ACC-013",
                title="Zugriffskontrolle",
                objective="Zugriff auf Systeme einschraenken",
                category="access",
            ),
        ]
        count_row = MagicMock(cnt=1)

        # Row for the atomic control the strong Qdrant hit points at: it has
        # a parent but carries no source_citation of its own.
        atomic_row = MagicMock(
            id="uuid-atomic-1",
            control_id="SEC-042-A01",
            title="Verschluesselung (atomar)",
            source_citation=None,
            parent_control_uuid="uuid-reg-1",
            severity="high",
            category="encryption",
        )
        # Row for the parent control, which holds the source_citation.
        parent_row = MagicMock(
            id="uuid-reg-1",
            control_id="SEC-042",
            title="Verschluesselung personenbezogener Daten",
            source_citation={"source": "DSGVO (EU) 2016/679", "article": "Art. 32"},
            parent_control_uuid=None,
            severity="high",
            category="encryption",
        )

        strong_hit = {
            "score": 0.89,
            "payload": {
                "control_uuid": "uuid-atomic-1",
                "control_id": "SEC-042-A01",
                "title": "Verschluesselung (atomar)",
            },
        }
        # Meant to fall below the service's score threshold (the threshold
        # itself is not visible in this test); only one match is expected.
        weak_hit = {
            "score": 0.65,
            "payload": {
                "control_uuid": "uuid-reg-2",
                "control_id": "SEC-100",
            },
        }
        qdrant_hits = [strong_hit, weak_hit]

        def route_execute(query, params=None):
            """Return a cursor mock whose fetchone() row depends on the SQL text."""
            sql = str(query)
            if "COUNT" in sql:
                row = count_row
            elif "source_citation IS NOT NULL" in sql:
                # Parent lookup.
                row = parent_row
            elif any(marker in sql for marker in ("c.id = CAST", "canonical_controls c")):
                # Direct atomic control lookup.
                row = atomic_row
            else:
                row = count_row

            cursor = MagicMock()
            cursor.fetchall.return_value = v1_rows
            cursor.fetchone.return_value = row
            return cursor

        with patch("compliance.services.v1_enrichment.SessionLocal") as session_factory:
            db = MagicMock()
            session_cm = session_factory.return_value
            session_cm.__enter__.return_value = db
            session_cm.__exit__.return_value = False
            db.execute.side_effect = route_execute

            with patch("compliance.services.v1_enrichment.get_embedding") as embed_mock, \
                    patch(
                        "compliance.services.v1_enrichment.qdrant_search_cross_regulation"
                    ) as qdrant_mock:
                embed_mock.return_value = [0.1] * 1024
                qdrant_mock.return_value = qdrant_hits

                result = await enrich_v1_matches(dry_run=False, batch_size=100, offset=0)

        assert result["dry_run"] is False
        assert result["processed"] == 1
        assert result["matches_inserted"] == 1

        matches = result["sample_matches"]
        assert len(matches) == 1
        assert matches[0]["matched_control_id"] == "SEC-042"
        assert matches[0]["similarity_score"] == 0.89

    @pytest.mark.asyncio
    async def test_empty_batch_returns_done(self):
        """An empty batch reports zero processed controls and the done message."""
        count_row = MagicMock(cnt=863)

        with patch("compliance.services.v1_enrichment.SessionLocal") as session_factory:
            db = MagicMock()
            session_cm = session_factory.return_value
            session_cm.__enter__.return_value = db
            session_cm.__exit__.return_value = False

            cursor = db.execute.return_value
            cursor.fetchall.return_value = []
            cursor.fetchone.return_value = count_row

            result = await enrich_v1_matches(dry_run=False, batch_size=100, offset=9999)

        assert result["processed"] == 0
        assert "alle v1 Controls verarbeitet" in result["message"]
|
|
|
|
|
|
class TestV1MatchesEndpoint:
    """Retrieval of stored matches through ``get_v1_matches``."""

    @pytest.mark.asyncio
    async def test_returns_matches(self):
        """A single database row is returned as a single match dictionary."""
        match_row = MagicMock(
            matched_control_id="SEC-042",
            matched_title="Verschluesselung",
            matched_objective="Daten verschluesseln",
            matched_severity="high",
            matched_category="encryption",
            matched_source="DSGVO (EU) 2016/679",
            matched_article="Art. 32",
            matched_source_citation={"source": "DSGVO (EU) 2016/679"},
            similarity_score=0.89,
            match_rank=1,
            match_method="embedding",
        )

        with patch("compliance.services.v1_enrichment.SessionLocal") as session_factory:
            db = MagicMock()
            session_cm = session_factory.return_value
            session_cm.__enter__.return_value = db
            session_cm.__exit__.return_value = False
            db.execute.return_value.fetchall.return_value = [match_row]

            result = await get_v1_matches("uuid-v1-1")

        assert len(result) == 1

        first = result[0]
        assert first["matched_control_id"] == "SEC-042"
        assert first["similarity_score"] == 0.89
        assert first["matched_source"] == "DSGVO (EU) 2016/679"

    @pytest.mark.asyncio
    async def test_empty_matches(self):
        """No database rows results in an empty list."""
        with patch("compliance.services.v1_enrichment.SessionLocal") as session_factory:
            db = MagicMock()
            session_cm = session_factory.return_value
            session_cm.__enter__.return_value = db
            session_cm.__exit__.return_value = False
            db.execute.return_value.fetchall.return_value = []

            result = await get_v1_matches("uuid-nonexistent")

        assert result == []
|
|
|
|
|
|
class TestEigenentwicklungDetection:
    """Checks the SQL that ``count_v1_controls`` uses to detect v1 controls."""

    @pytest.mark.asyncio
    async def test_count_v1_controls(self):
        """The count row's ``cnt`` is returned and the query has all filters."""
        count_row = MagicMock(cnt=863)

        with patch("compliance.services.v1_enrichment.SessionLocal") as session_factory:
            db = MagicMock()
            session_cm = session_factory.return_value
            session_cm.__enter__.return_value = db
            session_cm.__exit__.return_value = False
            db.execute.return_value.fetchone.return_value = count_row

            result = await count_v1_controls()

        assert result == 863

        # Inspect the first positional argument of the most recent execute()
        # call and verify that every filter condition is part of the query.
        executed_sql = str(db.execute.call_args.args[0])
        expected_conditions = (
            "generation_strategy = 'ungrouped'",
            "source_citation IS NULL",
            "parent_control_uuid IS NULL",
        )
        for condition in expected_conditions:
            assert condition in executed_sql
|