feat(decomposition): add merge pass, enrichment, and Pass 0b refinements
Some checks failed
CI/CD / go-lint (push) Has been skipped
CI/CD / python-lint (push) Has been skipped
CI/CD / nodejs-lint (push) Has been skipped
CI/CD / test-go-ai-compliance (push) Failing after 51s
CI/CD / test-python-backend-compliance (push) Successful in 34s
CI/CD / test-python-document-crawler (push) Successful in 23s
CI/CD / test-python-dsms-gateway (push) Successful in 20s
CI/CD / validate-canonical-controls (push) Successful in 12s
CI/CD / Deploy (push) Has been skipped
Some checks failed
CI/CD / go-lint (push) Has been skipped
CI/CD / python-lint (push) Has been skipped
CI/CD / nodejs-lint (push) Has been skipped
CI/CD / test-go-ai-compliance (push) Failing after 51s
CI/CD / test-python-backend-compliance (push) Successful in 34s
CI/CD / test-python-document-crawler (push) Successful in 23s
CI/CD / test-python-dsms-gateway (push) Successful in 20s
CI/CD / validate-canonical-controls (push) Successful in 12s
CI/CD / Deploy (push) Has been skipped
Add obligation refinement pipeline between Pass 0a and 0b:

- Merge pass: rule-based dedup of implementation-level duplicate obligations within the same parent control (Jaccard similarity on action+object)
- Enrich pass: classify trigger_type (event/periodic/continuous) and detect is_implementation_specific from obligation text (regex-based, no LLM)
- Pass 0b: skip merged obligations, cap severity for impl-specific, override category to 'testing' for test obligations
- Migration 075: merged_into_id, trigger_type, is_implementation_specific
- Two new API endpoints: merge-obligations, enrich-obligations
- 30+ new tests (122 total, all passing)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -49,6 +49,10 @@ from compliance.services.decomposition_pass import (
|
||||
_PASS0A_SYSTEM_PROMPT,
|
||||
_PASS0B_SYSTEM_PROMPT,
|
||||
DecompositionPass,
|
||||
_classify_trigger_type,
|
||||
_is_implementation_specific_text,
|
||||
_text_similar,
|
||||
_is_more_implementation_specific,
|
||||
)
|
||||
|
||||
|
||||
@@ -757,6 +761,7 @@ class TestDecompositionPassRun0b:
|
||||
"Service Continuity", "finance",
|
||||
'{"source": "MiCA", "article": "Art. 8"}',
|
||||
"high", "FIN-001",
|
||||
"continuous", False, # trigger_type, is_implementation_specific
|
||||
),
|
||||
]
|
||||
|
||||
@@ -809,6 +814,7 @@ class TestDecompositionPassRun0b:
|
||||
False, False,
|
||||
"Auth Controls", "authentication",
|
||||
"", "high", "AUTH-001",
|
||||
"continuous", False,
|
||||
),
|
||||
]
|
||||
|
||||
@@ -842,7 +848,8 @@ class TestDecompositionStatus:
|
||||
def test_returns_status(self):
|
||||
mock_db = MagicMock()
|
||||
mock_result = MagicMock()
|
||||
mock_result.fetchone.return_value = (5000, 1000, 3000, 2500, 200, 2000, 1800)
|
||||
# 9 columns: rich, decomposed, total, validated, rejected, composed, atomic, merged, enriched
|
||||
mock_result.fetchone.return_value = (5000, 1000, 3000, 2500, 200, 2000, 1800, 100, 2400)
|
||||
mock_db.execute.return_value = mock_result
|
||||
|
||||
decomp = DecompositionPass(db=mock_db)
|
||||
@@ -855,13 +862,17 @@ class TestDecompositionStatus:
|
||||
assert status["rejected"] == 200
|
||||
assert status["composed"] == 2000
|
||||
assert status["atomic_controls"] == 1800
|
||||
assert status["merged"] == 100
|
||||
assert status["enriched"] == 2400
|
||||
assert status["ready_for_pass0b"] == 2400 # 2500 validated - 100 merged
|
||||
assert status["decomposition_pct"] == 20.0
|
||||
assert status["composition_pct"] == 80.0
|
||||
# composition_pct: 2000 composed / 2400 ready_for_pass0b
|
||||
assert status["composition_pct"] == 83.3
|
||||
|
||||
def test_handles_zero_division(self):
|
||||
mock_db = MagicMock()
|
||||
mock_result = MagicMock()
|
||||
mock_result.fetchone.return_value = (0, 0, 0, 0, 0, 0, 0)
|
||||
mock_result.fetchone.return_value = (0, 0, 0, 0, 0, 0, 0, 0, 0)
|
||||
mock_db.execute.return_value = mock_result
|
||||
|
||||
decomp = DecompositionPass(db=mock_db)
|
||||
@@ -1089,12 +1100,14 @@ class TestDecompositionPassAnthropicBatch:
|
||||
"MFA implementieren", "implementieren", "MFA",
|
||||
False, False, "Auth", "security",
|
||||
'{"source": "DSGVO", "article": "Art. 32"}',
|
||||
"high", "CTRL-001"),
|
||||
"high", "CTRL-001",
|
||||
"continuous", False),
|
||||
("oc-uuid-2", "OC-CTRL-001-02", "parent-uuid-1",
|
||||
"MFA testen", "testen", "MFA",
|
||||
True, False, "Auth", "security",
|
||||
'{"source": "DSGVO", "article": "Art. 32"}',
|
||||
"high", "CTRL-001"),
|
||||
"high", "CTRL-001",
|
||||
"periodic", False),
|
||||
]
|
||||
|
||||
mock_seq = MagicMock()
|
||||
@@ -1232,3 +1245,441 @@ class TestSourceFilter:
|
||||
query_str = str(call_args[0][0])
|
||||
assert "IN :cats" in query_str
|
||||
assert "ILIKE" in query_str
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# TRIGGER TYPE CLASSIFICATION TESTS
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestClassifyTriggerType:
    """Pin the labels produced by ``_classify_trigger_type``.

    Each case passes an obligation text plus a second (condition) string
    and expects one of "event", "periodic" or "continuous".
    """

    def test_event_trigger_vorfall(self):
        # Incident wording inside the obligation text itself.
        obligation = "Bei einem Sicherheitsvorfall muss gemeldet werden"
        label = _classify_trigger_type(obligation, "")
        assert label == "event"

    def test_event_trigger_condition_field(self):
        # The event marker lives only in the second argument here.
        condition = "wenn ein Datenverlust festgestellt wird"
        label = _classify_trigger_type("Melden", condition)
        assert label == "event"

    def test_event_trigger_breach(self):
        # English wording is expected to be recognised as well.
        obligation = "In case of a data breach, notify authorities"
        label = _classify_trigger_type(obligation, "")
        assert label == "event"

    def test_periodic_trigger_jaehrlich(self):
        obligation = "Jährlich ist eine Überprüfung durchzuführen"
        label = _classify_trigger_type(obligation, "")
        assert label == "periodic"

    def test_periodic_trigger_regelmaessig(self):
        obligation = "Regelmäßig muss ein Audit stattfinden"
        label = _classify_trigger_type(obligation, "")
        assert label == "periodic"

    def test_periodic_trigger_quarterly(self):
        obligation = "Quarterly review of access controls"
        label = _classify_trigger_type(obligation, "")
        assert label == "periodic"

    def test_continuous_default(self):
        # No event or periodic marker in the text -> fallback label.
        obligation = "Betreiber müssen Zugangskontrollen implementieren"
        label = _classify_trigger_type(obligation, "")
        assert label == "continuous"

    def test_continuous_empty_text(self):
        label = _classify_trigger_type("", "")
        assert label == "continuous"

    def test_event_takes_precedence_over_periodic(self):
        # "Vorfall" and "regelmäßig" both occur; "event" must win.
        obligation = "Bei einem Vorfall ist regelmäßig zu prüfen"
        label = _classify_trigger_type(obligation, "")
        assert label == "event"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# IMPLEMENTATION-SPECIFIC DETECTION TESTS
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestIsImplementationSpecific:
    """Pin the verdicts of ``_is_implementation_specific_text``.

    Every case supplies three strings: the obligation text, its action
    and its object.
    """

    def test_tls_is_implementation_specific(self):
        text = "Verschlüsselung mittels TLS 1.3 sicherstellen"
        verdict = _is_implementation_specific_text(
            text, "sicherstellen", "Verschlüsselung"
        )
        assert verdict

    def test_mfa_is_implementation_specific(self):
        text = "MFA muss für alle Konten aktiviert werden"
        verdict = _is_implementation_specific_text(text, "aktivieren", "MFA")
        assert verdict

    def test_siem_is_implementation_specific(self):
        text = "Ein SIEM-System muss betrieben werden"
        verdict = _is_implementation_specific_text(
            text, "betreiben", "SIEM-System"
        )
        assert verdict

    def test_abstract_obligation_not_specific(self):
        # Names no concrete technology, so it must not be flagged.
        text = "Zugriffskontrollen müssen implementiert werden"
        verdict = _is_implementation_specific_text(
            text, "implementieren", "Zugriffskontrollen"
        )
        assert not verdict

    def test_generic_encryption_not_specific(self):
        # Encryption in general, without naming a protocol or product.
        text = "Daten müssen verschlüsselt gespeichert werden"
        verdict = _is_implementation_specific_text(
            text, "verschlüsseln", "Daten"
        )
        assert not verdict
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# TEXT SIMILARITY TESTS
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestTextSimilar:
    """Pin the outcomes of the ``_text_similar`` Jaccard helper."""

    def test_identical_strings(self):
        phrase = "implementieren mfa"
        assert _text_similar(phrase, phrase)

    def test_similar_strings(self):
        # Two of three distinct tokens are shared; 0.60 must accept that.
        shorter = "implementieren zugangskontrolle"
        longer = "implementieren zugangskontrolle system"
        matched = _text_similar(shorter, longer, threshold=0.60)
        assert matched

    def test_different_strings(self):
        # The two phrases have no token in common.
        matched = _text_similar(
            "implementieren mfa", "dokumentieren audit", threshold=0.75
        )
        assert not matched

    def test_empty_string(self):
        matched = _text_similar("", "something")
        assert not matched

    def test_both_empty(self):
        # Two empty inputs are expected to count as "not similar".
        matched = _text_similar("", "")
        assert not matched
|
||||
|
||||
|
||||
class TestIsMoreImplementationSpecific:
    """Pin the pairwise ordering of ``_is_more_implementation_specific``."""

    def test_concrete_vs_abstract(self):
        # The text naming concrete technology is passed first.
        verdict = _is_more_implementation_specific(
            "SMS-Versand muss über TLS verschlüsselt werden",
            "Kommunikation muss verschlüsselt werden",
        )
        assert verdict

    def test_abstract_vs_concrete(self):
        # Same kind of pair, but with the abstract text passed first.
        specific_text = "Firewall-Regeln müssen konfiguriert werden"
        generic_text = "Netzwerksicherheit muss gewährleistet werden"
        verdict = _is_more_implementation_specific(generic_text, specific_text)
        assert not verdict

    def test_equal_specificity_longer_wins(self):
        # Per the test name: on a specificity tie the longer text wins.
        longer = (
            "Zugriffskontrollen müssen implementiert werden und dokumentiert werden"
        )
        shorter = "Zugriffskontrollen implementieren"
        verdict = _is_more_implementation_specific(longer, shorter)
        assert verdict
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# MERGE PASS TESTS
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestMergePass:
    """Exercise ``DecompositionPass.run_merge_pass`` against a scripted mock DB."""

    @staticmethod
    def _scripted_db(*results):
        """Return a mock DB whose ``execute`` replays *results* in call order.

        Once the scripted results are exhausted, every further ``execute``
        call receives a fresh ``MagicMock``.
        """
        db = MagicMock()
        remaining = iter(results)

        def fake_execute(*_args, **_kwargs):
            scripted = next(remaining, None)
            return MagicMock() if scripted is None else scripted

        db.execute.side_effect = fake_execute
        return db

    def test_merge_pass_merges_similar_obligations(self):
        # Step 1: parents with more than one validated obligation.
        parents = MagicMock()
        parents.fetchall.return_value = [("parent-uuid-1", 3)]

        # Step 2: obligations of that parent. The first two share the
        # same action and object; the third one is unrelated.
        obligations = MagicMock()
        obligations.fetchall.return_value = [
            (
                "obl-1",
                "OC-001-01",
                "Betreiber müssen Verschlüsselung implementieren",
                "implementieren",
                "verschlüsselung",
            ),
            (
                "obl-2",
                "OC-001-02",
                "Betreiber müssen Verschlüsselung mittels TLS implementieren",
                "implementieren",
                "verschlüsselung",
            ),
            (
                "obl-3",
                "OC-001-03",
                "Betreiber müssen Zugriffsprotokolle führen",
                "führen",
                "zugriffsprotokolle",
            ),
        ]

        # Step 3: final count of obligations that were kept.
        final_count = MagicMock()
        final_count.fetchone.return_value = (2,)

        # execute() order: parents, obligations, UPDATE, final count.
        db = self._scripted_db(parents, obligations, MagicMock(), final_count)

        outcome = DecompositionPass(db=db).run_merge_pass()

        assert outcome["parents_checked"] == 1
        assert outcome["obligations_merged"] == 1  # obl-2 merged into obl-1
        assert outcome["obligations_kept"] == 2

    def test_merge_pass_no_merge_when_different_actions(self):
        parents = MagicMock()
        parents.fetchall.return_value = [("parent-uuid-1", 2)]

        # The two obligations differ in both action and object.
        obligations = MagicMock()
        obligations.fetchall.return_value = [
            (
                "obl-1",
                "OC-001-01",
                "Verschlüsselung implementieren",
                "implementieren",
                "verschlüsselung",
            ),
            (
                "obl-2",
                "OC-001-02",
                "Zugriffsprotokolle dokumentieren",
                "dokumentieren",
                "zugriffsprotokolle",
            ),
        ]

        final_count = MagicMock()
        final_count.fetchone.return_value = (2,)

        # No UPDATE is expected, so the count query is the third call.
        db = self._scripted_db(parents, obligations, final_count)

        outcome = DecompositionPass(db=db).run_merge_pass()

        assert outcome["obligations_merged"] == 0
        assert outcome["obligations_kept"] == 2
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# ENRICH PASS TESTS
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestEnrichPass:
    """Exercise ``DecompositionPass.enrich_obligations`` with a mocked DB."""

    def test_enrich_classifies_trigger_types(self):
        # One row per expected trigger type; only the TLS row should be
        # flagged as implementation-specific.
        # NOTE(review): row layout looks like (id, text, condition, action,
        # object) -- confirm against the SELECT in enrich_obligations.
        select_result = MagicMock()
        select_result.fetchall.return_value = [
            (
                "obl-1",
                "Bei Vorfall melden",
                "Sicherheitsvorfall",
                "melden",
                "Vorfall",
            ),
            (
                "obl-2",
                "Jährlich Audit durchführen",
                "",
                "durchführen",
                "Audit",
            ),
            (
                "obl-3",
                "Verschlüsselung mittels TLS implementieren",
                "",
                "implementieren",
                "Verschlüsselung",
            ),
        ]

        # The first execute() returns the SELECT rows; every later call
        # (the UPDATE statements) gets a fresh MagicMock.
        scripted = iter([select_result])

        def fake_execute(*_args, **_kwargs):
            result = next(scripted, None)
            return MagicMock() if result is None else result

        db = MagicMock()
        db.execute.side_effect = fake_execute

        outcome = DecompositionPass(db=db).enrich_obligations()

        assert outcome["enriched"] == 3
        assert outcome["trigger_event"] == 1
        assert outcome["trigger_periodic"] == 1
        assert outcome["trigger_continuous"] == 1
        assert outcome["implementation_specific"] == 1
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# MIGRATION 075 TESTS
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestMigration075:
    """Checks on the migration 075 SQL file (obligation refinement)."""

    @staticmethod
    def _migration_path():
        """Return the expected location of the migration 075 SQL file.

        The path is resolved relative to this test module:
        ``<module dir>/../migrations/075_obligation_refinement.sql``.
        """
        from pathlib import Path

        return (
            Path(__file__).parent.parent
            / "migrations"
            / "075_obligation_refinement.sql"
        )

    def test_migration_file_exists(self):
        assert self._migration_path().exists(), "Migration 075 file missing"

    def test_migration_contains_required_fields(self):
        migration = self._migration_path()
        # Fail with a readable message rather than a bare FileNotFoundError.
        assert migration.exists(), "Migration 075 file missing"

        # Explicit encoding: without it read_text() decodes with the locale
        # default, which can fail on non-ASCII SQL comments.
        content = migration.read_text(encoding="utf-8")

        assert "merged_into_id" in content
        assert "trigger_type" in content
        assert "is_implementation_specific" in content
        assert "'merged'" in content
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# PASS 0B ENRICHMENT INTEGRATION TESTS
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestPass0bWithEnrichment:
    """Tests that Pass 0b uses enrichment metadata correctly."""

    @staticmethod
    def _run(coro):
        """Run *coro* to completion on a private event loop and return its result.

        A dedicated loop replaces ``asyncio.get_event_loop()``, which is
        deprecated when no loop is running. The loop is always closed and
        the process-wide current-loop state is left untouched.
        """
        import asyncio

        loop = asyncio.new_event_loop()
        try:
            return loop.run_until_complete(coro)
        finally:
            loop.close()

    @staticmethod
    def _db_with_sequence():
        """Return a mock DB for ``_process_pass0b_control``.

        The first ``execute`` call (the ``_next_atomic_seq`` lookup) yields
        a result whose ``fetchone()`` is ``(0,)``; every later call yields
        a fresh ``MagicMock``.
        """
        seq_result = MagicMock()
        seq_result.fetchone.return_value = (0,)
        scripted = iter([seq_result])

        def fake_execute(*_args, **_kwargs):
            result = next(scripted, None)
            return MagicMock() if result is None else result

        db = MagicMock()
        db.execute.side_effect = fake_execute
        return db

    @staticmethod
    def _obligation(**overrides):
        """Return a Pass 0b obligation dict, with *overrides* applied.

        The defaults are the values shared by the tests in this class; the
        key order matches the original inline fixtures.
        """
        obligation = {
            "oc_id": "oc-1",
            "candidate_id": "OC-001-01",
            "parent_uuid": "p-uuid",
            "obligation_text": "",
            "action": "",
            "object": "",
            "is_test": False,
            "is_reporting": False,
            "parent_title": "",
            "parent_category": "security",
            "parent_citation": "",
            "parent_severity": "high",
            "parent_control_id": "",
            "source_ref": "",
            "trigger_type": "continuous",
            "is_implementation_specific": False,
        }
        obligation.update(overrides)
        return obligation

    @staticmethod
    def _empty_stats():
        """Return a zeroed stats dict as passed to ``_process_pass0b_control``."""
        return {
            "controls_created": 0,
            "candidates_processed": 0,
            "llm_failures": 0,
            "dedup_linked": 0,
            "dedup_review": 0,
        }

    def test_pass0b_query_skips_merged(self):
        """Verify Pass 0b query includes merged_into_id IS NULL filter."""
        mock_db = MagicMock()
        mock_rows = MagicMock()
        mock_rows.fetchall.return_value = []  # nothing to process
        mock_db.execute.return_value = mock_rows

        decomp = DecompositionPass(db=mock_db)
        self._run(decomp.run_pass0b(limit=10, use_anthropic=True))

        # The first execute() call is the candidate SELECT.
        first_call = mock_db.execute.call_args_list[0]
        query_str = str(first_call[0][0])
        assert "merged_into_id IS NULL" in query_str

    def test_severity_capped_for_implementation_specific(self):
        """Implementation-specific obligations get max severity=medium."""
        obl = self._obligation(
            obligation_text="TLS implementieren",
            action="implementieren",
            object="TLS",
            parent_title="Encryption",
            parent_control_id="SEC-001",
            trigger_type="continuous",
            is_implementation_specific=True,
        )
        parsed = {
            "title": "TLS implementieren",
            "objective": "TLS für alle Verbindungen",
            "requirements": ["TLS 1.3"],
            "test_procedure": ["Scan"],
            "evidence": ["Zertifikat"],
            "severity": "critical",  # LLM says critical; expect a cap
            "category": "security",
        }
        mock_db = self._db_with_sequence()

        decomp = DecompositionPass(db=mock_db)
        self._run(decomp._process_pass0b_control(obl, parsed, self._empty_stats()))

        # _write_atomic_control is call #2: db.execute(text(...), {params})
        insert_call = mock_db.execute.call_args_list[1]
        # positional args: (text_obj, params_dict)
        insert_params = insert_call[0][1]
        assert insert_params["severity"] == "medium"

    def test_test_obligation_gets_testing_category(self):
        """Test obligations should get category='testing'."""
        obl = self._obligation(
            obligation_text="MFA testen",
            action="testen",
            object="MFA",
            is_test=True,
            parent_title="Auth",
            parent_control_id="AUTH-001",
            trigger_type="periodic",
            is_implementation_specific=False,
        )
        parsed = {
            "title": "MFA-Wirksamkeit testen",
            "objective": "Regelmäßig MFA testen",
            "requirements": ["Testplan"],
            "test_procedure": ["Durchführung"],
            "evidence": ["Protokoll"],
            "severity": "high",
            "category": "security",  # LLM says security
        }
        mock_db = self._db_with_sequence()

        decomp = DecompositionPass(db=mock_db)
        self._run(decomp._process_pass0b_control(obl, parsed, self._empty_stats()))

        # _write_atomic_control is call #2: db.execute(text(...), {params})
        insert_call = mock_db.execute.call_args_list[1]
        insert_params = insert_call[0][1]
        assert insert_params["category"] == "testing"
|
||||
|
||||
Reference in New Issue
Block a user