Files
Benjamin Admin 22257a7ed8 feat(pipeline): F5 validation tests — verify DB matches hardcoded dicts
8 tests confirm all REGULATION_LICENSE_MAP, ACTION_TYPES, _NEGATIVE_PATTERNS,
_ACTION_SYNONYMS, and _OBJECT_SYNONYMS entries are correctly migrated to DB.
Dicts kept as fallback for DB-unavailability resilience.

Block F complete: F1-F5 all done.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-05 16:06:59 +02:00

123 lines
4.9 KiB
Python

"""F5 Validation: Verify DB-backed lookups match old hardcoded dicts."""
import pytest
class TestRegulationRegistryConsistency:
    """Check the F1 regulation migration against the legacy REGULATION_LICENSE_MAP.

    Both tests work on the rows produced by the migration script's
    ``build_rows()`` helper and compare them with the hardcoded dict.
    """

    def test_all_old_entries_in_db(self):
        """Every key of REGULATION_LICENSE_MAP appears as a ``regulation_id`` row.

        All missing IDs are collected and reported in one failure, so a single
        run shows the full gap rather than only the first missing entry.
        """
        from services.control_generator import REGULATION_LICENSE_MAP
        from scripts.f1_migrate_regulation_registry import build_rows

        db_ids = {r["regulation_id"] for r in build_rows()}
        missing = [reg_id for reg_id in REGULATION_LICENSE_MAP if reg_id not in db_ids]
        assert not missing, "Missing from DB:\n" + "\n".join(map(str, missing))

    def test_classify_regulation_matches_old(self):
        """DB-backed classify_regulation returns the same rule as the old dict."""
        from services.control_generator import REGULATION_LICENSE_MAP
        from services.regulation_registry import RegulationRegistry
        from unittest.mock import patch, MagicMock
        from scripts.f1_migrate_regulation_registry import build_rows

        # Build the mock DB result from the migration data. The tuple order is
        # presumably the column order RegulationRegistry._load() selects —
        # confirm against that query if it changes.
        mock_rows = [
            (
                r["regulation_id"],
                r["regulation_name_de"],
                r["license_rule"],
                r["license_type"],
                r.get("attribution"),
                r["source_type"],
                r["jurisdiction"],
                r["status"],
            )
            for r in build_rows()
        ]

        reg = RegulationRegistry()
        mismatches = []
        with patch("services.regulation_registry.SessionLocal") as mock_cls:
            mock_session = MagicMock()
            mock_result = MagicMock()
            mock_result.fetchall.return_value = mock_rows
            mock_session.execute.return_value = mock_result
            mock_cls.return_value = mock_session
            reg._load()

            # Compare every entry while the patch is still active, so that any
            # reload triggered by classify_regulation() cannot open a real
            # session.
            for reg_id, info in REGULATION_LICENSE_MAP.items():
                db_result = reg.classify_regulation(reg_id)
                if db_result["rule"] != info["rule"]:
                    mismatches.append(
                        f"{reg_id}: DB rule={db_result['rule']} vs dict rule={info['rule']}"
                    )

        assert not mismatches, "Rule mismatches:\n" + "\n".join(mismatches)
class TestActionOntologyConsistency:
    """Check the F2 action migration against ACTION_TYPES and _NEGATIVE_PATTERNS.

    Each test compares the legacy in-code definitions with the output of the
    migration script's ``build_action_types()`` / ``build_action_synonyms()``
    helpers and reports every missing entry in a single failure message.
    """

    def test_all_action_types_migrated(self):
        """Every ACTION_TYPES key appears as a ``canonical_name`` in the migration."""
        from services.control_ontology import ACTION_TYPES
        from scripts.f2_migrate_actions import build_action_types

        db_names = {t["canonical_name"] for t in build_action_types()}
        missing = [action for action in ACTION_TYPES if action not in db_names]
        assert not missing, "Missing action_type:\n" + "\n".join(map(str, missing))

    def test_all_aliases_migrated(self):
        """Every alias listed in ACTION_TYPES appears as an ``alias`` synonym row."""
        from services.control_ontology import ACTION_TYPES
        from scripts.f2_migrate_actions import build_action_synonyms

        db_synonyms = {
            s["synonym"]
            for s in build_action_synonyms()
            if s["pattern_type"] == "alias"
        }
        missing = []
        for action, info in ACTION_TYPES.items():
            for alias in info.get("aliases", []):
                # Aliases are compared lowercased; the migration presumably
                # stores synonyms in lowercase — confirm against the script.
                if alias.lower() not in db_synonyms:
                    missing.append(f"{action}: {alias}")
        assert not missing, "Missing aliases:\n" + "\n".join(missing)

    def test_all_negative_patterns_migrated(self):
        """Every _NEGATIVE_PATTERNS pattern appears as a ``negative_pattern`` row."""
        from services.control_ontology import _NEGATIVE_PATTERNS
        from scripts.f2_migrate_actions import build_action_synonyms

        db_patterns = {
            s["synonym"]
            for s in build_action_synonyms()
            if s["pattern_type"] == "negative_pattern"
        }
        # _NEGATIVE_PATTERNS holds pairs; only the first element (the pattern)
        # is checked here, lowercased to match the stored form.
        missing = [
            pattern
            for pattern, _ in _NEGATIVE_PATTERNS
            if pattern.lower() not in db_patterns
        ]
        assert not missing, "Missing negative pattern:\n" + "\n".join(map(str, missing))
class TestObjectSynonymsConsistency:
    """Check the F3 object migration rows against the legacy _OBJECT_SYNONYMS."""

    def test_all_objects_migrated(self):
        """Each lowercased _OBJECT_SYNONYMS entry is present as a row ``synonym``."""
        from services.control_dedup import _OBJECT_SYNONYMS
        from scripts.f3_migrate_objects import build_rows

        migrated = {row["synonym"] for row in build_rows()}
        # Collect every absent entry so that one failure lists them all.
        absent = [name for name in _OBJECT_SYNONYMS if name.lower() not in migrated]
        assert not absent, f"Missing object synonyms:\n" + "\n".join(absent)
class TestLLMEnrichmentQuality:
    """Sanity checks on the synonym rows returned by build_action_synonyms()."""

    def test_no_empty_synonyms_in_db(self):
        """Every synonym has at least two characters after stripping whitespace."""
        from scripts.f2_migrate_actions import build_action_synonyms

        for entry in build_action_synonyms():
            trimmed = entry["synonym"].strip()
            assert len(trimmed) >= 2, f"Too short: {entry}"

    def test_no_duplicate_canonical_in_actions(self):
        """A (synonym, language, pattern_type) triple maps to one canonical action.

        Repeated rows are allowed as long as they agree on ``canonical_action``.
        """
        from scripts.f2_migrate_actions import build_action_synonyms

        owner_by_key = {}
        for entry in build_action_synonyms():
            lookup = (entry["synonym"], entry["language"], entry["pattern_type"])
            # setdefault records the first owner of a key and returns it on
            # every later occurrence, so a first sighting compares equal to
            # itself and only a conflicting repeat can fail.
            first_owner = owner_by_key.setdefault(lookup, entry["canonical_action"])
            assert first_owner == entry["canonical_action"], (
                f"Duplicate synonym '{entry['synonym']}' maps to both "
                f"'{first_owner}' and '{entry['canonical_action']}'"
            )