feat(pipeline): F1 regulation registry — DB-backed license/source-type lookup
Migrates REGULATION_LICENSE_MAP (135 entries) and SOURCE_REGULATION_CLASSIFICATION (58 entries) from hardcoded Python dicts to the compliance.regulation_registry table.

- SQL migration: 002_regulation_registry.sql (table + indexes + trigger)
- Migration script: f1_migrate_regulation_registry.py (162 rows, --dry-run)
- RegulationRegistry cache: 5min TTL, prefix fallback, graceful degradation
- control_generator._classify_regulation() delegates to DB with dict fallback
- source_type_classification.classify_source_regulation() delegates to DB
- 34 new tests (lookup, cache, degradation, migration data consistency)
- 421 total tests pass, 0 regressions

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -33,6 +33,7 @@ from sqlalchemy import text
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from .rag_client import ComplianceRAGClient, RAGSearchResult, get_rag_client
|
||||
from .regulation_registry import get_registry as _get_regulation_registry
|
||||
from .similarity_detector import check_similarity
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -245,28 +246,21 @@ def _classify_regulation(regulation_code: str) -> dict:
|
||||
|
||||
Returns dict with keys: license, rule, name, source_type.
|
||||
source_type is one of: law, guideline, standard, restricted.
|
||||
|
||||
Delegates to DB-backed RegulationRegistry (with 5min cache).
|
||||
Falls back to REGULATION_LICENSE_MAP if DB is unavailable.
|
||||
"""
|
||||
code = regulation_code.lower().strip()
|
||||
registry = _get_regulation_registry()
|
||||
result = registry.classify_regulation(regulation_code)
|
||||
|
||||
# Exact match first
|
||||
if code in REGULATION_LICENSE_MAP:
|
||||
return REGULATION_LICENSE_MAP[code]
|
||||
# If registry returned the unknown fallback AND we have a local match,
|
||||
# prefer the local dict (graceful degradation during migration)
|
||||
if result.get("license") == "UNKNOWN":
|
||||
code = regulation_code.lower().strip()
|
||||
if code in REGULATION_LICENSE_MAP:
|
||||
return REGULATION_LICENSE_MAP[code]
|
||||
|
||||
# Prefix match for Rule 2 (ENISA = standard)
|
||||
for prefix in _RULE2_PREFIXES:
|
||||
if code.startswith(prefix):
|
||||
return {"license": "CC-BY-4.0", "rule": 2, "source_type": "standard",
|
||||
"name": "ENISA", "attribution": "ENISA, CC BY 4.0"}
|
||||
|
||||
# Prefix match for Rule 3 (BSI/ISO/ETSI = restricted)
|
||||
for prefix in _RULE3_PREFIXES:
|
||||
if code.startswith(prefix):
|
||||
return {"license": f"{prefix.rstrip('_').upper()}_RESTRICTED", "rule": 3,
|
||||
"source_type": "restricted", "name": "INTERNAL_ONLY"}
|
||||
|
||||
# Unknown → treat as restricted (safe default)
|
||||
logger.warning("Unknown regulation_code %r — defaulting to Rule 3 (restricted)", code)
|
||||
return {"license": "UNKNOWN", "rule": 3, "source_type": "restricted", "name": "INTERNAL_ONLY"}
|
||||
return result
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
Reference in New Issue
Block a user