feat(pipeline): F1 regulation registry — DB-backed license/source-type lookup
Migrates REGULATION_LICENSE_MAP (135 entries) and SOURCE_REGULATION_CLASSIFICATION (58 entries) from hardcoded Python dicts to compliance.regulation_registry table. - SQL migration: 002_regulation_registry.sql (table + indexes + trigger) - Migration script: f1_migrate_regulation_registry.py (162 rows, --dry-run) - RegulationRegistry cache: 5min TTL, prefix fallback, graceful degradation - control_generator._classify_regulation() delegates to DB with dict fallback - source_type_classification.classify_source_regulation() delegates to DB - 34 new tests (lookup, cache, degradation, migration data consistency) - 421 total tests pass, 0 regressions Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -165,21 +165,29 @@ def classify_source_regulation(source_regulation: str) -> str:
|
||||
"""
|
||||
Klassifiziert eine source_regulation als law, guideline oder framework.
|
||||
|
||||
Verwendet exaktes Matching gegen die Map. Bei unbekannten Quellen
|
||||
wird anhand von Schluesselwoertern geraten, Fallback ist 'framework'
|
||||
(konservativstes Ergebnis).
|
||||
Delegates to DB-backed RegulationRegistry (with 5min cache).
|
||||
Falls back to SOURCE_REGULATION_CLASSIFICATION dict + heuristic
|
||||
if DB is unavailable.
|
||||
"""
|
||||
if not source_regulation:
|
||||
return SOURCE_TYPE_FRAMEWORK
|
||||
|
||||
# Exaktes Match
|
||||
# Try DB-backed registry first
|
||||
try:
|
||||
from services.regulation_registry import classify_source_regulation as _db_classify
|
||||
result = _db_classify(source_regulation)
|
||||
if result:
|
||||
return result
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Fallback: local dict
|
||||
if source_regulation in SOURCE_REGULATION_CLASSIFICATION:
|
||||
return SOURCE_REGULATION_CLASSIFICATION[source_regulation]
|
||||
|
||||
# Heuristik fuer unbekannte Quellen
|
||||
lower = source_regulation.lower()
|
||||
|
||||
# Gesetze erkennen
|
||||
law_indicators = [
|
||||
"verordnung", "richtlinie", "gesetz", "directive", "regulation",
|
||||
"(eu)", "(eg)", "act", "ley", "loi", "törvény", "código",
|
||||
@@ -187,19 +195,16 @@ def classify_source_regulation(source_regulation: str) -> str:
|
||||
if any(ind in lower for ind in law_indicators):
|
||||
return SOURCE_TYPE_LAW
|
||||
|
||||
# Leitlinien erkennen
|
||||
guideline_indicators = [
|
||||
"edpb", "leitlinie", "guideline", "wp2", "bsi", "empfehlung",
|
||||
]
|
||||
if any(ind in lower for ind in guideline_indicators):
|
||||
return SOURCE_TYPE_GUIDELINE
|
||||
|
||||
# Frameworks erkennen
|
||||
framework_indicators = [
|
||||
"enisa", "nist", "owasp", "oecd", "cisa", "framework", "iso",
|
||||
]
|
||||
if any(ind in lower for ind in framework_indicators):
|
||||
return SOURCE_TYPE_FRAMEWORK
|
||||
|
||||
# Konservativ: unbekannt = framework (geringste Verbindlichkeit)
|
||||
return SOURCE_TYPE_FRAMEWORK
|
||||
|
||||
Reference in New Issue
Block a user