feat(pipeline): F1 regulation registry — DB-backed license/source-type lookup

Migrates REGULATION_LICENSE_MAP (135 entries) and SOURCE_REGULATION_CLASSIFICATION
(58 entries) from hardcoded Python dicts to compliance.regulation_registry table.

- SQL migration: 002_regulation_registry.sql (table + indexes + trigger)
- Migration script: f1_migrate_regulation_registry.py (162 rows, --dry-run)
- RegulationRegistry cache: 5min TTL, prefix fallback, graceful degradation
- control_generator._classify_regulation() delegates to DB with dict fallback
- source_type_classification.classify_source_regulation() delegates to DB
- 34 new tests (lookup, cache, degradation, migration data consistency)
- 421 total tests pass, 0 regressions

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-05-03 23:14:06 +02:00
parent 4fd2bfefcd
commit 9437e029d0
7 changed files with 850 additions and 30 deletions
@@ -0,0 +1,72 @@
-- Migration 002: Regulation Registry (Block F1)
-- Target schema: compliance
-- Usage:
--   ssh macmini "docker exec -i bp-core-postgres psql -U breakpilot -d breakpilot_db" < control-pipeline/migrations/002_regulation_registry.sql
--
-- Every object below is referenced without a schema prefix, so name
-- resolution depends on this session-level search_path: compliance is
-- consulted first, then public.
-- NOTE(review): presumably the compliance schema is created by an earlier
-- migration — confirm, otherwise the objects would not land in compliance.
SET search_path = compliance, public;
-- ========================================
-- regulation_registry
-- ========================================
-- Central registry for all regulations, laws, guidelines, and frameworks
-- referenced by the control pipeline. Replaces hardcoded Python dicts
-- (REGULATION_LICENSE_MAP, SOURCE_REGULATION_CLASSIFICATION).
CREATE TABLE IF NOT EXISTS regulation_registry (
    -- Surrogate key, filled in by the database when not supplied.
    id                  UUID PRIMARY KEY DEFAULT gen_random_uuid(),

    -- regulation_id: machine key (e.g. "eu_2016_679", "nist_sp_800_53");
    -- mandatory and unique across the registry.
    regulation_id       VARCHAR(100) UNIQUE NOT NULL,

    -- Display names (all optional)
    regulation_name_de  TEXT,
    regulation_name_en  TEXT,
    regulation_short    VARCHAR(50),

    -- License classification (3-rule system): only 1, 2 or 3 is accepted,
    -- and rows that omit the value get rule 1.
    license_rule        INTEGER NOT NULL DEFAULT 1
        CHECK (license_rule IN (1, 2, 3)),
    license_type        VARCHAR(50),    -- EU_LAW, DE_LAW, CC-BY-SA-4.0, etc.
    attribution         TEXT,           -- Required for Rule 2 (CC-BY)

    -- Source classification: restricted to the listed values, 'law' by default.
    source_type         VARCHAR(20) NOT NULL DEFAULT 'law'
        CHECK (source_type IN ('law', 'guideline', 'standard', 'framework', 'restricted')),

    -- Metadata (all optional)
    jurisdiction        VARCHAR(10),    -- DE, EU, AT, CH, US, FR, ES, NL, IT, HU, INT
    category            VARCHAR(50),
    celex               VARCHAR(30),    -- EU CELEX number if applicable
    url                 TEXT,

    -- Lifecycle: restricted to the listed states, 'active' by default.
    status              VARCHAR(20) NOT NULL DEFAULT 'active'
        CHECK (status IN ('active', 'needs_review', 'deprecated')),

    -- Audit timestamps. Declared NOT NULL so an explicit NULL cannot bypass
    -- the defaults; updated_at is additionally refreshed on every UPDATE by
    -- trg_regulation_registry_updated_at (defined later in this migration).
    created_at          TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at          TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- Secondary indexes: one single-column index each for the lifecycle,
-- jurisdiction, source-classification and license-rule columns.
-- IF NOT EXISTS keeps the statements re-runnable.
CREATE INDEX IF NOT EXISTS idx_reg_registry_status
    ON regulation_registry (status);

CREATE INDEX IF NOT EXISTS idx_reg_registry_jurisdiction
    ON regulation_registry (jurisdiction);

CREATE INDEX IF NOT EXISTS idx_reg_registry_source_type
    ON regulation_registry (source_type);

CREATE INDEX IF NOT EXISTS idx_reg_registry_license_rule
    ON regulation_registry (license_rule);
-- Trigger function for the updated_at column.
-- Overwrites updated_at on the row about to be written with NOW() and
-- returns that row so the triggering statement proceeds with it.
CREATE OR REPLACE FUNCTION update_regulation_registry_updated_at()
RETURNS TRIGGER
LANGUAGE plpgsql
AS $func$
BEGIN
    NEW.updated_at := NOW();
    RETURN NEW;
END;
$func$;
-- Drop-then-create, so re-running this file does not fail on a trigger
-- that already exists.
DROP TRIGGER IF EXISTS trg_regulation_registry_updated_at
    ON regulation_registry;

-- Fires once per affected row, before each UPDATE is applied, and hands
-- the row to update_regulation_registry_updated_at().
CREATE TRIGGER trg_regulation_registry_updated_at
    BEFORE UPDATE
    ON regulation_registry
    FOR EACH ROW
    EXECUTE FUNCTION update_regulation_registry_updated_at();