Files
breakpilot-compliance/backend-compliance/migrations/046_control_generator.sql
Benjamin Admin 637fab6fdb
All checks were successful
CI/CD / go-lint (push) Has been skipped
CI/CD / python-lint (push) Has been skipped
CI/CD / nodejs-lint (push) Has been skipped
CI/CD / test-go-ai-compliance (push) Successful in 33s
CI/CD / test-python-backend-compliance (push) Successful in 39s
CI/CD / test-python-document-crawler (push) Successful in 22s
CI/CD / test-python-dsms-gateway (push) Successful in 21s
CI/CD / validate-canonical-controls (push) Successful in 12s
CI/CD / Deploy (push) Successful in 2s
fix: migration runner strips BEGIN/COMMIT and guards missing tables
Root cause: migrations 046-047 used explicit BEGIN/COMMIT which
conflicts with psycopg2 implicit transactions, and ALTER TABLE
on canonical_controls fails when the table doesn't exist on
production. This blocked all subsequent migrations (048-053).

Changes:
- migration_runner.py: strip BEGIN/COMMIT from SQL before executing
- 046: wrap canonical_controls ALTER in DO $$ IF EXISTS block
- 047: wrap canonical_controls ALTER in DO $$ IF EXISTS block

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-14 21:59:10 +01:00

90 lines
4.4 KiB
SQL

-- Migration 046: Control Generator Pipeline
-- Adds job tracking, chunk tracking, blocked sources, and extends canonical_controls
-- for the 3-license-rule system (free_use, citation_required, restricted).
-- Transaction managed by migration_runner
-- =============================================================================
-- 1. Job-Tracking for Generator Runs
-- =============================================================================
CREATE TABLE IF NOT EXISTS canonical_generation_jobs (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
status VARCHAR(20) DEFAULT 'pending'
CHECK (status IN ('pending', 'running', 'completed', 'failed', 'cancelled')),
config JSONB NOT NULL,
total_chunks_scanned INTEGER DEFAULT 0,
controls_generated INTEGER DEFAULT 0,
controls_verified INTEGER DEFAULT 0,
controls_needs_review INTEGER DEFAULT 0,
controls_too_close INTEGER DEFAULT 0,
controls_duplicates_found INTEGER DEFAULT 0,
errors JSONB DEFAULT '[]',
started_at TIMESTAMPTZ,
completed_at TIMESTAMPTZ,
created_at TIMESTAMPTZ DEFAULT NOW()
);
-- =============================================================================
-- 2. Tracking which RAG chunks have been processed
-- =============================================================================
CREATE TABLE IF NOT EXISTS canonical_processed_chunks (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
chunk_hash VARCHAR(64) NOT NULL,
collection VARCHAR(100) NOT NULL,
regulation_code VARCHAR(100),
document_version VARCHAR(50),
source_license VARCHAR(50),
license_rule INTEGER NOT NULL
CHECK (license_rule IN (1, 2, 3)),
processing_path VARCHAR(20) NOT NULL
CHECK (processing_path IN ('structured', 'llm_reform', 'skipped')),
generated_control_ids JSONB DEFAULT '[]',
job_id UUID REFERENCES canonical_generation_jobs(id),
processed_at TIMESTAMPTZ DEFAULT NOW(),
UNIQUE (chunk_hash, collection, document_version)
);
CREATE INDEX IF NOT EXISTS idx_cpc_collection ON canonical_processed_chunks(collection);
CREATE INDEX IF NOT EXISTS idx_cpc_regulation ON canonical_processed_chunks(regulation_code);
CREATE INDEX IF NOT EXISTS idx_cpc_job ON canonical_processed_chunks(job_id);
-- =============================================================================
-- 3. Blocked Sources (Rule 3 documents to be deleted after generation)
-- =============================================================================
CREATE TABLE IF NOT EXISTS canonical_blocked_sources (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
regulation_code VARCHAR(100) NOT NULL UNIQUE,
document_title VARCHAR(500) NOT NULL,
reason VARCHAR(500) DEFAULT 'Kommerziell nicht nutzbar — darf nicht mit KI verarbeitet werden',
deletion_status VARCHAR(20) DEFAULT 'pending'
CHECK (deletion_status IN ('pending', 'marked_for_deletion', 'deleted')),
qdrant_collection VARCHAR(100),
original_file_path TEXT,
marked_at TIMESTAMPTZ DEFAULT NOW(),
deleted_at TIMESTAMPTZ
);
-- =============================================================================
-- 4. Extend canonical_controls: release_state + 3-rule columns
-- Safe: only runs if canonical_controls exists
-- =============================================================================
DO $$
BEGIN
IF EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = 'canonical_controls') THEN
ALTER TABLE canonical_controls DROP CONSTRAINT IF EXISTS canonical_controls_release_state_check;
ALTER TABLE canonical_controls ADD CONSTRAINT canonical_controls_release_state_check
CHECK (release_state IN ('draft', 'review', 'approved', 'deprecated', 'needs_review', 'too_close', 'duplicate'));
ALTER TABLE canonical_controls ADD COLUMN IF NOT EXISTS license_rule INTEGER DEFAULT NULL;
ALTER TABLE canonical_controls ADD COLUMN IF NOT EXISTS source_original_text TEXT DEFAULT NULL;
ALTER TABLE canonical_controls ADD COLUMN IF NOT EXISTS source_citation JSONB DEFAULT NULL;
ALTER TABLE canonical_controls ADD COLUMN IF NOT EXISTS customer_visible BOOLEAN DEFAULT true;
ALTER TABLE canonical_controls ADD COLUMN IF NOT EXISTS generation_metadata JSONB DEFAULT NULL;
CREATE INDEX IF NOT EXISTS idx_canonical_controls_license_rule ON canonical_controls(license_rule);
CREATE INDEX IF NOT EXISTS idx_canonical_controls_customer_visible ON canonical_controls(customer_visible);
END IF;
END $$;