-- Migration 046: Control Generator Pipeline -- Adds job tracking, chunk tracking, blocked sources, and extends canonical_controls -- for the 3-license-rule system (free_use, citation_required, restricted). -- Transaction managed by migration_runner -- ============================================================================= -- 1. Job-Tracking for Generator Runs -- ============================================================================= CREATE TABLE IF NOT EXISTS canonical_generation_jobs ( id UUID PRIMARY KEY DEFAULT gen_random_uuid(), status VARCHAR(20) DEFAULT 'pending' CHECK (status IN ('pending', 'running', 'completed', 'failed', 'cancelled')), config JSONB NOT NULL, total_chunks_scanned INTEGER DEFAULT 0, controls_generated INTEGER DEFAULT 0, controls_verified INTEGER DEFAULT 0, controls_needs_review INTEGER DEFAULT 0, controls_too_close INTEGER DEFAULT 0, controls_duplicates_found INTEGER DEFAULT 0, errors JSONB DEFAULT '[]', started_at TIMESTAMPTZ, completed_at TIMESTAMPTZ, created_at TIMESTAMPTZ DEFAULT NOW() ); -- ============================================================================= -- 2. Tracking which RAG chunks have been processed -- ============================================================================= CREATE TABLE IF NOT EXISTS canonical_processed_chunks ( id UUID PRIMARY KEY DEFAULT gen_random_uuid(), chunk_hash VARCHAR(64) NOT NULL, collection VARCHAR(100) NOT NULL, regulation_code VARCHAR(100), document_version VARCHAR(50), source_license VARCHAR(50), license_rule INTEGER NOT NULL CHECK (license_rule IN (1, 2, 3)), processing_path VARCHAR(20) NOT NULL CHECK (processing_path IN ('structured', 'llm_reform', 'skipped')), generated_control_ids JSONB DEFAULT '[]', job_id UUID REFERENCES canonical_generation_jobs(id), processed_at TIMESTAMPTZ DEFAULT NOW(), UNIQUE (chunk_hash, collection, document_version) ); CREATE INDEX IF NOT EXISTS idx_cpc_collection ON canonical_processed_chunks(collection); CREATE INDEX IF NOT EXISTS idx_cpc_regulation ON canonical_processed_chunks(regulation_code); CREATE INDEX IF NOT EXISTS idx_cpc_job ON canonical_processed_chunks(job_id); -- ============================================================================= -- 3. Blocked Sources (Rule 3 documents to be deleted after generation) -- ============================================================================= CREATE TABLE IF NOT EXISTS canonical_blocked_sources ( id UUID PRIMARY KEY DEFAULT gen_random_uuid(), regulation_code VARCHAR(100) NOT NULL UNIQUE, document_title VARCHAR(500) NOT NULL, reason VARCHAR(500) DEFAULT 'Kommerziell nicht nutzbar — darf nicht mit KI verarbeitet werden', deletion_status VARCHAR(20) DEFAULT 'pending' CHECK (deletion_status IN ('pending', 'marked_for_deletion', 'deleted')), qdrant_collection VARCHAR(100), original_file_path TEXT, marked_at TIMESTAMPTZ DEFAULT NOW(), deleted_at TIMESTAMPTZ ); -- ============================================================================= -- 4. Extend canonical_controls: release_state + 3-rule columns -- Safe: only runs if canonical_controls exists -- ============================================================================= DO $$ BEGIN IF EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = 'canonical_controls') THEN ALTER TABLE canonical_controls DROP CONSTRAINT IF EXISTS canonical_controls_release_state_check; ALTER TABLE canonical_controls ADD CONSTRAINT canonical_controls_release_state_check CHECK (release_state IN ('draft', 'review', 'approved', 'deprecated', 'needs_review', 'too_close', 'duplicate')); ALTER TABLE canonical_controls ADD COLUMN IF NOT EXISTS license_rule INTEGER DEFAULT NULL; ALTER TABLE canonical_controls ADD COLUMN IF NOT EXISTS source_original_text TEXT DEFAULT NULL; ALTER TABLE canonical_controls ADD COLUMN IF NOT EXISTS source_citation JSONB DEFAULT NULL; ALTER TABLE canonical_controls ADD COLUMN IF NOT EXISTS customer_visible BOOLEAN DEFAULT true; ALTER TABLE canonical_controls ADD COLUMN IF NOT EXISTS generation_metadata JSONB DEFAULT NULL; CREATE INDEX IF NOT EXISTS idx_canonical_controls_license_rule ON canonical_controls(license_rule); CREATE INDEX IF NOT EXISTS idx_canonical_controls_customer_visible ON canonical_controls(customer_visible); END IF; END $$;