-- Migration 074: Control Dedup Engine — DB Schema -- Supports the 4-stage dedup pipeline for atomic controls (Pass 0b). -- -- Tables: -- 1. control_parent_links — M:N parent linking (one control → many regulations) -- 2. control_dedup_reviews — Review queue for borderline matches (0.85-0.92) BEGIN; -- ============================================================================= -- 1. Control Parent Links (M:N) -- Enables "1 Control erfuellt 5 Gesetze" — the biggest USP. -- An atomic control can have multiple parent controls from different -- regulations/obligations. This replaces the 1:1 parent_control_uuid FK. -- ============================================================================= CREATE TABLE IF NOT EXISTS control_parent_links ( id UUID PRIMARY KEY DEFAULT gen_random_uuid(), control_uuid UUID NOT NULL REFERENCES canonical_controls(id) ON DELETE CASCADE, parent_control_uuid UUID NOT NULL REFERENCES canonical_controls(id) ON DELETE CASCADE, link_type VARCHAR(30) NOT NULL DEFAULT 'decomposition' CHECK (link_type IN ('decomposition', 'dedup_merge', 'manual', 'crosswalk')), confidence NUMERIC(3,2) DEFAULT 1.0 CHECK (confidence >= 0 AND confidence <= 1), source_regulation VARCHAR(100), source_article VARCHAR(100), obligation_candidate_id UUID REFERENCES obligation_candidates(id), created_at TIMESTAMPTZ DEFAULT NOW(), CONSTRAINT uq_parent_link UNIQUE (control_uuid, parent_control_uuid) ); CREATE INDEX IF NOT EXISTS idx_cpl_control ON control_parent_links(control_uuid); CREATE INDEX IF NOT EXISTS idx_cpl_parent ON control_parent_links(parent_control_uuid); CREATE INDEX IF NOT EXISTS idx_cpl_type ON control_parent_links(link_type); COMMENT ON TABLE control_parent_links IS 'M:N parent links — one atomic control can fulfill multiple regulations/obligations. USP: "1 Control erfuellt 5 Gesetze"'; -- ============================================================================= -- 2. Control Dedup Reviews -- Queue for borderline matches (similarity 0.85-0.92) that need human review. -- Reviewed entries get status updated to accepted/rejected. -- ============================================================================= CREATE TABLE IF NOT EXISTS control_dedup_reviews ( id UUID PRIMARY KEY DEFAULT gen_random_uuid(), candidate_control_id VARCHAR(30) NOT NULL, candidate_title TEXT NOT NULL, candidate_objective TEXT, matched_control_uuid UUID REFERENCES canonical_controls(id), matched_control_id VARCHAR(30), similarity_score NUMERIC(4,3) DEFAULT 0.0, dedup_stage VARCHAR(40) NOT NULL, dedup_details JSONB DEFAULT '{}', parent_control_uuid UUID REFERENCES canonical_controls(id), obligation_candidate_id UUID REFERENCES obligation_candidates(id), review_status VARCHAR(20) DEFAULT 'pending' CHECK (review_status IN ('pending', 'accepted_link', 'accepted_new', 'rejected')), reviewed_by VARCHAR(100), reviewed_at TIMESTAMPTZ, review_notes TEXT, created_at TIMESTAMPTZ DEFAULT NOW() ); CREATE INDEX IF NOT EXISTS idx_cdr_status ON control_dedup_reviews(review_status); CREATE INDEX IF NOT EXISTS idx_cdr_matched ON control_dedup_reviews(matched_control_uuid); CREATE INDEX IF NOT EXISTS idx_cdr_parent ON control_dedup_reviews(parent_control_uuid); CREATE INDEX IF NOT EXISTS idx_cdr_stage ON control_dedup_reviews(dedup_stage); COMMENT ON TABLE control_dedup_reviews IS 'Review queue for borderline dedup matches (similarity 0.85-0.92). Human decides: link or new control.'; COMMIT;