Some checks failed
CI/CD / go-lint (push) Has been skipped
CI/CD / python-lint (push) Has been skipped
CI/CD / nodejs-lint (push) Has been skipped
CI/CD / test-go-ai-compliance (push) Failing after 31s
CI/CD / test-python-backend-compliance (push) Successful in 1m35s
CI/CD / test-python-document-crawler (push) Successful in 20s
CI/CD / test-python-dsms-gateway (push) Successful in 17s
CI/CD / validate-canonical-controls (push) Successful in 10s
CI/CD / Deploy (push) Has been skipped
- Control Library: parent control display, ObligationTypeBadge, GenerationStrategyBadge variants, evidence string fallback - API: expose parent_control_uuid/id/title in canonical controls - Fix: DSFA SQLAlchemy 2.0 Row._mapping compatibility - Migration 074: control_parent_links + control_dedup_reviews tables - QA scripts: benchmark, gap analysis, OSCAL import, OWASP cleanup, phase5 normalize, phase74 gap fill, sync_db, run_job - Docs: dedup engine, RAG benchmark, lessons learned, pipeline docs Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
74 lines
3.5 KiB
PL/PgSQL
74 lines
3.5 KiB
PL/PgSQL
-- Migration 074: Control Dedup Engine — DB Schema
|
|
-- Supports the 4-stage dedup pipeline for atomic controls (Pass 0b).
|
|
--
|
|
-- Tables:
|
|
-- 1. control_parent_links — M:N parent linking (one control → many regulations)
|
|
-- 2. control_dedup_reviews — Review queue for borderline matches (0.85-0.92)
|
|
|
|
BEGIN;
|
|
|
|
-- =============================================================================
|
|
-- 1. Control Parent Links (M:N)
|
|
-- Enables "1 Control erfuellt 5 Gesetze" — the biggest USP.
|
|
-- An atomic control can have multiple parent controls from different
|
|
-- regulations/obligations. This replaces the 1:1 parent_control_uuid FK.
|
|
-- =============================================================================
|
|
|
|
CREATE TABLE IF NOT EXISTS control_parent_links (
|
|
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
|
control_uuid UUID NOT NULL REFERENCES canonical_controls(id) ON DELETE CASCADE,
|
|
parent_control_uuid UUID NOT NULL REFERENCES canonical_controls(id) ON DELETE CASCADE,
|
|
link_type VARCHAR(30) NOT NULL DEFAULT 'decomposition'
|
|
CHECK (link_type IN ('decomposition', 'dedup_merge', 'manual', 'crosswalk')),
|
|
confidence NUMERIC(3,2) DEFAULT 1.0
|
|
CHECK (confidence >= 0 AND confidence <= 1),
|
|
source_regulation VARCHAR(100),
|
|
source_article VARCHAR(100),
|
|
obligation_candidate_id UUID REFERENCES obligation_candidates(id),
|
|
created_at TIMESTAMPTZ DEFAULT NOW(),
|
|
CONSTRAINT uq_parent_link UNIQUE (control_uuid, parent_control_uuid)
|
|
);
|
|
|
|
CREATE INDEX IF NOT EXISTS idx_cpl_control ON control_parent_links(control_uuid);
|
|
CREATE INDEX IF NOT EXISTS idx_cpl_parent ON control_parent_links(parent_control_uuid);
|
|
CREATE INDEX IF NOT EXISTS idx_cpl_type ON control_parent_links(link_type);
|
|
|
|
COMMENT ON TABLE control_parent_links IS
|
|
'M:N parent links — one atomic control can fulfill multiple regulations/obligations. USP: "1 Control erfuellt 5 Gesetze"';
|
|
|
|
-- =============================================================================
|
|
-- 2. Control Dedup Reviews
|
|
-- Queue for borderline matches (similarity 0.85-0.92) that need human review.
|
|
-- Reviewed entries get status updated to accepted/rejected.
|
|
-- =============================================================================
|
|
|
|
CREATE TABLE IF NOT EXISTS control_dedup_reviews (
|
|
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
|
candidate_control_id VARCHAR(30) NOT NULL,
|
|
candidate_title TEXT NOT NULL,
|
|
candidate_objective TEXT,
|
|
matched_control_uuid UUID REFERENCES canonical_controls(id),
|
|
matched_control_id VARCHAR(30),
|
|
similarity_score NUMERIC(4,3) DEFAULT 0.0,
|
|
dedup_stage VARCHAR(40) NOT NULL,
|
|
dedup_details JSONB DEFAULT '{}',
|
|
parent_control_uuid UUID REFERENCES canonical_controls(id),
|
|
obligation_candidate_id UUID REFERENCES obligation_candidates(id),
|
|
review_status VARCHAR(20) DEFAULT 'pending'
|
|
CHECK (review_status IN ('pending', 'accepted_link', 'accepted_new', 'rejected')),
|
|
reviewed_by VARCHAR(100),
|
|
reviewed_at TIMESTAMPTZ,
|
|
review_notes TEXT,
|
|
created_at TIMESTAMPTZ DEFAULT NOW()
|
|
);
|
|
|
|
CREATE INDEX IF NOT EXISTS idx_cdr_status ON control_dedup_reviews(review_status);
|
|
CREATE INDEX IF NOT EXISTS idx_cdr_matched ON control_dedup_reviews(matched_control_uuid);
|
|
CREATE INDEX IF NOT EXISTS idx_cdr_parent ON control_dedup_reviews(parent_control_uuid);
|
|
CREATE INDEX IF NOT EXISTS idx_cdr_stage ON control_dedup_reviews(dedup_stage);
|
|
|
|
COMMENT ON TABLE control_dedup_reviews IS
|
|
'Review queue for borderline dedup matches (similarity 0.85-0.92). Human decides: link or new control.';
|
|
|
|
COMMIT;
|