Files
Benjamin Admin 643b26618f
Some checks failed
CI/CD / go-lint (push) Has been skipped
CI/CD / python-lint (push) Has been skipped
CI/CD / nodejs-lint (push) Has been skipped
CI/CD / test-go-ai-compliance (push) Failing after 31s
CI/CD / test-python-backend-compliance (push) Successful in 1m35s
CI/CD / test-python-document-crawler (push) Successful in 20s
CI/CD / test-python-dsms-gateway (push) Successful in 17s
CI/CD / validate-canonical-controls (push) Successful in 10s
CI/CD / Deploy (push) Has been skipped
feat: Control Library UI, dedup migration, QA tooling, docs
- Control Library: parent control display, ObligationTypeBadge,
  GenerationStrategyBadge variants, evidence string fallback
- API: expose parent_control_uuid/id/title in canonical controls
- Fix: DSFA SQLAlchemy 2.0 Row._mapping compatibility
- Migration 074: control_parent_links + control_dedup_reviews tables
- QA scripts: benchmark, gap analysis, OSCAL import, OWASP cleanup,
  phase5 normalize, phase74 gap fill, sync_db, run_job
- Docs: dedup engine, RAG benchmark, lessons learned, pipeline docs

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-21 11:56:08 +01:00

74 lines
3.5 KiB
PL/PgSQL

-- Migration 074: Control Dedup Engine — DB Schema
-- Supports the 4-stage dedup pipeline for atomic controls (Pass 0b).
--
-- Tables:
-- 1. control_parent_links — M:N parent linking (one control → many regulations)
-- 2. control_dedup_reviews — Review queue for borderline matches (0.85-0.92)
BEGIN;
-- =============================================================================
-- 1. Control Parent Links (M:N)
-- Enables "1 Control erfuellt 5 Gesetze" — the biggest USP.
-- An atomic control can have multiple parent controls from different
-- regulations/obligations. This replaces the 1:1 parent_control_uuid FK.
-- =============================================================================
CREATE TABLE IF NOT EXISTS control_parent_links (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
control_uuid UUID NOT NULL REFERENCES canonical_controls(id) ON DELETE CASCADE,
parent_control_uuid UUID NOT NULL REFERENCES canonical_controls(id) ON DELETE CASCADE,
link_type VARCHAR(30) NOT NULL DEFAULT 'decomposition'
CHECK (link_type IN ('decomposition', 'dedup_merge', 'manual', 'crosswalk')),
confidence NUMERIC(3,2) DEFAULT 1.0
CHECK (confidence >= 0 AND confidence <= 1),
source_regulation VARCHAR(100),
source_article VARCHAR(100),
obligation_candidate_id UUID REFERENCES obligation_candidates(id),
created_at TIMESTAMPTZ DEFAULT NOW(),
CONSTRAINT uq_parent_link UNIQUE (control_uuid, parent_control_uuid)
);
CREATE INDEX IF NOT EXISTS idx_cpl_control ON control_parent_links(control_uuid);
CREATE INDEX IF NOT EXISTS idx_cpl_parent ON control_parent_links(parent_control_uuid);
CREATE INDEX IF NOT EXISTS idx_cpl_type ON control_parent_links(link_type);
COMMENT ON TABLE control_parent_links IS
'M:N parent links — one atomic control can fulfill multiple regulations/obligations. USP: "1 Control erfuellt 5 Gesetze"';
-- =============================================================================
-- 2. Control Dedup Reviews
-- Queue for borderline matches (similarity 0.85-0.92) that need human review.
-- Reviewed entries get status updated to accepted/rejected.
-- =============================================================================
CREATE TABLE IF NOT EXISTS control_dedup_reviews (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
candidate_control_id VARCHAR(30) NOT NULL,
candidate_title TEXT NOT NULL,
candidate_objective TEXT,
matched_control_uuid UUID REFERENCES canonical_controls(id),
matched_control_id VARCHAR(30),
similarity_score NUMERIC(4,3) DEFAULT 0.0,
dedup_stage VARCHAR(40) NOT NULL,
dedup_details JSONB DEFAULT '{}',
parent_control_uuid UUID REFERENCES canonical_controls(id),
obligation_candidate_id UUID REFERENCES obligation_candidates(id),
review_status VARCHAR(20) DEFAULT 'pending'
CHECK (review_status IN ('pending', 'accepted_link', 'accepted_new', 'rejected')),
reviewed_by VARCHAR(100),
reviewed_at TIMESTAMPTZ,
review_notes TEXT,
created_at TIMESTAMPTZ DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_cdr_status ON control_dedup_reviews(review_status);
CREATE INDEX IF NOT EXISTS idx_cdr_matched ON control_dedup_reviews(matched_control_uuid);
CREATE INDEX IF NOT EXISTS idx_cdr_parent ON control_dedup_reviews(parent_control_uuid);
CREATE INDEX IF NOT EXISTS idx_cdr_stage ON control_dedup_reviews(dedup_stage);
COMMENT ON TABLE control_dedup_reviews IS
'Review queue for borderline dedup matches (similarity 0.85-0.92). Human decides: link or new control.';
COMMIT;