Some checks failed
CI/CD / go-lint (push) Has been skipped
CI/CD / python-lint (push) Has been skipped
CI/CD / nodejs-lint (push) Has been skipped
CI/CD / test-go-ai-compliance (push) Failing after 47s
CI/CD / test-python-backend-compliance (push) Successful in 33s
CI/CD / test-python-document-crawler (push) Successful in 24s
CI/CD / test-python-dsms-gateway (push) Successful in 18s
CI/CD / validate-canonical-controls (push) Successful in 11s
CI/CD / Deploy (push) Has been skipped
Implements the full Multi-Layer Control Architecture for migrating ~25,000 Rich Controls into atomic, deduplicated Master Controls with full traceability.

Architecture: Legal Source → Obligation → Control Pattern → Master Control → Customer Instance

New services:
- ObligationExtractor: 3-tier extraction (exact → embedding → LLM)
- PatternMatcher: 2-tier matching (keyword + embedding + domain-bonus)
- ControlComposer: Pattern + Obligation → Master Control
- PipelineAdapter: Pipeline integration + Migration Passes 1-5
- DecompositionPass: Pass 0a/0b — Rich Control → atomic Controls
- CrosswalkRoutes: 15 API endpoints under /v1/canonical/

New DB schema:
- Migration 060: obligation_extractions, control_patterns, crosswalk_matrix
- Migration 061: obligation_candidates, parent_control_uuid tracking

Pattern Library: 50 YAML patterns (30 core + 20 IT-security)
Go SDK: Pattern loader with YAML validation and indexing
Documentation: MkDocs updated with full architecture overview

500 Python tests passing across all components.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
121 lines
5.5 KiB
SQL
121 lines
5.5 KiB
SQL
-- Migration 060: Multi-Layer Control Architecture — DB Schema
|
|
-- Adds obligation_extractions, control_patterns, and crosswalk_matrix tables.
|
|
-- Extends canonical_controls with pattern_id and obligation_ids columns.
|
|
--
|
|
-- Part of the Multi-Layer Control Architecture (Phase 1 of 8).
|
|
-- See: Legal Source → Obligation → Control Pattern → Master Control → Customer Instance
|
|
|
|
-- =============================================================================
|
|
-- 1. Obligation Extractions
|
|
-- Tracks how each RAG chunk was linked to an obligation
-- (exact match, embedding match, LLM extraction, or inferred).
|
|
-- =============================================================================
|
|
|
|
-- obligation_extractions: links a source chunk (chunk_hash within a collection,
-- located by regulation_code / article / paragraph) to an obligation and,
-- optionally, to a matched pattern, a canonical control and a generation job.
CREATE TABLE IF NOT EXISTS obligation_extractions (
    id                   UUID          DEFAULT gen_random_uuid() PRIMARY KEY,

    -- Where the text came from.
    chunk_hash           VARCHAR(64)   NOT NULL,   -- presumably a hex digest of the chunk; confirm against the writer
    collection           VARCHAR(100)  NOT NULL,
    regulation_code      VARCHAR(100)  NOT NULL,
    article              VARCHAR(100),
    paragraph            VARCHAR(100),

    -- What was extracted, how, and with what confidence (0.00 .. 1.00).
    obligation_id        VARCHAR(50),
    obligation_text      TEXT,
    confidence           NUMERIC(3,2)  CHECK (confidence BETWEEN 0 AND 1),
    extraction_method    VARCHAR(30)   NOT NULL
        CHECK (extraction_method IN ('exact_match', 'embedding_match', 'llm_extracted', 'inferred')),

    -- Pattern match for the obligation; score is constrained to 0.00 .. 1.00.
    -- pattern_id carries no foreign key in this table.
    pattern_id           VARCHAR(50),
    pattern_match_score  NUMERIC(3,2)  CHECK (pattern_match_score BETWEEN 0 AND 1),

    -- Optional links to the resulting control and the producing job.
    control_uuid         UUID,
    job_id               UUID,

    created_at           TIMESTAMPTZ   DEFAULT CURRENT_TIMESTAMP,

    FOREIGN KEY (control_uuid) REFERENCES canonical_controls (id),
    FOREIGN KEY (job_id)       REFERENCES canonical_generation_jobs (id)
);
|
|
|
|
-- Single-column lookup indexes for obligation_extractions.
CREATE INDEX IF NOT EXISTS idx_oe_obligation ON obligation_extractions (obligation_id);
CREATE INDEX IF NOT EXISTS idx_oe_pattern    ON obligation_extractions (pattern_id);
CREATE INDEX IF NOT EXISTS idx_oe_control    ON obligation_extractions (control_uuid);
CREATE INDEX IF NOT EXISTS idx_oe_regulation ON obligation_extractions (regulation_code);
CREATE INDEX IF NOT EXISTS idx_oe_chunk      ON obligation_extractions (chunk_hash);
CREATE INDEX IF NOT EXISTS idx_oe_method     ON obligation_extractions (extraction_method);

-- job_id is a foreign key to canonical_generation_jobs(id). PostgreSQL does not
-- index referencing columns automatically, so without this index every delete
-- of a job (and every per-job lookup) has to scan the whole table. This mirrors
-- idx_oe_control, which covers the other foreign key on this table.
CREATE INDEX IF NOT EXISTS idx_oe_job        ON obligation_extractions (job_id);

COMMENT ON TABLE obligation_extractions IS
    'Tracks chunk-to-obligation linkage from the 3-tier extraction pipeline (exact/embedding/LLM)';
|
|
|
|
-- =============================================================================
|
|
-- 2. Control Patterns Registry
|
|
-- DB mirror of the YAML pattern library for SQL queries and joins.
|
|
-- =============================================================================
|
|
|
|
-- control_patterns: one row per control pattern, keyed by a surrogate UUID and
-- by the unique business key pattern_id. List-like attributes are stored as
-- JSONB and default to an empty JSON array.
CREATE TABLE IF NOT EXISTS control_patterns (
    id                             UUID          DEFAULT gen_random_uuid() PRIMARY KEY,

    -- Identity and classification.
    pattern_id                     VARCHAR(50)   NOT NULL UNIQUE,
    name                           VARCHAR(255)  NOT NULL,
    name_de                        VARCHAR(255),            -- presumably the German name; confirm
    domain                         VARCHAR(10)   NOT NULL,
    category                       VARCHAR(50),
    description                    TEXT,

    -- Template content for controls derived from this pattern.
    template_objective             TEXT,
    template_rationale             TEXT,
    template_requirements          JSONB         DEFAULT '[]'::jsonb,
    template_test_procedure        JSONB         DEFAULT '[]'::jsonb,
    template_evidence              JSONB         DEFAULT '[]'::jsonb,

    -- Defaults, restricted to fixed value sets (NULL is allowed).
    severity_default               VARCHAR(20)
        CHECK (severity_default IN ('low', 'medium', 'high', 'critical')),
    implementation_effort_default  VARCHAR(2)
        CHECK (implementation_effort_default IN ('s', 'm', 'l', 'xl')),

    -- Matching and composition metadata.
    obligation_match_keywords      JSONB         DEFAULT '[]'::jsonb,
    tags                           JSONB         DEFAULT '[]'::jsonb,
    open_anchor_refs               JSONB         DEFAULT '[]'::jsonb,
    composable_with                JSONB         DEFAULT '[]'::jsonb,

    version                        VARCHAR(10)   DEFAULT '1.0',
    created_at                     TIMESTAMPTZ   DEFAULT CURRENT_TIMESTAMP,
    -- No trigger is defined in this migration; updated_at only changes when a
    -- writer sets it explicitly.
    updated_at                     TIMESTAMPTZ   DEFAULT CURRENT_TIMESTAMP
);
|
|
|
|
-- Lookup indexes for control_patterns.
CREATE INDEX IF NOT EXISTS idx_cp_domain   ON control_patterns (domain);
CREATE INDEX IF NOT EXISTS idx_cp_category ON control_patterns (category);

-- No separate index on pattern_id: the UNIQUE constraint on that column is
-- already backed by a unique index that serves equality lookups and joins.
-- A second index on the same column would only add write and storage
-- overhead.

COMMENT ON TABLE control_patterns IS
    'Registry of control patterns (DB mirror of YAML library). Pattern ID format: CP-{DOMAIN}-{NNN}';
|
|
|
|
-- =============================================================================
|
|
-- 3. Crosswalk Matrix
|
|
-- The "golden thread" from legal source through to implementation.
|
|
-- =============================================================================
|
|
|
|
-- crosswalk_matrix: one row per mapping from a regulation location
-- (regulation_code / article / paragraph) through obligation and pattern to a
-- master control and a TOM control. Only regulation_code is mandatory.
CREATE TABLE IF NOT EXISTS crosswalk_matrix (
    id                   UUID          DEFAULT gen_random_uuid() PRIMARY KEY,

    -- Legal source.
    regulation_code      VARCHAR(100)  NOT NULL,
    article              VARCHAR(100),
    paragraph            VARCHAR(100),

    -- Intermediate layers; plain identifiers without foreign keys.
    obligation_id        VARCHAR(50),
    pattern_id           VARCHAR(50),

    -- Target control: textual ID plus an optional reference to the
    -- canonical_controls row.
    master_control_id    VARCHAR(20),
    master_control_uuid  UUID,
    tom_control_id       VARCHAR(30),

    -- Mapping quality (0.00 .. 1.00) and origin of the row.
    confidence           NUMERIC(3,2)  CHECK (confidence BETWEEN 0 AND 1),
    source               VARCHAR(30)   DEFAULT 'auto'
        CHECK (source IN ('manual', 'auto', 'migrated')),

    created_at           TIMESTAMPTZ   DEFAULT CURRENT_TIMESTAMP,

    FOREIGN KEY (master_control_uuid) REFERENCES canonical_controls (id)
);
|
|
|
|
-- Lookup indexes for crosswalk_matrix.
-- idx_cw_regulation is composite: it serves filters on regulation_code alone
-- or on regulation_code together with article.
CREATE INDEX IF NOT EXISTS idx_cw_regulation   ON crosswalk_matrix (regulation_code, article);
CREATE INDEX IF NOT EXISTS idx_cw_obligation   ON crosswalk_matrix (obligation_id);
CREATE INDEX IF NOT EXISTS idx_cw_pattern      ON crosswalk_matrix (pattern_id);
CREATE INDEX IF NOT EXISTS idx_cw_control      ON crosswalk_matrix (master_control_id);
CREATE INDEX IF NOT EXISTS idx_cw_tom          ON crosswalk_matrix (tom_control_id);

-- master_control_uuid is a foreign key to canonical_controls(id). PostgreSQL
-- does not index referencing columns automatically, so joins on the UUID and
-- deletes of a canonical control would otherwise scan this table. This matches
-- idx_oe_control on the equivalent column of obligation_extractions.
CREATE INDEX IF NOT EXISTS idx_cw_control_uuid ON crosswalk_matrix (master_control_uuid);

COMMENT ON TABLE crosswalk_matrix IS
    'Golden thread: regulation → article → obligation → pattern → master control → TOM';
|
|
|
|
-- =============================================================================
|
|
-- 4. Extend canonical_controls with pattern + obligation linkage
|
|
-- =============================================================================
|
|
|
|
-- Add the pattern / obligation linkage columns to canonical_controls in one
-- statement. Both clauses are guarded by IF NOT EXISTS, so re-running the
-- migration leaves existing columns untouched.
ALTER TABLE canonical_controls
    ADD COLUMN IF NOT EXISTS pattern_id     VARCHAR(50),
    ADD COLUMN IF NOT EXISTS obligation_ids JSONB DEFAULT '[]'::jsonb;

-- Supports looking up controls by pattern.
CREATE INDEX IF NOT EXISTS idx_cc_pattern
    ON canonical_controls (pattern_id);
|