Files
breakpilot-compliance/backend-compliance/migrations/060_crosswalk_matrix.sql
Benjamin Admin 825e070ed9
Some checks failed
CI/CD / go-lint (push) Has been skipped
CI/CD / python-lint (push) Has been skipped
CI/CD / nodejs-lint (push) Has been skipped
CI/CD / test-go-ai-compliance (push) Failing after 47s
CI/CD / test-python-backend-compliance (push) Successful in 33s
CI/CD / test-python-document-crawler (push) Successful in 24s
CI/CD / test-python-dsms-gateway (push) Successful in 18s
CI/CD / validate-canonical-controls (push) Successful in 11s
CI/CD / Deploy (push) Has been skipped
feat(multi-layer): complete Multi-Layer Control Architecture (Phases 1-8 + Pass 0)
Implements the full Multi-Layer Control Architecture for migrating ~25,000
Rich Controls into atomic, deduplicated Master Controls with full traceability.

Architecture: Legal Source → Obligation → Control Pattern → Master Control → Customer Instance

New services:
- ObligationExtractor: 3-tier extraction (exact → embedding → LLM)
- PatternMatcher: 2-tier matching (keyword + embedding + domain-bonus)
- ControlComposer: Pattern + Obligation → Master Control
- PipelineAdapter: Pipeline integration + Migration Passes 1-5
- DecompositionPass: Pass 0a/0b — Rich Control → atomic Controls
- CrosswalkRoutes: 15 API endpoints under /v1/canonical/

New DB schema:
- Migration 060: obligation_extractions, control_patterns, crosswalk_matrix
- Migration 061: obligation_candidates, parent_control_uuid tracking

Pattern Library: 50 YAML patterns (30 core + 20 IT-security)
Go SDK: Pattern loader with YAML validation and indexing
Documentation: MkDocs updated with full architecture overview

500 Python tests passing across all components.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-17 09:00:37 +01:00

121 lines
5.5 KiB
SQL

-- Migration 060: Multi-Layer Control Architecture — DB Schema
-- Adds obligation_extractions, control_patterns, and crosswalk_matrix tables.
-- Extends canonical_controls with pattern_id and obligation_ids columns.
--
-- Part of the Multi-Layer Control Architecture (Phase 1 of 8).
-- See: Legal Source → Obligation → Control Pattern → Master Control → Customer Instance
-- =============================================================================
-- 1. Obligation Extractions
--    One row per RAG chunk that was linked to an obligation. Records which
--    extraction method produced the link and the scores attached to it.
-- =============================================================================
CREATE TABLE IF NOT EXISTS obligation_extractions (
    id                  UUID PRIMARY KEY DEFAULT gen_random_uuid(),

    -- Source chunk and its position inside the regulation.
    chunk_hash          VARCHAR(64)  NOT NULL,
    collection          VARCHAR(100) NOT NULL,
    regulation_code     VARCHAR(100) NOT NULL,
    article             VARCHAR(100),
    paragraph           VARCHAR(100),

    -- Extracted obligation. Both columns are nullable.
    obligation_id       VARCHAR(50),
    obligation_text     TEXT,

    -- Scores are fractions in [0, 1]; NULL is permitted.
    confidence          NUMERIC(3,2) CHECK (confidence >= 0 AND confidence <= 1),

    -- The CHECK accepts 'inferred' in addition to the three pipeline tiers.
    extraction_method   VARCHAR(30)  NOT NULL
        CHECK (extraction_method IN ('exact_match', 'embedding_match', 'llm_extracted', 'inferred')),

    -- Plain VARCHAR: no foreign key to control_patterns is declared here.
    pattern_id          VARCHAR(50),
    pattern_match_score NUMERIC(3,2) CHECK (pattern_match_score >= 0 AND pattern_match_score <= 1),

    -- Links to rows created by earlier migrations; no ON DELETE action is
    -- specified, so PostgreSQL's default (NO ACTION) applies.
    control_uuid        UUID REFERENCES canonical_controls(id),
    job_id              UUID REFERENCES canonical_generation_jobs(id),

    created_at          TIMESTAMPTZ DEFAULT NOW()
);

CREATE INDEX IF NOT EXISTS idx_oe_obligation ON obligation_extractions(obligation_id);
CREATE INDEX IF NOT EXISTS idx_oe_pattern    ON obligation_extractions(pattern_id);
CREATE INDEX IF NOT EXISTS idx_oe_control    ON obligation_extractions(control_uuid);
CREATE INDEX IF NOT EXISTS idx_oe_regulation ON obligation_extractions(regulation_code);
CREATE INDEX IF NOT EXISTS idx_oe_chunk      ON obligation_extractions(chunk_hash);
CREATE INDEX IF NOT EXISTS idx_oe_method     ON obligation_extractions(extraction_method);
-- job_id is a foreign key, and PostgreSQL does not index referencing columns
-- automatically. Without this index every DELETE (or key UPDATE) on
-- canonical_generation_jobs has to scan this whole table to verify the FK.
CREATE INDEX IF NOT EXISTS idx_oe_job        ON obligation_extractions(job_id);

COMMENT ON TABLE obligation_extractions IS
'Tracks chunk-to-obligation linkage from the 3-tier extraction pipeline (exact/embedding/LLM)';
-- =============================================================================
-- 2. Control Patterns Registry
--    DB mirror of the YAML pattern library for SQL queries and joins.
--    pattern_id is the business key (format per the table comment:
--    CP-{DOMAIN}-{NNN}); id is a surrogate UUID.
-- =============================================================================
CREATE TABLE IF NOT EXISTS control_patterns (
    id                            UUID PRIMARY KEY DEFAULT gen_random_uuid(),

    -- UNIQUE makes PostgreSQL create a unique btree index on pattern_id
    -- (auto-named control_patterns_pattern_id_key), so lookups by pattern_id
    -- are already indexed.
    pattern_id                    VARCHAR(50)  UNIQUE NOT NULL,
    name                          VARCHAR(255) NOT NULL,
    name_de                       VARCHAR(255),
    domain                        VARCHAR(10)  NOT NULL,
    category                      VARCHAR(50),
    description                   TEXT,

    -- Template content; the JSONB list columns default to an empty array.
    template_objective            TEXT,
    template_rationale            TEXT,
    template_requirements         JSONB DEFAULT '[]',
    template_test_procedure       JSONB DEFAULT '[]',
    template_evidence             JSONB DEFAULT '[]',

    -- Defaults are restricted to fixed vocabularies; NULL is allowed.
    severity_default              VARCHAR(20)
        CHECK (severity_default IN ('low', 'medium', 'high', 'critical')),
    implementation_effort_default VARCHAR(2)
        CHECK (implementation_effort_default IN ('s', 'm', 'l', 'xl')),

    obligation_match_keywords     JSONB DEFAULT '[]',
    tags                          JSONB DEFAULT '[]',
    open_anchor_refs              JSONB DEFAULT '[]',
    composable_with               JSONB DEFAULT '[]',

    version                       VARCHAR(10) DEFAULT '1.0',
    created_at                    TIMESTAMPTZ DEFAULT NOW(),
    updated_at                    TIMESTAMPTZ DEFAULT NOW()
);

CREATE INDEX IF NOT EXISTS idx_cp_domain   ON control_patterns(domain);
CREATE INDEX IF NOT EXISTS idx_cp_category ON control_patterns(category);
-- No separate index on pattern_id: the UNIQUE constraint above already
-- provides one, and a second identical btree would only add write overhead
-- and storage without helping any query.

COMMENT ON TABLE control_patterns IS
'Registry of control patterns (DB mirror of YAML library). Pattern ID format: CP-{DOMAIN}-{NNN}';
-- =============================================================================
-- 3. Crosswalk Matrix
--    The "golden thread" from legal source through to implementation: each row
--    ties a regulation reference to an obligation, a pattern, a master control
--    and a TOM control.
-- =============================================================================
CREATE TABLE IF NOT EXISTS crosswalk_matrix (
    id                  UUID PRIMARY KEY DEFAULT gen_random_uuid(),

    -- Legal source; only regulation_code is mandatory.
    regulation_code     VARCHAR(100) NOT NULL,
    article             VARCHAR(100),
    paragraph           VARCHAR(100),

    -- Intermediate layers, stored as plain identifiers (no FK declared).
    obligation_id       VARCHAR(50),
    pattern_id          VARCHAR(50),

    -- The master control is carried twice: as a textual identifier and as a
    -- UUID foreign key into canonical_controls.
    master_control_id   VARCHAR(20),
    master_control_uuid UUID REFERENCES canonical_controls(id),

    tom_control_id      VARCHAR(30),

    -- Fraction in [0, 1]; NULL is permitted.
    confidence          NUMERIC(3,2) CHECK (confidence >= 0 AND confidence <= 1),

    -- Provenance of the row; defaults to 'auto'.
    source              VARCHAR(30) DEFAULT 'auto'
        CHECK (source IN ('manual', 'auto', 'migrated')),

    created_at          TIMESTAMPTZ DEFAULT NOW()
);

CREATE INDEX IF NOT EXISTS idx_cw_regulation   ON crosswalk_matrix(regulation_code, article);
CREATE INDEX IF NOT EXISTS idx_cw_obligation   ON crosswalk_matrix(obligation_id);
CREATE INDEX IF NOT EXISTS idx_cw_pattern      ON crosswalk_matrix(pattern_id);
CREATE INDEX IF NOT EXISTS idx_cw_control      ON crosswalk_matrix(master_control_id);
-- master_control_uuid is the actual foreign key, and PostgreSQL does not index
-- referencing columns automatically. Without this index, joins on the UUID and
-- every DELETE on canonical_controls would scan the whole matrix.
CREATE INDEX IF NOT EXISTS idx_cw_control_uuid ON crosswalk_matrix(master_control_uuid);
CREATE INDEX IF NOT EXISTS idx_cw_tom          ON crosswalk_matrix(tom_control_id);

COMMENT ON TABLE crosswalk_matrix IS
'Golden thread: regulation → article → obligation → pattern → master control → TOM';
-- =============================================================================
-- 4. Extend canonical_controls with pattern + obligation linkage
--    Both additions are guarded by IF NOT EXISTS, so re-running the migration
--    is a no-op for columns and index that are already present.
-- =============================================================================
ALTER TABLE canonical_controls
    -- Identifier of the pattern linked to this control (nullable, no FK declared).
    ADD COLUMN IF NOT EXISTS pattern_id     VARCHAR(50),
    -- JSON list of linked obligation identifiers; defaults to an empty array.
    ADD COLUMN IF NOT EXISTS obligation_ids JSONB DEFAULT '[]';

-- Supports lookups of controls by pattern.
CREATE INDEX IF NOT EXISTS idx_cc_pattern
    ON canonical_controls (pattern_id);