Files
Benjamin Admin a9e0869205 feat(pipeline): pipeline_version v2, migration 062, docs + 71 tests
- Add PIPELINE_VERSION=2 constant and pipeline_version column to
  canonical_controls and canonical_processed_chunks (migration 062)
- Anthropic API decides chunk relevance via null-returns (skip_prefilter)
- Annex/appendix chunks explicitly protected in prompts
- Fix 6 failing tests (CRYP domain, _process_batch tuple return)
- Add TestPipelineVersion + TestRegulationFilter test classes (10 new tests)
- Add MkDocs page: control-generator-pipeline.md (541 lines)
- Update canonical-control-library.md with v2 pipeline diagram
- Update testing.md with 71-test breakdown table

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-17 17:31:11 +01:00

23 lines
1.1 KiB
SQL

-- Migration 062: Add pipeline_version to track which generation rules produced each control/chunk
--
-- v1 = Original pipeline (local LLM prefilter, old prompt without null-skip)
-- v2 = Improved pipeline (skip_prefilter, Anthropic decides relevance, annexes protected)
--
-- This allows identifying controls that may need reprocessing when pipeline rules change.
-- Version marker on generated controls. The column is mandatory and defaults
-- to 1; IF NOT EXISTS keeps the statement safe to re-run.
ALTER TABLE canonical_controls
    ADD COLUMN IF NOT EXISTS pipeline_version SMALLINT NOT NULL DEFAULT 1;

-- Same marker, same definition, on the processed-chunk bookkeeping table.
ALTER TABLE canonical_processed_chunks
    ADD COLUMN IF NOT EXISTS pipeline_version SMALLINT NOT NULL DEFAULT 1;
-- One single-column index per table to support filtering rows by
-- pipeline_version. IF NOT EXISTS makes both statements re-runnable.
CREATE INDEX IF NOT EXISTS idx_canonical_controls_pipeline_version
    ON canonical_controls (pipeline_version);

CREATE INDEX IF NOT EXISTS idx_canonical_processed_chunks_pipeline_version
    ON canonical_processed_chunks (pipeline_version);
-- Attach human-readable descriptions to the two pipeline_version columns.
COMMENT ON COLUMN canonical_controls.pipeline_version
    IS 'Generation pipeline version: 1=original (local prefilter), 2=improved (Anthropic decides relevance, annexes protected)';

COMMENT ON COLUMN canonical_processed_chunks.pipeline_version
    IS 'Pipeline version used when this chunk was processed';