Files
breakpilot-core/control-pipeline/migrations/011_derived_controls.sql
T
Benjamin Admin 7d721a6787
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-consent (push) Successful in 40s
CI / test-python-voice (push) Successful in 36s
CI / test-bqas (push) Successful in 33s
feat(control-pipeline): BSI QUAIDAL Clean-Room ingestion (AI Act Art. 10)
Clean-Room derivation of 195 controls from BSI QUAIDAL (10 criteria + 15
building blocks + 30 measures + 140 metrics) for EU AI Act Art. 10
training-data quality compliance.

- ingest_bsi_quaidal.py parses YAML frontmatter into a structural index
  (no protected prose stored on disk).
- derive_quaidal_mcs.py rewrites each entry via local LLM (qwen3.5:35b-a3b)
  with a hard 4-gram plagiarism gate < 20%; achieved mean overlap 0.5%.
- Migration 011 adds compliance.derived_controls table with full source
  provenance (framework, section, url, commit SHA, license note).
- apply_quaidal_to_db.py UPSERTs YAML into DB.
- Source repo (legal-sources/bsi-quaidal/) gitignored.

Same pattern as IACE module DIN-reference handling: name the norm and
section, never quote.

Backed by BSI license clarification 2026-05: § 5 UrhG anwendbar,
share:true im Frontmatter; Clean-Room derivation is the safe path.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-19 13:02:49 +02:00

59 lines
2.9 KiB
PL/PgSQL

-- Migration 011: Derived Controls Library (Clean-Room MCs from external sources)
-- Schema: compliance
--
-- Holds Master Controls + atomic controls + mitigations + metrics that were
-- derived Clean-Room from external regulatory sources (BSI QUAIDAL today,
-- Grundschutz++/CRA/NIST AI RMF next). Kept separate from the gpre2
-- master_controls table because:
-- 1) The shape is different (no object_group/phase concepts).
-- 2) Source-layer separation: derivations from external IP must be cleanly
-- separable from internally-generated artifacts.
-- 3) Each row carries the licence + provenance for due diligence.
--
-- Run: ssh macmini "docker exec -i bp-core-postgres psql -U breakpilot -d breakpilot_db" \
-- < control-pipeline/migrations/011_derived_controls.sql
-- Resolve the unqualified object names used by the statements below against
-- the compliance schema first, then public.
SET search_path = compliance, public;
-- Library of controls derived Clean-Room from external regulatory sources.
-- derived_id is the stable, unique business key; every row also records the
-- provenance of the source it was derived from and the licence note.
--
-- NOTE: because of IF NOT EXISTS, the constraints declared here only take
-- effect when the table is first created; an already-existing table is left
-- untouched by this statement.
CREATE TABLE IF NOT EXISTS derived_controls (
    id                              UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    derived_id                      VARCHAR(200) UNIQUE NOT NULL,          -- e.g. MC-AI-DATA-QKB-01-repraesentativitaet
    kind                            VARCHAR(30) NOT NULL,                  -- criterion | building_block | measure | metric
    canonical_name                  VARCHAR(300) NOT NULL,
    description                     TEXT NOT NULL,                         -- our own wording, never the original
    regulation_anchor               TEXT,                                  -- e.g. "EU AI Act Art. 10"
    related_quaidal_ids             JSONB NOT NULL DEFAULT '[]'::jsonb,    -- ["QB-03", "QB-04", ...]
    external_refs                   JSONB NOT NULL DEFAULT '[]'::jsonb,    -- [{framework, citation}, ...]
    source_framework                VARCHAR(80) NOT NULL,                  -- "BSI QUAIDAL"
    source_section                  VARCHAR(80) NOT NULL,                  -- "QKB-01"
    source_url                      TEXT,
    source_commit_sha               VARCHAR(80),
    source_title_original           TEXT,                                  -- original title (label, not protected)
    source_license_note             TEXT,
    -- Overlap ratio measured at generation time, 0..1; gate was 0.20.
    -- NUMERIC(5,4) alone would also accept values such as 9.9999 or -1, so
    -- the documented range is enforced explicitly. NULL (not measured) is
    -- still allowed.
    plagiarism_score_at_generation  NUMERIC(5,4)
        CONSTRAINT derived_controls_plagiarism_score_check
        CHECK (plagiarism_score_at_generation >= 0 AND plagiarism_score_at_generation <= 1),
    generated_by_model              VARCHAR(80),
    yaml_path                       TEXT,                                  -- pointer back to source YAML
    -- Audit timestamps always have a value: defaulted on insert, and
    -- updated_at is refreshed by the BEFORE UPDATE trigger on this table.
    created_at                      TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at                      TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- Equality lookups by entry kind and by source provenance.
CREATE INDEX IF NOT EXISTS idx_derived_controls_kind
    ON derived_controls USING btree (kind);

CREATE INDEX IF NOT EXISTS idx_derived_controls_source_framework
    ON derived_controls USING btree (source_framework);

CREATE INDEX IF NOT EXISTS idx_derived_controls_source_section
    ON derived_controls USING btree (source_section);

-- GIN index over the JSONB array of related QUAIDAL ids.
CREATE INDEX IF NOT EXISTS idx_derived_controls_related_quaidal_gin
    ON derived_controls USING gin (related_quaidal_ids);
-- Row-level trigger function: stamps updated_at with the current transaction
-- timestamp and hands the (modified) row back so the write proceeds.
CREATE OR REPLACE FUNCTION trg_derived_controls_set_updated_at()
    RETURNS TRIGGER
    LANGUAGE plpgsql
AS $set_updated_at$
BEGIN
    NEW.updated_at := CURRENT_TIMESTAMP;
    RETURN NEW;
END;
$set_updated_at$;
-- Drop-then-create keeps the migration re-runnable: a second run replaces the
-- trigger instead of failing because it already exists.
DROP TRIGGER IF EXISTS derived_controls_updated_at
    ON derived_controls;

CREATE TRIGGER derived_controls_updated_at
    BEFORE UPDATE
    ON derived_controls
    FOR EACH ROW
    EXECUTE FUNCTION trg_derived_controls_set_updated_at();