feat(control-pipeline): BSI QUAIDAL Clean-Room ingestion (AI Act Art. 10)
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-consent (push) Successful in 40s
CI / test-python-voice (push) Successful in 36s
CI / test-bqas (push) Successful in 33s
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-consent (push) Successful in 40s
CI / test-python-voice (push) Successful in 36s
CI / test-bqas (push) Successful in 33s
Clean-Room derivation of 195 controls from BSI QUAIDAL (10 criteria + 15 building blocks + 30 measures + 140 metrics) for EU AI Act Art. 10 training-data quality compliance. - ingest_bsi_quaidal.py parses YAML frontmatter into a structural index (no protected prose stored on disk). - derive_quaidal_mcs.py rewrites each entry via a local LLM (qwen3.5:35b-a3b) with a hard 4-gram plagiarism gate < 20%; achieved mean overlap 0.5%. - Migration 011 adds the compliance.derived_controls table with full source provenance (framework, section, url, commit SHA, license note). - apply_quaidal_to_db.py UPSERTs YAML into DB. - Source repo (legal-sources/bsi-quaidal/) is gitignored. Same pattern as IACE module DIN-reference handling: name the norm and section, never quote. Backed by BSI license clarification 2026-05: § 5 UrhG (German Copyright Act) applies and share:true is set in the frontmatter; Clean-Room derivation is the safe path. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,58 @@
|
||||
-- Migration 011: Derived Controls Library (Clean-Room MCs from external sources)
|
||||
-- Schema: compliance
|
||||
--
|
||||
-- Holds Master Controls + atomic controls + mitigations + metrics that were
|
||||
-- derived Clean-Room from external regulatory sources (BSI QUAIDAL today,
|
||||
-- Grundschutz++/CRA/NIST AI RMF next). Kept separate from the gpre2
|
||||
-- master_controls table because:
|
||||
-- 1) The shape is different (no object_group/phase concepts).
|
||||
-- 2) Source-layer separation: derivations from external IP must be cleanly
|
||||
-- separable from internally-generated artifacts.
|
||||
-- 3) Each row carries the licence + provenance for due diligence.
|
||||
--
|
||||
-- Run: ssh macmini "docker exec -i bp-core-postgres psql -U breakpilot -d breakpilot_db" \
|
||||
-- < control-pipeline/migrations/011_derived_controls.sql
|
||||
|
||||
-- Resolve the unqualified object names used by the rest of this migration
-- against the compliance schema first, then public.
-- NOTE(review): PostgreSQL skips search_path entries that do not exist, so this
-- assumes an earlier migration already created the compliance schema; otherwise
-- the objects below would be created in public. Confirm before running on a
-- fresh database.
SET search_path = compliance, public;
|
||||
|
||||
-- Library of controls derived Clean-Room from external regulatory sources.
-- One row per derived entry; each row carries its own source provenance and
-- licence note. The unqualified table name resolves through the search_path
-- set earlier in this migration.
-- NOTE(review): gen_random_uuid() is built in from PostgreSQL 13 onwards; older
-- servers need the pgcrypto extension. Confirm the target server version.
CREATE TABLE IF NOT EXISTS derived_controls (
    -- Identity
    id                              UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    derived_id                      VARCHAR(200) UNIQUE NOT NULL,       -- e.g. MC-AI-DATA-QKB-01-repraesentativitaet
    kind                            VARCHAR(30) NOT NULL,               -- criterion | building_block | measure | metric

    -- Content (always our own wording)
    canonical_name                  VARCHAR(300) NOT NULL,
    description                     TEXT NOT NULL,                      -- our own wording, never the original
    regulation_anchor               TEXT,                               -- e.g. "EU AI Act Art. 10"
    related_quaidal_ids             JSONB NOT NULL DEFAULT '[]',        -- ["QB-03", "QB-04", ...]
    external_refs                   JSONB NOT NULL DEFAULT '[]',        -- [{framework, citation}, ...]

    -- Source provenance (for due diligence)
    source_framework                VARCHAR(80) NOT NULL,               -- "BSI QUAIDAL"
    source_section                  VARCHAR(80) NOT NULL,               -- "QKB-01"
    source_url                      TEXT,
    source_commit_sha               VARCHAR(80),
    source_title_original           TEXT,                               -- original title (label, not protected)
    source_license_note             TEXT,

    -- Generation metadata
    plagiarism_score_at_generation  NUMERIC(5,4),                       -- 0..1; gate was 0.20
    generated_by_model              VARCHAR(80),
    yaml_path                       TEXT,                               -- pointer back to source YAML

    -- Audit timestamps; NOT NULL so an explicit NULL cannot bypass the default
    created_at                      TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at                      TIMESTAMPTZ NOT NULL DEFAULT NOW(),

    -- NUMERIC(5,4) alone would accept values up to 9.9999; enforce the
    -- documented 0..1 range. A NULL score (not recorded) still passes the check.
    CONSTRAINT derived_controls_plagiarism_score_at_generation_check
        CHECK (plagiarism_score_at_generation >= 0
               AND plagiarism_score_at_generation <= 1)
);
|
||||
|
||||
-- Secondary indexes on derived_controls. All are created idempotently.

-- Equality lookups by entry kind.
CREATE INDEX IF NOT EXISTS idx_derived_controls_kind
    ON derived_controls USING btree (kind);

-- Equality lookups by originating framework.
CREATE INDEX IF NOT EXISTS idx_derived_controls_source_framework
    ON derived_controls USING btree (source_framework);

-- Equality lookups by section within the originating framework.
CREATE INDEX IF NOT EXISTS idx_derived_controls_source_section
    ON derived_controls USING btree (source_section);

-- GIN index (default jsonb operator class) over the JSONB array of related IDs.
CREATE INDEX IF NOT EXISTS idx_derived_controls_related_quaidal_gin
    ON derived_controls USING gin (related_quaidal_ids);
|
||||
|
||||
-- Trigger to keep updated_at fresh
|
||||
-- Row-level trigger function: overwrites updated_at on the row being written
-- with the current transaction timestamp and passes the row on unchanged
-- otherwise.
CREATE OR REPLACE FUNCTION trg_derived_controls_set_updated_at()
    RETURNS trigger
    LANGUAGE plpgsql
AS $body$
BEGIN
    NEW.updated_at := now();
    RETURN NEW;
END;
$body$;
|
||||
|
||||
-- Drop-then-create keeps the migration re-runnable: CREATE TRIGGER alone would
-- fail if the trigger already exists.
DROP TRIGGER IF EXISTS derived_controls_updated_at
    ON derived_controls;

-- Runs the function once per row, before each UPDATE is applied.
CREATE TRIGGER derived_controls_updated_at
    BEFORE UPDATE
    ON derived_controls
    FOR EACH ROW
    EXECUTE FUNCTION trg_derived_controls_set_updated_at();
|
||||
Reference in New Issue
Block a user