docs: update Architecture + SDK Flow with Control Pipeline + Dependency Engine

Architecture (architecture-data.ts):
- Replace document-crawler with control-pipeline (Port 8098)
- Add 9 DB tables, 5 RAG collections, 10 API endpoints
- Add edges: control-pipeline → PostgreSQL, Qdrant, Ollama

SDK Flow (steps-betrieb.ts):
- Add 4 new steps (seq 5200-5500):
  - Canonical Control Library (7-stage generation pipeline)
  - Pass 0a: Obligation Extraction (181k obligations)
  - Pass 0b: Atomic Composition (MCP-taugliche controls)
  - Dependency Engine + Evaluation (5 types, auto-generation)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-04-26 21:04:11 +02:00
parent ef7742cd44
commit a2205abea1
2 changed files with 123 additions and 20 deletions

View File

@@ -228,24 +228,39 @@ export const ARCH_SERVICES: ArchService[] = [
dependsOn: ['qdrant', 'ollama', 'postgresql'], dependsOn: ['qdrant', 'ollama', 'postgresql'],
}, },
{ {
id: 'document-crawler', id: 'control-pipeline',
name: 'Document Crawler', name: 'Control Pipeline',
nameShort: 'Crawler', nameShort: 'Pipeline',
layer: 'backend', layer: 'backend',
tech: 'Python / FastAPI', tech: 'Python / FastAPI',
port: 8098, port: 8098,
url: 'https://macmini:8098', url: 'https://macmini:8098',
container: 'bp-compliance-document-crawler', container: 'bp-core-control-pipeline',
description: 'Dokument-Analyse (PDF, DOCX, XLSX, PPTX), Gap-Analyse, IPFS-Archivierung.', description: 'RAG-zu-Controls Pipeline: Control Generation, Pass 0a/0b, Ontology, Dedup, Dependency Engine, Applicability.',
descriptionLong: 'Der Document Crawler nimmt hochgeladene Dokumente (PDF, DOCX, XLSX, PPTX) entgegen, extrahiert deren Inhalt und fuehrt eine Gap-Analyse gegen bestehende Compliance-Anforderungen durch. Dafuer leitet er die Textinhalte an den AI Compliance SDK weiter, der die semantische Analyse uebernimmt. Abgeschlossene Dokumente koennen ueber den DSMS-Service dezentral auf IPFS archiviert werden.', descriptionLong: 'Die Control Pipeline ist das Herzsttueck der automatisierten Compliance-Control-Generierung. Sie verarbeitet ~105.000 RAG-Chunks aus EU/DE-Regulierungen in 6 Phasen: (1) RAG Ingestion, (2) 7-Stufen Control Generation (Lizenz-Gate + Claude LLM), (3) Pass 0a Obligation Extraction (~181k Obligations), (4) Pass 0b Atomic Composition (MCP-taugliche Controls mit assertion/pass_criteria/fail_criteria), (5) Embedding-basierte Deduplizierung mit LLM-Verifikation, (6) Dependency Engine (5 Typen: supersedes, prerequisite, compensating_control, scope_exclusion, conditional_requirement) mit automatischer Generierung via Ontology, Pattern-Regeln und Domain Packs (DSGVO, AI Act, CRA, Security, Arbeitsrecht). 126+ Tests, alle bestanden.',
dbTables: [], dbTables: [
ragCollections: [], 'canonical_controls', 'obligation_candidates', 'control_parent_links',
apiEndpoints: [ 'control_dependencies', 'control_evaluation_results',
'POST /analyze', 'canonical_processed_chunks', 'canonical_generation_jobs',
'POST /gap-analysis', 'control_dedup_reviews', 'control_patterns',
'POST /archive',
], ],
dependsOn: ['ai-compliance-sdk', 'dsms'], ragCollections: [
'bp_compliance_gesetze', 'bp_compliance_datenschutz',
'bp_compliance_ce', 'bp_dsfa_corpus', 'bp_legal_templates',
],
apiEndpoints: [
'POST /v1/canonical/generate',
'GET /v1/canonical/controls',
'POST /v1/canonical/controls/applicable',
'POST /v1/canonical/generate/submit-pass0b',
'POST /v1/canonical/generate/process-batch',
'GET /v1/canonical/generate/quality-metrics',
'POST /v1/dependencies/generate',
'POST /v1/dependencies/evaluate',
'GET /v1/dependencies/graph',
'POST /v1/document-compliance/required',
],
dependsOn: ['postgresql', 'qdrant', 'ollama'],
}, },
{ {
id: 'compliance-tts', id: 'compliance-tts',
@@ -383,7 +398,7 @@ export const ARCH_EDGES: ArchEdge[] = [
// Frontend → Backend // Frontend → Backend
{ source: 'admin-compliance', target: 'backend-compliance', label: 'REST API' }, { source: 'admin-compliance', target: 'backend-compliance', label: 'REST API' },
{ source: 'admin-compliance', target: 'ai-compliance-sdk', label: 'REST API' }, { source: 'admin-compliance', target: 'ai-compliance-sdk', label: 'REST API' },
{ source: 'admin-compliance', target: 'document-crawler', label: 'REST API' }, { source: 'admin-compliance', target: 'control-pipeline', label: 'REST API' },
// Backend → Infrastructure // Backend → Infrastructure
{ source: 'backend-compliance', target: 'postgresql', label: 'SQLAlchemy' }, { source: 'backend-compliance', target: 'postgresql', label: 'SQLAlchemy' },
@@ -392,12 +407,9 @@ export const ARCH_EDGES: ArchEdge[] = [
{ source: 'ai-compliance-sdk', target: 'ollama', label: 'LLM Inference' }, { source: 'ai-compliance-sdk', target: 'ollama', label: 'LLM Inference' },
{ source: 'ai-compliance-sdk', target: 'postgresql', label: 'GORM' }, { source: 'ai-compliance-sdk', target: 'postgresql', label: 'GORM' },
{ source: 'compliance-tts', target: 'minio', label: 'Audio/Video' }, { source: 'compliance-tts', target: 'minio', label: 'Audio/Video' },
{ source: 'control-pipeline', target: 'postgresql', label: 'SQLAlchemy' },
// Backend → Backend { source: 'control-pipeline', target: 'qdrant', label: 'Embedding + Dedup' },
{ source: 'document-crawler', target: 'ai-compliance-sdk', label: 'LLM Gateway' }, { source: 'control-pipeline', target: 'ollama', label: 'LLM Dedup (qwen3.5)' },
// Backend → Data Sovereignty
{ source: 'document-crawler', target: 'dsms', label: 'IPFS Archive' },
] ]
// ============================================================================= // =============================================================================

View File

@@ -250,4 +250,95 @@ export const STEPS_BETRIEB: SDKFlowStep[] = [
url: '/sdk/isms', url: '/sdk/isms',
completion: 100, completion: 100,
}, },
// ── Control Pipeline ─────────────────────────────────────────────────────
{
id: 'control-library',
name: 'Canonical Control Library',
nameShort: 'Control Library',
package: 'betrieb',
seq: 5200,
checkpointId: 'CP-CLIB',
checkpointType: 'REQUIRED',
checkpointReviewer: 'NONE',
description: 'Verwaltung der ~33.000 Rich Controls aus dem RAG-Korpus. 7-Stufen-Pipeline mit Lizenz-Gate.',
descriptionLong: 'Die Canonical Control Library ist das zentrale Verzeichnis aller aus Regulierungstexten generierten Compliance Controls. Die 7-Stufen-Pipeline verarbeitet ~105.000 RAG-Chunks: (1) RAG Scan, (2) Lizenz-Klassifikation (Rule 1/2/3), (3a) Strukturierung (Rule 1+2) oder (3b) Reformulierung (Rule 3), (4) Harmonisierung (Embedding-Dedup), (5) Anchor Search (Open-Source-Referenzen), (6) Speicherung, (7) Chunk-Tracking. Domains: AUTH, CRYP, NET, DATA, SEC, AI, COMP, GOV, LAB, FIN u.a.',
legalBasis: 'UrhG §44b (Text & Data Mining), UrhG §23 (Hinreichender Abstand)',
inputs: ['ragChunks'],
outputs: ['canonicalControls'],
prerequisiteSteps: [],
dbTables: ['canonical_controls', 'canonical_processed_chunks', 'canonical_generation_jobs'],
dbMode: 'read/write',
ragCollections: ['bp_compliance_gesetze', 'bp_compliance_datenschutz', 'bp_compliance_ce', 'bp_dsfa_corpus', 'bp_legal_templates'],
ragPurpose: 'Quelldokumente fuer Control-Generierung (Gesetze, Verordnungen, Standards)',
isOptional: false,
url: '/sdk/control-library',
completion: 100,
},
{
id: 'obligation-extraction',
name: 'Pass 0a: Obligation Extraction',
nameShort: 'Pass 0a',
package: 'betrieb',
seq: 5300,
checkpointId: 'CP-P0A',
checkpointType: 'REQUIRED',
checkpointReviewer: 'NONE',
description: 'Extraktion von ~181.000 normativen Pflichten aus Rich Controls via Claude Haiku (Batch API).',
descriptionLong: 'Pass 0a zerlegt jeden Rich Control in einzelne normative Obligations via Claude Haiku (Anthropic Batch API, 50% Kostenreduktion). Jede Obligation wird klassifiziert: Pflicht/Empfehlung/Kann, Test-Obligation ja/nein, Reporting-Obligation ja/nein. Quality Gate mit 6 Regeln: nur normative Aussagen, ein Hauptverb, Test/Reporting separat, kein Evidence-Level-Split. Ergebnis: ~181.000 validierte Obligations mit action, object, condition, normative_strength.',
legalBasis: 'Pipeline-intern (Normative Obligation Extraction)',
inputs: ['canonicalControls'],
outputs: ['obligationCandidates'],
prerequisiteSteps: ['control-library'],
dbTables: ['obligation_candidates'],
dbMode: 'read/write',
ragCollections: [],
isOptional: false,
url: '/sdk/control-library',
completion: 90,
},
{
id: 'atomic-composition',
name: 'Pass 0b: Atomic Composition',
nameShort: 'Pass 0b',
package: 'betrieb',
seq: 5400,
checkpointId: 'CP-P0B',
checkpointType: 'REQUIRED',
checkpointReviewer: 'NONE',
description: 'Komposition atomarer MCP-tauglicher Controls aus Obligations via Claude Sonnet + Pre-LLM Ontology-Filter.',
descriptionLong: 'Pass 0b verwandelt jede validierte Obligation in ein eigenstaendiges atomares Control via Claude Sonnet (Anthropic Batch API). Vor dem LLM-Call klassifiziert die Control Ontology (26 Action Types) jede Obligation: atomic (an LLM senden), composite (ueberspringen), evidence (ueberspringen), framework_container (ueberspringen). MCP-taugliche Output-Felder: assertion (pruefbare Aussage), pass_criteria, fail_criteria, check_type (technical_config_check, document_clause_check, code_pattern_check), dependency_hints, lifecycle_phase_order (1-13). Canonical Key Format: action_type:normalized_object:control_phase.',
legalBasis: 'Pipeline-intern (Atomic Control Composition)',
inputs: ['obligationCandidates'],
outputs: ['atomicControls'],
prerequisiteSteps: ['obligation-extraction'],
dbTables: ['canonical_controls', 'control_parent_links'],
dbMode: 'read/write',
ragCollections: [],
isOptional: false,
url: '/sdk/control-library',
completion: 80,
},
{
id: 'dependency-engine',
name: 'Dependency Engine + Evaluation',
nameShort: 'Dependencies',
package: 'betrieb',
seq: 5500,
checkpointId: 'CP-DEP',
checkpointType: 'REQUIRED',
checkpointReviewer: 'NONE',
description: '5 Dependency-Typen, generische Condition Language, automatische Generierung via Ontology + Domain Packs.',
descriptionLong: 'Die Dependency Engine modelliert logische Abhaengigkeiten zwischen Controls: supersedes (A ersetzt B), prerequisite (A muss vor B), compensating_control (A kompensiert B-Failure), scope_exclusion (A schliesst B aus), conditional_requirement (B nur unter Bedingung). Generische Condition Language (AND/OR/NOT + Feldoperatoren). Priority-basierte Konfliktloesung. Zykluserkennung (DFS). Automatische Generierung via: (1) Ontology (Phase-Sequenz), (2) Pattern-Regeln, (3) Domain Packs (DSGVO, AI Act, CRA, Security, Arbeitsrecht). MCP-Output mit dependency_resolution Trace.',
legalBasis: 'Pipeline-intern (Control Dependency Resolution)',
inputs: ['atomicControls'],
outputs: ['evaluatedControls', 'dependencyGraph'],
prerequisiteSteps: ['atomic-composition'],
dbTables: ['control_dependencies', 'control_evaluation_results'],
dbMode: 'read/write',
ragCollections: [],
isOptional: false,
url: '/sdk/control-library',
completion: 100,
},
] ]