From a2205abea19e262339a68de2c66e7048c992663c Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Sun, 26 Apr 2026 21:04:11 +0200 Subject: [PATCH] docs: update Architecture + SDK Flow with Control Pipeline + Dependency Engine MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Architecture (architecture-data.ts): - Replace document-crawler with control-pipeline (Port 8098) - Add 9 DB tables, 5 RAG collections, 10 API endpoints - Add edges: control-pipeline → PostgreSQL, Qdrant, Ollama SDK Flow (steps-betrieb.ts): - Add 4 new steps (seq 5200-5500): - Canonical Control Library (7-stage generation pipeline) - Pass 0a: Obligation Extraction (181k obligations) - Pass 0b: Atomic Composition (MCP-taugliche controls) - Dependency Engine + Evaluation (5 types, auto-generation) Co-Authored-By: Claude Opus 4.6 (1M context) --- .../app/sdk/architecture/architecture-data.ts | 52 +++++++---- .../app/sdk/sdk-flow/steps-betrieb.ts | 91 +++++++++++++++++++ 2 files changed, 123 insertions(+), 20 deletions(-) diff --git a/admin-compliance/app/sdk/architecture/architecture-data.ts b/admin-compliance/app/sdk/architecture/architecture-data.ts index 71a6335..1ee1290 100644 --- a/admin-compliance/app/sdk/architecture/architecture-data.ts +++ b/admin-compliance/app/sdk/architecture/architecture-data.ts @@ -228,24 +228,39 @@ export const ARCH_SERVICES: ArchService[] = [ dependsOn: ['qdrant', 'ollama', 'postgresql'], }, { - id: 'document-crawler', - name: 'Document Crawler', - nameShort: 'Crawler', + id: 'control-pipeline', + name: 'Control Pipeline', + nameShort: 'Pipeline', layer: 'backend', tech: 'Python / FastAPI', port: 8098, url: 'https://macmini:8098', - container: 'bp-compliance-document-crawler', - description: 'Dokument-Analyse (PDF, DOCX, XLSX, PPTX), Gap-Analyse, IPFS-Archivierung.', - descriptionLong: 'Der Document Crawler nimmt hochgeladene Dokumente (PDF, DOCX, XLSX, PPTX) entgegen, extrahiert deren Inhalt und fuehrt eine Gap-Analyse gegen 
bestehende Compliance-Anforderungen durch. Dafuer leitet er die Textinhalte an den AI Compliance SDK weiter, der die semantische Analyse uebernimmt. Abgeschlossene Dokumente koennen ueber den DSMS-Service dezentral auf IPFS archiviert werden.', - dbTables: [], - ragCollections: [], - apiEndpoints: [ - 'POST /analyze', - 'POST /gap-analysis', - 'POST /archive', + container: 'bp-core-control-pipeline', + description: 'RAG-zu-Controls Pipeline: Control Generation, Pass 0a/0b, Ontology, Dedup, Dependency Engine, Applicability.', + descriptionLong: 'Die Control Pipeline ist das Herzstueck der automatisierten Compliance-Control-Generierung. Sie verarbeitet ~105.000 RAG-Chunks aus EU/DE-Regulierungen in 6 Phasen: (1) RAG Ingestion, (2) 7-Stufen Control Generation (Lizenz-Gate + Claude LLM), (3) Pass 0a Obligation Extraction (~181k Obligations), (4) Pass 0b Atomic Composition (MCP-taugliche Controls mit assertion/pass_criteria/fail_criteria), (5) Embedding-basierte Deduplizierung mit LLM-Verifikation, (6) Dependency Engine (5 Typen: supersedes, prerequisite, compensating_control, scope_exclusion, conditional_requirement) mit automatischer Generierung via Ontology, Pattern-Regeln und Domain Packs (DSGVO, AI Act, CRA, Security, Arbeitsrecht). 
126+ Tests, alle bestanden.', + dbTables: [ + 'canonical_controls', 'obligation_candidates', 'control_parent_links', + 'control_dependencies', 'control_evaluation_results', + 'canonical_processed_chunks', 'canonical_generation_jobs', + 'control_dedup_reviews', 'control_patterns', ], - dependsOn: ['ai-compliance-sdk', 'dsms'], + ragCollections: [ + 'bp_compliance_gesetze', 'bp_compliance_datenschutz', + 'bp_compliance_ce', 'bp_dsfa_corpus', 'bp_legal_templates', + ], + apiEndpoints: [ + 'POST /v1/canonical/generate', + 'GET /v1/canonical/controls', + 'POST /v1/canonical/controls/applicable', + 'POST /v1/canonical/generate/submit-pass0b', + 'POST /v1/canonical/generate/process-batch', + 'GET /v1/canonical/generate/quality-metrics', + 'POST /v1/dependencies/generate', + 'POST /v1/dependencies/evaluate', + 'GET /v1/dependencies/graph', + 'POST /v1/document-compliance/required', + ], + dependsOn: ['postgresql', 'qdrant', 'ollama'], }, { id: 'compliance-tts', @@ -383,7 +398,7 @@ export const ARCH_EDGES: ArchEdge[] = [ // Frontend → Backend { source: 'admin-compliance', target: 'backend-compliance', label: 'REST API' }, { source: 'admin-compliance', target: 'ai-compliance-sdk', label: 'REST API' }, - { source: 'admin-compliance', target: 'document-crawler', label: 'REST API' }, + { source: 'admin-compliance', target: 'control-pipeline', label: 'REST API' }, // Backend → Infrastructure { source: 'backend-compliance', target: 'postgresql', label: 'SQLAlchemy' }, @@ -392,12 +407,9 @@ export const ARCH_EDGES: ArchEdge[] = [ { source: 'ai-compliance-sdk', target: 'ollama', label: 'LLM Inference' }, { source: 'ai-compliance-sdk', target: 'postgresql', label: 'GORM' }, { source: 'compliance-tts', target: 'minio', label: 'Audio/Video' }, - - // Backend → Backend - { source: 'document-crawler', target: 'ai-compliance-sdk', label: 'LLM Gateway' }, - - // Backend → Data Sovereignty - { source: 'document-crawler', target: 'dsms', label: 'IPFS Archive' }, + { source: 
'control-pipeline', target: 'postgresql', label: 'SQLAlchemy' }, + { source: 'control-pipeline', target: 'qdrant', label: 'Embedding + Dedup' }, + { source: 'control-pipeline', target: 'ollama', label: 'LLM Dedup (qwen3.5)' }, ] // ============================================================================= diff --git a/admin-compliance/app/sdk/sdk-flow/steps-betrieb.ts b/admin-compliance/app/sdk/sdk-flow/steps-betrieb.ts index 2b4e2b4..ac7386e 100644 --- a/admin-compliance/app/sdk/sdk-flow/steps-betrieb.ts +++ b/admin-compliance/app/sdk/sdk-flow/steps-betrieb.ts @@ -250,4 +250,95 @@ export const STEPS_BETRIEB: SDKFlowStep[] = [ url: '/sdk/isms', completion: 100, }, + + // ── Control Pipeline ───────────────────────────────────────────────────── + { + id: 'control-library', + name: 'Canonical Control Library', + nameShort: 'Control Library', + package: 'betrieb', + seq: 5200, + checkpointId: 'CP-CLIB', + checkpointType: 'REQUIRED', + checkpointReviewer: 'NONE', + description: 'Verwaltung der ~33.000 Rich Controls aus dem RAG-Korpus. 7-Stufen-Pipeline mit Lizenz-Gate.', + descriptionLong: 'Die Canonical Control Library ist das zentrale Verzeichnis aller aus Regulierungstexten generierten Compliance Controls. Die 7-Stufen-Pipeline verarbeitet ~105.000 RAG-Chunks: (1) RAG Scan, (2) Lizenz-Klassifikation (Rule 1/2/3), (3a) Strukturierung (Rule 1+2) oder (3b) Reformulierung (Rule 3), (4) Harmonisierung (Embedding-Dedup), (5) Anchor Search (Open-Source-Referenzen), (6) Speicherung, (7) Chunk-Tracking. 
Domains: AUTH, CRYP, NET, DATA, SEC, AI, COMP, GOV, LAB, FIN u.a.', + legalBasis: 'UrhG §44b (Text & Data Mining), UrhG §23 (Hinreichender Abstand)', + inputs: ['ragChunks'], + outputs: ['canonicalControls'], + prerequisiteSteps: [], + dbTables: ['canonical_controls', 'canonical_processed_chunks', 'canonical_generation_jobs'], + dbMode: 'read/write', + ragCollections: ['bp_compliance_gesetze', 'bp_compliance_datenschutz', 'bp_compliance_ce', 'bp_dsfa_corpus', 'bp_legal_templates'], + ragPurpose: 'Quelldokumente fuer Control-Generierung (Gesetze, Verordnungen, Standards)', + isOptional: false, + url: '/sdk/control-library', + completion: 100, + }, + { + id: 'obligation-extraction', + name: 'Pass 0a: Obligation Extraction', + nameShort: 'Pass 0a', + package: 'betrieb', + seq: 5300, + checkpointId: 'CP-P0A', + checkpointType: 'REQUIRED', + checkpointReviewer: 'NONE', + description: 'Extraktion von ~181.000 normativen Pflichten aus Rich Controls via Claude Haiku (Batch API).', + descriptionLong: 'Pass 0a zerlegt jeden Rich Control in einzelne normative Obligations via Claude Haiku (Anthropic Batch API, 50% Kostenreduktion). Jede Obligation wird klassifiziert: Pflicht/Empfehlung/Kann, Test-Obligation ja/nein, Reporting-Obligation ja/nein. Quality Gate mit 6 Regeln: nur normative Aussagen, ein Hauptverb, Test/Reporting separat, kein Evidence-Level-Split. 
Ergebnis: ~181.000 validierte Obligations mit action, object, condition, normative_strength.', + legalBasis: 'Pipeline-intern (Normative Obligation Extraction)', + inputs: ['canonicalControls'], + outputs: ['obligationCandidates'], + prerequisiteSteps: ['control-library'], + dbTables: ['obligation_candidates'], + dbMode: 'read/write', + ragCollections: [], + isOptional: false, + url: '/sdk/control-library', + completion: 90, + }, + { + id: 'atomic-composition', + name: 'Pass 0b: Atomic Composition', + nameShort: 'Pass 0b', + package: 'betrieb', + seq: 5400, + checkpointId: 'CP-P0B', + checkpointType: 'REQUIRED', + checkpointReviewer: 'NONE', + description: 'Komposition atomarer MCP-tauglicher Controls aus Obligations via Claude Sonnet + Pre-LLM Ontology-Filter.', + descriptionLong: 'Pass 0b verwandelt jede validierte Obligation in ein eigenstaendiges atomares Control via Claude Sonnet (Anthropic Batch API). Vor dem LLM-Call klassifiziert die Control Ontology (26 Action Types) jede Obligation: atomic (an LLM senden), composite (ueberspringen), evidence (ueberspringen), framework_container (ueberspringen). MCP-taugliche Output-Felder: assertion (pruefbare Aussage), pass_criteria, fail_criteria, check_type (technical_config_check, document_clause_check, code_pattern_check), dependency_hints, lifecycle_phase_order (1-13). 
Canonical Key Format: action_type:normalized_object:control_phase.', + legalBasis: 'Pipeline-intern (Atomic Control Composition)', + inputs: ['obligationCandidates'], + outputs: ['atomicControls'], + prerequisiteSteps: ['obligation-extraction'], + dbTables: ['canonical_controls', 'control_parent_links'], + dbMode: 'read/write', + ragCollections: [], + isOptional: false, + url: '/sdk/control-library', + completion: 80, + }, + { + id: 'dependency-engine', + name: 'Dependency Engine + Evaluation', + nameShort: 'Dependencies', + package: 'betrieb', + seq: 5500, + checkpointId: 'CP-DEP', + checkpointType: 'REQUIRED', + checkpointReviewer: 'NONE', + description: '5 Dependency-Typen, generische Condition Language, automatische Generierung via Ontology + Domain Packs.', + descriptionLong: 'Die Dependency Engine modelliert logische Abhaengigkeiten zwischen Controls: supersedes (A ersetzt B), prerequisite (A muss vor B), compensating_control (A kompensiert B-Failure), scope_exclusion (A schliesst B aus), conditional_requirement (B nur unter Bedingung). Generische Condition Language (AND/OR/NOT + Feldoperatoren). Priority-basierte Konfliktloesung. Zykluserkennung (DFS). Automatische Generierung via: (1) Ontology (Phase-Sequenz), (2) Pattern-Regeln, (3) Domain Packs (DSGVO, AI Act, CRA, Security, Arbeitsrecht). MCP-Output mit dependency_resolution Trace.', + legalBasis: 'Pipeline-intern (Control Dependency Resolution)', + inputs: ['atomicControls'], + outputs: ['evaluatedControls', 'dependencyGraph'], + prerequisiteSteps: ['atomic-composition'], + dbTables: ['control_dependencies', 'control_evaluation_results'], + dbMode: 'read/write', + ragCollections: [], + isOptional: false, + url: '/sdk/control-library', + completion: 100, + }, ]