docs: update Architecture + SDK Flow with Control Pipeline + Dependency Engine
Architecture (architecture-data.ts): - Replace document-crawler with control-pipeline (Port 8098) - Add 9 DB tables, 5 RAG collections, 10 API endpoints - Add edges: control-pipeline → PostgreSQL, Qdrant, Ollama SDK Flow (steps-betrieb.ts): - Add 4 new steps (seq 5200-5500): - Canonical Control Library (7-stage generation pipeline) - Pass 0a: Obligation Extraction (181k obligations) - Pass 0b: Atomic Composition (MCP-taugliche controls) - Dependency Engine + Evaluation (5 types, auto-generation) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -228,24 +228,39 @@ export const ARCH_SERVICES: ArchService[] = [
|
||||
dependsOn: ['qdrant', 'ollama', 'postgresql'],
|
||||
},
|
||||
{
|
||||
id: 'document-crawler',
|
||||
name: 'Document Crawler',
|
||||
nameShort: 'Crawler',
|
||||
id: 'control-pipeline',
|
||||
name: 'Control Pipeline',
|
||||
nameShort: 'Pipeline',
|
||||
layer: 'backend',
|
||||
tech: 'Python / FastAPI',
|
||||
port: 8098,
|
||||
url: 'https://macmini:8098',
|
||||
container: 'bp-compliance-document-crawler',
|
||||
description: 'Dokument-Analyse (PDF, DOCX, XLSX, PPTX), Gap-Analyse, IPFS-Archivierung.',
|
||||
descriptionLong: 'Der Document Crawler nimmt hochgeladene Dokumente (PDF, DOCX, XLSX, PPTX) entgegen, extrahiert deren Inhalt und fuehrt eine Gap-Analyse gegen bestehende Compliance-Anforderungen durch. Dafuer leitet er die Textinhalte an den AI Compliance SDK weiter, der die semantische Analyse uebernimmt. Abgeschlossene Dokumente koennen ueber den DSMS-Service dezentral auf IPFS archiviert werden.',
|
||||
dbTables: [],
|
||||
ragCollections: [],
|
||||
apiEndpoints: [
|
||||
'POST /analyze',
|
||||
'POST /gap-analysis',
|
||||
'POST /archive',
|
||||
container: 'bp-core-control-pipeline',
|
||||
description: 'RAG-zu-Controls Pipeline: Control Generation, Pass 0a/0b, Ontology, Dedup, Dependency Engine, Applicability.',
|
||||
descriptionLong: 'Die Control Pipeline ist das Herzstueck der automatisierten Compliance-Control-Generierung. Sie verarbeitet ~105.000 RAG-Chunks aus EU/DE-Regulierungen in 6 Phasen: (1) RAG Ingestion, (2) 7-Stufen Control Generation (Lizenz-Gate + Claude LLM), (3) Pass 0a Obligation Extraction (~181k Obligations), (4) Pass 0b Atomic Composition (MCP-taugliche Controls mit assertion/pass_criteria/fail_criteria), (5) Embedding-basierte Deduplizierung mit LLM-Verifikation, (6) Dependency Engine (5 Typen: supersedes, prerequisite, compensating_control, scope_exclusion, conditional_requirement) mit automatischer Generierung via Ontology, Pattern-Regeln und Domain Packs (DSGVO, AI Act, CRA, Security, Arbeitsrecht). 126+ Tests, alle bestanden.',
|
||||
dbTables: [
|
||||
'canonical_controls', 'obligation_candidates', 'control_parent_links',
|
||||
'control_dependencies', 'control_evaluation_results',
|
||||
'canonical_processed_chunks', 'canonical_generation_jobs',
|
||||
'control_dedup_reviews', 'control_patterns',
|
||||
],
|
||||
dependsOn: ['ai-compliance-sdk', 'dsms'],
|
||||
ragCollections: [
|
||||
'bp_compliance_gesetze', 'bp_compliance_datenschutz',
|
||||
'bp_compliance_ce', 'bp_dsfa_corpus', 'bp_legal_templates',
|
||||
],
|
||||
apiEndpoints: [
|
||||
'POST /v1/canonical/generate',
|
||||
'GET /v1/canonical/controls',
|
||||
'POST /v1/canonical/controls/applicable',
|
||||
'POST /v1/canonical/generate/submit-pass0b',
|
||||
'POST /v1/canonical/generate/process-batch',
|
||||
'GET /v1/canonical/generate/quality-metrics',
|
||||
'POST /v1/dependencies/generate',
|
||||
'POST /v1/dependencies/evaluate',
|
||||
'GET /v1/dependencies/graph',
|
||||
'POST /v1/document-compliance/required',
|
||||
],
|
||||
dependsOn: ['postgresql', 'qdrant', 'ollama'],
|
||||
},
|
||||
{
|
||||
id: 'compliance-tts',
|
||||
@@ -383,7 +398,7 @@ export const ARCH_EDGES: ArchEdge[] = [
|
||||
// Frontend → Backend
|
||||
{ source: 'admin-compliance', target: 'backend-compliance', label: 'REST API' },
|
||||
{ source: 'admin-compliance', target: 'ai-compliance-sdk', label: 'REST API' },
|
||||
{ source: 'admin-compliance', target: 'document-crawler', label: 'REST API' },
|
||||
{ source: 'admin-compliance', target: 'control-pipeline', label: 'REST API' },
|
||||
|
||||
// Backend → Infrastructure
|
||||
{ source: 'backend-compliance', target: 'postgresql', label: 'SQLAlchemy' },
|
||||
@@ -392,12 +407,9 @@ export const ARCH_EDGES: ArchEdge[] = [
|
||||
{ source: 'ai-compliance-sdk', target: 'ollama', label: 'LLM Inference' },
|
||||
{ source: 'ai-compliance-sdk', target: 'postgresql', label: 'GORM' },
|
||||
{ source: 'compliance-tts', target: 'minio', label: 'Audio/Video' },
|
||||
|
||||
// Backend → Backend
|
||||
{ source: 'document-crawler', target: 'ai-compliance-sdk', label: 'LLM Gateway' },
|
||||
|
||||
// Backend → Data Sovereignty
|
||||
{ source: 'document-crawler', target: 'dsms', label: 'IPFS Archive' },
|
||||
{ source: 'control-pipeline', target: 'postgresql', label: 'SQLAlchemy' },
|
||||
{ source: 'control-pipeline', target: 'qdrant', label: 'Embedding + Dedup' },
|
||||
{ source: 'control-pipeline', target: 'ollama', label: 'LLM Dedup (qwen3.5)' },
|
||||
]
|
||||
|
||||
// =============================================================================
|
||||
|
||||
@@ -250,4 +250,95 @@ export const STEPS_BETRIEB: SDKFlowStep[] = [
|
||||
url: '/sdk/isms',
|
||||
completion: 100,
|
||||
},
|
||||
|
||||
// ── Control Pipeline ─────────────────────────────────────────────────────
|
||||
{
|
||||
id: 'control-library',
|
||||
name: 'Canonical Control Library',
|
||||
nameShort: 'Control Library',
|
||||
package: 'betrieb',
|
||||
seq: 5200,
|
||||
checkpointId: 'CP-CLIB',
|
||||
checkpointType: 'REQUIRED',
|
||||
checkpointReviewer: 'NONE',
|
||||
description: 'Verwaltung der ~33.000 Rich Controls aus dem RAG-Korpus. 7-Stufen-Pipeline mit Lizenz-Gate.',
|
||||
descriptionLong: 'Die Canonical Control Library ist das zentrale Verzeichnis aller aus Regulierungstexten generierten Compliance Controls. Die 7-Stufen-Pipeline verarbeitet ~105.000 RAG-Chunks: (1) RAG Scan, (2) Lizenz-Klassifikation (Rule 1/2/3), (3a) Strukturierung (Rule 1+2) oder (3b) Reformulierung (Rule 3), (4) Harmonisierung (Embedding-Dedup), (5) Anchor Search (Open-Source-Referenzen), (6) Speicherung, (7) Chunk-Tracking. Domains: AUTH, CRYP, NET, DATA, SEC, AI, COMP, GOV, LAB, FIN u.a.',
|
||||
legalBasis: 'UrhG §44b (Text & Data Mining), UrhG §23 (Hinreichender Abstand)',
|
||||
inputs: ['ragChunks'],
|
||||
outputs: ['canonicalControls'],
|
||||
prerequisiteSteps: [],
|
||||
dbTables: ['canonical_controls', 'canonical_processed_chunks', 'canonical_generation_jobs'],
|
||||
dbMode: 'read/write',
|
||||
ragCollections: ['bp_compliance_gesetze', 'bp_compliance_datenschutz', 'bp_compliance_ce', 'bp_dsfa_corpus', 'bp_legal_templates'],
|
||||
ragPurpose: 'Quelldokumente fuer Control-Generierung (Gesetze, Verordnungen, Standards)',
|
||||
isOptional: false,
|
||||
url: '/sdk/control-library',
|
||||
completion: 100,
|
||||
},
|
||||
{
|
||||
id: 'obligation-extraction',
|
||||
name: 'Pass 0a: Obligation Extraction',
|
||||
nameShort: 'Pass 0a',
|
||||
package: 'betrieb',
|
||||
seq: 5300,
|
||||
checkpointId: 'CP-P0A',
|
||||
checkpointType: 'REQUIRED',
|
||||
checkpointReviewer: 'NONE',
|
||||
description: 'Extraktion von ~181.000 normativen Pflichten aus Rich Controls via Claude Haiku (Batch API).',
|
||||
descriptionLong: 'Pass 0a zerlegt jeden Rich Control in einzelne normative Obligations via Claude Haiku (Anthropic Batch API, 50% Kostenreduktion). Jede Obligation wird klassifiziert: Pflicht/Empfehlung/Kann, Test-Obligation ja/nein, Reporting-Obligation ja/nein. Quality Gate mit 6 Regeln: nur normative Aussagen, ein Hauptverb, Test/Reporting separat, kein Evidence-Level-Split. Ergebnis: ~181.000 validierte Obligations mit action, object, condition, normative_strength.',
|
||||
legalBasis: 'Pipeline-intern (Normative Obligation Extraction)',
|
||||
inputs: ['canonicalControls'],
|
||||
outputs: ['obligationCandidates'],
|
||||
prerequisiteSteps: ['control-library'],
|
||||
dbTables: ['obligation_candidates'],
|
||||
dbMode: 'read/write',
|
||||
ragCollections: [],
|
||||
isOptional: false,
|
||||
url: '/sdk/control-library',
|
||||
completion: 90,
|
||||
},
|
||||
{
|
||||
id: 'atomic-composition',
|
||||
name: 'Pass 0b: Atomic Composition',
|
||||
nameShort: 'Pass 0b',
|
||||
package: 'betrieb',
|
||||
seq: 5400,
|
||||
checkpointId: 'CP-P0B',
|
||||
checkpointType: 'REQUIRED',
|
||||
checkpointReviewer: 'NONE',
|
||||
description: 'Komposition atomarer MCP-tauglicher Controls aus Obligations via Claude Sonnet + Pre-LLM Ontology-Filter.',
|
||||
descriptionLong: 'Pass 0b verwandelt jede validierte Obligation in ein eigenstaendiges atomares Control via Claude Sonnet (Anthropic Batch API). Vor dem LLM-Call klassifiziert die Control Ontology (26 Action Types) jede Obligation: atomic (an LLM senden), composite (ueberspringen), evidence (ueberspringen), framework_container (ueberspringen). MCP-taugliche Output-Felder: assertion (pruefbare Aussage), pass_criteria, fail_criteria, check_type (technical_config_check, document_clause_check, code_pattern_check), dependency_hints, lifecycle_phase_order (1-13). Canonical Key Format: action_type:normalized_object:control_phase.',
|
||||
legalBasis: 'Pipeline-intern (Atomic Control Composition)',
|
||||
inputs: ['obligationCandidates'],
|
||||
outputs: ['atomicControls'],
|
||||
prerequisiteSteps: ['obligation-extraction'],
|
||||
dbTables: ['canonical_controls', 'control_parent_links'],
|
||||
dbMode: 'read/write',
|
||||
ragCollections: [],
|
||||
isOptional: false,
|
||||
url: '/sdk/control-library',
|
||||
completion: 80,
|
||||
},
|
||||
{
|
||||
id: 'dependency-engine',
|
||||
name: 'Dependency Engine + Evaluation',
|
||||
nameShort: 'Dependencies',
|
||||
package: 'betrieb',
|
||||
seq: 5500,
|
||||
checkpointId: 'CP-DEP',
|
||||
checkpointType: 'REQUIRED',
|
||||
checkpointReviewer: 'NONE',
|
||||
description: '5 Dependency-Typen, generische Condition Language, automatische Generierung via Ontology + Domain Packs.',
|
||||
descriptionLong: 'Die Dependency Engine modelliert logische Abhaengigkeiten zwischen Controls: supersedes (A ersetzt B), prerequisite (A muss vor B), compensating_control (A kompensiert B-Failure), scope_exclusion (A schliesst B aus), conditional_requirement (B nur unter Bedingung). Generische Condition Language (AND/OR/NOT + Feldoperatoren). Priority-basierte Konfliktloesung. Zykluserkennung (DFS). Automatische Generierung via: (1) Ontology (Phase-Sequenz), (2) Pattern-Regeln, (3) Domain Packs (DSGVO, AI Act, CRA, Security, Arbeitsrecht). MCP-Output mit dependency_resolution Trace.',
|
||||
legalBasis: 'Pipeline-intern (Control Dependency Resolution)',
|
||||
inputs: ['atomicControls'],
|
||||
outputs: ['evaluatedControls', 'dependencyGraph'],
|
||||
prerequisiteSteps: ['atomic-composition'],
|
||||
dbTables: ['control_dependencies', 'control_evaluation_results'],
|
||||
dbMode: 'read/write',
|
||||
ragCollections: [],
|
||||
isOptional: false,
|
||||
url: '/sdk/control-library',
|
||||
completion: 100,
|
||||
},
|
||||
]
|
||||
|
||||
Reference in New Issue
Block a user