Files
breakpilot-compliance/admin-compliance/app/api/sdk/drafting-engine/draft/route.ts
Benjamin Admin 14a99322eb
All checks were successful
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-ai-compliance (push) Successful in 35s
CI / test-python-backend-compliance (push) Successful in 26s
CI / test-python-document-crawler (push) Successful in 22s
CI / test-python-dsms-gateway (push) Successful in 19s
feat: Phase 2 — RAG integration in Requirements + DSFA Draft
Add legal context enrichment from Qdrant vector corpus to the two
highest-priority modules (Requirements AI assistant and DSFA drafting
engine).

Go SDK:
- Add SearchCollection() with collection override + whitelist validation
- Refactor Search() to delegate to shared searchInternal()

Python backend:
- New ComplianceRAGClient proxying POST /sdk/v1/rag/search (error-tolerant)
- AI assistant: enrich interpret_requirement() and suggest_controls() with RAG
- Requirements API: add ?include_legal_context=true query parameter

Admin (Next.js):
- Extract shared queryRAG() utility from chat route
- Inject RAG legal context into v1 and v2 draft pipelines

Tests for all three layers (Go, Python, TypeScript shared utility).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-02 08:57:39 +01:00

615 lines
22 KiB
TypeScript

/**
* Drafting Engine - Draft API v2
*
* Erstellt personalisierte Compliance-Dokument-Entwuerfe.
* Pipeline: Constraint → Context → Sanitize → LLM → Validate → Repair → Merge
*
* v1-Modus: ?v=1 oder fehlender v2-Kontext → Legacy-Pipeline
* v2-Modus: Standard — Personalisierte Prosa mit Governance
*/
import { NextRequest, NextResponse } from 'next/server'
const OLLAMA_URL = process.env.OLLAMA_URL || 'http://host.docker.internal:11434'
const LLM_MODEL = process.env.COMPLIANCE_LLM_MODEL || 'qwen2.5vl:32b'
// v1 imports (Legacy)
import { buildVVTDraftPrompt } from '@/lib/sdk/drafting-engine/prompts/draft-vvt'
import { buildTOMDraftPrompt } from '@/lib/sdk/drafting-engine/prompts/draft-tom'
import { buildDSFADraftPrompt } from '@/lib/sdk/drafting-engine/prompts/draft-dsfa'
import { buildPrivacyPolicyDraftPrompt } from '@/lib/sdk/drafting-engine/prompts/draft-privacy-policy'
import { buildLoeschfristenDraftPrompt } from '@/lib/sdk/drafting-engine/prompts/draft-loeschfristen'
import type { DraftContext, DraftResponse, DraftRevision, DraftSection } from '@/lib/sdk/drafting-engine/types'
import type { ScopeDocumentType } from '@/lib/sdk/compliance-scope-types'
import { ConstraintEnforcer } from '@/lib/sdk/drafting-engine/constraint-enforcer'
// v2 imports (Personalisierte Pipeline)
import { deriveNarrativeTags, extractScoresFromDraftContext, narrativeTagsToPromptString } from '@/lib/sdk/drafting-engine/narrative-tags'
import type { NarrativeTags } from '@/lib/sdk/drafting-engine/narrative-tags'
import { buildAllowedFactsFromDraftContext, allowedFactsToPromptString, disallowedTopicsToPromptString } from '@/lib/sdk/drafting-engine/allowed-facts-v2'
import { sanitizeAllowedFacts, validateNoRemainingPII, SanitizationError } from '@/lib/sdk/drafting-engine/sanitizer'
import { terminologyToPromptString, styleContractToPromptString } from '@/lib/sdk/drafting-engine/terminology'
import { executeRepairLoop, type ProseBlockOutput, type RepairAudit } from '@/lib/sdk/drafting-engine/prose-validator'
import { ProseCacheManager, computeChecksumSync, type CacheKeyParams } from '@/lib/sdk/drafting-engine/cache'
import { queryRAG } from '@/lib/sdk/drafting-engine/rag-query'
// ============================================================================
// Shared State
// ============================================================================
const constraintEnforcer = new ConstraintEnforcer()
const proseCache = new ProseCacheManager({ maxEntries: 200, ttlHours: 24 })
// Template/Terminology Versionen (fuer Cache-Key)
const TEMPLATE_VERSION = '2.0.0'
const TERMINOLOGY_VERSION = '1.0.0'
const VALIDATOR_VERSION = '1.0.0'
// ============================================================================
// v1 Legacy Pipeline
// ============================================================================
const V1_SYSTEM_PROMPT = `Du bist ein DSGVO-Compliance-Experte und erstellst strukturierte Dokument-Entwuerfe.
Du MUSST immer im JSON-Format antworten mit einem "sections" Array.
Jede Section hat: id, title, content, schemaField.
Halte die Tiefe strikt am vorgegebenen Level.
Markiere fehlende Informationen mit [PLATZHALTER: Beschreibung].
Sprache: Deutsch.`
function buildPromptForDocumentType(
documentType: ScopeDocumentType,
context: DraftContext,
instructions?: string
): string {
switch (documentType) {
case 'vvt':
return buildVVTDraftPrompt({ context, instructions })
case 'tom':
return buildTOMDraftPrompt({ context, instructions })
case 'dsfa':
return buildDSFADraftPrompt({ context, instructions })
case 'dsi':
return buildPrivacyPolicyDraftPrompt({ context, instructions })
case 'lf':
return buildLoeschfristenDraftPrompt({ context, instructions })
default:
return `## Aufgabe: Entwurf fuer ${documentType}
### Level: ${context.decisions.level}
### Tiefe: ${context.constraints.depthRequirements.depth}
### Erforderliche Inhalte:
${context.constraints.depthRequirements.detailItems.map((item, i) => `${i + 1}. ${item}`).join('\n')}
${instructions ? `### Anweisungen: ${instructions}` : ''}
Antworte als JSON mit "sections" Array.`
}
}
async function handleV1Draft(body: Record<string, unknown>): Promise<NextResponse> {
const { documentType, draftContext, instructions, existingDraft } = body as {
documentType: ScopeDocumentType
draftContext: DraftContext
instructions?: string
existingDraft?: DraftRevision
}
const constraintCheck = constraintEnforcer.checkFromContext(documentType, draftContext)
if (!constraintCheck.allowed) {
return NextResponse.json({
draft: null,
constraintCheck,
tokensUsed: 0,
error: 'Constraint-Verletzung: ' + constraintCheck.violations.join('; '),
}, { status: 403 })
}
// RAG: Fetch relevant legal context
const ragQuery = documentType === 'dsfa'
? 'Datenschutz-Folgenabschaetzung Art. 35 DSGVO Risikobewertung'
: `${documentType} DSGVO Compliance Anforderungen`
const ragContext = await queryRAG(ragQuery)
let v1SystemPrompt = V1_SYSTEM_PROMPT
if (ragContext) {
v1SystemPrompt += `\n\n## Relevanter Rechtskontext\n${ragContext}`
}
const draftPrompt = buildPromptForDocumentType(documentType, draftContext, instructions)
const messages = [
{ role: 'system', content: v1SystemPrompt },
...(existingDraft ? [{
role: 'assistant',
content: `Bisheriger Entwurf:\n${JSON.stringify(existingDraft.sections, null, 2)}`,
}] : []),
{ role: 'user', content: draftPrompt },
]
const ollamaResponse = await fetch(`${OLLAMA_URL}/api/chat`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
model: LLM_MODEL,
messages,
stream: false,
options: { temperature: 0.15, num_predict: 16384 },
format: 'json',
}),
signal: AbortSignal.timeout(180000),
})
if (!ollamaResponse.ok) {
return NextResponse.json(
{ error: `LLM nicht erreichbar (Status ${ollamaResponse.status})` },
{ status: 502 }
)
}
const result = await ollamaResponse.json()
const content = result.message?.content || ''
let sections: DraftSection[] = []
try {
const parsed = JSON.parse(content)
sections = (parsed.sections || []).map((s: Record<string, unknown>, i: number) => ({
id: String(s.id || `section-${i}`),
title: String(s.title || ''),
content: String(s.content || ''),
schemaField: s.schemaField ? String(s.schemaField) : undefined,
}))
} catch {
sections = [{ id: 'raw', title: 'Entwurf', content }]
}
const draft: DraftRevision = {
id: `draft-${Date.now()}`,
content: sections.map(s => `## ${s.title}\n\n${s.content}`).join('\n\n'),
sections,
createdAt: new Date().toISOString(),
instruction: instructions as string | undefined,
}
return NextResponse.json({
draft,
constraintCheck,
tokensUsed: result.eval_count || 0,
} satisfies DraftResponse)
}
// ============================================================================
// v2 Personalisierte Pipeline
// ============================================================================
/** Prose block definitions per document type */
const DOCUMENT_PROSE_BLOCKS: Record<string, Array<{ blockId: string; blockType: ProseBlockOutput['blockType']; sectionName: string; targetWords: number }>> = {
tom: [
{ blockId: 'tom-intro', blockType: 'introduction', sectionName: 'Einleitung TOM', targetWords: 120 },
{ blockId: 'tom-transition', blockType: 'transition', sectionName: 'Ueberleitung Massnahmen', targetWords: 40 },
{ blockId: 'tom-conclusion', blockType: 'conclusion', sectionName: 'Fazit TOM', targetWords: 80 },
],
dsfa: [
{ blockId: 'dsfa-intro', blockType: 'introduction', sectionName: 'Einleitung DSFA', targetWords: 150 },
{ blockId: 'dsfa-transition', blockType: 'transition', sectionName: 'Ueberleitung Risikobewertung', targetWords: 40 },
{ blockId: 'dsfa-appreciation', blockType: 'appreciation', sectionName: 'Wuerdigung bestehender Massnahmen', targetWords: 60 },
{ blockId: 'dsfa-conclusion', blockType: 'conclusion', sectionName: 'Fazit DSFA', targetWords: 100 },
],
vvt: [
{ blockId: 'vvt-intro', blockType: 'introduction', sectionName: 'Einleitung VVT', targetWords: 120 },
{ blockId: 'vvt-conclusion', blockType: 'conclusion', sectionName: 'Fazit VVT', targetWords: 80 },
],
dsi: [
{ blockId: 'dsi-intro', blockType: 'introduction', sectionName: 'Einleitung Datenschutzerklaerung', targetWords: 130 },
{ blockId: 'dsi-conclusion', blockType: 'conclusion', sectionName: 'Fazit Datenschutzerklaerung', targetWords: 80 },
],
lf: [
{ blockId: 'lf-intro', blockType: 'introduction', sectionName: 'Einleitung Loeschfristen', targetWords: 100 },
{ blockId: 'lf-conclusion', blockType: 'conclusion', sectionName: 'Fazit Loeschfristen', targetWords: 60 },
],
}
function buildV2SystemPrompt(
sanitizedFactsString: string,
narrativeTagsString: string,
terminologyString: string,
styleString: string,
disallowedString: string,
companyName: string,
blockId: string,
blockType: string,
sectionName: string,
documentType: string,
targetWords: number
): string {
return `Du bist ein Compliance-Dokumenten-Redakteur.
Du schreibst einzelne Textabschnitte fuer offizielle Compliance-Dokumente.
KUNDENPROFIL (ERLAUBTE FAKTEN — nur diese darfst du verwenden):
${sanitizedFactsString}
BEWERTUNGSERGEBNIS (sprachliche Tags — verwende nur diese Begriffe):
${narrativeTagsString}
TERMINOLOGIE (verwende ausschliesslich diese Fachbegriffe):
${terminologyString}
STIL:
${styleString}
VERBOTENE INHALTE:
${disallowedString}
- Keine konkreten Prozentwerte, Scores oder Zahlen
- Keine Compliance-Level-Bezeichnungen (L1, L2, L3, L4)
- Keine direkte Ansprache ("Sie", "Ihr")
- Kein Denglisch, keine Marketing-Sprache, keine Superlative
STRIKTE REGELN:
1. Verwende den Firmennamen "${companyName}" — nie "Ihr Unternehmen"
2. Schreibe in der dritten Person ("Die ${companyName}...")
3. Beziehe dich auf die Branche und organisatorische Merkmale
4. Verwende NUR Fakten aus dem Kundenprofil oben
5. Verwende NUR die sprachlichen Tags aus dem Bewertungsergebnis
6. Erfinde KEINE zusaetzlichen Fakten oder Bewertungen
7. Halte dich an die Terminologie-Vorgaben
8. Dein Text wird ZWISCHEN feste Datentabellen eingefuegt
OUTPUT-FORMAT: Antworte ausschliesslich als JSON:
{
"blockId": "${blockId}",
"blockType": "${blockType}",
"language": "de",
"text": "...",
"assertions": {
"companyNameUsed": true/false,
"industryReferenced": true/false,
"structureReferenced": true/false,
"itLandscapeReferenced": true/false,
"narrativeTagsUsed": ["riskSummary", ...]
},
"forbiddenContentDetected": []
}
DOKUMENTENTYP: ${documentType}
SEKTION: ${sectionName}
BLOCK-TYP: ${blockType}
ZIEL-LAENGE: ${targetWords} Woerter`
}
function buildBlockSpecificPrompt(blockType: string, sectionName: string, documentType: string): string {
switch (blockType) {
case 'introduction':
return `Schreibe eine Einleitung fuer das Dokument "${documentType}" (Sektion: ${sectionName}).
Erklaere, warum dieses Dokument fuer das Unternehmen erstellt wurde.
Gehe auf die spezifische Situation des Unternehmens ein.
Erwaehne die Branche, die Organisationsform und die IT-Strategie.`
case 'transition':
return `Schreibe eine kurze Ueberleitung zur naechsten Sektion "${sectionName}".
Verknuepfe den vorherigen Abschnitt logisch mit dem folgenden.`
case 'conclusion':
return `Schreibe einen abschliessenden Absatz fuer die Sektion "${sectionName}".
Fasse die wesentlichen Punkte zusammen und verweise auf die fortlaufende Pflege.`
case 'appreciation':
return `Schreibe einen wertschaetzenden Satz ueber die bestehenden Massnahmen.
Verwende dabei die sprachlichen Tags aus dem Bewertungsergebnis.
Keine neuen Fakten erfinden — nur das Profil wuerdigen.`
default:
return `Schreibe einen Textabschnitt fuer "${sectionName}".`
}
}
async function callOllama(systemPrompt: string, userPrompt: string): Promise<string> {
const response = await fetch(`${OLLAMA_URL}/api/chat`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
model: LLM_MODEL,
messages: [
{ role: 'system', content: systemPrompt },
{ role: 'user', content: userPrompt },
],
stream: false,
options: { temperature: 0.15, num_predict: 4096 },
format: 'json',
}),
signal: AbortSignal.timeout(120000),
})
if (!response.ok) {
throw new Error(`Ollama error: ${response.status}`)
}
const result = await response.json()
return result.message?.content || ''
}
async function handleV2Draft(body: Record<string, unknown>): Promise<NextResponse> {
const { documentType, draftContext, instructions } = body as {
documentType: ScopeDocumentType
draftContext: DraftContext
instructions?: string
}
// Step 1: Constraint Check (Hard Gate)
const constraintCheck = constraintEnforcer.checkFromContext(documentType, draftContext)
if (!constraintCheck.allowed) {
return NextResponse.json({
draft: null,
constraintCheck,
tokensUsed: 0,
error: 'Constraint-Verletzung: ' + constraintCheck.violations.join('; '),
}, { status: 403 })
}
// Step 2: Derive Narrative Tags (deterministisch)
const scores = extractScoresFromDraftContext(draftContext)
const narrativeTags: NarrativeTags = deriveNarrativeTags(scores)
// Step 3: Build Allowed Facts
const allowedFacts = buildAllowedFactsFromDraftContext(draftContext, narrativeTags)
// Step 4: PII Sanitization
let sanitizationResult
try {
sanitizationResult = sanitizeAllowedFacts(allowedFacts)
} catch (error) {
if (error instanceof SanitizationError) {
return NextResponse.json({
error: `Sanitization Hard Abort: ${error.message} (Feld: ${error.field})`,
draft: null,
constraintCheck,
tokensUsed: 0,
}, { status: 422 })
}
throw error
}
const sanitizedFacts = sanitizationResult.facts
// Verify no remaining PII
const piiWarnings = validateNoRemainingPII(sanitizedFacts)
if (piiWarnings.length > 0) {
console.warn('PII-Warnungen nach Sanitization:', piiWarnings)
}
// Step 5: Build prompt components
const factsString = allowedFactsToPromptString(sanitizedFacts)
const tagsString = narrativeTagsToPromptString(narrativeTags)
const termsString = terminologyToPromptString()
const styleString = styleContractToPromptString()
const disallowedString = disallowedTopicsToPromptString()
// Compute prompt hash for audit
const promptHash = computeChecksumSync({ factsString, tagsString, termsString, styleString, disallowedString })
// Step 5b: RAG Legal Context
const v2RagQuery = documentType === 'dsfa'
? 'DSFA Art. 35 DSGVO Risikobewertung Massnahmen Datenschutz-Folgenabschaetzung'
: `${documentType} DSGVO Compliance`
const v2RagContext = await queryRAG(v2RagQuery)
// Step 6: Generate Prose Blocks (with cache + repair loop)
const proseBlocks = DOCUMENT_PROSE_BLOCKS[documentType] || DOCUMENT_PROSE_BLOCKS.tom
const generatedBlocks: ProseBlockOutput[] = []
const repairAudits: RepairAudit[] = []
let totalTokens = 0
for (const blockDef of proseBlocks) {
// Check cache
const cacheParams: CacheKeyParams = {
allowedFacts: sanitizedFacts,
templateVersion: TEMPLATE_VERSION,
terminologyVersion: TERMINOLOGY_VERSION,
narrativeTags,
promptHash,
blockType: blockDef.blockType,
sectionName: blockDef.sectionName,
}
const cached = proseCache.getSync(cacheParams)
if (cached) {
generatedBlocks.push(cached)
repairAudits.push({
repairAttempts: 0,
validatorFailures: [],
repairSuccessful: true,
fallbackUsed: false,
})
continue
}
// Build prompts
let systemPrompt = buildV2SystemPrompt(
factsString, tagsString, termsString, styleString, disallowedString,
sanitizedFacts.companyName,
blockDef.blockId, blockDef.blockType, blockDef.sectionName,
documentType, blockDef.targetWords
)
if (v2RagContext) {
systemPrompt += `\n\nRECHTSKONTEXT (als Referenz, nicht woertlich uebernehmen):\n${v2RagContext}`
}
const userPrompt = buildBlockSpecificPrompt(
blockDef.blockType, blockDef.sectionName, documentType
) + (instructions ? `\n\nZusaetzliche Anweisungen: ${instructions}` : '')
// Call LLM + Repair Loop
try {
const rawOutput = await callOllama(systemPrompt, userPrompt)
totalTokens += rawOutput.length / 4 // Rough token estimate
const { block, audit } = await executeRepairLoop(
rawOutput,
sanitizedFacts,
narrativeTags,
blockDef.blockId,
blockDef.blockType,
async (repairPrompt) => callOllama(systemPrompt, repairPrompt),
documentType
)
generatedBlocks.push(block)
repairAudits.push(audit)
// Cache successful blocks (not fallbacks)
if (!audit.fallbackUsed) {
proseCache.setSync(cacheParams, block)
}
} catch (error) {
// LLM unreachable → Fallback
const { buildFallbackBlock } = await import('@/lib/sdk/drafting-engine/prose-validator')
generatedBlocks.push(
buildFallbackBlock(blockDef.blockId, blockDef.blockType, sanitizedFacts, documentType)
)
repairAudits.push({
repairAttempts: 0,
validatorFailures: [[(error as Error).message]],
repairSuccessful: false,
fallbackUsed: true,
fallbackReason: `LLM-Fehler: ${(error as Error).message}`,
})
}
}
// Step 7: Build v1-compatible draft sections from prose blocks + original prompt
const draftPrompt = buildPromptForDocumentType(documentType, draftContext, instructions)
// Also generate data sections via legacy pipeline
let dataSections: DraftSection[] = []
try {
const dataResponse = await callOllama(V1_SYSTEM_PROMPT, draftPrompt)
const parsed = JSON.parse(dataResponse)
dataSections = (parsed.sections || []).map((s: Record<string, unknown>, i: number) => ({
id: String(s.id || `section-${i}`),
title: String(s.title || ''),
content: String(s.content || ''),
schemaField: s.schemaField ? String(s.schemaField) : undefined,
}))
totalTokens += dataResponse.length / 4
} catch {
dataSections = []
}
// Merge: Prose intro → Data sections → Prose transitions/conclusion
const introBlock = generatedBlocks.find(b => b.blockType === 'introduction')
const transitionBlocks = generatedBlocks.filter(b => b.blockType === 'transition')
const appreciationBlocks = generatedBlocks.filter(b => b.blockType === 'appreciation')
const conclusionBlock = generatedBlocks.find(b => b.blockType === 'conclusion')
const mergedSections: DraftSection[] = []
if (introBlock) {
mergedSections.push({
id: introBlock.blockId,
title: 'Einleitung',
content: introBlock.text,
})
}
for (let i = 0; i < dataSections.length; i++) {
// Insert transition before data section (if available)
if (i > 0 && transitionBlocks[i - 1]) {
mergedSections.push({
id: transitionBlocks[i - 1].blockId,
title: '',
content: transitionBlocks[i - 1].text,
})
}
mergedSections.push(dataSections[i])
}
for (const block of appreciationBlocks) {
mergedSections.push({
id: block.blockId,
title: 'Wuerdigung',
content: block.text,
})
}
if (conclusionBlock) {
mergedSections.push({
id: conclusionBlock.blockId,
title: 'Fazit',
content: conclusionBlock.text,
})
}
// If no data sections generated, use prose blocks as sections
const finalSections = mergedSections.length > 0 ? mergedSections : generatedBlocks.map(b => ({
id: b.blockId,
title: b.blockType === 'introduction' ? 'Einleitung' :
b.blockType === 'conclusion' ? 'Fazit' :
b.blockType === 'appreciation' ? 'Wuerdigung' : 'Ueberleitung',
content: b.text,
}))
const draft: DraftRevision = {
id: `draft-v2-${Date.now()}`,
content: finalSections.map(s => s.title ? `## ${s.title}\n\n${s.content}` : s.content).join('\n\n'),
sections: finalSections,
createdAt: new Date().toISOString(),
instruction: instructions,
}
// Step 8: Build Audit Trail
const auditTrail = {
documentType,
templateVersion: TEMPLATE_VERSION,
terminologyVersion: TERMINOLOGY_VERSION,
validatorVersion: VALIDATOR_VERSION,
promptHash,
llmModel: LLM_MODEL,
llmTemperature: 0.15,
llmProvider: 'ollama',
narrativeTags,
sanitization: sanitizationResult.audit,
repairAudits,
proseBlocks: generatedBlocks.map((b, i) => ({
blockId: b.blockId,
blockType: b.blockType,
wordCount: b.text.split(/\s+/).filter(Boolean).length,
fallbackUsed: repairAudits[i]?.fallbackUsed ?? false,
repairAttempts: repairAudits[i]?.repairAttempts ?? 0,
})),
cacheStats: proseCache.getStats(),
}
return NextResponse.json({
draft,
constraintCheck,
tokensUsed: Math.round(totalTokens),
pipelineVersion: 'v2',
auditTrail,
})
}
// ============================================================================
// Route Handler
// ============================================================================
export async function POST(request: NextRequest) {
try {
const body = await request.json()
const { documentType, draftContext } = body
if (!documentType || !draftContext) {
return NextResponse.json(
{ error: 'documentType und draftContext sind erforderlich' },
{ status: 400 }
)
}
// v1 fallback: explicit ?v=1 parameter
const version = request.nextUrl.searchParams.get('v')
if (version === '1') {
return handleV1Draft(body)
}
// Default: v2 pipeline
return handleV2Draft(body)
} catch (error) {
console.error('Draft generation error:', error)
return NextResponse.json(
{ error: 'Draft-Generierung fehlgeschlagen.' },
{ status: 503 }
)
}
}