feat(sdk,iace): add Personalized Drafting Pipeline v2 and IACE engine
All checks were successful
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-ai-compliance (push) Successful in 44s
CI / test-python-backend-compliance (push) Successful in 37s
CI / test-python-document-crawler (push) Successful in 22s
CI / test-python-dsms-gateway (push) Successful in 20s
All checks were successful
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-ai-compliance (push) Successful in 44s
CI / test-python-backend-compliance (push) Successful in 37s
CI / test-python-document-crawler (push) Successful in 22s
CI / test-python-dsms-gateway (push) Successful in 20s
Drafting Engine: 7-module pipeline with narrative tags, allowed facts governance, PII sanitizer, prose validator with repair loop, hash-based cache, and terminology guide. v1 fallback via ?v=1 query param. IACE: Initial AI-Act Conformity Engine with risk classifier, completeness checker, hazard library, and PostgreSQL store for AI system assessments. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,9 +1,11 @@
|
||||
/**
|
||||
* Drafting Engine - Draft API
|
||||
* Drafting Engine - Draft API v2
|
||||
*
|
||||
* Erstellt strukturierte Compliance-Dokument-Entwuerfe.
|
||||
* Baut dokument-spezifische Prompts aus SOUL-Template + State-Projection.
|
||||
* Gibt strukturiertes JSON zurueck.
|
||||
* Erstellt personalisierte Compliance-Dokument-Entwuerfe.
|
||||
* Pipeline: Constraint → Context → Sanitize → LLM → Validate → Repair → Merge
|
||||
*
|
||||
* v1-Modus: ?v=1 oder fehlender v2-Kontext → Legacy-Pipeline
|
||||
* v2-Modus: Standard — Personalisierte Prosa mit Governance
|
||||
*/
|
||||
|
||||
import { NextRequest, NextResponse } from 'next/server'
|
||||
@@ -11,7 +13,7 @@ import { NextRequest, NextResponse } from 'next/server'
|
||||
const OLLAMA_URL = process.env.OLLAMA_URL || 'http://host.docker.internal:11434'
|
||||
const LLM_MODEL = process.env.COMPLIANCE_LLM_MODEL || 'qwen2.5vl:32b'
|
||||
|
||||
// Import prompt builders
|
||||
// v1 imports (Legacy)
|
||||
import { buildVVTDraftPrompt } from '@/lib/sdk/drafting-engine/prompts/draft-vvt'
|
||||
import { buildTOMDraftPrompt } from '@/lib/sdk/drafting-engine/prompts/draft-tom'
|
||||
import { buildDSFADraftPrompt } from '@/lib/sdk/drafting-engine/prompts/draft-dsfa'
|
||||
@@ -21,9 +23,32 @@ import type { DraftContext, DraftResponse, DraftRevision, DraftSection } from '@
|
||||
import type { ScopeDocumentType } from '@/lib/sdk/compliance-scope-types'
|
||||
import { ConstraintEnforcer } from '@/lib/sdk/drafting-engine/constraint-enforcer'
|
||||
|
||||
const constraintEnforcer = new ConstraintEnforcer()
|
||||
// v2 imports (Personalisierte Pipeline)
|
||||
import { deriveNarrativeTags, extractScoresFromDraftContext, narrativeTagsToPromptString } from '@/lib/sdk/drafting-engine/narrative-tags'
|
||||
import type { NarrativeTags } from '@/lib/sdk/drafting-engine/narrative-tags'
|
||||
import { buildAllowedFactsFromDraftContext, allowedFactsToPromptString, disallowedTopicsToPromptString } from '@/lib/sdk/drafting-engine/allowed-facts-v2'
|
||||
import { sanitizeAllowedFacts, validateNoRemainingPII, SanitizationError } from '@/lib/sdk/drafting-engine/sanitizer'
|
||||
import { terminologyToPromptString, styleContractToPromptString } from '@/lib/sdk/drafting-engine/terminology'
|
||||
import { executeRepairLoop, type ProseBlockOutput, type RepairAudit } from '@/lib/sdk/drafting-engine/prose-validator'
|
||||
import { ProseCacheManager, computeChecksumSync, type CacheKeyParams } from '@/lib/sdk/drafting-engine/cache'
|
||||
|
||||
const DRAFTING_SYSTEM_PROMPT = `Du bist ein DSGVO-Compliance-Experte und erstellst strukturierte Dokument-Entwuerfe.
|
||||
// ============================================================================
|
||||
// Shared State
|
||||
// ============================================================================
|
||||
|
||||
const constraintEnforcer = new ConstraintEnforcer()
|
||||
const proseCache = new ProseCacheManager({ maxEntries: 200, ttlHours: 24 })
|
||||
|
||||
// Template/Terminology Versionen (fuer Cache-Key)
|
||||
const TEMPLATE_VERSION = '2.0.0'
|
||||
const TERMINOLOGY_VERSION = '1.0.0'
|
||||
const VALIDATOR_VERSION = '1.0.0'
|
||||
|
||||
// ============================================================================
|
||||
// v1 Legacy Pipeline
|
||||
// ============================================================================
|
||||
|
||||
const V1_SYSTEM_PROMPT = `Du bist ein DSGVO-Compliance-Experte und erstellst strukturierte Dokument-Entwuerfe.
|
||||
Du MUSST immer im JSON-Format antworten mit einem "sections" Array.
|
||||
Jede Section hat: id, title, content, schemaField.
|
||||
Halte die Tiefe strikt am vorgegebenen Level.
|
||||
@@ -60,10 +85,488 @@ Antworte als JSON mit "sections" Array.`
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Legacy (v1) drafting pipeline.
 *
 * Flow: constraint gate → document-specific prompt → single Ollama chat call →
 * parse the returned JSON into draft sections → wrap them in a DraftRevision.
 *
 * @param body Parsed request body; read as documentType, draftContext and the
 *             optional instructions / existingDraft fields.
 * @returns 403 when the constraint check rejects the request, 502 when Ollama
 *          answers with a non-OK status, otherwise the draft response.
 */
async function handleV1Draft(body: Record<string, unknown>): Promise<NextResponse> {
  const request = body as {
    documentType: ScopeDocumentType
    draftContext: DraftContext
    instructions?: string
    existingDraft?: DraftRevision
  }
  const { documentType, draftContext, instructions, existingDraft } = request

  // Hard gate: a constraint violation ends the request before any LLM call.
  const constraintCheck = constraintEnforcer.checkFromContext(documentType, draftContext)
  if (!constraintCheck.allowed) {
    const rejection = {
      draft: null,
      constraintCheck,
      tokensUsed: 0,
      error: 'Constraint-Verletzung: ' + constraintCheck.violations.join('; '),
    }
    return NextResponse.json(rejection, { status: 403 })
  }

  const draftPrompt = buildPromptForDocumentType(documentType, draftContext, instructions)

  // Conversation: system prompt, optionally the previous draft, then the request.
  const messages = [{ role: 'system', content: V1_SYSTEM_PROMPT }]
  if (existingDraft) {
    messages.push({
      role: 'assistant',
      content: `Bisheriger Entwurf:\n${JSON.stringify(existingDraft.sections, null, 2)}`,
    })
  }
  messages.push({ role: 'user', content: draftPrompt })

  const chatPayload = {
    model: LLM_MODEL,
    messages,
    stream: false,
    options: { temperature: 0.15, num_predict: 16384 },
    format: 'json',
  }
  const ollamaResponse = await fetch(`${OLLAMA_URL}/api/chat`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(chatPayload),
    signal: AbortSignal.timeout(180000),
  })

  if (!ollamaResponse.ok) {
    const unreachable = { error: `LLM nicht erreichbar (Status ${ollamaResponse.status})` }
    return NextResponse.json(unreachable, { status: 502 })
  }

  const result = await ollamaResponse.json()
  const content = result.message?.content || ''

  // Normalises one raw LLM section; missing ids fall back to a positional id.
  const toSection = (raw: Record<string, unknown>, index: number) => ({
    id: String(raw.id || `section-${index}`),
    title: String(raw.title || ''),
    content: String(raw.content || ''),
    schemaField: raw.schemaField ? String(raw.schemaField) : undefined,
  })

  let sections: DraftSection[] = []
  try {
    const parsed = JSON.parse(content)
    sections = (parsed.sections || []).map(toSection)
  } catch {
    // Anything that is not the expected JSON is kept verbatim as one section.
    sections = [{ id: 'raw', title: 'Entwurf', content }]
  }

  const markdown = sections.map(s => `## ${s.title}\n\n${s.content}`).join('\n\n')
  const draft: DraftRevision = {
    id: `draft-${Date.now()}`,
    content: markdown,
    sections,
    createdAt: new Date().toISOString(),
    instruction: instructions as string | undefined,
  }

  return NextResponse.json({
    draft,
    constraintCheck,
    tokensUsed: result.eval_count || 0,
  } satisfies DraftResponse)
}
|
||||
|
||||
// ============================================================================
|
||||
// v2 Personalisierte Pipeline
|
||||
// ============================================================================
|
||||
|
||||
/** Shape of one prose block the v2 pipeline asks the LLM to write. */
type ProseBlockDefinition = {
  blockId: string
  blockType: ProseBlockOutput['blockType']
  sectionName: string
  targetWords: number
}

/** Small factory so every table row below reads as one line. */
const proseBlock = (
  blockId: string,
  blockType: ProseBlockOutput['blockType'],
  sectionName: string,
  targetWords: number
): ProseBlockDefinition => ({ blockId, blockType, sectionName, targetWords })

/**
 * Prose blocks to generate per document type, in generation order.
 * Keys are document type identifiers; targetWords is the length requested
 * from the LLM for that block.
 */
const DOCUMENT_PROSE_BLOCKS: Record<string, ProseBlockDefinition[]> = {
  tom: [
    proseBlock('tom-intro', 'introduction', 'Einleitung TOM', 120),
    proseBlock('tom-transition', 'transition', 'Ueberleitung Massnahmen', 40),
    proseBlock('tom-conclusion', 'conclusion', 'Fazit TOM', 80),
  ],
  dsfa: [
    proseBlock('dsfa-intro', 'introduction', 'Einleitung DSFA', 150),
    proseBlock('dsfa-transition', 'transition', 'Ueberleitung Risikobewertung', 40),
    proseBlock('dsfa-appreciation', 'appreciation', 'Wuerdigung bestehender Massnahmen', 60),
    proseBlock('dsfa-conclusion', 'conclusion', 'Fazit DSFA', 100),
  ],
  vvt: [
    proseBlock('vvt-intro', 'introduction', 'Einleitung VVT', 120),
    proseBlock('vvt-conclusion', 'conclusion', 'Fazit VVT', 80),
  ],
  dsi: [
    proseBlock('dsi-intro', 'introduction', 'Einleitung Datenschutzerklaerung', 130),
    proseBlock('dsi-conclusion', 'conclusion', 'Fazit Datenschutzerklaerung', 80),
  ],
  lf: [
    proseBlock('lf-intro', 'introduction', 'Einleitung Loeschfristen', 100),
    proseBlock('lf-conclusion', 'conclusion', 'Fazit Loeschfristen', 60),
  ],
}
|
||||
|
||||
/**
 * Assembles the system prompt for one v2 prose block.
 *
 * The prompt lists the allowed facts, narrative tags, terminology, style
 * contract and forbidden content, states the writing rules, shows the JSON
 * output shape, and ends with the block metadata.
 *
 * @param sanitizedFactsString Serialised allowed facts (customer profile section).
 * @param narrativeTagsString  Serialised narrative tags (assessment section).
 * @param terminologyString    Serialised terminology guide.
 * @param styleString          Serialised style contract.
 * @param disallowedString     Serialised forbidden topics; fixed bans are appended.
 * @param companyName          Company name the text must use verbatim.
 * @param blockId              Identifier echoed in the expected JSON output.
 * @param blockType            Block type echoed in the expected JSON output.
 * @param sectionName          Section the block belongs to.
 * @param documentType         Document type the block belongs to.
 * @param targetWords          Requested length in words.
 * @returns The complete prompt, lines separated by "\n".
 */
function buildV2SystemPrompt(
  sanitizedFactsString: string,
  narrativeTagsString: string,
  terminologyString: string,
  styleString: string,
  disallowedString: string,
  companyName: string,
  blockId: string,
  blockType: string,
  sectionName: string,
  documentType: string,
  targetWords: number
): string {
  const promptLines = [
    'Du bist ein Compliance-Dokumenten-Redakteur.',
    'Du schreibst einzelne Textabschnitte fuer offizielle Compliance-Dokumente.',
    '',
    'KUNDENPROFIL (ERLAUBTE FAKTEN — nur diese darfst du verwenden):',
    `${sanitizedFactsString}`,
    '',
    'BEWERTUNGSERGEBNIS (sprachliche Tags — verwende nur diese Begriffe):',
    `${narrativeTagsString}`,
    '',
    'TERMINOLOGIE (verwende ausschliesslich diese Fachbegriffe):',
    `${terminologyString}`,
    '',
    'STIL:',
    `${styleString}`,
    '',
    'VERBOTENE INHALTE:',
    `${disallowedString}`,
    '- Keine konkreten Prozentwerte, Scores oder Zahlen',
    '- Keine Compliance-Level-Bezeichnungen (L1, L2, L3, L4)',
    '- Keine direkte Ansprache ("Sie", "Ihr")',
    '- Kein Denglisch, keine Marketing-Sprache, keine Superlative',
    '',
    'STRIKTE REGELN:',
    `1. Verwende den Firmennamen "${companyName}" — nie "Ihr Unternehmen"`,
    `2. Schreibe in der dritten Person ("Die ${companyName}...")`,
    '3. Beziehe dich auf die Branche und organisatorische Merkmale',
    '4. Verwende NUR Fakten aus dem Kundenprofil oben',
    '5. Verwende NUR die sprachlichen Tags aus dem Bewertungsergebnis',
    '6. Erfinde KEINE zusaetzlichen Fakten oder Bewertungen',
    '7. Halte dich an die Terminologie-Vorgaben',
    '8. Dein Text wird ZWISCHEN feste Datentabellen eingefuegt',
    '',
    'OUTPUT-FORMAT: Antworte ausschliesslich als JSON:',
    '{',
    `"blockId": "${blockId}",`,
    `"blockType": "${blockType}",`,
    '"language": "de",',
    '"text": "...",',
    '"assertions": {',
    '"companyNameUsed": true/false,',
    '"industryReferenced": true/false,',
    '"structureReferenced": true/false,',
    '"itLandscapeReferenced": true/false,',
    '"narrativeTagsUsed": ["riskSummary", ...]',
    '},',
    '"forbiddenContentDetected": []',
    '}',
    '',
    `DOKUMENTENTYP: ${documentType}`,
    `SEKTION: ${sectionName}`,
    `BLOCK-TYP: ${blockType}`,
    `ZIEL-LAENGE: ${targetWords} Woerter`,
  ]
  return promptLines.join('\n')
}
|
||||
|
||||
/**
 * Returns the user prompt (task description) for one prose block.
 *
 * @param blockType    'introduction', 'transition', 'conclusion' or
 *                     'appreciation'; any other value gets a generic prompt.
 * @param sectionName  Section name quoted in the prompt.
 * @param documentType Document type; only used by the introduction prompt.
 * @returns The German instruction text for the LLM.
 */
function buildBlockSpecificPrompt(blockType: string, sectionName: string, documentType: string): string {
  if (blockType === 'introduction') {
    return [
      `Schreibe eine Einleitung fuer das Dokument "${documentType}" (Sektion: ${sectionName}).`,
      'Erklaere, warum dieses Dokument fuer das Unternehmen erstellt wurde.',
      'Gehe auf die spezifische Situation des Unternehmens ein.',
      'Erwaehne die Branche, die Organisationsform und die IT-Strategie.',
    ].join('\n')
  }

  if (blockType === 'transition') {
    return [
      `Schreibe eine kurze Ueberleitung zur naechsten Sektion "${sectionName}".`,
      'Verknuepfe den vorherigen Abschnitt logisch mit dem folgenden.',
    ].join('\n')
  }

  if (blockType === 'conclusion') {
    return [
      `Schreibe einen abschliessenden Absatz fuer die Sektion "${sectionName}".`,
      'Fasse die wesentlichen Punkte zusammen und verweise auf die fortlaufende Pflege.',
    ].join('\n')
  }

  if (blockType === 'appreciation') {
    return [
      'Schreibe einen wertschaetzenden Satz ueber die bestehenden Massnahmen.',
      'Verwende dabei die sprachlichen Tags aus dem Bewertungsergebnis.',
      'Keine neuen Fakten erfinden — nur das Profil wuerdigen.',
    ].join('\n')
  }

  // Unknown block types still get a usable, generic instruction.
  return `Schreibe einen Textabschnitt fuer "${sectionName}".`
}
|
||||
|
||||
/**
 * Sends one non-streaming chat request (system + user message) to Ollama and
 * returns the content of the reply message.
 *
 * @param systemPrompt Content of the system message.
 * @param userPrompt   Content of the user message.
 * @returns The reply's message content, or '' when the reply carries none.
 * @throws Error with the HTTP status when Ollama answers with a non-OK status.
 */
async function callOllama(systemPrompt: string, userPrompt: string): Promise<string> {
  const chatPayload = {
    model: LLM_MODEL,
    messages: [
      { role: 'system', content: systemPrompt },
      { role: 'user', content: userPrompt },
    ],
    stream: false,
    options: { temperature: 0.15, num_predict: 4096 },
    format: 'json',
  }

  const response = await fetch(`${OLLAMA_URL}/api/chat`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(chatPayload),
    signal: AbortSignal.timeout(120000),
  })

  if (response.ok) {
    const result = await response.json()
    return result.message?.content || ''
  }

  throw new Error(`Ollama error: ${response.status}`)
}
|
||||
|
||||
/**
 * v2 personalised drafting pipeline.
 *
 * Steps: constraint gate → narrative tags → allowed facts → PII sanitization →
 * prompt components → per-block prose generation (cache + repair loop, with a
 * fallback block when the LLM call fails) → data sections via the v1 prompt →
 * merge → audit trail.
 *
 * @param body Parsed request body; read as documentType, draftContext and the
 *             optional instructions field.
 * @returns 403 on a constraint violation, 422 when sanitization aborts with a
 *          SanitizationError, otherwise the draft plus audit trail.
 * @throws Re-throws any sanitization error that is not a SanitizationError.
 */
async function handleV2Draft(body: Record<string, unknown>): Promise<NextResponse> {
  const { documentType, draftContext, instructions } = body as {
    documentType: ScopeDocumentType
    draftContext: DraftContext
    instructions?: string
  }

  // Step 1: Constraint check (hard gate)
  const constraintCheck = constraintEnforcer.checkFromContext(documentType, draftContext)
  if (!constraintCheck.allowed) {
    return NextResponse.json({
      draft: null,
      constraintCheck,
      tokensUsed: 0,
      error: 'Constraint-Verletzung: ' + constraintCheck.violations.join('; '),
    }, { status: 403 })
  }

  // Step 2: Derive narrative tags from the scores in the draft context
  const scores = extractScoresFromDraftContext(draftContext)
  const narrativeTags: NarrativeTags = deriveNarrativeTags(scores)

  // Step 3: Build allowed facts
  const allowedFacts = buildAllowedFactsFromDraftContext(draftContext, narrativeTags)

  // Step 4: PII sanitization — a SanitizationError aborts the request with 422
  let sanitizationResult
  try {
    sanitizationResult = sanitizeAllowedFacts(allowedFacts)
  } catch (error) {
    if (error instanceof SanitizationError) {
      return NextResponse.json({
        error: `Sanitization Hard Abort: ${error.message} (Feld: ${error.field})`,
        draft: null,
        constraintCheck,
        tokensUsed: 0,
      }, { status: 422 })
    }
    throw error
  }

  const sanitizedFacts = sanitizationResult.facts

  // Remaining PII findings are only logged; they do not stop the pipeline.
  const piiWarnings = validateNoRemainingPII(sanitizedFacts)
  if (piiWarnings.length > 0) {
    console.warn('PII-Warnungen nach Sanitization:', piiWarnings)
  }

  // Step 5: Build prompt components
  const factsString = allowedFactsToPromptString(sanitizedFacts)
  const tagsString = narrativeTagsToPromptString(narrativeTags)
  const termsString = terminologyToPromptString()
  const styleString = styleContractToPromptString()
  const disallowedString = disallowedTopicsToPromptString()

  // Prompt hash for the audit trail and the cache key
  const promptHash = computeChecksumSync({ factsString, tagsString, termsString, styleString, disallowedString })

  // Step 6: Generate prose blocks (cache + repair loop)
  const proseBlocks = DOCUMENT_PROSE_BLOCKS[documentType] || DOCUMENT_PROSE_BLOCKS.tom
  const generatedBlocks: ProseBlockOutput[] = []
  const repairAudits: RepairAudit[] = []
  let totalTokens = 0

  // `instructions` is appended to the user prompt but is part of neither the
  // cache key nor promptHash. Serving or storing cached prose for such a
  // request would hand out text written for different instructions, so the
  // cache is bypassed whenever instructions are present.
  const cacheEnabled = !instructions

  for (const blockDef of proseBlocks) {
    const cacheParams: CacheKeyParams = {
      allowedFacts: sanitizedFacts,
      templateVersion: TEMPLATE_VERSION,
      terminologyVersion: TERMINOLOGY_VERSION,
      narrativeTags,
      promptHash,
      blockType: blockDef.blockType,
      sectionName: blockDef.sectionName,
    }

    const cached = cacheEnabled ? proseCache.getSync(cacheParams) : null
    if (cached) {
      generatedBlocks.push(cached)
      repairAudits.push({
        repairAttempts: 0,
        validatorFailures: [],
        repairSuccessful: true,
        fallbackUsed: false,
      })
      continue
    }

    const systemPrompt = buildV2SystemPrompt(
      factsString, tagsString, termsString, styleString, disallowedString,
      sanitizedFacts.companyName,
      blockDef.blockId, blockDef.blockType, blockDef.sectionName,
      documentType, blockDef.targetWords
    )
    const userPrompt = buildBlockSpecificPrompt(
      blockDef.blockType, blockDef.sectionName, documentType
    ) + (instructions ? `\n\nZusaetzliche Anweisungen: ${instructions}` : '')

    try {
      const rawOutput = await callOllama(systemPrompt, userPrompt)
      totalTokens += rawOutput.length / 4 // Rough estimate: ~4 characters per token

      const { block, audit } = await executeRepairLoop(
        rawOutput,
        sanitizedFacts,
        narrativeTags,
        blockDef.blockId,
        blockDef.blockType,
        async (repairPrompt) => callOllama(systemPrompt, repairPrompt),
        documentType
      )

      generatedBlocks.push(block)
      repairAudits.push(audit)

      // Only validated LLM output is cached, never fallback text.
      if (cacheEnabled && !audit.fallbackUsed) {
        proseCache.setSync(cacheParams, block)
      }
    } catch (error) {
      // LLM call or repair loop failed → use the fallback block
      const failureMessage = error instanceof Error ? error.message : String(error)
      const { buildFallbackBlock } = await import('@/lib/sdk/drafting-engine/prose-validator')
      generatedBlocks.push(
        buildFallbackBlock(blockDef.blockId, blockDef.blockType, sanitizedFacts, documentType)
      )
      repairAudits.push({
        repairAttempts: 0,
        validatorFailures: [[failureMessage]],
        repairSuccessful: false,
        fallbackUsed: true,
        fallbackReason: `LLM-Fehler: ${failureMessage}`,
      })
    }
  }

  // Step 7: Data sections via the legacy prompt (best effort)
  const draftPrompt = buildPromptForDocumentType(documentType, draftContext, instructions)

  let dataSections: DraftSection[] = []
  try {
    const dataResponse = await callOllama(V1_SYSTEM_PROMPT, draftPrompt)
    const parsed = JSON.parse(dataResponse)
    dataSections = (parsed.sections || []).map((s: Record<string, unknown>, i: number) => ({
      id: String(s.id || `section-${i}`),
      title: String(s.title || ''),
      content: String(s.content || ''),
      schemaField: s.schemaField ? String(s.schemaField) : undefined,
    }))
    totalTokens += dataResponse.length / 4
  } catch (error) {
    // Still best effort, but no longer silent: the draft continues with prose only.
    console.warn('v2 data section generation failed:', error instanceof Error ? error.message : error)
    dataSections = []
  }

  let finalSections: DraftSection[]
  if (dataSections.length > 0) {
    // Merge: prose intro → data sections (transitions in between) → appreciation → conclusion
    const introBlock = generatedBlocks.find(b => b.blockType === 'introduction')
    const transitionBlocks = generatedBlocks.filter(b => b.blockType === 'transition')
    const appreciationBlocks = generatedBlocks.filter(b => b.blockType === 'appreciation')
    const conclusionBlock = generatedBlocks.find(b => b.blockType === 'conclusion')

    const mergedSections: DraftSection[] = []

    if (introBlock) {
      mergedSections.push({ id: introBlock.blockId, title: 'Einleitung', content: introBlock.text })
    }

    for (let i = 0; i < dataSections.length; i++) {
      // The n-th transition goes between data section n and n+1, if it exists.
      const transition = i > 0 ? transitionBlocks[i - 1] : undefined
      if (transition) {
        mergedSections.push({ id: transition.blockId, title: '', content: transition.text })
      }
      mergedSections.push(dataSections[i])
    }

    for (const block of appreciationBlocks) {
      mergedSections.push({ id: block.blockId, title: 'Wuerdigung', content: block.text })
    }

    if (conclusionBlock) {
      mergedSections.push({ id: conclusionBlock.blockId, title: 'Fazit', content: conclusionBlock.text })
    }

    finalSections = mergedSections
  } else {
    // No data sections: use every generated prose block in generation order.
    // The former check on mergedSections.length could practically never reach
    // this branch, so transition blocks were dropped in that case.
    finalSections = generatedBlocks.map(b => ({
      id: b.blockId,
      title: b.blockType === 'introduction' ? 'Einleitung' :
        b.blockType === 'conclusion' ? 'Fazit' :
          b.blockType === 'appreciation' ? 'Wuerdigung' : 'Ueberleitung',
      content: b.text,
    }))
  }

  const draft: DraftRevision = {
    id: `draft-v2-${Date.now()}`,
    content: finalSections.map(s => s.title ? `## ${s.title}\n\n${s.content}` : s.content).join('\n\n'),
    sections: finalSections,
    createdAt: new Date().toISOString(),
    instruction: instructions,
  }

  // Step 8: Build audit trail
  const auditTrail = {
    documentType,
    templateVersion: TEMPLATE_VERSION,
    terminologyVersion: TERMINOLOGY_VERSION,
    validatorVersion: VALIDATOR_VERSION,
    promptHash,
    llmModel: LLM_MODEL,
    llmTemperature: 0.15,
    llmProvider: 'ollama',
    narrativeTags,
    sanitization: sanitizationResult.audit,
    repairAudits,
    // generatedBlocks and repairAudits are pushed in lockstep, so index i matches.
    proseBlocks: generatedBlocks.map((b, i) => ({
      blockId: b.blockId,
      blockType: b.blockType,
      wordCount: b.text.split(/\s+/).filter(Boolean).length,
      fallbackUsed: repairAudits[i]?.fallbackUsed ?? false,
      repairAttempts: repairAudits[i]?.repairAttempts ?? 0,
    })),
    cacheStats: proseCache.getStats(),
  }

  return NextResponse.json({
    draft,
    constraintCheck,
    tokensUsed: Math.round(totalTokens),
    pipelineVersion: 'v2',
    auditTrail,
  })
}
|
||||
|
||||
// ============================================================================
|
||||
// Route Handler
|
||||
// ============================================================================
|
||||
|
||||
export async function POST(request: NextRequest) {
|
||||
try {
|
||||
const body = await request.json()
|
||||
const { documentType, draftContext, instructions, existingDraft } = body
|
||||
const { documentType, draftContext } = body
|
||||
|
||||
if (!documentType || !draftContext) {
|
||||
return NextResponse.json(
|
||||
@@ -72,92 +575,14 @@ export async function POST(request: NextRequest) {
|
||||
)
|
||||
}
|
||||
|
||||
// 1. Constraint Check (Hard Gate)
|
||||
const constraintCheck = constraintEnforcer.checkFromContext(documentType, draftContext)
|
||||
|
||||
if (!constraintCheck.allowed) {
|
||||
return NextResponse.json({
|
||||
draft: null,
|
||||
constraintCheck,
|
||||
tokensUsed: 0,
|
||||
error: 'Constraint-Verletzung: ' + constraintCheck.violations.join('; '),
|
||||
}, { status: 403 })
|
||||
// v1 fallback: explicit ?v=1 parameter
|
||||
const version = request.nextUrl.searchParams.get('v')
|
||||
if (version === '1') {
|
||||
return handleV1Draft(body)
|
||||
}
|
||||
|
||||
// 2. Build document-specific prompt
|
||||
const draftPrompt = buildPromptForDocumentType(documentType, draftContext, instructions)
|
||||
|
||||
// 3. Build messages
|
||||
const messages = [
|
||||
{ role: 'system', content: DRAFTING_SYSTEM_PROMPT },
|
||||
...(existingDraft ? [{
|
||||
role: 'assistant',
|
||||
content: `Bisheriger Entwurf:\n${JSON.stringify(existingDraft.sections, null, 2)}`,
|
||||
}] : []),
|
||||
{ role: 'user', content: draftPrompt },
|
||||
]
|
||||
|
||||
// 4. Call LLM (non-streaming for structured output)
|
||||
const ollamaResponse = await fetch(`${OLLAMA_URL}/api/chat`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({
|
||||
model: LLM_MODEL,
|
||||
messages,
|
||||
stream: false,
|
||||
options: {
|
||||
temperature: 0.15,
|
||||
num_predict: 16384,
|
||||
},
|
||||
format: 'json',
|
||||
}),
|
||||
signal: AbortSignal.timeout(180000),
|
||||
})
|
||||
|
||||
if (!ollamaResponse.ok) {
|
||||
return NextResponse.json(
|
||||
{ error: `LLM nicht erreichbar (Status ${ollamaResponse.status})` },
|
||||
{ status: 502 }
|
||||
)
|
||||
}
|
||||
|
||||
const result = await ollamaResponse.json()
|
||||
const content = result.message?.content || ''
|
||||
|
||||
// 5. Parse JSON response
|
||||
let sections: DraftSection[] = []
|
||||
try {
|
||||
const parsed = JSON.parse(content)
|
||||
sections = (parsed.sections || []).map((s: Record<string, unknown>, i: number) => ({
|
||||
id: String(s.id || `section-${i}`),
|
||||
title: String(s.title || ''),
|
||||
content: String(s.content || ''),
|
||||
schemaField: s.schemaField ? String(s.schemaField) : undefined,
|
||||
}))
|
||||
} catch {
|
||||
// If not JSON, wrap raw content as single section
|
||||
sections = [{
|
||||
id: 'raw',
|
||||
title: 'Entwurf',
|
||||
content: content,
|
||||
}]
|
||||
}
|
||||
|
||||
const draft: DraftRevision = {
|
||||
id: `draft-${Date.now()}`,
|
||||
content: sections.map(s => `## ${s.title}\n\n${s.content}`).join('\n\n'),
|
||||
sections,
|
||||
createdAt: new Date().toISOString(),
|
||||
instruction: instructions,
|
||||
}
|
||||
|
||||
const response: DraftResponse = {
|
||||
draft,
|
||||
constraintCheck,
|
||||
tokensUsed: result.eval_count || 0,
|
||||
}
|
||||
|
||||
return NextResponse.json(response)
|
||||
// Default: v2 pipeline
|
||||
return handleV2Draft(body)
|
||||
} catch (error) {
|
||||
console.error('Draft generation error:', error)
|
||||
return NextResponse.json(
|
||||
|
||||
85
admin-compliance/lib/sdk/drafting-engine/allowed-facts-v2.ts
Normal file
85
admin-compliance/lib/sdk/drafting-engine/allowed-facts-v2.ts
Normal file
@@ -0,0 +1,85 @@
|
||||
/**
|
||||
* Allowed Facts v2 Adapter — Baut AllowedFacts aus DraftContext
|
||||
*
|
||||
* Die Haupt-AllowedFacts Datei (allowed-facts.ts) erwartet SDKState,
|
||||
* aber in der Draft API Route haben wir nur DraftContext.
|
||||
* Dieser Adapter ueberbrueckt die Luecke.
|
||||
*
|
||||
* Re-exportiert auch die Serialisierungs-/Validierungsfunktionen.
|
||||
*/
|
||||
|
||||
import type { AllowedFacts, FactPolicy } from './allowed-facts'
|
||||
import {
|
||||
DEFAULT_FACT_POLICY,
|
||||
allowedFactsToPromptString,
|
||||
disallowedTopicsToPromptString,
|
||||
checkForDisallowedContent,
|
||||
} from './allowed-facts'
|
||||
import type { NarrativeTags } from './narrative-tags'
|
||||
import type { DraftContext } from './types'
|
||||
|
||||
// Re-exports
|
||||
export { allowedFactsToPromptString, disallowedTopicsToPromptString, checkForDisallowedContent }
|
||||
|
||||
/**
 * Builds AllowedFacts from a DraftContext (API route context).
 *
 * Fields the DraftContext does not carry (legal form, location, primary use
 * cases) are left empty; team structure, IT landscape, special features and
 * triggered regulations are derived by the private helpers in this module.
 *
 * @param context       Draft context; its companyProfile and decisions are read.
 * @param narrativeTags Narrative tags, passed through unchanged.
 * @returns The fact budget for the LLM.
 */
export function buildAllowedFactsFromDraftContext(
  context: DraftContext,
  narrativeTags: NarrativeTags
): AllowedFacts {
  const company = context.companyProfile

  const facts: AllowedFacts = {
    // Company profile
    companyName: company.name || 'Unbekannt',
    legalForm: '', // not part of DraftContext
    industry: company.industry || '',
    location: '', // not part of DraftContext
    employeeCount: company.employeeCount || 0,

    // Organisation (derived)
    teamStructure: deriveTeamStructure(company.employeeCount),
    itLandscape: deriveItLandscape(company.businessModel, company.isPublicSector),
    specialFeatures: deriveSpecialFeatures(company),

    // Compliance context
    triggeredRegulations: deriveRegulations(context),
    primaryUseCases: [], // not part of DraftContext

    narrativeTags,
  }

  return facts
}
|
||||
|
||||
// ============================================================================
|
||||
// Private Helpers
|
||||
// ============================================================================
|
||||
|
||||
/**
 * Maps a headcount to a coarse organisation label.
 *
 * @param employeeCount Number of employees.
 * @returns 'Konzernstruktur' above 500, 'mittelstaendisch' above 50,
 *          otherwise 'Kleinunternehmen'.
 */
function deriveTeamStructure(employeeCount: number): string {
  // Ordered from largest to smallest; the first exceeded threshold wins.
  const tiers: Array<[number, string]> = [
    [500, 'Konzernstruktur'],
    [50, 'mittelstaendisch'],
  ]
  const match = tiers.find(([exclusiveMin]) => employeeCount > exclusiveMin)
  return match ? match[1] : 'Kleinunternehmen'
}
|
||||
|
||||
/**
 * Derives a coarse IT landscape label from the business model.
 *
 * The business model is matched case-insensitively, so "saas" or "CLOUD"
 * count as well as "SaaS" / "Cloud". A missing business model is treated as
 * "no match"; the parameter type now says so instead of relying on `?.`
 * against a `string` annotation.
 *
 * @param businessModel  Business model description; may be absent.
 * @param isPublicSector Whether the organisation belongs to the public sector.
 * @returns 'Cloud-First' when the business model mentions SaaS or Cloud,
 *          else 'On-Premise' for the public sector, else 'Hybrid'.
 */
function deriveItLandscape(businessModel: string | null | undefined, isPublicSector: boolean): string {
  const model = typeof businessModel === 'string' ? businessModel.toLowerCase() : ''
  if (model.includes('saas') || model.includes('cloud')) return 'Cloud-First'
  if (isPublicSector) return 'On-Premise'
  return 'Hybrid'
}
|
||||
|
||||
/**
 * Collects the special-feature labels that apply to a company profile.
 *
 * @param profile Company profile; isPublicSector, employeeCount and
 *                dataProtectionOfficer are read.
 * @returns The matching labels in fixed order: public sector, large company
 *          (more than 250 employees), data protection officer present.
 */
function deriveSpecialFeatures(profile: DraftContext['companyProfile']): string[] {
  const candidates: Array<[unknown, string]> = [
    [profile.isPublicSector, 'Oeffentlicher Sektor'],
    [profile.employeeCount > 250, 'Grossunternehmen'],
    [profile.dataProtectionOfficer, 'Interner DSB benannt'],
  ]

  return candidates
    .filter(([applies]) => Boolean(applies))
    .map(([, label]) => label)
}
|
||||
|
||||
/**
 * Derives the regulations triggered for a draft context.
 *
 * 'DSGVO' is always included. Further regulations are added when a hard
 * trigger id mentions them. Ids are lower-cased once before matching, so
 * `AI_ACT_*`, `Nis2_*` or `TTDSG-*` are recognised regardless of casing
 * (previously only selected case variants matched).
 *
 * @param context Draft context; `decisions.hardTriggers` is read.
 * @returns Regulation names, 'DSGVO' first, then in order of first match.
 */
function deriveRegulations(context: DraftContext): string[] {
  const regs = new Set<string>(['DSGVO'])
  const triggers = context.decisions.hardTriggers ?? []

  for (const trigger of triggers) {
    const id = String(trigger.id ?? '').toLowerCase()
    if (id.includes('ai_act') || id.includes('ai-act')) regs.add('AI Act')
    if (id.includes('nis2')) regs.add('NIS2')
    if (id.includes('ttdsg')) regs.add('TTDSG')
  }

  return Array.from(regs)
}
|
||||
257
admin-compliance/lib/sdk/drafting-engine/allowed-facts.ts
Normal file
257
admin-compliance/lib/sdk/drafting-engine/allowed-facts.ts
Normal file
@@ -0,0 +1,257 @@
|
||||
/**
|
||||
* Allowed Facts Governance — Kontrolliertes Faktenbudget fuer LLM
|
||||
*
|
||||
* Definiert welche Fakten das LLM in Prosa-Bloecken verwenden darf
|
||||
* und welche Themen explizit verboten sind.
|
||||
*
|
||||
* Verhindert Halluzinationen durch explizite Whitelisting.
|
||||
*/
|
||||
|
||||
import type { SDKState, CompanyProfile } from '../types'
|
||||
import type { NarrativeTags } from './narrative-tags'
|
||||
|
||||
// ============================================================================
|
||||
// Types
|
||||
// ============================================================================
|
||||
|
||||
/** Explicit fact budget handed to the LLM. */
export interface AllowedFacts {
  // --- Company profile ---
  companyName: string
  legalForm: string
  industry: string
  location: string
  employeeCount: number

  // --- Organisation ---
  teamStructure: string
  itLandscape: string
  specialFeatures: string[]

  // --- Compliance context ---
  triggeredRegulations: string[]
  primaryUseCases: string[]

  // --- Narrative tags (deterministic) ---
  narrativeTags: NarrativeTags
}
|
||||
|
||||
/** Rules stating which topics are allowed and which are forbidden. */
export interface FactPolicy {
  /** Topics that may be mentioned. */
  allowedTopics: Array<string>
  /** Topics that must not be mentioned. */
  disallowedTopics: Array<string>
}
|
||||
|
||||
// ============================================================================
|
||||
// Default Policy
|
||||
// ============================================================================
|
||||
|
||||
/** Default topic policy used when a caller does not supply its own. */
export const DEFAULT_FACT_POLICY: FactPolicy = {
  // Topics that may appear in generated prose.
  allowedTopics: [
    'Branche', 'Unternehmensgroesse', 'Teamstruktur',
    'IT-Strategie', 'Regulatorischer Kontext', 'Anwendungsfaelle',
    'Organisationsform', 'Standort', 'Rechtsform',
  ],
  // Topics that must never appear in generated prose.
  disallowedTopics: [
    'Umsatz', 'Gewinn', 'Kundenzahlen',
    'konkrete Zertifizierungen', 'interne Tool-Namen', 'Personennamen',
    'E-Mail-Adressen', 'Telefonnummern', 'IP-Adressen',
    'konkrete Prozentwerte', 'konkrete Scores', 'Compliance-Level-Bezeichnungen',
    'interne Projektnamen', 'Passwoerter', 'API-Keys',
    'Vertragsinhalte', 'Gehaltsinformationen',
  ],
}
|
||||
|
||||
// ============================================================================
|
||||
// Builder
|
||||
// ============================================================================
|
||||
|
||||
/**
 * Builds the AllowedFacts object from the SDK state.
 * Only the fields listed here are copied or derived; nothing else from the
 * state is passed on.
 *
 * @param state - SDK state; `companyProfile`, `complianceScope` and (via a helper) the use cases are read.
 * @param narrativeTags - Tags that are attached to the result unchanged.
 * @returns The fact budget, with fallbacks when the company profile is absent.
 */
export function buildAllowedFacts(
  state: SDKState,
  narrativeTags: NarrativeTags
): AllowedFacts {
  const { companyProfile: profile, complianceScope: scope } = state

  const facts: AllowedFacts = {
    // Copied profile fields, with neutral fallbacks
    companyName: profile?.name ?? 'Unbekannt',
    legalForm: profile?.legalForm ?? '',
    industry: profile?.industry ?? '',
    location: profile?.location ?? '',
    employeeCount: profile?.employeeCount ?? 0,

    // Values derived from the profile
    teamStructure: deriveTeamStructure(profile),
    itLandscape: deriveItLandscape(profile),
    specialFeatures: deriveSpecialFeatures(profile),

    // Values derived from scope and state
    triggeredRegulations: deriveTriggeredRegulations(scope),
    primaryUseCases: derivePrimaryUseCases(state),

    narrativeTags,
  }
  return facts
}
|
||||
|
||||
// ============================================================================
|
||||
// Serialization
|
||||
// ============================================================================
|
||||
|
||||
/**
 * Serialises AllowedFacts into a bullet list for the LLM prompt.
 *
 * Six fixed lines are always emitted; empty values are rendered as
 * 'nicht angegeben'. The three list-valued facts are appended only when
 * they contain at least one entry.
 *
 * @param facts - Facts to serialise.
 * @returns Newline-separated bullet list.
 */
export function allowedFactsToPromptString(facts: AllowedFacts): string {
  const legalFormSuffix = facts.legalForm ? ` (${facts.legalForm})` : ''

  // Optional list sections, in output order.
  const optionalSections: Array<[string, string[]]> = [
    ['Relevante Regulierungen', facts.triggeredRegulations],
    ['Anwendungsfaelle', facts.primaryUseCases],
    ['Besonderheiten', facts.specialFeatures],
  ]

  return [
    `- Firma: ${facts.companyName}${legalFormSuffix}`,
    `- Branche: ${facts.industry || 'nicht angegeben'}`,
    `- Standort: ${facts.location || 'nicht angegeben'}`,
    `- Mitarbeiter: ${facts.employeeCount || 'nicht angegeben'}`,
    `- Teamstruktur: ${facts.teamStructure || 'nicht angegeben'}`,
    `- IT-Umgebung: ${facts.itLandscape || 'nicht angegeben'}`,
    ...optionalSections
      .filter(([, values]) => values.length > 0)
      .map(([label, values]) => `- ${label}: ${values.join(', ')}`),
  ].join('\n')
}
|
||||
|
||||
/**
 * Serialises the disallowed topics of a policy into a bullet list for the LLM prompt.
 *
 * @param policy - Policy to read; defaults to DEFAULT_FACT_POLICY.
 * @returns One `- topic` line per disallowed topic, joined with newlines.
 */
export function disallowedTopicsToPromptString(policy: FactPolicy = DEFAULT_FACT_POLICY): string {
  const bullets: string[] = []
  for (const topic of policy.disallowedTopics) {
    bullets.push(`- ${topic}`)
  }
  return bullets.join('\n')
}
|
||||
|
||||
// ============================================================================
|
||||
// Validation
|
||||
// ============================================================================
|
||||
|
||||
/**
 * Scans a text for patterns that indicate forbidden content and returns one
 * message per detected category.
 *
 * Checks, in order: percentages, numeric scores, compliance level labels
 * (L1-L4), e-mail addresses, phone numbers, IP addresses and formal direct
 * address. The direct-address check covers the inflected possessive forms
 * (Ihre, Ihren, Ihres) in addition to Sie/Ihr/Ihnen/Ihrem/Ihrer.
 *
 * @param text - Text to inspect.
 * @param policy - Accepted for interface compatibility. NOTE(review): the
 *   checks below are fixed patterns and do not consult the policy's topic lists.
 * @returns Violation messages; empty when nothing was detected.
 */
export function checkForDisallowedContent(
  text: string,
  policy: FactPolicy = DEFAULT_FACT_POLICY
): string[] {
  void policy
  const violations: string[] = []

  // Percentages
  if (/\d+\s*%/.test(text)) {
    violations.push('Konkrete Prozentwerte gefunden')
  }

  // Score patterns
  if (/score[:\s]*\d+/i.test(text)) {
    violations.push('Konkrete Scores gefunden')
  }

  // Compliance level labels
  if (/\b(L1|L2|L3|L4)\b/.test(text)) {
    violations.push('Compliance-Level-Bezeichnungen (L1-L4) gefunden')
  }

  // E-mail addresses
  if (/[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/.test(text)) {
    violations.push('E-Mail-Adresse gefunden')
  }

  // Phone numbers: a candidate only counts when it has at least six digits,
  // so short numbers such as an employee count are not reported.
  const phoneCandidates = text.match(/(\+?\d{1,3}[-.\s]?)?\(?\d{2,5}\)?[-.\s]?\d{3,10}/g) ?? []
  if (phoneCandidates.some(candidate => candidate.replace(/\D/g, '').length >= 6)) {
    violations.push('Telefonnummer gefunden')
  }

  // IP addresses
  if (/\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b/.test(text)) {
    violations.push('IP-Adresse gefunden')
  }

  // Formal direct address; case-sensitive on purpose because lower-case
  // "sie"/"ihr" are ordinary third-person pronouns.
  if (/\b(Sie|Ihnen|Ihr(?:e[mnrs]?)?)\b/.test(text)) {
    violations.push('Direkte Ansprache (Sie/Ihr) gefunden')
  }

  return violations
}
|
||||
|
||||
// ============================================================================
|
||||
// Private Helpers
|
||||
// ============================================================================
|
||||
|
||||
/**
 * Maps the employee count of a profile to a coarse team-structure label.
 *
 * @param profile - Company profile, or null.
 * @returns '' without a profile; otherwise 'Konzernstruktur' above 500
 *   employees, 'mittelstaendisch' above 50, else 'Kleinunternehmen'.
 */
function deriveTeamStructure(profile: CompanyProfile | null): string {
  if (!profile) return ''
  const headcount = profile.employeeCount
  if (headcount > 500) return 'Konzernstruktur'
  return headcount > 50 ? 'mittelstaendisch' : 'Kleinunternehmen'
}
|
||||
|
||||
/**
 * Derives a coarse IT-landscape label from the profile.
 *
 * @param profile - Company profile, or null.
 * @returns '' without a profile; 'Cloud-First' when the business model
 *   includes 'SaaS' or 'Cloud'; otherwise 'On-Premise' for the public sector
 *   and 'Hybrid' for everyone else.
 */
function deriveItLandscape(profile: CompanyProfile | null): string {
  if (!profile) return ''
  const model = profile.businessModel
  if (model?.includes('SaaS') || model?.includes('Cloud')) {
    return 'Cloud-First'
  }
  if (profile.isPublicSector) {
    return 'On-Premise'
  }
  return 'Hybrid'
}
|
||||
|
||||
/**
 * Collects the special-feature labels that apply to a profile.
 *
 * @param profile - Company profile, or null.
 * @returns Labels in fixed order (public sector, more than 250 employees,
 *   data protection officer present); empty without a profile.
 */
function deriveSpecialFeatures(profile: CompanyProfile | null): string[] {
  if (!profile) return []
  const candidates: Array<[unknown, string]> = [
    [profile.isPublicSector, 'Oeffentlicher Sektor'],
    [profile.employeeCount > 250, 'Grossunternehmen'],
    [profile.dataProtectionOfficer, 'Interner DSB benannt'],
  ]
  return candidates
    .filter(([applies]) => Boolean(applies))
    .map(([, label]) => label)
}
|
||||
|
||||
/**
 * Derives the triggered regulations from the compliance scope decision.
 *
 * 'DSGVO' is always included. Each triggered hard trigger adds a further
 * regulation when its rule id contains the corresponding marker. Matching is
 * case-insensitive, so `AI_ACT_...` is recognised the same way `NIS2_...`
 * already was.
 *
 * @param scope - Compliance scope state, or null.
 * @returns Regulation names in insertion order; ['DSGVO'] when there is no decision.
 */
function deriveTriggeredRegulations(
  scope: import('../compliance-scope-types').ComplianceScopeState | null
): string[] {
  const regs = new Set<string>(['DSGVO'])
  // No scope, no decision or no trigger list all mean "no triggers".
  for (const trigger of scope?.decision?.triggeredHardTriggers || []) {
    const ruleId = trigger.rule.id.toLowerCase()
    if (ruleId.includes('ai_act') || ruleId.includes('ai-act')) regs.add('AI Act')
    if (ruleId.includes('nis2')) regs.add('NIS2')
    if (ruleId.includes('ttdsg')) regs.add('TTDSG')
  }
  return Array.from(regs)
}
|
||||
|
||||
/**
 * Returns display labels for the first three use cases in the state.
 *
 * @param state - SDK state; only `useCases` is read.
 * @returns Up to three labels, each taken from `name`, then `title`, then
 *   the placeholder 'Unbenannt'; empty when there are no use cases.
 */
function derivePrimaryUseCases(state: SDKState): string[] {
  const useCases = state.useCases
  if (!useCases || useCases.length === 0) return []

  const labels: string[] = []
  for (const useCase of useCases.slice(0, 3)) {
    labels.push(useCase.name || useCase.title || 'Unbenannt')
  }
  return labels
}
|
||||
303
admin-compliance/lib/sdk/drafting-engine/cache.ts
Normal file
303
admin-compliance/lib/sdk/drafting-engine/cache.ts
Normal file
@@ -0,0 +1,303 @@
|
||||
/**
|
||||
* Cache Manager — Hash-basierter Prose-Block-Cache
|
||||
*
|
||||
* Deterministischer Cache fuer LLM-generierte Prosa-Bloecke.
|
||||
* Kein TTL-basiertes Raten — stattdessen Hash-basierte Invalidierung.
|
||||
*
|
||||
* Cache-Key = SHA-256 ueber alle Eingabeparameter.
|
||||
* Aendert sich ein Eingabewert → neuer Hash → Cache-Miss → Neu-Generierung.
|
||||
*/
|
||||
|
||||
import type { AllowedFacts } from './allowed-facts'
|
||||
import type { NarrativeTags } from './narrative-tags'
|
||||
import type { ProseBlockOutput } from './prose-validator'
|
||||
|
||||
// ============================================================================
|
||||
// Types
|
||||
// ============================================================================
|
||||
|
||||
/** A cached prose block together with its bookkeeping data. */
export interface CacheEntry {
  /** The cached block. */
  block: ProseBlockOutput
  /** Creation time as a string. */
  createdAt: string
  /** Number of times this entry has been served. */
  hitCount: number
  /** Key under which the entry is stored. */
  cacheKey: string
}
|
||||
|
||||
/** Input parameters from which a cache key is computed. */
export interface CacheKeyParams {
  /** Facts made available to the LLM. */
  allowedFacts: AllowedFacts
  /** Version of the template. */
  templateVersion: string
  /** Version of the terminology. */
  terminologyVersion: string
  /** Narrative tags. */
  narrativeTags: NarrativeTags
  /** Hash of the prompt. */
  promptHash: string
  /** Type of the block. */
  blockType: string
  /** Name of the section. */
  sectionName: string
}
|
||||
|
||||
/** Aggregate statistics about a cache. */
export interface CacheStats {
  /** Number of entries. */
  totalEntries: number
  /** Number of hits. */
  totalHits: number
  /** Number of misses. */
  totalMisses: number
  /** Hit rate. */
  hitRate: number
  /** Oldest entry, or null. */
  oldestEntry: null | string
  /** Newest entry, or null. */
  newestEntry: null | string
}
|
||||
|
||||
// ============================================================================
|
||||
// SHA-256 (Browser-kompatibel via SubtleCrypto)
|
||||
// ============================================================================
|
||||
|
||||
/**
 * Computes the SHA-256 hash of a string as lower-case hex.
 *
 * Uses SubtleCrypto when `globalThis.crypto.subtle` is available, otherwise
 * tries the Node.js `crypto` module, and as a last resort falls back to
 * `simpleHash`, which is not cryptographic.
 *
 * @param input - String to hash.
 * @returns Hex digest.
 */
async function sha256(input: string): Promise<string> {
  const subtle = globalThis.crypto?.subtle
  if (typeof subtle !== 'undefined') {
    const digest = await subtle.digest('SHA-256', new TextEncoder().encode(input))
    let hex = ''
    for (const byte of new Uint8Array(digest)) {
      hex += byte.toString(16).padStart(2, '0')
    }
    return hex
  }

  // Fallback: Node.js crypto
  try {
    const nodeCrypto = await import('crypto')
    return nodeCrypto.createHash('sha256').update(input).digest('hex')
  } catch {
    // Last fallback: simple hash (not cryptographic)
    return simpleHash(input)
  }
}
|
||||
|
||||
/**
 * Synchronous SHA-256 via the Node.js `crypto` module.
 * When `require('crypto')` or the hashing fails, falls back to `simpleHash`,
 * which is not cryptographic.
 *
 * @param input - String to hash.
 * @returns Hex digest.
 */
function sha256Sync(input: string): string {
  try {
    // eslint-disable-next-line @typescript-eslint/no-require-imports
    const { createHash } = require('crypto')
    const hasher = createHash('sha256')
    hasher.update(input)
    return hasher.digest('hex')
  } catch {
    return simpleHash(input)
  }
}
|
||||
|
||||
/**
 * Simple non-cryptographic fallback hash.
 *
 * Folds the UTF-16 code units of the input into a 32-bit integer
 * (`hash * 31 + codeUnit`, wrapped to int32) and returns the absolute value
 * as hex, left-padded with zeros to 16 characters.
 *
 * @param input - String to hash.
 * @returns 16-character hex string.
 */
function simpleHash(input: string): string {
  // split('') yields UTF-16 code units, matching index-based charCodeAt access.
  const folded = input
    .split('')
    .reduce((acc, unit) => (Math.imul(acc, 31) + unit.charCodeAt(0)) | 0, 0)
  return Math.abs(folded).toString(16).padStart(16, '0')
}
|
||||
|
||||
// ============================================================================
|
||||
// Cache Key Computation
|
||||
// ============================================================================
|
||||
|
||||
/**
 * Serialises a value to JSON with object keys sorted at every nesting level,
 * so that two structurally equal values yield the same string regardless of
 * property insertion order. Array order is preserved.
 *
 * An array passed as the second argument of JSON.stringify is NOT a sort
 * order but a property whitelist applied at all depths; using the top-level
 * key names there strips the contents of nested objects. A replacer function
 * that rebuilds each plain object with sorted keys avoids that.
 */
function stableCacheKeyJson(value: unknown): string {
  return JSON.stringify(value, (_key, current: unknown) => {
    if (current === null || typeof current !== 'object' || Array.isArray(current)) {
      return current
    }
    const source = current as Record<string, unknown>
    const sorted: Record<string, unknown> = {}
    for (const name of Object.keys(source).sort()) {
      sorted[name] = source[name]
    }
    return sorted
  })
}

/**
 * Computes the deterministic cache key for a set of parameters.
 * All nested values (facts, narrative tags) contribute to the key.
 *
 * @param params - Cache key parameters.
 * @returns Hash of the canonical serialisation of `params`.
 */
export async function computeCacheKey(params: CacheKeyParams): Promise<string> {
  return sha256(stableCacheKeyJson(params))
}

/**
 * Synchronous variant of computeCacheKey, using the synchronous hash helper.
 *
 * @param params - Cache key parameters.
 * @returns Hash of the canonical serialisation of `params`.
 */
export function computeCacheKeySync(params: CacheKeyParams): string {
  return sha256Sync(stableCacheKeyJson(params))
}
|
||||
|
||||
// ============================================================================
|
||||
// In-Memory Cache
|
||||
// ============================================================================
|
||||
|
||||
/**
 * In-memory cache for prose blocks.
 *
 * Safety mechanisms:
 * - Maximum number of entries (default 500).
 * - TTL as an additional safeguard (default 24 hours); expired entries are
 *   dropped on lookup and by `cleanup()`.
 * - When the cache is full and a NEW key is stored, the entry with the oldest
 *   creation time is evicted. Eviction is by creation time, not by last
 *   access. Overwriting an existing key never evicts another entry.
 */
export class ProseCacheManager {
  private cache = new Map<string, CacheEntry>()
  private hits = 0
  private misses = 0
  private readonly maxEntries: number
  private readonly ttlMs: number

  /**
   * @param options - Optional limits: `maxEntries` (default 500) and
   *   `ttlHours` (default 24).
   */
  constructor(options?: { maxEntries?: number; ttlHours?: number }) {
    this.maxEntries = options?.maxEntries ?? 500
    this.ttlMs = (options?.ttlHours ?? 24) * 60 * 60 * 1000
  }

  /** Looks up a cached block; returns null on a miss or when the entry has expired. */
  async get(params: CacheKeyParams): Promise<ProseBlockOutput | null> {
    const key = await computeCacheKey(params)
    return this.getByKey(key)
  }

  /** Synchronous variant of `get`. */
  getSync(params: CacheKeyParams): ProseBlockOutput | null {
    const key = computeCacheKeySync(params)
    return this.getByKey(key)
  }

  /** Stores a block under the key computed from `params`. */
  async set(params: CacheKeyParams, block: ProseBlockOutput): Promise<void> {
    const key = await computeCacheKey(params)
    this.setByKey(key, block)
  }

  /** Synchronous variant of `set`. */
  setSync(params: CacheKeyParams, block: ProseBlockOutput): void {
    const key = computeCacheKeySync(params)
    this.setByKey(key, block)
  }

  /** Returns entry count, hit/miss counters, hit rate and the oldest/newest creation time. */
  getStats(): CacheStats {
    let oldestEntry: string | null = null
    let newestEntry: string | null = null
    // createdAt values are ISO strings, so string comparison orders them chronologically.
    for (const { createdAt } of this.cache.values()) {
      if (oldestEntry === null || createdAt < oldestEntry) oldestEntry = createdAt
      if (newestEntry === null || createdAt > newestEntry) newestEntry = createdAt
    }
    const lookups = this.hits + this.misses

    return {
      totalEntries: this.cache.size,
      totalHits: this.hits,
      totalMisses: this.misses,
      hitRate: lookups > 0 ? this.hits / lookups : 0,
      oldestEntry,
      newestEntry,
    }
  }

  /** Removes all entries and resets the hit/miss counters. */
  clear(): void {
    this.cache.clear()
    this.hits = 0
    this.misses = 0
  }

  /**
   * Removes all expired entries.
   *
   * @returns Number of entries removed.
   */
  cleanup(): number {
    const now = Date.now()
    let removed = 0
    for (const [key, entry] of this.cache.entries()) {
      if (this.isExpired(entry, now)) {
        this.cache.delete(key)
        removed++
      }
    }
    return removed
  }

  // ========================================================================
  // Private
  // ========================================================================

  /** True when the entry is older than the configured TTL at time `now`. */
  private isExpired(entry: CacheEntry, now: number): boolean {
    return now - new Date(entry.createdAt).getTime() > this.ttlMs
  }

  /** Looks up by key and updates the hit/miss counters; expired entries are deleted and count as a miss. */
  private getByKey(key: string): ProseBlockOutput | null {
    const entry = this.cache.get(key)

    if (!entry) {
      this.misses++
      return null
    }

    if (this.isExpired(entry, Date.now())) {
      this.cache.delete(key)
      this.misses++
      return null
    }

    entry.hitCount++
    this.hits++
    return entry.block
  }

  /** Stores a fresh entry under `key`, evicting the oldest entry first if a new key would exceed the limit. */
  private setByKey(key: string, block: ProseBlockOutput): void {
    // Overwriting an existing key does not grow the cache, so no eviction is needed then.
    if (!this.cache.has(key) && this.cache.size >= this.maxEntries) {
      this.evictOldest()
    }

    this.cache.set(key, {
      block,
      createdAt: new Date().toISOString(),
      hitCount: 0,
      cacheKey: key,
    })
  }

  /** Deletes the entry with the smallest creation time (first one found on ties). */
  private evictOldest(): void {
    let oldestKey: string | null = null
    let oldestTime = Infinity

    for (const [key, entry] of this.cache.entries()) {
      const time = new Date(entry.createdAt).getTime()
      if (time < oldestTime) {
        oldestTime = time
        oldestKey = key
      }
    }

    if (oldestKey !== null) {
      this.cache.delete(oldestKey)
    }
  }
}
|
||||
|
||||
// ============================================================================
|
||||
// Checksum Utils (fuer Data Block Integritaet)
|
||||
// ============================================================================
|
||||
|
||||
/**
 * Serialises arbitrary data to a canonical JSON string for checksumming.
 *
 * Object keys are sorted at every nesting level and array order is kept, so
 * nested content is fully covered by the checksum. (Passing a key array as
 * the JSON.stringify replacer would instead act as a property whitelist at
 * all depths and drop nested properties.) Values that JSON cannot represent
 * at the root, such as `undefined`, fall back to their String() form so that
 * a string is always returned.
 */
function stableChecksumJson(data: unknown): string {
  const json: string | undefined = JSON.stringify(data, (_key, current: unknown) => {
    if (current === null || typeof current !== 'object' || Array.isArray(current)) {
      return current
    }
    const source = current as Record<string, unknown>
    const sorted: Record<string, unknown> = {}
    for (const name of Object.keys(source).sort()) {
      sorted[name] = source[name]
    }
    return sorted
  })
  return json ?? String(data)
}

/**
 * Computes an integrity checksum over data.
 * For flat objects with primitive values the result is unchanged from the
 * previous implementation; nested values now contribute as well, and `null`
 * no longer throws.
 *
 * @param data - Data to checksum.
 * @returns Hash of the canonical serialisation.
 */
export async function computeChecksum(data: unknown): Promise<string> {
  return sha256(stableChecksumJson(data))
}

/**
 * Synchronous variant of computeChecksum, using the synchronous hash helper.
 *
 * @param data - Data to checksum.
 * @returns Hash of the canonical serialisation.
 */
export function computeChecksumSync(data: unknown): string {
  return sha256Sync(stableChecksumJson(data))
}
|
||||
|
||||
/**
 * Verifies data against an expected checksum.
 *
 * @param data - Data to check.
 * @param expectedChecksum - Checksum the data is expected to have.
 * @returns True when the freshly computed checksum equals `expectedChecksum`.
 */
export async function verifyChecksum(data: unknown, expectedChecksum: string): Promise<boolean> {
  return (await computeChecksum(data)) === expectedChecksum
}
|
||||
139
admin-compliance/lib/sdk/drafting-engine/narrative-tags.ts
Normal file
139
admin-compliance/lib/sdk/drafting-engine/narrative-tags.ts
Normal file
@@ -0,0 +1,139 @@
|
||||
/**
|
||||
* Narrative Tags — Deterministische Score-zu-Sprache Ableitung
|
||||
*
|
||||
* Der Data Layer erzeugt aus berechneten Scores sprachliche Tags.
|
||||
* Das LLM darf NUR diese Tags verwenden — niemals echte Scores oder Prozentwerte.
|
||||
*
|
||||
* Alle Funktionen sind 100% deterministisch: gleiche Eingabe = gleiche Ausgabe.
|
||||
*/
|
||||
|
||||
// ============================================================================
|
||||
// Types
|
||||
// ============================================================================
|
||||
|
||||
/** Verbal tags describing the compliance situation. */
export interface NarrativeTags {
  /** Verbal risk assessment. */
  riskSummary:
    | 'niedrig'
    | 'moderat'
    | 'erhoht'
  /** Maturity of the existing measures. */
  maturity:
    | 'ausbaufahig'
    | 'solide'
    | 'hoch'
  /** Priority for action. */
  priority:
    | 'kurzfristig'
    | 'mittelfristig'
    | 'langfristig'
  /** Degree of control coverage. */
  coverageLevel:
    | 'grundlegend'
    | 'umfassend'
    | 'vollstaendig'
  /** Urgency. */
  urgency:
    | 'planbar'
    | 'zeitnah'
    | 'dringend'
}
|
||||
|
||||
/** Input scores from which the narrative tags are derived. */
export interface NarrativeTagScores {
  /** Overall risk score (0-100). */
  overallRisk: number

  /** Maturity score (0-100). */
  maturityScore: number

  /** Number of identified gaps. */
  gapCount: number

  /** Number of critical gaps. */
  criticalGaps: number

  /** Control coverage (0-100). */
  controlCoverage: number

  /** Number of critical findings. */
  criticalFindings: number

  /** Number of high findings. */
  highFindings: number
}
|
||||
|
||||
// ============================================================================
|
||||
// Tag Derivation (deterministisch)
|
||||
// ============================================================================
|
||||
|
||||
/**
 * Derives the verbal narrative tags from numeric scores.
 * Pure function: the same scores always yield the same tags.
 *
 * Thresholds (inclusive upper bounds):
 * - riskSummary: up to 30 'niedrig', up to 65 'moderat', above 'erhoht'
 * - maturity: up to 40 'ausbaufahig', up to 75 'solide', above 'hoch'
 * - coverageLevel: up to 50 'grundlegend', up to 80 'umfassend', above 'vollstaendig'
 * - priority: no gaps 'langfristig'; otherwise 'kurzfristig' with critical gaps, else 'mittelfristig'
 * - urgency: 'dringend' with critical findings, 'zeitnah' with high findings, else 'planbar'
 *
 * @param scores - Input scores.
 * @returns The derived tags.
 */
export function deriveNarrativeTags(scores: NarrativeTagScores): NarrativeTags {
  let riskSummary: NarrativeTags['riskSummary'] = 'erhoht'
  if (scores.overallRisk <= 30) riskSummary = 'niedrig'
  else if (scores.overallRisk <= 65) riskSummary = 'moderat'

  let maturity: NarrativeTags['maturity'] = 'hoch'
  if (scores.maturityScore <= 40) maturity = 'ausbaufahig'
  else if (scores.maturityScore <= 75) maturity = 'solide'

  let priority: NarrativeTags['priority'] = 'mittelfristig'
  if (scores.gapCount === 0) priority = 'langfristig'
  else if (scores.criticalGaps > 0) priority = 'kurzfristig'

  let coverageLevel: NarrativeTags['coverageLevel'] = 'vollstaendig'
  if (scores.controlCoverage <= 50) coverageLevel = 'grundlegend'
  else if (scores.controlCoverage <= 80) coverageLevel = 'umfassend'

  let urgency: NarrativeTags['urgency'] = 'planbar'
  if (scores.criticalFindings > 0) urgency = 'dringend'
  else if (scores.highFindings > 0) urgency = 'zeitnah'

  return { riskSummary, maturity, priority, coverageLevel, urgency }
}
|
||||
|
||||
/**
 * Extracts NarrativeTagScores from a draft context.
 *
 * Missing values fall back to defaults instead of throwing: a missing
 * `risk_score` or `assurance_score` becomes 50, and a missing `scores` object
 * or `riskFlags` list is treated as empty. `assurance_score` feeds both the
 * maturity score and the control coverage; the number of critical risk flags
 * feeds both `criticalGaps` and `criticalFindings`.
 *
 * @param context - Draft context; only `decisions.scores` and `constraints.riskFlags` are read.
 * @returns Scores suitable for tag derivation.
 */
export function extractScoresFromDraftContext(context: {
  decisions: {
    scores: {
      risk_score: number
      complexity_score: number
      assurance_score: number
      composite_score: number
    }
  }
  constraints: {
    riskFlags: Array<{ severity: string }>
  }
}): NarrativeTagScores {
  // The declared type says these are present, but the values arrive from
  // outside the type system, so guard against absent branches.
  const scores: Partial<typeof context.decisions.scores> = context.decisions?.scores ?? {}
  const rawFlags = context.constraints?.riskFlags
  const riskFlags = Array.isArray(rawFlags) ? rawFlags : []

  let criticalFindings = 0
  let highFindings = 0
  for (const flag of riskFlags) {
    if (flag.severity === 'critical') criticalFindings++
    else if (flag.severity === 'high') highFindings++
  }

  return {
    overallRisk: scores.risk_score ?? 50,
    maturityScore: scores.assurance_score ?? 50,
    gapCount: riskFlags.length,
    criticalGaps: criticalFindings,
    controlCoverage: scores.assurance_score ?? 50,
    criticalFindings,
    highFindings,
  }
}
|
||||
|
||||
// ============================================================================
|
||||
// Serialization
|
||||
// ============================================================================
|
||||
|
||||
/**
 * Serialises NarrativeTags into a bullet list for the LLM prompt.
 *
 * @param tags - Tags to serialise.
 * @returns Five `- label: value` lines joined with newlines.
 */
export function narrativeTagsToPromptString(tags: NarrativeTags): string {
  const rows: Array<[string, string]> = [
    ['Risikoprofil', tags.riskSummary],
    ['Reifegrad', tags.maturity],
    ['Prioritaet', tags.priority],
    ['Abdeckungsgrad', tags.coverageLevel],
    ['Dringlichkeit', tags.urgency],
  ]
  return rows.map(([label, value]) => `- ${label}: ${value}`).join('\n')
}
|
||||
|
||||
/**
 * Returns every allowed tag value as one flat list (for validation).
 * Values are grouped by dimension in the order risk, maturity, priority,
 * coverage, urgency. A new array is returned on each call.
 */
export function getAllAllowedTagValues(): string[] {
  const risk = ['niedrig', 'moderat', 'erhoht']
  const maturity = ['ausbaufahig', 'solide', 'hoch']
  const priority = ['kurzfristig', 'mittelfristig', 'langfristig']
  const coverage = ['grundlegend', 'umfassend', 'vollstaendig']
  const urgency = ['planbar', 'zeitnah', 'dringend']
  return [...risk, ...maturity, ...priority, ...coverage, ...urgency]
}
|
||||
485
admin-compliance/lib/sdk/drafting-engine/prose-validator.ts
Normal file
485
admin-compliance/lib/sdk/drafting-engine/prose-validator.ts
Normal file
@@ -0,0 +1,485 @@
|
||||
/**
|
||||
* Prose Validator + Repair Loop — Governance Layer
|
||||
*
|
||||
* Validiert LLM-generierte Prosa-Bloecke gegen das Regelwerk.
|
||||
* Orchestriert den Repair-Loop (max 2 Versuche) mit Fallback.
|
||||
*
|
||||
* 12 Pruefregeln, davon 10 reparierbar und 2 Hard Aborts.
|
||||
*/
|
||||
|
||||
import type { NarrativeTags } from './narrative-tags'
|
||||
import { getAllAllowedTagValues } from './narrative-tags'
|
||||
import type { AllowedFacts } from './allowed-facts'
|
||||
import { checkForDisallowedContent } from './allowed-facts'
|
||||
import { checkStyleViolations, checkTerminologyUsage } from './terminology'
|
||||
import type { SanitizedFacts } from './sanitizer'
|
||||
import { isSanitized } from './sanitizer'
|
||||
|
||||
// ============================================================================
|
||||
// Types
|
||||
// ============================================================================
|
||||
|
||||
/** Structured LLM output (mandatory format). */
export interface ProseBlockOutput {
  /** Identifier of the block. */
  blockId: string
  /** Kind of prose block. */
  blockType:
    | 'introduction'
    | 'transition'
    | 'conclusion'
    | 'appreciation'
  /** Language of the text. */
  language: 'de'
  /** The prose text itself. */
  text: string

  /** Assertions reported alongside the text. */
  assertions: {
    companyNameUsed: boolean
    industryReferenced: boolean
    structureReferenced: boolean
    itLandscapeReferenced: boolean
    narrativeTagsUsed: Array<string>
  }

  /** Forbidden content reported alongside the text. */
  forbiddenContentDetected: Array<string>
}
|
||||
|
||||
/** A single validation finding. */
export interface ProseValidationError {
  /** Name of the rule that produced the finding. */
  rule: string
  /** Severity of the finding. */
  severity: 'warning' | 'error'
  /** Message describing the finding. */
  message: string
  /** Whether the finding is marked as repairable. */
  repairable: boolean
}
|
||||
|
||||
/** Result of validating a prose block. */
export interface ProseValidatorResult {
  /** Whether the block is valid. */
  valid: boolean
  /** All findings collected during validation. */
  errors: Array<ProseValidationError>
  /** Whether the result is repairable. */
  repairable: boolean
}
|
||||
|
||||
/** Audit record of a repair loop. */
export interface RepairAudit {
  /** Number of repair attempts. */
  repairAttempts: number
  /** Validator failures, one list of strings per entry. */
  validatorFailures: Array<Array<string>>
  /** Whether the repair was successful. */
  repairSuccessful: boolean
  /** Whether a fallback was used. */
  fallbackUsed: boolean
  /** Optional reason for the fallback. */
  fallbackReason?: string
}
|
||||
|
||||
/** Minimum and maximum word count for each block type. */
const WORD_COUNT_LIMITS: Record<ProseBlockOutput['blockType'], { min: number; max: number }> = {
  introduction: {
    min: 30,
    max: 200,
  },
  transition: {
    min: 10,
    max: 80,
  },
  conclusion: {
    min: 20,
    max: 150,
  },
  appreciation: {
    min: 15,
    max: 100,
  },
}
|
||||
|
||||
// ============================================================================
|
||||
// Prose Validator
|
||||
// ============================================================================
|
||||
|
||||
/**
 * Validates a prose block against the rule set and collects all findings.
 *
 * Rule 1 (valid JSON) is checked by the caller before this function runs.
 * The block is valid when no finding has severity 'error'; it is repairable
 * when it has at least one error and none of the findings is marked
 * non-repairable. Only the sanitization rule (11) produces a non-repairable
 * finding here.
 *
 * The direct-address rule (7) also covers the inflected possessive forms
 * Ihre/Ihren/Ihres, not only Sie/Ihr/Ihnen/Ihrem/Ihrer.
 *
 * @param block - Parsed LLM output to validate.
 * @param facts - Facts the block was generated from (raw or sanitized).
 * @param expectedTags - Narrative tags the text is allowed to use.
 * @returns Validity flag, all findings in rule order, and the repairable flag.
 */
export function validateProseBlock(
  block: ProseBlockOutput,
  facts: AllowedFacts | SanitizedFacts,
  expectedTags: NarrativeTags
): ProseValidatorResult {
  const errors: ProseValidationError[] = []
  const report = (
    rule: string,
    severity: ProseValidationError['severity'],
    message: string,
    repairable = true
  ): void => {
    errors.push({ rule, severity, message, repairable })
  }

  // Rule 1: JSON_VALID — checked externally (parsing happens before this call).

  // Rule 2: COMPANY_NAME_PRESENT (skipped for the 'Unbekannt' placeholder name)
  if (!block.text.includes(facts.companyName) && facts.companyName !== 'Unbekannt') {
    report('COMPANY_NAME_PRESENT', 'error', `Firmenname "${facts.companyName}" nicht im Text gefunden`)
  }

  // Rule 3: INDUSTRY_REFERENCED (case-insensitive, only when an industry is known)
  if (facts.industry && !block.text.toLowerCase().includes(facts.industry.toLowerCase())) {
    report('INDUSTRY_REFERENCED', 'warning', `Branche "${facts.industry}" nicht im Text referenziert`)
  }

  // Rule 4: NO_NUMERIC_SCORES
  if (/\d+\s*%/.test(block.text)) {
    report('NO_NUMERIC_SCORES', 'error', 'Prozentwerte im Text gefunden')
  }
  if (/score[:\s]*\d+/i.test(block.text)) {
    report('NO_NUMERIC_SCORES', 'error', 'Score-Werte im Text gefunden')
  }
  if (/\b(L1|L2|L3|L4)\b/.test(block.text)) {
    report('NO_NUMERIC_SCORES', 'error', 'Compliance-Level-Bezeichnungen (L1-L4) im Text gefunden')
  }

  // Rule 5: NO_DISALLOWED_TOPICS
  for (const violation of checkForDisallowedContent(block.text)) {
    report('NO_DISALLOWED_TOPICS', 'error', violation)
  }

  // Rule 6: WORD_COUNT_IN_RANGE (too short is a warning, too long an error)
  const wordCount = block.text.split(/\s+/).filter(Boolean).length
  const limits = WORD_COUNT_LIMITS[block.blockType]
  if (limits) {
    if (wordCount < limits.min) {
      report(
        'WORD_COUNT_IN_RANGE',
        'warning',
        `Wortanzahl ${wordCount} unter Minimum ${limits.min} fuer ${block.blockType}`
      )
    }
    if (wordCount > limits.max) {
      report(
        'WORD_COUNT_IN_RANGE',
        'error',
        `Wortanzahl ${wordCount} ueber Maximum ${limits.max} fuer ${block.blockType}`
      )
    }
  }

  // Rule 7: NO_DIRECT_ADDRESS — case-sensitive on purpose, lower-case
  // "sie"/"ihr" are ordinary third-person pronouns.
  if (/\b(Sie|Ihnen|Ihr(?:e[mnrs]?)?)\b/.test(block.text)) {
    report('NO_DIRECT_ADDRESS', 'error', 'Direkte Ansprache (Sie/Ihr) gefunden')
  }

  // Rule 8: NARRATIVE_TAGS_CONSISTENT
  const knownTagValues = getAllAllowedTagValues()
  if (block.assertions.narrativeTagsUsed) {
    for (const tag of block.assertions.narrativeTagsUsed) {
      if (!knownTagValues.includes(tag)) {
        report('NARRATIVE_TAGS_CONSISTENT', 'error', `Unbekannter Narrative Tag "${tag}" in assertions`)
      }
    }
  }
  // Flag known tag values that occur in the text but are not among the expected ones.
  const expectedTagValues: string[] = Object.values(expectedTags)
  for (const tagValue of knownTagValues) {
    if (block.text.includes(tagValue) && !expectedTagValues.includes(tagValue)) {
      report(
        'NARRATIVE_TAGS_CONSISTENT',
        'error',
        `Tag "${tagValue}" im Text, aber nicht im erwarteten Tag-Set`
      )
    }
  }

  // Rule 9: TERMINOLOGY_CORRECT
  for (const warning of checkTerminologyUsage(block.text)) {
    report('TERMINOLOGY_CORRECT', 'warning', warning)
  }

  // Rule 10: Style violations
  for (const violation of checkStyleViolations(block.text)) {
    report('STYLE_VIOLATION', 'warning', violation)
  }

  // Rule 11: SANITIZATION_PASSED (hard abort: not repairable)
  if ('__sanitized' in facts && !isSanitized(facts)) {
    report('SANITIZATION_PASSED', 'error', 'Sanitization-Flag gesetzt aber nicht valide', false)
  }

  // Rule 12: Self-reported forbidden content
  if (block.forbiddenContentDetected && block.forbiddenContentDetected.length > 0) {
    report(
      'SELF_REPORTED_FORBIDDEN',
      'error',
      `LLM meldet verbotene Inhalte: ${block.forbiddenContentDetected.join(', ')}`
    )
  }

  const hasHardAbort = errors.some(e => !e.repairable)
  const hasErrors = errors.some(e => e.severity === 'error')

  return {
    valid: !hasErrors,
    errors,
    repairable: hasErrors && !hasHardAbort,
  }
}
|
||||
|
||||
// ============================================================================
|
||||
// JSON Parsing
|
||||
// ============================================================================
|
||||
|
||||
/**
 * Parses raw LLM output into a normalized ProseBlockOutput.
 *
 * Returns null when the content is not valid JSON, is not a JSON object, or
 * lacks one of the required fields: a string `blockId`, a string `text` and a
 * `blockType` from the known set.
 *
 * Optional fields are normalized instead of being passed through as-is, so
 * later stages can rely on their shape (the validator calls `.join()` on
 * `forbiddenContentDetected`, which would throw on a plain string):
 * - `language` falls back to 'de' unless it is a non-empty string
 * - assertion flags are true only for a literal boolean `true`
 * - list fields become string arrays; a single non-empty string is wrapped
 *   so a self-reported finding is not lost
 *
 * @param rawContent - Raw text returned by the LLM
 * @returns The normalized block, or null if it cannot be used
 */
export function parseProseBlockOutput(rawContent: string): ProseBlockOutput | null {
  const knownBlockTypes = ['introduction', 'transition', 'conclusion', 'appreciation']

  const toStringList = (value: unknown): string[] => {
    if (Array.isArray(value)) {
      return value.filter((item): item is string => typeof item === 'string')
    }
    return typeof value === 'string' && value.length > 0 ? [value] : []
  }

  let parsed: unknown
  try {
    parsed = JSON.parse(rawContent)
  } catch {
    return null
  }

  // JSON.parse also yields null, numbers, strings and booleans.
  if (typeof parsed !== 'object' || parsed === null) {
    return null
  }
  const candidate = parsed as Record<string, unknown>
  const { blockId, blockType, text, language } = candidate

  // Required fields
  if (
    typeof blockId !== 'string' ||
    typeof text !== 'string' ||
    typeof blockType !== 'string' ||
    !knownBlockTypes.includes(blockType)
  ) {
    return null
  }

  const assertions: Record<string, unknown> =
    typeof candidate.assertions === 'object' && candidate.assertions !== null
      ? (candidate.assertions as Record<string, unknown>)
      : {}

  return {
    blockId,
    blockType: blockType as ProseBlockOutput['blockType'],
    language: (typeof language === 'string' && language.length > 0
      ? language
      : 'de') as ProseBlockOutput['language'],
    text,
    assertions: {
      companyNameUsed: assertions.companyNameUsed === true,
      industryReferenced: assertions.industryReferenced === true,
      structureReferenced: assertions.structureReferenced === true,
      itLandscapeReferenced: assertions.itLandscapeReferenced === true,
      narrativeTagsUsed: toStringList(assertions.narrativeTagsUsed),
    },
    forbiddenContentDetected: toStringList(candidate.forbiddenContentDetected),
  }
}
|
||||
|
||||
// ============================================================================
|
||||
// Repair Prompt Builder
|
||||
// ============================================================================
|
||||
|
||||
/**
 * Builds the repair prompt for a block that failed validation.
 *
 * Only findings with severity 'error' are listed; warnings are left out.
 * The full original block is appended as pretty-printed JSON.
 *
 * @param originalBlock - The block that failed validation
 * @param validationErrors - All findings reported for that block
 * @returns The prompt text (German) to send to the LLM
 */
export function buildRepairPrompt(
  originalBlock: ProseBlockOutput,
  validationErrors: ProseValidationError[]
): string {
  const errorLines: string[] = []
  for (const finding of validationErrors) {
    if (finding.severity === 'error') {
      errorLines.push(`- ${finding.rule}: ${finding.message}`)
    }
  }

  const sections = [
    'Der vorherige Text enthielt Fehler. Ueberarbeite ihn unter Beibehaltung der Aussage.',
    '',
    'FEHLER:',
    errorLines.join('\n'),
    '',
    'REGELN:',
    '- Entferne alle unerlaubten Inhalte',
    '- Behalte den Firmenkontext bei',
    '- Erzeuge ausschliesslich JSON im vorgegebenen Format',
    '- Aendere KEINE Fakten, ergaenze KEINE neuen Informationen',
    '- Verwende KEINE direkte Ansprache (Sie/Ihr)',
    '- Verwende KEINE konkreten Prozentwerte oder Scores',
    '',
    'ORIGINALTEXT:',
    // String() keeps template-literal semantics (join would turn undefined into '').
    String(JSON.stringify(originalBlock, null, 2)),
  ]

  return sections.join('\n')
}
|
||||
|
||||
// ============================================================================
|
||||
// Fallback Templates
|
||||
// ============================================================================
|
||||
|
||||
/**
 * Static German fallback texts, one per block type.
 * Placeholders: {{companyName}}, {{documentType}}, {{maturity}}.
 */
const FALLBACK_TEMPLATES: Record<ProseBlockOutput['blockType'], string> = {
  introduction: 'Die {{companyName}} dokumentiert im Folgenden die {{documentType}}-relevanten Massnahmen und Bewertungen. Die nachstehenden Ausfuehrungen basieren auf der aktuellen Analyse der organisatorischen und technischen Gegebenheiten.',
  transition: 'Auf Grundlage der vorstehenden Daten ergeben sich die folgenden Detailbewertungen.',
  conclusion: 'Die {{companyName}} verfuegt ueber die dokumentierten Massnahmen und Strukturen. Die Einhaltung der regulatorischen Anforderungen wird fortlaufend ueberprueft und angepasst.',
  appreciation: 'Die bestehende Organisationsstruktur der {{companyName}} bildet eine {{maturity}} Grundlage fuer die nachfolgend dokumentierten Massnahmen.',
}

/**
 * Builds a deterministic fallback block for when the repair loop fails.
 *
 * Placeholders are filled in a single pass through a replacer function, so
 * that `$`-sequences in a value (e.g. a company name containing "$&") are
 * inserted literally and an inserted value is never scanned for further
 * placeholders.
 *
 * @param blockId - Id to assign to the resulting block
 * @param blockType - Selects the template
 * @param facts - Supplies `companyName` and `narrativeTags.maturity`
 * @param documentType - Optional document type label; 'Compliance' if empty
 * @returns A block with language 'de' and an empty forbidden-content list
 */
export function buildFallbackBlock(
  blockId: string,
  blockType: ProseBlockOutput['blockType'],
  facts: AllowedFacts,
  documentType?: string
): ProseBlockOutput {
  const values: Record<string, unknown> = {
    companyName: facts.companyName,
    maturity: facts.narrativeTags.maturity,
    documentType: documentType || 'Compliance',
  }

  const text = FALLBACK_TEMPLATES[blockType].replace(
    /\{\{(companyName|maturity|documentType)\}\}/g,
    (_placeholder: string, key: string) => String(values[key])
  )

  return {
    blockId,
    blockType,
    language: 'de',
    text,
    assertions: {
      companyNameUsed: true,
      industryReferenced: false,
      structureReferenced: false,
      itLandscapeReferenced: false,
      // Only the appreciation template contains the maturity tag.
      narrativeTagsUsed: blockType === 'appreciation' ? ['maturity'] : [],
    },
    forbiddenContentDetected: [],
  }
}
|
||||
|
||||
// ============================================================================
|
||||
// Repair Loop Orchestrator
|
||||
// ============================================================================
|
||||
|
||||
/** Callback used to call the LLM (injected by the route). */
export type LLMCallFn = (prompt: string) => Promise<string>

/**
 * Orchestrates the repair loop for a single prose block.
 *
 * 1. Parse and validate the original LLM output.
 * 2. On failure: send a repair prompt to the LLM, then parse and validate
 *    again (at most `maxRepairAttempts` LLM calls in total; a regeneration
 *    after unparsable JSON counts as one of them).
 * 3. If the block is still invalid, or a non-repairable rule failed, return
 *    the deterministic fallback template instead.
 *
 * LLM call failures and unparsable repair outputs never throw; they are
 * recorded in `audit.validatorFailures` and consume a repair attempt.
 *
 * @param rawLLMOutput - Raw text of the first LLM answer
 * @param facts - Facts handed to the validator and the fallback builder
 * @param expectedTags - Narrative tags handed to the validator
 * @param blockId - Id used for the fallback block
 * @param blockType - Block type used for the fallback block
 * @param llmCall - Callback that sends a prompt and resolves with the answer
 * @param documentType - Optional document type for the fallback text
 * @param maxRepairAttempts - Upper bound for repair calls (default 2)
 * @returns The accepted or fallback block together with the audit record
 */
export async function executeRepairLoop(
  rawLLMOutput: string,
  facts: AllowedFacts | SanitizedFacts,
  expectedTags: NarrativeTags,
  blockId: string,
  blockType: ProseBlockOutput['blockType'],
  llmCall: LLMCallFn,
  documentType?: string,
  maxRepairAttempts = 2
): Promise<{ block: ProseBlockOutput; audit: RepairAudit }> {
  const audit: RepairAudit = {
    repairAttempts: 0,
    validatorFailures: [],
    repairSuccessful: false,
    fallbackUsed: false,
  }

  // Marks the audit as fallback and returns the template block.
  const useFallback = (reason: string): { block: ProseBlockOutput; audit: RepairAudit } => {
    audit.fallbackUsed = true
    audit.fallbackReason = reason
    return {
      block: buildFallbackBlock(blockId, blockType, facts, documentType),
      audit,
    }
  }

  // Records a failed LLM call instead of swallowing it silently.
  const recordCallFailure = (error: unknown): void => {
    const detail = (error instanceof Error ? error.message : String(error)).slice(0, 200)
    audit.validatorFailures.push([`LLM_CALL_FAILED: Repair-Aufruf fehlgeschlagen (${detail})`])
  }

  // Attempt 0: parse the original output
  let parsed = parseProseBlockOutput(rawLLMOutput)

  if (!parsed) {
    audit.validatorFailures.push(['JSON_VALID: LLM-Output konnte nicht als JSON geparst werden'])

    // Regeneration counts as a repair attempt, but only if one is allowed;
    // otherwise the audit would report an attempt that never happened.
    if (maxRepairAttempts >= 1) {
      audit.repairAttempts++
      const repairPrompt = `Der vorherige Output war kein valides JSON. Erzeuge ausschliesslich ein JSON-Objekt mit den Feldern: blockId, blockType, language, text, assertions, forbiddenContentDetected.\n\nOriginal-Output:\n${rawLLMOutput.slice(0, 500)}`
      try {
        const repaired = await llmCall(repairPrompt)
        parsed = parseProseBlockOutput(repaired)
      } catch (error: unknown) {
        // LLM failure → fall through to the fallback below
        recordCallFailure(error)
      }
    }
  }

  if (!parsed) {
    return useFallback('JSON konnte nach Repair nicht geparst werden')
  }

  // Validation loop; starts at the number of attempts already used.
  for (let attempt = audit.repairAttempts; attempt <= maxRepairAttempts; attempt++) {
    const result = validateProseBlock(parsed, facts, expectedTags)

    if (result.valid) {
      // Set for every accepted block, including one that needed no repair.
      audit.repairSuccessful = true
      return { block: parsed, audit }
    }

    audit.validatorFailures.push(result.errors.map(e => `${e.rule}: ${e.message}`))

    // Hard abort → fallback immediately
    if (!result.repairable) {
      const hardRules = result.errors.filter(e => !e.repairable).map(e => e.rule).join(', ')
      return useFallback(`Hard Abort: ${hardRules}`)
    }

    // No repair attempts left
    if (attempt >= maxRepairAttempts) {
      break
    }

    audit.repairAttempts++
    try {
      const repairedOutput = await llmCall(buildRepairPrompt(parsed, result.errors))
      const repairedParsed = parseProseBlockOutput(repairedOutput)
      if (repairedParsed) {
        parsed = repairedParsed
      } else {
        // Keep the previous block; the next round re-validates and retries.
        audit.validatorFailures.push(['JSON_VALID: Repair-Output konnte nicht als JSON geparst werden'])
      }
    } catch (error: unknown) {
      // LLM failure → next attempt or fallback
      recordCallFailure(error)
    }
  }

  // All attempts used up → fallback
  return useFallback(`${maxRepairAttempts} Repair-Versuche erschoepft`)
}
|
||||
298
admin-compliance/lib/sdk/drafting-engine/sanitizer.ts
Normal file
298
admin-compliance/lib/sdk/drafting-engine/sanitizer.ts
Normal file
@@ -0,0 +1,298 @@
|
||||
/**
|
||||
* PII Sanitizer — Bereinigt Kontextdaten vor LLM-Aufruf
|
||||
*
|
||||
* Entfernt personenbezogene Daten (PII) aus AllowedFacts
|
||||
* bevor sie an das LLM weitergegeben werden.
|
||||
*
|
||||
* Bei Fehler: Hard Abort — kein LLM-Aufruf ohne erfolgreiche Sanitization.
|
||||
*/
|
||||
|
||||
import type { AllowedFacts } from './allowed-facts'
|
||||
|
||||
// ============================================================================
|
||||
// Types
|
||||
// ============================================================================
|
||||
|
||||
/** Facts budget after sanitization; carries the `__sanitized` marker. */
export type SanitizedFacts = AllowedFacts & {
  __sanitized: true
}

/** Audit record describing what the sanitizer did. */
export interface SanitizationAudit {
  sanitizationApplied: boolean
  redactedFieldsCount: number
  redactedFieldNames: string[]
}

/** Result of a sanitization run: the cleaned facts plus the audit record. */
export interface SanitizationResult {
  facts: SanitizedFacts
  audit: SanitizationAudit
}

/** Sanitization failure (intended to trigger a hard abort). */
export class SanitizationError extends Error {
  /** Name of the field the error refers to. */
  public readonly field: string
  /** Reason for the failure. */
  public readonly reason: string

  constructor(message: string, field: string, reason: string) {
    super(message)
    this.field = field
    this.reason = reason
    this.name = 'SanitizationError'
  }
}
|
||||
|
||||
// ============================================================================
|
||||
// PII Detection Patterns
|
||||
// ============================================================================
|
||||
|
||||
/**
 * Detection patterns used by the sanitizer.
 *
 * All patterns carry the global flag, so `test()`/`exec()` keep state in
 * `lastIndex`; callers in this file reset it around `test()` calls.
 */
const PII_PATTERNS = {
  // E-mail addresses
  email: /[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/g,
  // Phone-like digit groups (callers additionally require >= 6 digits)
  phone: /(\+?\d{1,3}[-.\s]?)?\(?\d{2,5}\)?[-.\s]?\d{3,10}/g,
  // Dotted quads
  ipAddress: /\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b/g,
  // UUID-shaped identifiers
  internalId: /\b[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\b/gi,
  // Prefixed secrets (sk-, pk-, api_key...) followed by 20+ alphanumerics
  apiKey: /\b(sk-|pk-|api[_-]?key[_-]?)[a-zA-Z0-9]{20,}\b/gi,
} as const
|
||||
|
||||
// ============================================================================
|
||||
// Sanitizer
|
||||
// ============================================================================
|
||||
|
||||
/**
 * Produces a PII-redacted copy of the given facts for use in an LLM prompt.
 *
 * Text fields go through `sanitizeString`, `location` through
 * `sanitizeAddress`. All remaining properties (e.g. the employee count) are
 * copied unchanged, and `narrativeTags` is shallow-copied without redaction.
 * The input object and its arrays are not mutated.
 *
 * NOTE(review): nothing in this function throws SanitizationError itself;
 * confirm where the documented hard abort is expected to be raised.
 *
 * @param facts - The facts to clean
 * @returns The cleaned facts flagged with `__sanitized: true`, plus an audit
 *          record naming every field in which something was redacted
 */
export function sanitizeAllowedFacts(facts: AllowedFacts): SanitizationResult {
  const redactedFieldNames: string[] = []

  const scrub = (value: string, field: string): string =>
    sanitizeString(value, field, redactedFieldNames)
  const scrubList = (values: readonly string[], field: string): string[] =>
    values.map((entry, index) => scrub(entry, `${field}[${index}]`))

  // Working copy with fresh containers, so the caller's object stays intact.
  const cleaned: AllowedFacts = {
    ...facts,
    specialFeatures: [...facts.specialFeatures],
    triggeredRegulations: [...facts.triggeredRegulations],
    primaryUseCases: [...facts.primaryUseCases],
    narrativeTags: { ...facts.narrativeTags },
  }

  // Scalar text fields (the company name is needed, but may still embed PII)
  cleaned.companyName = scrub(facts.companyName, 'companyName')
  cleaned.legalForm = scrub(facts.legalForm, 'legalForm')
  cleaned.industry = scrub(facts.industry, 'industry')

  // Location: city/region is kept, street address parts are stripped
  cleaned.location = sanitizeAddress(facts.location, 'location', redactedFieldNames)

  cleaned.teamStructure = scrub(facts.teamStructure, 'teamStructure')
  cleaned.itLandscape = scrub(facts.itLandscape, 'itLandscape')

  // List fields, element by element
  cleaned.specialFeatures = scrubList(facts.specialFeatures, 'specialFeatures')
  cleaned.triggeredRegulations = scrubList(facts.triggeredRegulations, 'triggeredRegulations')
  cleaned.primaryUseCases = scrubList(facts.primaryUseCases, 'primaryUseCases')

  const audit: SanitizationAudit = {
    sanitizationApplied: true,
    redactedFieldsCount: redactedFieldNames.length,
    redactedFieldNames,
  }

  return {
    facts: { ...cleaned, __sanitized: true } as SanitizedFacts,
    audit,
  }
}
|
||||
|
||||
/**
 * Type guard: true only for a non-null object whose `__sanitized` property
 * is exactly `true`.
 */
export function isSanitized(facts: unknown): facts is SanitizedFacts {
  if (typeof facts !== 'object' || facts === null) {
    return false
  }
  if (!('__sanitized' in facts)) {
    return false
  }
  return (facts as SanitizedFacts).__sanitized === true
}
|
||||
|
||||
// ============================================================================
|
||||
// Private Helpers
|
||||
// ============================================================================
|
||||
|
||||
/**
 * Redacts PII inside a single string value.
 *
 * Every match of the e-mail, UUID, API-key and IP patterns is replaced with
 * '[REDACTED]'; phone-like matches are replaced only when they contain at
 * least six digits. If anything was replaced, `fieldName` is appended to
 * `redactedFields` once.
 *
 * The specific patterns run BEFORE the broad phone pattern. The phone
 * pattern also matches digit runs inside IP addresses, UUIDs and keys; if it
 * ran first it would redact only part of such a token, and the specific
 * pattern would then no longer recognise the remainder.
 *
 * @param value - Text to clean; empty values are returned unchanged
 * @param fieldName - Name recorded in the audit list when a redaction occurs
 * @param redactedFields - Audit list, mutated in place
 * @returns The cleaned text
 */
function sanitizeString(
  value: string,
  fieldName: string,
  redactedFields: string[]
): string {
  if (!value) return value

  const state = { redacted: false }
  const redact = (): string => {
    state.redacted = true
    return '[REDACTED]'
  }

  // String.prototype.replace with a global regex starts at index 0 and
  // leaves lastIndex at 0, so the shared patterns need no manual reset here.
  const result = value
    .replace(PII_PATTERNS.email, redact)
    .replace(PII_PATTERNS.internalId, redact)
    .replace(PII_PATTERNS.apiKey, redact)
    .replace(PII_PATTERNS.ipAddress, redact)
    // Position-accurate: each match is decided where it occurs, instead of
    // re-searching the text for the matched substring.
    .replace(PII_PATTERNS.phone, candidate =>
      candidate.replace(/\D/g, '').length >= 6 ? redact() : candidate
    )

  if (state.redacted) {
    redactedFields.push(fieldName)
  }

  return result
}
|
||||
|
||||
/**
 * Cleans an address-like field: keeps city/region, removes street plus house
 * number and five-digit postal codes.
 *
 * Runs the generic `sanitizeString` redaction first. Compared to a plain
 * `\b`-anchored pattern this also covers
 * - the ASCII spelling "strasse",
 * - street names starting with an umlaut (`\b` only knows ASCII word chars),
 * - a postal code at the very end of the text or followed by punctuation.
 * When something was removed, leftover separators are tidied up, so that
 * "Hauptstrasse 12, 10115 Berlin" becomes "Berlin" rather than ", Berlin".
 *
 * @param value - Text to clean; empty values are returned unchanged
 * @param fieldName - Name recorded in the audit list when something is removed
 * @param redactedFields - Audit list, mutated in place (no duplicate entry)
 * @returns The cleaned, trimmed text
 */
function sanitizeAddress(
  value: string,
  fieldName: string,
  redactedFields: string[]
): string {
  if (!value) return value

  // Generic PII redaction first
  const generic = sanitizeString(value, fieldName, redactedFields)

  // German street + house number. Group 1 captures the preceding non-letter
  // (or start of text) and is put back by the replacement.
  const streetPattern = /(^|[^\wÄÖÜäöüß])[A-ZÄÖÜ][a-zäöüß]+(?:straße|strasse|str\.|weg|gasse|platz|allee|ring|damm)\s*\d+[a-z]?\b/gi
  // German postal code: exactly five digits
  const plzPattern = /\b\d{5}\b\s*/g

  const stripped = generic.replace(streetPattern, '$1').replace(plzPattern, '')

  if (stripped === generic) {
    return generic.trim()
  }

  if (!redactedFields.includes(fieldName)) {
    redactedFields.push(fieldName)
  }

  // Collapse gaps and drop separators left dangling by the removal.
  return stripped.replace(/\s{2,}/g, ' ').replace(/^[\s,;]+|[\s,;]+$/g, '')
}
|
||||
|
||||
/**
 * Scans every string value of already-sanitized facts for leftover PII and
 * returns one warning (German text) per finding.
 *
 * Checks e-mail addresses, IP addresses, API keys and UUID-shaped ids. The
 * UUID check mirrors the sanitizer, which redacts them as well. Phone
 * numbers are not re-checked here.
 *
 * @param facts - Facts that have passed through the sanitizer
 * @returns Warning messages; empty when nothing was found
 */
export function validateNoRemainingPII(facts: SanitizedFacts): string[] {
  const checks: Array<{ pattern: RegExp; label: string }> = [
    { pattern: PII_PATTERNS.email, label: 'Verbleibende E-Mail' },
    { pattern: PII_PATTERNS.ipAddress, label: 'Verbleibende IP-Adresse' },
    { pattern: PII_PATTERNS.apiKey, label: 'Verbleibender API-Key' },
    { pattern: PII_PATTERNS.internalId, label: 'Verbleibende interne ID' },
  ]

  const warnings: string[] = []

  for (const { path, value } of extractAllStringValues(facts)) {
    if (path === '__sanitized') continue

    for (const { pattern, label } of checks) {
      // The patterns are global and shared: reset before each test().
      pattern.lastIndex = 0
      if (pattern.test(value)) {
        warnings.push(`${label} in ${path}`)
      }
    }
  }

  return warnings
}
|
||||
|
||||
/**
 * Recursively collects every string value of an object together with its
 * path (`a.b` for nested keys, `a[0]` for array elements).
 * Non-string primitives and null are skipped.
 */
function extractAllStringValues(
  obj: Record<string, unknown>,
  prefix = ''
): Array<{ path: string; value: string }> {
  return Object.entries(obj).flatMap(([key, val]): Array<{ path: string; value: string }> => {
    const path = prefix ? `${prefix}.${key}` : key

    if (typeof val === 'string') {
      return [{ path, value: val }]
    }

    if (Array.isArray(val)) {
      return val.flatMap((item: unknown, index: number): Array<{ path: string; value: string }> => {
        const itemPath = `${path}[${index}]`
        if (typeof item === 'string') {
          return [{ path: itemPath, value: item }]
        }
        if (typeof item === 'object' && item !== null) {
          return extractAllStringValues(item as Record<string, unknown>, itemPath)
        }
        return []
      })
    }

    if (typeof val === 'object' && val !== null) {
      return extractAllStringValues(val as Record<string, unknown>, path)
    }

    return []
  })
}
|
||||
184
admin-compliance/lib/sdk/drafting-engine/terminology.ts
Normal file
184
admin-compliance/lib/sdk/drafting-engine/terminology.ts
Normal file
@@ -0,0 +1,184 @@
|
||||
/**
|
||||
* Terminology Guide & Style Contract — Konsistente Fachbegriffe
|
||||
*
|
||||
* Stellt sicher, dass alle Prosa-Bloecke eines Dokuments
|
||||
* dieselben Fachbegriffe und denselben Schreibstil verwenden.
|
||||
*
|
||||
* 100% deterministisch.
|
||||
*/
|
||||
|
||||
// ============================================================================
|
||||
// Terminology Guide
|
||||
// ============================================================================
|
||||
|
||||
/** Mapping of term keys to the German wording to use, grouped by topic. */
export interface TerminologyGuide {
  /** GDPR (DSGVO) terms */
  dsgvo: Record<string, string>
  /** Terms for technical and organisational measures (TOM) */
  tom: Record<string, string>
  /** General compliance terms */
  general: Record<string, string>
}

/**
 * Default terminology. Entry order matters: the prompt serializer only emits
 * the first entries of each group.
 */
export const DEFAULT_TERMINOLOGY: TerminologyGuide = {
  // GDPR vocabulary
  dsgvo: {
    controller: 'Verantwortlicher',
    processor: 'Auftragsverarbeiter',
    data_subject: 'betroffene Person',
    processing: 'Verarbeitung',
    personal_data: 'personenbezogene Daten',
    consent: 'Einwilligung',
    dpia: 'Datenschutz-Folgenabschaetzung (DSFA)',
    legitimate_interest: 'berechtigtes Interesse',
    data_breach: 'Verletzung des Schutzes personenbezogener Daten',
    dpo: 'Datenschutzbeauftragter (DSB)',
    supervisory_authority: 'Aufsichtsbehoerde',
    ropa: 'Verzeichnis von Verarbeitungstaetigkeiten (VVT)',
    retention_period: 'Aufbewahrungsfrist',
    erasure: 'Loeschung',
    restriction: 'Einschraenkung der Verarbeitung',
    portability: 'Datenportabilitaet',
    third_country: 'Drittland',
    adequacy_decision: 'Angemessenheitsbeschluss',
    scc: 'Standardvertragsklauseln (SCC)',
  },
  // Technical and organisational measures
  tom: {
    access_control: 'Zutrittskontrolle',
    access_management: 'Zugangskontrolle',
    authorization: 'Zugriffskontrolle',
    encryption: 'Verschluesselung',
    pseudonymization: 'Pseudonymisierung',
    availability: 'Verfuegbarkeitskontrolle',
    resilience: 'Belastbarkeit',
    recoverability: 'Wiederherstellbarkeit',
    audit_logging: 'Protokollierung',
    separation: 'Trennungsgebot',
    input_control: 'Eingabekontrolle',
    transport_control: 'Weitergabekontrolle',
    order_control: 'Auftragskontrolle',
  },
  // General compliance vocabulary
  general: {
    risk_assessment: 'Risikobewertung',
    audit_trail: 'Pruefpfad',
    compliance_level: 'Compliance-Tiefe',
    gap_analysis: 'Lueckenanalyse',
    remediation: 'Massnahmenplan',
    incident_response: 'Vorfallreaktion',
    business_continuity: 'Geschaeftskontinuitaet',
    vendor_management: 'Dienstleistermanagement',
    awareness_training: 'Sensibilisierungsschulung',
  },
}
|
||||
|
||||
// ============================================================================
|
||||
// Style Contract
|
||||
// ============================================================================
|
||||
|
||||
/** Writing-style rules shared by all prose blocks of a document. */
export interface StyleContract {
  /** Form of address */
  addressing: '3rd_person_company'
  /** Tone of voice */
  tone: 'formal_legal_plain'
  /** Labels of forbidden kinds of wording */
  forbid: string[]
}

/** Default style: third person, formal legal but plain language. */
export const DEFAULT_STYLE_CONTRACT: StyleContract = {
  addressing: '3rd_person_company',
  tone: 'formal_legal_plain',
  forbid: [
    'Denglisch',
    'Marketing-Sprache',
    'Superlative',
    'Direkte Ansprache',
    'Umgangssprache',
    'Konjunktiv-Ketten',
  ],
}
|
||||
|
||||
/**
 * Concrete regex patterns for forbidden wording.
 *
 * - Direct address is matched case-sensitively (capitalised forms only) and
 *   covers all inflections of "Ihr" (Ihre, Ihrem, Ihren, Ihrer, Ihres), not
 *   just the bare stem; a `\b`-anchored "Ihr" does not match "Ihre".
 * - Adjective patterns accept both the transliterated and the umlaut
 *   spelling (oe/ö, ae/ä, ue/ü) and the usual endings (-e, -em, -en, -er,
 *   -es), since these words rarely appear uninflected in running text.
 *
 * None of the patterns is global, so `test()` is stateless.
 */
export const STYLE_VIOLATION_PATTERNS: Array<{ name: string; pattern: RegExp }> = [
  { name: 'Direkte Ansprache', pattern: /\b(Sie|Ihnen|Ihr(?:e[mnrs]?)?)\b/ },
  { name: 'Superlative', pattern: /\b(bestm(?:oe|ö)glich|h(?:oe|ö)chstm(?:oe|ö)glich|optimal|perfekt|einzigartig)(?:e[mnrs]?)?\b/i },
  { name: 'Marketing-Sprache', pattern: /\b(revolution(?:ae|ä)r|bahnbrechend|innovativ|f(?:ue|ü)hrend|erstklassig)(?:e[mnrs]?)?\b/i },
  { name: 'Umgangssprache', pattern: /\b(super|mega|easy|(?:toll|krass|cool)(?:e[mnrs]?)?)\b/i },
  { name: 'Denglisch', pattern: /\b(State of the Art|Best Practice|Compliance Journey|Data Driven)\b/i },
]
|
||||
|
||||
// ============================================================================
|
||||
// Serialization
|
||||
// ============================================================================
|
||||
|
||||
/**
 * Serializes the terminology guide for the LLM prompt.
 *
 * To respect the token budget only the first entries of each group are
 * emitted: 10 GDPR terms, 6 TOM terms and 4 general terms, one per line.
 *
 * @param guide - Guide to serialize; defaults to DEFAULT_TERMINOLOGY
 * @returns Newline-separated `key: "value"` lines
 */
export function terminologyToPromptString(guide: TerminologyGuide = DEFAULT_TERMINOLOGY): string {
  const budget: Array<[Record<string, string>, number]> = [
    [guide.dsgvo, 10],
    [guide.tom, 6],
    [guide.general, 4],
  ]

  const lines: string[] = []
  for (const [terms, limit] of budget) {
    for (const [key, value] of Object.entries(terms).slice(0, limit)) {
      lines.push(` ${key}: "${value}"`)
    }
  }
  return lines.join('\n')
}
|
||||
|
||||
/**
 * Serializes the style contract for the LLM prompt.
 *
 * The address and tone lines are fixed texts; only the `forbid` list of the
 * given contract is interpolated.
 *
 * @param style - Contract to serialize; defaults to DEFAULT_STYLE_CONTRACT
 * @returns Three newline-separated lines (address, tone, forbidden wording)
 */
export function styleContractToPromptString(style: StyleContract = DEFAULT_STYLE_CONTRACT): string {
  const addressingLine = 'Anrede: Dritte Person ("Die [Firmenname]...", NICHT "Sie...")'
  const toneLine = 'Ton: Professionell, juristisch korrekt, aber verstaendlich'
  const forbiddenLine = `Verboten: ${style.forbid.join(', ')}`

  return `${addressingLine}\n${toneLine}\n${forbiddenLine}`
}
|
||||
|
||||
// ============================================================================
|
||||
// Validation
|
||||
// ============================================================================
|
||||
|
||||
/**
 * Checks a text against STYLE_VIOLATION_PATTERNS.
 *
 * @param text - Text to check
 * @returns One `Style-Verstoss: <name>` message per matching pattern, in
 *          pattern order; empty when nothing matches
 */
export function checkStyleViolations(text: string): string[] {
  return STYLE_VIOLATION_PATTERNS
    .filter(({ pattern }) => pattern.test(text))
    .map(({ name }) => `Style-Verstoss: ${name}`)
}
|
||||
|
||||
/**
 * Checks whether English terms are used where the guide prescribes a German
 * one. Matching is a case-insensitive substring search.
 *
 * @param text - Text to check
 * @param guide - Terminology to suggest from; defaults to DEFAULT_TERMINOLOGY
 * @returns One warning (German text) per English term found, in check order
 */
export function checkTerminologyUsage(
  text: string,
  guide: TerminologyGuide = DEFAULT_TERMINOLOGY
): string[] {
  const haystack = text.toLowerCase()

  // English term → German wording prescribed by the guide
  const englishToGerman: Array<[string, string]> = [
    ['data controller', guide.dsgvo.controller],
    ['data processor', guide.dsgvo.processor],
    ['data subject', guide.dsgvo.data_subject],
    ['personal data', guide.dsgvo.personal_data],
    ['data breach', guide.dsgvo.data_breach],
    ['encryption', guide.tom.encryption],
    ['pseudonymization', guide.tom.pseudonymization],
    ['risk assessment', guide.general.risk_assessment],
  ]

  return englishToGerman
    .filter(([english]) => haystack.includes(english.toLowerCase()))
    .map(([english, german]) => `Englischer Begriff "${english}" gefunden — verwende "${german}"`)
}
|
||||
Reference in New Issue
Block a user