feat(sdk,iace): add Personalized Drafting Pipeline v2 and IACE engine

Drafting Engine: 7-module pipeline with narrative tags, allowed facts governance, PII sanitizer, prose validator with repair loop, hash-based cache, and terminology guide. v1 fallback via ?v=1 query param. IACE: Initial AI-Act Conformity Engine with risk classifier, completeness checker, hazard library, and PostgreSQL store for AI system assessments. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-25 22:27:06 +01:00
parent 3efa391de5
commit 06711bad1c
20 changed files with 10588 additions and 261 deletions
@@ -0,0 +1,485 @@
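+/**
+ * Prose Validator + Repair Loop — Governance Layer
+ *
+ * Validates LLM-generated prose blocks against the rule set and
+ * orchestrates the repair loop (at most 2 repair attempts by default),
+ * falling back to a static template when repair does not succeed.
+ *
+ * 12 validation rules. Rule 1 (JSON_VALID) is enforced while parsing; of the
+ * rules evaluated in validateProseBlock, only SANITIZATION_PASSED is a
+ * non-repairable hard abort — every other finding is marked repairable.
+ */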
+
+import type { NarrativeTags } from './narrative-tags'
+import { getAllAllowedTagValues } from './narrative-tags'
+import type { AllowedFacts } from './allowed-facts'
+import { checkForDisallowedContent } from './allowed-facts'
+import { checkStyleViolations, checkTerminologyUsage } from './terminology'
+import type { SanitizedFacts } from './sanitizer'
+import { isSanitized } from './sanitizer'
+
+// ============================================================================
+// Types
+// ============================================================================
+
+/**
+ * Structured LLM output for a single prose block (mandatory format).
+ */
+export interface ProseBlockOutput {
+  blockId: string
+  // Selects the word-count limits the validator applies to `text`.
+  blockType: 'introduction' | 'transition' | 'conclusion' | 'appreciation'
+  language: 'de'
+  // The generated prose; this is what the validation rules inspect.
+  text: string
+
+  // Flags delivered alongside the text.
+  // NOTE(review): presumably self-reported by the LLM — confirm against the prompt.
+  assertions: {
+    companyNameUsed: boolean
+    industryReferenced: boolean
+    structureReferenced: boolean
+    itLandscapeReferenced: boolean
+    // Each entry is checked against getAllAllowedTagValues() during validation.
+    narrativeTagsUsed: string[]
+  }
+
+  // Forbidden content reported by the LLM itself; a non-empty list fails validation.
+  forbiddenContentDetected: string[]
+}
+
+/**
+ * A single validation finding.
+ */
+export interface ProseValidationError {
+  // Rule identifier, e.g. 'COMPANY_NAME_PRESENT'.
+  rule: string
+  // Only findings with severity 'error' make a block invalid; warnings are informational.
+  severity: 'error' | 'warning'
+  message: string
+  // false marks a hard abort: the repair loop skips repair and uses the fallback.
+  repairable: boolean
+}
+
+/**
+ * Result of validating one prose block.
+ */
+export interface ProseValidatorResult {
+  // true when no finding has severity 'error'.
+  valid: boolean
+  // All findings, warnings included.
+  errors: ProseValidationError[]
+  // true when there is at least one error and no finding is a hard abort.
+  repairable: boolean
+}
+
+/**
+ * Audit record of one repair-loop run.
+ */
+export interface RepairAudit {
+  // Number of repair attempts counted by the loop.
+  repairAttempts: number
+  // One entry per failed pass; each entry lists "RULE: message" strings.
+  validatorFailures: string[][]
+  // Set to true when the loop returns a block that passed validation.
+  repairSuccessful: boolean
+  // true when the static fallback template was returned instead of LLM output.
+  fallbackUsed: boolean
+  // Human-readable reason; only set when the fallback was used.
+  fallbackReason?: string
+}
+
+/**
+ * Inclusive word-count bounds per block type.
+ * The validator reports a count below `min` as a warning and a count above
+ * `max` as an error.
+ */
+const WORD_COUNT_LIMITS: Record<ProseBlockOutput['blockType'], { min: number; max: number }> = {
+  introduction: { min: 30, max: 200 },
+  transition: { min: 10, max: 80 },
+  conclusion: { min: 20, max: 150 },
+  appreciation: { min: 15, max: 100 },
+}
+
+// ============================================================================
+// Prose Validator
+// ============================================================================
+
+/**
+ * Validates a ProseBlockOutput against rules 2–12 of the rule set.
+ * Rule 1 (JSON_VALID) is enforced by parsing before this function is called.
+ *
+ * @param block - Parsed LLM output to check.
+ * @param facts - Facts the text was generated from; `companyName` and
+ *   `industry` are read here. If the object carries a `__sanitized` key it
+ *   must additionally pass `isSanitized`, otherwise a hard abort is reported.
+ * @param expectedTags - Narrative tags selected for this document; any other
+ *   known tag value that appears in the text is reported.
+ * @returns `valid` is true when no finding has severity 'error' (warnings do
+ *   not invalidate the block); `repairable` is true when there are errors and
+ *   none of the findings is a hard abort.
+ */
+export function validateProseBlock(
+  block: ProseBlockOutput,
+  facts: AllowedFacts | SanitizedFacts,
+  expectedTags: NarrativeTags
+): ProseValidatorResult {
+  const errors: ProseValidationError[] = []
+  const text = block.text
+
+  // Rule 1: JSON_VALID — checked externally (parsing happens before this call).
+
+  // Rule 2: COMPANY_NAME_PRESENT (skipped for the placeholder name 'Unbekannt')
+  if (!text.includes(facts.companyName) && facts.companyName !== 'Unbekannt') {
+    errors.push({
+      rule: 'COMPANY_NAME_PRESENT',
+      severity: 'error',
+      message: `Firmenname "${facts.companyName}" nicht im Text gefunden`,
+      repairable: true,
+    })
+  }
+
+  // Rule 3: INDUSTRY_REFERENCED (case-insensitive, warning only)
+  if (facts.industry && !text.toLowerCase().includes(facts.industry.toLowerCase())) {
+    errors.push({
+      rule: 'INDUSTRY_REFERENCED',
+      severity: 'warning',
+      message: `Branche "${facts.industry}" nicht im Text referenziert`,
+      repairable: true,
+    })
+  }
+
+  // Rule 4: NO_NUMERIC_SCORES — percentages, "score: 42" and level labels L1–L4
+  if (/\d+\s*%/.test(text)) {
+    errors.push({
+      rule: 'NO_NUMERIC_SCORES',
+      severity: 'error',
+      message: 'Prozentwerte im Text gefunden',
+      repairable: true,
+    })
+  }
+  if (/score[:\s]*\d+/i.test(text)) {
+    errors.push({
+      rule: 'NO_NUMERIC_SCORES',
+      severity: 'error',
+      message: 'Score-Werte im Text gefunden',
+      repairable: true,
+    })
+  }
+  if (/\b(L1|L2|L3|L4)\b/.test(text)) {
+    errors.push({
+      rule: 'NO_NUMERIC_SCORES',
+      severity: 'error',
+      message: 'Compliance-Level-Bezeichnungen (L1-L4) im Text gefunden',
+      repairable: true,
+    })
+  }
+
+  // Rule 5: NO_DISALLOWED_TOPICS
+  for (const violation of checkForDisallowedContent(text)) {
+    errors.push({
+      rule: 'NO_DISALLOWED_TOPICS',
+      severity: 'error',
+      message: violation,
+      repairable: true,
+    })
+  }
+
+  // Rule 6: WORD_COUNT_IN_RANGE — too short is a warning, too long an error
+  const wordCount = text.split(/\s+/).filter(Boolean).length
+  const limits = WORD_COUNT_LIMITS[block.blockType]
+  if (limits) {
+    if (wordCount < limits.min) {
+      errors.push({
+        rule: 'WORD_COUNT_IN_RANGE',
+        severity: 'warning',
+        message: `Wortanzahl ${wordCount} unter Minimum ${limits.min} fuer ${block.blockType}`,
+        repairable: true,
+      })
+    }
+    if (wordCount > limits.max) {
+      errors.push({
+        rule: 'WORD_COUNT_IN_RANGE',
+        severity: 'error',
+        message: `Wortanzahl ${wordCount} ueber Maximum ${limits.max} fuer ${block.blockType}`,
+        repairable: true,
+      })
+    }
+  }
+
+  // Rule 7: NO_DIRECT_ADDRESS
+  // Covers Sie, Ihnen and every inflection of the possessive (Ihr, Ihre,
+  // Ihrem, Ihren, Ihrer, Ihres). A plain \bIhr\b does not match "Ihre" or
+  // "Ihren" because the trailing letters are word characters.
+  if (/\b(Sie|Ihnen|Ihr(?:e[mnrs]?)?)\b/.test(text)) {
+    errors.push({
+      rule: 'NO_DIRECT_ADDRESS',
+      severity: 'error',
+      message: 'Direkte Ansprache (Sie/Ihr) gefunden',
+      repairable: true,
+    })
+  }
+
+  // Rule 8: NARRATIVE_TAGS_CONSISTENT
+  const allowedTags = getAllAllowedTagValues()
+  // Guard against malformed blocks: only iterate a real array, so a stray
+  // string is not walked character by character.
+  const reportedTags: unknown = block.assertions?.narrativeTagsUsed
+  if (Array.isArray(reportedTags)) {
+    for (const tag of reportedTags) {
+      if (!allowedTags.includes(tag)) {
+        errors.push({
+          rule: 'NARRATIVE_TAGS_CONSISTENT',
+          severity: 'error',
+          message: `Unbekannter Narrative Tag "${tag}" in assertions`,
+          repairable: true,
+        })
+      }
+    }
+  }
+  // Report known tag values that occur in the text but are not expected here.
+  const expectedTagValues = Object.values(expectedTags)
+  for (const tagValue of allowedTags) {
+    if (text.includes(tagValue) && !expectedTagValues.includes(tagValue)) {
+      errors.push({
+        rule: 'NARRATIVE_TAGS_CONSISTENT',
+        severity: 'error',
+        message: `Tag "${tagValue}" im Text, aber nicht im erwarteten Tag-Set`,
+        repairable: true,
+      })
+    }
+  }
+
+  // Rule 9: TERMINOLOGY_CORRECT (warning only)
+  for (const warning of checkTerminologyUsage(text)) {
+    errors.push({
+      rule: 'TERMINOLOGY_CORRECT',
+      severity: 'warning',
+      message: warning,
+      repairable: true,
+    })
+  }
+
+  // Rule 10: style violations (warning only)
+  for (const violation of checkStyleViolations(text)) {
+    errors.push({
+      rule: 'STYLE_VIOLATION',
+      severity: 'warning',
+      message: violation,
+      repairable: true,
+    })
+  }
+
+  // Rule 11: SANITIZATION_PASSED (hard abort — the only non-repairable rule)
+  if ('__sanitized' in facts && !isSanitized(facts)) {
+    errors.push({
+      rule: 'SANITIZATION_PASSED',
+      severity: 'error',
+      message: 'Sanitization-Flag gesetzt aber nicht valide',
+      repairable: false,
+    })
+  }
+
+  // Rule 12: self-reported forbidden content
+  // A non-array value (possible with untyped input) is treated as a single
+  // report instead of crashing on `.join`.
+  const rawForbidden: unknown = block.forbiddenContentDetected
+  const selfReported: unknown[] = Array.isArray(rawForbidden)
+    ? rawForbidden
+    : rawForbidden
+      ? [rawForbidden]
+      : []
+  if (selfReported.length > 0) {
+    errors.push({
+      rule: 'SELF_REPORTED_FORBIDDEN',
+      severity: 'error',
+      message: `LLM meldet verbotene Inhalte: ${selfReported.join(', ')}`,
+      repairable: true,
+    })
+  }
+
+  const hasHardAbort = errors.some(e => !e.repairable)
+  const hasErrors = errors.some(e => e.severity === 'error')
+
+  return {
+    valid: !hasErrors,
+    errors,
+    repairable: hasErrors && !hasHardAbort,
+  }
+}
+
+// ============================================================================
+// JSON Parsing
+// ============================================================================
+
+/**
+ * Normalizes an untrusted value to a list of strings: arrays keep only their
+ * string entries, a non-empty string becomes a one-element list, anything
+ * else yields an empty list.
+ */
+function toStringList(value: unknown): string[] {
+  if (typeof value === 'string') {
+    return value.length > 0 ? [value] : []
+  }
+  if (!Array.isArray(value)) {
+    return []
+  }
+  return value.filter((item): item is string => typeof item === 'string')
+}
+
+/**
+ * Parses raw LLM output into a ProseBlockOutput and normalizes its shape.
+ *
+ * The content may optionally be wrapped in a Markdown code fence
+ * (``` or ```json); the fence is stripped before parsing.
+ *
+ * @param rawContent - Raw text returned by the LLM.
+ * @returns The normalized block, or null when the content is not a JSON
+ *   object or a mandatory field (`blockId`, `text`, a known `blockType`) is
+ *   missing or has the wrong type. Optional fields are coerced: assertion
+ *   flags are true only for a literal `true`, tag and forbidden-content lists
+ *   contain strings only, and `language` is always 'de' (the only value the
+ *   type admits).
+ */
+export function parseProseBlockOutput(rawContent: string): ProseBlockOutput | null {
+  if (typeof rawContent !== 'string') {
+    return null
+  }
+
+  // LLMs frequently wrap JSON in a Markdown fence; such input can never be
+  // valid JSON as-is, so unwrapping it cannot change previously valid results.
+  const trimmed = rawContent.trim()
+  const fenced = /^```(?:json)?\s*([\s\S]*?)\s*```$/i.exec(trimmed)
+  const candidate = fenced?.[1] ?? trimmed
+
+  let parsed: unknown
+  try {
+    parsed = JSON.parse(candidate)
+  } catch {
+    return null
+  }
+
+  // Only a plain JSON object can carry the mandatory fields.
+  if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
+    return null
+  }
+  const record = parsed as Record<string, unknown>
+
+  // Check mandatory fields
+  const knownBlockTypes: ReadonlyArray<ProseBlockOutput['blockType']> = [
+    'introduction',
+    'transition',
+    'conclusion',
+    'appreciation',
+  ]
+  const blockType = knownBlockTypes.find(candidateType => candidateType === record.blockType)
+  const { blockId, text } = record
+  if (typeof blockId !== 'string' || typeof text !== 'string' || blockType === undefined) {
+    return null
+  }
+
+  const rawAssertions = record.assertions
+  const assertions: Record<string, unknown> =
+    typeof rawAssertions === 'object' && rawAssertions !== null
+      ? (rawAssertions as Record<string, unknown>)
+      : {}
+
+  return {
+    blockId,
+    blockType,
+    language: 'de',
+    text,
+    assertions: {
+      companyNameUsed: assertions.companyNameUsed === true,
+      industryReferenced: assertions.industryReferenced === true,
+      structureReferenced: assertions.structureReferenced === true,
+      itLandscapeReferenced: assertions.itLandscapeReferenced === true,
+      narrativeTagsUsed: toStringList(assertions.narrativeTagsUsed),
+    },
+    forbiddenContentDetected: toStringList(record.forbiddenContentDetected),
+  }
+}
+
+// ============================================================================
+// Repair Prompt Builder
+// ============================================================================
+
+/**
+ * Assembles the repair prompt for a block that failed validation.
+ *
+ * Only findings with severity 'error' are listed; warnings are left out.
+ * The original block is appended as pretty-printed JSON.
+ *
+ * @param originalBlock - The block that failed validation.
+ * @param validationErrors - All findings produced for that block.
+ * @returns The prompt text to send to the LLM.
+ */
+export function buildRepairPrompt(
+  originalBlock: ProseBlockOutput,
+  validationErrors: ProseValidationError[]
+): string {
+  const blockingFindings: string[] = []
+  for (const finding of validationErrors) {
+    if (finding.severity === 'error') {
+      blockingFindings.push(`- ${finding.rule}: ${finding.message}`)
+    }
+  }
+
+  // One array entry per output line; joined with '\n' at the end.
+  const promptLines = [
+    'Der vorherige Text enthielt Fehler. Ueberarbeite ihn unter Beibehaltung der Aussage.',
+    '',
+    'FEHLER:',
+    blockingFindings.join('\n'),
+    '',
+    'REGELN:',
+    '- Entferne alle unerlaubten Inhalte',
+    '- Behalte den Firmenkontext bei',
+    '- Erzeuge ausschliesslich JSON im vorgegebenen Format',
+    '- Aendere KEINE Fakten, ergaenze KEINE neuen Informationen',
+    '- Verwende KEINE direkte Ansprache (Sie/Ihr)',
+    '- Verwende KEINE konkreten Prozentwerte oder Scores',
+    '',
+    'ORIGINALTEXT:',
+    JSON.stringify(originalBlock, null, 2),
+  ]
+
+  return promptLines.join('\n')
+}
+
+// ============================================================================
+// Fallback Templates
+// ============================================================================
+
+/**
+ * Static German fallback texts per block type.
+ * Placeholders: {{companyName}}, {{documentType}}, {{maturity}}.
+ */
+const FALLBACK_TEMPLATES: Record<ProseBlockOutput['blockType'], string> = {
+  introduction: 'Die {{companyName}} dokumentiert im Folgenden die {{documentType}}-relevanten Massnahmen und Bewertungen. Die nachstehenden Ausfuehrungen basieren auf der aktuellen Analyse der organisatorischen und technischen Gegebenheiten.',
+  transition: 'Auf Grundlage der vorstehenden Daten ergeben sich die folgenden Detailbewertungen.',
+  conclusion: 'Die {{companyName}} verfuegt ueber die dokumentierten Massnahmen und Strukturen. Die Einhaltung der regulatorischen Anforderungen wird fortlaufend ueberprueft und angepasst.',
+  appreciation: 'Die bestehende Organisationsstruktur der {{companyName}} bildet eine {{maturity}} Grundlage fuer die nachfolgend dokumentierten Massnahmen.',
+}
+
+/**
+ * Builds a template-based fallback block for use when the repair loop fails.
+ *
+ * @param blockId - Identifier copied into the resulting block.
+ * @param blockType - Selects the fallback template.
+ * @param facts - Supplies `companyName` and `narrativeTags.maturity`.
+ * @param documentType - Inserted for {{documentType}}; 'Compliance' is used
+ *   when omitted or empty.
+ * @returns A ProseBlockOutput with the filled-in template text.
+ */
+export function buildFallbackBlock(
+  blockId: string,
+  blockType: ProseBlockOutput['blockType'],
+  facts: AllowedFacts,
+  documentType?: string
+): ProseBlockOutput {
+  // Replacer functions insert the values verbatim. A plain string replacement
+  // would interpret `$$`, `$&`, `$1`, … inside a company name or document
+  // type as replacement patterns and corrupt the text.
+  const text = FALLBACK_TEMPLATES[blockType]
+    .replace(/\{\{companyName\}\}/g, () => facts.companyName)
+    .replace(/\{\{maturity\}\}/g, () => facts.narrativeTags.maturity)
+    .replace(/\{\{documentType\}\}/g, () => documentType || 'Compliance')
+
+  return {
+    blockId,
+    blockType,
+    language: 'de',
+    text,
+    assertions: {
+      companyNameUsed: true,
+      industryReferenced: false,
+      structureReferenced: false,
+      itLandscapeReferenced: false,
+      // NOTE(review): this reports the tag key 'maturity', not its value —
+      // confirm this is what consumers of narrativeTagsUsed expect.
+      narrativeTagsUsed: blockType === 'appreciation' ? ['maturity'] : [],
+    },
+    forbiddenContentDetected: [],
+  }
+}
+
+// ============================================================================
+// Repair Loop Orchestrator
+// ============================================================================
+
+/**
+ * Callback for an LLM call (injected by the route).
+ * Receives a prompt and resolves to the raw model output, which the repair
+ * loop then parses as a prose block.
+ */
+export type LLMCallFn = (prompt: string) => Promise<string>
+
+/**
+ * Orchestrates the repair loop for a single prose block.
+ *
+ * 1. Parse + validate the original output.
+ * 2. On failure: repair prompt → LLM → parse + validate, for at most
+ *    `maxRepairAttempts` repair calls in total. A JSON regeneration counts
+ *    as one of those attempts.
+ * 3. If the block is still invalid, or a hard abort is reported, return the
+ *    static fallback template.
+ *
+ * Failures of the LLM call itself never reject the returned promise; they
+ * consume a repair attempt and are recorded in `audit.validatorFailures`.
+ *
+ * @param rawLLMOutput - Raw text of the first LLM response.
+ * @param facts - Facts used for validation and for the fallback template.
+ * @param expectedTags - Narrative tags expected for this document.
+ * @param blockId - Identifier used when a fallback block is built.
+ * @param blockType - Block type used when a fallback block is built.
+ * @param llmCall - Callback that sends a repair prompt to the LLM.
+ * @param documentType - Optional document type for the fallback template.
+ * @param maxRepairAttempts - Upper bound for repair calls (default 2).
+ * @returns The validated (or fallback) block plus the audit record.
+ *   `audit.repairSuccessful` is true whenever LLM output passed validation,
+ *   including the case where no repair was needed.
+ */
+export async function executeRepairLoop(
+  rawLLMOutput: string,
+  facts: AllowedFacts | SanitizedFacts,
+  expectedTags: NarrativeTags,
+  blockId: string,
+  blockType: ProseBlockOutput['blockType'],
+  llmCall: LLMCallFn,
+  documentType?: string,
+  maxRepairAttempts = 2
+): Promise<{ block: ProseBlockOutput; audit: RepairAudit }> {
+  const audit: RepairAudit = {
+    repairAttempts: 0,
+    validatorFailures: [],
+    repairSuccessful: false,
+    fallbackUsed: false,
+  }
+
+  // Turns a caught value into a short text for the audit trail.
+  const describeFailure = (error: unknown): string =>
+    error instanceof Error ? error.message : String(error)
+
+  // Marks the audit as "fallback used" and returns the template block.
+  const useFallback = (reason: string): { block: ProseBlockOutput; audit: RepairAudit } => {
+    audit.fallbackUsed = true
+    audit.fallbackReason = reason
+    return {
+      block: buildFallbackBlock(blockId, blockType, facts, documentType),
+      audit,
+    }
+  }
+
+  // Attempt 0: parse + validate the original output
+  let parsed = parseProseBlockOutput(rawLLMOutput)
+
+  if (!parsed) {
+    audit.validatorFailures.push(['JSON_VALID: LLM-Output konnte nicht als JSON geparst werden'])
+
+    // JSON invalid → regeneration counts as a repair attempt, but only when
+    // a repair call is actually allowed and made.
+    if (maxRepairAttempts >= 1) {
+      audit.repairAttempts++
+      const repairPrompt = `Der vorherige Output war kein valides JSON. Erzeuge ausschliesslich ein JSON-Objekt mit den Feldern: blockId, blockType, language, text, assertions, forbiddenContentDetected.\n\nOriginal-Output:\n${rawLLMOutput.slice(0, 500)}`
+      try {
+        const repaired = await llmCall(repairPrompt)
+        parsed = parseProseBlockOutput(repaired)
+      } catch (error: unknown) {
+        // LLM failure → fall through to the fallback, but keep a trace of it
+        audit.validatorFailures.push([`LLM_CALL_FAILED: ${describeFailure(error)}`])
+      }
+    }
+  }
+
+  if (!parsed) {
+    return useFallback('JSON konnte nach Repair nicht geparst werden')
+  }
+
+  // Validation loop. When a repair call yields nothing usable, the block is
+  // unchanged, so the previous validation result is carried over instead of
+  // re-validating and logging the same findings twice.
+  let carriedResult: ProseValidatorResult | null = null
+  for (let attempt = audit.repairAttempts; attempt <= maxRepairAttempts; attempt++) {
+    const isFreshValidation = carriedResult === null
+    const result: ProseValidatorResult =
+      carriedResult ?? validateProseBlock(parsed, facts, expectedTags)
+    carriedResult = null
+
+    if (result.valid) {
+      audit.repairSuccessful = true
+      return { block: parsed, audit }
+    }
+
+    // Hard abort? → fallback immediately, no repair
+    if (!result.repairable) {
+      audit.validatorFailures.push(result.errors.map(e => `${e.rule}: ${e.message}`))
+      return useFallback(
+        `Hard Abort: ${result.errors.filter(e => !e.repairable).map(e => e.rule).join(', ')}`
+      )
+    }
+
+    // Log the findings once per distinct validation pass
+    if (isFreshValidation) {
+      audit.validatorFailures.push(result.errors.map(e => `${e.rule}: ${e.message}`))
+    }
+
+    // Any repair attempts left?
+    if (attempt >= maxRepairAttempts) {
+      break
+    }
+
+    // Send the repair prompt
+    audit.repairAttempts++
+    try {
+      const repairedOutput = await llmCall(buildRepairPrompt(parsed, result.errors))
+      const repairedParsed = parseProseBlockOutput(repairedOutput)
+      if (repairedParsed) {
+        parsed = repairedParsed
+      } else {
+        // Repair output was not parseable → keep the old block, record why
+        audit.validatorFailures.push(['JSON_VALID: Repair-Output konnte nicht als JSON geparst werden'])
+        carriedResult = result
+      }
+    } catch (error: unknown) {
+      // LLM failure → next attempt or fallback
+      audit.validatorFailures.push([`LLM_CALL_FAILED: ${describeFailure(error)}`])
+      carriedResult = result
+    }
+  }
+
+  // All attempts exhausted → fallback
+  return useFallback(`${maxRepairAttempts} Repair-Versuche erschoepft`)
+}