feat(sdk,iace): add Personalized Drafting Pipeline v2 and IACE engine

Drafting Engine: 7-module pipeline with narrative tags, allowed facts governance, PII sanitizer, prose validator with repair loop, hash-based cache, and terminology guide. v1 fallback via ?v=1 query param. IACE: Initial AI-Act Conformity Engine with risk classifier, completeness checker, hazard library, and PostgreSQL store for AI system assessments. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-25 22:27:06 +01:00
parent 3efa391de5
commit 06711bad1c
20 changed files with 10588 additions and 261 deletions
@@ -0,0 +1,85 @@
+/**
+ * Allowed Facts v2 Adapter — Baut AllowedFacts aus DraftContext
+ *
+ * Die Haupt-AllowedFacts Datei (allowed-facts.ts) erwartet SDKState,
+ * aber in der Draft API Route haben wir nur DraftContext.
+ * Dieser Adapter ueberbrueckt die Luecke.
+ *
+ * Re-exportiert auch die Serialisierungs-/Validierungsfunktionen.
+ */
+
+import type { AllowedFacts, FactPolicy } from './allowed-facts'
+import {
+  DEFAULT_FACT_POLICY,
+  allowedFactsToPromptString,
+  disallowedTopicsToPromptString,
+  checkForDisallowedContent,
+} from './allowed-facts'
+import type { NarrativeTags } from './narrative-tags'
+import type { DraftContext } from './types'
+
+// Re-exports
+export { allowedFactsToPromptString, disallowedTopicsToPromptString, checkForDisallowedContent }
+
+/**
+ * Builds the AllowedFacts budget from a DraftContext (API route context).
+ *
+ * The DraftContext only carries a projected company profile, so fields it
+ * does not contain (legal form, location, primary use cases) are emitted
+ * as empty values.
+ *
+ * @param context - Draft context providing the company profile and decisions.
+ * @param narrativeTags - Pre-derived narrative tags, passed through unchanged.
+ * @returns The fact budget for the LLM prompt.
+ */
+export function buildAllowedFactsFromDraftContext(
+  context: DraftContext,
+  narrativeTags: NarrativeTags
+): AllowedFacts {
+  const company = context.companyProfile
+
+  // Fields that have no counterpart in the DraftContext.
+  const notAvailable = ''
+
+  const facts: AllowedFacts = {
+    companyName: company.name ? company.name : 'Unbekannt',
+    legalForm: notAvailable,
+    industry: company.industry ? company.industry : '',
+    location: notAvailable,
+    employeeCount: company.employeeCount ? company.employeeCount : 0,
+
+    teamStructure: deriveTeamStructure(company.employeeCount),
+    itLandscape: deriveItLandscape(company.businessModel, company.isPublicSector),
+    specialFeatures: deriveSpecialFeatures(company),
+
+    triggeredRegulations: deriveRegulations(context),
+    primaryUseCases: [],
+
+    narrativeTags,
+  }
+
+  return facts
+}
+
+// ============================================================================
+// Private Helpers
+// ============================================================================
+
+/**
+ * Maps a head count to a coarse organisation label.
+ * More than 500 employees is a corporate structure, more than 50 is
+ * mid-sized, everything else (including NaN/undefined) is a small business.
+ */
+function deriveTeamStructure(employeeCount: number): string {
+  return employeeCount > 500
+    ? 'Konzernstruktur'
+    : employeeCount > 50
+      ? 'mittelstaendisch'
+      : 'Kleinunternehmen'
+}
+
+/**
+ * Derives a coarse IT landscape label.
+ * A business model mentioning "SaaS" or "Cloud" wins over the public-sector
+ * flag; without either signal the landscape is reported as hybrid.
+ */
+function deriveItLandscape(businessModel: string, isPublicSector: boolean): string {
+  const cloudMarkers = ['SaaS', 'Cloud']
+  const looksCloudBased = cloudMarkers.some(marker => businessModel?.includes(marker))
+
+  if (looksCloudBased) {
+    return 'Cloud-First'
+  }
+  return isPublicSector ? 'On-Premise' : 'Hybrid'
+}
+
+/**
+ * Collects the noteworthy characteristics of a company profile.
+ * The output order is fixed: public sector, large enterprise (> 250
+ * employees), data protection officer present.
+ */
+function deriveSpecialFeatures(profile: DraftContext['companyProfile']): string[] {
+  const candidates: Array<[unknown, string]> = [
+    [profile.isPublicSector, 'Oeffentlicher Sektor'],
+    [profile.employeeCount > 250, 'Grossunternehmen'],
+    [profile.dataProtectionOfficer, 'Interner DSB benannt'],
+  ]
+
+  return candidates
+    .filter(([applies]) => Boolean(applies))
+    .map(([, label]) => label)
+}
+
+/**
+ * Derives the list of relevant regulations from the hard triggers of the
+ * draft context. 'DSGVO' is always included; further regulations are added
+ * when a trigger id contains one of the known markers.
+ */
+function deriveRegulations(context: DraftContext): string[] {
+  // Checked in this order for every trigger, which fixes the output order.
+  const idMarkers: Array<[string[], string]> = [
+    [['ai_act', 'ai-act'], 'AI Act'],
+    [['nis2', 'NIS2'], 'NIS2'],
+    [['ttdsg', 'TTDSG'], 'TTDSG'],
+  ]
+
+  const regulations = new Set<string>(['DSGVO'])
+  const hardTriggers = context.decisions.hardTriggers || []
+
+  hardTriggers.forEach(trigger => {
+    idMarkers.forEach(([needles, regulation]) => {
+      if (needles.some(needle => trigger.id.includes(needle))) {
+        regulations.add(regulation)
+      }
+    })
+  })
+
+  return Array.from(regulations)
+}
@@ -0,0 +1,257 @@
+/**
+ * Allowed Facts Governance — Kontrolliertes Faktenbudget fuer LLM
+ *
+ * Definiert welche Fakten das LLM in Prosa-Bloecken verwenden darf
+ * und welche Themen explizit verboten sind.
+ *
+ * Verhindert Halluzinationen durch explizite Whitelisting.
+ */
+
+import type { SDKState, CompanyProfile } from '../types'
+import type { NarrativeTags } from './narrative-tags'
+
+// ============================================================================
+// Types
+// ============================================================================
+
+/**
+ * Explicit fact budget for the LLM: the only company facts the model is
+ * allowed to use when writing prose blocks.
+ */
+export interface AllowedFacts {
+  // Company profile
+  companyName: string
+  legalForm: string
+  industry: string
+  location: string
+  employeeCount: number
+
+  // Organisation
+  teamStructure: string
+  itLandscape: string
+  specialFeatures: string[]
+
+  // Compliance context
+  triggeredRegulations: string[]
+  primaryUseCases: string[]
+
+  // Narrative tags (derived deterministically)
+  narrativeTags: NarrativeTags
+}
+
+/** Rules describing which topics are allowed and which are forbidden. */
+export interface FactPolicy {
+  /** Topics the LLM may write about. */
+  allowedTopics: string[]
+  /** Topics the LLM must not mention. */
+  disallowedTopics: string[]
+}
+
+// ============================================================================
+// Default Policy
+// ============================================================================
+
+/**
+ * Default topic policy. It is the default argument of
+ * disallowedTopicsToPromptString and checkForDisallowedContent.
+ * The topic labels are German because they are rendered into the prompt.
+ */
+export const DEFAULT_FACT_POLICY: FactPolicy = {
+  allowedTopics: [
+    'Branche',
+    'Unternehmensgroesse',
+    'Teamstruktur',
+    'IT-Strategie',
+    'Regulatorischer Kontext',
+    'Anwendungsfaelle',
+    'Organisationsform',
+    'Standort',
+    'Rechtsform',
+  ],
+  disallowedTopics: [
+    'Umsatz',
+    'Gewinn',
+    'Kundenzahlen',
+    'konkrete Zertifizierungen',
+    'interne Tool-Namen',
+    'Personennamen',
+    'E-Mail-Adressen',
+    'Telefonnummern',
+    'IP-Adressen',
+    'konkrete Prozentwerte',
+    'konkrete Scores',
+    'Compliance-Level-Bezeichnungen',
+    'interne Projektnamen',
+    'Passwoerter',
+    'API-Keys',
+    'Vertragsinhalte',
+    'Gehaltsinformationen',
+  ],
+}
+
+// ============================================================================
+// Builder
+// ============================================================================
+
+/**
+ * Extracts the AllowedFacts budget from the SDK state.
+ * Only explicitly approved fields are copied; a missing profile falls back
+ * to neutral defaults.
+ *
+ * @param state - Full SDK state (company profile, compliance scope, use cases).
+ * @param narrativeTags - Pre-derived narrative tags, passed through unchanged.
+ * @returns The fact budget for the LLM prompt.
+ */
+export function buildAllowedFacts(
+  state: SDKState,
+  narrativeTags: NarrativeTags
+): AllowedFacts {
+  const { companyProfile, complianceScope } = state
+
+  // Plain profile fields, each with its own neutral default.
+  const companyName = companyProfile?.name ?? 'Unbekannt'
+  const legalForm = companyProfile?.legalForm ?? ''
+  const industry = companyProfile?.industry ?? ''
+  const location = companyProfile?.location ?? ''
+  const employeeCount = companyProfile?.employeeCount ?? 0
+
+  const facts: AllowedFacts = {
+    companyName,
+    legalForm,
+    industry,
+    location,
+    employeeCount,
+
+    teamStructure: deriveTeamStructure(companyProfile),
+    itLandscape: deriveItLandscape(companyProfile),
+    specialFeatures: deriveSpecialFeatures(companyProfile),
+
+    triggeredRegulations: deriveTriggeredRegulations(complianceScope),
+    primaryUseCases: derivePrimaryUseCases(state),
+
+    narrativeTags,
+  }
+
+  return facts
+}
+
+// ============================================================================
+// Serialization
+// ============================================================================
+
+/**
+ * Serializes AllowedFacts into the bullet list embedded in the LLM prompt.
+ *
+ * The six profile lines are always present; empty values are rendered as
+ * 'nicht angegeben'. Regulations, use cases and special features only get
+ * a line when the respective list is non-empty.
+ */
+export function allowedFactsToPromptString(facts: AllowedFacts): string {
+  const placeholder = 'nicht angegeben'
+  const legalFormSuffix = facts.legalForm ? ` (${facts.legalForm})` : ''
+
+  const profileLines = [
+    `- Firma: ${facts.companyName}${legalFormSuffix}`,
+    `- Branche: ${facts.industry || placeholder}`,
+    `- Standort: ${facts.location || placeholder}`,
+    `- Mitarbeiter: ${facts.employeeCount || placeholder}`,
+    `- Teamstruktur: ${facts.teamStructure || placeholder}`,
+    `- IT-Umgebung: ${facts.itLandscape || placeholder}`,
+  ]
+
+  // Optional sections, in the order they appear in the prompt.
+  const optionalSections: Array<[string, string[]]> = [
+    ['Relevante Regulierungen', facts.triggeredRegulations],
+    ['Anwendungsfaelle', facts.primaryUseCases],
+    ['Besonderheiten', facts.specialFeatures],
+  ]
+  const sectionLines = optionalSections
+    .filter(([, items]) => items.length > 0)
+    .map(([label, items]) => `- ${label}: ${items.join(', ')}`)
+
+  return [...profileLines, ...sectionLines].join('\n')
+}
+
+/**
+ * Serializes the disallowed topics of a policy as one bullet per line for
+ * the LLM prompt.
+ *
+ * @param policy - Policy to render; defaults to DEFAULT_FACT_POLICY.
+ */
+export function disallowedTopicsToPromptString(policy: FactPolicy = DEFAULT_FACT_POLICY): string {
+  const asBullet = (topic: string): string => '- ' + topic
+  const bullets = policy.disallowedTopics.map(asBullet)
+  return bullets.join('\n')
+}
+
+// ============================================================================
+// Validation
+// ============================================================================
+
+/**
+ * Checks a text for patterns that indicate forbidden content and returns a
+ * list of human-readable (German) violation messages, in a fixed order:
+ * percent values, scores, compliance levels, e-mail addresses, phone
+ * numbers, IP addresses, direct address (Sie/Ihr).
+ *
+ * The checks are regex heuristics. In particular any digit sequence of six
+ * or more digits that fits the phone pattern is reported as a phone number.
+ *
+ * @param text - Text to inspect.
+ * @param policy - NOTE(review): accepted for API compatibility but not
+ *   consulted; the checks below are fixed patterns and do not look at
+ *   `policy.disallowedTopics`.
+ * @returns Violation messages; empty when nothing was detected.
+ */
+export function checkForDisallowedContent(
+  text: string,
+  policy: FactPolicy = DEFAULT_FACT_POLICY
+): string[] {
+  const violations: string[] = []
+
+  // Percent values
+  if (/\d+\s*%/.test(text)) {
+    violations.push('Konkrete Prozentwerte gefunden')
+  }
+
+  // Score patterns
+  if (/score[:\s]*\d+/i.test(text)) {
+    violations.push('Konkrete Scores gefunden')
+  }
+
+  // Compliance level labels
+  if (/\b(L1|L2|L3|L4)\b/.test(text)) {
+    violations.push('Compliance-Level-Bezeichnungen (L1-L4) gefunden')
+  }
+
+  // E-mail addresses
+  if (/[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/.test(text)) {
+    violations.push('E-Mail-Adresse gefunden')
+  }
+
+  // Phone numbers: one scan collects all candidates; only candidates with
+  // at least six digits count, so short numbers are not flagged.
+  const phoneCandidates = text.match(/(\+?\d{1,3}[-.\s]?)?\(?\d{2,5}\)?[-.\s]?\d{3,10}/g) ?? []
+  if (phoneCandidates.some(candidate => candidate.replace(/\D/g, '').length >= 6)) {
+    violations.push('Telefonnummer gefunden')
+  }
+
+  // IP addresses
+  if (/\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b/.test(text)) {
+    violations.push('IP-Adresse gefunden')
+  }
+
+  // Direct address
+  if (/\b(Sie|Ihr|Ihnen|Ihrem|Ihrer)\b/.test(text)) {
+    violations.push('Direkte Ansprache (Sie/Ihr) gefunden')
+  }
+
+  return violations
+}
+
+// ============================================================================
+// Private Helpers
+// ============================================================================
+
+/**
+ * Maps the head count of a profile to a coarse organisation label.
+ * Returns an empty string when no profile is available.
+ */
+function deriveTeamStructure(profile: CompanyProfile | null): string {
+  if (!profile) {
+    return ''
+  }
+
+  const headcount = profile.employeeCount
+  return headcount > 500
+    ? 'Konzernstruktur'
+    : headcount > 50
+      ? 'mittelstaendisch'
+      : 'Kleinunternehmen'
+}
+
+/**
+ * Derives a coarse IT landscape label from a profile.
+ * A business model mentioning "SaaS" or "Cloud" wins over the public-sector
+ * flag; returns an empty string when no profile is available.
+ */
+function deriveItLandscape(profile: CompanyProfile | null): string {
+  if (!profile) {
+    return ''
+  }
+
+  const model = profile.businessModel
+  if (model?.includes('SaaS') || model?.includes('Cloud')) {
+    return 'Cloud-First'
+  }
+  if (profile.isPublicSector) {
+    return 'On-Premise'
+  }
+  return 'Hybrid'
+}
+
+/**
+ * Collects the noteworthy characteristics of a profile in a fixed order:
+ * public sector, large enterprise (> 250 employees), data protection
+ * officer present. Returns an empty list when no profile is available.
+ */
+function deriveSpecialFeatures(profile: CompanyProfile | null): string[] {
+  if (!profile) {
+    return []
+  }
+
+  const candidates: Array<[unknown, string]> = [
+    [profile.isPublicSector, 'Oeffentlicher Sektor'],
+    [profile.employeeCount > 250, 'Grossunternehmen'],
+    [profile.dataProtectionOfficer, 'Interner DSB benannt'],
+  ]
+
+  return candidates
+    .filter(([applies]) => Boolean(applies))
+    .map(([, label]) => label)
+}
+
+/**
+ * Derives the list of relevant regulations from the triggered hard triggers
+ * of the compliance scope. 'DSGVO' is always included; without a scope
+ * decision it is the only entry.
+ */
+function deriveTriggeredRegulations(
+  scope: import('../compliance-scope-types').ComplianceScopeState | null
+): string[] {
+  // Checked in this order for every trigger, which fixes the output order.
+  const idMarkers: Array<[string[], string]> = [
+    [['ai_act', 'ai-act'], 'AI Act'],
+    [['nis2', 'NIS2'], 'NIS2'],
+    [['ttdsg', 'TTDSG'], 'TTDSG'],
+  ]
+
+  const regulations = new Set<string>(['DSGVO'])
+  const firedTriggers = scope?.decision ? scope.decision.triggeredHardTriggers || [] : []
+
+  for (const fired of firedTriggers) {
+    const ruleId = fired.rule.id
+    for (const [needles, regulation] of idMarkers) {
+      if (needles.some(needle => ruleId.includes(needle))) {
+        regulations.add(regulation)
+      }
+    }
+  }
+
+  return Array.from(regulations)
+}
+
+/**
+ * Returns the display names of the first three use cases of the state.
+ * A use case without `name` falls back to `title`, then to 'Unbenannt'.
+ */
+function derivePrimaryUseCases(state: SDKState): string[] {
+  const allUseCases = state.useCases
+  if (!allUseCases || allUseCases.length === 0) {
+    return []
+  }
+
+  const topThree = allUseCases.slice(0, 3)
+  return topThree.map(useCase => {
+    if (useCase.name) return useCase.name
+    if (useCase.title) return useCase.title
+    return 'Unbenannt'
+  })
+}
@@ -0,0 +1,303 @@
+/**
+ * Cache Manager — Hash-basierte Prose-Block-Cache
+ *
+ * Deterministischer Cache fuer LLM-generierte Prosa-Bloecke.
+ * Kein TTL-basiertes Raten — stattdessen Hash-basierte Invalidierung.
+ *
+ * Cache-Key = SHA-256 ueber alle Eingabeparameter.
+ * Aendert sich ein Eingabewert → neuer Hash → Cache-Miss → Neu-Generierung.
+ */
+
+import type { AllowedFacts } from './allowed-facts'
+import type { NarrativeTags } from './narrative-tags'
+import type { ProseBlockOutput } from './prose-validator'
+
+// ============================================================================
+// Types
+// ============================================================================
+
+/** A single cached prose block together with its bookkeeping data. */
+export interface CacheEntry {
+  /** The cached prose block. */
+  block: ProseBlockOutput
+  /** ISO-8601 timestamp of when the entry was stored. */
+  createdAt: string
+  /** Number of successful lookups of this entry. */
+  hitCount: number
+  /** The key under which the entry is stored. */
+  cacheKey: string
+}
+
+/**
+ * Input parameters handed to computeCacheKey / computeCacheKeySync to
+ * derive the cache key of a prose block.
+ */
+export interface CacheKeyParams {
+  allowedFacts: AllowedFacts
+  templateVersion: string
+  terminologyVersion: string
+  narrativeTags: NarrativeTags
+  // NOTE(review): presumably a hash of the rendered prompt — confirm with the caller.
+  promptHash: string
+  blockType: string
+  sectionName: string
+}
+
+/** Snapshot of the cache counters as returned by ProseCacheManager.getStats(). */
+export interface CacheStats {
+  /** Number of entries currently stored. */
+  totalEntries: number
+  /** Number of lookups that returned a block. */
+  totalHits: number
+  /** Number of lookups that found nothing or an expired entry. */
+  totalMisses: number
+  /** totalHits / (totalHits + totalMisses); 0 when there were no lookups. */
+  hitRate: number
+  /** Smallest createdAt timestamp among the entries, or null when empty. */
+  oldestEntry: string | null
+  /** Largest createdAt timestamp among the entries, or null when empty. */
+  newestEntry: string | null
+}
+
+// ============================================================================
+// SHA-256 (Browser-kompatibel via SubtleCrypto)
+// ============================================================================
+
+/**
+ * Computes the SHA-256 hash of a string as a lowercase hex string.
+ *
+ * Strategy, in order:
+ * 1. SubtleCrypto, when `globalThis.crypto.subtle` exists.
+ * 2. Node.js `crypto` via dynamic import.
+ * 3. simpleHash() as a last resort (not cryptographic).
+ */
+async function sha256(input: string): Promise<string> {
+  const subtle = globalThis.crypto?.subtle
+
+  if (typeof subtle !== 'undefined') {
+    const bytes = new TextEncoder().encode(input)
+    const digest = await subtle.digest('SHA-256', bytes)
+
+    let hex = ''
+    for (const byte of new Uint8Array(digest)) {
+      hex += byte.toString(16).padStart(2, '0')
+    }
+    return hex
+  }
+
+  // Fallback: Node.js crypto
+  try {
+    const nodeCrypto = await import('crypto')
+    return nodeCrypto.createHash('sha256').update(input).digest('hex')
+  } catch {
+    // Last fallback: simple hash (not cryptographic)
+    return simpleHash(input)
+  }
+}
+
+/**
+ * Synchronous SHA-256 (Node.js only), returned as a lowercase hex string.
+ * Falls back to simpleHash() (not cryptographic) when `require('crypto')`
+ * or the hashing itself throws.
+ */
+function sha256Sync(input: string): string {
+  try {
+    // eslint-disable-next-line @typescript-eslint/no-require-imports
+    const { createHash } = require('crypto')
+    const hasher = createHash('sha256')
+    hasher.update(input)
+    return hasher.digest('hex')
+  } catch {
+    return simpleHash(input)
+  }
+}
+
+/**
+ * Simple non-cryptographic fallback hash.
+ * Folds the UTF-16 code units into a 32-bit integer (hash * 31 + code) and
+ * returns its absolute value as hex, left-padded to 16 characters.
+ */
+function simpleHash(input: string): string {
+  const codeUnits = Array.from({ length: input.length }, (_, index) => input.charCodeAt(index))
+
+  // `| 0` truncates to a signed 32-bit integer after every step.
+  const folded = codeUnits.reduce((acc, code) => (((acc << 5) - acc) + code) | 0, 0)
+
+  return Math.abs(folded).toString(16).padStart(16, '0')
+}
+
+// ============================================================================
+// Cache Key Computation
+// ============================================================================
+
+/**
+ * Serializes a value to JSON with object keys sorted recursively, so that
+ * two structurally equal values always yield the same string regardless of
+ * property insertion order.
+ *
+ * Values with a `toJSON` method (e.g. Date) are converted through it first.
+ */
+function stableStringify(value: unknown): string {
+  const normalize = (node: unknown): unknown => {
+    if (node === null || typeof node !== 'object') {
+      return node
+    }
+    const withToJson = node as { toJSON?: unknown }
+    if (typeof withToJson.toJSON === 'function') {
+      return normalize((withToJson as { toJSON(): unknown }).toJSON())
+    }
+    if (Array.isArray(node)) {
+      return node.map(normalize)
+    }
+    const source = node as Record<string, unknown>
+    // Null prototype so that a key named "__proto__" is stored as data.
+    const sorted: Record<string, unknown> = Object.create(null)
+    for (const key of Object.keys(source).sort()) {
+      sorted[key] = normalize(source[key])
+    }
+    return sorted
+  }
+
+  const json: string | undefined = JSON.stringify(normalize(value))
+  return json ?? 'null'
+}
+
+/**
+ * Computes the deterministic cache key (SHA-256 hex) over ALL input
+ * parameters, including the nested `allowedFacts` and `narrativeTags`.
+ *
+ * Keys are sorted recursively for a stable serialization. An array passed
+ * as the second argument of JSON.stringify must not be used for this: it
+ * acts as a property allow-list on every nesting level and would drop the
+ * contents of nested objects from the key.
+ */
+export async function computeCacheKey(params: CacheKeyParams): Promise<string> {
+  return sha256(stableStringify(params))
+}
+
+/**
+ * Synchronous variant of computeCacheKey (Node.js).
+ */
+export function computeCacheKeySync(params: CacheKeyParams): string {
+  return sha256Sync(stableStringify(params))
+}
+
+// ============================================================================
+// In-Memory Cache
+// ============================================================================
+
+/**
+ * In-memory cache for prose blocks, keyed by the hash of the generation
+ * inputs.
+ *
+ * Safety mechanisms:
+ * - bounded number of entries (`maxEntries`, default 500)
+ * - TTL as an additional safety net (`ttlHours`, default 24)
+ * - when the cache is full, the entry with the oldest `createdAt` is evicted
+ */
+export class ProseCacheManager {
+  private cache = new Map<string, CacheEntry>()
+  private hits = 0
+  private misses = 0
+  private readonly maxEntries: number
+  private readonly ttlMs: number
+
+  /**
+   * @param options.maxEntries - Maximum number of entries (default 500).
+   * @param options.ttlHours - Lifetime of an entry in hours (default 24).
+   */
+  constructor(options?: { maxEntries?: number; ttlHours?: number }) {
+    this.maxEntries = options?.maxEntries ?? 500
+    this.ttlMs = (options?.ttlHours ?? 24) * 60 * 60 * 1000
+  }
+
+  /**
+   * Looks up a cached block for the given parameters.
+   * Returns null on a miss or when the entry has expired.
+   */
+  async get(params: CacheKeyParams): Promise<ProseBlockOutput | null> {
+    const key = await computeCacheKey(params)
+    return this.getByKey(key)
+  }
+
+  /**
+   * Synchronous lookup (Node.js).
+   */
+  getSync(params: CacheKeyParams): ProseBlockOutput | null {
+    const key = computeCacheKeySync(params)
+    return this.getByKey(key)
+  }
+
+  /**
+   * Stores a block under the key derived from the parameters.
+   */
+  async set(params: CacheKeyParams, block: ProseBlockOutput): Promise<void> {
+    const key = await computeCacheKey(params)
+    this.setByKey(key, block)
+  }
+
+  /**
+   * Synchronous store (Node.js).
+   */
+  setSync(params: CacheKeyParams, block: ProseBlockOutput): void {
+    const key = computeCacheKeySync(params)
+    this.setByKey(key, block)
+  }
+
+  /**
+   * Returns the current cache statistics.
+   */
+  getStats(): CacheStats {
+    const lookups = this.hits + this.misses
+
+    // ISO timestamps sort chronologically as plain strings.
+    let oldestEntry: string | null = null
+    let newestEntry: string | null = null
+    for (const { createdAt } of this.cache.values()) {
+      if (oldestEntry === null || createdAt < oldestEntry) oldestEntry = createdAt
+      if (newestEntry === null || createdAt > newestEntry) newestEntry = createdAt
+    }
+
+    return {
+      totalEntries: this.cache.size,
+      totalHits: this.hits,
+      totalMisses: this.misses,
+      hitRate: lookups > 0 ? this.hits / lookups : 0,
+      oldestEntry,
+      newestEntry,
+    }
+  }
+
+  /**
+   * Removes all entries and resets the hit/miss counters.
+   */
+  clear(): void {
+    this.cache.clear()
+    this.hits = 0
+    this.misses = 0
+  }
+
+  /**
+   * Removes all expired entries.
+   * @returns Number of removed entries.
+   */
+  cleanup(): number {
+    const now = Date.now()
+    let removed = 0
+    for (const [key, entry] of this.cache.entries()) {
+      if (this.isExpired(entry, now)) {
+        this.cache.delete(key)
+        removed++
+      }
+    }
+    return removed
+  }
+
+  // ========================================================================
+  // Private
+  // ========================================================================
+
+  /** True when the entry is older than the configured TTL. */
+  private isExpired(entry: CacheEntry, now: number): boolean {
+    return now - new Date(entry.createdAt).getTime() > this.ttlMs
+  }
+
+  /** Lookup by precomputed key; maintains the hit/miss counters. */
+  private getByKey(key: string): ProseBlockOutput | null {
+    const entry = this.cache.get(key)
+
+    if (!entry) {
+      this.misses++
+      return null
+    }
+
+    // Expired entries are dropped and counted as a miss.
+    if (this.isExpired(entry, Date.now())) {
+      this.cache.delete(key)
+      this.misses++
+      return null
+    }
+
+    entry.hitCount++
+    this.hits++
+    return entry.block
+  }
+
+  /** Store by precomputed key, evicting the oldest entry when full. */
+  private setByKey(key: string, block: ProseBlockOutput): void {
+    // Only a NEW key can grow the cache. Overwriting an existing key must
+    // not evict an unrelated entry.
+    if (!this.cache.has(key) && this.cache.size >= this.maxEntries) {
+      this.evictOldest()
+    }
+
+    this.cache.set(key, {
+      block,
+      createdAt: new Date().toISOString(),
+      hitCount: 0,
+      cacheKey: key,
+    })
+  }
+
+  /** Removes the entry with the smallest createdAt timestamp, if any. */
+  private evictOldest(): void {
+    let oldestKey: string | null = null
+    let oldestTime = Infinity
+
+    for (const [key, entry] of this.cache.entries()) {
+      const time = new Date(entry.createdAt).getTime()
+      if (time < oldestTime) {
+        oldestTime = time
+        oldestKey = key
+      }
+    }
+
+    if (oldestKey) {
+      this.cache.delete(oldestKey)
+    }
+  }
+}
+
+// ============================================================================
+// Checksum Utils (fuer Data Block Integritaet)
+// ============================================================================
+
+/**
+ * Serializes arbitrary JSON-compatible data with recursively sorted object
+ * keys, so the result does not depend on property insertion order.
+ * Values with a `toJSON` method (e.g. Date) are converted through it first;
+ * `undefined` at the top level is serialized as "null".
+ */
+function canonicalJson(data: unknown): string {
+  const canonicalize = (node: unknown): unknown => {
+    if (node === null || typeof node !== 'object') {
+      return node
+    }
+    const withToJson = node as { toJSON?: unknown }
+    if (typeof withToJson.toJSON === 'function') {
+      return canonicalize((withToJson as { toJSON(): unknown }).toJSON())
+    }
+    if (Array.isArray(node)) {
+      return node.map(canonicalize)
+    }
+    const source = node as Record<string, unknown>
+    // Null prototype so that a key named "__proto__" is stored as data.
+    const ordered: Record<string, unknown> = Object.create(null)
+    for (const key of Object.keys(source).sort()) {
+      ordered[key] = canonicalize(source[key])
+    }
+    return ordered
+  }
+
+  const json: string | undefined = JSON.stringify(canonicalize(data))
+  return json ?? 'null'
+}
+
+/**
+ * Computes an integrity checksum (SHA-256 hex) over the complete data,
+ * including nested objects and arrays.
+ *
+ * An array passed as the second argument of JSON.stringify must not be used
+ * for key sorting: it filters properties on every nesting level, so nested
+ * content would not be covered by the checksum.
+ */
+export async function computeChecksum(data: unknown): Promise<string> {
+  return sha256(canonicalJson(data))
+}
+
+/**
+ * Synchronous checksum computation.
+ */
+export function computeChecksumSync(data: unknown): string {
+  return sha256Sync(canonicalJson(data))
+}
+
+/**
+ * Verifies data against an expected checksum.
+ *
+ * @returns true when the checksum computed by computeChecksum equals
+ *   `expectedChecksum`.
+ */
+export async function verifyChecksum(data: unknown, expectedChecksum: string): Promise<boolean> {
+  return (await computeChecksum(data)) === expectedChecksum
+}
@@ -0,0 +1,139 @@
+/**
+ * Narrative Tags — Deterministische Score-zu-Sprache Ableitung
+ *
+ * Der Data Layer erzeugt aus berechneten Scores sprachliche Tags.
+ * Das LLM darf NUR diese Tags verwenden — niemals echte Scores oder Prozentwerte.
+ *
+ * Alle Funktionen sind 100% deterministisch: gleiche Eingabe = gleiche Ausgabe.
+ */
+
+// ============================================================================
+// Types
+// ============================================================================
+
+/**
+ * Verbal tags derived from numeric scores. The tag values are German
+ * because they are rendered into the prompt.
+ */
+export interface NarrativeTags {
+  /** Verbal risk assessment */
+  riskSummary: 'niedrig' | 'moderat' | 'erhoht'
+  /** Maturity of the existing measures */
+  maturity: 'ausbaufahig' | 'solide' | 'hoch'
+  /** Priority for action */
+  priority: 'kurzfristig' | 'mittelfristig' | 'langfristig'
+  /** Degree of control coverage */
+  coverageLevel: 'grundlegend' | 'umfassend' | 'vollstaendig'
+  /** Urgency */
+  urgency: 'planbar' | 'zeitnah' | 'dringend'
+}
+
+/** Input scores for the tag derivation */
+export interface NarrativeTagScores {
+  /** Overall risk score (0-100) */
+  overallRisk: number
+  /** Maturity score (0-100) */
+  maturityScore: number
+  /** Number of identified gaps */
+  gapCount: number
+  /** Number of critical gaps */
+  criticalGaps: number
+  /** Control coverage (0-100) */
+  controlCoverage: number
+  /** Number of critical findings */
+  criticalFindings: number
+  /** Number of high findings */
+  highFindings: number
+}
+
+// ============================================================================
+// Tag Derivation (deterministisch)
+// ============================================================================
+
+/**
+ * Derives verbal narrative tags from numeric scores.
+ * Fully deterministic: equal scores always yield equal tags.
+ *
+ * Thresholds (inclusive upper bounds):
+ * - riskSummary: <= 30 niedrig, <= 65 moderat, otherwise erhoht
+ * - maturity: <= 40 ausbaufahig, <= 75 solide, otherwise hoch
+ * - coverageLevel: <= 50 grundlegend, <= 80 umfassend, otherwise vollstaendig
+ * - priority: no gaps langfristig; any critical gap kurzfristig; otherwise mittelfristig
+ * - urgency: any critical finding dringend; any high finding zeitnah; otherwise planbar
+ */
+export function deriveNarrativeTags(scores: NarrativeTagScores): NarrativeTags {
+  let riskSummary: NarrativeTags['riskSummary'] = 'erhoht'
+  if (scores.overallRisk <= 30) {
+    riskSummary = 'niedrig'
+  } else if (scores.overallRisk <= 65) {
+    riskSummary = 'moderat'
+  }
+
+  let maturity: NarrativeTags['maturity'] = 'hoch'
+  if (scores.maturityScore <= 40) {
+    maturity = 'ausbaufahig'
+  } else if (scores.maturityScore <= 75) {
+    maturity = 'solide'
+  }
+
+  let priority: NarrativeTags['priority'] = 'mittelfristig'
+  if (scores.gapCount === 0) {
+    priority = 'langfristig'
+  } else if (scores.criticalGaps > 0) {
+    priority = 'kurzfristig'
+  }
+
+  let coverageLevel: NarrativeTags['coverageLevel'] = 'vollstaendig'
+  if (scores.controlCoverage <= 50) {
+    coverageLevel = 'grundlegend'
+  } else if (scores.controlCoverage <= 80) {
+    coverageLevel = 'umfassend'
+  }
+
+  let urgency: NarrativeTags['urgency'] = 'planbar'
+  if (scores.criticalFindings > 0) {
+    urgency = 'dringend'
+  } else if (scores.highFindings > 0) {
+    urgency = 'zeitnah'
+  }
+
+  return { riskSummary, maturity, priority, coverageLevel, urgency }
+}
+
+/**
+ * Extracts NarrativeTagScores from a draft context.
+ *
+ * - risk_score feeds overallRisk; assurance_score feeds both maturityScore
+ *   and controlCoverage. A missing (null/undefined) score defaults to 50.
+ * - The number of risk flags is the gap count; flags with severity
+ *   'critical' count as critical gaps and critical findings, flags with
+ *   severity 'high' as high findings.
+ */
+export function extractScoresFromDraftContext(context: {
+  decisions: {
+    scores: {
+      risk_score: number
+      complexity_score: number
+      assurance_score: number
+      composite_score: number
+    }
+  }
+  constraints: {
+    riskFlags: Array<{ severity: string }>
+  }
+}): NarrativeTagScores {
+  const DEFAULT_SCORE = 50
+  const flags = context.constraints.riskFlags
+
+  const countWithSeverity = (level: string): number =>
+    flags.reduce((count, flag) => (flag.severity === level ? count + 1 : count), 0)
+
+  const criticalCount = countWithSeverity('critical')
+  const highCount = countWithSeverity('high')
+
+  const rawScores = context.decisions.scores
+  const assurance = rawScores.assurance_score ?? DEFAULT_SCORE
+
+  return {
+    overallRisk: rawScores.risk_score ?? DEFAULT_SCORE,
+    maturityScore: assurance,
+    gapCount: flags.length,
+    criticalGaps: criticalCount,
+    controlCoverage: assurance,
+    criticalFindings: criticalCount,
+    highFindings: highCount,
+  }
+}
+
+// ============================================================================
+// Serialization
+// ============================================================================
+
+/**
+ * Serializes narrative tags as a bullet list for the LLM prompt, one
+ * "- <label>: <tag>" line per tag in a fixed order.
+ */
+export function narrativeTagsToPromptString(tags: NarrativeTags): string {
+  const labelled: Array<[string, string]> = [
+    ['Risikoprofil', tags.riskSummary],
+    ['Reifegrad', tags.maturity],
+    ['Prioritaet', tags.priority],
+    ['Abdeckungsgrad', tags.coverageLevel],
+    ['Dringlichkeit', tags.urgency],
+  ]
+
+  return labelled.map(([label, tag]) => `- ${label}: ${tag}`).join('\n')
+}
+
+/**
+ * Returns every permitted tag value as one flat list (for validation),
+ * grouped in the order risk, maturity, priority, coverage, urgency.
+ * A fresh array is returned on every call.
+ */
+export function getAllAllowedTagValues(): string[] {
+  const riskValues = ['niedrig', 'moderat', 'erhoht']
+  const maturityValues = ['ausbaufahig', 'solide', 'hoch']
+  const priorityValues = ['kurzfristig', 'mittelfristig', 'langfristig']
+  const coverageValues = ['grundlegend', 'umfassend', 'vollstaendig']
+  const urgencyValues = ['planbar', 'zeitnah', 'dringend']
+
+  return [
+    ...riskValues,
+    ...maturityValues,
+    ...priorityValues,
+    ...coverageValues,
+    ...urgencyValues,
+  ]
+}
@@ -0,0 +1,485 @@
+/**
+ * Prose Validator + Repair Loop — Governance Layer
+ *
+ * Validiert LLM-generierte Prosa-Bloecke gegen das Regelwerk.
+ * Orchestriert den Repair-Loop (max 2 Versuche) mit Fallback.
+ *
+ * 12 Pruefregeln, davon 10 reparierbar und 2 Hard Aborts.
+ */
+
+import type { NarrativeTags } from './narrative-tags'
+import { getAllAllowedTagValues } from './narrative-tags'
+import type { AllowedFacts } from './allowed-facts'
+import { checkForDisallowedContent } from './allowed-facts'
+import { checkStyleViolations, checkTerminologyUsage } from './terminology'
+import type { SanitizedFacts } from './sanitizer'
+import { isSanitized } from './sanitizer'
+
+// ============================================================================
+// Types
+// ============================================================================
+
+/** Structured LLM output (mandatory format) */
+export interface ProseBlockOutput {
+  blockId: string
+  blockType: 'introduction' | 'transition' | 'conclusion' | 'appreciation'
+  language: 'de'
+  /** The generated prose; this is the text the validator inspects. */
+  text: string
+
+  /** Self-reported claims of the LLM about what the text contains. */
+  assertions: {
+    companyNameUsed: boolean
+    industryReferenced: boolean
+    structureReferenced: boolean
+    itLandscapeReferenced: boolean
+    /** Tags the LLM claims to have used; checked against the allowed tag values. */
+    narrativeTagsUsed: string[]
+  }
+
+  /** Forbidden content the LLM reports about itself; any entry fails validation. */
+  forbiddenContentDetected: string[]
+}
+
+/** A single validation finding */
+export interface ProseValidationError {
+  /** Identifier of the violated rule, e.g. 'COMPANY_NAME_PRESENT'. */
+  rule: string
+  /** Only findings with severity 'error' make a block invalid. */
+  severity: 'error' | 'warning'
+  message: string
+  /** false marks a hard abort: the block must not go through the repair loop. */
+  repairable: boolean
+}
+
+/** Validation result */
+export interface ProseValidatorResult {
+  /** true when no finding has severity 'error'. */
+  valid: boolean
+  /** All findings, errors and warnings, in rule order. */
+  errors: ProseValidationError[]
+  /** true when there is at least one error and none of the findings is a hard abort. */
+  repairable: boolean
+}
+
+/**
+ * Audit record of a repair loop run.
+ * NOTE(review): the producer of this record is not visible in this chunk;
+ * the field notes below are assumptions derived from the names — confirm.
+ */
+export interface RepairAudit {
+  repairAttempts: number
+  // presumably one list of failed rule names per validation attempt
+  validatorFailures: string[][]
+  repairSuccessful: boolean
+  fallbackUsed: boolean
+  fallbackReason?: string
+}
+
+/**
+ * Word count limits per block type.
+ * In validateProseBlock, falling below `min` yields a warning and exceeding
+ * `max` yields an error.
+ */
+const WORD_COUNT_LIMITS: Record<ProseBlockOutput['blockType'], { min: number; max: number }> = {
+  introduction: { min: 30, max: 200 },
+  transition: { min: 10, max: 80 },
+  conclusion: { min: 20, max: 150 },
+  appreciation: { min: 15, max: 100 },
+}
+
+// ============================================================================
+// Prose Validator
+// ============================================================================
+
+/**
+ * Validates a ProseBlockOutput against all 12 rules.
+ *
+ * Findings are collected in rule order. The block is valid when no finding
+ * has severity 'error'; it is repairable when there is at least one error
+ * and no finding is a hard abort (`repairable: false`).
+ *
+ * @param block - Parsed LLM output to validate.
+ * @param facts - Fact budget the text was generated from.
+ * @param expectedTags - Narrative tags the text is allowed to use.
+ */
+export function validateProseBlock(
+  block: ProseBlockOutput,
+  facts: AllowedFacts | SanitizedFacts,
+  expectedTags: NarrativeTags
+): ProseValidatorResult {
+  const errors: ProseValidationError[] = []
+  const report = (
+    rule: string,
+    severity: ProseValidationError['severity'],
+    message: string,
+    repairable = true
+  ): void => {
+    errors.push({ rule, severity, message, repairable })
+  }
+  const prose = block.text
+
+  // Rule 1: JSON_VALID — checked externally (parsing happens before this
+  // call); reaching this point means the JSON was valid.
+
+  // Rule 2: COMPANY_NAME_PRESENT (skipped for the placeholder name)
+  const nameMissing = !prose.includes(facts.companyName)
+  if (nameMissing && facts.companyName !== 'Unbekannt') {
+    report('COMPANY_NAME_PRESENT', 'error', `Firmenname "${facts.companyName}" nicht im Text gefunden`)
+  }
+
+  // Rule 3: INDUSTRY_REFERENCED (case-insensitive)
+  if (facts.industry && !prose.toLowerCase().includes(facts.industry.toLowerCase())) {
+    report('INDUSTRY_REFERENCED', 'warning', `Branche "${facts.industry}" nicht im Text referenziert`)
+  }
+
+  // Rule 4: NO_NUMERIC_SCORES
+  // NOTE(review): the same three patterns are also part of
+  // checkForDisallowedContent (rule 5), so a hit is reported under both rules.
+  const numericPatterns: Array<[RegExp, string]> = [
+    [/\d+\s*%/, 'Prozentwerte im Text gefunden'],
+    [/score[:\s]*\d+/i, 'Score-Werte im Text gefunden'],
+    [/\b(L1|L2|L3|L4)\b/, 'Compliance-Level-Bezeichnungen (L1-L4) im Text gefunden'],
+  ]
+  for (const [pattern, message] of numericPatterns) {
+    if (pattern.test(prose)) {
+      report('NO_NUMERIC_SCORES', 'error', message)
+    }
+  }
+
+  // Rule 5: NO_DISALLOWED_TOPICS
+  for (const violation of checkForDisallowedContent(prose)) {
+    report('NO_DISALLOWED_TOPICS', 'error', violation)
+  }
+
+  // Rule 6: WORD_COUNT_IN_RANGE (too short is a warning, too long an error)
+  const wordCount = prose.split(/\s+/).filter(Boolean).length
+  const limits = WORD_COUNT_LIMITS[block.blockType]
+  if (limits) {
+    if (wordCount < limits.min) {
+      report(
+        'WORD_COUNT_IN_RANGE',
+        'warning',
+        `Wortanzahl ${wordCount} unter Minimum ${limits.min} fuer ${block.blockType}`
+      )
+    }
+    if (wordCount > limits.max) {
+      report(
+        'WORD_COUNT_IN_RANGE',
+        'error',
+        `Wortanzahl ${wordCount} ueber Maximum ${limits.max} fuer ${block.blockType}`
+      )
+    }
+  }
+
+  // Rule 7: NO_DIRECT_ADDRESS
+  if (/\b(Sie|Ihr|Ihnen|Ihrem|Ihrer)\b/.test(prose)) {
+    report('NO_DIRECT_ADDRESS', 'error', 'Direkte Ansprache (Sie/Ihr) gefunden')
+  }
+
+  // Rule 8: NARRATIVE_TAGS_CONSISTENT
+  const knownTags = getAllAllowedTagValues()
+  if (block.assertions.narrativeTagsUsed) {
+    for (const tag of block.assertions.narrativeTagsUsed) {
+      if (!knownTags.includes(tag)) {
+        report('NARRATIVE_TAGS_CONSISTENT', 'error', `Unbekannter Narrative Tag "${tag}" in assertions`)
+      }
+    }
+  }
+  // Flag tag words in the text that are not part of the expected tag set.
+  // NOTE(review): this is a plain substring match, so e.g. "hoch" also
+  // matches inside longer words such as "hochwertig".
+  const expectedTagValues = Object.values(expectedTags)
+  for (const tagValue of knownTags) {
+    if (prose.includes(tagValue) && !expectedTagValues.includes(tagValue)) {
+      report('NARRATIVE_TAGS_CONSISTENT', 'error', `Tag "${tagValue}" im Text, aber nicht im erwarteten Tag-Set`)
+    }
+  }
+
+  // Rule 9: TERMINOLOGY_CORRECT
+  for (const warning of checkTerminologyUsage(prose)) {
+    report('TERMINOLOGY_CORRECT', 'warning', warning)
+  }
+
+  // Rule 10: Style violations
+  for (const violation of checkStyleViolations(prose)) {
+    report('STYLE_VIOLATION', 'warning', violation)
+  }
+
+  // Rule 11: SANITIZATION_PASSED (hard abort)
+  if ('__sanitized' in facts && !isSanitized(facts)) {
+    report('SANITIZATION_PASSED', 'error', 'Sanitization-Flag gesetzt aber nicht valide', false)
+  }
+
+  // Rule 12: Self-reported forbidden content
+  if (block.forbiddenContentDetected && block.forbiddenContentDetected.length > 0) {
+    report(
+      'SELF_REPORTED_FORBIDDEN',
+      'error',
+      `LLM meldet verbotene Inhalte: ${block.forbiddenContentDetected.join(', ')}`
+    )
+  }
+
+  const errorPresent = errors.some(finding => finding.severity === 'error')
+  const hardAbort = errors.some(finding => !finding.repairable)
+
+  return {
+    valid: !errorPresent,
+    errors,
+    repairable: errorPresent && !hardAbort,
+  }
+}
+
+// ============================================================================
+// JSON Parsing
+// ============================================================================
+
+/**
+ * Parses raw LLM output into a normalized ProseBlockOutput.
+ *
+ * The mandatory fields (`blockId`, `text`, `blockType`) are checked strictly.
+ * Optional fields are coerced to the runtime shape the rest of the pipeline
+ * relies on: list fields always come back as string arrays (a bare non-empty
+ * string is wrapped, anything else becomes `[]`) and the assertion flags are
+ * real booleans.
+ *
+ * @param rawContent Raw LLM response, expected to be a single JSON object.
+ * @returns The normalized block, or `null` when the input is not valid JSON
+ *          or a mandatory field is missing or has the wrong type.
+ */
+export function parseProseBlockOutput(rawContent: string): ProseBlockOutput | null {
+  let parsed: unknown
+  try {
+    parsed = JSON.parse(rawContent)
+  } catch {
+    return null
+  }
+
+  // JSON.parse also yields primitives and null; neither can be a block.
+  if (typeof parsed !== 'object' || parsed === null) {
+    return null
+  }
+  const candidate = parsed as Record<string, unknown>
+
+  // Check mandatory fields
+  const blockTypes: unknown[] = ['introduction', 'transition', 'conclusion', 'appreciation']
+  if (
+    typeof candidate.blockId !== 'string' ||
+    typeof candidate.text !== 'string' ||
+    !blockTypes.includes(candidate.blockType)
+  ) {
+    return null
+  }
+
+  // A list field delivered as a single string is kept (wrapped) so that a
+  // self-reported finding is not silently dropped; other shapes become [].
+  const toStringList = (value: unknown): string[] => {
+    if (Array.isArray(value)) {
+      return value.filter((item): item is string => typeof item === 'string')
+    }
+    return typeof value === 'string' && value !== '' ? [value] : []
+  }
+
+  const assertions: Record<string, unknown> =
+    typeof candidate.assertions === 'object' && candidate.assertions !== null
+      ? (candidate.assertions as Record<string, unknown>)
+      : {}
+
+  const language =
+    typeof candidate.language === 'string' && candidate.language !== '' ? candidate.language : 'de'
+
+  return {
+    blockId: candidate.blockId,
+    blockType: candidate.blockType as ProseBlockOutput['blockType'],
+    language: language as ProseBlockOutput['language'],
+    text: candidate.text,
+    assertions: {
+      companyNameUsed: assertions.companyNameUsed === true,
+      industryReferenced: assertions.industryReferenced === true,
+      structureReferenced: assertions.structureReferenced === true,
+      itLandscapeReferenced: assertions.itLandscapeReferenced === true,
+      narrativeTagsUsed: toStringList(assertions.narrativeTagsUsed),
+    },
+    forbiddenContentDetected: toStringList(candidate.forbiddenContentDetected),
+  }
+}
+
+// ============================================================================
+// Repair Prompt Builder
+// ============================================================================
+
+/**
+ * Builds the repair prompt for a block that failed validation.
+ *
+ * Only findings with severity `error` are listed; warnings are left out.
+ * The original block is appended as pretty-printed JSON.
+ *
+ * @param originalBlock    The block that failed validation.
+ * @param validationErrors All findings reported for that block.
+ * @returns The prompt text to send to the LLM.
+ */
+export function buildRepairPrompt(
+  originalBlock: ProseBlockOutput,
+  validationErrors: ProseValidationError[]
+): string {
+  const bulletPoints: string[] = []
+  for (const finding of validationErrors) {
+    if (finding.severity === 'error') {
+      bulletPoints.push(`- ${finding.rule}: ${finding.message}`)
+    }
+  }
+
+  const promptLines = [
+    'Der vorherige Text enthielt Fehler. Ueberarbeite ihn unter Beibehaltung der Aussage.',
+    '',
+    'FEHLER:',
+    bulletPoints.join('\n'),
+    '',
+    'REGELN:',
+    '- Entferne alle unerlaubten Inhalte',
+    '- Behalte den Firmenkontext bei',
+    '- Erzeuge ausschliesslich JSON im vorgegebenen Format',
+    '- Aendere KEINE Fakten, ergaenze KEINE neuen Informationen',
+    '- Verwende KEINE direkte Ansprache (Sie/Ihr)',
+    '- Verwende KEINE konkreten Prozentwerte oder Scores',
+    '',
+    'ORIGINALTEXT:',
+    JSON.stringify(originalBlock, null, 2),
+  ]
+
+  return promptLines.join('\n')
+}
+
+// ============================================================================
+// Fallback Templates
+// ============================================================================
+
+/**
+ * Static fallback texts per block type. Placeholders in double curly braces
+ * (`companyName`, `documentType`, `maturity`) are filled in by
+ * `buildFallbackBlock`.
+ */
+const FALLBACK_TEMPLATES: Record<ProseBlockOutput['blockType'], string> = {
+  introduction: 'Die {{companyName}} dokumentiert im Folgenden die {{documentType}}-relevanten Massnahmen und Bewertungen. Die nachstehenden Ausfuehrungen basieren auf der aktuellen Analyse der organisatorischen und technischen Gegebenheiten.',
+  transition: 'Auf Grundlage der vorstehenden Daten ergeben sich die folgenden Detailbewertungen.',
+  conclusion: 'Die {{companyName}} verfuegt ueber die dokumentierten Massnahmen und Strukturen. Die Einhaltung der regulatorischen Anforderungen wird fortlaufend ueberprueft und angepasst.',
+  appreciation: 'Die bestehende Organisationsstruktur der {{companyName}} bildet eine {{maturity}} Grundlage fuer die nachfolgend dokumentierten Massnahmen.',
+}
+
+/**
+ * Produces the deterministic fallback block used when the repair loop fails.
+ *
+ * Placeholders are substituted in a single pass through a replacer function,
+ * so the inserted values are taken literally: `$&`, `$1` or `$$` inside a
+ * company name are not interpreted as replacement patterns, and an inserted
+ * value is never scanned again for further placeholders.
+ *
+ * @param blockId      Id to assign to the generated block.
+ * @param blockType    Selects the template.
+ * @param facts        Source of the company name and the maturity tag.
+ * @param documentType Optional document type; defaults to 'Compliance'.
+ * @returns A block whose `assertions` reflect what the chosen template
+ *          actually contains.
+ */
+export function buildFallbackBlock(
+  blockId: string,
+  blockType: ProseBlockOutput['blockType'],
+  facts: AllowedFacts,
+  documentType?: string
+): ProseBlockOutput {
+  const template = FALLBACK_TEMPLATES[blockType]
+
+  const text = template.replace(
+    /\{\{(companyName|maturity|documentType)\}\}/g,
+    (_placeholder: string, key: string): string => {
+      switch (key) {
+        case 'companyName':
+          return facts.companyName
+        case 'maturity':
+          return facts.narrativeTags.maturity
+        default:
+          return documentType || 'Compliance'
+      }
+    }
+  )
+
+  return {
+    blockId,
+    blockType,
+    language: 'de',
+    text,
+    assertions: {
+      // Not every template mentions the company (e.g. 'transition').
+      companyNameUsed: template.includes('{{companyName}}'),
+      industryReferenced: false,
+      structureReferenced: false,
+      itLandscapeReferenced: false,
+      narrativeTagsUsed: blockType === 'appreciation' ? ['maturity'] : [],
+    },
+    forbiddenContentDetected: [],
+  }
+}
+
+// ============================================================================
+// Repair Loop Orchestrator
+// ============================================================================
+
+/**
+ * Callback that performs the LLM call: takes a prompt and resolves to the
+ * raw response text. Injected by the route (per the original note).
+ */
+export type LLMCallFn = (prompt: string) => Promise<string>
+
+/**
+ * Orchestrates the parse → validate → repair cycle for one prose block.
+ *
+ * 1. Parse the raw LLM output. If it cannot be parsed and the attempt budget
+ *    allows it, ask the LLM once to re-emit valid JSON; this consumes one
+ *    repair attempt.
+ * 2. Validate the block. While it is invalid but repairable and attempts
+ *    remain, send a repair prompt and continue with the repaired block. If a
+ *    repair call throws or returns unparseable output, the previous block is
+ *    kept and validated again on the next iteration.
+ * 3. On a non-repairable finding, or once the attempts are used up, return
+ *    the fallback block built by `buildFallbackBlock`.
+ *
+ * Errors thrown by `llmCall` are swallowed on purpose: the loop degrades to
+ * the next attempt or to the fallback instead of rejecting.
+ *
+ * @param rawLLMOutput      Raw response of the initial LLM call.
+ * @param facts             Facts passed to the validator and the fallback.
+ * @param expectedTags      Narrative tags the validator checks against.
+ * @param blockId           Block id used for a fallback block.
+ * @param blockType         Block type used for a fallback block.
+ * @param llmCall           Callback used for repair requests.
+ * @param documentType      Optional document type for the fallback text.
+ * @param maxRepairAttempts Upper bound for LLM repair calls (default 2).
+ * @returns The accepted (or fallback) block together with the audit record.
+ *          `audit.repairSuccessful` is set whenever a validated block is
+ *          returned, including the case where no repair was needed.
+ */
+export async function executeRepairLoop(
+  rawLLMOutput: string,
+  facts: AllowedFacts | SanitizedFacts,
+  expectedTags: NarrativeTags,
+  blockId: string,
+  blockType: ProseBlockOutput['blockType'],
+  llmCall: LLMCallFn,
+  documentType?: string,
+  maxRepairAttempts = 2
+): Promise<{ block: ProseBlockOutput; audit: RepairAudit }> {
+  const audit: RepairAudit = {
+    repairAttempts: 0,
+    validatorFailures: [],
+    repairSuccessful: false,
+    fallbackUsed: false,
+  }
+
+  // Attempt 0: parse the original output
+  let parsed = parseProseBlockOutput(rawLLMOutput)
+
+  if (!parsed) {
+    audit.validatorFailures.push(['JSON_VALID: LLM-Output konnte nicht als JSON geparst werden'])
+
+    // A regeneration counts as a repair attempt, but only if it is actually
+    // performed; with an exhausted budget nothing is called or counted.
+    if (audit.repairAttempts < maxRepairAttempts) {
+      audit.repairAttempts++
+      const repairPrompt = `Der vorherige Output war kein valides JSON. Erzeuge ausschliesslich ein JSON-Objekt mit den Feldern: blockId, blockType, language, text, assertions, forbiddenContentDetected.\n\nOriginal-Output:\n${rawLLMOutput.slice(0, 500)}`
+      try {
+        const repaired = await llmCall(repairPrompt)
+        parsed = parseProseBlockOutput(repaired)
+      } catch {
+        // LLM failure: `parsed` stays null and the fallback below is used.
+      }
+    }
+  }
+
+  if (!parsed) {
+    audit.fallbackUsed = true
+    audit.fallbackReason = 'JSON konnte nach Repair nicht geparst werden'
+    return {
+      block: buildFallbackBlock(blockId, blockType, facts, documentType),
+      audit,
+    }
+  }
+
+  // Validation loop; starts at the number of attempts already consumed.
+  for (let attempt = audit.repairAttempts; attempt <= maxRepairAttempts; attempt++) {
+    const result = validateProseBlock(parsed, facts, expectedTags)
+
+    if (result.valid) {
+      audit.repairSuccessful = true
+      return { block: parsed, audit }
+    }
+
+    // Hard abort: a non-repairable finding goes straight to the fallback.
+    if (!result.repairable) {
+      audit.fallbackUsed = true
+      audit.fallbackReason = `Hard Abort: ${result.errors.filter(e => !e.repairable).map(e => e.rule).join(', ')}`
+      audit.validatorFailures.push(result.errors.map(e => `${e.rule}: ${e.message}`))
+      return {
+        block: buildFallbackBlock(blockId, blockType, facts, documentType),
+        audit,
+      }
+    }
+
+    // Record the findings of this round
+    audit.validatorFailures.push(result.errors.map(e => `${e.rule}: ${e.message}`))
+
+    // No repair attempts left?
+    if (attempt >= maxRepairAttempts) {
+      break
+    }
+
+    // Send the repair prompt
+    audit.repairAttempts++
+    try {
+      const repairPrompt = buildRepairPrompt(parsed, result.errors)
+      const repairedOutput = await llmCall(repairPrompt)
+      const repairedParsed = parseProseBlockOutput(repairedOutput)
+      // Unparseable repair output: keep the previous block for the next round.
+      if (repairedParsed) {
+        parsed = repairedParsed
+      }
+    } catch {
+      // LLM failure: next attempt or fallback.
+    }
+  }
+
+  // All attempts used up: fallback
+  audit.fallbackUsed = true
+  audit.fallbackReason = `${maxRepairAttempts} Repair-Versuche erschoepft`
+  return {
+    block: buildFallbackBlock(blockId, blockType, facts, documentType),
+    audit,
+  }
+}
@@ -0,0 +1,298 @@
+/**
+ * PII Sanitizer — Bereinigt Kontextdaten vor LLM-Aufruf
+ *
+ * Entfernt personenbezogene Daten (PII) aus AllowedFacts
+ * bevor sie an das LLM weitergegeben werden.
+ *
+ * Bei Fehler: Hard Abort — kein LLM-Aufruf ohne erfolgreiche Sanitization.
+ */
+
+import type { AllowedFacts } from './allowed-facts'
+
+// ============================================================================
+// Types
+// ============================================================================
+
+/**
+ * Fact budget after PII cleaning. The `__sanitized: true` marker is the
+ * property that `isSanitized` checks at runtime.
+ */
+export type SanitizedFacts = AllowedFacts & {
+  __sanitized: true
+}
+
+/** Audit record of a sanitization run. */
+export interface SanitizationAudit {
+  // Set to true by sanitizeAllowedFacts on every run.
+  sanitizationApplied: boolean
+  // Number of entries in redactedFieldNames.
+  redactedFieldsCount: number
+  // Names of the fields in which something was redacted (e.g. "specialFeatures[0]").
+  redactedFieldNames: string[]
+}
+
+/** Result of a sanitization run: the cleaned facts plus the audit record. */
+export interface SanitizationResult {
+  facts: SanitizedFacts
+  audit: SanitizationAudit
+}
+
+/**
+ * Error type for a failed sanitization (intended to trigger a hard abort).
+ * Carries the affected field name and a reason in addition to the message.
+ */
+export class SanitizationError extends Error {
+  public readonly field: string
+  public readonly reason: string
+
+  constructor(message: string, field: string, reason: string) {
+    super(message)
+    this.field = field
+    this.reason = reason
+    this.name = 'SanitizationError'
+  }
+}
+
+// ============================================================================
+// PII Detection Patterns
+// ============================================================================
+
+/**
+ * Regular expressions used to detect PII in string values.
+ *
+ * All patterns carry the `g` flag, so `test()` on them is stateful via
+ * `lastIndex`; the callers in this file reset `lastIndex` around their checks.
+ */
+const PII_PATTERNS = {
+  // E-mail addresses
+  email: /[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/g,
+  // Phone-like digit groups (optional country prefix, optional parentheses);
+  // matches short digit runs too, so callers apply a minimum digit count.
+  phone: /(\+?\d{1,3}[-.\s]?)?\(?\d{2,5}\)?[-.\s]?\d{3,10}/g,
+  // Dotted-quad IPv4 notation (octet ranges are not validated)
+  ipAddress: /\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b/g,
+  // UUID-shaped identifiers (8-4-4-4-12 hex digits)
+  internalId: /\b[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\b/gi,
+  // Tokens prefixed with sk-, pk- or an api-key marker, followed by 20+ alphanumerics
+  apiKey: /\b(sk-|pk-|api[_-]?key[_-]?)[a-zA-Z0-9]{20,}\b/gi,
+} as const
+
+// ============================================================================
+// Sanitizer
+// ============================================================================
+
+/**
+ * Returns a PII-cleaned copy of the given facts for use in an LLM call.
+ *
+ * String fields are run through `sanitizeString`, the location through
+ * `sanitizeAddress`. `employeeCount` is carried over as is, and the narrative
+ * tags are shallow-copied without inspection. The input object is not mutated.
+ *
+ * NOTE(review): the original documentation announces a SanitizationError for
+ * fields that cannot be cleaned; nothing in this function throws it.
+ *
+ * @param facts The facts to clean.
+ * @returns The cleaned facts (marked with `__sanitized: true`) and an audit
+ *          record listing the fields in which something was redacted.
+ */
+export function sanitizeAllowedFacts(facts: AllowedFacts): SanitizationResult {
+  const redactedFields: string[] = []
+
+  const scrub = (value: string, field: string): string =>
+    sanitizeString(value, field, redactedFields)
+  const scrubList = (values: string[], field: string): string[] =>
+    values.map((entry, index) => scrub(entry, `${field}[${index}]`))
+
+  // Property order below fixes the order of entries in the audit list.
+  const sanitized: AllowedFacts = {
+    ...facts,
+    // Company name is needed downstream, but is still checked for PII
+    companyName: scrub(facts.companyName, 'companyName'),
+    legalForm: scrub(facts.legalForm, 'legalForm'),
+    industry: scrub(facts.industry, 'industry'),
+    // Location: city/region may stay, street and house number may not
+    location: sanitizeAddress(facts.location, 'location', redactedFields),
+    teamStructure: scrub(facts.teamStructure, 'teamStructure'),
+    itLandscape: scrub(facts.itLandscape, 'itLandscape'),
+    specialFeatures: scrubList(facts.specialFeatures, 'specialFeatures'),
+    triggeredRegulations: scrubList(facts.triggeredRegulations, 'triggeredRegulations'),
+    primaryUseCases: scrubList(facts.primaryUseCases, 'primaryUseCases'),
+    // Narrative tags are copied unchanged
+    narrativeTags: { ...facts.narrativeTags },
+  }
+
+  const audit: SanitizationAudit = {
+    sanitizationApplied: true,
+    redactedFieldsCount: redactedFields.length,
+    redactedFieldNames: redactedFields,
+  }
+
+  return {
+    facts: { ...sanitized, __sanitized: true } as SanitizedFacts,
+    audit,
+  }
+}
+
+/**
+ * Type guard: reports whether the given value is a non-null object whose
+ * `__sanitized` property is exactly `true`.
+ */
+export function isSanitized(facts: unknown): facts is SanitizedFacts {
+  if (facts === null || typeof facts !== 'object') {
+    return false
+  }
+  if (!('__sanitized' in facts)) {
+    return false
+  }
+  return (facts as SanitizedFacts).__sanitized === true
+}
+
+// ============================================================================
+// Private Helpers
+// ============================================================================
+
+/**
+ * Removes PII from a single string value.
+ *
+ * Every match of the PII patterns is replaced by `[REDACTED]`. The
+ * structured patterns (e-mail, API key, UUID, IP address) run before the
+ * phone pattern: the phone pattern matches plain digit runs and would
+ * otherwise redact a fragment of an IP address or UUID, after which the
+ * specific pattern no longer matches and the rest of the identifier leaks.
+ *
+ * Only `String.prototype.replace` is used on the shared global regexes. It
+ * always scans from the start of the string, so a `lastIndex` left behind by
+ * another caller cannot cause a missed match.
+ *
+ * @param value          The value to clean; empty values are returned as is.
+ * @param fieldName      Name recorded in `redactedFields` when something was redacted.
+ * @param redactedFields Collector that is appended to (mutated) by this function.
+ * @returns The cleaned string.
+ */
+function sanitizeString(
+  value: string,
+  fieldName: string,
+  redactedFields: string[]
+): string {
+  if (!value) return value
+
+  const placeholder = '[REDACTED]'
+
+  const result = value
+    .replace(PII_PATTERNS.email, placeholder)
+    .replace(PII_PATTERNS.apiKey, placeholder)
+    .replace(PII_PATTERNS.internalId, placeholder)
+    .replace(PII_PATTERNS.ipAddress, placeholder)
+    // Phone numbers: only candidates with at least 6 digits are redacted
+    .replace(PII_PATTERNS.phone, (candidate: string): string =>
+      candidate.replace(/\D/g, '').length >= 6 ? placeholder : candidate
+    )
+
+  // Every replacement changes the text, so a difference means "redacted".
+  if (result !== value) {
+    redactedFields.push(fieldName)
+  }
+
+  return result
+}
+
+/**
+ * Cleans an address-like value: keeps city/region, removes street with house
+ * number and the 5-digit (German) postal code.
+ *
+ * Compared with a plain `\b`-anchored pattern this handles:
+ * - street names starting with an umlaut (`\b` is ASCII-only, so the first
+ *   letter would otherwise be left behind),
+ * - the ASCII spelling "strasse",
+ * - separately written or hyphenated names ("Berliner Straße 12",
+ *   "Karl-Marx-Allee 5", "Am Ring 3"),
+ * - a postal code at the end of the string or directly before punctuation.
+ * After a removal, dangling separators (leading/trailing commas, doubled
+ * commas, doubled spaces) are tidied up.
+ *
+ * @param value          The address value; empty values are returned as is.
+ * @param fieldName      Name recorded in `redactedFields` when something was removed.
+ * @param redactedFields Collector that is appended to (mutated) by this function.
+ * @returns The cleaned, trimmed value.
+ */
+function sanitizeAddress(
+  value: string,
+  fieldName: string,
+  redactedFields: string[]
+): string {
+  if (!value) return value
+
+  // Generic PII cleaning first (may already register the field as redacted)
+  const scrubbed = sanitizeString(value, fieldName, redactedFields)
+
+  // Street + house number (German). The lookbehind replaces `\b` so that the
+  // match can start on an umlaut but never in the middle of a word.
+  const streetPattern =
+    /(?<![A-Za-zÄÖÜäöüß])(?:[A-Za-zÄÖÜäöüß]+-)*[A-Za-zÄÖÜäöüß]+[ -]?(?:straße|strasse|str\.|weg|gasse|platz|allee|ring|damm)\s*\d+[a-z]?\b/gi
+  // Postal code: exactly five digits as a standalone number
+  const plzPattern = /\b\d{5}\b\s*/g
+
+  const stripped = scrubbed.replace(streetPattern, '').replace(plzPattern, '')
+  if (stripped === scrubbed) {
+    return scrubbed.trim()
+  }
+
+  if (!redactedFields.includes(fieldName)) {
+    redactedFields.push(fieldName)
+  }
+
+  return stripped
+    .replace(/\s*,(?:\s*,)+/g, ',') // ", ," left by a removal in the middle
+    .replace(/\s{2,}/g, ' ')
+    .replace(/^[\s,;]+|[\s,;]+$/g, '') // separators left at either end
+}
+
+/**
+ * Scans all string values of already sanitized facts for remaining PII
+ * (e-mail addresses, IP addresses, API keys).
+ *
+ * The patterns are shared, global regexes, so `test()` is stateful through
+ * `lastIndex`. It is reset before AND after every check: a successful match
+ * would otherwise leave a non-zero `lastIndex` behind and make the next user
+ * of the same pattern start scanning in the middle of its input.
+ *
+ * @param facts The sanitized facts to inspect.
+ * @returns One warning per finding; empty when nothing was found.
+ */
+export function validateNoRemainingPII(facts: SanitizedFacts): string[] {
+  const warnings: string[] = []
+  const allValues = extractAllStringValues(facts)
+
+  const containsMatch = (pattern: RegExp, value: string): boolean => {
+    pattern.lastIndex = 0
+    const found = pattern.test(value)
+    pattern.lastIndex = 0
+    return found
+  }
+
+  const checks: Array<{ pattern: RegExp; label: string }> = [
+    { pattern: PII_PATTERNS.email, label: 'Verbleibende E-Mail' },
+    { pattern: PII_PATTERNS.ipAddress, label: 'Verbleibende IP-Adresse' },
+    { pattern: PII_PATTERNS.apiKey, label: 'Verbleibender API-Key' },
+  ]
+
+  for (const { path, value } of allValues) {
+    if (path === '__sanitized') continue
+
+    for (const { pattern, label } of checks) {
+      if (containsMatch(pattern, value)) {
+        warnings.push(`${label} in ${path}`)
+      }
+    }
+  }
+
+  return warnings
+}
+
+/**
+ * Recursively collects every string value of an object together with its
+ * path. Nested objects extend the path with `.key`, array elements with
+ * `[index]`. Non-string primitives and null are ignored.
+ *
+ * @param obj    The object to walk.
+ * @param prefix Path of `obj` itself; empty for the root.
+ * @returns The strings found, in traversal order.
+ */
+function extractAllStringValues(
+  obj: Record<string, unknown>,
+  prefix = ''
+): Array<{ path: string; value: string }> {
+  const collected: Array<{ path: string; value: string }> = []
+
+  // Handles one non-array node: strings are recorded, objects are descended into.
+  const visit = (node: unknown, path: string): void => {
+    if (typeof node === 'string') {
+      collected.push({ path, value: node })
+      return
+    }
+    if (typeof node === 'object' && node !== null) {
+      collected.push(...extractAllStringValues(node as Record<string, unknown>, path))
+    }
+  }
+
+  for (const key of Object.keys(obj)) {
+    const val = obj[key]
+    const path = prefix ? `${prefix}.${key}` : key
+
+    if (Array.isArray(val)) {
+      val.forEach((item, index) => visit(item, `${path}[${index}]`))
+    } else {
+      visit(val, path)
+    }
+  }
+
+  return collected
+}
@@ -0,0 +1,184 @@
+/**
+ * Terminology Guide & Style Contract — Konsistente Fachbegriffe
+ *
+ * Stellt sicher, dass alle Prosa-Bloecke eines Dokuments
+ * dieselben Fachbegriffe und denselben Schreibstil verwenden.
+ *
+ * 100% deterministisch.
+ */
+
+// ============================================================================
+// Terminology Guide
+// ============================================================================
+
+/**
+ * Terminology tables, grouped by topic. Each table maps an English concept
+ * key to the German term to be used in the prose.
+ */
+export interface TerminologyGuide {
+  /** GDPR (DSGVO) terms */
+  dsgvo: Record<string, string>
+  /** TOM terms */
+  tom: Record<string, string>
+  /** General compliance terms */
+  general: Record<string, string>
+}
+
+/**
+ * Default terminology guide. It is the default argument of
+ * `terminologyToPromptString` and `checkTerminologyUsage`. Umlauts are
+ * written in ASCII transliteration (ae/oe/ue) throughout.
+ */
+export const DEFAULT_TERMINOLOGY: TerminologyGuide = {
+  dsgvo: {
+    controller: 'Verantwortlicher',
+    processor: 'Auftragsverarbeiter',
+    data_subject: 'betroffene Person',
+    processing: 'Verarbeitung',
+    personal_data: 'personenbezogene Daten',
+    consent: 'Einwilligung',
+    dpia: 'Datenschutz-Folgenabschaetzung (DSFA)',
+    legitimate_interest: 'berechtigtes Interesse',
+    data_breach: 'Verletzung des Schutzes personenbezogener Daten',
+    dpo: 'Datenschutzbeauftragter (DSB)',
+    supervisory_authority: 'Aufsichtsbehoerde',
+    ropa: 'Verzeichnis von Verarbeitungstaetigkeiten (VVT)',
+    retention_period: 'Aufbewahrungsfrist',
+    erasure: 'Loeschung',
+    restriction: 'Einschraenkung der Verarbeitung',
+    portability: 'Datenportabilitaet',
+    third_country: 'Drittland',
+    adequacy_decision: 'Angemessenheitsbeschluss',
+    scc: 'Standardvertragsklauseln (SCC)',
+  },
+  tom: {
+    access_control: 'Zutrittskontrolle',
+    access_management: 'Zugangskontrolle',
+    authorization: 'Zugriffskontrolle',
+    encryption: 'Verschluesselung',
+    pseudonymization: 'Pseudonymisierung',
+    availability: 'Verfuegbarkeitskontrolle',
+    resilience: 'Belastbarkeit',
+    recoverability: 'Wiederherstellbarkeit',
+    audit_logging: 'Protokollierung',
+    separation: 'Trennungsgebot',
+    input_control: 'Eingabekontrolle',
+    transport_control: 'Weitergabekontrolle',
+    order_control: 'Auftragskontrolle',
+  },
+  general: {
+    risk_assessment: 'Risikobewertung',
+    audit_trail: 'Pruefpfad',
+    compliance_level: 'Compliance-Tiefe',
+    gap_analysis: 'Lueckenanalyse',
+    remediation: 'Massnahmenplan',
+    incident_response: 'Vorfallreaktion',
+    business_continuity: 'Geschaeftskontinuitaet',
+    vendor_management: 'Dienstleistermanagement',
+    awareness_training: 'Sensibilisierungsschulung',
+  },
+}
+
+// ============================================================================
+// Style Contract
+// ============================================================================
+
+/** Style rules that the generated prose has to follow. */
+export interface StyleContract {
+  /** Form of address (only third-person company form is defined) */
+  addressing: '3rd_person_company'
+  /** Tone of voice */
+  tone: 'formal_legal_plain'
+  /** Forbidden kinds of wording */
+  forbid: string[]
+}
+
+/**
+ * Default style contract; the default argument of
+ * `styleContractToPromptString`. The `forbid` entries are category names
+ * that are listed verbatim in the prompt.
+ */
+export const DEFAULT_STYLE_CONTRACT: StyleContract = {
+  addressing: '3rd_person_company',
+  tone: 'formal_legal_plain',
+  forbid: [
+    'Denglisch',
+    'Marketing-Sprache',
+    'Superlative',
+    'Direkte Ansprache',
+    'Umgangssprache',
+    'Konjunktiv-Ketten',
+  ],
+}
+
+/**
+ * Concrete regex patterns for forbidden wording, evaluated by
+ * `checkStyleViolations`. The direct-address pattern is case-sensitive (it
+ * targets the capitalized formal pronouns); the others ignore case. None of
+ * the patterns is global, so `test()` on them is stateless.
+ */
+export const STYLE_VIOLATION_PATTERNS: Array<{ name: string; pattern: RegExp }> = [
+  { name: 'Direkte Ansprache', pattern: /\b(Sie|Ihr|Ihnen|Ihrem|Ihrer)\b/ },
+  { name: 'Superlative', pattern: /\b(bestmoeglich|hoechstmoeglich|optimal|perfekt|einzigartig)\b/i },
+  { name: 'Marketing-Sprache', pattern: /\b(revolutionaer|bahnbrechend|innovativ|fuehrend|erstklassig)\b/i },
+  { name: 'Umgangssprache', pattern: /\b(super|toll|mega|krass|cool|easy)\b/i },
+  { name: 'Denglisch', pattern: /\b(State of the Art|Best Practice|Compliance Journey|Data Driven)\b/i },
+]
+
+// ============================================================================
+// Serialization
+// ============================================================================
+
+/**
+ * Serializes the terminology guide for the LLM prompt.
+ *
+ * To stay within the token budget only the leading entries of each table are
+ * emitted: 10 GDPR terms, 6 TOM terms and 4 general terms, in that order.
+ *
+ * @param guide The guide to serialize; defaults to DEFAULT_TERMINOLOGY.
+ * @returns One `  key: "value"` line per term, joined by newlines.
+ */
+export function terminologyToPromptString(guide: TerminologyGuide = DEFAULT_TERMINOLOGY): string {
+  const budget: Array<[Record<string, string>, number]> = [
+    [guide.dsgvo, 10],
+    [guide.tom, 6],
+    [guide.general, 4],
+  ]
+
+  const lines: string[] = []
+  for (const [table, limit] of budget) {
+    for (const [key, value] of Object.entries(table).slice(0, limit)) {
+      lines.push(`  ${key}: "${value}"`)
+    }
+  }
+
+  return lines.join('\n')
+}
+
+/**
+ * Serializes the style contract for the LLM prompt.
+ *
+ * The address and tone lines are fixed texts; only the list of forbidden
+ * wording is taken from the contract.
+ *
+ * @param style The contract to serialize; defaults to DEFAULT_STYLE_CONTRACT.
+ * @returns Three newline-separated lines (address, tone, forbidden wording).
+ */
+export function styleContractToPromptString(style: StyleContract = DEFAULT_STYLE_CONTRACT): string {
+  const addressingLine = `Anrede: Dritte Person ("Die [Firmenname]...", NICHT "Sie...")`
+  const toneLine = `Ton: Professionell, juristisch korrekt, aber verstaendlich`
+  const forbiddenLine = `Verboten: ${style.forbid.join(', ')}`
+
+  return `${addressingLine}\n${toneLine}\n${forbiddenLine}`
+}
+
+// ============================================================================
+// Validation
+// ============================================================================
+
+/**
+ * Checks a text against STYLE_VIOLATION_PATTERNS.
+ *
+ * @param text The text to check.
+ * @returns One `Style-Verstoss: <name>` entry per matching pattern, in the
+ *          order of the pattern table; empty when nothing matches.
+ */
+export function checkStyleViolations(text: string): string[] {
+  return STYLE_VIOLATION_PATTERNS
+    .filter(({ pattern }) => pattern.test(text))
+    .map(({ name }) => `Style-Verstoss: ${name}`)
+}
+
+/**
+ * Looks for English technical terms that should have been written in German.
+ *
+ * Matching is a case-insensitive substring search over a fixed list of
+ * English terms; the suggested replacement comes from the guide.
+ *
+ * @param text  The text to check.
+ * @param guide Source of the German terms; defaults to DEFAULT_TERMINOLOGY.
+ * @returns One warning per English term found, in list order.
+ */
+export function checkTerminologyUsage(
+  text: string,
+  guide: TerminologyGuide = DEFAULT_TERMINOLOGY
+): string[] {
+  const haystack = text.toLowerCase()
+
+  // [English term to look for, German term to suggest]
+  const anglicisms: Array<[string, string]> = [
+    ['data controller', guide.dsgvo.controller],
+    ['data processor', guide.dsgvo.processor],
+    ['data subject', guide.dsgvo.data_subject],
+    ['personal data', guide.dsgvo.personal_data],
+    ['data breach', guide.dsgvo.data_breach],
+    ['encryption', guide.tom.encryption],
+    ['pseudonymization', guide.tom.pseudonymization],
+    ['risk assessment', guide.general.risk_assessment],
+  ]
+
+  return anglicisms
+    .filter(([english]) => haystack.includes(english.toLowerCase()))
+    .map(([english, german]) => `Englischer Begriff "${english}" gefunden — verwende "${german}"`)
+}