Initial commit: breakpilot-compliance - Compliance SDK Platform

Services: Admin-Compliance, Backend-Compliance, AI-Compliance-SDK, Consent-SDK, Developer-Portal, PCA-Platform, DSMS

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-11 23:47:28 +01:00
commit 4435e7ea0a
734 changed files with 251369 additions and 0 deletions
--- a/admin-compliance/lib/sdk/drafting-engine/intent-classifier.ts
+++ b/admin-compliance/lib/sdk/drafting-engine/intent-classifier.ts
@@ -0,0 +1,241 @@
+/**
+ * Intent Classifier - Leichtgewichtiger Pattern-Matcher
+ *
+ * Erkennt den Agent-Modus anhand des Nutzer-Inputs ohne LLM-Call.
+ * Deutsche und englische Muster werden unterstuetzt.
+ *
+ * Confidence-Schwellen:
+ * - >0.8: Hohe Sicherheit, automatisch anwenden
+ * - 0.6-0.8: Mittel, Nutzer kann bestaetigen
+ * - <0.6: Fallback zu 'explain'
+ */
+
+import type { AgentMode, IntentClassification } from './types'
+import type { ScopeDocumentType } from '../compliance-scope-types'
+
+// ============================================================================
+// Pattern Definitions
+// ============================================================================
+
+/** One agent mode together with the regexes that signal it. */
+interface ModePattern {
+  /** Agent mode proposed when at least one of `patterns` matches. */
+  mode: AgentMode
+  /** Regexes tested against the umlaut-transliterated user input (ae/oe/ue/ss). */
+  patterns: RegExp[]
+  /** Base confidence assigned when at least one pattern matches. */
+  baseConfidence: number
+}
+
+/**
+ * Mode detection table (German and English triggers).
+ *
+ * German entries are word *stems*: they are anchored with `\b` at the start
+ * only, so that inflected forms ("pruefe", "pruefen", "validierung") match.
+ * A closing `\b` is used only for complete words, or after an explicit list
+ * of allowed endings.
+ */
+const MODE_PATTERNS: ModePattern[] = [
+  {
+    mode: 'draft',
+    baseConfidence: 0.85,
+    patterns: [
+      /\b(erstell|generier|entw[iu]rf|entwer[ft]|schreib|verfass|formulier|anlege)/i,
+      /\b(draft|create|generate|write|compose)\b/i,
+      /\b(neues?\s+(?:vvt|tom|dsfa|dokument|loeschkonzept|datenschutzerklaerung))\b/i,
+      /\b(vorlage|template)\s+(erstell|generier)/i,
+      /\bfuer\s+(?:uns|mich|unser)\b.*\b(erstell|schreib)/i,
+    ],
+  },
+  {
+    mode: 'validate',
+    baseConfidence: 0.80,
+    patterns: [
+      // Verb stems must stay open-ended; "check" only accepts German verb
+      // endings so that e.g. "checkliste" does not trigger validation.
+      /\b(?:pruef|validier|kontrollier|ueberpruef)|\bcheck(?:e|en|st|t)?\b/i,
+      /\b(korrekt|richtig|vollstaendig|konsistent|komplett)\b.*\?/i,
+      // Possessives may be inflected ("meine", "unserem").
+      /\b(stimmt|passt)\b.*\b(das|(?:mein|unser)(?:e[mnrs]?)?)\b/i,
+      /\b(validate|verify|check|review)\b/i,
+      // Trailing verb stem is open-ended ("finde", "suchen", "zeige").
+      /\b(fehler|luecken?|maengel)\b.*\b(find|such|zeig)/i,
+      /\bcross[\s-]?check\b/i,
+      /\b(vvt|tom|dsfa)\b.*\b(konsisten[tz]|widerspruch|uebereinstimm)/i,
+    ],
+  },
+  {
+    mode: 'ask',
+    baseConfidence: 0.75,
+    patterns: [
+      /\bwas\s+fehlt\b/i,
+      /\b(luecken?|gaps?)\b.*\b(zeig|find|identifizier|analysier)/i,
+      // Adjectives may carry an ending ("offene", "unvollstaendigen"); the
+      // explicit ending list keeps "offenbar"/"offensichtlich" from matching.
+      /\b(unvollstaendig|unfertig|offen)(?:e[mnrs]?)?\b/i,
+      /\bwelche\s+(dokumente?|informationen?|daten)\b.*\b(fehlen?|brauch|benoetig)/i,
+      /\b(naechste[rn]?\s+schritt|next\s+step|todo)\b/i,
+      /\bworan\s+(muss|soll)\b/i,
+    ],
+  },
+]
+
+/**
+ * Document type detection table.
+ *
+ * Entries are evaluated top to bottom and the first entry with any matching
+ * pattern wins, so the order of this array is significant. All patterns are
+ * case-insensitive and written against umlaut-transliterated text
+ * (ae/oe/ue/ss).
+ */
+const DOCUMENT_TYPE_PATTERNS: Array<{
+  type: ScopeDocumentType
+  patterns: RegExp[]
+}> = [
+  {
+    type: 'vvt',
+    patterns: [
+      // Accepts both "vt" and "vvt" — NOTE(review): confirm "vt" is intended.
+      /\bv{1,2}t\b/i,
+      /\bverarbeitungsverzeichnis\b/i,
+      /\bverarbeitungstaetigkeit/i,
+      /\bprocessing\s+activit/i,
+      /\bart\.?\s*30\b/i,
+    ],
+  },
+  {
+    type: 'tom',
+    patterns: [
+      // Case-insensitive whole word, so the first name "Tom" matches as well.
+      /\btom\b/i,
+      /\btechnisch.*organisatorisch.*massnahm/i,
+      /\bart\.?\s*32\b/i,
+      /\bsicherheitsmassnahm/i,
+    ],
+  },
+  {
+    type: 'dsfa',
+    patterns: [
+      /\bdsfa\b/i,
+      /\bdatenschutz[\s-]?folgenabschaetzung\b/i,
+      /\bdpia\b/i,
+      /\bart\.?\s*35\b/i,
+      /\bimpact\s+assessment\b/i,
+    ],
+  },
+  {
+    type: 'dsi',
+    patterns: [
+      /\bdatenschutzerklaerung\b/i,
+      /\bprivacy\s+policy\b/i,
+      /\bdsi\b/i,
+      /\bart\.?\s*13\b/i,
+      /\bart\.?\s*14\b/i,
+    ],
+  },
+  {
+    type: 'lf',
+    patterns: [
+      /\bloeschfrist/i,
+      /\bloeschkonzept/i,
+      /\bretention/i,
+      /\baufbewahr/i,
+    ],
+  },
+  {
+    type: 'av_vertrag',
+    patterns: [
+      // Accepts both "av" and "avv".
+      /\bavv?\b/i,
+      /\bauftragsverarbeit/i,
+      /\bdata\s+processing\s+agreement/i,
+      /\bart\.?\s*28\b/i,
+    ],
+  },
+  {
+    type: 'betroffenenrechte',
+    patterns: [
+      /\bbetroffenenrecht/i,
+      /\bdata\s+subject\s+right/i,
+      /\bart\.?\s*15\b/i,
+      /\bauskunft/i,
+    ],
+  },
+  {
+    type: 'einwilligung',
+    patterns: [
+      /\beinwillig/i,
+      /\bconsent/i,
+      /\bcookie/i,
+    ],
+  },
+]
+
+// ============================================================================
+// Classifier
+// ============================================================================
+
+export class IntentClassifier {
+
+  /**
+   * Classifies the user's intent from a message using the regex tables only.
+   *
+   * Each mode in MODE_PATTERNS is scored as its base confidence plus 0.05 per
+   * additional matching pattern (at most +0.10); the highest score wins and
+   * ties keep the earlier table entry. A recognised document type adds a
+   * further 0.05. The result is capped at 0.99. Without any mode match the
+   * result is 'explain' with confidence 0.3 (0.35 if a document type was
+   * recognised).
+   *
+   * @param input - The raw user message
+   * @returns The chosen mode, its confidence, the sources of the matching
+   *   regexes and, if recognised, the document type
+   */
+  classify(input: string): IntentClassification {
+    const normalized = this.normalize(input)
+    let bestMatch: IntentClassification = {
+      mode: 'explain',
+      confidence: 0.3,
+      matchedPatterns: [],
+    }
+
+    for (const modePattern of MODE_PATTERNS) {
+      const matched = modePattern.patterns
+        .filter((pattern) => pattern.test(normalized))
+        .map((pattern) => pattern.source)
+
+      if (matched.length === 0) {
+        continue
+      }
+
+      // More matching patterns raise the confidence, up to two extra matches.
+      const matchBonus = Math.min(matched.length - 1, 2) * 0.05
+      const confidence = Math.min(modePattern.baseConfidence + matchBonus, 0.99)
+
+      // Strict comparison: on equal confidence the earlier mode is kept.
+      if (confidence > bestMatch.confidence) {
+        bestMatch = {
+          mode: modePattern.mode,
+          confidence,
+          matchedPatterns: matched,
+        }
+      }
+    }
+
+    // detectDocumentType normalizes again; normalize is idempotent, so passing
+    // the already normalized text is safe.
+    const detectedDocType = this.detectDocumentType(normalized)
+    if (detectedDocType) {
+      bestMatch.detectedDocumentType = detectedDocType
+      // A recognised document type raises the confidence slightly.
+      bestMatch.confidence = Math.min(bestMatch.confidence + 0.05, 0.99)
+    }
+
+    // Safety net: anything below 0.6 is reported as 'explain'.
+    if (bestMatch.confidence < 0.6) {
+      bestMatch.mode = 'explain'
+    }
+
+    return bestMatch
+  }
+
+  /**
+   * Detects the document type mentioned in the input.
+   *
+   * @param input - The user message (raw or already normalized)
+   * @returns The type of the first DOCUMENT_TYPE_PATTERNS entry with a
+   *   matching pattern, or `undefined` if none matches
+   */
+  detectDocumentType(input: string): ScopeDocumentType | undefined {
+    const normalized = this.normalize(input)
+
+    for (const docPattern of DOCUMENT_TYPE_PATTERNS) {
+      if (docPattern.patterns.some((pattern) => pattern.test(normalized))) {
+        return docPattern.type
+      }
+    }
+
+    return undefined
+  }
+
+  /**
+   * Prepares the input for pattern matching by transliterating German
+   * umlauts and sharp s to ASCII (ae, oe, ue, ss and Ae, Oe, Ue).
+   *
+   * The input is first composed to Unicode NFC so that decomposed umlauts
+   * (base letter followed by U+0308) are transliterated as well. Nothing else
+   * is changed: case, punctuation and other characters are kept as they are.
+   */
+  private normalize(input: string): string {
+    return input
+      .normalize('NFC')
+      .replace(/ä/g, 'ae')
+      .replace(/ö/g, 'oe')
+      .replace(/ü/g, 'ue')
+      .replace(/ß/g, 'ss')
+      .replace(/Ä/g, 'Ae')
+      .replace(/Ö/g, 'Oe')
+      .replace(/Ü/g, 'Ue')
+  }
+}
+
+/** Shared module-level instance of IntentClassifier. */
+export const intentClassifier = new IntentClassifier()