This repository has been archived on 2026-02-15. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
breakpilot-pwa/admin-v2/lib/sdk/drafting-engine/intent-classifier.ts
BreakPilot Dev 206183670d feat(sdk): Add Drafting Engine with 4-mode agent system (Explain/Ask/Draft/Validate)
Extends the Compliance Advisor from a Q&A chatbot into a full drafting engine
that can generate, validate, and refine compliance documents within Scope Engine
constraints. Includes intent classifier, state projector, constraint enforcer,
SOUL templates, Go backend endpoints, and React UI components.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-11 12:37:18 +01:00

242 lines
6.1 KiB
TypeScript

/**
* Intent Classifier - Leichtgewichtiger Pattern-Matcher
*
* Erkennt den Agent-Modus anhand des Nutzer-Inputs ohne LLM-Call.
* Deutsche und englische Muster werden unterstuetzt.
*
* Confidence-Schwellen:
* - >0.8: Hohe Sicherheit, automatisch anwenden
* - 0.6-0.8: Mittel, Nutzer kann bestaetigen
* - <0.6: Fallback zu 'explain'
*/
import type { AgentMode, IntentClassification } from './types'
import type { ScopeDocumentType } from '../compliance-scope-types'
// ============================================================================
// Pattern Definitions
// ============================================================================
/**
 * One row of the mode-detection table: the expressions that signal an agent
 * mode together with the confidence assigned when one of them matches.
 */
interface ModePattern {
  /** Base confidence used when a pattern of this row matches. */
  baseConfidence: number
  /** Agent mode this row stands for. */
  mode: AgentMode
  /** Regular expressions tested against the user input. */
  patterns: Array<RegExp>
}
/**
 * Mode-detection table for the non-default agent modes.
 *
 * All expressions are case-insensitive and written against transliterated
 * text (ae/oe/ue/ss instead of umlauts and sharp s). German verbs are listed
 * as stems WITHOUT a trailing word boundary so that inflected forms
 * ("erstelle", "pruefen", "validiert") match as well; complete words keep
 * their trailing `\b`.
 */
const MODE_PATTERNS: ModePattern[] = [
  {
    mode: 'draft',
    baseConfidence: 0.85,
    patterns: [
      // German verb stems for creating/writing something.
      /\b(erstell|generier|entw[iu]rf|entwer[ft]|schreib|verfass|formulier|anlege)/i,
      // English equivalents (whole words only).
      /\b(draft|create|generate|write|compose)\b/i,
      // "neues VVT", "neue TOM", ... - a new document of a known kind.
      /\b(neues?\s+(?:vvt|tom|dsfa|dokument|loeschkonzept|datenschutzerklaerung))\b/i,
      /\b(vorlage|template)\s+(erstell|generier)/i,
      // "fuer uns ... erstellen/schreiben".
      /\bfuer\s+(?:uns|mich|unser)\b.*\b(erstell|schreib)/i,
    ],
  },
  {
    mode: 'validate',
    baseConfidence: 0.80,
    patterns: [
      // German verb stems match any inflection ("pruefe", "validieren", ...);
      // only the complete word "check" keeps its trailing boundary so that
      // e.g. "checkliste" does not count as a validation request.
      /\b(pruef|validier|kontrollier|ueberpruef|check\b)/i,
      // Correctness adjective followed by a question mark.
      /\b(korrekt|richtig|vollstaendig|konsistent|komplett)\b.*\?/i,
      /\b(stimmt|passt)\b.*\b(das|mein|unser)\b/i,
      /\b(validate|verify|check|review)\b/i,
      // NOTE(review): the trailing \b limits this to the bare stems
      // "find/such/zeig". Loosening it would make gap questions such as
      // "luecken ... finden" resolve to 'validate' instead of the 'ask'
      // gap pattern below - left unchanged, confirm the intended mode.
      /\b(fehler|luecken?|maengel)\b.*\b(find|such|zeig)\b/i,
      /\bcross[\s-]?check\b/i,
      // Consistency question about a specific document type.
      /\b(vvt|tom|dsfa)\b.*\b(konsisten[tz]|widerspruch|uebereinstimm)/i,
    ],
  },
  {
    mode: 'ask',
    baseConfidence: 0.75,
    patterns: [
      /\bwas\s+fehlt\b/i,
      /\b(luecken?|gaps?)\b.*\b(zeig|find|identifizier|analysier)/i,
      /\b(unvollstaendig|unfertig|offen)\b/i,
      /\bwelche\s+(dokumente?|informationen?|daten)\b.*\b(fehlen?|brauch|benoetig)/i,
      // Singular and plural: "naechster Schritt", "naechsten Schritte",
      // "next step(s)", "todo(s)".
      /\b(naechste[rn]?\s+schritt(?:en?)?|next\s+steps?|todos?)\b/i,
      /\bworan\s+(muss|soll)\b/i,
    ],
  },
]
/**
 * Document-type detection table.
 *
 * Entries are listed in priority order; each one pairs a document type with
 * the case-insensitive expressions (abbreviation, German/English name,
 * article reference) that indicate it.
 */
const DOCUMENT_TYPE_PATTERNS: { type: ScopeDocumentType; patterns: RegExp[] }[] = [
  // Record of processing activities.
  {
    type: 'vvt',
    patterns: [
      /\bv{1,2}t\b/i,
      /\bverarbeitungsverzeichnis\b/i,
      /\bverarbeitungstaetigkeit/i,
      /\bprocessing\s+activit/i,
      /\bart\.?\s*30\b/i,
    ],
  },
  // Technical and organisational measures.
  {
    type: 'tom',
    patterns: [
      /\btom\b/i,
      /\btechnisch.*organisatorisch.*massnahm/i,
      /\bart\.?\s*32\b/i,
      /\bsicherheitsmassnahm/i,
    ],
  },
  // Data protection impact assessment.
  {
    type: 'dsfa',
    patterns: [
      /\bdsfa\b/i,
      /\bdatenschutz[\s-]?folgenabschaetzung\b/i,
      /\bdpia\b/i,
      /\bart\.?\s*35\b/i,
      /\bimpact\s+assessment\b/i,
    ],
  },
  // Privacy policy.
  {
    type: 'dsi',
    patterns: [
      /\bdatenschutzerklaerung\b/i,
      /\bprivacy\s+policy\b/i,
      /\bdsi\b/i,
      /\bart\.?\s*13\b/i,
      /\bart\.?\s*14\b/i,
    ],
  },
  // Deletion periods / retention concept.
  {
    type: 'lf',
    patterns: [/\bloeschfrist/i, /\bloeschkonzept/i, /\bretention/i, /\baufbewahr/i],
  },
  // Data processing agreement.
  {
    type: 'av_vertrag',
    patterns: [
      /\bavv?\b/i,
      /\bauftragsverarbeit/i,
      /\bdata\s+processing\s+agreement/i,
      /\bart\.?\s*28\b/i,
    ],
  },
  // Data subject rights.
  {
    type: 'betroffenenrechte',
    patterns: [
      /\bbetroffenenrecht/i,
      /\bdata\s+subject\s+right/i,
      /\bart\.?\s*15\b/i,
      /\bauskunft/i,
    ],
  },
  // Consent.
  {
    type: 'einwilligung',
    patterns: [/\beinwillig/i, /\bconsent/i, /\bcookie/i],
  },
]
// ============================================================================
// Classifier
// ============================================================================
/**
 * Rule-based intent classifier.
 *
 * Maps a user message to an agent mode by testing it against the regular
 * expressions in `MODE_PATTERNS` and detects a referenced document type via
 * `DOCUMENT_TYPE_PATTERNS`. The class keeps no instance state.
 */
export class IntentClassifier {
  /**
   * Classifies the user's intent.
   *
   * For every mode row with at least one matching pattern the confidence is
   * the row's base confidence plus 0.05 per additional matching pattern
   * (at most two extra matches count), capped at 0.99. The row with the
   * strictly highest confidence wins; on a tie the earlier row is kept.
   * A detected document type adds another 0.05 (same cap). Without any
   * match, or when the final confidence is below 0.6, the mode is 'explain'.
   *
   * @param input - The user message.
   * @returns The chosen mode, its confidence, the `source` strings of the
   *   matching patterns and, if recognised, the document type.
   */
  classify(input: string): IntentClassification {
    const normalized = this.normalize(input)

    // Default result when no mode pattern matches.
    let bestMatch: IntentClassification = {
      mode: 'explain',
      confidence: 0.3,
      matchedPatterns: [],
    }

    for (const { mode, baseConfidence, patterns } of MODE_PATTERNS) {
      const matched = patterns
        .filter((pattern) => pattern.test(normalized))
        .map((pattern) => pattern.source)
      if (matched.length === 0) {
        continue
      }

      // More matching patterns raise the confidence, up to a fixed maximum.
      const matchBonus = Math.min(matched.length - 1, 2) * 0.05
      const confidence = Math.min(baseConfidence + matchBonus, 0.99)
      if (confidence > bestMatch.confidence) {
        bestMatch = { mode, confidence, matchedPatterns: matched }
      }
    }

    // The text is already normalized, so match the document type directly.
    const detectedDocType = this.matchDocumentType(normalized)
    if (detectedDocType !== undefined) {
      bestMatch.detectedDocumentType = detectedDocType
      // Recognising a document type slightly increases the confidence.
      bestMatch.confidence = Math.min(bestMatch.confidence + 0.05, 0.99)
    }

    // Fallback: low-confidence results are always treated as 'explain'.
    if (bestMatch.confidence < 0.6) {
      bestMatch.mode = 'explain'
    }
    return bestMatch
  }

  /**
   * Detects the document type mentioned in the input.
   *
   * @param input - Raw user text; it is normalized before matching.
   * @returns The type of the first table entry with a matching pattern, or
   *   `undefined` when nothing matches.
   */
  detectDocumentType(input: string): ScopeDocumentType | undefined {
    return this.matchDocumentType(this.normalize(input))
  }

  /** Looks up the document type for text that has already been normalized. */
  private matchDocumentType(normalized: string): ScopeDocumentType | undefined {
    for (const { type, patterns } of DOCUMENT_TYPE_PATTERNS) {
      if (patterns.some((pattern) => pattern.test(normalized))) {
        return type
      }
    }
    return undefined
  }

  /**
   * Prepares the input for pattern matching by transliterating German
   * umlauts and sharp s to their ASCII digraphs (ae, oe, ue, ss). All other
   * characters, including punctuation and letter case, are left untouched.
   *
   * The text is composed to NFC first so that decomposed umlauts (base
   * letter followed by U+0308 COMBINING DIAERESIS) are transliterated too.
   * The expressions use escapes so they do not depend on how this source
   * file itself is encoded.
   */
  private normalize(input: string): string {
    return input
      .normalize('NFC')
      .replace(/\u00e4/g, 'ae') // ä
      .replace(/\u00f6/g, 'oe') // ö
      .replace(/\u00fc/g, 'ue') // ü
      .replace(/\u00df/g, 'ss') // ß
      .replace(/\u1e9e/g, 'SS') // ẞ (capital sharp s)
      .replace(/\u00c4/g, 'Ae') // Ä
      .replace(/\u00d6/g, 'Oe') // Ö
      .replace(/\u00dc/g, 'Ue') // Ü
  }
}
/** Shared, ready-to-use classifier instance exported alongside the class. */
export const intentClassifier = new IntentClassifier()