Initial commit: breakpilot-compliance - Compliance SDK Platform
Services: Admin-Compliance, Backend-Compliance, AI-Compliance-SDK, Consent-SDK, Developer-Portal, PCA-Platform, DSMS Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
241
admin-compliance/lib/sdk/drafting-engine/intent-classifier.ts
Normal file
241
admin-compliance/lib/sdk/drafting-engine/intent-classifier.ts
Normal file
@@ -0,0 +1,241 @@
|
||||
/**
|
||||
* Intent Classifier - Leichtgewichtiger Pattern-Matcher
|
||||
*
|
||||
* Erkennt den Agent-Modus anhand des Nutzer-Inputs ohne LLM-Call.
|
||||
* Deutsche und englische Muster werden unterstuetzt.
|
||||
*
|
||||
* Confidence-Schwellen:
|
||||
* - >0.8: Hohe Sicherheit, automatisch anwenden
|
||||
* - 0.6-0.8: Mittel, Nutzer kann bestaetigen
|
||||
* - <0.6: Fallback zu 'explain'
|
||||
*/
|
||||
|
||||
import type { AgentMode, IntentClassification } from './types'
|
||||
import type { ScopeDocumentType } from '../compliance-scope-types'
|
||||
|
||||
// ============================================================================
|
||||
// Pattern Definitions
|
||||
// ============================================================================
|
||||
|
||||
interface ModePattern {
|
||||
mode: AgentMode
|
||||
patterns: RegExp[]
|
||||
/** Base-Confidence wenn ein Pattern matched */
|
||||
baseConfidence: number
|
||||
}
|
||||
|
||||
const MODE_PATTERNS: ModePattern[] = [
|
||||
{
|
||||
mode: 'draft',
|
||||
baseConfidence: 0.85,
|
||||
patterns: [
|
||||
/\b(erstell|generier|entw[iu]rf|entwer[ft]|schreib|verfass|formulier|anlege)/i,
|
||||
/\b(draft|create|generate|write|compose)\b/i,
|
||||
/\b(neues?\s+(?:vvt|tom|dsfa|dokument|loeschkonzept|datenschutzerklaerung))\b/i,
|
||||
/\b(vorlage|template)\s+(erstell|generier)/i,
|
||||
/\bfuer\s+(?:uns|mich|unser)\b.*\b(erstell|schreib)/i,
|
||||
],
|
||||
},
|
||||
{
|
||||
mode: 'validate',
|
||||
baseConfidence: 0.80,
|
||||
patterns: [
|
||||
/\b(pruef|validier|check|kontrollier|ueberpruef)\b/i,
|
||||
/\b(korrekt|richtig|vollstaendig|konsistent|komplett)\b.*\?/i,
|
||||
/\b(stimmt|passt)\b.*\b(das|mein|unser)\b/i,
|
||||
/\b(validate|verify|check|review)\b/i,
|
||||
/\b(fehler|luecken?|maengel)\b.*\b(find|such|zeig)\b/i,
|
||||
/\bcross[\s-]?check\b/i,
|
||||
/\b(vvt|tom|dsfa)\b.*\b(konsisten[tz]|widerspruch|uebereinstimm)/i,
|
||||
],
|
||||
},
|
||||
{
|
||||
mode: 'ask',
|
||||
baseConfidence: 0.75,
|
||||
patterns: [
|
||||
/\bwas\s+fehlt\b/i,
|
||||
/\b(luecken?|gaps?)\b.*\b(zeig|find|identifizier|analysier)/i,
|
||||
/\b(unvollstaendig|unfertig|offen)\b/i,
|
||||
/\bwelche\s+(dokumente?|informationen?|daten)\b.*\b(fehlen?|brauch|benoetig)/i,
|
||||
/\b(naechste[rn]?\s+schritt|next\s+step|todo)\b/i,
|
||||
/\bworan\s+(muss|soll)\b/i,
|
||||
],
|
||||
},
|
||||
]
|
||||
|
||||
/** Dokumenttyp-Erkennung */
|
||||
const DOCUMENT_TYPE_PATTERNS: Array<{
|
||||
type: ScopeDocumentType
|
||||
patterns: RegExp[]
|
||||
}> = [
|
||||
{
|
||||
type: 'vvt',
|
||||
patterns: [
|
||||
/\bv{1,2}t\b/i,
|
||||
/\bverarbeitungsverzeichnis\b/i,
|
||||
/\bverarbeitungstaetigkeit/i,
|
||||
/\bprocessing\s+activit/i,
|
||||
/\bart\.?\s*30\b/i,
|
||||
],
|
||||
},
|
||||
{
|
||||
type: 'tom',
|
||||
patterns: [
|
||||
/\btom\b/i,
|
||||
/\btechnisch.*organisatorisch.*massnahm/i,
|
||||
/\bart\.?\s*32\b/i,
|
||||
/\bsicherheitsmassnahm/i,
|
||||
],
|
||||
},
|
||||
{
|
||||
type: 'dsfa',
|
||||
patterns: [
|
||||
/\bdsfa\b/i,
|
||||
/\bdatenschutz[\s-]?folgenabschaetzung\b/i,
|
||||
/\bdpia\b/i,
|
||||
/\bart\.?\s*35\b/i,
|
||||
/\bimpact\s+assessment\b/i,
|
||||
],
|
||||
},
|
||||
{
|
||||
type: 'dsi',
|
||||
patterns: [
|
||||
/\bdatenschutzerklaerung\b/i,
|
||||
/\bprivacy\s+policy\b/i,
|
||||
/\bdsi\b/i,
|
||||
/\bart\.?\s*13\b/i,
|
||||
/\bart\.?\s*14\b/i,
|
||||
],
|
||||
},
|
||||
{
|
||||
type: 'lf',
|
||||
patterns: [
|
||||
/\bloeschfrist/i,
|
||||
/\bloeschkonzept/i,
|
||||
/\bretention/i,
|
||||
/\baufbewahr/i,
|
||||
],
|
||||
},
|
||||
{
|
||||
type: 'av_vertrag',
|
||||
patterns: [
|
||||
/\bavv?\b/i,
|
||||
/\bauftragsverarbeit/i,
|
||||
/\bdata\s+processing\s+agreement/i,
|
||||
/\bart\.?\s*28\b/i,
|
||||
],
|
||||
},
|
||||
{
|
||||
type: 'betroffenenrechte',
|
||||
patterns: [
|
||||
/\bbetroffenenrecht/i,
|
||||
/\bdata\s+subject\s+right/i,
|
||||
/\bart\.?\s*15\b/i,
|
||||
/\bauskunft/i,
|
||||
],
|
||||
},
|
||||
{
|
||||
type: 'einwilligung',
|
||||
patterns: [
|
||||
/\beinwillig/i,
|
||||
/\bconsent/i,
|
||||
/\bcookie/i,
|
||||
],
|
||||
},
|
||||
]
|
||||
|
||||
// ============================================================================
|
||||
// Classifier
|
||||
// ============================================================================
|
||||
|
||||
export class IntentClassifier {
|
||||
|
||||
/**
|
||||
* Klassifiziert die Nutzerabsicht anhand des Inputs.
|
||||
*
|
||||
* @param input - Die Nutzer-Nachricht
|
||||
* @returns IntentClassification mit Mode, Confidence, Patterns
|
||||
*/
|
||||
classify(input: string): IntentClassification {
|
||||
const normalized = this.normalize(input)
|
||||
let bestMatch: IntentClassification = {
|
||||
mode: 'explain',
|
||||
confidence: 0.3,
|
||||
matchedPatterns: [],
|
||||
}
|
||||
|
||||
for (const modePattern of MODE_PATTERNS) {
|
||||
const matched: string[] = []
|
||||
|
||||
for (const pattern of modePattern.patterns) {
|
||||
if (pattern.test(normalized)) {
|
||||
matched.push(pattern.source)
|
||||
}
|
||||
}
|
||||
|
||||
if (matched.length > 0) {
|
||||
// Mehr Matches = hoehere Confidence (bis zum Maximum)
|
||||
const matchBonus = Math.min(matched.length - 1, 2) * 0.05
|
||||
const confidence = Math.min(modePattern.baseConfidence + matchBonus, 0.99)
|
||||
|
||||
if (confidence > bestMatch.confidence) {
|
||||
bestMatch = {
|
||||
mode: modePattern.mode,
|
||||
confidence,
|
||||
matchedPatterns: matched,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Dokumenttyp erkennen
|
||||
const detectedDocType = this.detectDocumentType(normalized)
|
||||
if (detectedDocType) {
|
||||
bestMatch.detectedDocumentType = detectedDocType
|
||||
// Dokumenttyp-Erkennung erhoeht Confidence leicht
|
||||
bestMatch.confidence = Math.min(bestMatch.confidence + 0.05, 0.99)
|
||||
}
|
||||
|
||||
// Fallback: Bei Confidence <0.6 immer 'explain'
|
||||
if (bestMatch.confidence < 0.6) {
|
||||
bestMatch.mode = 'explain'
|
||||
}
|
||||
|
||||
return bestMatch
|
||||
}
|
||||
|
||||
/**
|
||||
* Erkennt den Dokumenttyp aus dem Input.
|
||||
*/
|
||||
detectDocumentType(input: string): ScopeDocumentType | undefined {
|
||||
const normalized = this.normalize(input)
|
||||
|
||||
for (const docPattern of DOCUMENT_TYPE_PATTERNS) {
|
||||
for (const pattern of docPattern.patterns) {
|
||||
if (pattern.test(normalized)) {
|
||||
return docPattern.type
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return undefined
|
||||
}
|
||||
|
||||
/**
|
||||
* Normalisiert den Input fuer Pattern-Matching.
|
||||
* Ersetzt Umlaute, entfernt Sonderzeichen.
|
||||
*/
|
||||
private normalize(input: string): string {
|
||||
return input
|
||||
.replace(/ä/g, 'ae')
|
||||
.replace(/ö/g, 'oe')
|
||||
.replace(/ü/g, 'ue')
|
||||
.replace(/ß/g, 'ss')
|
||||
.replace(/Ä/g, 'Ae')
|
||||
.replace(/Ö/g, 'Oe')
|
||||
.replace(/Ü/g, 'Ue')
|
||||
}
|
||||
}
|
||||
|
||||
/** Singleton-Instanz */
|
||||
export const intentClassifier = new IntentClassifier()
|
||||
Reference in New Issue
Block a user