All checks were successful
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-ai-compliance (push) Successful in 35s
CI / test-python-backend-compliance (push) Successful in 32s
CI / test-python-document-crawler (push) Successful in 24s
CI / test-python-dsms-gateway (push) Successful in 23s
- DOCUMENT_SDK_STEP_MAP: 12 kaputte URLs korrigiert (z.B. /sdk/loeschkonzept → /sdk/loeschfristen) - Go Backend: iace_ce_assessment zur validTypes-Whitelist hinzugefuegt - SOUL-Datei: von 17 auf ~80 Zeilen erweitert (18 draftbare Typen, Redirects, operative Module) - Intent Classifier: 10 fehlende Dokumenttyp-Patterns + 5 Redirect-Patterns (Impressum/AGB/Widerruf → Document Generator) - State Projector: getExistingDocumentTypes von 6 auf 11 Checks erweitert (risks, escalations, iace, obligations, dsr) - DraftingEngineWidget: Gap-Banner fuer kritische Luecken mit Analysieren-Button - Cross-Validation: 4 neue deterministische Regeln (DSFA-NO-VVT, DSFA-NO-TOM, DSI-NO-LF, AV-NO-VVT) - Prose Blocks: 5 neue Dokumenttypen (av_vertrag, betroffenenrechte, risikoanalyse, notfallplan, iace_ce_assessment) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
374 lines
9.2 KiB
TypeScript
374 lines
9.2 KiB
TypeScript
/**
|
|
* Intent Classifier - Leichtgewichtiger Pattern-Matcher
|
|
*
|
|
* Erkennt den Agent-Modus anhand des Nutzer-Inputs ohne LLM-Call.
|
|
* Deutsche und englische Muster werden unterstuetzt.
|
|
*
|
|
* Confidence-Schwellen:
|
|
* - >0.8: Hohe Sicherheit, automatisch anwenden
|
|
* - 0.6-0.8: Mittel, Nutzer kann bestaetigen
|
|
* - <0.6: Fallback zu 'explain'
|
|
*/
|
|
|
|
import type { AgentMode, IntentClassification } from './types'
|
|
import type { ScopeDocumentType } from '../compliance-scope-types'
|
|
|
|
// ============================================================================
|
|
// Pattern Definitions
|
|
// ============================================================================
|
|
|
|
/**
 * One row of the mode detection table: an agent mode together with the
 * expressions that indicate it and the confidence granted on a match.
 */
interface ModePattern {
  /** Agent mode this row stands for. */
  mode: AgentMode
  /** Base confidence used as soon as one of the patterns matches. */
  baseConfidence: number
  /** Expressions that are tested against the user input. */
  patterns: Array<RegExp>
}
|
|
|
|
/**
 * Mode detection table.
 *
 * The patterns are written for input whose umlauts have been transliterated
 * (ae/oe/ue/ss), which is what the classifier's normalize step produces.
 *
 * German verb stems intentionally carry NO trailing `\b`: a boundary after a
 * stem such as "pruef" can never match, because the stem is always followed by
 * an inflection ending ("pruefe", "pruefen", "prueft").
 */
const MODE_PATTERNS: ModePattern[] = [
  {
    mode: 'draft',
    baseConfidence: 0.85,
    patterns: [
      /\b(erstell|generier|entw[iu]rf|entwer[ft]|schreib|verfass|formulier|anlege)/i,
      /\b(draft|create|generate|write|compose)\b/i,
      /\b(neues?\s+(?:vvt|tom|dsfa|dokument|loeschkonzept|datenschutzerklaerung))\b/i,
      /\b(vorlage|template)\s+(erstell|generier)/i,
      /\bfuer\s+(?:uns|mich|unser)\b.*\b(erstell|schreib)/i,
    ],
  },
  {
    mode: 'validate',
    baseConfidence: 0.80,
    patterns: [
      // Stems match as prefixes; only "check" stays a whole word so that
      // e.g. "checkliste" is still not treated as a validation request.
      /\b(pruef|validier|kontrollier|ueberpruef|check\b)/i,
      /\b(korrekt|richtig|vollstaendig|konsistent|komplett)\b.*\?/i,
      // "mein"/"unser" match as prefixes so "meine", "unsere", ... are covered.
      /\b(stimmt|passt)\b.*\b(das\b|mein|unser)/i,
      /\b(validate|verify|check|review)\b/i,
      // "fehler"/"maengel" accept inflected verbs ("Fehler finden"). The
      // "luecken" branch keeps its strict trailing boundary so that gap
      // queries like "Luecken zeigen" remain matched by the 'ask' mode only.
      /\b(?:(?:fehler|maengel)\b.*\b(?:find|such|zeig)|luecken?\b.*\b(?:find|such|zeig)\b)/i,
      /\bcross[\s-]?check\b/i,
      /\b(vvt|tom|dsfa)\b.*\b(konsisten[tz]|widerspruch|uebereinstimm)/i,
    ],
  },
  {
    mode: 'ask',
    baseConfidence: 0.75,
    patterns: [
      /\bwas\s+fehlt\b/i,
      /\b(luecken?|gaps?)\b.*\b(zeig|find|identifizier|analysier)/i,
      /\b(unvollstaendig|unfertig|offen)\b/i,
      /\bwelche\s+(dokumente?|informationen?|daten)\b.*\b(fehlen?|brauch|benoetig)/i,
      // Plural forms ("naechsten Schritte", "next steps", "todos") match too.
      /\b(?:naechste[rn]?\s+schritt|next\s+step|todos?\b)/i,
      /\bworan\s+(muss|soll)\b/i,
    ],
  },
]
|
|
|
|
/** Builds one detection entry from a document type and its trigger expressions. */
const docTypeEntry = (type: ScopeDocumentType, ...patterns: RegExp[]) => ({ type, patterns })

/**
 * Document type detection table.
 *
 * Entries are evaluated in array order and the first entry with a matching
 * expression wins, so the order of the rows is significant.
 */
const DOCUMENT_TYPE_PATTERNS: Array<{
  type: ScopeDocumentType
  patterns: RegExp[]
}> = [
  docTypeEntry(
    'vvt',
    /\bv{1,2}t\b/i,
    /\bverarbeitungsverzeichnis\b/i,
    /\bverarbeitungstaetigkeit/i,
    /\bprocessing\s+activit/i,
    /\bart\.?\s*30\b/i,
  ),
  docTypeEntry(
    'tom',
    /\btom\b/i,
    /\btechnisch.*organisatorisch.*massnahm/i,
    /\bart\.?\s*32\b/i,
    /\bsicherheitsmassnahm/i,
  ),
  docTypeEntry(
    'dsfa',
    /\bdsfa\b/i,
    /\bdatenschutz[\s-]?folgenabschaetzung\b/i,
    /\bdpia\b/i,
    /\bart\.?\s*35\b/i,
    /\bimpact\s+assessment\b/i,
  ),
  docTypeEntry(
    'dsi',
    /\bdatenschutzerklaerung\b/i,
    /\bprivacy\s+policy\b/i,
    /\bdsi\b/i,
    /\bart\.?\s*13\b/i,
    /\bart\.?\s*14\b/i,
  ),
  docTypeEntry(
    'lf',
    /\bloeschfrist/i,
    /\bloeschkonzept/i,
    /\bretention/i,
    /\baufbewahr/i,
  ),
  docTypeEntry(
    'av_vertrag',
    /\bavv?\b/i,
    /\bauftragsverarbeit/i,
    /\bdata\s+processing\s+agreement/i,
    /\bart\.?\s*28\b/i,
  ),
  docTypeEntry(
    'betroffenenrechte',
    /\bbetroffenenrecht/i,
    /\bdata\s+subject\s+right/i,
    /\bart\.?\s*15\b/i,
    /\bauskunft/i,
  ),
  docTypeEntry(
    'einwilligung',
    /\beinwillig/i,
    /\bconsent/i,
    /\bcookie/i,
  ),
  docTypeEntry(
    'datenpannen',
    /\bdatenpanne/i,
    /\bdata\s*breach/i,
    /\bart\.?\s*33\b/i,
    /\bsicherheitsvorfall/i,
    /\bincident/i,
    /\bmelde.*vorfall/i,
  ),
  docTypeEntry(
    'daten_transfer',
    /\bdrittland/i,
    /\btransfer/i,
    /\bscc\b/i,
    /\bstandardvertragsklausel/i,
    /\bart\.?\s*44\b/i,
  ),
  docTypeEntry(
    'vertragsmanagement',
    /\bvertragsmanagement/i,
    /\bcontract\s*management/i,
  ),
  docTypeEntry(
    'schulung',
    /\bschulung/i,
    /\btraining/i,
    /\bawareness/i,
    /\bmitarbeiterschulung/i,
  ),
  docTypeEntry(
    'audit_log',
    /\baudit/i,
    /\blogging\b/i,
    /\bprotokollierung/i,
    /\bart\.?\s*5\s*abs\.?\s*2\b/i,
  ),
  docTypeEntry(
    'risikoanalyse',
    /\brisikoanalyse/i,
    /\brisk\s*assessment/i,
    /\brisikobewertung/i,
  ),
  docTypeEntry(
    'notfallplan',
    /\bnotfallplan/i,
    /\bkrisenmanagement/i,
    /\bbusiness\s*continuity/i,
    /\bnotfall/i,
  ),
  docTypeEntry(
    'zertifizierung',
    /\bzertifizierung/i,
    /\biso\s*27001\b/i,
    /\biso\s*27701\b/i,
    /\bart\.?\s*42\b/i,
  ),
  docTypeEntry(
    'datenschutzmanagement',
    /\bdsms\b/i,
    /\bdatenschutzmanagement/i,
    /\bpdca/i,
  ),
  docTypeEntry(
    'iace_ce_assessment',
    /\biace\b/i,
    /\bce[\s-]?kennzeichnung/i,
    /\bai\s*act\b/i,
    /\bki[\s-]?verordnung/i,
  ),
]
|
|
|
|
// ============================================================================
|
|
// Redirect Patterns (nicht-draftbare Dokumente → Document Generator)
|
|
// ============================================================================
|
|
|
|
/**
 * Raw redirect table as [trigger expression, canned answer] pairs.
 * Covers document kinds that are answered with a pointer to the
 * document generator instead of being drafted.
 */
const REDIRECT_TABLE: ReadonlyArray<readonly [RegExp, string]> = [
  [
    /\bimpressum\b/i,
    'Impressum-Templates finden Sie unter /sdk/document-generator → Kategorie "Impressum". Der Drafting Agent erstellt keine Impressen, da diese nach DDG §5 unternehmensspezifisch sind.',
  ],
  [
    /\b(agb|allgemeine.?geschaefts)/i,
    'AGB-Vorlagen erstellen Sie im Document Generator unter /sdk/document-generator → Kategorie "AGB". Der Drafting Agent erstellt keine AGB, da diese nach BGB §305ff individuell gestaltet werden muessen.',
  ],
  [
    /\bwiderruf/i,
    'Widerrufs-Templates finden Sie unter /sdk/document-generator → Kategorie "Widerruf".',
  ],
  [
    /\bnda\b/i,
    'NDA-Vorlagen finden Sie unter /sdk/document-generator.',
  ],
  [
    /\bsla\b/i,
    'SLA-Vorlagen finden Sie unter /sdk/document-generator.',
  ],
]

/** Redirect entries in evaluation order; the first matching entry is used. */
const REDIRECT_PATTERNS: Array<{
  pattern: RegExp
  response: string
}> = REDIRECT_TABLE.map(([pattern, response]) => ({ pattern, response }))
|
|
|
|
// ============================================================================
|
|
// Classifier
|
|
// ============================================================================
|
|
|
|
/**
 * Pattern-based intent classifier.
 *
 * Determines the agent mode and, where possible, the referenced document type
 * of a user message by testing it against the module-level pattern tables.
 */
export class IntentClassifier {
  /**
   * Classifies the user's intent from the given message.
   *
   * Evaluation order:
   *  1. Redirect patterns: the first hit returns an 'explain' result
   *     (confidence 0.90) carrying the canned response.
   *  2. Mode patterns: the mode with the highest confidence wins; on equal
   *     confidence the mode listed first is kept.
   *  3. Document type detection: a hit adds 0.05 confidence (capped at 0.99).
   *  4. Results with confidence below 0.6 are forced to 'explain'.
   *
   * @param input - The user message
   * @returns Classification with mode, confidence, the sources of the matched
   *          patterns and, if found, the detected document type or a
   *          suggested response
   */
  classify(input: string): IntentClassification {
    const normalized = this.normalize(input)

    // Redirect check: documents that are not drafted here are answered
    // with a pointer to the document generator.
    for (const redirect of REDIRECT_PATTERNS) {
      if (redirect.pattern.test(normalized)) {
        return {
          mode: 'explain',
          confidence: 0.90,
          matchedPatterns: [redirect.pattern.source],
          suggestedResponse: redirect.response,
        }
      }
    }

    // Default when no mode pattern matches at all.
    let bestMatch: IntentClassification = {
      mode: 'explain',
      confidence: 0.3,
      matchedPatterns: [],
    }

    for (const modePattern of MODE_PATTERNS) {
      const matched: string[] = []

      for (const pattern of modePattern.patterns) {
        if (pattern.test(normalized)) {
          matched.push(pattern.source)
        }
      }

      if (matched.length > 0) {
        // Every additional matching pattern adds 0.05; at most two extra
        // matches are counted and the result never exceeds 0.99.
        const matchBonus = Math.min(matched.length - 1, 2) * 0.05
        const confidence = Math.min(modePattern.baseConfidence + matchBonus, 0.99)

        if (confidence > bestMatch.confidence) {
          bestMatch = {
            mode: modePattern.mode,
            confidence,
            matchedPatterns: matched,
          }
        }
      }
    }

    // Detect the document type; a hit raises the confidence slightly.
    const detectedDocType = this.detectDocumentType(normalized)
    if (detectedDocType) {
      bestMatch.detectedDocumentType = detectedDocType
      bestMatch.confidence = Math.min(bestMatch.confidence + 0.05, 0.99)
    }

    // Fallback: below 0.6 the mode is always 'explain'.
    if (bestMatch.confidence < 0.6) {
      bestMatch.mode = 'explain'
    }

    return bestMatch
  }

  /**
   * Detects the document type mentioned in the input.
   *
   * The table is scanned in order and the type of the first entry with a
   * matching pattern is returned.
   *
   * @param input - The user message (normalized again internally, which is
   *                harmless for already normalized text)
   * @returns The detected document type, or `undefined` if nothing matches
   */
  detectDocumentType(input: string): ScopeDocumentType | undefined {
    const normalized = this.normalize(input)

    for (const docPattern of DOCUMENT_TYPE_PATTERNS) {
      for (const pattern of docPattern.patterns) {
        if (pattern.test(normalized)) {
          return docPattern.type
        }
      }
    }

    return undefined
  }

  /**
   * Normalizes the input for pattern matching.
   *
   * Transliterates German umlauts and sharp s to their ASCII digraphs
   * (ae, oe, ue, ss). Nothing else is changed: case, punctuation and other
   * special characters are kept as they are.
   *
   * The text is NFC-composed first so that decomposed umlauts (base letter
   * followed by U+0308 COMBINING DIAERESIS) are transliterated as well;
   * without that step such input would not match any stem pattern.
   *
   * @param input - Raw user text
   * @returns The transliterated text
   */
  private normalize(input: string): string {
    return input
      .normalize('NFC')
      .replace(/ä/g, 'ae')
      .replace(/ö/g, 'oe')
      .replace(/ü/g, 'ue')
      .replace(/ß/g, 'ss')
      .replace(/Ä/g, 'Ae')
      .replace(/Ö/g, 'Oe')
      .replace(/Ü/g, 'Ue')
  }
}

/** Shared singleton instance of the classifier. */
export const intentClassifier = new IntentClassifier()
|