Multi-Branche-Auswahl im CompanyProfile, erweiterte allowed-facts fuer Drafting Engine, Demo-Daten und TOM-Generator Anpassungen. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
280 lines
8.5 KiB
TypeScript
280 lines
8.5 KiB
TypeScript
/**
 * Allowed Facts Governance — controlled fact budget for the LLM
 *
 * Defines which facts the LLM may use in prose blocks
 * and which topics are explicitly forbidden.
 *
 * Prevents hallucinations through explicit whitelisting.
 */
|
|
|
|
import type { SDKState, CompanyProfile } from '../types'
|
|
import type { NarrativeTags } from './narrative-tags'
|
|
|
|
// ============================================================================
|
|
// Types
|
|
// ============================================================================
|
|
|
|
/**
 * Explicit fact budget for the LLM.
 *
 * Groups the company profile, organisational, and compliance values that
 * generated prose is allowed to reference.
 */
export interface AllowedFacts {
  // --- Company profile -----------------------------------------------------

  /** Name of the company. */
  companyName: string
  /** Legal form of the company; may be an empty string. */
  legalForm: string
  /** Industry description; may be an empty string. */
  industry: string
  /** Location of the company; may be an empty string. */
  location: string
  /** Number of employees as a single figure. */
  employeeCount: number

  // --- Organisation --------------------------------------------------------

  /** Short label describing the team/organisation structure. */
  teamStructure: string
  /** Short label describing the IT landscape. */
  itLandscape: string
  /** Additional notable characteristics of the company. */
  specialFeatures: string[]

  // --- Compliance context --------------------------------------------------

  /** Names of the regulations that apply. */
  triggeredRegulations: string[]
  /** Names of the primary use cases. */
  primaryUseCases: string[]

  // --- Narrative tags (deterministic) --------------------------------------

  /** Narrative tags passed through alongside the facts. */
  narrativeTags: NarrativeTags
}
|
|
|
|
/**
 * Rules describing which topics are allowed and which are forbidden.
 */
export interface FactPolicy {
  /** Topics that may be addressed. */
  allowedTopics: string[]
  /** Topics that must not be addressed. */
  disallowedTopics: string[]
}
|
|
|
|
// ============================================================================
|
|
// Default Policy
|
|
// ============================================================================
|
|
|
|
/**
 * Policy used whenever a caller does not supply its own FactPolicy.
 *
 * The topic labels are emitted verbatim into prompts, so they stay in German.
 */
export const DEFAULT_FACT_POLICY: FactPolicy = {
  // Topics the LLM may write about.
  allowedTopics: [
    'Branche',
    'Unternehmensgroesse',
    'Teamstruktur',
    'IT-Strategie',
    'Regulatorischer Kontext',
    'Anwendungsfaelle',
    'Organisationsform',
    'Standort',
    'Rechtsform',
  ],

  // Topics the LLM must never mention.
  disallowedTopics: [
    // Business figures
    'Umsatz',
    'Gewinn',
    'Kundenzahlen',
    'konkrete Zertifizierungen',
    'interne Tool-Namen',
    // Personal and contact data
    'Personennamen',
    'E-Mail-Adressen',
    'Telefonnummern',
    'IP-Adressen',
    // Concrete assessment values
    'konkrete Prozentwerte',
    'konkrete Scores',
    'Compliance-Level-Bezeichnungen',
    // Internal or confidential material
    'interne Projektnamen',
    'Passwoerter',
    'API-Keys',
    'Vertragsinhalte',
    'Gehaltsinformationen',
  ],
}
|
|
|
|
// ============================================================================
|
|
// Builder
|
|
// ============================================================================
|
|
|
|
/**
 * Builds the AllowedFacts object from the SDK state.
 *
 * Only a fixed set of profile and scope values is copied or derived; nothing
 * else from the state is passed on.
 *
 * @param state - SDK state; `companyProfile`, `complianceScope` and the use
 *   cases are read from it.
 * @param narrativeTags - Narrative tags, passed through unchanged.
 * @returns The facts, with fallbacks for missing profile values
 *   ('Unbekannt' for the company name, empty strings otherwise).
 */
export function buildAllowedFacts(
  state: SDKState,
  narrativeTags: NarrativeTags
): AllowedFacts {
  const { companyProfile: profile, complianceScope: scope } = state

  // The industry may be a list (multi-selection) or a single value.
  const rawIndustry = profile?.industry
  const industry = Array.isArray(rawIndustry)
    ? rawIndustry.join(', ')
    : (rawIndustry ?? '')

  const facts: AllowedFacts = {
    // Company profile
    companyName: profile?.companyName ?? 'Unbekannt',
    legalForm: profile?.legalForm ?? '',
    industry,
    location: profile?.headquartersCity ?? '',
    employeeCount: parseEmployeeCount(profile?.employeeCount),

    // Organisation
    teamStructure: deriveTeamStructure(profile),
    itLandscape: deriveItLandscape(profile),
    specialFeatures: deriveSpecialFeatures(profile),

    // Compliance context
    triggeredRegulations: deriveTriggeredRegulations(scope),
    primaryUseCases: derivePrimaryUseCases(state),

    narrativeTags,
  }

  return facts
}
|
|
|
|
// ============================================================================
|
|
// Serialization
|
|
// ============================================================================
|
|
|
|
/**
 * Serializes AllowedFacts into a bullet list for the LLM prompt.
 *
 * The six scalar entries are always emitted; empty values (and an employee
 * count of 0) are rendered as 'nicht angegeben'. The three list entries are
 * only emitted when the respective list is non-empty.
 *
 * @param facts - Facts to serialize.
 * @returns Newline-separated lines, each starting with "- ".
 */
export function allowedFactsToPromptString(facts: AllowedFacts): string {
  const legalFormSuffix = facts.legalForm ? ` (${facts.legalForm})` : ''
  const { triggeredRegulations, primaryUseCases, specialFeatures } = facts

  return [
    `- Firma: ${facts.companyName}${legalFormSuffix}`,
    `- Branche: ${facts.industry || 'nicht angegeben'}`,
    `- Standort: ${facts.location || 'nicht angegeben'}`,
    `- Mitarbeiter: ${facts.employeeCount || 'nicht angegeben'}`,
    `- Teamstruktur: ${facts.teamStructure || 'nicht angegeben'}`,
    `- IT-Umgebung: ${facts.itLandscape || 'nicht angegeben'}`,
    // Optional entries: contribute a line only when there is something to list.
    ...(triggeredRegulations.length > 0
      ? [`- Relevante Regulierungen: ${triggeredRegulations.join(', ')}`]
      : []),
    ...(primaryUseCases.length > 0
      ? [`- Anwendungsfaelle: ${primaryUseCases.join(', ')}`]
      : []),
    ...(specialFeatures.length > 0
      ? [`- Besonderheiten: ${specialFeatures.join(', ')}`]
      : []),
  ].join('\n')
}
|
|
|
|
/**
 * Serializes the disallowed topics of a policy for the LLM prompt.
 *
 * @param policy - Policy whose `disallowedTopics` are listed; defaults to
 *   DEFAULT_FACT_POLICY.
 * @returns One "- <topic>" line per topic, joined with newlines (empty string
 *   when there are no topics).
 */
export function disallowedTopicsToPromptString(policy: FactPolicy = DEFAULT_FACT_POLICY): string {
  const bullets: string[] = []
  for (const topic of policy.disallowedTopics) {
    bullets.push(`- ${topic}`)
  }
  return bullets.join('\n')
}
|
|
|
|
// ============================================================================
|
|
// Validation
|
|
// ============================================================================
|
|
|
|
/**
 * Checks whether a text potentially contains forbidden content.
 *
 * The checks are pattern-based and run in a fixed order: percentages, scores,
 * compliance-level labels (L1-L4), e-mail addresses, phone numbers, IP
 * addresses, and direct address of the reader (Sie/Ihr...). Each check
 * contributes at most one message.
 *
 * @param text - Text to inspect.
 * @param policy - Kept for API compatibility. The current checks are
 *   hard-coded patterns and do not read the policy.
 * @returns The violation messages in check order; empty when nothing matched.
 */
export function checkForDisallowedContent(
  text: string,
  policy: FactPolicy = DEFAULT_FACT_POLICY
): string[] {
  const violations: string[] = []

  // Percentages, e.g. "95 %" or "95%".
  if (/\d+\s*%/.test(text)) {
    violations.push('Konkrete Prozentwerte gefunden')
  }

  // Score patterns, e.g. "Score: 85".
  if (/score[:\s]*\d+/i.test(text)) {
    violations.push('Konkrete Scores gefunden')
  }

  // Compliance-level labels.
  if (/\b(L1|L2|L3|L4)\b/.test(text)) {
    violations.push('Compliance-Level-Bezeichnungen (L1-L4) gefunden')
  }

  // E-mail addresses.
  if (/[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/.test(text)) {
    violations.push('E-Mail-Adresse gefunden')
  }

  // Phone numbers. Plausible IPv4 addresses (all octets <= 255) are masked
  // first: otherwise "192.168" alone satisfies the phone pattern and every IP
  // address would be reported twice. Dotted sequences that cannot be an IP
  // are left in place so they are still considered as phone numbers.
  const textWithoutIps = text.replace(
    /\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b/g,
    candidate =>
      candidate.split('.').every(octet => Number(octet) <= 255) ? '#' : candidate
  )
  const phoneCandidates =
    textWithoutIps.match(/(\+?\d{1,3}[-.\s]?)?\(?\d{2,5}\)?[-.\s]?\d{3,10}/g) ?? []
  // Require at least six digits so short figures such as an employee count
  // are not mistaken for a phone number.
  if (phoneCandidates.some(candidate => candidate.replace(/\D/g, '').length >= 6)) {
    violations.push('Telefonnummer gefunden')
  }

  // IP addresses (checked on the unmasked text).
  if (/\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b/.test(text)) {
    violations.push('IP-Adresse gefunden')
  }

  // Direct address: Sie, Ihnen, and all forms of the possessive
  // (Ihr, Ihre, Ihrem, Ihren, Ihrer, Ihres). Case-sensitive on purpose, so
  // lower-case "sie"/"ihr" are not flagged.
  if (/\b(Sie|Ihnen|Ihr(?:e[mnrs]?)?)\b/.test(text)) {
    violations.push('Direkte Ansprache (Sie/Ihr) gefunden')
  }

  return violations
}
|
|
|
|
// ============================================================================
|
|
// Private Helpers
|
|
// ============================================================================
|
|
|
|
/**
 * Parses an employee-count string (e.g. "1-9", "50-249", "1000+") into a number.
 *
 * - "N+" yields the lower bound N.
 * - "A-B" yields the rounded midpoint of the range.
 * - Anything else is passed to `parseInt`; unparseable input yields 0.
 *
 * Leading/trailing whitespace, whitespace around the separator, and en/em
 * dashes as range separators are tolerated, so " 50 – 249 " is read as a
 * range instead of falling through to `parseInt` (which would return only 50).
 *
 * @param value - Raw employee-count value; may be missing.
 * @returns The parsed count, or 0 when the value is missing or not numeric.
 */
function parseEmployeeCount(value: string | undefined | null): number {
  if (!value) return 0

  const normalized = value.trim()
  if (normalized === '') return 0

  // "1000+" style: use the lower bound.
  const plusMatch = normalized.match(/^(\d+)\s*\+$/)
  if (plusMatch) return parseInt(plusMatch[1], 10)

  // "50-249" style ranges (hyphen, en dash or em dash): use the midpoint.
  const rangeMatch = normalized.match(/^(\d+)\s*[-\u2013\u2014]\s*(\d+)$/)
  if (rangeMatch) {
    const low = parseInt(rangeMatch[1], 10)
    const high = parseInt(rangeMatch[2], 10)
    return Math.round((low + high) / 2)
  }

  // Plain number (parseInt also accepts a numeric prefix such as "12 MA").
  const num = parseInt(normalized, 10)
  return Number.isNaN(num) ? 0 : num
}
|
|
|
|
/**
 * Derives a coarse team-structure label from the profile's employee count.
 *
 * @param profile - Company profile, or null when none is available.
 * @returns 'Konzernstruktur' for more than 500 employees, 'mittelstaendisch'
 *   for more than 50, 'Kleinunternehmen' otherwise; an empty string when
 *   there is no profile or the employee count is unknown.
 */
function deriveTeamStructure(profile: CompanyProfile | null): string {
  if (!profile) return ''

  const count = parseEmployeeCount(profile.employeeCount)

  // parseEmployeeCount returns 0 for a missing or unparseable value. Labelling
  // that as 'Kleinunternehmen' would state a company size nobody provided, so
  // return '' and let the serializer print its 'nicht angegeben' fallback.
  if (count <= 0) return ''

  if (count > 500) return 'Konzernstruktur'
  if (count > 50) return 'mittelstaendisch'
  return 'Kleinunternehmen'
}
|
|
|
|
/**
 * Derives a coarse IT-landscape label from the profile's business model.
 *
 * @param profile - Company profile, or null when none is available.
 * @returns '' without a profile; 'Cloud-First' when `businessModel` includes
 *   'SaaS' or 'Cloud' (case-sensitive); 'Hybrid' in every other case,
 *   including a missing business model.
 */
function deriveItLandscape(profile: CompanyProfile | null): string {
  if (!profile) {
    return ''
  }

  const model = profile.businessModel
  const isCloudBased = Boolean(model?.includes('SaaS') || model?.includes('Cloud'))

  if (isCloudBased) {
    return 'Cloud-First'
  }
  return 'Hybrid'
}
|
|
|
|
/**
 * Collects notable characteristics of the company from its profile.
 *
 * @param profile - Company profile, or null when none is available.
 * @returns A list that contains 'Grossunternehmen' when the parsed employee
 *   count exceeds 250 and 'Interner DSB benannt' when `dpoName` is set, in
 *   that order; an empty list without a profile.
 */
function deriveSpecialFeatures(profile: CompanyProfile | null): string[] {
  if (!profile) {
    return []
  }

  const isLargeCompany = parseEmployeeCount(profile.employeeCount) > 250
  const hasDpo = Boolean(profile.dpoName)

  return [
    ...(isLargeCompany ? ['Grossunternehmen'] : []),
    ...(hasDpo ? ['Interner DSB benannt'] : []),
  ]
}
|
|
|
|
/**
 * Derives the list of relevant regulations from the compliance scope.
 *
 * 'DSGVO' is always included. Further regulations are added when the id of a
 * triggered hard-trigger rule contains the respective marker ('ai_act' /
 * 'ai-act', 'nis2', 'ttdsg'). Matching is case-insensitive so that all
 * markers are treated alike regardless of how the rule id is spelled.
 *
 * @param scope - Compliance scope state, or null when none is available.
 * @returns Regulation names without duplicates, 'DSGVO' first, the others in
 *   the order in which they were first triggered.
 */
function deriveTriggeredRegulations(
  scope: import('../compliance-scope-types').ComplianceScopeState | null
): string[] {
  const regulations = new Set<string>(['DSGVO'])

  // No decision (or no trigger list) leaves only the DSGVO baseline.
  for (const trigger of scope?.decision?.triggeredHardTriggers ?? []) {
    // Triggers without a rule id are skipped instead of throwing.
    const ruleId = String(trigger?.rule?.id ?? '').toLowerCase()

    if (ruleId.includes('ai_act') || ruleId.includes('ai-act')) regulations.add('AI Act')
    if (ruleId.includes('nis2')) regulations.add('NIS2')
    if (ruleId.includes('ttdsg')) regulations.add('TTDSG')
  }

  return Array.from(regulations)
}
|
|
|
|
/**
 * Returns the names of the first three use cases in the state.
 *
 * @param state - SDK state; only `useCases` is read.
 * @returns Up to three names in their original order, with 'Unbenannt'
 *   substituted for an empty or missing name; an empty list when there are
 *   no use cases.
 */
function derivePrimaryUseCases(state: SDKState): string[] {
  const useCases = state.useCases
  if (!useCases || useCases.length === 0) {
    return []
  }

  const names: string[] = []
  for (const useCase of useCases.slice(0, 3)) {
    names.push(useCase.name || 'Unbenannt')
  }
  return names
}
|