All checks were successful
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-ai-compliance (push) Successful in 44s
CI / test-python-backend-compliance (push) Successful in 37s
CI / test-python-document-crawler (push) Successful in 22s
CI / test-python-dsms-gateway (push) Successful in 20s
Drafting Engine: 7-module pipeline with narrative tags, allowed facts governance, PII sanitizer, prose validator with repair loop, hash-based cache, and terminology guide. v1 fallback via ?v=1 query param. IACE: Initial AI-Act Conformity Engine with risk classifier, completeness checker, hazard library, and PostgreSQL store for AI system assessments. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
258 lines
7.8 KiB
TypeScript
258 lines
7.8 KiB
TypeScript
/**
|
|
* Allowed Facts Governance — Kontrolliertes Faktenbudget fuer LLM
|
|
*
|
|
* Definiert welche Fakten das LLM in Prosa-Bloecken verwenden darf
|
|
* und welche Themen explizit verboten sind.
|
|
*
|
|
* Verhindert Halluzinationen durch explizite Whitelisting.
|
|
*/
|
|
|
|
import type { SDKState, CompanyProfile } from '../types'
|
|
import type { NarrativeTags } from './narrative-tags'
|
|
|
|
// ============================================================================
|
|
// Types
|
|
// ============================================================================
|
|
|
|
/**
 * Explicit fact budget for the LLM.
 *
 * An instance carries the only facts the LLM is supposed to use when it
 * writes prose blocks.
 */
export interface AllowedFacts {
  // --- Company profile ---

  /** Company name (`buildAllowedFacts` falls back to 'Unbekannt'). */
  companyName: string
  /** Legal form; empty string when not available. */
  legalForm: string
  /** Industry; empty string when not available. */
  industry: string
  /** Location; empty string when not available. */
  location: string
  /** Number of employees (`buildAllowedFacts` falls back to 0). */
  employeeCount: number

  // --- Organisation ---

  /** Coarse label describing the team structure. */
  teamStructure: string
  /** Coarse label describing the IT landscape. */
  itLandscape: string
  /** Notable characteristics of the company. */
  specialFeatures: string[]

  // --- Compliance context ---

  /** Names of the regulations that were triggered. */
  triggeredRegulations: string[]
  /** Names of the primary use cases. */
  primaryUseCases: string[]

  // --- Narrative tags (deterministic) ---

  /** Narrative tags passed through unchanged by the builder. */
  narrativeTags: NarrativeTags
}
|
|
|
|
/**
 * Rules describing which topics are allowed and which are forbidden.
 */
export interface FactPolicy {
  /** Topics the LLM may write about. */
  allowedTopics: string[]
  /** Topics the LLM must not write about. */
  disallowedTopics: string[]
}
|
|
|
|
// ============================================================================
|
|
// Default Policy
|
|
// ============================================================================
|
|
|
|
/**
 * Policy used whenever a caller does not supply its own.
 *
 * The order of the entries is significant: the disallowed topics are
 * serialized into the prompt in exactly this order.
 */
export const DEFAULT_FACT_POLICY: FactPolicy = {
  allowedTopics: [
    'Branche', 'Unternehmensgroesse', 'Teamstruktur',
    'IT-Strategie', 'Regulatorischer Kontext', 'Anwendungsfaelle',
    'Organisationsform', 'Standort', 'Rechtsform',
  ],
  disallowedTopics: [
    'Umsatz', 'Gewinn', 'Kundenzahlen',
    'konkrete Zertifizierungen', 'interne Tool-Namen',
    'Personennamen', 'E-Mail-Adressen', 'Telefonnummern', 'IP-Adressen',
    'konkrete Prozentwerte', 'konkrete Scores', 'Compliance-Level-Bezeichnungen',
    'interne Projektnamen', 'Passwoerter', 'API-Keys',
    'Vertragsinhalte', 'Gehaltsinformationen',
  ],
}
|
|
|
|
// ============================================================================
|
|
// Builder
|
|
// ============================================================================
|
|
|
|
/**
 * Builds the AllowedFacts for the LLM from the SDK state.
 *
 * Only explicitly whitelisted fields are taken over; everything else in the
 * state is left out.
 *
 * @param state - SDK state; `companyProfile`, `complianceScope` and (via a
 *   helper) `useCases` are read.
 * @param narrativeTags - Narrative tags, passed through unchanged.
 * @returns The fact budget, with fallbacks ('Unbekannt', '' or 0) for
 *   missing profile values.
 */
export function buildAllowedFacts(
  state: SDKState,
  narrativeTags: NarrativeTags
): AllowedFacts {
  const { companyProfile, complianceScope } = state

  const facts: AllowedFacts = {
    // Values copied directly from the company profile
    companyName: companyProfile?.name ?? 'Unbekannt',
    legalForm: companyProfile?.legalForm ?? '',
    industry: companyProfile?.industry ?? '',
    location: companyProfile?.location ?? '',
    employeeCount: companyProfile?.employeeCount ?? 0,

    // Values derived from the company profile
    teamStructure: deriveTeamStructure(companyProfile),
    itLandscape: deriveItLandscape(companyProfile),
    specialFeatures: deriveSpecialFeatures(companyProfile),

    // Compliance context
    triggeredRegulations: deriveTriggeredRegulations(complianceScope),
    primaryUseCases: derivePrimaryUseCases(state),

    narrativeTags,
  }

  return facts
}
|
|
|
|
// ============================================================================
|
|
// Serialization
|
|
// ============================================================================
|
|
|
|
/**
 * Serializes AllowedFacts into a bullet list for the LLM prompt.
 *
 * The six base lines are always emitted; empty (falsy) values are rendered
 * as 'nicht angegeben'. The list-valued lines are emitted only when the
 * corresponding list is non-empty.
 *
 * @param facts - The fact budget to serialize.
 * @returns Lines of the form `- Label: value`, joined with newlines.
 */
export function allowedFactsToPromptString(facts: AllowedFacts): string {
  const notSpecified = 'nicht angegeben'
  const legalFormSuffix = facts.legalForm ? ` (${facts.legalForm})` : ''

  const output: string[] = []
  output.push(`- Firma: ${facts.companyName}${legalFormSuffix}`)
  output.push(`- Branche: ${facts.industry || notSpecified}`)
  output.push(`- Standort: ${facts.location || notSpecified}`)
  // An employee count of 0 is treated as "not specified".
  output.push(`- Mitarbeiter: ${facts.employeeCount || notSpecified}`)
  output.push(`- Teamstruktur: ${facts.teamStructure || notSpecified}`)
  output.push(`- IT-Umgebung: ${facts.itLandscape || notSpecified}`)

  // Optional list lines, in output order.
  const optionalLists: Array<[string, string[]]> = [
    ['Relevante Regulierungen', facts.triggeredRegulations],
    ['Anwendungsfaelle', facts.primaryUseCases],
    ['Besonderheiten', facts.specialFeatures],
  ]
  for (const [label, values] of optionalLists) {
    if (values.length > 0) {
      output.push(`- ${label}: ${values.join(', ')}`)
    }
  }

  return output.join('\n')
}
|
|
|
|
/**
 * Serializes the disallowed topics of a policy for the LLM prompt.
 *
 * @param policy - Policy whose `disallowedTopics` are rendered; defaults to
 *   `DEFAULT_FACT_POLICY`.
 * @returns One `- topic` line per topic, joined with newlines (an empty
 *   string when there are no topics).
 */
export function disallowedTopicsToPromptString(policy: FactPolicy = DEFAULT_FACT_POLICY): string {
  const bullets: string[] = []
  for (const topic of policy.disallowedTopics) {
    bullets.push(`- ${topic}`)
  }
  return bullets.join('\n')
}
|
|
|
|
// ============================================================================
|
|
// Validation
|
|
// ============================================================================
|
|
|
|
/**
 * Checks whether a text potentially contains forbidden content and returns
 * the list of detected violations.
 *
 * The checks are fixed regular-expression heuristics, evaluated in this
 * order: percentages, scores, compliance level labels (L1-L4), e-mail
 * addresses, phone numbers, IP addresses and direct address of the reader.
 * Each kind of violation is reported at most once.
 *
 * NOTE(review): `policy` is accepted for API compatibility but is not
 * consulted by any check yet; the heuristics below are hard-coded.
 *
 * @param text - The text to inspect.
 * @param policy - Fact policy; currently unused (see note above).
 * @returns Human-readable (German) violation messages; empty when nothing
 *   was found.
 */
export function checkForDisallowedContent(
  text: string,
  policy: FactPolicy = DEFAULT_FACT_POLICY
): string[] {
  const violations: string[] = []

  // Percentages such as "45%" or "45 %"
  if (/\d+\s*%/.test(text)) {
    violations.push('Konkrete Prozentwerte gefunden')
  }

  // Score patterns such as "Score: 85"
  if (/score[:\s]*\d+/i.test(text)) {
    violations.push('Konkrete Scores gefunden')
  }

  // Compliance level labels
  if (/\b(L1|L2|L3|L4)\b/.test(text)) {
    violations.push('Compliance-Level-Bezeichnungen (L1-L4) gefunden')
  }

  // E-mail addresses
  if (/[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/.test(text)) {
    violations.push('E-Mail-Adresse gefunden')
  }

  // Phone numbers: a candidate only counts when it has at least six digits,
  // so that short numbers (e.g. an employee count) are not reported.
  const phoneCandidates = text.match(/(\+?\d{1,3}[-.\s]?)?\(?\d{2,5}\)?[-.\s]?\d{3,10}/g) || []
  if (phoneCandidates.some(candidate => candidate.replace(/\D/g, '').length >= 6)) {
    violations.push('Telefonnummer gefunden')
  }

  // IP addresses (dotted quad)
  if (/\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b/.test(text)) {
    violations.push('IP-Adresse gefunden')
  }

  // Direct address of the reader. The inflected possessive forms are listed
  // explicitly because the word boundary keeps "Ihr" from matching "Ihre".
  if (/\b(Sie|Ihr|Ihre|Ihren|Ihres|Ihnen|Ihrem|Ihrer)\b/.test(text)) {
    violations.push('Direkte Ansprache (Sie/Ihr) gefunden')
  }

  return violations
}
|
|
|
|
// ============================================================================
|
|
// Private Helpers
|
|
// ============================================================================
|
|
|
|
/**
 * Derives a coarse team-structure label from the employee count.
 *
 * @param profile - Company profile, or null when none is available.
 * @returns '' without a profile; otherwise 'Konzernstruktur' (more than 500
 *   employees), 'mittelstaendisch' (more than 50) or 'Kleinunternehmen'.
 */
function deriveTeamStructure(profile: CompanyProfile | null): string {
  if (!profile) {
    return ''
  }

  const headcount = profile.employeeCount
  if (headcount > 500) {
    return 'Konzernstruktur'
  }
  return headcount > 50 ? 'mittelstaendisch' : 'Kleinunternehmen'
}
|
|
|
|
/**
 * Derives a coarse IT-landscape label from the company profile.
 *
 * @param profile - Company profile, or null when none is available.
 * @returns '' without a profile; 'Cloud-First' when the business model
 *   includes 'SaaS' or 'Cloud'; otherwise 'On-Premise' for the public sector
 *   and 'Hybrid' for everything else.
 */
function deriveItLandscape(profile: CompanyProfile | null): string {
  if (!profile) {
    return ''
  }

  const model = profile.businessModel
  if (model?.includes('SaaS') || model?.includes('Cloud')) {
    return 'Cloud-First'
  }

  if (profile.isPublicSector) {
    return 'On-Premise'
  }
  return 'Hybrid'
}
|
|
|
|
/**
 * Collects notable characteristics of the company.
 *
 * @param profile - Company profile, or null when none is available.
 * @returns The labels whose condition holds, in fixed order: public sector,
 *   more than 250 employees, data protection officer set. Empty without a
 *   profile.
 */
function deriveSpecialFeatures(profile: CompanyProfile | null): string[] {
  if (!profile) {
    return []
  }

  // [condition, label] pairs in output order.
  const candidates: Array<[unknown, string]> = [
    [profile.isPublicSector, 'Oeffentlicher Sektor'],
    [profile.employeeCount > 250, 'Grossunternehmen'],
    [profile.dataProtectionOfficer, 'Interner DSB benannt'],
  ]

  return candidates
    .filter(([applies]) => Boolean(applies))
    .map(([, label]) => label)
}
|
|
|
|
/**
 * Derives the list of relevant regulations from the compliance scope.
 *
 * 'DSGVO' is always included. Further regulations are added when the id of
 * a triggered hard-trigger rule contains the corresponding marker. The id is
 * compared case-insensitively so that all markers are treated uniformly
 * (e.g. 'AI_ACT', 'Nis2' and 'ttdsg' are all recognized).
 *
 * @param scope - Compliance scope state, or null when none is available.
 * @returns The regulation names without duplicates, 'DSGVO' first, the
 *   others in the order in which they were first triggered.
 */
function deriveTriggeredRegulations(
  scope: import('../compliance-scope-types').ComplianceScopeState | null
): string[] {
  if (!scope?.decision) return ['DSGVO']

  const regulations = new Set<string>(['DSGVO'])
  const triggers = scope.decision.triggeredHardTriggers || []

  for (const trigger of triggers) {
    // Normalize once so that every marker check is case-insensitive.
    const ruleId = trigger.rule.id.toLowerCase()
    if (ruleId.includes('ai_act') || ruleId.includes('ai-act')) regulations.add('AI Act')
    if (ruleId.includes('nis2')) regulations.add('NIS2')
    if (ruleId.includes('ttdsg')) regulations.add('TTDSG')
  }

  return Array.from(regulations)
}
|
|
|
|
/**
 * Returns display names for the first (at most three) use cases.
 *
 * @param state - SDK state; only `useCases` is read.
 * @returns For each use case its `name`, else its `title`, else
 *   'Unbenannt'; empty when there are no use cases.
 */
function derivePrimaryUseCases(state: SDKState): string[] {
  const useCases = state.useCases
  if (!useCases || useCases.length === 0) {
    return []
  }

  const labels: string[] = []
  for (const useCase of useCases.slice(0, 3)) {
    labels.push(useCase.name || useCase.title || 'Unbenannt')
  }
  return labels
}
|