feat(sdk,iace): add Personalized Drafting Pipeline v2 and IACE engine
All checks were successful
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-ai-compliance (push) Successful in 44s
CI / test-python-backend-compliance (push) Successful in 37s
CI / test-python-document-crawler (push) Successful in 22s
CI / test-python-dsms-gateway (push) Successful in 20s
All checks were successful
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-ai-compliance (push) Successful in 44s
CI / test-python-backend-compliance (push) Successful in 37s
CI / test-python-document-crawler (push) Successful in 22s
CI / test-python-dsms-gateway (push) Successful in 20s
Drafting Engine: 7-module pipeline with narrative tags, allowed facts governance, PII sanitizer, prose validator with repair loop, hash-based cache, and terminology guide. v1 fallback via ?v=1 query param. IACE: Initial AI-Act Conformity Engine with risk classifier, completeness checker, hazard library, and PostgreSQL store for AI system assessments. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
298
admin-compliance/lib/sdk/drafting-engine/sanitizer.ts
Normal file
298
admin-compliance/lib/sdk/drafting-engine/sanitizer.ts
Normal file
@@ -0,0 +1,298 @@
|
||||
/**
|
||||
* PII Sanitizer — Bereinigt Kontextdaten vor LLM-Aufruf
|
||||
*
|
||||
* Entfernt personenbezogene Daten (PII) aus AllowedFacts
|
||||
* bevor sie an das LLM weitergegeben werden.
|
||||
*
|
||||
* Bei Fehler: Hard Abort — kein LLM-Aufruf ohne erfolgreiche Sanitization.
|
||||
*/
|
||||
|
||||
import type { AllowedFacts } from './allowed-facts'
|
||||
|
||||
// ============================================================================
|
||||
// Types
|
||||
// ============================================================================
|
||||
|
||||
/**
 * PII-free fact budget.
 *
 * Structurally an `AllowedFacts` object that additionally carries the literal
 * marker `__sanitized: true`. Within this file the marker is attached by
 * `sanitizeAllowedFacts` and checked at runtime by `isSanitized`.
 */
export type SanitizedFacts = AllowedFacts & {
  __sanitized: true
}
|
||||
|
||||
/** Audit record describing what a sanitization run changed. */
export interface SanitizationAudit {
  /** Whether sanitization ran; `sanitizeAllowedFacts` always reports `true`. */
  sanitizationApplied: boolean
  /** Number of entries in `redactedFieldNames`. */
  redactedFieldsCount: number
  /** Field names / paths (e.g. `specialFeatures[0]`) in which something was redacted. */
  redactedFieldNames: string[]
}
|
||||
|
||||
/** Result of a sanitization run: the scrubbed facts plus the audit record. */
export interface SanitizationResult {
  /** Scrubbed copy of the input facts, tagged with `__sanitized: true`. */
  facts: SanitizedFacts
  /** Which fields were redacted during the run. */
  audit: SanitizationAudit
}
|
||||
|
||||
/**
 * Error type for sanitization failures (intended to trigger a hard abort of
 * the LLM call).
 *
 * Besides the regular `message`, each instance records which field failed and
 * why, so callers can report the failure without parsing the message text.
 */
export class SanitizationError extends Error {
  /** Name / path of the field that could not be sanitized. */
  public readonly field: string
  /** Machine- or human-readable reason for the failure. */
  public readonly reason: string

  constructor(message: string, field: string, reason: string) {
    super(message)
    this.field = field
    this.reason = reason
    // Override the default "Error" so logs and `error.name` checks identify this type.
    this.name = 'SanitizationError'
  }
}
|
||||
|
||||
// ============================================================================
|
||||
// PII Detection Patterns
|
||||
// ============================================================================
|
||||
|
||||
/**
 * Regular expressions used to detect PII-like substrings.
 *
 * Every pattern carries the `g` flag, so each RegExp object is stateful:
 * `test()` / `exec()` start at `lastIndex` and leave it behind after a match.
 * Code that calls `test()` on these shared objects must reset `lastIndex`.
 */
const PII_PATTERNS = {
  // E-mail address: local part, `@`, domain, and a TLD of at least two letters.
  email: /[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/g,
  // Phone-like digit groups: optional (+)country prefix, 2-5 digits (optionally
  // in parentheses), then 3-10 digits, with single `-`, `.` or whitespace
  // separators. Also matches plain 5+ digit runs, so callers filter by digit count.
  phone: /(\+?\d{1,3}[-.\s]?)?\(?\d{2,5}\)?[-.\s]?\d{3,10}/g,
  // Dotted quad of 1-3 digit groups; octet ranges are not validated.
  ipAddress: /\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b/g,
  // UUID-shaped identifier (8-4-4-4-12 hex digits), case-insensitive.
  internalId: /\b[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\b/gi,
  // Key-like token: `sk-`, `pk-` or an `api key` prefix variant followed by
  // at least 20 alphanumeric characters, case-insensitive.
  apiKey: /\b(sk-|pk-|api[_-]?key[_-]?)[a-zA-Z0-9]{20,}\b/gi,
} as const
|
||||
|
||||
// ============================================================================
|
||||
// Sanitizer
|
||||
// ============================================================================
|
||||
|
||||
/**
 * Builds a PII-scrubbed copy of `facts` for use in an LLM prompt.
 *
 * Free-text fields (company name, legal form, industry, team structure,
 * IT landscape, special features, triggered regulations, primary use cases)
 * are passed through `sanitizeString`; `location` goes through
 * `sanitizeAddress`. Fields not listed here (for example `employeeCount`)
 * and `narrativeTags` are copied without inspection.
 *
 * The input object is not mutated.
 *
 * NOTE(review): no code path in this function throws `SanitizationError`
 * itself; a malformed input (e.g. a missing array field) surfaces as a plain
 * `TypeError`. Confirm whether callers rely on the hard-abort contract.
 *
 * @param facts - The fact budget to scrub.
 * @returns The scrubbed facts tagged with `__sanitized: true`, plus an audit
 *          record listing every field in which something was redacted.
 */
export function sanitizeAllowedFacts(facts: AllowedFacts): SanitizationResult {
  const redactedFields: string[] = []

  // Small adapters so every call shares the same audit list.
  const scrub = (text: string, field: string): string =>
    sanitizeString(text, field, redactedFields)
  const scrubEach = (items: readonly string[], field: string): string[] =>
    items.map((item, index) => scrub(item, `${field}[${index}]`))

  // Shallow copy of the input; narrative tags are cloned but left untouched.
  const sanitized: AllowedFacts = {
    ...facts,
    narrativeTags: { ...facts.narrativeTags },
  }

  // Scalar free-text fields. Order matters: it determines the audit order.
  sanitized.companyName = scrub(facts.companyName, 'companyName')
  sanitized.legalForm = scrub(facts.legalForm, 'legalForm')
  sanitized.industry = scrub(facts.industry, 'industry')

  // Location keeps city/region but loses street, house number and postal code.
  sanitized.location = sanitizeAddress(facts.location, 'location', redactedFields)

  sanitized.teamStructure = scrub(facts.teamStructure, 'teamStructure')
  sanitized.itLandscape = scrub(facts.itLandscape, 'itLandscape')

  // List fields: each element is scrubbed and audited under its indexed path.
  sanitized.specialFeatures = scrubEach(facts.specialFeatures, 'specialFeatures')
  sanitized.triggeredRegulations = scrubEach(facts.triggeredRegulations, 'triggeredRegulations')
  sanitized.primaryUseCases = scrubEach(facts.primaryUseCases, 'primaryUseCases')

  return {
    facts: { ...sanitized, __sanitized: true } as SanitizedFacts,
    audit: {
      sanitizationApplied: true,
      redactedFieldsCount: redactedFields.length,
      redactedFieldNames: redactedFields,
    },
  }
}
|
||||
|
||||
/**
 * Runtime type guard: reports whether `facts` carries the sanitization marker.
 *
 * Only the marker is inspected — the value must be a non-null object whose
 * `__sanitized` property (own or inherited) is strictly `true`. The content of
 * the other fields is not re-checked.
 *
 * @param facts - Any value.
 * @returns `true` when the marker is present and strictly equal to `true`.
 */
export function isSanitized(facts: unknown): facts is SanitizedFacts {
  if (typeof facts !== 'object' || facts === null) {
    return false
  }
  if (!('__sanitized' in facts)) {
    return false
  }
  return (facts as SanitizedFacts).__sanitized === true
}
|
||||
|
||||
// ============================================================================
|
||||
// Private Helpers
|
||||
// ============================================================================
|
||||
|
||||
/**
 * Redacts PII-looking substrings from a single string value.
 *
 * Every match of the `PII_PATTERNS` expressions is replaced with the literal
 * `[REDACTED]`. Passes run from the most specific pattern to the least
 * specific one (e-mail, API key, UUID, IP address, phone). The broad phone
 * pattern must run last: it also matches digit runs inside IP addresses,
 * UUIDs and keys, and running it first would leave those only partly redacted
 * (e.g. `192.168.1.100` -> `[REDACTED].1.100`).
 *
 * Matching is done exclusively through `String.prototype.replace`, which
 * always scans a global regex from index 0. The result therefore does not
 * depend on whatever `lastIndex` another caller left on the shared patterns.
 *
 * @param value - Text to scrub. Falsy values (empty string, and at runtime
 *                `null` / `undefined`) are returned unchanged.
 * @param fieldName - Name recorded in `redactedFields` when at least one
 *                    replacement happened.
 * @param redactedFields - Audit list; `fieldName` is appended at most once per call.
 * @returns The scrubbed text.
 */
function sanitizeString(
  value: string,
  fieldName: string,
  redactedFields: string[]
): string {
  if (!value) return value

  const placeholder = '[REDACTED]'
  let redactionCount = 0
  const redact = (): string => {
    redactionCount += 1
    return placeholder
  }

  // Specific patterns first, so the phone pass cannot eat parts of them.
  let result = value
    .replace(PII_PATTERNS.email, redact)
    .replace(PII_PATTERNS.apiKey, redact)
    .replace(PII_PATTERNS.internalId, redact)
    .replace(PII_PATTERNS.ipAddress, redact)

  // Phone numbers: the pattern also matches short numbers (postal codes,
  // standard numbers), so only matches with at least six digits are redacted.
  // The callback replaces exactly the matched occurrence.
  result = result.replace(PII_PATTERNS.phone, (match) =>
    match.replace(/\D/g, '').length >= 6 ? redact() : match
  )

  if (redactionCount > 0) {
    redactedFields.push(fieldName)
  }

  return result
}
|
||||
|
||||
/**
 * Scrubs a location value: keeps city / region, removes street + house number
 * and German five-digit postal codes.
 *
 * Steps:
 *  1. Generic PII scrubbing via `sanitizeString`.
 *  2. Removal of German street + house number. Recognised suffixes are
 *     `straße`, `strasse`, `str.`, `weg`, `gasse`, `platz`, `allee`, `ring`
 *     and `damm`, including hyphenated names (`Karl-Marx-Straße 7`) and names
 *     starting with an umlaut (`Ölweg 3`). For space-separated names
 *     (`Berliner Straße 5`) only the last word and the number are removed.
 *  3. Removal of five-digit postal codes, whether or not whitespace follows.
 *  4. If anything was removed, separators left behind (doubled commas,
 *     leading / trailing commas, repeated spaces) are tidied up.
 *
 * @param value - Location text. Falsy values are returned unchanged.
 * @param fieldName - Name recorded in `redactedFields` when something was removed.
 * @param redactedFields - Audit list; `fieldName` is added at most once.
 * @returns The scrubbed, trimmed location text.
 */
function sanitizeAddress(
  value: string,
  fieldName: string,
  redactedFields: string[]
): string {
  if (!value) return value

  // A lookbehind replaces a leading `\b`: `\b` is ASCII-only in JavaScript and
  // does not hold in front of `Ä`, `Ö` or `Ü`.
  const streetPattern =
    /(?<![A-Za-zÄÖÜäöüß])(?:[A-Za-zÄÖÜäöüß]+-)*[A-Za-zÄÖÜäöüß]*(?:straße|strasse|str\.|weg|gasse|platz|allee|ring|damm)\s*\d+[a-z]?\b/gi
  // Exactly five digits, with or without trailing whitespace.
  const postalCodePattern = /\b\d{5}\b\s*/g

  // Generic PII scrubbing first.
  let result = sanitizeString(value, fieldName, redactedFields)

  let removedCount = 0
  const remove = (): string => {
    removedCount += 1
    return ''
  }

  result = result.replace(streetPattern, remove).replace(postalCodePattern, remove)

  if (removedCount > 0) {
    // sanitizeString may already have recorded this field.
    if (!redactedFields.includes(fieldName)) {
      redactedFields.push(fieldName)
    }

    // Tidy up separators orphaned by the removals above.
    result = result
      .replace(/\s{2,}/g, ' ')
      .replace(/\s+,/g, ',')
      .replace(/,(?:\s*,)+/g, ',')
      .replace(/^[\s,;]+|[\s,;]+$/g, '')
  }

  return result.trim()
}
|
||||
|
||||
/**
 * Scans every string value of a sanitized facts object for PII that survived
 * sanitization and reports it as warnings.
 *
 * Checked patterns, in this order per value: e-mail address, IP address,
 * API key. Phone numbers and UUIDs are not re-checked here.
 *
 * The patterns are shared global (`g`) RegExp objects, and `test()` on such an
 * object leaves `lastIndex` behind after a match. `lastIndex` is therefore
 * reset before AND after every test, so a positive finding here cannot make a
 * later `test()` elsewhere start mid-string and miss PII.
 *
 * @param facts - The sanitized facts to inspect (not modified).
 * @returns One warning per finding, in traversal order; empty when nothing was found.
 */
export function validateNoRemainingPII(facts: SanitizedFacts): string[] {
  const checks: ReadonlyArray<{ pattern: RegExp; describe: (path: string) => string }> = [
    { pattern: PII_PATTERNS.email, describe: (path) => `Verbleibende E-Mail in ${path}` },
    { pattern: PII_PATTERNS.ipAddress, describe: (path) => `Verbleibende IP-Adresse in ${path}` },
    { pattern: PII_PATTERNS.apiKey, describe: (path) => `Verbleibender API-Key in ${path}` },
  ]

  const warnings: string[] = []

  for (const { path, value } of extractAllStringValues(facts)) {
    // The sanitization marker itself is never treated as content.
    if (path === '__sanitized') continue

    for (const { pattern, describe } of checks) {
      pattern.lastIndex = 0
      const found = pattern.test(value)
      // Leave the shared regex in a clean state for the next user.
      pattern.lastIndex = 0

      if (found) {
        warnings.push(describe(path))
      }
    }
  }

  return warnings
}
|
||||
|
||||
/**
 * Collects every string value reachable from `obj`, depth-first, together
 * with a path describing where it was found.
 *
 * Path format: object keys are joined with `.`, array elements are addressed
 * as `[index]` (e.g. `a.b[2].c`). Non-string primitives and `null` are
 * skipped. An array nested directly inside another array is traversed like an
 * object, so its elements appear as `outer[1].0`.
 *
 * @param obj - Object to traverse.
 * @param prefix - Path of `obj` itself; empty for the root.
 * @returns The `{ path, value }` pairs in traversal order.
 */
function extractAllStringValues(
  obj: Record<string, unknown>,
  prefix = ''
): Array<{ path: string; value: string }> {
  const collected: Array<{ path: string; value: string }> = []

  const isContainer = (candidate: unknown): candidate is Record<string, unknown> =>
    typeof candidate === 'object' && candidate !== null

  Object.entries(obj).forEach(([key, entry]) => {
    const path = prefix ? `${prefix}.${key}` : key

    if (typeof entry === 'string') {
      collected.push({ path, value: entry })
      return
    }

    if (Array.isArray(entry)) {
      entry.forEach((item: unknown, index: number) => {
        const itemPath = `${path}[${index}]`
        if (typeof item === 'string') {
          collected.push({ path: itemPath, value: item })
        } else if (isContainer(item)) {
          collected.push(...extractAllStringValues(item, itemPath))
        }
      })
      return
    }

    if (isContainer(entry)) {
      collected.push(...extractAllStringValues(entry, path))
    }
  })

  return collected
}
|
||||
Reference in New Issue
Block a user