feat(sdk,iace): add Personalized Drafting Pipeline v2 and IACE engine
All checks were successful
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-ai-compliance (push) Successful in 44s
CI / test-python-backend-compliance (push) Successful in 37s
CI / test-python-document-crawler (push) Successful in 22s
CI / test-python-dsms-gateway (push) Successful in 20s
All checks were successful
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-ai-compliance (push) Successful in 44s
CI / test-python-backend-compliance (push) Successful in 37s
CI / test-python-document-crawler (push) Successful in 22s
CI / test-python-dsms-gateway (push) Successful in 20s
Drafting Engine: 7-module pipeline with narrative tags, allowed facts governance, PII sanitizer, prose validator with repair loop, hash-based cache, and terminology guide. v1 fallback via ?v=1 query param. IACE: Initial AI-Act Conformity Engine with risk classifier, completeness checker, hazard library, and PostgreSQL store for AI system assessments. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
184
admin-compliance/lib/sdk/drafting-engine/terminology.ts
Normal file
184
admin-compliance/lib/sdk/drafting-engine/terminology.ts
Normal file
@@ -0,0 +1,184 @@
|
||||
/**
|
||||
* Terminology Guide & Style Contract — Konsistente Fachbegriffe
|
||||
*
|
||||
* Stellt sicher, dass alle Prosa-Bloecke eines Dokuments
|
||||
* dieselben Fachbegriffe und denselben Schreibstil verwenden.
|
||||
*
|
||||
* 100% deterministisch.
|
||||
*/
|
||||
|
||||
// ============================================================================
|
||||
// Terminology Guide
|
||||
// ============================================================================
|
||||
|
||||
/**
 * Fixed vocabulary for generated prose, split into three term groups.
 * Every group is a plain string-to-string dictionary.
 */
export interface TerminologyGuide {
  /** GDPR (DSGVO) terms. */
  dsgvo: Record<string, string>

  /** Terms for technical and organisational measures (TOM). */
  tom: Record<string, string>

  /** General compliance terms. */
  general: Record<string, string>
}
|
||||
|
||||
// NOTE: entry order inside each group is significant. terminologyToPromptString
// only emits the first N entries of every group, so reordering changes the prompt.

/** GDPR (DSGVO) vocabulary: English key -> German term. */
const DSGVO_TERMS: Record<string, string> = {
  controller: 'Verantwortlicher',
  processor: 'Auftragsverarbeiter',
  data_subject: 'betroffene Person',
  processing: 'Verarbeitung',
  personal_data: 'personenbezogene Daten',
  consent: 'Einwilligung',
  dpia: 'Datenschutz-Folgenabschaetzung (DSFA)',
  legitimate_interest: 'berechtigtes Interesse',
  data_breach: 'Verletzung des Schutzes personenbezogener Daten',
  dpo: 'Datenschutzbeauftragter (DSB)',
  supervisory_authority: 'Aufsichtsbehoerde',
  ropa: 'Verzeichnis von Verarbeitungstaetigkeiten (VVT)',
  retention_period: 'Aufbewahrungsfrist',
  erasure: 'Loeschung',
  restriction: 'Einschraenkung der Verarbeitung',
  portability: 'Datenportabilitaet',
  third_country: 'Drittland',
  adequacy_decision: 'Angemessenheitsbeschluss',
  scc: 'Standardvertragsklauseln (SCC)',
}

/** Vocabulary for technical and organisational measures (TOM). */
const TOM_TERMS: Record<string, string> = {
  access_control: 'Zutrittskontrolle',
  access_management: 'Zugangskontrolle',
  authorization: 'Zugriffskontrolle',
  encryption: 'Verschluesselung',
  pseudonymization: 'Pseudonymisierung',
  availability: 'Verfuegbarkeitskontrolle',
  resilience: 'Belastbarkeit',
  recoverability: 'Wiederherstellbarkeit',
  audit_logging: 'Protokollierung',
  separation: 'Trennungsgebot',
  input_control: 'Eingabekontrolle',
  transport_control: 'Weitergabekontrolle',
  order_control: 'Auftragskontrolle',
}

/** General compliance vocabulary. */
const GENERAL_TERMS: Record<string, string> = {
  risk_assessment: 'Risikobewertung',
  audit_trail: 'Pruefpfad',
  compliance_level: 'Compliance-Tiefe',
  gap_analysis: 'Lueckenanalyse',
  remediation: 'Massnahmenplan',
  incident_response: 'Vorfallreaktion',
  business_continuity: 'Geschaeftskontinuitaet',
  vendor_management: 'Dienstleistermanagement',
  awareness_training: 'Sensibilisierungsschulung',
}

/**
 * Built-in terminology guide, used whenever a caller does not supply its own.
 * German terms are written with ASCII transliterations (ae/oe/ue/ss).
 */
export const DEFAULT_TERMINOLOGY: TerminologyGuide = {
  dsgvo: DSGVO_TERMS,
  tom: TOM_TERMS,
  general: GENERAL_TERMS,
}
|
||||
|
||||
// ============================================================================
|
||||
// Style Contract
|
||||
// ============================================================================
|
||||
|
||||
/**
 * Writing-style rules that generated prose has to follow.
 * `addressing` and `tone` are single-literal types, so each currently
 * admits exactly one value.
 */
export interface StyleContract {
  /** How the reader/company is addressed. */
  addressing: '3rd_person_company'

  /** Overall tone of the text. */
  tone: 'formal_legal_plain'

  /** Names of phrasing categories that must not appear. */
  forbid: string[]
}
|
||||
|
||||
/** Category names of forbidden phrasing, in the order they are listed in the prompt. */
const DEFAULT_FORBIDDEN_PHRASING: string[] = [
  'Denglisch',
  'Marketing-Sprache',
  'Superlative',
  'Direkte Ansprache',
  'Umgangssprache',
  'Konjunktiv-Ketten',
]

/** Built-in style contract, used whenever a caller does not supply its own. */
export const DEFAULT_STYLE_CONTRACT: StyleContract = {
  addressing: '3rd_person_company',
  tone: 'formal_legal_plain',
  forbid: DEFAULT_FORBIDDEN_PHRASING,
}
|
||||
|
||||
/**
 * Concrete regex patterns for forbidden phrasing, one entry per rule name.
 *
 * - Adjective rules accept the usual German declension endings
 *   (-e, -em, -en, -er, -es), so "optimale" or "innovativen" are caught
 *   as well as the base form.
 * - Words containing umlauts match both the real spelling ("bestmöglich")
 *   and the ASCII transliteration ("bestmoeglich").
 * - "Direkte Ansprache" is deliberately case-sensitive: lower-case
 *   "sie"/"ihr" are ordinary third-person pronouns.
 * - "Denglisch" phrases may be joined by whitespace or hyphens.
 *
 * None of the patterns carries the `g` or `y` flag, so `RegExp#test` is
 * stateless and the objects can be shared safely.
 * The forbidden category 'Konjunktiv-Ketten' has no pattern here.
 */
export const STYLE_VIOLATION_PATTERNS: Array<{ name: string; pattern: RegExp }> = [
  {
    name: 'Direkte Ansprache',
    pattern: /\b(Sie|Ihr|Ihre|Ihnen|Ihrem|Ihren|Ihrer|Ihres)\b/,
  },
  {
    name: 'Superlative',
    pattern:
      /\b(bestm(?:oe|ö)glich|h(?:oe|ö)chstm(?:oe|ö)glich|optimal|perfekt|einzigartig)(?:e[mnrs]?)?\b/i,
  },
  {
    name: 'Marketing-Sprache',
    pattern:
      /\b(revolution(?:ae|ä)r|bahnbrechend|innovativ|f(?:ue|ü)hrend|erstklassig)(?:e[mnrs]?)?\b/i,
  },
  {
    name: 'Umgangssprache',
    pattern: /\b(super|toll|mega|krass|cool|easy)(?:e[mnrs]?)?\b/i,
  },
  {
    name: 'Denglisch',
    pattern:
      /\b(State[\s-]+of[\s-]+the[\s-]+Art|Best[\s-]+Practices?|Compliance[\s-]+Journey|Data[\s-]+Driven)\b/i,
  },
]
|
||||
|
||||
// ============================================================================
|
||||
// Serialization
|
||||
// ============================================================================
|
||||
|
||||
/**
 * Renders a terminology guide as text for inclusion in an LLM prompt.
 *
 * To limit prompt size, only the first 10 `dsgvo`, 6 `tom` and 4 `general`
 * entries (in property enumeration order) are emitted.
 *
 * @param guide Guide to serialize; defaults to DEFAULT_TERMINOLOGY.
 * @returns One ` key: "value"` line per emitted term, joined with newlines.
 */
export function terminologyToPromptString(guide: TerminologyGuide = DEFAULT_TERMINOLOGY): string {
  // Each pair is a term group plus the maximum number of entries taken from it.
  const quotas: Array<[Record<string, string>, number]> = [
    [guide.dsgvo, 10],
    [guide.tom, 6],
    [guide.general, 4],
  ]

  const lines: string[] = []
  for (const [terms, limit] of quotas) {
    for (const [key, value] of Object.entries(terms).slice(0, limit)) {
      lines.push(` ${key}: "${value}"`)
    }
  }
  return lines.join('\n')
}
|
||||
|
||||
/**
 * Renders a style contract as three lines of text for inclusion in an LLM prompt.
 *
 * The addressing and tone lines are fixed text; only the forbidden list is
 * taken from `style`.
 *
 * @param style Contract to serialize; defaults to DEFAULT_STYLE_CONTRACT.
 * @returns The addressing, tone and forbidden lines joined with newlines.
 */
export function styleContractToPromptString(style: StyleContract = DEFAULT_STYLE_CONTRACT): string {
  const addressingLine = 'Anrede: Dritte Person ("Die [Firmenname]...", NICHT "Sie...")'
  const toneLine = 'Ton: Professionell, juristisch korrekt, aber verstaendlich'
  const forbiddenLine = 'Verboten: ' + style.forbid.join(', ')

  return `${addressingLine}\n${toneLine}\n${forbiddenLine}`
}
|
||||
|
||||
// ============================================================================
|
||||
// Validation
|
||||
// ============================================================================
|
||||
|
||||
/**
 * Scans a text for style violations using STYLE_VIOLATION_PATTERNS.
 *
 * Each rule is reported at most once, no matter how often it matches.
 *
 * @param text Text to check.
 * @returns One `Style-Verstoss: <rule name>` message per matching rule,
 *   in pattern-list order; empty when nothing matches.
 */
export function checkStyleViolations(text: string): string[] {
  return STYLE_VIOLATION_PATTERNS
    .filter(({ pattern }) => pattern.test(text))
    .map(({ name }) => `Style-Verstoss: ${name}`)
}
|
||||
|
||||
/**
 * Checks whether a text uses English terms where the guide prescribes a
 * German one.
 *
 * Matching is a case-insensitive substring search for a fixed list of
 * English terms. Each term is reported at most once.
 *
 * The suggested replacement is read from `guide`. If a custom guide has no
 * entry for a term, the entry from DEFAULT_TERMINOLOGY is used instead; if
 * that is missing too, the warning is emitted without a suggestion rather
 * than recommending the literal text "undefined".
 *
 * @param text Text to check.
 * @param guide Terminology guide supplying the German replacements;
 *   defaults to DEFAULT_TERMINOLOGY.
 * @returns One warning per English term found; empty when none is found.
 */
export function checkTerminologyUsage(
  text: string,
  guide: TerminologyGuide = DEFAULT_TERMINOLOGY
): string[] {
  const haystack = text.toLowerCase()

  // English term (already lower-case) and where its German replacement lives in the guide.
  const termChecks: Array<{ wrong: string; group: keyof TerminologyGuide; key: string }> = [
    { wrong: 'data controller', group: 'dsgvo', key: 'controller' },
    { wrong: 'data processor', group: 'dsgvo', key: 'processor' },
    { wrong: 'data subject', group: 'dsgvo', key: 'data_subject' },
    { wrong: 'personal data', group: 'dsgvo', key: 'personal_data' },
    { wrong: 'data breach', group: 'dsgvo', key: 'data_breach' },
    { wrong: 'encryption', group: 'tom', key: 'encryption' },
    { wrong: 'pseudonymization', group: 'tom', key: 'pseudonymization' },
    { wrong: 'risk assessment', group: 'general', key: 'risk_assessment' },
  ]

  const warnings: string[] = []
  for (const { wrong, group, key } of termChecks) {
    if (!haystack.includes(wrong)) continue

    // Record<string, string> does not guarantee the key exists in a custom guide.
    const correct: string | undefined = guide[group][key] ?? DEFAULT_TERMINOLOGY[group][key]
    warnings.push(
      correct === undefined
        ? `Englischer Begriff "${wrong}" gefunden`
        : `Englischer Begriff "${wrong}" gefunden — verwende "${correct}"`
    )
  }

  return warnings
}
|
||||
Reference in New Issue
Block a user