All checks were successful
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-ai-compliance (push) Successful in 44s
CI / test-python-backend-compliance (push) Successful in 37s
CI / test-python-document-crawler (push) Successful in 22s
CI / test-python-dsms-gateway (push) Successful in 20s
Drafting Engine: 7-module pipeline with narrative tags, allowed facts governance, PII sanitizer, prose validator with repair loop, hash-based cache, and terminology guide. v1 fallback via ?v=1 query param. IACE: Initial AI-Act Conformity Engine with risk classifier, completeness checker, hazard library, and PostgreSQL store for AI system assessments. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
185 lines
6.4 KiB
TypeScript
185 lines
6.4 KiB
TypeScript
/**
 * Terminology Guide & Style Contract — consistent technical terms
 *
 * Ensures that all prose blocks of a document use the same
 * technical terms and the same writing style.
 *
 * 100% deterministic.
 */
|
|
|
|
// ============================================================================
// Terminology Guide
// ============================================================================
|
|
|
|
/**
 * Canonical German wording for compliance concepts, grouped by domain.
 *
 * Each group maps a lookup key to the German term that generated prose is
 * expected to use for that concept.
 */
export interface TerminologyGuide {
  /** GDPR (DSGVO) terms. */
  dsgvo: Record<string, string>
  /** Terms for technical and organisational measures (TOM). */
  tom: Record<string, string>
  /** General compliance terms. */
  general: Record<string, string>
}
|
|
|
|
/**
 * Built-in terminology guide.
 *
 * Keys are English snake_case identifiers; values are the German terms, written
 * with ASCII transliterations (ae/oe/ue) instead of umlauts.
 *
 * Entry order is significant: terminologyToPromptString() only emits the
 * leading entries of each group, so the terms that should reach the prompt
 * must come first.
 */
export const DEFAULT_TERMINOLOGY: TerminologyGuide = {
  dsgvo: {
    controller: 'Verantwortlicher',
    processor: 'Auftragsverarbeiter',
    data_subject: 'betroffene Person',
    processing: 'Verarbeitung',
    personal_data: 'personenbezogene Daten',
    consent: 'Einwilligung',
    dpia: 'Datenschutz-Folgenabschaetzung (DSFA)',
    legitimate_interest: 'berechtigtes Interesse',
    data_breach: 'Verletzung des Schutzes personenbezogener Daten',
    dpo: 'Datenschutzbeauftragter (DSB)',
    supervisory_authority: 'Aufsichtsbehoerde',
    ropa: 'Verzeichnis von Verarbeitungstaetigkeiten (VVT)',
    retention_period: 'Aufbewahrungsfrist',
    erasure: 'Loeschung',
    restriction: 'Einschraenkung der Verarbeitung',
    // NOTE(review): the German text of Art. 20 GDPR uses "Datenuebertragbarkeit";
    // confirm whether the colloquial term below is intended.
    portability: 'Datenportabilitaet',
    third_country: 'Drittland',
    adequacy_decision: 'Angemessenheitsbeschluss',
    scc: 'Standardvertragsklauseln (SCC)',
  },
  tom: {
    access_control: 'Zutrittskontrolle',
    access_management: 'Zugangskontrolle',
    authorization: 'Zugriffskontrolle',
    encryption: 'Verschluesselung',
    pseudonymization: 'Pseudonymisierung',
    availability: 'Verfuegbarkeitskontrolle',
    resilience: 'Belastbarkeit',
    recoverability: 'Wiederherstellbarkeit',
    audit_logging: 'Protokollierung',
    separation: 'Trennungsgebot',
    input_control: 'Eingabekontrolle',
    transport_control: 'Weitergabekontrolle',
    order_control: 'Auftragskontrolle',
  },
  general: {
    risk_assessment: 'Risikobewertung',
    audit_trail: 'Pruefpfad',
    compliance_level: 'Compliance-Tiefe',
    gap_analysis: 'Lueckenanalyse',
    remediation: 'Massnahmenplan',
    incident_response: 'Vorfallreaktion',
    business_continuity: 'Geschaeftskontinuitaet',
    vendor_management: 'Dienstleistermanagement',
    awareness_training: 'Sensibilisierungsschulung',
  },
}
|
|
|
|
// ============================================================================
// Style Contract
// ============================================================================
|
|
|
|
/**
 * Writing rules that generated prose must follow.
 *
 * `addressing` and `tone` are single-literal types, so exactly one value is
 * currently permitted for each.
 */
export interface StyleContract {
  /** Form of address used in the text. */
  addressing: '3rd_person_company'
  /** Tone of voice. */
  tone: 'formal_legal_plain'
  /** Names of forbidden kinds of phrasing. */
  forbid: string[]
}
|
|
|
|
/**
 * Built-in style contract: third-person company voice, formal but plain legal
 * tone, plus the list of forbidden phrasing categories.
 *
 * The `forbid` entries are category names, not patterns. Of these, only
 * 'Konjunktiv-Ketten' has no corresponding entry in STYLE_VIOLATION_PATTERNS.
 */
export const DEFAULT_STYLE_CONTRACT: StyleContract = {
  addressing: '3rd_person_company',
  tone: 'formal_legal_plain',
  forbid: [
    'Denglisch',
    'Marketing-Sprache',
    'Superlative',
    'Direkte Ansprache',
    'Umgangssprache',
    'Konjunktiv-Ketten',
  ],
}
|
|
|
|
/**
 * Concrete regex patterns for forbidden phrasing, one per category name.
 *
 * - None of the patterns carries the `g` or `y` flag, so `RegExp.test()` on
 *   them is stateless.
 * - Capture group 1 always holds the matched word or phrase.
 * - Adjective patterns accept the German inflection endings
 *   (-e, -em, -en, -er, -es) as well as both the ASCII transliteration
 *   (ae/oe/ue) and the real umlaut spelling.
 */
export const STYLE_VIOLATION_PATTERNS: Array<{ name: string; pattern: RegExp }> = [
  // Deliberately case-sensitive: only the capitalised formal pronouns count,
  // lowercase "sie"/"ihr" are ordinary third-person pronouns.
  { name: 'Direkte Ansprache', pattern: /\b(Sie|Ihnen|Ihr(?:e[mnrs]?)?)\b/ },
  {
    name: 'Superlative',
    pattern:
      /\b((?:best(?:moe|mö)glich|h(?:oe|ö)chstm(?:oe|ö)glich|optimal|perfekt|einzigartig)(?:e[mnrs]?)?)\b/i,
  },
  {
    name: 'Marketing-Sprache',
    pattern:
      /\b((?:revolution(?:ae|ä)r|bahnbrechend|innovativ|f(?:ue|ü)hrend|erstklassig)(?:e[mnrs]?)?)\b/i,
  },
  // "super", "mega" and "easy" are not inflected; the others are.
  { name: 'Umgangssprache', pattern: /\b(super|mega|easy|(?:toll|krass|cool)(?:e[mnrs]?)?)\b/i },
  // Accept both spaced and hyphenated spellings, and the plural "Best Practices".
  {
    name: 'Denglisch',
    pattern: /\b(State[ -]of[ -]the[ -]Art|Best[ -]Practices?|Compliance[ -]Journey|Data[ -]Driven)\b/i,
  },
]
|
|
|
|
// ============================================================================
// Serialization
// ============================================================================
|
|
|
|
/**
 * Serializes a terminology guide for inclusion in an LLM prompt.
 *
 * To keep the prompt small, only the leading entries of each group are
 * emitted, in insertion order (not by frequency): first `dsgvo`, then `tom`,
 * then `general`. Each entry becomes one line of the form ` key: "value"`.
 *
 * @param guide  Guide to serialize; defaults to DEFAULT_TERMINOLOGY.
 * @param limits Maximum number of entries per group. Missing fields fall back
 *               to 10 (dsgvo), 6 (tom) and 4 (general). Negative or NaN limits
 *               are treated as 0; fractional limits are rounded down.
 * @returns The selected entries joined with newlines, or an empty string when
 *          nothing is selected.
 */
export function terminologyToPromptString(
  guide: TerminologyGuide = DEFAULT_TERMINOLOGY,
  limits: { dsgvo?: number; tom?: number; general?: number } = {}
): string {
  const { dsgvo = 10, tom = 6, general = 4 } = limits

  // Clamp so a negative limit cannot make slice() count from the end.
  const take = (terms: Record<string, string>, max: number): Array<[string, string]> =>
    Object.entries(terms).slice(0, Math.max(0, Math.floor(max)))

  return [...take(guide.dsgvo, dsgvo), ...take(guide.tom, tom), ...take(guide.general, general)]
    .map(([key, value]) => ` ${key}: "${value}"`)
    .join('\n')
}
|
|
|
|
/**
 * Serializes a style contract for inclusion in an LLM prompt.
 *
 * Produces three newline-separated lines: form of address, tone, and the
 * comma-separated list of forbidden phrasing categories. The address and tone
 * lines are fixed text because StyleContract permits only one value for each;
 * only `style.forbid` is read from the argument.
 *
 * @param style Contract to serialize; defaults to DEFAULT_STYLE_CONTRACT.
 * @returns The prompt fragment (German).
 */
export function styleContractToPromptString(style: StyleContract = DEFAULT_STYLE_CONTRACT): string {
  const addressingLine = `Anrede: Dritte Person ("Die [Firmenname]...", NICHT "Sie...")`
  const toneLine = `Ton: Professionell, juristisch korrekt, aber verstaendlich`
  const forbiddenLine = `Verboten: ${style.forbid.join(', ')}`

  return [addressingLine, toneLine, forbiddenLine].join('\n')
}
|
|
|
|
// ============================================================================
// Validation
// ============================================================================
|
|
|
|
/**
 * Checks a text against every entry of STYLE_VIOLATION_PATTERNS.
 *
 * Each category is reported at most once, in the order of the pattern list,
 * no matter how often it occurs in the text.
 *
 * @param text Prose to check.
 * @returns One `Style-Verstoss: <name>` message per matching category; an
 *          empty array when nothing matches.
 */
export function checkStyleViolations(text: string): string[] {
  // String.prototype.search() ignores the regex's `g` flag and `lastIndex`, so
  // the result stays the same across calls even if a shared pattern is later
  // given the `g` flag. RegExp.test() would not guarantee that.
  return STYLE_VIOLATION_PATTERNS.filter(({ pattern }) => text.search(pattern) !== -1).map(
    ({ name }) => `Style-Verstoss: ${name}`
  )
}
|
|
|
|
/**
 * Checks whether a text uses English terms where the guide prescribes a
 * German one.
 *
 * Matching is a case-insensitive substring search over a fixed list of eight
 * English terms. Runs of whitespace (including line breaks) in the text are
 * treated as a single space, so a term wrapped across lines is still found.
 *
 * @param text  Prose to check.
 * @param guide Source of the recommended German terms; defaults to
 *              DEFAULT_TERMINOLOGY.
 * @returns One warning per English term found, in list order. If the guide
 *          has no entry for a term, the warning omits the recommendation.
 */
export function checkTerminologyUsage(
  text: string,
  guide: TerminologyGuide = DEFAULT_TERMINOLOGY
): string[] {
  // Collapse whitespace runs so "personal\n  data" matches "personal data".
  const haystack = text.toLowerCase().replace(/\s+/g, ' ')

  // English terms (all lowercase) and the German replacement from the guide.
  // A custom guide may lack a key, hence `string | undefined`.
  const termChecks: Array<{ wrong: string; correct: string | undefined }> = [
    { wrong: 'data controller', correct: guide.dsgvo.controller },
    { wrong: 'data processor', correct: guide.dsgvo.processor },
    { wrong: 'data subject', correct: guide.dsgvo.data_subject },
    { wrong: 'personal data', correct: guide.dsgvo.personal_data },
    { wrong: 'data breach', correct: guide.dsgvo.data_breach },
    { wrong: 'encryption', correct: guide.tom.encryption },
    { wrong: 'pseudonymization', correct: guide.tom.pseudonymization },
    { wrong: 'risk assessment', correct: guide.general.risk_assessment },
  ]

  const warnings: string[] = []
  for (const { wrong, correct } of termChecks) {
    if (!haystack.includes(wrong)) continue
    warnings.push(
      correct === undefined
        ? `Englischer Begriff "${wrong}" gefunden`
        : `Englischer Begriff "${wrong}" gefunden — verwende "${correct}"`
    )
  }
  return warnings
}
|