test+docs(rag): Tests und Entwicklerdoku fuer RAG Landkarte
- 44 Vitest-Tests: JSON-Struktur, Branchen-Zuordnung, Applicability Notes, Dokumenttyp-Verteilung, keine Duplikate - MkDocs-Seite: Architektur, 10 Branchen, Zuordnungslogik, Integration in andere Projekte, Datenquellen Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
252
admin-lehrer/app/(admin)/ai/rag/__tests__/rag-documents.test.ts
Normal file
252
admin-lehrer/app/(admin)/ai/rag/__tests__/rag-documents.test.ts
Normal file
@@ -0,0 +1,252 @@
|
||||
import { describe, it, expect } from 'vitest'
|
||||
import ragData from '../rag-documents.json'
|
||||
|
||||
/**
|
||||
* Tests fuer rag-documents.json — Branchen-Regulierungs-Matrix
|
||||
*
|
||||
* Validiert die JSON-Struktur, Branchen-Zuordnung und Datenintegritaet
|
||||
* der 320 Dokumente fuer die RAG Landkarte.
|
||||
*/
|
||||
|
||||
const VALID_INDUSTRY_IDS = ragData.industries.map((i: any) => i.id)
|
||||
const VALID_DOC_TYPE_IDS = ragData.doc_types.map((dt: any) => dt.id)
|
||||
|
||||
describe('rag-documents.json — Struktur', () => {
|
||||
it('sollte doc_types, industries und documents enthalten', () => {
|
||||
expect(ragData).toHaveProperty('doc_types')
|
||||
expect(ragData).toHaveProperty('industries')
|
||||
expect(ragData).toHaveProperty('documents')
|
||||
expect(Array.isArray(ragData.doc_types)).toBe(true)
|
||||
expect(Array.isArray(ragData.industries)).toBe(true)
|
||||
expect(Array.isArray(ragData.documents)).toBe(true)
|
||||
})
|
||||
|
||||
it('sollte genau 10 Branchen haben (VDMA/VDA/BDI)', () => {
|
||||
expect(ragData.industries).toHaveLength(10)
|
||||
const ids = ragData.industries.map((i: any) => i.id)
|
||||
expect(ids).toContain('automotive')
|
||||
expect(ids).toContain('maschinenbau')
|
||||
expect(ids).toContain('elektrotechnik')
|
||||
expect(ids).toContain('chemie')
|
||||
expect(ids).toContain('metall')
|
||||
expect(ids).toContain('energie')
|
||||
expect(ids).toContain('transport')
|
||||
expect(ids).toContain('handel')
|
||||
expect(ids).toContain('konsumgueter')
|
||||
expect(ids).toContain('bau')
|
||||
})
|
||||
|
||||
it('sollte keine Pseudo-Branchen enthalten (IoT, KI, HR, KRITIS, etc.)', () => {
|
||||
const ids = ragData.industries.map((i: any) => i.id)
|
||||
expect(ids).not.toContain('iot')
|
||||
expect(ids).not.toContain('ai')
|
||||
expect(ids).not.toContain('hr')
|
||||
expect(ids).not.toContain('kritis')
|
||||
expect(ids).not.toContain('ecommerce')
|
||||
expect(ids).not.toContain('tech')
|
||||
expect(ids).not.toContain('media')
|
||||
expect(ids).not.toContain('public')
|
||||
})
|
||||
|
||||
it('sollte 17 Dokumenttypen haben', () => {
|
||||
expect(ragData.doc_types.length).toBe(17)
|
||||
})
|
||||
|
||||
it('sollte mindestens 300 Dokumente haben', () => {
|
||||
expect(ragData.documents.length).toBeGreaterThanOrEqual(300)
|
||||
})
|
||||
|
||||
it('sollte jede Branche name und icon haben', () => {
|
||||
ragData.industries.forEach((ind: any) => {
|
||||
expect(ind).toHaveProperty('id')
|
||||
expect(ind).toHaveProperty('name')
|
||||
expect(ind).toHaveProperty('icon')
|
||||
expect(ind.name.length).toBeGreaterThan(0)
|
||||
})
|
||||
})
|
||||
|
||||
it('sollte jeden doc_type mit id, label, icon und sort haben', () => {
|
||||
ragData.doc_types.forEach((dt: any) => {
|
||||
expect(dt).toHaveProperty('id')
|
||||
expect(dt).toHaveProperty('label')
|
||||
expect(dt).toHaveProperty('icon')
|
||||
expect(dt).toHaveProperty('sort')
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
describe('rag-documents.json — Dokument-Validierung', () => {
|
||||
it('sollte keine doppelten Codes haben', () => {
|
||||
const codes = ragData.documents.map((d: any) => d.code)
|
||||
const unique = new Set(codes)
|
||||
expect(unique.size).toBe(codes.length)
|
||||
})
|
||||
|
||||
it('sollte Pflichtfelder bei jedem Dokument haben', () => {
|
||||
ragData.documents.forEach((doc: any) => {
|
||||
expect(doc).toHaveProperty('code')
|
||||
expect(doc).toHaveProperty('name')
|
||||
expect(doc).toHaveProperty('doc_type')
|
||||
expect(doc).toHaveProperty('industries')
|
||||
expect(doc).toHaveProperty('in_rag')
|
||||
expect(doc).toHaveProperty('rag_collection')
|
||||
expect(doc.code.length).toBeGreaterThan(0)
|
||||
expect(doc.name.length).toBeGreaterThan(0)
|
||||
expect(Array.isArray(doc.industries)).toBe(true)
|
||||
})
|
||||
})
|
||||
|
||||
it('sollte nur gueltige doc_type IDs verwenden', () => {
|
||||
ragData.documents.forEach((doc: any) => {
|
||||
expect(VALID_DOC_TYPE_IDS).toContain(doc.doc_type)
|
||||
})
|
||||
})
|
||||
|
||||
it('sollte nur gueltige industry IDs verwenden (oder "all")', () => {
|
||||
ragData.documents.forEach((doc: any) => {
|
||||
doc.industries.forEach((ind: string) => {
|
||||
if (ind !== 'all') {
|
||||
expect(VALID_INDUSTRY_IDS).toContain(ind)
|
||||
}
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
it('sollte gueltige rag_collection Namen verwenden', () => {
|
||||
const validCollections = [
|
||||
'bp_compliance_ce',
|
||||
'bp_compliance_gesetze',
|
||||
'bp_compliance_datenschutz',
|
||||
'bp_dsfa_corpus',
|
||||
'bp_legal_templates',
|
||||
'bp_compliance_recht',
|
||||
'bp_nibis_eh',
|
||||
]
|
||||
ragData.documents.forEach((doc: any) => {
|
||||
expect(validCollections).toContain(doc.rag_collection)
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
describe('rag-documents.json — Branchen-Zuordnungslogik', () => {
|
||||
const findDoc = (code: string) => ragData.documents.find((d: any) => d.code === code)
|
||||
|
||||
describe('Horizontale Regulierungen (alle Branchen)', () => {
|
||||
const horizontalCodes = [
|
||||
'GDPR', 'BDSG_FULL', 'EPRIVACY', 'TDDDG', 'AIACT', 'CRA',
|
||||
'NIS2', 'GPSR', 'PLD', 'EUCSA', 'DATAACT',
|
||||
]
|
||||
|
||||
horizontalCodes.forEach((code) => {
|
||||
it(`${code} sollte fuer alle Branchen gelten`, () => {
|
||||
const doc = findDoc(code)
|
||||
if (doc) {
|
||||
expect(doc.industries).toContain('all')
|
||||
}
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
describe('Sektorspezifische Regulierungen', () => {
|
||||
it('Maschinenverordnung sollte Maschinenbau, Automotive, Elektrotechnik enthalten', () => {
|
||||
const doc = findDoc('MACHINERY_REG')
|
||||
if (doc) {
|
||||
expect(doc.industries).toContain('maschinenbau')
|
||||
expect(doc.industries).toContain('automotive')
|
||||
expect(doc.industries).toContain('elektrotechnik')
|
||||
expect(doc.industries).not.toContain('all')
|
||||
}
|
||||
})
|
||||
|
||||
it('ElektroG sollte Elektrotechnik und Automotive enthalten', () => {
|
||||
const doc = findDoc('DE_ELEKTROG')
|
||||
if (doc) {
|
||||
expect(doc.industries).toContain('elektrotechnik')
|
||||
expect(doc.industries).toContain('automotive')
|
||||
}
|
||||
})
|
||||
|
||||
it('BattDG sollte Automotive und Elektrotechnik enthalten', () => {
|
||||
const doc = findDoc('DE_BATTDG')
|
||||
if (doc) {
|
||||
expect(doc.industries).toContain('automotive')
|
||||
expect(doc.industries).toContain('elektrotechnik')
|
||||
}
|
||||
})
|
||||
|
||||
it('ENISA ICS/SCADA sollte Energie, Maschinenbau, Chemie enthalten', () => {
|
||||
const doc = findDoc('ENISA_ICS_SCADA')
|
||||
if (doc) {
|
||||
expect(doc.industries).toContain('energie')
|
||||
expect(doc.industries).toContain('maschinenbau')
|
||||
expect(doc.industries).toContain('chemie')
|
||||
}
|
||||
})
|
||||
})
|
||||
|
||||
describe('Nicht zutreffende Regulierungen (Finanz/Medizin/Plattformen)', () => {
|
||||
const emptyIndustryCodes = ['DORA', 'PSD2', 'MiCA', 'AMLR', 'EHDS', 'DSA', 'DMA', 'MDR']
|
||||
|
||||
emptyIndustryCodes.forEach((code) => {
|
||||
it(`${code} sollte keine Branchen-Zuordnung haben`, () => {
|
||||
const doc = findDoc(code)
|
||||
if (doc) {
|
||||
expect(doc.industries).toHaveLength(0)
|
||||
}
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
describe('BSI-TR-03161 (DiGA) sollte nicht zutreffend sein', () => {
|
||||
['BSI-TR-03161-1', 'BSI-TR-03161-2', 'BSI-TR-03161-3'].forEach((code) => {
|
||||
it(`${code} sollte keine Branchen-Zuordnung haben`, () => {
|
||||
const doc = findDoc(code)
|
||||
if (doc) {
|
||||
expect(doc.industries).toHaveLength(0)
|
||||
}
|
||||
})
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
describe('rag-documents.json — Applicability Notes', () => {
|
||||
it('sollte applicability_note bei Dokumenten mit description haben', () => {
|
||||
const withDescription = ragData.documents.filter((d: any) => d.description)
|
||||
const withNote = withDescription.filter((d: any) => d.applicability_note)
|
||||
// Mindestens 90% der Dokumente mit Beschreibung sollten eine Note haben
|
||||
expect(withNote.length / withDescription.length).toBeGreaterThan(0.9)
|
||||
})
|
||||
|
||||
it('horizontale Regulierungen sollten "alle Branchen" in der Note erwaehnen', () => {
|
||||
const gdpr = ragData.documents.find((d: any) => d.code === 'GDPR')
|
||||
if (gdpr?.applicability_note) {
|
||||
expect(gdpr.applicability_note.toLowerCase()).toContain('alle branchen')
|
||||
}
|
||||
})
|
||||
|
||||
it('nicht zutreffende sollten "nicht zutreffend" in der Note erwaehnen', () => {
|
||||
const dora = ragData.documents.find((d: any) => d.code === 'DORA')
|
||||
if (dora?.applicability_note) {
|
||||
expect(dora.applicability_note.toLowerCase()).toContain('nicht zutreffend')
|
||||
}
|
||||
})
|
||||
})
|
||||
|
||||
describe('rag-documents.json — Dokumenttyp-Verteilung', () => {
|
||||
it('sollte Dokumente in jedem doc_type haben', () => {
|
||||
ragData.doc_types.forEach((dt: any) => {
|
||||
const count = ragData.documents.filter((d: any) => d.doc_type === dt.id).length
|
||||
expect(count).toBeGreaterThan(0)
|
||||
})
|
||||
})
|
||||
|
||||
it('sollte EU-Verordnungen als groesste Kategorie haben (mind. 15)', () => {
|
||||
const euRegs = ragData.documents.filter((d: any) => d.doc_type === 'eu_regulation')
|
||||
expect(euRegs.length).toBeGreaterThanOrEqual(15)
|
||||
})
|
||||
|
||||
it('sollte EDPB Leitlinien als umfangreichste Kategorie haben (mind. 40)', () => {
|
||||
const edpb = ragData.documents.filter((d: any) => d.doc_type === 'edpb_guideline')
|
||||
expect(edpb.length).toBeGreaterThanOrEqual(40)
|
||||
})
|
||||
})
|
||||
Reference in New Issue
Block a user