From 8a0097f5dad8ffbc2c7229716eff6b0a3bb15f15 Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Tue, 16 Jun 2026 12:20:10 +0200 Subject: [PATCH] feat(coverage): Korpus-Dokumente gruppiert nach Art + Herausgeber-Familie MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Die "Korpus-Dokumente"-Tabelle wird nach Dokument-Art geordnet (Gesetze & Verordnungen → Behörden-Leitfäden → Standards & Best Practice → Rechtsprechung) mit Zwischenüberschriften, und je Herausgeber-Familie zusammengefasst (alle DSK, alle EDPB, alle OWASP/NIST/ENISA gemeinsam). Deterministischer Kategorisierer (categorizeCorpusDoc) + Grouper (groupCorpusDocs), pure + unit-getestet. Co-Authored-By: Claude Opus 4.8 --- .../app/sdk/coverage/_helpers.test.ts | 51 +++++++++++ admin-compliance/app/sdk/coverage/_helpers.ts | 91 +++++++++++++++++++ admin-compliance/app/sdk/coverage/page.tsx | 66 +++++++++++--- 3 files changed, 193 insertions(+), 15 deletions(-) diff --git a/admin-compliance/app/sdk/coverage/_helpers.test.ts b/admin-compliance/app/sdk/coverage/_helpers.test.ts index 5bbf20f9..7b6edbf8 100644 --- a/admin-compliance/app/sdk/coverage/_helpers.test.ts +++ b/admin-compliance/app/sdk/coverage/_helpers.test.ts @@ -8,10 +8,21 @@ import { splitByTier, severityBadgeClass, addresseeLabel, + categorizeCorpusDoc, + groupCorpusDocs, type UseCaseRow, type ControlItem, + type CorpusDoc, } from './_helpers' +const doc = (src: string, n = 1): CorpusDoc => ({ + source_regulation: src, + license_rule: 1, + license_tier: 't', + atom_count: n, + use_case: null, +}) + const ctrl = (over: Partial): ControlItem => ({ id: 'id', title: 'T', @@ -108,6 +119,46 @@ describe('coverage helpers', () => { expect(addresseeLabel('unbekannt_neu')).toBe('unbekannt_neu') }) + it('categorizes corpus docs by type + issuer family', () => { + expect(categorizeCorpusDoc('DSGVO (EU) 2016/679').cat.key).toBe('law') + expect(categorizeCorpusDoc('Medizinprodukteverordnung (EU) 2017/745 
(MDR)').cat.key).toBe('law') + expect(categorizeCorpusDoc('DSK OH Telemedien')).toMatchObject({ + cat: { key: 'guidance' }, + family: 'DSK (Datenschutzkonferenz)', + }) + expect(categorizeCorpusDoc('EDPB Fines Calculation')).toMatchObject({ + cat: { key: 'guidance' }, + family: 'EDPB', + }) + expect(categorizeCorpusDoc('OWASP Top 10 (2021)')).toMatchObject({ + cat: { key: 'standard' }, + family: 'OWASP', + }) + expect(categorizeCorpusDoc('NIST SP 800-53 Rev. 5').family).toBe('NIST') + expect(categorizeCorpusDoc('ENISA NIS2 Security Measures').family).toBe('ENISA') + expect(categorizeCorpusDoc('BGH I ZR 7/16').cat.key).toBe('court') + }) + + it('groups corpus docs: laws → guidance → standards → court, families clustered', () => { + const groups = groupCorpusDocs([ + doc('OWASP Top 10', 10), + doc('DSGVO (EU) 2016/679', 50), + doc('DSK OH Telemedien', 5), + doc('EDPB Fines', 8), + doc('NIST SP 800-53', 20), + doc('DSK OH Direktwerbung', 3), + doc('BGH I ZR 7/16', 1), + ]) + expect(groups.map((g) => g.key)).toEqual(['law', 'guidance', 'standard', 'court']) + const guidance = groups.find((g) => g.key === 'guidance')! + // two DSK docs collapse into one family + const dsk = guidance.families.find((f) => f.family.startsWith('DSK'))! + expect(dsk.docs.length).toBe(2) + const std = groups.find((g) => g.key === 'standard')! 
// --- Corpus documents: group by document type (law / guidance / standard /
// court ruling) and by issuer family (DSK, EDPB, OWASP, NIST …).
// Deterministic and pure. ---

/** A document category: stable key, UI label, and display order (ascending). */
interface DocCat {
  key: string
  label: string
  order: number
}

const CAT_LAW: DocCat = { key: 'law', label: 'Gesetze & Verordnungen', order: 1 }
const CAT_GUIDANCE: DocCat = {
  key: 'guidance',
  label: 'Behörden-Leitfäden & Orientierungshilfen',
  order: 2,
}
const CAT_STANDARD: DocCat = {
  key: 'standard',
  label: 'Standards & Best Practice',
  order: 3,
}
const CAT_COURT: DocCat = { key: 'court', label: 'Rechtsprechung', order: 4 }

/** Matches one Unicode letter or digit; anything else counts as a word separator. */
const WORD_CHAR = /[\p{L}\p{N}]/u

/** Standards families; the keyword doubles as the family label. Checked in this order. */
const STANDARD_FAMILIES: readonly string[] = ['OWASP', 'NIST', 'CISA', 'OECD', 'ENISA']

/**
 * Returns true when `keyword` occurs in `text` at the start of a word, i.e. at
 * index 0 or directly after a character that is neither a letter nor a digit.
 *
 * This is deliberately stricter than `text.includes(keyword)`: a plain
 * substring test reports "NIST" inside "BUNDESMINISTERIUM" or "ADMINISTRATION".
 * Only the left edge is checked, so suffixed forms such as "NISTIR" still match.
 */
function containsAtWordStart(text: string, keyword: string): boolean {
  let from = 0
  for (;;) {
    const at = text.indexOf(keyword, from)
    if (at === -1) return false
    if (at === 0 || !WORD_CHAR.test(text.charAt(at - 1))) return true
    from = at + 1
  }
}

/**
 * Classifies a corpus source name into a document category and issuer family.
 *
 * Matching is case-insensitive and ignores surrounding whitespace. Rules are
 * evaluated in a fixed order and the first hit wins: standards families, then
 * authority guidance, then court rulings. Anything unmatched is treated as a
 * law / regulation / directive.
 *
 * @param src Source name of the document; a falsy value is treated as ''.
 * @returns The category and the family label under which the document is grouped.
 */
export function categorizeCorpusDoc(src: string): { cat: DocCat; family: string } {
  const u = (src || '').trim().toUpperCase()
  const has = (keyword: string): boolean => containsAtWordStart(u, keyword)

  // Standards & best practice (technical families)
  for (const family of STANDARD_FAMILIES) {
    if (has(family)) return { cat: CAT_STANDARD, family }
  }

  // Authority guidance (data-protection supervisors + EU Commission guides)
  if (u.startsWith('DSK'))
    return { cat: CAT_GUIDANCE, family: 'DSK (Datenschutzkonferenz)' }
  if (has('EDPB')) return { cat: CAT_GUIDANCE, family: 'EDPB' }
  if (has('EDPS')) return { cat: CAT_GUIDANCE, family: 'EDPS' }
  if (has('WP29')) return { cat: CAT_GUIDANCE, family: 'WP29 (Art.-29-Gruppe)' }
  if (has('BFDI')) return { cat: CAT_GUIDANCE, family: 'BfDI' }
  if (has('EU MACHINERY GUIDE') || has('EU BLUE GUIDE'))
    return { cat: CAT_GUIDANCE, family: 'EU-Kommission (Guides)' }

  // Court rulings
  if (u.startsWith('BGH') || u.startsWith('BVGER') || u.startsWith('EUGH'))
    return { cat: CAT_COURT, family: 'Rechtsprechung' }

  // Default: law / regulation / directive
  return { cat: CAT_LAW, family: 'Gesetze & Verordnungen' }
}

/** All documents of one issuer family within a category. */
export interface CorpusFamilyGroup {
  family: string
  /** Sum of `atom_count` over `docs`. */
  total: number
  docs: CorpusDoc[]
}

/** One category with its issuer families. */
export interface CorpusCatGroup {
  key: string
  label: string
  order: number
  /** Sum of the `total` of all families in this category. */
  total: number
  families: CorpusFamilyGroup[]
}

// Group corpus docs by category (ordered: laws → guidance → standards → court),
// families within each sorted by size, docs within a family by size. So all DSK
// sit together, all EDPB together, all OWASP/NIST together, under headings.
/**
 * Groups corpus documents by category and, inside each category, by issuer
 * family, so that e.g. all DSK documents sit together under one heading.
 *
 * Ordering of the result:
 * - categories ascending by their `order` field,
 * - families within a category descending by summed `atom_count`,
 * - documents within a family descending by `atom_count`.
 *
 * The input array and its elements are not mutated.
 *
 * @param docs Corpus documents to group; each is classified via
 *   `categorizeCorpusDoc(d.source_regulation)`.
 * @returns One entry per category that has at least one document.
 */
export function groupCorpusDocs(docs: CorpusDoc[]): CorpusCatGroup[] {
  const buckets = new Map<string, { cat: DocCat; byFamily: Map<string, CorpusDoc[]> }>()

  for (const d of docs) {
    const { cat, family } = categorizeCorpusDoc(d.source_regulation)

    // Get-or-create the category bucket, keyed by the category's stable key.
    let bucket = buckets.get(cat.key)
    if (!bucket) {
      bucket = { cat, byFamily: new Map<string, CorpusDoc[]>() }
      buckets.set(cat.key, bucket)
    }

    // Get-or-create the family list inside that category.
    const members = bucket.byFamily.get(family)
    if (members) {
      members.push(d)
    } else {
      bucket.byFamily.set(family, [d])
    }
  }

  return [...buckets.values()]
    .map(({ cat, byFamily }) => {
      const families = [...byFamily.entries()]
        .map(([family, members]) => ({
          family,
          // Sort a copy so the bucket's insertion order is left untouched.
          docs: [...members].sort((a, b) => b.atom_count - a.atom_count),
          total: members.reduce((sum, m) => sum + m.atom_count, 0),
        }))
        .sort((a, b) => b.total - a.total)
      return {
        key: cat.key,
        label: cat.label,
        order: cat.order,
        total: families.reduce((sum, f) => sum + f.total, 0),
        families,
      }
    })
    .sort((a, b) => a.order - b.order)
}
'?'} + + + + {d.atom_count.toLocaleString('de-DE')} + + + {d.use_case ?? ( + — ungemappt + )} + + + ))} + + ))} + ))}