feat(coverage): Korpus-Dokumente gruppiert nach Art + Herausgeber-Familie
CI / dep-audit (push) Has been skipped
CI / test-python-backend (push) Successful in 27s
CI / test-python-document-crawler (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / test-python-dsms-gateway (push) Has been skipped
CI / build-sha-integrity (push) Successful in 14s
CI / validate-canonical-controls (push) Successful in 10s
CI / loc-budget (push) Successful in 25s
CI / go-lint (push) Has been skipped
CI / detect-changes (push) Successful in 19s
CI / python-lint (push) Has been skipped
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / secret-scan (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Successful in 3m8s
CI / test-go (push) Has been skipped
CI / iace-gt-coverage (push) Has been skipped
CI / dep-audit (push) Has been skipped
CI / test-python-backend (push) Successful in 27s
CI / test-python-document-crawler (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / test-python-dsms-gateway (push) Has been skipped
CI / build-sha-integrity (push) Successful in 14s
CI / validate-canonical-controls (push) Successful in 10s
CI / loc-budget (push) Successful in 25s
CI / go-lint (push) Has been skipped
CI / detect-changes (push) Successful in 19s
CI / python-lint (push) Has been skipped
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / secret-scan (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Successful in 3m8s
CI / test-go (push) Has been skipped
CI / iace-gt-coverage (push) Has been skipped
Die "Korpus-Dokumente"-Tabelle wird nach Dokument-Art geordnet (Gesetze & Verordnungen → Behörden-Leitfäden → Standards & Best Practice → Rechtsprechung) und mit Zwischenüberschriften versehen; innerhalb jeder Art werden die Dokumente je Herausgeber-Familie zusammengefasst (alle DSK, alle EDPB sowie OWASP, NIST und ENISA jeweils gemeinsam). Deterministischer Kategorisierer (categorizeCorpusDoc) und Grouper (groupCorpusDocs), beide pure und unit-getestet.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -8,10 +8,21 @@ import {
|
||||
splitByTier,
|
||||
severityBadgeClass,
|
||||
addresseeLabel,
|
||||
categorizeCorpusDoc,
|
||||
groupCorpusDocs,
|
||||
type UseCaseRow,
|
||||
type ControlItem,
|
||||
type CorpusDoc,
|
||||
} from './_helpers'
|
||||
|
||||
/**
 * Test fixture factory: builds a CorpusDoc for the given source name.
 *
 * @param src value stored as `source_regulation`
 * @param n value stored as `atom_count` (defaults to 1)
 * @returns a CorpusDoc whose remaining fields carry fixed placeholder values
 */
const doc = (src: string, n = 1): CorpusDoc => {
  const fixture: CorpusDoc = {
    source_regulation: src,
    license_rule: 1,
    license_tier: 't',
    atom_count: n,
    use_case: null,
  }
  return fixture
}
|
||||
|
||||
const ctrl = (over: Partial<ControlItem>): ControlItem => ({
|
||||
id: 'id',
|
||||
title: 'T',
|
||||
@@ -108,6 +119,46 @@ describe('coverage helpers', () => {
|
||||
expect(addresseeLabel('unbekannt_neu')).toBe('unbekannt_neu')
|
||||
})
|
||||
|
||||
it('categorizes corpus docs by type + issuer family', () => {
  // One row per source name; a field left out is simply not asserted for that row.
  const expectations: Array<{ src: string; key?: string; family?: string }> = [
    { src: 'DSGVO (EU) 2016/679', key: 'law' },
    { src: 'Medizinprodukteverordnung (EU) 2017/745 (MDR)', key: 'law' },
    { src: 'DSK OH Telemedien', key: 'guidance', family: 'DSK (Datenschutzkonferenz)' },
    { src: 'EDPB Fines Calculation', key: 'guidance', family: 'EDPB' },
    { src: 'OWASP Top 10 (2021)', key: 'standard', family: 'OWASP' },
    { src: 'NIST SP 800-53 Rev. 5', family: 'NIST' },
    { src: 'ENISA NIS2 Security Measures', family: 'ENISA' },
    { src: 'BGH I ZR 7/16', key: 'court' },
  ]

  for (const { src, key, family } of expectations) {
    const result = categorizeCorpusDoc(src)
    if (key !== undefined) expect(result.cat.key).toBe(key)
    if (family !== undefined) expect(result.family).toBe(family)
  }
})
|
||||
|
||||
it('groups corpus docs: laws → guidance → standards → court, families clustered', () => {
  // Deliberately shuffled input so the expected order has to come from the grouper.
  const input: CorpusDoc[] = [
    doc('OWASP Top 10', 10),
    doc('DSGVO (EU) 2016/679', 50),
    doc('DSK OH Telemedien', 5),
    doc('EDPB Fines', 8),
    doc('NIST SP 800-53', 20),
    doc('DSK OH Direktwerbung', 3),
    doc('BGH I ZR 7/16', 1),
  ]
  const groups = groupCorpusDocs(input)

  const categoryKeys = groups.map(({ key }) => key)
  expect(categoryKeys).toEqual(['law', 'guidance', 'standard', 'court'])

  const familiesOf = (key: string) => groups.find((group) => group.key === key)!.families

  // Both DSK documents end up in a single family bucket.
  const dskFamily = familiesOf('guidance').find(({ family }) => family.startsWith('DSK'))!
  expect(dskFamily.docs.length).toBe(2)

  // Families are ordered by descending total: NIST (20) ahead of OWASP (10).
  const standardFamilies = familiesOf('standard').map(({ family }) => family)
  expect(standardFamilies).toEqual(['NIST', 'OWASP'])
})
|
||||
|
||||
it('splitByTier separates core (relevant) from review', () => {
|
||||
const { core, review } = splitByTier([
|
||||
ctrl({ id: 'a', relevant: true }),
|
||||
|
||||
@@ -46,6 +46,97 @@ export interface CorpusOverview {
|
||||
totals: { documents: number; catalog_sources: number }
|
||||
}
|
||||
|
||||
// --- Corpus documents: grouped by kind (law / guidance / standard / court ruling)
// + issuer family (DSK, EDPB, OWASP, NIST …). Deterministic, pure. ---

/** Category a corpus document is filed under. */
interface DocCat {
  /** Machine-readable identifier of the category. */
  key: string
  /** Human-readable heading for the category. */
  label: string
  /** Sort position; categories are listed in ascending order. */
  order: number
}

const CAT_LAW: DocCat = { key: 'law', label: 'Gesetze & Verordnungen', order: 1 }
const CAT_GUIDANCE: DocCat = {
  key: 'guidance',
  label: 'Behörden-Leitfäden & Orientierungshilfen',
  order: 2,
}
const CAT_STANDARD: DocCat = {
  key: 'standard',
  label: 'Standards & Best Practice',
  order: 3,
}
const CAT_COURT: DocCat = { key: 'court', label: 'Rechtsprechung', order: 4 }

/** Issuer acronyms of technical standards; the acronym doubles as the family label. */
const STANDARD_ISSUER_KEYWORDS = ['OWASP', 'NIST', 'CISA', 'OECD', 'ENISA'] as const

/** Issuer acronyms (upper-case) of supervisory-authority guidance, with their family label. */
const GUIDANCE_ISSUER_KEYWORDS: ReadonlyArray<readonly [keyword: string, family: string]> = [
  ['EDPB', 'EDPB'],
  ['EDPS', 'EDPS'],
  ['WP29', 'WP29 (Art.-29-Gruppe)'],
  ['BFDI', 'BfDI'],
]

/** Matches one letter or digit of any script. */
const ISSUER_WORD_CHAR = /[\p{L}\p{N}]/u

/**
 * Reports whether `keyword` occurs in `text` at the start of a word, i.e. at
 * the very beginning or directly after a character that is neither a letter
 * nor a digit. Characters following the keyword are not restricted.
 */
function hasIssuerKeyword(text: string, keyword: string): boolean {
  for (let at = text.indexOf(keyword); at !== -1; at = text.indexOf(keyword, at + 1)) {
    if (at === 0 || !ISSUER_WORD_CHAR.test(text.charAt(at - 1))) return true
  }
  return false
}

/**
 * Classifies a corpus source name into a document category and issuer family.
 *
 * Matching is case-insensitive and purely name-based. Rules are tried in a
 * fixed order: technical standards, authority guidance, court rulings; any
 * name that matches none of them is treated as a law/regulation.
 *
 * Issuer acronyms must begin a word. A plain substring test would find "NIST"
 * inside "administrative" or "Bundesministerium" and misfile e.g. an EDPB
 * guideline on administrative fines as a NIST standard.
 *
 * @param src source/regulation name; an empty or missing value falls through
 *   to the law/regulation default
 * @returns the category plus the family label used for clustering
 */
export function categorizeCorpusDoc(src: string): { cat: DocCat; family: string } {
  const u = (src || '').toUpperCase()

  // Standards & best practice (technical issuer families)
  for (const issuer of STANDARD_ISSUER_KEYWORDS) {
    if (hasIssuerKeyword(u, issuer)) return { cat: CAT_STANDARD, family: issuer }
  }

  // Authority guidance (data-protection supervisors + EU Commission guides)
  if (u.startsWith('DSK')) {
    return { cat: CAT_GUIDANCE, family: 'DSK (Datenschutzkonferenz)' }
  }
  for (const [keyword, family] of GUIDANCE_ISSUER_KEYWORDS) {
    if (hasIssuerKeyword(u, keyword)) return { cat: CAT_GUIDANCE, family }
  }
  if (u.includes('EU MACHINERY GUIDE') || u.includes('EU BLUE GUIDE')) {
    return { cat: CAT_GUIDANCE, family: 'EU-Kommission (Guides)' }
  }

  // Court rulings
  if (u.startsWith('BGH') || u.startsWith('BVGER') || u.startsWith('EUGH')) {
    return { cat: CAT_COURT, family: 'Rechtsprechung' }
  }

  // Default: law / regulation / directive
  return { cat: CAT_LAW, family: 'Gesetze & Verordnungen' }
}
|
||||
|
||||
/** All corpus documents attributed to one issuer family. */
export interface CorpusFamilyGroup {
  /** Family label as returned by categorizeCorpusDoc. */
  family: string
  /** Sum of `atom_count` over `docs`. */
  total: number
  /** Member documents, largest `atom_count` first. */
  docs: CorpusDoc[]
}

/** One document category together with its issuer families. */
export interface CorpusCatGroup {
  key: string
  label: string
  order: number
  /** Sum of the family totals in this category. */
  total: number
  /** Families of this category, largest total first. */
  families: CorpusFamilyGroup[]
}

/**
 * Arranges corpus documents into categories and, inside each category, into
 * issuer families.
 *
 * Categories come back in ascending `order` (laws → guidance → standards →
 * court). Families within a category are sorted by descending total, and the
 * documents within a family by descending `atom_count`. Ties keep the order in
 * which the entries were first seen in `docs`. The input array and its
 * elements are not modified.
 *
 * @param docs corpus documents to arrange
 * @returns one entry per category that has at least one document
 */
export function groupCorpusDocs(docs: CorpusDoc[]): CorpusCatGroup[] {
  // Pass 1: bucket by category key, then by family label (first-seen order).
  const buckets = new Map<string, { cat: DocCat; fam: Map<string, CorpusDoc[]> }>()
  docs.forEach((entry) => {
    const { cat, family } = categorizeCorpusDoc(entry.source_regulation)
    let bucket = buckets.get(cat.key)
    if (bucket === undefined) {
      bucket = { cat, fam: new Map() }
      buckets.set(cat.key, bucket)
    }
    const members = bucket.fam.get(family)
    if (members === undefined) {
      bucket.fam.set(family, [entry])
    } else {
      members.push(entry)
    }
  })

  // Pass 2: turn every bucket into its sorted, totalled output shape.
  const grouped: CorpusCatGroup[] = []
  for (const { cat, fam } of buckets.values()) {
    const families: CorpusFamilyGroup[] = []
    for (const [family, members] of fam) {
      const ranked = members.slice()
      ranked.sort((left, right) => right.atom_count - left.atom_count)
      let familyTotal = 0
      for (const member of members) familyTotal = familyTotal + member.atom_count
      families.push({ family, docs: ranked, total: familyTotal })
    }
    families.sort((left, right) => right.total - left.total)

    let categoryTotal = 0
    for (const group of families) categoryTotal = categoryTotal + group.total

    grouped.push({
      key: cat.key,
      label: cat.label,
      order: cat.order,
      total: categoryTotal,
      families,
    })
  }

  grouped.sort((left, right) => left.order - right.order)
  return grouped
}
|
||||
|
||||
export const USE_CASE_GROUP_LABELS: Record<string, string> = {
|
||||
document: 'Dokument-Compliance',
|
||||
security: 'Security',
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import { Fragment } from 'react'
|
||||
import Link from 'next/link'
|
||||
import {
|
||||
type UseCaseRow,
|
||||
@@ -5,6 +6,7 @@ import {
|
||||
licenseTierBadgeClass,
|
||||
commercialBadgeClass,
|
||||
groupUseCases,
|
||||
groupCorpusDocs,
|
||||
} from './_helpers'
|
||||
|
||||
const BACKEND_URL =
|
||||
@@ -163,9 +165,35 @@ export default async function CoveragePage() {
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody className="divide-y divide-gray-100 bg-white">
|
||||
{(corpus?.documents ?? []).map((d) => (
|
||||
{groupCorpusDocs(corpus?.documents ?? []).map((cat) => (
|
||||
<Fragment key={cat.key}>
|
||||
<tr className="bg-gray-100">
|
||||
<td
|
||||
colSpan={4}
|
||||
className="px-4 py-2 text-sm font-semibold text-gray-800"
|
||||
>
|
||||
{cat.label}{' '}
|
||||
<span className="font-normal text-gray-500">
|
||||
({cat.families.reduce((s, f) => s + f.docs.length, 0)} Quellen ·{' '}
|
||||
{cat.total.toLocaleString('de-DE')} Pflichten)
|
||||
</span>
|
||||
</td>
|
||||
</tr>
|
||||
{cat.families.map((fam) => (
|
||||
<Fragment key={cat.key + fam.family}>
|
||||
<tr className="bg-gray-50">
|
||||
<td
|
||||
colSpan={4}
|
||||
className="px-4 py-1 pl-8 text-xs font-medium uppercase tracking-wide text-gray-500"
|
||||
>
|
||||
{fam.family}
|
||||
</td>
|
||||
</tr>
|
||||
{fam.docs.map((d) => (
|
||||
<tr key={d.source_regulation}>
|
||||
<td className="px-4 py-2 text-gray-900">{d.source_regulation}</td>
|
||||
<td className="px-4 py-2 pl-8 text-gray-900">
|
||||
{d.source_regulation}
|
||||
</td>
|
||||
<td className="px-4 py-2">
|
||||
<span
|
||||
className={`rounded px-2 py-0.5 text-xs font-medium ${licenseTierBadgeClass(d.license_rule)}`}
|
||||
@@ -174,12 +202,20 @@ export default async function CoveragePage() {
|
||||
Tier {d.license_rule ?? '?'}
|
||||
</span>
|
||||
</td>
|
||||
<td className="px-4 py-2 text-right">{d.atom_count.toLocaleString('de-DE')}</td>
|
||||
<td className="px-4 py-2 text-right">
|
||||
{d.atom_count.toLocaleString('de-DE')}
|
||||
</td>
|
||||
<td className="px-4 py-2 font-mono text-xs text-gray-600">
|
||||
{d.use_case ?? <span className="text-amber-600">— ungemappt</span>}
|
||||
{d.use_case ?? (
|
||||
<span className="text-amber-600">— ungemappt</span>
|
||||
)}
|
||||
</td>
|
||||
</tr>
|
||||
))}
|
||||
</Fragment>
|
||||
))}
|
||||
</Fragment>
|
||||
))}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
|
||||
Reference in New Issue
Block a user