feat(coverage): Korpus-Dokumente gruppiert nach Art + Herausgeber-Familie
CI / dep-audit (push) Has been skipped
CI / test-python-backend (push) Successful in 27s
CI / test-python-document-crawler (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / test-python-dsms-gateway (push) Has been skipped
CI / build-sha-integrity (push) Successful in 14s
CI / validate-canonical-controls (push) Successful in 10s
CI / loc-budget (push) Successful in 25s
CI / go-lint (push) Has been skipped
CI / detect-changes (push) Successful in 19s
CI / python-lint (push) Has been skipped
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / secret-scan (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Successful in 3m8s
CI / test-go (push) Has been skipped
CI / iace-gt-coverage (push) Has been skipped
CI / dep-audit (push) Has been skipped
CI / test-python-backend (push) Successful in 27s
CI / test-python-document-crawler (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / test-python-dsms-gateway (push) Has been skipped
CI / build-sha-integrity (push) Successful in 14s
CI / validate-canonical-controls (push) Successful in 10s
CI / loc-budget (push) Successful in 25s
CI / go-lint (push) Has been skipped
CI / detect-changes (push) Successful in 19s
CI / python-lint (push) Has been skipped
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / secret-scan (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Successful in 3m8s
CI / test-go (push) Has been skipped
CI / iace-gt-coverage (push) Has been skipped
Die "Korpus-Dokumente"-Tabelle wird nach Dokument-Art geordnet (Gesetze & Verordnungen → Behörden-Leitfäden → Standards & Best Practice → Rechtsprechung) mit Zwischenüberschriften und je Herausgeber-Familie zusammengefasst (alle DSK, alle EDPB, alle OWASP/NIST/ENISA jeweils gemeinsam).

Deterministischer Kategorisierer (categorizeCorpusDoc) + Grouper (groupCorpusDocs), pure + unit-getestet.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -46,6 +46,97 @@ export interface CorpusOverview {
|
||||
totals: { documents: number; catalog_sources: number }
|
||||
}
|
||||
|
||||
// --- Corpus documents: grouped by kind (law / guidance / standard / court ruling)
// + issuer family (DSK, EDPB, OWASP, NIST …). Deterministic and pure. ---

/** Display category a corpus document is filed under. */
interface DocCat {
  /** Stable identifier of the category. */
  key: string
  /** Human-readable (German) category label. */
  label: string
  /** Position of the category relative to the others (lower comes first). */
  order: number
}

const CAT_LAW: DocCat = { key: 'law', label: 'Gesetze & Verordnungen', order: 1 }
const CAT_GUIDANCE: DocCat = {
  key: 'guidance',
  label: 'Behörden-Leitfäden & Orientierungshilfen',
  order: 2,
}
const CAT_STANDARD: DocCat = {
  key: 'standard',
  label: 'Standards & Best Practice',
  order: 3,
}
const CAT_COURT: DocCat = { key: 'court', label: 'Rechtsprechung', order: 4 }

// Issuer acronyms of technical standards; the acronym doubles as the family name.
const STANDARD_ISSUERS: readonly string[] = ['OWASP', 'NIST', 'CISA', 'OECD', 'ENISA']

// Issuer acronyms of guidance documents as [upper-case acronym, family name].
const GUIDANCE_ISSUERS: ReadonlyArray<readonly [string, string]> = [
  ['EDPB', 'EDPB'],
  ['EDPS', 'EDPS'],
  ['WP29', 'WP29 (Art.-29-Gruppe)'],
  ['BFDI', 'BfDI'],
]

// Upper-case prefixes that mark a source as a court decision.
const COURT_PREFIXES: readonly string[] = ['BGH', 'BVGER', 'EUGH']

/** True when `ch` is a cased letter, i.e. its upper- and lower-case forms differ. */
function isCasedLetter(ch: string): boolean {
  return ch !== '' && ch.toLowerCase() !== ch.toUpperCase()
}

/**
 * Reports whether `acronym` occurs in `text` at the start of a word, meaning it
 * is not directly preceded by a letter.
 *
 * A plain substring test is too loose: 'NIST' is contained in
 * 'BUNDESMINISTERIUM' and 'ADMINISTRATION'. Only the leading edge is checked so
 * that compound designations such as 'NISTIR 8259' still match.
 */
function containsIssuerAcronym(text: string, acronym: string): boolean {
  let at = text.indexOf(acronym)
  while (at !== -1) {
    if (at === 0 || !isCasedLetter(text.charAt(at - 1))) return true
    at = text.indexOf(acronym, at + 1)
  }
  return false
}

/**
 * Classifies a corpus document by its source name into a display category and
 * an issuer family. Matching is case-insensitive and ignores surrounding
 * whitespace. Rules are evaluated in this order; the first hit wins:
 *
 * 1. standards issuers (OWASP, NIST, CISA, OECD, ENISA),
 * 2. guidance issuers (DSK as prefix, then EDPB, EDPS, WP29, BfDI, EU guides),
 * 3. court decisions (prefix BGH, BVGER or EUGH),
 * 4. everything else is treated as a law/regulation.
 *
 * @param src Source/regulation name of the document; empty or missing values
 *   fall through to the default category.
 * @returns The category and the family name used for grouping.
 */
export function categorizeCorpusDoc(src: string): { cat: DocCat; family: string } {
  // `||` (not `??`) on purpose: any falsy value is treated as an empty name.
  const u = (src || '').trim().toUpperCase()

  // Standards & best practice (technical families)
  for (const issuer of STANDARD_ISSUERS) {
    if (containsIssuerAcronym(u, issuer)) return { cat: CAT_STANDARD, family: issuer }
  }

  // Authority guidance (data-protection supervisors + EU Commission guides)
  if (u.startsWith('DSK')) {
    return { cat: CAT_GUIDANCE, family: 'DSK (Datenschutzkonferenz)' }
  }
  for (const [acronym, family] of GUIDANCE_ISSUERS) {
    if (containsIssuerAcronym(u, acronym)) return { cat: CAT_GUIDANCE, family }
  }
  if (u.includes('EU MACHINERY GUIDE') || u.includes('EU BLUE GUIDE')) {
    return { cat: CAT_GUIDANCE, family: 'EU-Kommission (Guides)' }
  }

  // Case law
  if (COURT_PREFIXES.some((prefix) => u.startsWith(prefix))) {
    return { cat: CAT_COURT, family: 'Rechtsprechung' }
  }

  // Default: law / regulation / directive
  return { cat: CAT_LAW, family: 'Gesetze & Verordnungen' }
}
|
||||
|
||||
/** All corpus documents of one issuer family, as produced by `groupCorpusDocs`. */
export interface CorpusFamilyGroup {
  /** Family name returned by `categorizeCorpusDoc`. */
  family: string
  /** Sum of `atom_count` over `docs`. */
  total: number
  /** Member documents; `groupCorpusDocs` orders them by `atom_count`, largest first. */
  docs: CorpusDoc[]
}
|
||||
|
||||
/** One document category with its issuer families, as produced by `groupCorpusDocs`. */
export interface CorpusCatGroup {
  /** Category identifier (copied from the category's `key`). */
  key: string
  /** Category label (copied from the category's `label`). */
  label: string
  /** Sort position of the category; `groupCorpusDocs` returns groups ascending by it. */
  order: number
  /** Sum of the `total` values of all `families`. */
  total: number
  /** Families in this category; `groupCorpusDocs` orders them by `total`, largest first. */
  families: CorpusFamilyGroup[]
}
|
||||
|
||||
/**
 * Groups corpus documents into categories and, inside each category, into
 * issuer families, based on `categorizeCorpusDoc(doc.source_regulation)`.
 *
 * Ordering of the result:
 * - categories ascending by `order` (laws → guidance → standards → court),
 * - families within a category descending by summed `atom_count`,
 * - documents within a family descending by `atom_count`.
 *
 * So all DSK documents sit together, all EDPB together, and so on, each under
 * its category. The input array is not reordered; every family holds a sorted copy.
 *
 * @param docs Corpus documents to group.
 * @returns One group per category that contains at least one document.
 */
export function groupCorpusDocs(docs: CorpusDoc[]): CorpusCatGroup[] {
  // Pass 1: bucket documents as category key → family name → documents.
  const buckets = new Map<string, { cat: DocCat; fam: Map<string, CorpusDoc[]> }>()
  docs.forEach((doc) => {
    const { cat, family } = categorizeCorpusDoc(doc.source_regulation)
    let bucket = buckets.get(cat.key)
    if (bucket === undefined) {
      bucket = { cat, fam: new Map() }
      buckets.set(cat.key, bucket)
    }
    const members = bucket.fam.get(family)
    if (members === undefined) {
      bucket.fam.set(family, [doc])
    } else {
      members.push(doc)
    }
  })

  // Pass 2: convert every bucket into its sorted, totalled output shape.
  const groups: CorpusCatGroup[] = []
  for (const { cat, fam } of buckets.values()) {
    const families: CorpusFamilyGroup[] = []
    for (const [family, members] of fam) {
      let atomSum = 0
      for (const member of members) atomSum += member.atom_count
      // Sort a copy so the bucket (and the caller's objects' order) stays untouched.
      const ranked = members.slice().sort((x, y) => y.atom_count - x.atom_count)
      families.push({ family, docs: ranked, total: atomSum })
    }
    families.sort((x, y) => y.total - x.total)

    // Category total is accumulated over the already sorted families.
    let catTotal = 0
    for (const entry of families) catTotal += entry.total
    groups.push({
      key: cat.key,
      label: cat.label,
      order: cat.order,
      total: catTotal,
      families,
    })
  }
  return groups.sort((x, y) => x.order - y.order)
}
|
||||
|
||||
export const USE_CASE_GROUP_LABELS: Record<string, string> = {
|
||||
document: 'Dokument-Compliance',
|
||||
security: 'Security',
|
||||
|
||||
Reference in New Issue
Block a user