From 00f304fed9cc0f81e942224cedb8401e94850e2b Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Sun, 14 Jun 2026 21:49:22 +0200 Subject: [PATCH] =?UTF-8?q?feat(controls):=205=20neue=20Use=20Cases=20+=20?= =?UTF-8?q?Machinery-Fix=20+=20Korpus-/Lizenz-=C3=9Cbersicht?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Registry: arbeitsrecht, gesellschaftsrecht, insolvenzrecht, csrd, bafin_it + Mapper-Regeln für zuvor ungemappte Quell-Gesetze, Machinery-Guide 2006/42 -> maschinen. Jetzt 43 Use Cases (Achse 1 / license 1+2 vollständig). - corpus_overview Service + GET /v1/controls/corpus: Quell-Dokumente mit Lizenz-Tier + atom-Count + Use-Case + kuratiertem Lizenz-Katalog. - list_use_cases trägt atom_classification-Counts (atom_total/atom_relevant). - Frontend /sdk/coverage: Use-Case-Übersicht + Korpus-Dokumente + Lizenz-Katalog. - Tests: registry-Mappings (neue Domänen), corpus tier-labels, coverage-helpers. Co-Authored-By: Claude Opus 4.7 --- .../app/sdk/coverage/_helpers.test.ts | 55 +++++ admin-compliance/app/sdk/coverage/_helpers.ts | 105 ++++++++ admin-compliance/app/sdk/coverage/page.tsx | 228 ++++++++++++++++++ .../api/use_case_controls_routes.py | 10 + .../compliance/data/use_case_registry.py | 37 +++ .../compliance/services/corpus_overview.py | 93 +++++++ .../compliance/services/use_case_controls.py | 20 +- .../tests/test_corpus_overview.py | 17 ++ .../tests/test_use_case_registry.py | 22 ++ 9 files changed, 585 insertions(+), 2 deletions(-) create mode 100644 admin-compliance/app/sdk/coverage/_helpers.test.ts create mode 100644 admin-compliance/app/sdk/coverage/_helpers.ts create mode 100644 admin-compliance/app/sdk/coverage/page.tsx create mode 100644 backend-compliance/compliance/services/corpus_overview.py create mode 100644 backend-compliance/tests/test_corpus_overview.py diff --git a/admin-compliance/app/sdk/coverage/_helpers.test.ts b/admin-compliance/app/sdk/coverage/_helpers.test.ts new file mode 100644 index 00000000..5ed648bb --- /dev/null +++ b/admin-compliance/app/sdk/coverage/_helpers.test.ts @@ -0,0 +1,55 @@ +import { describe, it, expect } from 'vitest' +import { + licenseTierBadgeClass, + commercialBadgeClass, + groupUseCases, + type UseCaseRow, +} from './_helpers' + +const uc = (over: Partial): UseCaseRow => ({ + key: 'x', + label: 'X', + group: 'security', + regulations: [], + verification_methods: [], + mapped_controls: 0, + atom_total: 0, + atom_relevant: 0, + ...over, +}) + +describe('coverage helpers', () => { + it('license tier badge classes', () => { + expect(licenseTierBadgeClass(1)).toContain('green') + expect(licenseTierBadgeClass(2)).toContain('blue') + expect(licenseTierBadgeClass(3)).toContain('amber') + expect(licenseTierBadgeClass(null)).toContain('gray') + }) + + it('commercial-use badge classes', () => { + expect(commercialBadgeClass('allowed')).toContain('green') + expect(commercialBadgeClass('restricted')).toContain('amber') + expect(commercialBadgeClass('prohibited')).toContain('red') + expect(commercialBadgeClass(null)).toContain('gray') + }) + + it('groups use-cases in stable order and sorts by relevant desc', () => { + const groups = groupUseCases([ + uc({ key: 'a', group: 'security', atom_relevant: 5 }), + uc({ key: 'b', group: 'security', atom_relevant: 15 }), + uc({ key: 'c', group: 'document', atom_relevant: 1 }), + ]) + expect(groups[0].group).toBe('document') + expect(groups[1].group).toBe('security') + expect(groups[1].rows[0].key).toBe('b') + expect(groups[1].rows[1].key).toBe('a') + }) + + it('appends unknown groups after the known order', () => { + const groups = groupUseCases([ + uc({ key: 'z', group: 'mystery', atom_relevant: 9 }), + uc({ key: 'd', group: 'document', atom_relevant: 2 }), + ]) + expect(groups.map((g) => g.group)).toEqual(['document', 'mystery']) + }) +}) diff --git a/admin-compliance/app/sdk/coverage/_helpers.ts b/admin-compliance/app/sdk/coverage/_helpers.ts new file mode 100644 index 00000000..a04de891 --- /dev/null +++ b/admin-compliance/app/sdk/coverage/_helpers.ts @@ -0,0 +1,105 @@ +// Pure helpers for the Coverage page (#74). Kept separate so they are unit-testable +// without rendering the server component. + +export interface UseCaseRow { + key: string + label: string + group: string + regulations: string[] + verification_methods: string[] + mapped_controls: number + atom_total: number + atom_relevant: number +} + +export interface CorpusDoc { + source_regulation: string + license_rule: number | null + license_tier: string + atom_count: number + use_case: string | null +} + +export interface LicenseSummaryRow { + license_rule: number | null + label: string + atom_count: number +} + +export interface LicenseCatalogEntry { + source_id: string + title: string + publisher: string | null + url: string | null + version: string | null + license_id: string | null + license_name: string | null + commercial_use: string | null + ship_in_product: boolean | null + terms_url: string | null +} + +export interface CorpusOverview { + license_summary: LicenseSummaryRow[] + documents: CorpusDoc[] + license_catalog: LicenseCatalogEntry[] + totals: { documents: number; catalog_sources: number } +} + +export const USE_CASE_GROUP_LABELS: Record = { + document: 'Dokument-Compliance', + security: 'Security', + cross_cutting: 'Querschnitt', + product: 'Produkt / Sektor', +} + +export function licenseTierBadgeClass(rule: number | null): string { + switch (rule) { + case 1: + return 'bg-green-100 text-green-800' + case 2: + return 'bg-blue-100 text-blue-800' + case 3: + return 'bg-amber-100 text-amber-800' + default: + return 'bg-gray-100 text-gray-700' + } +} + +export function commercialBadgeClass(commercial: string | null): string { + switch ((commercial || '').toLowerCase()) { + case 'allowed': + return 'bg-green-100 text-green-800' + case 'restricted': + return 'bg-amber-100 text-amber-800' + case 'prohibited': + return 'bg-red-100 text-red-800' + default: + return 'bg-gray-100 text-gray-700' + } +} + +export interface UseCaseGroup { + group: string + label: string + rows: UseCaseRow[] +} + +// Group use-cases by their registry group (stable order), each group's rows +// sorted by how many relevant obligations it carries (desc). +export function groupUseCases(rows: UseCaseRow[]): UseCaseGroup[] { + const order = ['document', 'security', 'cross_cutting', 'product'] + const by: Record = {} + for (const r of rows) { + ;(by[r.group] ||= []).push(r) + } + const groups = order.filter((g) => by[g]?.length) + for (const g of Object.keys(by)) { + if (!order.includes(g)) groups.push(g) + } + return groups.map((g) => ({ + group: g, + label: USE_CASE_GROUP_LABELS[g] || g, + rows: [...by[g]].sort((a, b) => b.atom_relevant - a.atom_relevant), + })) +} diff --git a/admin-compliance/app/sdk/coverage/page.tsx b/admin-compliance/app/sdk/coverage/page.tsx new file mode 100644 index 00000000..096b9a65 --- /dev/null +++ b/admin-compliance/app/sdk/coverage/page.tsx @@ -0,0 +1,228 @@ +import { + type UseCaseRow, + type CorpusOverview, + licenseTierBadgeClass, + commercialBadgeClass, + groupUseCases, +} from './_helpers' + +const BACKEND_URL = + process.env.COMPLIANCE_BACKEND_URL || 'http://backend-compliance:8002' + +export const dynamic = 'force-dynamic' + +async function getData(): Promise<{ + useCases: UseCaseRow[] + corpus: CorpusOverview | null +}> { + try { + const [ucRes, corpusRes] = await Promise.all([ + fetch(`${BACKEND_URL}/api/compliance/v1/controls/use-cases`, { + cache: 'no-store', + }), + fetch(`${BACKEND_URL}/api/compliance/v1/controls/corpus`, { + cache: 'no-store', + }), + ]) + return { + useCases: ucRes.ok ? await ucRes.json() : [], + corpus: corpusRes.ok ? await corpusRes.json() : null, + } + } catch { + return { useCases: [], corpus: null } + } +} + +function Stat({ label, value }: { label: string; value: string | number }) { + return ( +
+
{value}
+
{label}
+
+ ) +} + +export default async function CoveragePage() { + const { useCases, corpus } = await getData() + const groups = groupUseCases(useCases) + const totalRelevant = useCases.reduce((s, u) => s + u.atom_relevant, 0) + const totalAtoms = useCases.reduce((s, u) => s + u.atom_total, 0) + + return ( +
+
+

+ Compliance-Abdeckung +

+

+ Alle ableitbaren Use Cases und alle Quell-Dokumente im Korpus inkl. + Lizenz — damit kein Thema und keine Quelle vergessen wird. +

+
+ +
+ + + + +
+ + {corpus?.license_summary?.length ? ( +
+

Lizenz-Verteilung

+
+ {corpus.license_summary.map((l) => ( +
+ + Tier {l.license_rule ?? '?'} + + {l.label} + + {l.atom_count.toLocaleString('de-DE')} + +
+ ))} +
+
+ ) : null} + +
+

Use Cases

+ {groups.map((g) => ( +
+

+ {g.label} ({g.rows.length}) +

+
+ + + + + + + + + + + + {g.rows.map((u) => ( + + + + + + + + ))} + +
Use CaseKeyrelevantklassifiziertQuellen
{u.label}{u.key} + {u.atom_relevant.toLocaleString('de-DE')} + + {u.atom_total.toLocaleString('de-DE')} + + {u.regulations.slice(0, 4).join(', ')} +
+
+
+ ))} +
+ +
+

+ Korpus-Dokumente ({corpus?.documents.length ?? 0}) +

+

+ Quell-Regulierung × Lizenz-Tier × Anzahl Pflichten × gemappter Use Case. +

+
+ + + + + + + + + + + {(corpus?.documents ?? []).map((d) => ( + + + + + + + ))} + +
Dokument / QuelleLizenzPflichtenUse Case
{d.source_regulation} + + Tier {d.license_rule ?? '?'} + + {d.atom_count.toLocaleString('de-DE')} + {d.use_case ?? — ungemappt} +
+
+
+ + {corpus?.license_catalog?.length ? ( +
+

+ Lizenz-Katalog ({corpus.license_catalog.length} kuratierte Quellen) +

+

+ Detaillierte Nutzungsrechte je kuratierter Quelle (kommerzielle + Nutzung, Auslieferung im Produkt). +

+
+ + + + + + + + + + + + {corpus.license_catalog.map((c) => ( + + + + + + + + ))} + +
QuelleHerausgeberLizenzkommerziellim Produkt
+ {c.terms_url ? ( + + {c.title} + + ) : ( + c.title + )} + {c.publisher ?? '—'}{c.license_name ?? c.license_id ?? '—'} + + {c.commercial_use ?? 'unbekannt'} + + + {c.ship_in_product ? 'ja' : 'nein'} +
+
+
+ ) : null} +
+ ) +} diff --git a/backend-compliance/compliance/api/use_case_controls_routes.py b/backend-compliance/compliance/api/use_case_controls_routes.py index 1fef4411..f9d864a8 100644 --- a/backend-compliance/compliance/api/use_case_controls_routes.py +++ b/backend-compliance/compliance/api/use_case_controls_routes.py @@ -16,6 +16,7 @@ from sqlalchemy.orm import Session from classroom_engine.database import get_db from compliance.api._http_errors import translate_domain_errors +from compliance.services.corpus_overview import corpus_overview from compliance.services.use_case_controls import UseCaseControlsService router = APIRouter(prefix="/v1/controls", tags=["use-case-controls"]) @@ -36,6 +37,15 @@ async def list_use_cases( return svc.list_use_cases() +@router.get("/corpus") +async def corpus(db: Session = Depends(get_db)) -> dict[str, Any]: + """Korpus-Übersicht: Quell-Dokumente (source_regulation) mit Lizenz-Tier + + Atom-Count + gemapptem Use Case, plus den kuratierten Lizenz-Katalog + (canonical_control_sources ⋈ licenses) mit Nutzungsrechten.""" + with translate_domain_errors(): + return corpus_overview(db) + + @router.get("/use-cases/{use_case}/controls") async def controls_for_use_case( use_case: str, diff --git a/backend-compliance/compliance/data/use_case_registry.py b/backend-compliance/compliance/data/use_case_registry.py index 80bdffc5..6778a52e 100644 --- a/backend-compliance/compliance/data/use_case_registry.py +++ b/backend-compliance/compliance/data/use_case_registry.py @@ -214,6 +214,23 @@ _USE_CASES: tuple[UseCase, ...] = ( UseCase("handelsrecht", "Handelsrecht", "document", regulations=("HGB", "UGB", "ABGB"), verification_methods=("document", "it_process")), + # ── Arbeits-/Gesellschafts-/Insolvenzrecht + ESG + Finanz-IT ───── + UseCase("arbeitsrecht", "Arbeitsrecht", "document", + regulations=("ArbVG", "AZG", "ArbZG", "MuSchG", "MiLoG", + "NachwG", "AngG", "ArG", "BUrlG"), + verification_methods=("document", "it_process")), + UseCase("gesellschaftsrecht", "Gesellschaftsrecht", "document", + regulations=("AktG", "GmbHG", "OR"), + verification_methods=("document", "it_process")), + UseCase("insolvenzrecht", "Insolvenzrecht", "document", + regulations=("InsO",), + verification_methods=("document", "it_process")), + UseCase("csrd", "Nachhaltigkeitsberichterstattung (CSRD)", "document", + regulations=("CSRD",), + verification_methods=("document", "it_process")), + UseCase("bafin_it", "BaFin IT-Aufsicht (VAIT/BAIT)", "security", + regulations=("VAIT", "BAIT"), + verification_methods=("it_process", "document", "network")), ) @@ -256,6 +273,7 @@ _REGULATION_RULES: tuple[tuple[str, str], ...] = ( ("medizinprodukte", "mdr"), ("(mdr)", "mdr"), ("maschinenverordnung", "maschinen"), + ("machinery", "maschinen"), ("batterie", "batterie"), ("health data space", "ehds"), ("produktsicherheit", "produktsicherheit"), @@ -304,6 +322,25 @@ _REGULATION_RULES: tuple[tuple[str, str], ...] = ( ("bao", "steuerrecht"), ("standardvertragsklauseln", "avv"), ("(scc)", "avv"), + # Arbeits-/Gesellschafts-/Insolvenzrecht + ESG + Finanz-IT-Aufsicht + ("arbeitsverfassungsgesetz", "arbeitsrecht"), + ("arbeitszeitgesetz", "arbeitsrecht"), + ("mutterschutzgesetz", "arbeitsrecht"), + ("mindestlohngesetz", "arbeitsrecht"), + ("nachweisgesetz", "arbeitsrecht"), + ("angestelltengesetz", "arbeitsrecht"), + ("bundesurlaubsgesetz", "arbeitsrecht"), + ("arbeitsgesetz", "arbeitsrecht"), + ("aktiengesetz", "gesellschaftsrecht"), + ("gmbh", "gesellschaftsrecht"), + ("obligationenrecht", "gesellschaftsrecht"), + ("insolvenzordnung", "insolvenzrecht"), + ("corporate sustainability", "csrd"), + ("csrd", "csrd"), + ("vait", "bafin_it"), + ("bait", "bafin_it"), + ("gobd", "steuerrecht"), + ("dienstleistungs-informationspflichten", "impressum"), # Datenschutz-Catch-alls (zuletzt) ("nist privacy framework", "dse"), ("dsgvo", "dse"), diff --git a/backend-compliance/compliance/services/corpus_overview.py b/backend-compliance/compliance/services/corpus_overview.py new file mode 100644 index 00000000..080aaba2 --- /dev/null +++ b/backend-compliance/compliance/services/corpus_overview.py @@ -0,0 +1,93 @@ +"""Corpus + license overview — which source documents are in the corpus and +under which license / usage rights. Read-only; backs the admin coverage page so +the team can SEE every use-case and every ingested document with its license +(and not forget any). See use_case_controls for the per-topic retrieval. +""" + +from __future__ import annotations + +from typing import Any, Optional + +from sqlalchemy import text +from sqlalchemy.orm import Session + +from compliance.data.use_case_registry import use_case_for_regulation + +# canonical_controls.license_rule is a coarse 3-tier flag (the detailed terms +# live in canonical_control_licenses, keyed per curated source). +_TIER: dict[int, str] = { + 1: "Öffentlich / frei nutzbar (Public Domain, EU-Recht)", + 2: "Offen mit Attribution (CC-BY / CC-BY-SA)", + 3: "Eigenformulierung / eingeschränkt", +} +_LIVE = ("cc.decomposition_method = 'pass0b' " + "AND cc.release_state NOT IN ('deprecated', 'duplicate', 'rejected')") + + +def license_tier_label(rule: Optional[int]) -> str: + """Human label for the coarse license_rule tier. Pure → unit-testable.""" + return _TIER.get(rule or 0, "unbekannt") + + +def corpus_overview(db: Session) -> dict[str, Any]: + """Three views for the coverage page: (1) atom counts per license tier, + (2) every source document (source_regulation) with tier + count + mapped + use-case, (3) the curated license catalog with detailed usage rights.""" + summary = [ + { + "license_rule": int(r[0]) if r[0] is not None else None, + "label": license_tier_label(r[0]), + "atom_count": int(r[1]), + } + for r in db.execute(text( + f"SELECT cc.license_rule, count(*) FROM canonical_controls cc " + f"WHERE {_LIVE} GROUP BY cc.license_rule ORDER BY cc.license_rule" + )).fetchall() + ] + + documents = [ + { + "source_regulation": r.src, + "license_rule": int(r.lic) if r.lic is not None else None, + "license_tier": license_tier_label(r.lic), + "atom_count": int(r.n), + "use_case": use_case_for_regulation(r.src), + } + for r in db.execute(text( + f"SELECT cpl.source_regulation AS src, max(cc.license_rule) AS lic, " + f"count(DISTINCT cc.id) AS n FROM canonical_controls cc " + f"JOIN control_parent_links cpl ON cpl.control_uuid = cc.id " + f"WHERE {_LIVE} AND coalesce(cpl.source_regulation, '') <> '' " + f"GROUP BY cpl.source_regulation ORDER BY n DESC" + )).fetchall() + ] + + catalog: list[dict[str, Any]] = [] + if db.execute(text( + "SELECT to_regclass('compliance.canonical_control_sources')" + )).scalar() is not None: + catalog = [ + { + "source_id": r.source_id, "title": r.title, + "publisher": r.publisher, "url": r.url, "version": r.version_label, + "license_id": r.license_id, "license_name": r.license_name, + "commercial_use": r.commercial_use, + "ship_in_product": r.allowed_ship_in_product, + "terms_url": r.terms_url, + } + for r in db.execute(text( + "SELECT s.source_id, s.title, s.publisher, s.url, s.version_label, " + "s.license_id, s.allowed_ship_in_product, l.name AS license_name, " + "l.commercial_use, l.terms_url " + "FROM canonical_control_sources s " + "LEFT JOIN canonical_control_licenses l ON l.license_id = s.license_id " + "ORDER BY s.publisher NULLS LAST, s.title" + )).fetchall() + ] + + return { + "license_summary": summary, + "documents": documents, + "license_catalog": catalog, + "totals": {"documents": len(documents), "catalog_sources": len(catalog)}, + } diff --git a/backend-compliance/compliance/services/use_case_controls.py b/backend-compliance/compliance/services/use_case_controls.py index 32e21bb4..6db8efb9 100644 --- a/backend-compliance/compliance/services/use_case_controls.py +++ b/backend-compliance/compliance/services/use_case_controls.py @@ -98,7 +98,9 @@ class UseCaseControlsService: self.db = db def list_use_cases(self) -> list[dict[str, Any]]: - """Registry use-cases with their live mapped-control counts.""" + """Registry use-cases with live counts — atom-grain (Haiku classification) + plus the legacy master seed. Backs the coverage overview so every topic is + visible with how many obligations it actually carries.""" counts = { row[0]: int(row[1]) for row in self.db.execute(text( @@ -106,6 +108,17 @@ class UseCaseControlsService: "GROUP BY use_case" )).fetchall() } + atom: dict[str, tuple[int, int]] = {} + if self.db.execute(text( + "SELECT to_regclass('compliance.atom_classification')" + )).scalar() is not None: + atom = { + row[0]: (int(row[1]), int(row[2])) + for row in self.db.execute(text( + "SELECT use_case, count(*), count(*) FILTER (WHERE relevant) " + "FROM atom_classification GROUP BY use_case" + )).fetchall() + } out = [ { "key": uc.key, @@ -114,10 +127,13 @@ class UseCaseControlsService: "regulations": list(uc.regulations), "verification_methods": list(uc.verification_methods), "mapped_controls": counts.get(uc.key, 0), + "atom_total": atom.get(uc.key, (0, 0))[0], + "atom_relevant": atom.get(uc.key, (0, 0))[1], } for uc in REGISTRY.values() if uc.enabled ] - out.sort(key=lambda x: x["mapped_controls"], reverse=True) + out.sort(key=lambda x: (x["atom_relevant"], x["mapped_controls"]), + reverse=True) return out def controls_for_use_case( diff --git a/backend-compliance/tests/test_corpus_overview.py b/backend-compliance/tests/test_corpus_overview.py new file mode 100644 index 00000000..6d5d53fd --- /dev/null +++ b/backend-compliance/tests/test_corpus_overview.py @@ -0,0 +1,17 @@ +"""Tests fuer die Korpus-/Lizenz-Uebersicht (#74).""" + +from __future__ import annotations + +from compliance.services.corpus_overview import license_tier_label + + +def test_license_tier_labels(): + assert "frei nutzbar" in license_tier_label(1) + assert "Attribution" in license_tier_label(2) + assert "Eigenformulierung" in license_tier_label(3) + + +def test_license_tier_label_unknown_safe(): + assert license_tier_label(None) == "unbekannt" + assert license_tier_label(0) == "unbekannt" + assert license_tier_label(99) == "unbekannt" diff --git a/backend-compliance/tests/test_use_case_registry.py b/backend-compliance/tests/test_use_case_registry.py index 6d4f5e7c..1b0cb1a2 100644 --- a/backend-compliance/tests/test_use_case_registry.py +++ b/backend-compliance/tests/test_use_case_registry.py @@ -132,6 +132,28 @@ def test_regulation_mapper_known(): assert reg.use_case_for_regulation(reg_str) == expected, reg_str +def test_regulation_mapper_new_domains(): + # 2026-06-14: zuvor ungemappte Quell-Gesetze -> neue Use Cases + Fixes. + cases = { + "Arbeitsverfassungsgesetz (ArbVG)": "arbeitsrecht", + "Arbeitszeitgesetz (AZG)": "arbeitsrecht", + "Mutterschutzgesetz (MuSchG)": "arbeitsrecht", + "Mindestlohngesetz (MiLoG)": "arbeitsrecht", + "Arbeitsgesetz (ArG)": "arbeitsrecht", + "Aktiengesetz (AktG)": "gesellschaftsrecht", + "GmbH-Gesetz (GmbHG)": "gesellschaftsrecht", + "Obligationenrecht (OR)": "gesellschaftsrecht", + "Insolvenzordnung (InsO)": "insolvenzrecht", + "Corporate Sustainability Reporting Directive (CSRD)": "csrd", + "VAIT (BaFin 2022)": "bafin_it", + "BAIT (BaFin 2024)": "bafin_it", + "EU Machinery Guide 2006/42": "maschinen", + "GoBD (BMF-Schreiben 2025)": "steuerrecht", + } + for reg_str, expected in cases.items(): + assert reg.use_case_for_regulation(reg_str) == expected, reg_str + + def test_regulation_mapper_impressum_misroutes_fixed(): # Phase A: Telekom-/Datenschutz-/Gewerbe-Gesetze duerfen NICHT mehr als # Impressum durchgehen (Korpus enthaelt kein echtes Impressumsrecht ausser