feat(controls): 5 neue Use Cases + Machinery-Fix + Korpus-/Lizenz-Übersicht
CI / detect-changes (push) Successful in 14s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / secret-scan (push) Has been skipped
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / build-sha-integrity (push) Successful in 11s
CI / validate-canonical-controls (push) Failing after 5s
CI / loc-budget (push) Successful in 22s
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / test-go (push) Successful in 1m11s
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Successful in 3m41s
CI / iace-gt-coverage (push) Failing after 5s
CI / test-python-backend (push) Failing after 5s
CI / test-python-document-crawler (push) Has been skipped
CI / test-python-dsms-gateway (push) Has been skipped

- Registry: arbeitsrecht, gesellschaftsrecht, insolvenzrecht, csrd, bafin_it
  + Mapper-Regeln für zuvor ungemappte Quell-Gesetze, Machinery-Guide 2006/42
  -> maschinen. Jetzt 43 Use Cases (Achse 1 / license 1+2 vollständig).
- corpus_overview Service + GET /v1/controls/corpus: Quell-Dokumente mit
  Lizenz-Tier + atom-Count + Use-Case + kuratiertem Lizenz-Katalog.
- list_use_cases trägt atom_classification-Counts (atom_total/atom_relevant).
- Frontend /sdk/coverage: Use-Case-Übersicht + Korpus-Dokumente + Lizenz-Katalog.
- Tests: registry-Mappings (neue Domänen), corpus tier-labels, coverage-helpers.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-06-14 21:49:22 +02:00
parent 60f988f3cb
commit 00f304fed9
9 changed files with 585 additions and 2 deletions
@@ -16,6 +16,7 @@ from sqlalchemy.orm import Session
from classroom_engine.database import get_db
from compliance.api._http_errors import translate_domain_errors
from compliance.services.corpus_overview import corpus_overview
from compliance.services.use_case_controls import UseCaseControlsService
router = APIRouter(prefix="/v1/controls", tags=["use-case-controls"])
@@ -36,6 +37,15 @@ async def list_use_cases(
return svc.list_use_cases()
@router.get("/corpus")
async def corpus(db: Session = Depends(get_db)) -> dict[str, Any]:
    """Return the corpus overview for the coverage page.

    The payload is whatever ``corpus_overview`` produces: the source
    documents (``source_regulation``) with license tier, atom count and
    mapped use case, plus the curated license catalog
    (``canonical_control_sources`` joined with its licenses) including the
    usage rights.
    """
    with translate_domain_errors():
        # Keep the return inside the context manager so the call and the
        # hand-off of its result are both covered by the error translation.
        overview = corpus_overview(db)
        return overview
@router.get("/use-cases/{use_case}/controls")
async def controls_for_use_case(
use_case: str,
@@ -214,6 +214,23 @@ _USE_CASES: tuple[UseCase, ...] = (
UseCase("handelsrecht", "Handelsrecht", "document",
regulations=("HGB", "UGB", "ABGB"),
verification_methods=("document", "it_process")),
# ── Arbeits-/Gesellschafts-/Insolvenzrecht + ESG + Finanz-IT ─────
UseCase("arbeitsrecht", "Arbeitsrecht", "document",
regulations=("ArbVG", "AZG", "ArbZG", "MuSchG", "MiLoG",
"NachwG", "AngG", "ArG", "BUrlG"),
verification_methods=("document", "it_process")),
UseCase("gesellschaftsrecht", "Gesellschaftsrecht", "document",
regulations=("AktG", "GmbHG", "OR"),
verification_methods=("document", "it_process")),
UseCase("insolvenzrecht", "Insolvenzrecht", "document",
regulations=("InsO",),
verification_methods=("document", "it_process")),
UseCase("csrd", "Nachhaltigkeitsberichterstattung (CSRD)", "document",
regulations=("CSRD",),
verification_methods=("document", "it_process")),
UseCase("bafin_it", "BaFin IT-Aufsicht (VAIT/BAIT)", "security",
regulations=("VAIT", "BAIT"),
verification_methods=("it_process", "document", "network")),
)
@@ -256,6 +273,7 @@ _REGULATION_RULES: tuple[tuple[str, str], ...] = (
("medizinprodukte", "mdr"),
("(mdr)", "mdr"),
("maschinenverordnung", "maschinen"),
("machinery", "maschinen"),
("batterie", "batterie"),
("health data space", "ehds"),
("produktsicherheit", "produktsicherheit"),
@@ -304,6 +322,25 @@ _REGULATION_RULES: tuple[tuple[str, str], ...] = (
("bao", "steuerrecht"),
("standardvertragsklauseln", "avv"),
("(scc)", "avv"),
# Arbeits-/Gesellschafts-/Insolvenzrecht + ESG + Finanz-IT-Aufsicht
("arbeitsverfassungsgesetz", "arbeitsrecht"),
("arbeitszeitgesetz", "arbeitsrecht"),
("mutterschutzgesetz", "arbeitsrecht"),
("mindestlohngesetz", "arbeitsrecht"),
("nachweisgesetz", "arbeitsrecht"),
("angestelltengesetz", "arbeitsrecht"),
("bundesurlaubsgesetz", "arbeitsrecht"),
("arbeitsgesetz", "arbeitsrecht"),
("aktiengesetz", "gesellschaftsrecht"),
("gmbh", "gesellschaftsrecht"),
("obligationenrecht", "gesellschaftsrecht"),
("insolvenzordnung", "insolvenzrecht"),
("corporate sustainability", "csrd"),
("csrd", "csrd"),
("vait", "bafin_it"),
("bait", "bafin_it"),
("gobd", "steuerrecht"),
("dienstleistungs-informationspflichten", "impressum"),
# Datenschutz-Catch-alls (zuletzt)
("nist privacy framework", "dse"),
("dsgvo", "dse"),
@@ -0,0 +1,93 @@
"""Corpus + license overview — which source documents are in the corpus and
under which license / usage rights. Read-only; backs the admin coverage page so
the team can SEE every use-case and every ingested document with its license
(and not forget any). See use_case_controls for the per-topic retrieval.
"""
from __future__ import annotations
from typing import Any, Optional
from sqlalchemy import text
from sqlalchemy.orm import Session
from compliance.data.use_case_registry import use_case_for_regulation
# canonical_controls.license_rule is a coarse 3-tier flag (the detailed terms
# live in canonical_control_licenses, keyed per curated source).
# This table maps each tier number to the German display label handed out by
# license_tier_label(); tiers without an entry here have no label of their own.
_TIER: dict[int, str] = {
1: "Öffentlich / frei nutzbar (Public Domain, EU-Recht)",
2: "Offen mit Attribution (CC-BY / CC-BY-SA)",
3: "Eigenformulierung / eingeschränkt",
}
# Shared SQL WHERE fragment interpolated into the canonical_controls queries of
# this module. It expects the table to be aliased as ``cc`` and keeps only rows
# whose decomposition_method is 'pass0b' and whose release_state is none of
# deprecated / duplicate / rejected.
_LIVE = ("cc.decomposition_method = 'pass0b' "
"AND cc.release_state NOT IN ('deprecated', 'duplicate', 'rejected')")
def license_tier_label(rule: Optional[int]) -> str:
    """Return the display label for a coarse ``license_rule`` tier.

    ``None`` (and any other falsy value) is looked up as tier ``0``; every
    tier without an entry in ``_TIER`` yields ``"unbekannt"``. The function
    only reads the module-level table, so it can be unit-tested without a
    database.
    """
    tier = rule if rule else 0
    return _TIER.get(tier, "unbekannt")
def corpus_overview(db: Session) -> dict[str, Any]:
    """Assemble the three read-only views shown on the coverage page.

    Args:
        db: Open SQLAlchemy session used for all queries.

    Returns:
        A dict with
        ``license_summary`` - atom counts per license tier,
        ``documents`` - every source document (``source_regulation``) with
        tier, atom count and mapped use case,
        ``license_catalog`` - the curated sources with their detailed usage
        rights (empty when the catalog table is absent), and
        ``totals`` - the number of entries in ``documents`` and
        ``license_catalog``.
    """
    # The helpers run in the same order as the original inline queries.
    summary = _license_summary(db)
    documents = _source_documents(db)
    catalog = _license_catalog(db)
    return {
        "license_summary": summary,
        "documents": documents,
        "license_catalog": catalog,
        "totals": {"documents": len(documents), "catalog_sources": len(catalog)},
    }


def _license_summary(db: Session) -> list[dict[str, Any]]:
    """Count the controls matching ``_LIVE`` per ``license_rule`` value."""
    rows = db.execute(text(
        f"SELECT cc.license_rule, count(*) FROM canonical_controls cc "
        f"WHERE {_LIVE} GROUP BY cc.license_rule ORDER BY cc.license_rule"
    )).fetchall()
    return [
        {
            # license_rule may be NULL in the table; keep that as None.
            "license_rule": int(rule) if rule is not None else None,
            "label": license_tier_label(rule),
            "atom_count": int(atoms),
        }
        for rule, atoms in rows
    ]


def _source_documents(db: Session) -> list[dict[str, Any]]:
    """List every non-empty ``source_regulation`` with tier, count and use case.

    Per document the query reports the highest ``license_rule`` among its
    controls and the number of distinct controls, ordered by that number
    descending. The use case is resolved via ``use_case_for_regulation``.
    """
    rows = db.execute(text(
        "SELECT cpl.source_regulation AS src, max(cc.license_rule) AS lic, "
        "count(DISTINCT cc.id) AS n FROM canonical_controls cc "
        "JOIN control_parent_links cpl ON cpl.control_uuid = cc.id "
        f"WHERE {_LIVE} AND coalesce(cpl.source_regulation, '') <> '' "
        "GROUP BY cpl.source_regulation ORDER BY n DESC"
    )).fetchall()
    return [
        {
            "source_regulation": row.src,
            "license_rule": int(row.lic) if row.lic is not None else None,
            "license_tier": license_tier_label(row.lic),
            "atom_count": int(row.n),
            "use_case": use_case_for_regulation(row.src),
        }
        for row in rows
    ]


def _license_catalog(db: Session) -> list[dict[str, Any]]:
    """Return the curated sources joined with their license terms.

    Yields an empty list when ``compliance.canonical_control_sources`` does
    not exist, so the overview still works without the catalog table.
    """
    # to_regclass returns NULL instead of raising for an unknown relation.
    sources_table = db.execute(text(
        "SELECT to_regclass('compliance.canonical_control_sources')"
    )).scalar()
    if sources_table is None:
        return []

    rows = db.execute(text(
        "SELECT s.source_id, s.title, s.publisher, s.url, s.version_label, "
        "s.license_id, s.allowed_ship_in_product, l.name AS license_name, "
        "l.commercial_use, l.terms_url "
        "FROM canonical_control_sources s "
        "LEFT JOIN canonical_control_licenses l ON l.license_id = s.license_id "
        "ORDER BY s.publisher NULLS LAST, s.title"
    )).fetchall()
    return [
        {
            "source_id": row.source_id,
            "title": row.title,
            "publisher": row.publisher,
            "url": row.url,
            "version": row.version_label,
            "license_id": row.license_id,
            "license_name": row.license_name,
            "commercial_use": row.commercial_use,
            "ship_in_product": row.allowed_ship_in_product,
            "terms_url": row.terms_url,
        }
        for row in rows
    ]
@@ -98,7 +98,9 @@ class UseCaseControlsService:
self.db = db
def list_use_cases(self) -> list[dict[str, Any]]:
"""Registry use-cases with their live mapped-control counts."""
"""Registry use-cases with live counts — atom-grain (Haiku classification)
plus the legacy master seed. Backs the coverage overview so every topic is
visible with how many obligations it actually carries."""
counts = {
row[0]: int(row[1])
for row in self.db.execute(text(
@@ -106,6 +108,17 @@ class UseCaseControlsService:
"GROUP BY use_case"
)).fetchall()
}
atom: dict[str, tuple[int, int]] = {}
if self.db.execute(text(
"SELECT to_regclass('compliance.atom_classification')"
)).scalar() is not None:
atom = {
row[0]: (int(row[1]), int(row[2]))
for row in self.db.execute(text(
"SELECT use_case, count(*), count(*) FILTER (WHERE relevant) "
"FROM atom_classification GROUP BY use_case"
)).fetchall()
}
out = [
{
"key": uc.key,
@@ -114,10 +127,13 @@ class UseCaseControlsService:
"regulations": list(uc.regulations),
"verification_methods": list(uc.verification_methods),
"mapped_controls": counts.get(uc.key, 0),
"atom_total": atom.get(uc.key, (0, 0))[0],
"atom_relevant": atom.get(uc.key, (0, 0))[1],
}
for uc in REGISTRY.values() if uc.enabled
]
out.sort(key=lambda x: x["mapped_controls"], reverse=True)
out.sort(key=lambda x: (x["atom_relevant"], x["mapped_controls"]),
reverse=True)
return out
def controls_for_use_case(