diff --git a/admin-compliance/app/sdk/licenses/page.tsx b/admin-compliance/app/sdk/licenses/page.tsx new file mode 100644 index 00000000..f3a26572 --- /dev/null +++ b/admin-compliance/app/sdk/licenses/page.tsx @@ -0,0 +1,160 @@ +'use client' + +import { useEffect, useState } from 'react' + +// Stufe 1 of the Attribution Renderer (Task #23): the global +// "Quellen & Lizenzen" overview. Aggregates all 314k canonical_controls +// by their license_rule and shows the source regulations behind each +// bucket. Drives the footer link and gives auditors a one-page view of +// what licence classes the platform is operating under. + +type SourceCount = { + regulation_id: string + regulation_name_de: string | null + license_rule: number + license_type: string | null + attribution: string | null + jurisdiction: string | null + source_type: string | null + n_controls: number +} + +type RuleBucket = { + rule: number + label_de: string + label_en: string + attribution_required: boolean + render_full_text: boolean + total_controls: number + distinct_sources: number + sources: SourceCount[] +} + +type Overview = { + total_controls: number + buckets: RuleBucket[] +} + +const RULE_COLOR: Record = { + 1: 'border-emerald-200 bg-emerald-50', + 2: 'border-amber-200 bg-amber-50', + 3: 'border-slate-200 bg-slate-50', +} + +const RULE_BADGE: Record = { + 1: 'bg-emerald-600 text-white', + 2: 'bg-amber-600 text-white', + 3: 'bg-slate-600 text-white', +} + +export default function LicensesPage() { + const [data, setData] = useState(null) + const [error, setError] = useState(null) + + useEffect(() => { + fetch('/api/sdk/v1/compliance/licenses/overview') + .then((r) => (r.ok ? r.json() : Promise.reject(`HTTP ${r.status}`))) + .then(setData) + .catch((e) => setError(String(e))) + }, []) + + if (error) { + return ( +
+

Quellen & Lizenzen

+

Fehler beim Laden: {error}

+
+ ) + } + if (!data) { + return ( +
+

Quellen & Lizenzen

+

Lade …

+
+ ) + } + + return ( +
+
+

Quellen & Lizenzen

+

+ Diese Plattform stützt sich auf {data.total_controls.toLocaleString('de-DE')}{' '} + klassifizierte Compliance-Controls aus den unten genannten Quellen. + Jeder Control trägt eine deterministische Lizenzregel (R1–R3), die das + Render-Verhalten in Berichten und im Frontend steuert. +

+
+ +
+

Klassifizierungs-Schema

+
+ {data.buckets.map((b) => ( +
+
+ + R{b.rule} + + {b.label_de} +
+
    +
  • {b.total_controls.toLocaleString('de-DE')} Controls
  • +
  • {b.distinct_sources} Quellen
  • +
  • {b.render_full_text ? 'Volltext-Anzeige erlaubt' : 'Nur Identifier-Verweis'}
  • +
  • {b.attribution_required ? 'Attribution-Pflicht in Output' : 'keine Attribution-Pflicht'}
  • +
+
+ ))} +
+
+ + {data.buckets.map((b) => ( +
+

+ + R{b.rule} + + {b.label_de}{' '} + + ({b.total_controls.toLocaleString('de-DE')} Controls aus {b.distinct_sources} Quellen) + +

+ +
+ + + + + + + + + + + + {b.sources.map((s) => ( + + + + + + + + ))} + +
QuelleLizenztypRechtsraumAttributionControls
{s.regulation_name_de ?? s.regulation_id}{s.license_type ?? '—'}{s.jurisdiction ?? '—'}{s.attribution ?? '—'}{s.n_controls.toLocaleString('de-DE')}
+
+
+ ))} + +
+ Klassifizierung: deterministisch über parent_control_uuid-Vererbung, + control_parent_links → regulation_registry, source_citation, + canonical_processed_chunks (Pipeline-Ground-Truth) und LLM-Aggregat- + Identifikation für eigene Werke. Audit-Skripte unter + breakpilot-core/control-pipeline/scripts/. +
+
+ ) +} diff --git a/admin-compliance/components/sdk/SourceBadge.tsx b/admin-compliance/components/sdk/SourceBadge.tsx new file mode 100644 index 00000000..379bcbd1 --- /dev/null +++ b/admin-compliance/components/sdk/SourceBadge.tsx @@ -0,0 +1,138 @@ +'use client' + +import { useEffect, useState } from 'react' + +// Stufe 3 of the Attribution Renderer (Task #23): an inline source +// badge that any rendered control/hazard/measure can attach to itself. +// +// Visually a small license-rule pill (R1/R2/R3); on hover/click it +// reveals the underlying regulation, license type, and — for Rule 2 — +// the mandatory attribution string. +// +// Usage: +// +// +// The component lazily fetches /licenses/source-info/{uuid} on first +// expand so the surrounding list view stays cheap. + +type SourceInfo = { + control_uuid: string + license_rule: number | null + license_label_de: string | null + attribution_required: boolean + render_full_text: boolean + regulation_id: string | null + regulation_name_de: string | null + license_type: string | null + attribution: string | null + source_url: string | null +} + +const RULE_BADGE: Record = { + 1: 'bg-emerald-100 text-emerald-800 border-emerald-300', + 2: 'bg-amber-100 text-amber-800 border-amber-300', + 3: 'bg-slate-100 text-slate-700 border-slate-300', +} + +const RULE_TITLE: Record = { + 1: 'R1 — wörtlich übernehmbar', + 2: 'R2 — wörtlich mit Attribution', + 3: 'R3 — nur Identifier zitieren', +} + +interface SourceBadgeProps { + controlUuid: string + /** Optional: skip the fetch and render from already-known data. */ + prefetched?: SourceInfo + /** Compact mode for tight UI rows (smaller pill). */ + compact?: boolean +} + +export function SourceBadge({ controlUuid, prefetched, compact }: SourceBadgeProps) { + const [data, setData] = useState(prefetched ?? null) + const [open, setOpen] = useState(false) + const [loading, setLoading] = useState(false) + const [error, setError] = useState(null) + + useEffect(() => { + if (!open || data) return + setLoading(true) + fetch(`/api/sdk/v1/compliance/licenses/source-info/${controlUuid}`) + .then((r) => (r.ok ? r.json() : Promise.reject(`HTTP ${r.status}`))) + .then(setData) + .catch((e) => setError(String(e))) + .finally(() => setLoading(false)) + }, [open, data, controlUuid]) + + const rule = data?.license_rule ?? prefetched?.license_rule ?? null + const badgeClass = rule ? RULE_BADGE[rule] ?? RULE_BADGE[3] : 'bg-slate-100 text-slate-500 border-slate-200' + const sizeClass = compact ? 'text-[10px] px-1.5 py-0.5' : 'text-xs px-2 py-0.5' + + return ( + + + + {open && ( +
+ {loading &&

Lade Quellen-Info…

} + {error &&

Fehler: {error}

} + {data && ( +
+
+ {data.license_label_de ?? 'Lizenz unbekannt'} +
+ {data.regulation_name_de && ( +
+ Quelle:{' '} + {data.regulation_name_de} +
+ )} + {data.license_type && ( +
+ Lizenztyp:{' '} + {data.license_type} +
+ )} + {data.attribution && ( +
+
+ Attribution-Pflicht +
+
{data.attribution}
+
+ )} + {!data.render_full_text && ( +
+ Volltext wird im Output nicht gerendert — nur Identifier-Verweis. +
+ )} + {data.source_url && ( + + Originalquelle öffnen ↗ + + )} +
+ )} +
+ )} +
+ ) +} + +export default SourceBadge diff --git a/backend-compliance/compliance/api/__init__.py b/backend-compliance/compliance/api/__init__.py index 43e6e077..b7129161 100644 --- a/backend-compliance/compliance/api/__init__.py +++ b/backend-compliance/compliance/api/__init__.py @@ -72,6 +72,7 @@ _ROUTER_MODULES = [ "whistleblower_routes", "tcf_routes", "founding_wizard_routes", + "licenses_routes", ] _loaded_count = 0 diff --git a/backend-compliance/compliance/api/licenses_routes.py b/backend-compliance/compliance/api/licenses_routes.py new file mode 100644 index 00000000..4f3c6bb6 --- /dev/null +++ b/backend-compliance/compliance/api/licenses_routes.py @@ -0,0 +1,306 @@ +"""License attribution endpoints — Task #23 Stufe 1-4. + +The audit (Task #22) classified all 314,811 canonical_controls into +license_rule 1/2/3. The frontend, PDF renderer, and tech-file generator +now need to surface that classification in the form of: + +- Stufe 1: a global /licenses overview page +- Stufe 2: an auto-footer in every exported PDF +- Stufe 3: an inline source badge on every rendered hazard/measure +- Stufe 4: a sources appendix in tech-file bundles + +This module exposes three endpoints that all four stages consume: + + GET /api/compliance/licenses/overview + Global aggregation by rule + per-source counts. Drives Stufe 1. + + POST /api/compliance/licenses/aggregate + Body: {"control_uuids": ["uuid1", ...]}. + Returns per-rule grouping with source breakdown. Used by PDF + footer (Stufe 2) and tech-file appendix (Stufe 4) to build the + "sources used in this document" list. + + GET /api/compliance/licenses/source-info/{control_uuid} + Single-control lookup for the inline source badge tooltip + (Stufe 3). Returns rule, source regulation, attribution text. + +Why a new module instead of extending canonical_control_routes: +- canonical_control_routes serves the legacy SPDX-style license matrix + (canonical_control_licenses + canonical_control_sources, ~10 rows). +- This module is built on regulation_registry (252 rows) + the + license_rule on each control. Both schemas coexist; this module + doesn't disturb the legacy endpoints. +""" + +from __future__ import annotations + +import logging +from typing import Any, Optional +from uuid import UUID + +from fastapi import APIRouter, Depends, HTTPException +from pydantic import BaseModel +from sqlalchemy import text +from sqlalchemy.orm import Session + +from classroom_engine.database import get_db + +router = APIRouter(prefix="/licenses", tags=["licenses"]) +logger = logging.getLogger(__name__) + + +# ============================================================================ +# Rule labels — used by frontend renderer +# ============================================================================ + +RULE_LABELS = { + 1: { + "code": "R1", + "label_de": "Wörtlich übernehmbar", + "label_en": "Verbatim, no attribution required", + "render_full_text": True, + "attribution_required": False, + }, + 2: { + "code": "R2", + "label_de": "Wörtlich mit Attribution", + "label_en": "Verbatim with attribution", + "render_full_text": True, + "attribution_required": True, + }, + 3: { + "code": "R3", + "label_de": "Nur Identifier zitieren", + "label_en": "Identifier citation only", + "render_full_text": False, + "attribution_required": False, + }, +} + + +# ============================================================================ +# Response Schemas +# ============================================================================ + + +class SourceCount(BaseModel): + regulation_id: str + regulation_name_de: Optional[str] + license_rule: int + license_type: Optional[str] + attribution: Optional[str] + jurisdiction: Optional[str] + source_type: Optional[str] + n_controls: int + + +class RuleBucket(BaseModel): + rule: int + label_de: str + label_en: str + attribution_required: bool + render_full_text: bool + total_controls: int + distinct_sources: int + sources: list[SourceCount] + + +class OverviewResponse(BaseModel): + total_controls: int + buckets: list[RuleBucket] + + +class AggregateRequest(BaseModel): + control_uuids: list[UUID] + + +class AggregateResponse(BaseModel): + total_in_request: int + matched: int + buckets: list[RuleBucket] + + +class SourceInfo(BaseModel): + control_uuid: UUID + license_rule: Optional[int] + license_label_de: Optional[str] + attribution_required: bool + render_full_text: bool + regulation_id: Optional[str] + regulation_name_de: Optional[str] + license_type: Optional[str] + attribution: Optional[str] + source_url: Optional[str] + + +# ============================================================================ +# Endpoints +# ============================================================================ + + +def _bucket(rule: int, sources: list[SourceCount]) -> RuleBucket: + meta = RULE_LABELS.get(rule, RULE_LABELS[3]) + return RuleBucket( + rule=rule, + label_de=meta["label_de"], + label_en=meta["label_en"], + attribution_required=meta["attribution_required"], + render_full_text=meta["render_full_text"], + total_controls=sum(s.n_controls for s in sources), + distinct_sources=len(sources), + sources=sources, + ) + + +@router.get("/overview", response_model=OverviewResponse) +def licenses_overview(db: Session = Depends(get_db)) -> OverviewResponse: + """Global aggregation: total controls by rule, with per-source breakdown. + + Drives Stufe 1 (the /licenses page). + """ + rows = db.execute(text(""" + SELECT + COALESCE(cpl.source_regulation, '(no source)') AS regulation_name, + cc.license_rule, + COUNT(DISTINCT cc.id) AS n + FROM compliance.canonical_controls cc + LEFT JOIN compliance.control_parent_links cpl ON cpl.control_uuid = cc.id + WHERE cc.license_rule IS NOT NULL + GROUP BY 1, 2 + """)).fetchall() + + reg_rows = db.execute(text(""" + SELECT regulation_name_de, regulation_id, license_type, attribution, + jurisdiction, source_type + FROM compliance.regulation_registry + """)).fetchall() + reg_by_name = {r.regulation_name_de: r for r in reg_rows if r.regulation_name_de} + + by_rule: dict[int, list[SourceCount]] = {1: [], 2: [], 3: []} + seen: dict[tuple[int, str], int] = {} + total = 0 + for row in rows: + rule = int(row.license_rule) + name = row.regulation_name + n = int(row.n) + key = (rule, name) + # multiple cpl entries per control deduplicate via DISTINCT, but a + # control with several source_regulations still gets counted once + # per regulation — that's the design. + seen[key] = seen.get(key, 0) + n + total += n + + for (rule, name), n in seen.items(): + reg = reg_by_name.get(name) + by_rule.setdefault(rule, []).append(SourceCount( + regulation_id=reg.regulation_id if reg else name, + regulation_name_de=name, + license_rule=rule, + license_type=reg.license_type if reg else None, + attribution=reg.attribution if reg else None, + jurisdiction=reg.jurisdiction if reg else None, + source_type=reg.source_type if reg else None, + n_controls=n, + )) + + for r in by_rule.values(): + r.sort(key=lambda s: -s.n_controls) + buckets = [_bucket(rule, sources) for rule, sources in sorted(by_rule.items())] + return OverviewResponse(total_controls=total, buckets=buckets) + + +@router.post("/aggregate", response_model=AggregateResponse) +def aggregate_for_controls( + body: AggregateRequest, + db: Session = Depends(get_db), +) -> AggregateResponse: + """Per-control license aggregation for PDF footer (Stufe 2) and + tech-file sources appendix (Stufe 4). + + Returns a per-rule breakdown of which sources contributed to the + supplied control set. The frontend renderer turns this into the + "Verwendete Quellen" footer. + """ + if not body.control_uuids: + return AggregateResponse(total_in_request=0, matched=0, buckets=[]) + + rows = db.execute(text(""" + SELECT + COALESCE(cpl.source_regulation, '(unknown)') AS regulation_name, + cc.license_rule, + COUNT(DISTINCT cc.id) AS n + FROM compliance.canonical_controls cc + LEFT JOIN compliance.control_parent_links cpl ON cpl.control_uuid = cc.id + WHERE cc.id = ANY(:ids) AND cc.license_rule IS NOT NULL + GROUP BY 1, 2 + """), {"ids": [str(u) for u in body.control_uuids]}).fetchall() + + reg_rows = db.execute(text(""" + SELECT regulation_name_de, regulation_id, license_type, attribution, + jurisdiction, source_type + FROM compliance.regulation_registry + """)).fetchall() + reg_by_name = {r.regulation_name_de: r for r in reg_rows if r.regulation_name_de} + + by_rule: dict[int, list[SourceCount]] = {1: [], 2: [], 3: []} + matched_total = 0 + for row in rows: + rule = int(row.license_rule) + n = int(row.n) + matched_total += n + reg = reg_by_name.get(row.regulation_name) + by_rule.setdefault(rule, []).append(SourceCount( + regulation_id=reg.regulation_id if reg else row.regulation_name, + regulation_name_de=row.regulation_name, + license_rule=rule, + license_type=reg.license_type if reg else None, + attribution=reg.attribution if reg else None, + jurisdiction=reg.jurisdiction if reg else None, + source_type=reg.source_type if reg else None, + n_controls=n, + )) + for r in by_rule.values(): + r.sort(key=lambda s: -s.n_controls) + buckets = [_bucket(rule, sources) for rule, sources in sorted(by_rule.items()) if sources] + return AggregateResponse( + total_in_request=len(body.control_uuids), + matched=matched_total, + buckets=buckets, + ) + + +@router.get("/source-info/{control_uuid}", response_model=SourceInfo) +def source_info_for_control( + control_uuid: UUID, + db: Session = Depends(get_db), +) -> SourceInfo: + """Single-control source info for the inline source badge (Stufe 3). + + Used by the React `` component to populate its tooltip. + """ + row = db.execute(text(""" + SELECT cc.license_rule, cpl.source_regulation AS regulation_name, + r.regulation_id, r.license_type, r.attribution, r.url AS source_url + FROM compliance.canonical_controls cc + LEFT JOIN compliance.control_parent_links cpl ON cpl.control_uuid = cc.id + LEFT JOIN compliance.regulation_registry r ON r.regulation_name_de = cpl.source_regulation + WHERE cc.id = :uuid + LIMIT 1 + """), {"uuid": str(control_uuid)}).fetchone() + if row is None: + raise HTTPException(status_code=404, detail="control not found") + + rule = int(row.license_rule) if row.license_rule is not None else None + meta = RULE_LABELS.get(rule, {}) if rule else {} + return SourceInfo( + control_uuid=control_uuid, + license_rule=rule, + license_label_de=meta.get("label_de"), + attribution_required=meta.get("attribution_required", False), + render_full_text=meta.get("render_full_text", False), + regulation_id=row.regulation_id, + regulation_name_de=row.regulation_name, + license_type=row.license_type, + attribution=row.attribution, + source_url=row.source_url, + )