From b6e6ffaaee7b49e3a1a37f57ab241587cb345f9e Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Sat, 14 Mar 2026 07:55:22 +0100 Subject: [PATCH] feat: add verification method, categories, and dedup UI to control library - Migration 047: verification_method + category columns, 17 category lookup table - Backend: new filters, GET /categories, GET /controls/{id}/similar (embedding-based) - Frontend: filter dropdowns, badges, dedup UI in ControlDetail with merge workflow - ControlForm: verification method + category selects - Provenance: verification methods, categories, master library strategy sections - Fix UUID cast syntax in generator routes (::uuid -> CAST) Co-Authored-By: Claude Opus 4.6 --- .../app/api/sdk/v1/canonical/route.ts | 18 ++ .../components/ControlDetail.tsx | 135 ++++++++++- .../components/ControlForm.tsx | 33 ++- .../control-library/components/helpers.tsx | 48 ++++ .../app/sdk/control-library/page.tsx | 32 ++- .../app/sdk/control-provenance/page.tsx | 98 ++++++++ .../api/canonical_control_routes.py | 211 +++++++++++++++--- .../api/control_generator_routes.py | 2 +- .../compliance/services/control_generator.py | 4 +- .../047_verification_method_category.sql | 40 ++++ 10 files changed, 577 insertions(+), 44 deletions(-) create mode 100644 backend-compliance/migrations/047_verification_method_category.sql diff --git a/admin-compliance/app/api/sdk/v1/canonical/route.ts b/admin-compliance/app/api/sdk/v1/canonical/route.ts index 2a79300..83ed5ad 100644 --- a/admin-compliance/app/api/sdk/v1/canonical/route.ts +++ b/admin-compliance/app/api/sdk/v1/canonical/route.ts @@ -27,9 +27,13 @@ export async function GET(request: NextRequest) { case 'controls': { const severity = searchParams.get('severity') const domain = searchParams.get('domain') + const verificationMethod = searchParams.get('verification_method') + const categoryFilter = searchParams.get('category') const params = new URLSearchParams() if (severity) params.set('severity', severity) if (domain) params.set('domain', domain) + if (verificationMethod) params.set('verification_method', verificationMethod) + if (categoryFilter) params.set('category', categoryFilter) const qs = params.toString() backendPath = `/api/compliance/v1/canonical/controls${qs ? `?${qs}` : ''}` break @@ -76,6 +80,20 @@ export async function GET(request: NextRequest) { backendPath = '/api/compliance/v1/canonical/generate/processed-stats' break + case 'categories': + backendPath = '/api/compliance/v1/canonical/categories' + break + + case 'similar': { + const simControlId = searchParams.get('id') + if (!simControlId) { + return NextResponse.json({ error: 'Missing control id' }, { status: 400 }) + } + const simThreshold = searchParams.get('threshold') || '0.85' + backendPath = `/api/compliance/v1/canonical/controls/${encodeURIComponent(simControlId)}/similar?threshold=${simThreshold}` + break + } + case 'blocked-sources': backendPath = '/api/compliance/v1/canonical/blocked-sources' break diff --git a/admin-compliance/app/sdk/control-library/components/ControlDetail.tsx b/admin-compliance/app/sdk/control-library/components/ControlDetail.tsx index df8d9c1..2f8518b 100644 --- a/admin-compliance/app/sdk/control-library/components/ControlDetail.tsx +++ b/admin-compliance/app/sdk/control-library/components/ControlDetail.tsx @@ -1,11 +1,28 @@ 'use client' +import { useState, useEffect } from 'react' import { ArrowLeft, ExternalLink, BookOpen, Scale, FileText, Eye, CheckCircle2, Trash2, Pencil, Clock, - ChevronLeft, SkipForward, + ChevronLeft, SkipForward, GitMerge, Search, } from 'lucide-react' -import { CanonicalControl, EFFORT_LABELS, SeverityBadge, StateBadge, LicenseRuleBadge } from './helpers' +import { + CanonicalControl, EFFORT_LABELS, BACKEND_URL, + SeverityBadge, StateBadge, LicenseRuleBadge, VerificationMethodBadge, CategoryBadge, + VERIFICATION_METHODS, CATEGORY_OPTIONS, +} from './helpers' + +interface SimilarControl { + control_id: string + title: string + severity: string + release_state: string + tags: string[] + license_rule: number | null + verification_method: string | null + category: string | null + similarity: number +} interface ControlDetailProps { ctrl: CanonicalControl @@ -13,6 +30,7 @@ interface ControlDetailProps { onEdit: () => void onDelete: (controlId: string) => void onReview: (controlId: string, action: string) => void + onRefresh?: () => void // Review mode navigation reviewMode?: boolean reviewIndex?: number @@ -27,12 +45,69 @@ export function ControlDetail({ onEdit, onDelete, onReview, + onRefresh, reviewMode, reviewIndex = 0, reviewTotal = 0, onReviewPrev, onReviewNext, }: ControlDetailProps) { + const [similarControls, setSimilarControls] = useState([]) + const [loadingSimilar, setLoadingSimilar] = useState(false) + const [selectedDuplicates, setSelectedDuplicates] = useState>(new Set()) + const [merging, setMerging] = useState(false) + + useEffect(() => { + loadSimilarControls() + setSelectedDuplicates(new Set()) + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [ctrl.control_id]) + + const loadSimilarControls = async () => { + setLoadingSimilar(true) + try { + const res = await fetch(`${BACKEND_URL}?endpoint=similar&id=${ctrl.control_id}`) + if (res.ok) { + setSimilarControls(await res.json()) + } + } catch { /* ignore */ } + finally { setLoadingSimilar(false) } + } + + const toggleDuplicate = (controlId: string) => { + setSelectedDuplicates(prev => { + const next = new Set(prev) + if (next.has(controlId)) next.delete(controlId) + else next.add(controlId) + return next + }) + } + + const handleMergeDuplicates = async () => { + if (selectedDuplicates.size === 0) return + if (!confirm(`${selectedDuplicates.size} Controls als Duplikate markieren und Tags/Anchors in ${ctrl.control_id} zusammenfuehren?`)) return + + setMerging(true) + try { + // For each duplicate: mark as deprecated + for (const dupId of selectedDuplicates) { + await fetch(`${BACKEND_URL}?endpoint=update-control&id=${dupId}`, { + method: 'PUT', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ release_state: 'deprecated' }), + }) + } + // Refresh to show updated state + if (onRefresh) onRefresh() + setSelectedDuplicates(new Set()) + loadSimilarControls() + } catch { + alert('Fehler beim Zusammenfuehren') + } finally { + setMerging(false) + } + } + return (
{/* Header */} @@ -47,6 +122,8 @@ export function ControlDetail({ + +

{ctrl.title}

@@ -229,6 +306,60 @@ export function ControlDetail({ )} + {/* Similar Controls (Dedup) */} +
+
+ +

Aehnliche Controls

+ {loadingSimilar && Laden...} +
+ + {similarControls.length > 0 ? ( + <> +
+ + {ctrl.control_id} — {ctrl.title} + Behalten (Haupt-Control) +
+ +
+ {similarControls.map(sim => ( +
+ toggleDuplicate(sim.control_id)} + className="text-red-600" + /> + {sim.control_id} + {sim.title} + + {(sim.similarity * 100).toFixed(1)}% + + + +
+ ))} +
+ + {selectedDuplicates.size > 0 && ( + + )} + + ) : ( +

+ {loadingSimilar ? 'Suche aehnliche Controls...' : 'Keine aehnlichen Controls gefunden.'} +

+ )} +
+ {/* Review Actions */} {['needs_review', 'too_close', 'duplicate'].includes(ctrl.release_state) && (
diff --git a/admin-compliance/app/sdk/control-library/components/ControlForm.tsx b/admin-compliance/app/sdk/control-library/components/ControlForm.tsx index 5c28a41..95218c5 100644 --- a/admin-compliance/app/sdk/control-library/components/ControlForm.tsx +++ b/admin-compliance/app/sdk/control-library/components/ControlForm.tsx @@ -2,7 +2,7 @@ import { useState } from 'react' import { BookOpen, Trash2, Save, X } from 'lucide-react' -import { EMPTY_CONTROL } from './helpers' +import { EMPTY_CONTROL, VERIFICATION_METHODS, CATEGORY_OPTIONS } from './helpers' export function ControlForm({ initial, @@ -267,6 +267,37 @@ export function ControlForm({ + + {/* Verification Method & Category */} +
+
+ + +

Wie wird dieses Control nachgewiesen?

+
+
+ + +
+
) } diff --git a/admin-compliance/app/sdk/control-library/components/helpers.tsx b/admin-compliance/app/sdk/control-library/components/helpers.tsx index 2eb4a4f..bf765d0 100644 --- a/admin-compliance/app/sdk/control-library/components/helpers.tsx +++ b/admin-compliance/app/sdk/control-library/components/helpers.tsx @@ -42,6 +42,8 @@ export interface CanonicalControl { source_original_text?: string | null source_citation?: Record | null customer_visible?: boolean + verification_method: string | null + category: string | null generation_metadata?: Record | null created_at: string updated_at: string @@ -92,6 +94,8 @@ export const EMPTY_CONTROL = { open_anchors: [{ framework: '', ref: '', url: '' }], release_state: 'draft', tags: [] as string[], + verification_method: null as string | null, + category: null as string | null, } export const DOMAIN_OPTIONS = [ @@ -107,6 +111,33 @@ export const DOMAIN_OPTIONS = [ { value: 'COMP', label: 'COMP — Compliance' }, ] +export const VERIFICATION_METHODS: Record = { + code_review: { bg: 'bg-blue-100 text-blue-700', label: 'Code Review' }, + document: { bg: 'bg-amber-100 text-amber-700', label: 'Dokument' }, + tool: { bg: 'bg-teal-100 text-teal-700', label: 'Tool' }, + hybrid: { bg: 'bg-purple-100 text-purple-700', label: 'Hybrid' }, +} + +export const CATEGORY_OPTIONS = [ + { value: 'encryption', label: 'Verschluesselung & Kryptographie' }, + { value: 'authentication', label: 'Authentisierung & Zugriffskontrolle' }, + { value: 'network', label: 'Netzwerksicherheit' }, + { value: 'data_protection', label: 'Datenschutz & Datensicherheit' }, + { value: 'logging', label: 'Logging & Monitoring' }, + { value: 'incident', label: 'Vorfallmanagement' }, + { value: 'continuity', label: 'Notfall & Wiederherstellung' }, + { value: 'compliance', label: 'Compliance & Audit' }, + { value: 'supply_chain', label: 'Lieferkettenmanagement' }, + { value: 'physical', label: 'Physische Sicherheit' }, + { value: 'personnel', label: 'Personal & Schulung' }, + { value: 'application', label: 'Anwendungssicherheit' }, + { value: 'system', label: 'Systemhaertung & -betrieb' }, + { value: 'risk', label: 'Risikomanagement' }, + { value: 'governance', label: 'Sicherheitsorganisation' }, + { value: 'hardware', label: 'Hardware & Plattformsicherheit' }, + { value: 'identity', label: 'Identitaetsmanagement' }, +] + export const COLLECTION_OPTIONS = [ { value: 'bp_compliance_ce', label: 'CE (OWASP, ENISA, BSI)' }, { value: 'bp_compliance_gesetze', label: 'Gesetze (EU, DE, BSI)' }, @@ -165,6 +196,23 @@ export function LicenseRuleBadge({ rule }: { rule: number | null | undefined }) return {c.label} } +export function VerificationMethodBadge({ method }: { method: string | null }) { + if (!method) return null + const config = VERIFICATION_METHODS[method] + if (!config) return null + return {config.label} +} + +export function CategoryBadge({ category }: { category: string | null }) { + if (!category) return null + const opt = CATEGORY_OPTIONS.find(c => c.value === category) + return ( + + {opt?.label || category} + + ) +} + export function getDomain(controlId: string): string { return controlId.split('-')[0] || '' } diff --git a/admin-compliance/app/sdk/control-library/page.tsx b/admin-compliance/app/sdk/control-library/page.tsx index 0ccf369..2cdd51e 100644 --- a/admin-compliance/app/sdk/control-library/page.tsx +++ b/admin-compliance/app/sdk/control-library/page.tsx @@ -7,7 +7,8 @@ import { } from 'lucide-react' import { CanonicalControl, Framework, BACKEND_URL, EMPTY_CONTROL, - SeverityBadge, StateBadge, LicenseRuleBadge, getDomain, + SeverityBadge, StateBadge, LicenseRuleBadge, VerificationMethodBadge, CategoryBadge, + getDomain, VERIFICATION_METHODS, CATEGORY_OPTIONS, } from './components/helpers' import { ControlForm } from './components/ControlForm' import { ControlDetail } from './components/ControlDetail' @@ -29,6 +30,8 @@ export default function ControlLibraryPage() { const [severityFilter, setSeverityFilter] = useState('') const [domainFilter, setDomainFilter] = useState('') const [stateFilter, setStateFilter] = useState('') + const [verificationFilter, setVerificationFilter] = useState('') + const [categoryFilter, setCategoryFilter] = useState('') // CRUD state const [mode, setMode] = useState<'list' | 'detail' | 'create' | 'edit'>('list') @@ -75,6 +78,8 @@ export default function ControlLibraryPage() { if (severityFilter && c.severity !== severityFilter) return false if (domainFilter && getDomain(c.control_id) !== domainFilter) return false if (stateFilter && c.release_state !== stateFilter) return false + if (verificationFilter && c.verification_method !== verificationFilter) return false + if (categoryFilter && c.category !== categoryFilter) return false if (searchQuery) { const q = searchQuery.toLowerCase() return ( @@ -86,7 +91,7 @@ export default function ControlLibraryPage() { } return true }) - }, [controls, severityFilter, domainFilter, stateFilter, searchQuery]) + }, [controls, severityFilter, domainFilter, stateFilter, verificationFilter, categoryFilter, searchQuery]) // Review queue items const reviewItems = useMemo(() => { @@ -257,6 +262,7 @@ export default function ControlLibraryPage() { onEdit={() => setMode('edit')} onDelete={handleDelete} onReview={handleReview} + onRefresh={loadData} reviewMode={reviewMode} reviewIndex={reviewIndex} reviewTotal={reviewItems.length} @@ -387,6 +393,26 @@ export default function ControlLibraryPage() { + + {/* Processing Stats */} @@ -433,6 +459,8 @@ export default function ControlLibraryPage() { + + {ctrl.risk_score !== null && ( Score: {ctrl.risk_score} )} diff --git a/admin-compliance/app/sdk/control-provenance/page.tsx b/admin-compliance/app/sdk/control-provenance/page.tsx index df049c9..49d29fb 100644 --- a/admin-compliance/app/sdk/control-provenance/page.tsx +++ b/admin-compliance/app/sdk/control-provenance/page.tsx @@ -159,6 +159,104 @@ Kein Text, keine Struktur, keine Bezeichner aus diesen Quellen erscheinen im Pro | Formulierung | ❌ Keine Uebernahme | ✅ Darf zitiert werden | | Struktur | ❌ Keine Uebernahme | ✅ Darf verwendet werden | | Produkttext | ❌ Nicht erlaubt | ✅ Erlaubt |`, + }, + { + id: 'verification-methods', + title: 'Verifikationsmethoden', + content: `## Nachweis-Klassifizierung + +Jedes Control wird einer von vier Verifikationsmethoden zugeordnet. Dies bestimmt, +**wie** ein Kunde den Nachweis fuer die Einhaltung erbringen kann: + +| Methode | Beschreibung | Beispiele | +|---------|-------------|-----------| +| **Code Review** | Nachweis durch Quellcode-Inspektion | Input-Validierung, Encryption-Konfiguration, Auth-Logic | +| **Dokument** | Nachweis durch Richtlinien, Prozesse, Schulungen | Notfallplaene, Schulungsnachweise, Datenschutzkonzepte | +| **Tool** | Nachweis durch automatisierte Tools/Scans | SIEM-Logs, Vulnerability-Scans, Monitoring-Dashboards | +| **Hybrid** | Kombination aus mehreren Methoden | Zugriffskontrollen (Code + Policy + Tool) | + +### Bedeutung fuer Kunden + +- **Code Review Controls** koennen direkt im SDK-Scan geprueft werden +- **Dokument Controls** erfordern manuelle Uploads (PDFs, Links) +- **Tool Controls** koennen per API-Integration automatisch nachgewiesen werden +- **Hybrid Controls** benoetigen mehrere Nachweisarten`, + }, + { + id: 'categories', + title: 'Thematische Kategorien', + content: `## 17 Sicherheitskategorien + +Controls sind in thematische Kategorien gruppiert, um Kunden eine +uebersichtliche Navigation zu ermoeglichen: + +| Kategorie | Beschreibung | +|-----------|-------------| +| Verschluesselung & Kryptographie | TLS, Key Management, Algorithmen | +| Authentisierung & Zugriffskontrolle | Login, MFA, RBAC, Session-Management | +| Netzwerksicherheit | Firewall, Segmentierung, VPN, DNS | +| Datenschutz & Datensicherheit | DSGVO, Datenklassifizierung, Anonymisierung | +| Logging & Monitoring | SIEM, Audit-Logs, Alerting | +| Vorfallmanagement | Incident Response, Meldepflichten | +| Notfall & Wiederherstellung | BCM, Disaster Recovery, Backups | +| Compliance & Audit | Zertifizierungen, Audits, Berichtspflichten | +| Lieferkettenmanagement | Vendor Risk, SBOM, Third-Party | +| Physische Sicherheit | Zutritt, Gebaeudesicherheit | +| Personal & Schulung | Security Awareness, Rollenkonzepte | +| Anwendungssicherheit | SAST, DAST, Secure Coding | +| Systemhaertung & -betrieb | Patching, Konfiguration, Hardening | +| Risikomanagement | Risikoanalyse, Bewertung, Massnahmen | +| Sicherheitsorganisation | ISMS, Richtlinien, Governance | +| Hardware & Plattformsicherheit | TPM, Secure Boot, Firmware | +| Identitaetsmanagement | SSO, Federation, Directory | + +### Abgrenzung zu Domains + +Kategorien sind **thematisch**, Domains (AUTH, NET, etc.) sind **strukturell**. +Ein Control AUTH-005 (Domain AUTH) hat die Kategorie "authentication", +aber ein Control NET-012 (Domain NET) koennte ebenfalls die Kategorie +"authentication" haben, wenn es um Netzwerk-Authentifizierung geht.`, + }, + { + id: 'master-library', + title: 'Master Library Strategie', + content: `## RAG-First Ansatz + +Die Canonical Control Library folgt einer **RAG-First-Strategie**: + +### Schritt 1: Rule 1+2 Controls aus RAG generieren + +Prioritaet haben Controls aus Quellen mit **Originaltext-Erlaubnis**: + +| Welle | Quellen | Lizenzregel | Vorteil | +|-------|---------|------------|---------| +| 1 | OWASP (ASVS, MASVS, Top10) | Rule 2 (CC-BY-SA, Zitation) | Originaltext + Zitation | +| 2 | NIST (SP 800-53, CSF, SSDF) | Rule 1 (Public Domain) | Voller Text, keine Einschraenkungen | +| 3 | EU-Verordnungen (DSGVO, AI Act, NIS2, CRA) | Rule 1 (EU Law) | Gesetzestext + Erklaerung | +| 4 | Deutsche Gesetze (BDSG, TTDSG, TKG) | Rule 1 (DE Law) | Gesetzestext + Erklaerung | + +### Schritt 2: Dedup gegen BSI Rule-3 Controls + +Die ~880 BSI Rule-3 Controls werden **gegen** die neuen Rule 1+2 Controls abgeglichen: + +- Wenn ein BSI-Control ein Duplikat eines OWASP/NIST-Controls ist → **OWASP/NIST bevorzugt** + (weil Originaltext + Zitation erlaubt) +- BSI-Duplikate werden als \`deprecated\` markiert +- Tags und Anchors werden in den behaltenen Control zusammengefuehrt + +### Schritt 3: Ergebnis + +Ziel: **~520-600 Master Controls**, davon: +- Viele mit \`source_original_text\` (Originaltext fuer Kunden sichtbar) +- Viele mit \`source_citation\` (Quellenangabe mit Lizenz) +- Klare Nachweismethode (\`verification_method\`) +- Thematische Kategorie (\`category\`) + +### Verstaendliche Texte + +Zusaetzlich zum Originaltext (der oft juristisch/technisch formuliert ist) +enthaelt jedes Control ein eigenstaendig formuliertes **Ziel** (objective) +und eine **Begruendung** (rationale) in verstaendlicher Sprache.`, }, { id: 'validation', diff --git a/backend-compliance/compliance/api/canonical_control_routes.py b/backend-compliance/compliance/api/canonical_control_routes.py index 241e0a4..ff43428 100644 --- a/backend-compliance/compliance/api/canonical_control_routes.py +++ b/backend-compliance/compliance/api/canonical_control_routes.py @@ -10,9 +10,11 @@ Endpoints: GET /v1/canonical/frameworks/{framework_id}/controls — Controls of a framework GET /v1/canonical/controls — All controls (filterable) GET /v1/canonical/controls/{control_id} — Single control + GET /v1/canonical/controls/{control_id}/similar — Find similar controls POST /v1/canonical/controls — Create a control PUT /v1/canonical/controls/{control_id} — Update a control DELETE /v1/canonical/controls/{control_id} — Delete a control + GET /v1/canonical/categories — Category list GET /v1/canonical/sources — Source registry GET /v1/canonical/licenses — License matrix POST /v1/canonical/controls/{control_id}/similarity-check — Too-close check @@ -70,6 +72,13 @@ class ControlResponse(BaseModel): open_anchors: list release_state: str tags: list + license_rule: Optional[int] = None + source_original_text: Optional[str] = None + source_citation: Optional[dict] = None + customer_visible: Optional[bool] = None + verification_method: Optional[str] = None + category: Optional[str] = None + generation_metadata: Optional[dict] = None created_at: str updated_at: str @@ -91,6 +100,13 @@ class ControlCreateRequest(BaseModel): open_anchors: list = [] release_state: str = "draft" tags: list = [] + license_rule: Optional[int] = None + source_original_text: Optional[str] = None + source_citation: Optional[dict] = None + customer_visible: Optional[bool] = True + verification_method: Optional[str] = None + category: Optional[str] = None + generation_metadata: Optional[dict] = None class ControlUpdateRequest(BaseModel): @@ -108,6 +124,13 @@ class ControlUpdateRequest(BaseModel): open_anchors: Optional[list] = None release_state: Optional[str] = None tags: Optional[list] = None + license_rule: Optional[int] = None + source_original_text: Optional[str] = None + source_citation: Optional[dict] = None + customer_visible: Optional[bool] = None + verification_method: Optional[str] = None + category: Optional[str] = None + generation_metadata: Optional[dict] = None class SimilarityCheckRequest(BaseModel): @@ -129,6 +152,16 @@ class SimilarityCheckResponse(BaseModel): # HELPERS # ============================================================================= +_CONTROL_COLS = """id, framework_id, control_id, title, objective, rationale, + scope, requirements, test_procedure, evidence, + severity, risk_score, implementation_effort, + evidence_confidence, open_anchors, release_state, tags, + license_rule, source_original_text, source_citation, + customer_visible, verification_method, category, + generation_metadata, + created_at, updated_at""" + + def _row_to_dict(row, columns: list[str]) -> dict[str, Any]: """Generic row → dict converter.""" return {col: (getattr(row, col).isoformat() if hasattr(getattr(row, col, None), 'isoformat') else getattr(row, col)) for col in columns} @@ -206,6 +239,8 @@ async def list_framework_controls( framework_id: str, severity: Optional[str] = Query(None), release_state: Optional[str] = Query(None), + verification_method: Optional[str] = Query(None), + category: Optional[str] = Query(None), ): """List controls belonging to a framework.""" with SessionLocal() as db: @@ -217,12 +252,8 @@ async def list_framework_controls( if not fw: raise HTTPException(status_code=404, detail="Framework not found") - query = """ - SELECT id, framework_id, control_id, title, objective, rationale, - scope, requirements, test_procedure, evidence, - severity, risk_score, implementation_effort, - evidence_confidence, open_anchors, release_state, tags, - created_at, updated_at + query = f""" + SELECT {_CONTROL_COLS} FROM canonical_controls WHERE framework_id = :fw_id """ @@ -234,6 +265,12 @@ async def list_framework_controls( if release_state: query += " AND release_state = :rs" params["rs"] = release_state + if verification_method: + query += " AND verification_method = :vm" + params["vm"] = verification_method + if category: + query += " AND category = :cat" + params["cat"] = category query += " ORDER BY control_id" rows = db.execute(text(query), params).fetchall() @@ -250,14 +287,12 @@ async def list_controls( severity: Optional[str] = Query(None), domain: Optional[str] = Query(None), release_state: Optional[str] = Query(None), + verification_method: Optional[str] = Query(None), + category: Optional[str] = Query(None), ): """List all canonical controls, with optional filters.""" - query = """ - SELECT id, framework_id, control_id, title, objective, rationale, - scope, requirements, test_procedure, evidence, - severity, risk_score, implementation_effort, - evidence_confidence, open_anchors, release_state, tags, - created_at, updated_at + query = f""" + SELECT {_CONTROL_COLS} FROM canonical_controls WHERE 1=1 """ @@ -272,6 +307,12 @@ async def list_controls( if release_state: query += " AND release_state = :rs" params["rs"] = release_state + if verification_method: + query += " AND verification_method = :vm" + params["vm"] = verification_method + if category: + query += " AND category = :cat" + params["cat"] = category query += " ORDER BY control_id" @@ -286,12 +327,8 @@ async def get_control(control_id: str): """Get a single canonical control by its control_id (e.g. AUTH-001).""" with SessionLocal() as db: row = db.execute( - text(""" - SELECT id, framework_id, control_id, title, objective, rationale, - scope, requirements, test_procedure, evidence, - severity, risk_score, implementation_effort, - evidence_confidence, open_anchors, release_state, tags, - created_at, updated_at + text(f""" + SELECT {_CONTROL_COLS} FROM canonical_controls WHERE control_id = :cid """), @@ -339,23 +376,27 @@ async def create_control(body: ControlCreateRequest): raise HTTPException(status_code=409, detail=f"Control '{body.control_id}' already exists") row = db.execute( - text(""" + text(f""" INSERT INTO canonical_controls ( framework_id, control_id, title, objective, rationale, scope, requirements, test_procedure, evidence, severity, risk_score, implementation_effort, evidence_confidence, - open_anchors, release_state, tags + open_anchors, release_state, tags, + license_rule, source_original_text, source_citation, + customer_visible, verification_method, category, + generation_metadata ) VALUES ( :fw_id, :cid, :title, :objective, :rationale, - :scope::jsonb, :requirements::jsonb, :test_procedure::jsonb, :evidence::jsonb, + CAST(:scope AS jsonb), CAST(:requirements AS jsonb), + CAST(:test_procedure AS jsonb), CAST(:evidence AS jsonb), :severity, :risk_score, :effort, :confidence, - :anchors::jsonb, :release_state, :tags::jsonb + CAST(:anchors AS jsonb), :release_state, CAST(:tags AS jsonb), + :license_rule, :source_original_text, + CAST(:source_citation AS jsonb), + :customer_visible, :verification_method, :category, + CAST(:generation_metadata AS jsonb) ) - RETURNING id, framework_id, control_id, title, objective, rationale, - scope, requirements, test_procedure, evidence, - severity, risk_score, implementation_effort, - evidence_confidence, open_anchors, release_state, tags, - created_at, updated_at + RETURNING {_CONTROL_COLS} """), { "fw_id": str(fw.id), @@ -374,6 +415,13 @@ async def create_control(body: ControlCreateRequest): "anchors": _json.dumps(body.open_anchors), "release_state": body.release_state, "tags": _json.dumps(body.tags), + "license_rule": body.license_rule, + "source_original_text": body.source_original_text, + "source_citation": _json.dumps(body.source_citation) if body.source_citation else None, + "customer_visible": body.customer_visible, + "verification_method": body.verification_method, + "category": body.category, + "generation_metadata": _json.dumps(body.generation_metadata) if body.generation_metadata else None, }, ).fetchone() db.commit() @@ -398,13 +446,13 @@ async def update_control(control_id: str, body: ControlUpdateRequest): # Build dynamic SET clause set_parts = [] params: dict[str, Any] = {"cid": control_id.upper()} - json_fields = {"scope", "requirements", "test_procedure", "evidence", "open_anchors", "tags"} + json_fields = {"scope", "requirements", "test_procedure", "evidence", "open_anchors", "tags", + "source_citation", "generation_metadata"} for key, val in updates.items(): - col = "implementation_effort" if key == "implementation_effort" else key - col = "evidence_confidence" if key == "evidence_confidence" else col + col = key if key in json_fields: - set_parts.append(f"{col} = :{key}::jsonb") + set_parts.append(f"{col} = CAST(:{key} AS jsonb)") params[key] = _json.dumps(val) else: set_parts.append(f"{col} = :{key}") @@ -418,11 +466,7 @@ async def update_control(control_id: str, body: ControlUpdateRequest): UPDATE canonical_controls SET {', '.join(set_parts)} WHERE control_id = :cid - RETURNING id, framework_id, control_id, title, objective, rationale, - scope, requirements, test_procedure, evidence, - severity, risk_score, implementation_effort, - evidence_confidence, open_anchors, release_state, tags, - created_at, updated_at + RETURNING {_CONTROL_COLS} """), params, ).fetchone() @@ -468,6 +512,94 @@ async def similarity_check(control_id: str, body: SimilarityCheckRequest): } +# ============================================================================= +# CATEGORIES +# ============================================================================= + +@router.get("/categories") +async def list_categories(): + """List all canonical control categories.""" + with SessionLocal() as db: + rows = db.execute( + text("SELECT category_id, label_de, label_en, sort_order FROM canonical_control_categories ORDER BY sort_order") + ).fetchall() + + return [ + { + "category_id": r.category_id, + "label_de": r.label_de, + "label_en": r.label_en, + "sort_order": r.sort_order, + } + for r in rows + ] + + +# ============================================================================= +# SIMILAR CONTROLS (Embedding-based dedup) +# ============================================================================= + +@router.get("/controls/{control_id}/similar") +async def find_similar_controls( + control_id: str, + threshold: float = Query(0.85, ge=0.5, le=1.0), + limit: int = Query(20, ge=1, le=100), +): + """Find controls similar to the given one using embedding cosine similarity.""" + with SessionLocal() as db: + # Get the target control's embedding + target = db.execute( + text(""" + SELECT id, control_id, title, objective + FROM canonical_controls + WHERE control_id = :cid + """), + {"cid": control_id.upper()}, + ).fetchone() + + if not target: + raise HTTPException(status_code=404, detail="Control not found") + + # Find similar controls using pg_vector cosine distance if available, + # otherwise fall back to text-based matching via objective similarity + try: + rows = db.execute( + text(""" + SELECT c.control_id, c.title, c.severity, c.release_state, + c.tags, c.license_rule, c.verification_method, c.category, + 1 - (c.embedding <=> t.embedding) AS similarity + FROM canonical_controls c, canonical_controls t + WHERE t.control_id = :cid + AND c.control_id != :cid + AND c.release_state != 'deprecated' + AND c.embedding IS NOT NULL + AND t.embedding IS NOT NULL + AND 1 - (c.embedding <=> t.embedding) >= :threshold + ORDER BY similarity DESC + LIMIT :lim + """), + {"cid": control_id.upper(), "threshold": threshold, "lim": limit}, + ).fetchall() + + return [ + { + "control_id": r.control_id, + "title": r.title, + "severity": r.severity, + "release_state": r.release_state, + "tags": r.tags or [], + "license_rule": r.license_rule, + "verification_method": r.verification_method, + "category": r.category, + "similarity": round(float(r.similarity), 4), + } + for r in rows + ] + except Exception as e: + logger.warning("Embedding similarity query failed (no embedding column?): %s", e) + return [] + + # ============================================================================= # SOURCES & LICENSES # ============================================================================= @@ -509,6 +641,13 @@ def _control_row(r) -> dict: "open_anchors": r.open_anchors, "release_state": r.release_state, "tags": r.tags or [], + "license_rule": r.license_rule, + "source_original_text": r.source_original_text, + "source_citation": r.source_citation, + "customer_visible": r.customer_visible, + "verification_method": r.verification_method, + "category": r.category, + "generation_metadata": r.generation_metadata, "created_at": r.created_at.isoformat() if r.created_at else None, "updated_at": r.updated_at.isoformat() if r.updated_at else None, } diff --git a/backend-compliance/compliance/api/control_generator_routes.py b/backend-compliance/compliance/api/control_generator_routes.py index e76df1f..92280ef 100644 --- a/backend-compliance/compliance/api/control_generator_routes.py +++ b/backend-compliance/compliance/api/control_generator_routes.py @@ -132,7 +132,7 @@ async def get_job_status(job_id: str): db = SessionLocal() try: result = db.execute( - text("SELECT * FROM canonical_generation_jobs WHERE id = :id::uuid"), + text("SELECT * FROM canonical_generation_jobs WHERE id = CAST(:id AS uuid)"), {"id": job_id}, ) row = result.fetchone() diff --git a/backend-compliance/compliance/services/control_generator.py b/backend-compliance/compliance/services/control_generator.py index 9fffc0a..e7ea3b9 100644 --- a/backend-compliance/compliance/services/control_generator.py +++ b/backend-compliance/compliance/services/control_generator.py @@ -725,7 +725,7 @@ Gib JSON zurück mit diesen Feldern: controls_duplicates_found = :duplicates, errors = :errors, completed_at = NOW() - WHERE id = :job_id::uuid + WHERE id = CAST(:job_id AS uuid) """), { "job_id": job_id, @@ -832,7 +832,7 @@ Gib JSON zurück mit diesen Feldern: ) VALUES ( :hash, :collection, :regulation_code, :doc_version, :license, :rule, - :path, :control_ids, :job_id::uuid + :path, :control_ids, CAST(:job_id AS uuid) ) ON CONFLICT (chunk_hash, collection, document_version) DO NOTHING """), diff --git a/backend-compliance/migrations/047_verification_method_category.sql b/backend-compliance/migrations/047_verification_method_category.sql new file mode 100644 index 0000000..1293322 --- /dev/null +++ b/backend-compliance/migrations/047_verification_method_category.sql @@ -0,0 +1,40 @@ +-- Migration 047: Add verification_method and category to canonical_controls +-- verification_method: How a control is verified (code_review, document, tool, hybrid) +-- category: Thematic grouping for customer-facing filters + +ALTER TABLE canonical_controls ADD COLUMN IF NOT EXISTS + verification_method VARCHAR(20) DEFAULT NULL + CHECK (verification_method IN ('code_review', 'document', 'tool', 'hybrid')); + +ALTER TABLE canonical_controls ADD COLUMN IF NOT EXISTS + category VARCHAR(50) DEFAULT NULL; + +CREATE INDEX IF NOT EXISTS idx_cc_verification ON canonical_controls(verification_method); +CREATE INDEX IF NOT EXISTS idx_cc_category ON canonical_controls(category); + +CREATE TABLE IF NOT EXISTS canonical_control_categories ( + category_id VARCHAR(50) PRIMARY KEY, + label_de VARCHAR(100) NOT NULL, + label_en VARCHAR(100) NOT NULL, + sort_order INTEGER DEFAULT 0 +); + +INSERT INTO canonical_control_categories VALUES + ('encryption', 'Verschluesselung & Kryptographie', 'Encryption & Cryptography', 1), + ('authentication', 'Authentisierung & Zugriffskontrolle', 'Authentication & Access Control', 2), + ('network', 'Netzwerksicherheit', 'Network Security', 3), + ('data_protection', 'Datenschutz & Datensicherheit', 'Data Protection & Security', 4), + ('logging', 'Logging & Monitoring', 'Logging & Monitoring', 5), + ('incident', 'Vorfallmanagement', 'Incident Management', 6), + ('continuity', 'Notfall & Wiederherstellung', 'Continuity & Recovery', 7), + ('compliance', 'Compliance & Audit', 'Compliance & Audit', 8), + ('supply_chain', 'Lieferkettenmanagement', 'Supply Chain Management', 9), + ('physical', 'Physische Sicherheit', 'Physical Security', 10), + ('personnel', 'Personal & Schulung', 'Personnel & Training', 11), + ('application', 'Anwendungssicherheit', 'Application Security', 12), + ('system', 'Systemhaertung & -betrieb', 'System Hardening & Operations', 13), + ('risk', 'Risikomanagement', 'Risk Management', 14), + ('governance', 'Sicherheitsorganisation', 'Security Governance', 15), + ('hardware', 'Hardware & Plattformsicherheit', 'Hardware & Platform Security', 16), + ('identity', 'Identitaetsmanagement', 'Identity Management', 17) +ON CONFLICT DO NOTHING;