feat(control-generator): 7-stage pipeline for RAG→LLM→Controls generation
Some checks failed
CI/CD / go-lint (push) Has been skipped
CI/CD / python-lint (push) Has been skipped
CI/CD / nodejs-lint (push) Has been skipped
CI/CD / test-go-ai-compliance (push) Successful in 45s
CI/CD / test-python-document-crawler (push) Has been cancelled
CI/CD / test-python-dsms-gateway (push) Has been cancelled
CI/CD / validate-canonical-controls (push) Has been cancelled
CI/CD / deploy-hetzner (push) Has been cancelled
CI/CD / test-python-backend-compliance (push) Has been cancelled

Implements the Control Generator Pipeline that systematically generates
canonical security controls from 150k+ RAG chunks across all compliance
collections (BSI, NIST, OWASP, ENISA, EU laws, German laws).

Three license rules enforced throughout:
- Rule 1 (free_use): Laws/Public Domain — original text preserved
- Rule 2 (citation_required): CC-BY/CC-BY-SA — text with citation
- Rule 3 (restricted): BSI/ISO — full reformulation, no source traces

New files:
- Migration 046: job tracking, chunk tracking, blocked sources tables
- control_generator.py: 7-stage pipeline (scan→classify→structure/reform→harmonize→anchor→store→mark)
- anchor_finder.py: RAG + DuckDuckGo open-source reference search
- control_generator_routes.py: REST API (generate, review, stats, blocked-sources)
- test_control_generator.py: license mapping, rule enforcement, anchor filtering tests

Modified:
- __init__.py: register control_generator_router
- route.ts: proxy generator/review/stats endpoints
- page.tsx: Generator modal, stats panel, state filter, review queue, license badges

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-03-13 09:03:37 +01:00
parent c87f07c99a
commit de19ef0684
8 changed files with 2404 additions and 9 deletions

View File

@@ -52,6 +52,45 @@ export async function GET(request: NextRequest) {
backendPath = '/api/compliance/v1/canonical/licenses'
break
// Generator endpoints
case 'generate-jobs':
backendPath = '/api/compliance/v1/canonical/generate/jobs'
break
case 'generate-status': {
const jobId = searchParams.get('jobId')
if (!jobId) {
return NextResponse.json({ error: 'Missing jobId' }, { status: 400 })
}
backendPath = `/api/compliance/v1/canonical/generate/status/${encodeURIComponent(jobId)}`
break
}
case 'review-queue': {
const state = searchParams.get('release_state') || 'needs_review'
backendPath = `/api/compliance/v1/canonical/generate/review-queue?release_state=${encodeURIComponent(state)}`
break
}
case 'processed-stats':
backendPath = '/api/compliance/v1/canonical/generate/processed-stats'
break
case 'blocked-sources':
backendPath = '/api/compliance/v1/canonical/blocked-sources'
break
case 'controls-customer': {
const custSeverity = searchParams.get('severity')
const custDomain = searchParams.get('domain')
const custParams = new URLSearchParams()
if (custSeverity) custParams.set('severity', custSeverity)
if (custDomain) custParams.set('domain', custDomain)
const custQs = custParams.toString()
backendPath = `/api/compliance/v1/canonical/controls-customer${custQs ? `?${custQs}` : ''}`
break
}
default:
return NextResponse.json({ error: `Unknown endpoint: ${endpoint}` }, { status: 400 })
}
@@ -95,6 +134,16 @@ export async function POST(request: NextRequest) {
if (endpoint === 'create-control') {
backendPath = '/api/compliance/v1/canonical/controls'
} else if (endpoint === 'generate') {
backendPath = '/api/compliance/v1/canonical/generate'
} else if (endpoint === 'review') {
const controlId = searchParams.get('id')
if (!controlId) {
return NextResponse.json({ error: 'Missing control id' }, { status: 400 })
}
backendPath = `/api/compliance/v1/canonical/generate/review/${encodeURIComponent(controlId)}`
} else if (endpoint === 'blocked-sources-cleanup') {
backendPath = '/api/compliance/v1/canonical/blocked-sources/cleanup'
} else if (endpoint === 'similarity-check') {
const controlId = searchParams.get('id')
if (!controlId) {

View File

@@ -5,6 +5,7 @@ import {
Shield, Search, ChevronRight, ArrowLeft, ExternalLink,
Filter, AlertTriangle, CheckCircle2, Info, Lock,
FileText, BookOpen, Scale, Plus, Pencil, Trash2, Save, X,
Zap, BarChart3, Eye, RefreshCw, Clock,
} from 'lucide-react'
// =============================================================================
@@ -44,6 +45,11 @@ interface CanonicalControl {
open_anchors: OpenAnchor[]
release_state: string
tags: string[]
license_rule?: number | null
source_original_text?: string | null
source_citation?: Record<string, string> | null
customer_visible?: boolean
generation_metadata?: Record<string, unknown> | null
created_at: string
updated_at: string
}
@@ -116,14 +122,34 @@ function StateBadge({ state }: { state: string }) {
review: 'bg-blue-100 text-blue-700',
approved: 'bg-green-100 text-green-700',
deprecated: 'bg-red-100 text-red-600',
needs_review: 'bg-yellow-100 text-yellow-800',
too_close: 'bg-red-100 text-red-700',
duplicate: 'bg-orange-100 text-orange-700',
}
const labels: Record<string, string> = {
needs_review: 'Review noetig',
too_close: 'Zu aehnlich',
duplicate: 'Duplikat',
}
return (
<span className={`inline-flex items-center px-2 py-0.5 rounded text-xs font-medium ${config[state] || config.draft}`}>
{state}
{labels[state] || state}
</span>
)
}
/**
 * Small colored pill that names the license rule attached to a control.
 *
 * Rules 1, 2 and 3 are rendered as "Free Use", "Zitation" and "Reformuliert".
 * A falsy rule (null, undefined, 0) or any other number renders nothing.
 */
function LicenseRuleBadge({ rule }: { rule: number | null | undefined }) {
  // No rule recorded for this control: nothing to show.
  if (!rule) return null

  // Tailwind color classes and display label per known rule number.
  const badges: Record<number, { bg: string; label: string }> = {
    1: { bg: 'bg-green-100 text-green-700', label: 'Free Use' },
    2: { bg: 'bg-blue-100 text-blue-700', label: 'Zitation' },
    3: { bg: 'bg-amber-100 text-amber-700', label: 'Reformuliert' },
  }

  const badge = badges[rule]
  // Unknown rule numbers are hidden rather than rendered as a fallback badge.
  if (!badge) return null

  const { bg, label } = badge
  return (
    <span className={`inline-flex items-center px-2 py-0.5 rounded text-xs font-medium ${bg}`}>
      {label}
    </span>
  )
}
/**
 * Extracts the domain prefix of a control id: everything before the first '-'.
 *
 * @param controlId - Control identifier such as "AUTH-001".
 * @returns The text before the first dash, the whole id when it contains no
 *   dash, or an empty string when that prefix is empty.
 */
function getDomain(controlId: string): string {
  const dashAt = controlId.indexOf('-')
  // No dash present: the entire id is treated as the domain.
  return dashAt === -1 ? controlId : controlId.slice(0, dashAt)
}
@@ -419,6 +445,17 @@ export default function ControlLibraryPage() {
const [mode, setMode] = useState<'list' | 'detail' | 'create' | 'edit'>('list')
const [saving, setSaving] = useState(false)
// Generator state
const [showGenerator, setShowGenerator] = useState(false)
const [generating, setGenerating] = useState(false)
const [genResult, setGenResult] = useState<Record<string, unknown> | null>(null)
const [genDomain, setGenDomain] = useState('')
const [genMaxControls, setGenMaxControls] = useState(10)
const [genDryRun, setGenDryRun] = useState(true)
const [stateFilter, setStateFilter] = useState<string>('')
const [processedStats, setProcessedStats] = useState<Array<Record<string, unknown>>>([])
const [showStats, setShowStats] = useState(false)
// Load data
const loadData = useCallback(async () => {
try {
@@ -450,6 +487,7 @@ export default function ControlLibraryPage() {
return controls.filter(c => {
if (severityFilter && c.severity !== severityFilter) return false
if (domainFilter && getDomain(c.control_id) !== domainFilter) return false
if (stateFilter && c.release_state !== stateFilter) return false
if (searchQuery) {
const q = searchQuery.toLowerCase()
return (
@@ -461,7 +499,7 @@ export default function ControlLibraryPage() {
}
return true
})
}, [controls, severityFilter, domainFilter, searchQuery])
}, [controls, severityFilter, domainFilter, stateFilter, searchQuery])
// CRUD handlers
const handleCreate = async (data: typeof EMPTY_CONTROL) => {
@@ -526,6 +564,63 @@ export default function ControlLibraryPage() {
}
}
// Generator handlers
/**
 * Starts a control-generation run with the current generator settings and
 * stores the outcome in `genResult`.
 *
 * The request body carries `domain` (null when no domain is selected),
 * `max_controls`, `dry_run` and `skip_web_search: false`. On an HTTP error the
 * result is `{ status: 'error', message }`; on success it is the parsed
 * response. After a successful non-dry run the control list is reloaded.
 * `generating` is set for the duration of the call and always cleared.
 */
const handleGenerate = async () => {
  setGenerating(true)
  setGenResult(null)
  try {
    const res = await fetch(`${BACKEND_URL}?endpoint=generate`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        domain: genDomain || null,
        max_controls: genMaxControls,
        dry_run: genDryRun,
        skip_web_search: false,
      }),
    })
    if (!res.ok) {
      // An error response is not guaranteed to carry a JSON body. Tolerate a
      // parse failure (or a JSON `null`) here so an HTTP error is not
      // misreported as a network error by the catch below.
      const err = (await res.json().catch(() => null)) || {}
      setGenResult({ status: 'error', message: err.error || err.details || 'Fehler' })
      return
    }
    const data = await res.json()
    setGenResult(data)
    // A dry run stores nothing, so the list only needs refreshing otherwise.
    if (!genDryRun) {
      await loadData()
    }
  } catch {
    setGenResult({ status: 'error', message: 'Netzwerkfehler' })
  } finally {
    setGenerating(false)
  }
}
/**
 * Fetches the per-collection processing statistics and stores them in
 * `processedStats`. Best effort: a non-OK response or any thrown error
 * leaves the current state untouched.
 */
const loadProcessedStats = async () => {
  try {
    const response = await fetch(`${BACKEND_URL}?endpoint=processed-stats`)
    if (!response.ok) return
    const payload = await response.json()
    // A missing `stats` field falls back to an empty list.
    setProcessedStats(payload.stats || [])
  } catch {
    /* ignore */
  }
}
/**
 * Submits a review decision for a control. On success the control list is
 * reloaded, the selection is cleared and the page returns to list mode.
 * On failure the current view is left unchanged and the problem is logged.
 *
 * @param controlId - Id of the control being reviewed; sent as the `id` query parameter.
 * @param action - Review action forwarded in the JSON body (the UI passes 'approve' or 'reject').
 */
const handleReview = async (controlId: string, action: string) => {
  try {
    // Encode the id so characters such as '&', '#', '+' or '%' cannot alter
    // or truncate the query string before the proxy reads the `id` parameter.
    const res = await fetch(`${BACKEND_URL}?endpoint=review&id=${encodeURIComponent(controlId)}`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ action }),
    })
    if (!res.ok) {
      // Stay on the current view, but leave a trace of the rejected request.
      console.error(`Review request failed for control ${controlId}: HTTP ${res.status}`)
      return
    }
    await loadData()
    setSelectedControl(null)
    setMode('list')
  } catch (err) {
    // Best effort: do not break the UI, but do not hide the failure either.
    console.error(`Review request failed for control ${controlId}`, err)
  }
}
if (loading) {
return (
<div className="flex items-center justify-center h-96">
@@ -748,6 +843,98 @@ export default function ControlLibraryPage() {
</div>
</section>
)}
{/* License & Citation Info */}
{ctrl.license_rule && (
<section className="bg-blue-50 border border-blue-200 rounded-lg p-4">
<div className="flex items-center gap-2 mb-2">
<Scale className="w-4 h-4 text-blue-700" />
<h3 className="text-sm font-semibold text-blue-900">Lizenzinformationen</h3>
<LicenseRuleBadge rule={ctrl.license_rule} />
</div>
{ctrl.source_citation && (
<div className="text-xs text-blue-800 space-y-1">
<p><span className="font-medium">Quelle:</span> {ctrl.source_citation.source}</p>
{ctrl.source_citation.license && <p><span className="font-medium">Lizenz:</span> {ctrl.source_citation.license}</p>}
{ctrl.source_citation.license_notice && <p><span className="font-medium">Hinweis:</span> {ctrl.source_citation.license_notice}</p>}
{ctrl.source_citation.url && (
<a href={ctrl.source_citation.url} target="_blank" rel="noopener noreferrer" className="flex items-center gap-1 text-blue-600 hover:text-blue-800">
<ExternalLink className="w-3 h-3" /> Originalquelle
</a>
)}
</div>
)}
{ctrl.source_original_text && (
<details className="mt-2">
<summary className="text-xs text-blue-600 cursor-pointer hover:text-blue-800">Originaltext anzeigen</summary>
<p className="mt-1 text-xs text-gray-700 bg-white rounded p-2 border border-blue-100 max-h-40 overflow-y-auto">{ctrl.source_original_text}</p>
</details>
)}
{ctrl.license_rule === 3 && (
<p className="text-xs text-amber-700 mt-2 flex items-center gap-1">
<Lock className="w-3 h-3" />
Eigenstaendig formuliert keine Originalquelle gespeichert
</p>
)}
</section>
)}
{/* Generation Metadata (internal) */}
{ctrl.generation_metadata && (
<section className="bg-gray-50 border border-gray-200 rounded-lg p-4">
<div className="flex items-center gap-2 mb-2">
<Clock className="w-4 h-4 text-gray-500" />
<h3 className="text-sm font-semibold text-gray-700">Generierungsdetails (intern)</h3>
</div>
<div className="text-xs text-gray-600 space-y-1">
<p>Pfad: {String(ctrl.generation_metadata.processing_path || '-')}</p>
{ctrl.generation_metadata.similarity_status && (
<p className="text-red-600">Similarity: {String(ctrl.generation_metadata.similarity_status)}</p>
)}
{Array.isArray(ctrl.generation_metadata.similar_controls) && (
<div>
<p className="font-medium">Aehnliche Controls:</p>
{(ctrl.generation_metadata.similar_controls as Array<Record<string, unknown>>).map((s, i) => (
<p key={i} className="ml-2">{String(s.control_id)} {String(s.title)} ({String(s.similarity)})</p>
))}
</div>
)}
</div>
</section>
)}
{/* Review Actions */}
{['needs_review', 'too_close', 'duplicate'].includes(ctrl.release_state) && (
<section className="bg-yellow-50 border border-yellow-200 rounded-lg p-4">
<div className="flex items-center gap-2 mb-3">
<Eye className="w-4 h-4 text-yellow-700" />
<h3 className="text-sm font-semibold text-yellow-900">Review erforderlich</h3>
</div>
<div className="flex items-center gap-2">
<button
onClick={() => handleReview(ctrl.control_id, 'approve')}
className="px-3 py-1.5 text-sm text-white bg-green-600 rounded-lg hover:bg-green-700"
>
<CheckCircle2 className="w-3.5 h-3.5 inline mr-1" />
Akzeptieren
</button>
<button
onClick={() => handleReview(ctrl.control_id, 'reject')}
className="px-3 py-1.5 text-sm text-white bg-red-600 rounded-lg hover:bg-red-700"
>
<Trash2 className="w-3.5 h-3.5 inline mr-1" />
Ablehnen
</button>
<button
onClick={() => setMode('edit')}
className="px-3 py-1.5 text-sm text-gray-600 border border-gray-300 rounded-lg hover:bg-gray-50"
>
<Pencil className="w-3.5 h-3.5 inline mr-1" />
Ueberarbeiten
</button>
</div>
</section>
)}
</div>
</div>
)
@@ -772,13 +959,29 @@ export default function ControlLibraryPage() {
</p>
</div>
</div>
<button
onClick={() => setMode('create')}
className="flex items-center gap-1.5 px-3 py-2 text-sm text-white bg-purple-600 rounded-lg hover:bg-purple-700"
>
<Plus className="w-4 h-4" />
Neues Control
</button>
<div className="flex items-center gap-2">
<button
onClick={() => { setShowStats(!showStats); if (!showStats) loadProcessedStats() }}
className="flex items-center gap-1.5 px-3 py-2 text-sm text-gray-600 border border-gray-300 rounded-lg hover:bg-gray-50"
>
<BarChart3 className="w-4 h-4" />
Stats
</button>
<button
onClick={() => setShowGenerator(true)}
className="flex items-center gap-1.5 px-3 py-2 text-sm text-white bg-amber-600 rounded-lg hover:bg-amber-700"
>
<Zap className="w-4 h-4" />
Generator
</button>
<button
onClick={() => setMode('create')}
className="flex items-center gap-1.5 px-3 py-2 text-sm text-white bg-purple-600 rounded-lg hover:bg-purple-700"
>
<Plus className="w-4 h-4" />
Neues Control
</button>
</div>
</div>
{/* Frameworks */}
@@ -829,9 +1032,131 @@ export default function ControlLibraryPage() {
<option key={d} value={d}>{d}</option>
))}
</select>
<select
value={stateFilter}
onChange={e => setStateFilter(e.target.value)}
className="text-sm border border-gray-300 rounded-lg px-3 py-2 focus:outline-none focus:ring-2 focus:ring-purple-500"
>
<option value="">Alle Status</option>
<option value="draft">Draft</option>
<option value="approved">Approved</option>
<option value="needs_review">Review noetig</option>
<option value="too_close">Zu aehnlich</option>
<option value="duplicate">Duplikat</option>
</select>
</div>
{/* Processing Stats */}
{showStats && processedStats.length > 0 && (
<div className="mt-3 p-3 bg-gray-50 rounded-lg">
<h4 className="text-xs font-semibold text-gray-700 mb-2">Verarbeitungsfortschritt</h4>
<div className="grid grid-cols-3 gap-3">
{processedStats.map((s, i) => (
<div key={i} className="text-xs">
<span className="font-medium text-gray-700">{String(s.collection)}</span>
<div className="flex gap-2 mt-1 text-gray-500">
<span>{String(s.processed_chunks)} verarbeitet</span>
<span>{String(s.direct_adopted)} direkt</span>
<span>{String(s.llm_reformed)} reformuliert</span>
</div>
</div>
))}
</div>
</div>
)}
</div>
{/* Generator Modal */}
{showGenerator && (
<div className="fixed inset-0 z-50 flex items-center justify-center bg-black/40">
<div className="bg-white rounded-xl shadow-xl w-full max-w-lg p-6 mx-4">
<div className="flex items-center justify-between mb-4">
<div className="flex items-center gap-2">
<Zap className="w-5 h-5 text-amber-600" />
<h2 className="text-lg font-semibold text-gray-900">Control Generator</h2>
</div>
<button onClick={() => { setShowGenerator(false); setGenResult(null) }} className="text-gray-400 hover:text-gray-600">
<X className="w-5 h-5" />
</button>
</div>
<div className="space-y-4">
<div>
<label className="block text-xs font-medium text-gray-600 mb-1">Domain (optional)</label>
<select value={genDomain} onChange={e => setGenDomain(e.target.value)} className="w-full px-3 py-2 text-sm border border-gray-300 rounded-lg">
<option value="">Alle Domains</option>
<option value="AUTH">AUTH Authentifizierung</option>
<option value="CRYPT">CRYPT Kryptographie</option>
<option value="NET">NET Netzwerk</option>
<option value="DATA">DATA Datenschutz</option>
<option value="LOG">LOG Logging</option>
<option value="ACC">ACC Zugriffskontrolle</option>
<option value="SEC">SEC Sicherheit</option>
<option value="INC">INC Incident Response</option>
<option value="AI">AI Kuenstliche Intelligenz</option>
<option value="COMP">COMP Compliance</option>
</select>
</div>
<div>
<label className="block text-xs font-medium text-gray-600 mb-1">Max. Controls: {genMaxControls}</label>
<input
type="range" min="1" max="100" step="1"
value={genMaxControls}
onChange={e => setGenMaxControls(parseInt(e.target.value))}
className="w-full"
/>
</div>
<div className="flex items-center gap-2">
<input
type="checkbox"
id="dryRun"
checked={genDryRun}
onChange={e => setGenDryRun(e.target.checked)}
className="rounded border-gray-300"
/>
<label htmlFor="dryRun" className="text-sm text-gray-700">Dry Run (Vorschau ohne Speicherung)</label>
</div>
<button
onClick={handleGenerate}
disabled={generating}
className="w-full py-2 text-sm text-white bg-amber-600 rounded-lg hover:bg-amber-700 disabled:opacity-50 flex items-center justify-center gap-2"
>
{generating ? (
<><RefreshCw className="w-4 h-4 animate-spin" /> Generiere...</>
) : (
<><Zap className="w-4 h-4" /> Generierung starten</>
)}
</button>
{/* Results */}
{genResult && (
<div className={`p-4 rounded-lg text-sm ${genResult.status === 'error' ? 'bg-red-50 text-red-800' : 'bg-green-50 text-green-800'}`}>
<p className="font-medium mb-1">{String(genResult.message || genResult.status)}</p>
{genResult.status !== 'error' && (
<div className="grid grid-cols-2 gap-1 text-xs mt-2">
<span>Chunks gescannt: {String(genResult.total_chunks_scanned)}</span>
<span>Controls generiert: {String(genResult.controls_generated)}</span>
<span>Verifiziert: {String(genResult.controls_verified)}</span>
<span>Review noetig: {String(genResult.controls_needs_review)}</span>
<span>Zu aehnlich: {String(genResult.controls_too_close)}</span>
<span>Duplikate: {String(genResult.controls_duplicates_found)}</span>
</div>
)}
{Array.isArray(genResult.errors) && (genResult.errors as string[]).length > 0 && (
<div className="mt-2 text-xs text-red-600">
{(genResult.errors as string[]).slice(0, 3).map((e, i) => <p key={i}>{e}</p>)}
</div>
)}
</div>
)}
</div>
</div>
</div>
)}
{/* Control List */}
<div className="flex-1 overflow-y-auto p-6">
<div className="space-y-3">
@@ -847,6 +1172,7 @@ export default function ControlLibraryPage() {
<span className="text-xs font-mono text-purple-600 bg-purple-50 px-1.5 py-0.5 rounded">{ctrl.control_id}</span>
<SeverityBadge severity={ctrl.severity} />
<StateBadge state={ctrl.release_state} />
<LicenseRuleBadge rule={ctrl.license_rule} />
{ctrl.risk_score !== null && (
<span className="text-xs text-gray-400">Score: {ctrl.risk_score}</span>
)}