feat(control-library): document-grouped batching, generation strategy tracking, sort by source
All checks were successful
CI/CD / go-lint (push) Has been skipped
CI/CD / python-lint (push) Has been skipped
CI/CD / nodejs-lint (push) Has been skipped
CI/CD / test-go-ai-compliance (push) Successful in 31s
CI/CD / test-python-backend-compliance (push) Successful in 31s
CI/CD / test-python-document-crawler (push) Successful in 21s
CI/CD / test-python-dsms-gateway (push) Successful in 18s
CI/CD / validate-canonical-controls (push) Successful in 11s
CI/CD / Deploy (push) Successful in 2s

- Group chunks by regulation_code before batching for better LLM context
- Add generation_strategy column (ungrouped=v1, document_grouped=v2)
- Add v1/v2 badge to control cards in frontend
- Add sort-by-source option with visual group headers
- Add frontend page tests (18 tests)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-03-15 15:10:52 +01:00
parent 0d95c3bb44
commit c8fd9cc780
9 changed files with 1000 additions and 137 deletions

View File

@@ -25,22 +25,35 @@ export async function GET(request: NextRequest) {
break break
case 'controls': {
  // Forward only whitelisted filter, sort and paging parameters to the backend.
  // 'release_state' must be part of the whitelist: the control library page sends it
  // for its status filter and when loading the review queue, and any key missing
  // here is silently dropped by this proxy.
  const controlParams = new URLSearchParams()
  const passthrough = ['severity', 'domain', 'release_state', 'verification_method', 'category',
    'target_audience', 'source', 'search', 'sort', 'order', 'limit', 'offset']
  for (const key of passthrough) {
    const val = searchParams.get(key)
    if (val) controlParams.set(key, val)
  }
  const qs = controlParams.toString()
  backendPath = `/api/compliance/v1/canonical/controls${qs ? `?${qs}` : ''}`
  break
}
case 'controls-count': {
  // Same filter whitelist as 'controls' (without sort/paging) so the reported total
  // always describes the same result set as the list request.
  const countParams = new URLSearchParams()
  const countPassthrough = ['severity', 'domain', 'release_state', 'verification_method', 'category',
    'target_audience', 'source', 'search']
  for (const key of countPassthrough) {
    const val = searchParams.get(key)
    if (val) countParams.set(key, val)
  }
  const countQs = countParams.toString()
  backendPath = `/api/compliance/v1/canonical/controls-count${countQs ? `?${countQs}` : ''}`
  break
}
case 'controls-meta':
  // Takes no parameters.
  backendPath = '/api/compliance/v1/canonical/controls-meta'
  break
case 'control': { case 'control': {
const controlId = searchParams.get('id') const controlId = searchParams.get('id')
if (!controlId) { if (!controlId) {

View File

@@ -0,0 +1,322 @@
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'
import { render, screen, waitFor, fireEvent, act } from '@testing-library/react'
import ControlLibraryPage from '../page'
// ============================================================================
// Mock data
// ============================================================================
// Single framework served by the mocked `frameworks` endpoint.
const MOCK_FRAMEWORK = {
id: 'fw-1',
framework_id: 'bp_security_v1',
name: 'BreakPilot Security',
version: '1.0',
description: 'Test framework',
release_state: 'draft',
}
// Single control served by the mocked `controls` endpoint.
// The tests below look for its control_id, title, source_citation.source and
// the rendered date of created_at, so keep those values in sync with the assertions.
const MOCK_CONTROL = {
id: 'ctrl-1',
framework_id: 'fw-1',
control_id: 'AUTH-001',
title: 'Multi-Factor Authentication',
objective: 'Require MFA for all admin accounts.',
rationale: 'Passwords alone are insufficient.',
scope: {},
requirements: ['MFA for admin'],
test_procedure: ['Test admin login'],
evidence: [{ type: 'config', description: 'MFA enabled' }],
severity: 'high',
risk_score: 4.0,
implementation_effort: 'm',
evidence_confidence: null,
open_anchors: [{ framework: 'OWASP', ref: 'V2.8', url: 'https://owasp.org' }],
release_state: 'draft',
tags: ['mfa'],
license_rule: 1,
source_original_text: null,
source_citation: { source: 'DSGVO' },
customer_visible: true,
verification_method: 'automated',
category: 'authentication',
target_audience: 'developer',
generation_metadata: null,
generation_strategy: 'ungrouped',
created_at: '2026-03-15T10:00:00+00:00',
updated_at: '2026-03-15T10:00:00+00:00',
}
// Payload served by the mocked `controls-meta` endpoint.
// The dropdown tests expect the option labels "AUTH (1)" and "DSGVO (1)" derived from it.
const MOCK_META = {
total: 1,
domains: [{ domain: 'AUTH', count: 1 }],
sources: [{ source: 'DSGVO', count: 1 }],
no_source_count: 0,
}
// ============================================================================
// Fetch mock
// ============================================================================
/**
 * Builds a `vi.fn` stand-in for `fetch` that picks its response by the
 * `endpoint` query parameter of the requested URL.
 *
 * @param overrides - Optional per-endpoint payloads that replace the defaults.
 * @returns A mock whose calls resolve to an ok/200 response object; endpoints
 *   without a configured payload resolve to an empty array.
 */
function createFetchMock(overrides?: Record<string, unknown>) {
  const payloads: Record<string, unknown> = {
    frameworks: [MOCK_FRAMEWORK],
    controls: [MOCK_CONTROL],
    'controls-count': { total: 1 },
    'controls-meta': MOCK_META,
    ...overrides,
  }

  // Extracts the value of the endpoint parameter, or '' when it is absent.
  const endpointOf = (target: string): string => {
    const found = /endpoint=([^&]+)/.exec(typeof target === 'string' ? target : '')
    return found?.[1] || ''
  }

  return vi.fn((url: string) => {
    const body = payloads[endpointOf(url)] ?? []
    return Promise.resolve({
      ok: true,
      status: 200,
      json: () => Promise.resolve(body),
    })
  })
}
// ============================================================================
// Tests
// ============================================================================
describe('ControlLibraryPage', () => {
// Remember the real implementation so each test leaves `global.fetch` as it found it.
const originalFetch = global.fetch
let fetchMock: ReturnType<typeof createFetchMock>

beforeEach(() => {
  fetchMock = createFetchMock()
  global.fetch = fetchMock as unknown as typeof fetch
})

afterEach(() => {
  vi.restoreAllMocks()
  // restoreAllMocks() does not undo a direct assignment, so put the original back by hand.
  global.fetch = originalFetch
})
it('renders the page header', async () => {
  render(<ControlLibraryPage />)
  expect(await screen.findByText('Canonical Control Library')).toBeInTheDocument()
})

it('shows control count from meta', async () => {
  // The header total comes from controls-meta, so only that payload is overridden.
  fetchMock = createFetchMock({ 'controls-meta': { ...MOCK_META, total: 42 } })
  global.fetch = fetchMock as unknown as typeof fetch
  render(<ControlLibraryPage />)
  expect(await screen.findByText(/42 Security Controls/)).toBeInTheDocument()
})

it('renders control list with data', async () => {
  render(<ControlLibraryPage />)
  expect(await screen.findByText('AUTH-001')).toBeInTheDocument()
  expect(await screen.findByText('Multi-Factor Authentication')).toBeInTheDocument()
})

it('shows timestamp on control cards', async () => {
  render(<ControlLibraryPage />)
  // The date should be rendered in German locale format
  expect(await screen.findByText(/15\.03\.26/)).toBeInTheDocument()
})

it('shows source citation on control cards', async () => {
  render(<ControlLibraryPage />)
  expect(await screen.findByText('DSGVO')).toBeInTheDocument()
})
it('fetches with limit and offset params', async () => {
  render(<ControlLibraryPage />)
  // Poll for the paged controls request itself. Waiting for "any fetch call" can be
  // satisfied by the frameworks/meta requests before the controls request exists.
  await waitFor(() => {
    const controlsCalls = fetchMock.mock.calls.filter(
      (call: unknown[]) => typeof call[0] === 'string' && (call[0] as string).includes('endpoint=controls&')
    )
    expect(controlsCalls.length).toBeGreaterThan(0)
    const url = controlsCalls[0][0] as string
    expect(url).toContain('limit=50')
    expect(url).toContain('offset=0')
  })
})
it('fetches controls-count alongside controls', async () => {
  render(<ControlLibraryPage />)
  await waitFor(() => {
    // Passes as soon as at least one recorded request targets the count endpoint.
    const countRequested = fetchMock.mock.calls.some(
      (call: unknown[]) => typeof call[0] === 'string' && (call[0] as string).includes('endpoint=controls-count')
    )
    expect(countRequested).toBe(true)
  })
})

it('fetches controls-meta on mount', async () => {
  render(<ControlLibraryPage />)
  await waitFor(() => {
    // Passes as soon as at least one recorded request targets the meta endpoint.
    const metaRequested = fetchMock.mock.calls.some(
      (call: unknown[]) => typeof call[0] === 'string' && (call[0] as string).includes('endpoint=controls-meta')
    )
    expect(metaRequested).toBe(true)
  })
})
it('renders domain dropdown from meta', async () => {
  render(<ControlLibraryPage />)
  expect(await screen.findByText('AUTH (1)')).toBeInTheDocument()
})

it('renders source dropdown from meta', async () => {
  render(<ControlLibraryPage />)
  // The source option should appear in the dropdown
  expect(await screen.findByText('DSGVO (1)')).toBeInTheDocument()
})

it('has sort dropdown with all sort options', async () => {
  render(<ControlLibraryPage />)
  const optionLabels = ['Sortierung: ID', 'Nach Quelle', 'Neueste zuerst', 'Aelteste zuerst']
  for (const label of optionLabels) {
    expect(await screen.findByText(label)).toBeInTheDocument()
  }
})
// Selecting "newest" in the sort dropdown must trigger a new controls request
// carrying sort=created_at and order=desc.
it('sends sort params when sorting by newest', async () => {
render(<ControlLibraryPage />)
// Wait until the initial list has rendered so the initial requests are already recorded.
await waitFor(() => {
expect(screen.getByText('AUTH-001')).toBeInTheDocument()
})
// Clear previous calls
fetchMock.mockClear()
// Change sort to newest
const sortSelect = screen.getByDisplayValue('Sortierung: ID')
await act(async () => {
fireEvent.change(sortSelect, { target: { value: 'newest' } })
})
await waitFor(() => {
// 'endpoint=controls&' (with the trailing '&') excludes controls-count and controls-meta requests.
const controlsCalls = fetchMock.mock.calls.filter(
(call: unknown[]) => typeof call[0] === 'string' && (call[0] as string).includes('endpoint=controls&')
)
expect(controlsCalls.length).toBeGreaterThan(0)
const url = controlsCalls[0][0] as string
expect(url).toContain('sort=created_at')
expect(url).toContain('order=desc')
})
})
// Typing into the search box must eventually produce a request containing the search term.
// Real timers are used, so the waitFor timeout has to exceed the debounce delay.
it('sends search param after debounce', async () => {
render(<ControlLibraryPage />)
// Wait until the initial list has rendered so the initial requests are already recorded.
await waitFor(() => {
expect(screen.getByText('AUTH-001')).toBeInTheDocument()
})
// Drop the initial requests so only requests caused by the search are inspected.
fetchMock.mockClear()
const searchInput = screen.getByPlaceholderText(/Controls durchsuchen/)
await act(async () => {
fireEvent.change(searchInput, { target: { value: 'encryption' } })
})
// Wait for debounce (400ms)
await waitFor(
() => {
const controlsCalls = fetchMock.mock.calls.filter(
(call: unknown[]) => typeof call[0] === 'string' && (call[0] as string).includes('search=encryption')
)
expect(controlsCalls.length).toBeGreaterThan(0)
},
{ timeout: 1000 }
)
})
it('shows empty state when no controls', async () => {
  // Library without any controls at all: empty list, zero count, zero meta total.
  const emptyLibrary = {
    controls: [],
    'controls-count': { total: 0 },
    'controls-meta': { ...MOCK_META, total: 0 },
  }
  fetchMock = createFetchMock(emptyLibrary)
  global.fetch = fetchMock as unknown as typeof fetch
  render(<ControlLibraryPage />)
  expect(await screen.findByText(/Noch keine Controls/)).toBeInTheDocument()
})

it('shows "Keine Controls gefunden" when filter matches nothing', async () => {
  // Library that has controls overall (meta total 50) but none for the current query.
  const nothingMatches = {
    controls: [],
    'controls-count': { total: 0 },
    'controls-meta': { ...MOCK_META, total: 50 },
  }
  fetchMock = createFetchMock(nothingMatches)
  global.fetch = fetchMock as unknown as typeof fetch
  render(<ControlLibraryPage />)
  // Wait for initial load to finish
  const searchInput = await screen.findByPlaceholderText(/Controls durchsuchen/)
  // Trigger a search to have a filter active
  await act(async () => {
    fireEvent.change(searchInput, { target: { value: 'zzzzzzz' } })
  })
  expect(
    await screen.findByText('Keine Controls gefunden.', undefined, { timeout: 1000 })
  ).toBeInTheDocument()
})
it('has a refresh button', async () => {
  render(<ControlLibraryPage />)
  expect(await screen.findByTitle('Aktualisieren')).toBeInTheDocument()
})

it('renders pagination info', async () => {
  render(<ControlLibraryPage />)
  expect(await screen.findByText(/Seite 1 von 1/)).toBeInTheDocument()
})

it('shows pagination buttons for many controls', async () => {
  // 150 matching controls are expected to be reported as three pages.
  const manyControls = {
    'controls-count': { total: 150 },
    'controls-meta': { ...MOCK_META, total: 150 },
  }
  fetchMock = createFetchMock(manyControls)
  global.fetch = fetchMock as unknown as typeof fetch
  render(<ControlLibraryPage />)
  expect(await screen.findByText(/Seite 1 von 3/)).toBeInTheDocument()
})
})

View File

@@ -46,6 +46,7 @@ export interface CanonicalControl {
category: string | null category: string | null
target_audience: string | null target_audience: string | null
generation_metadata?: Record<string, unknown> | null generation_metadata?: Record<string, unknown> | null
generation_strategy?: string | null
created_at: string created_at: string
updated_at: string updated_at: string
} }
@@ -229,6 +230,16 @@ export function TargetAudienceBadge({ audience }: { audience: string | null }) {
return <span className={`inline-flex items-center px-2 py-0.5 rounded text-xs font-medium ${config.bg}`}>{config.label}</span> return <span className={`inline-flex items-center px-2 py-0.5 rounded text-xs font-medium ${config.bg}`}>{config.label}</span>
} }
/**
 * Small pill showing which generation strategy produced a control.
 *
 * A missing or empty strategy is treated like 'ungrouped' and shown as "v1";
 * 'document_grouped' is shown as "v2"; any other value renders nothing.
 */
export function GenerationStrategyBadge({ strategy }: { strategy: string | null | undefined }) {
  switch (strategy || 'ungrouped') {
    case 'ungrouped':
      return <span className="inline-flex items-center px-1.5 py-0.5 rounded text-xs font-medium bg-gray-100 text-gray-500">v1</span>
    case 'document_grouped':
      return <span className="inline-flex items-center px-1.5 py-0.5 rounded text-xs font-medium bg-emerald-100 text-emerald-700">v2</span>
    default:
      return null
  }
}
export function getDomain(controlId: string): string { export function getDomain(controlId: string): string {
return controlId.split('-')[0] || '' return controlId.split('-')[0] || ''
} }

View File

@@ -1,33 +1,50 @@
'use client' 'use client'
import { useState, useEffect, useMemo, useCallback } from 'react' import { useState, useEffect, useCallback, useRef } from 'react'
import { import {
Shield, Search, ChevronRight, ChevronLeft, Filter, Lock, Shield, Search, ChevronRight, ChevronLeft, Filter, Lock,
BookOpen, Plus, Zap, BarChart3, ListChecks, BookOpen, Plus, Zap, BarChart3, ListChecks,
ChevronsLeft, ChevronsRight, ChevronsLeft, ChevronsRight, ArrowUpDown, Clock, RefreshCw,
} from 'lucide-react' } from 'lucide-react'
import { import {
CanonicalControl, Framework, BACKEND_URL, EMPTY_CONTROL, CanonicalControl, Framework, BACKEND_URL, EMPTY_CONTROL,
SeverityBadge, StateBadge, LicenseRuleBadge, VerificationMethodBadge, CategoryBadge, TargetAudienceBadge, SeverityBadge, StateBadge, LicenseRuleBadge, VerificationMethodBadge, CategoryBadge, TargetAudienceBadge,
getDomain, VERIFICATION_METHODS, CATEGORY_OPTIONS, TARGET_AUDIENCE_OPTIONS, GenerationStrategyBadge,
VERIFICATION_METHODS, CATEGORY_OPTIONS, TARGET_AUDIENCE_OPTIONS,
} from './components/helpers' } from './components/helpers'
import { ControlForm } from './components/ControlForm' import { ControlForm } from './components/ControlForm'
import { ControlDetail } from './components/ControlDetail' import { ControlDetail } from './components/ControlDetail'
import { GeneratorModal } from './components/GeneratorModal' import { GeneratorModal } from './components/GeneratorModal'
// ============================================================================= // =============================================================================
// CONTROL LIBRARY PAGE // TYPES
// ============================================================================= // =============================================================================
/**
 * Payload of the `controls-meta` endpoint as consumed by the control library page:
 * it feeds the header total and the domain/source filter dropdowns.
 */
interface ControlsMeta {
// Number of controls in the library, independent of the active filters.
total: number
// One entry per domain with the number of controls in it.
domains: Array<{ domain: string; count: number }>
// One entry per document source with the number of controls citing it.
sources: Array<{ source: string; count: number }>
// Number of controls without a source; shown next to the "Ohne Quelle" option.
no_source_count: number
}
// =============================================================================
// CONTROL LIBRARY PAGE — Server-Side Pagination
// =============================================================================
const PAGE_SIZE = 50
export default function ControlLibraryPage() { export default function ControlLibraryPage() {
const [frameworks, setFrameworks] = useState<Framework[]>([]) const [frameworks, setFrameworks] = useState<Framework[]>([])
const [controls, setControls] = useState<CanonicalControl[]>([]) const [controls, setControls] = useState<CanonicalControl[]>([])
const [totalCount, setTotalCount] = useState(0)
const [meta, setMeta] = useState<ControlsMeta | null>(null)
const [selectedControl, setSelectedControl] = useState<CanonicalControl | null>(null) const [selectedControl, setSelectedControl] = useState<CanonicalControl | null>(null)
const [loading, setLoading] = useState(true) const [loading, setLoading] = useState(true)
const [error, setError] = useState<string | null>(null) const [error, setError] = useState<string | null>(null)
// Filters // Filters
const [searchQuery, setSearchQuery] = useState('') const [searchQuery, setSearchQuery] = useState('')
const [debouncedSearch, setDebouncedSearch] = useState('')
const [severityFilter, setSeverityFilter] = useState<string>('') const [severityFilter, setSeverityFilter] = useState<string>('')
const [domainFilter, setDomainFilter] = useState<string>('') const [domainFilter, setDomainFilter] = useState<string>('')
const [stateFilter, setStateFilter] = useState<string>('') const [stateFilter, setStateFilter] = useState<string>('')
@@ -35,6 +52,7 @@ export default function ControlLibraryPage() {
const [categoryFilter, setCategoryFilter] = useState<string>('') const [categoryFilter, setCategoryFilter] = useState<string>('')
const [audienceFilter, setAudienceFilter] = useState<string>('') const [audienceFilter, setAudienceFilter] = useState<string>('')
const [sourceFilter, setSourceFilter] = useState<string>('') const [sourceFilter, setSourceFilter] = useState<string>('')
const [sortBy, setSortBy] = useState<'id' | 'newest' | 'oldest' | 'source'>('id')
// CRUD state // CRUD state
const [mode, setMode] = useState<'list' | 'detail' | 'create' | 'edit'>('list') const [mode, setMode] = useState<'list' | 'detail' | 'create' | 'edit'>('list')
@@ -47,98 +65,111 @@ export default function ControlLibraryPage() {
// Pagination // Pagination
const [currentPage, setCurrentPage] = useState(1) const [currentPage, setCurrentPage] = useState(1)
const PAGE_SIZE = 50
// Review mode // Review mode
const [reviewMode, setReviewMode] = useState(false) const [reviewMode, setReviewMode] = useState(false)
const [reviewIndex, setReviewIndex] = useState(0) const [reviewIndex, setReviewIndex] = useState(0)
const [reviewItems, setReviewItems] = useState<CanonicalControl[]>([])
const [reviewCount, setReviewCount] = useState(0)
// Load data // Debounce search
const loadData = useCallback(async () => { const searchTimer = useRef<ReturnType<typeof setTimeout> | null>(null)
useEffect(() => {
if (searchTimer.current) clearTimeout(searchTimer.current)
searchTimer.current = setTimeout(() => setDebouncedSearch(searchQuery), 400)
return () => { if (searchTimer.current) clearTimeout(searchTimer.current) }
}, [searchQuery])
// Build query params for backend
const buildParams = useCallback((extra?: Record<string, string>) => {
const p = new URLSearchParams()
if (severityFilter) p.set('severity', severityFilter)
if (domainFilter) p.set('domain', domainFilter)
if (stateFilter) p.set('release_state', stateFilter)
if (verificationFilter) p.set('verification_method', verificationFilter)
if (categoryFilter) p.set('category', categoryFilter)
if (audienceFilter) p.set('target_audience', audienceFilter)
if (sourceFilter) p.set('source', sourceFilter)
if (debouncedSearch) p.set('search', debouncedSearch)
if (extra) for (const [k, v] of Object.entries(extra)) p.set(k, v)
return p.toString()
}, [severityFilter, domainFilter, stateFilter, verificationFilter, categoryFilter, audienceFilter, sourceFilter, debouncedSearch])
// Load metadata (domains, sources — once + on refresh)
const loadMeta = useCallback(async () => {
try {
const [fwRes, metaRes] = await Promise.all([
fetch(`${BACKEND_URL}?endpoint=frameworks`),
fetch(`${BACKEND_URL}?endpoint=controls-meta`),
])
if (fwRes.ok) setFrameworks(await fwRes.json())
if (metaRes.ok) setMeta(await metaRes.json())
} catch { /* ignore */ }
}, [])
// Load controls page
const loadControls = useCallback(async () => {
try { try {
setLoading(true) setLoading(true)
const [fwRes, ctrlRes] = await Promise.all([
fetch(`${BACKEND_URL}?endpoint=frameworks`), // Determine sort
fetch(`${BACKEND_URL}?endpoint=controls`), const sortField = sortBy === 'id' ? 'control_id' : sortBy === 'source' ? 'source' : 'created_at'
const sortOrder = sortBy === 'newest' ? 'desc' : sortBy === 'oldest' ? 'asc' : 'asc'
const offset = (currentPage - 1) * PAGE_SIZE
const qs = buildParams({
sort: sortField,
order: sortOrder,
limit: String(PAGE_SIZE),
offset: String(offset),
})
const countQs = buildParams()
const [ctrlRes, countRes] = await Promise.all([
fetch(`${BACKEND_URL}?endpoint=controls&${qs}`),
fetch(`${BACKEND_URL}?endpoint=controls-count&${countQs}`),
]) ])
if (fwRes.ok) setFrameworks(await fwRes.json())
if (ctrlRes.ok) setControls(await ctrlRes.json()) if (ctrlRes.ok) setControls(await ctrlRes.json())
if (countRes.ok) {
const data = await countRes.json()
setTotalCount(data.total || 0)
}
} catch (err) { } catch (err) {
setError(err instanceof Error ? err.message : 'Fehler beim Laden') setError(err instanceof Error ? err.message : 'Fehler beim Laden')
} finally { } finally {
setLoading(false) setLoading(false)
} }
}, [buildParams, sortBy, currentPage])
// Load review count
const loadReviewCount = useCallback(async () => {
try {
const res = await fetch(`${BACKEND_URL}?endpoint=controls-count&release_state=needs_review`)
if (res.ok) {
const data = await res.json()
setReviewCount(data.total || 0)
}
} catch { /* ignore */ }
}, []) }, [])
useEffect(() => { loadData() }, [loadData]) // Initial load
useEffect(() => { loadMeta(); loadReviewCount() }, [loadMeta, loadReviewCount])
// Derived: unique domains // Load controls when filters/page/sort change
const domains = useMemo(() => { useEffect(() => { loadControls() }, [loadControls])
const set = new Set(controls.map(c => getDomain(c.control_id)))
return Array.from(set).sort()
}, [controls])
// Derived: unique document sources (sorted by frequency)
const documentSources = useMemo(() => {
const counts = new Map<string, number>()
let noSource = 0
for (const c of controls) {
const src = c.source_citation?.source
if (src) {
counts.set(src, (counts.get(src) || 0) + 1)
} else {
noSource++
}
}
const sorted = Array.from(counts.entries()).sort((a, b) => b[1] - a[1])
return { sources: sorted, noSourceCount: noSource }
}, [controls])
// Filtered controls
const filteredControls = useMemo(() => {
return controls.filter(c => {
if (severityFilter && c.severity !== severityFilter) return false
if (domainFilter && getDomain(c.control_id) !== domainFilter) return false
if (stateFilter && c.release_state !== stateFilter) return false
if (verificationFilter && c.verification_method !== verificationFilter) return false
if (categoryFilter && c.category !== categoryFilter) return false
if (audienceFilter && c.target_audience !== audienceFilter) return false
if (sourceFilter) {
const src = c.source_citation?.source || ''
if (sourceFilter === '__none__') {
if (src) return false
} else {
if (src !== sourceFilter) return false
}
}
if (searchQuery) {
const q = searchQuery.toLowerCase()
return (
c.control_id.toLowerCase().includes(q) ||
c.title.toLowerCase().includes(q) ||
c.objective.toLowerCase().includes(q) ||
c.tags.some(t => t.toLowerCase().includes(q))
)
}
return true
})
}, [controls, severityFilter, domainFilter, stateFilter, verificationFilter, categoryFilter, audienceFilter, sourceFilter, searchQuery])
// Reset page when filters change // Reset page when filters change
useEffect(() => { setCurrentPage(1) }, [severityFilter, domainFilter, stateFilter, verificationFilter, categoryFilter, audienceFilter, sourceFilter, searchQuery]) useEffect(() => { setCurrentPage(1) }, [severityFilter, domainFilter, stateFilter, verificationFilter, categoryFilter, audienceFilter, sourceFilter, debouncedSearch, sortBy])
// Pagination // Pagination
const totalPages = Math.max(1, Math.ceil(filteredControls.length / PAGE_SIZE)) const totalPages = Math.max(1, Math.ceil(totalCount / PAGE_SIZE))
const paginatedControls = useMemo(() => {
const start = (currentPage - 1) * PAGE_SIZE
return filteredControls.slice(start, start + PAGE_SIZE)
}, [filteredControls, currentPage])
// Review queue items // Full reload (after CRUD)
const reviewItems = useMemo(() => { const fullReload = useCallback(async () => {
return controls.filter(c => ['needs_review', 'too_close', 'duplicate'].includes(c.release_state)) await Promise.all([loadControls(), loadMeta(), loadReviewCount()])
}, [controls]) }, [loadControls, loadMeta, loadReviewCount])
// CRUD handlers // CRUD handlers
const handleCreate = async (data: typeof EMPTY_CONTROL) => { const handleCreate = async (data: typeof EMPTY_CONTROL) => {
@@ -154,7 +185,7 @@ export default function ControlLibraryPage() {
alert(`Fehler: ${err.error || err.details || 'Unbekannt'}`) alert(`Fehler: ${err.error || err.details || 'Unbekannt'}`)
return return
} }
await loadData() await fullReload()
setMode('list') setMode('list')
} catch { } catch {
alert('Netzwerkfehler') alert('Netzwerkfehler')
@@ -177,7 +208,7 @@ export default function ControlLibraryPage() {
alert(`Fehler: ${err.error || err.details || 'Unbekannt'}`) alert(`Fehler: ${err.error || err.details || 'Unbekannt'}`)
return return
} }
await loadData() await fullReload()
setSelectedControl(null) setSelectedControl(null)
setMode('list') setMode('list')
} catch { } catch {
@@ -195,7 +226,7 @@ export default function ControlLibraryPage() {
alert('Fehler beim Loeschen') alert('Fehler beim Loeschen')
return return
} }
await loadData() await fullReload()
setSelectedControl(null) setSelectedControl(null)
setMode('list') setMode('list')
} catch { } catch {
@@ -211,11 +242,10 @@ export default function ControlLibraryPage() {
body: JSON.stringify({ action }), body: JSON.stringify({ action }),
}) })
if (res.ok) { if (res.ok) {
await loadData() await fullReload()
if (reviewMode) { if (reviewMode) {
const remaining = controls.filter(c => const remaining = reviewItems.filter(c => c.control_id !== controlId)
['needs_review', 'too_close', 'duplicate'].includes(c.release_state) && c.control_id !== controlId setReviewItems(remaining)
)
if (remaining.length > 0) { if (remaining.length > 0) {
const nextIdx = Math.min(reviewIndex, remaining.length - 1) const nextIdx = Math.min(reviewIndex, remaining.length - 1)
setReviewIndex(nextIdx) setReviewIndex(nextIdx)
@@ -243,16 +273,25 @@ export default function ControlLibraryPage() {
} catch { /* ignore */ } } catch { /* ignore */ }
} }
const enterReviewMode = () => { const enterReviewMode = async () => {
if (reviewItems.length === 0) return // Load review items from backend
setReviewMode(true) try {
setReviewIndex(0) const res = await fetch(`${BACKEND_URL}?endpoint=controls&release_state=needs_review&limit=200`)
setSelectedControl(reviewItems[0]) if (res.ok) {
setMode('detail') const items = await res.json()
if (items.length > 0) {
setReviewItems(items)
setReviewMode(true)
setReviewIndex(0)
setSelectedControl(items[0])
setMode('detail')
}
}
} catch { /* ignore */ }
} }
// Loading // Loading
if (loading) { if (loading && controls.length === 0) {
return ( return (
<div className="flex items-center justify-center h-96"> <div className="flex items-center justify-center h-96">
<div className="animate-spin rounded-full h-8 w-8 border-2 border-purple-600 border-t-transparent" /> <div className="animate-spin rounded-full h-8 w-8 border-2 border-purple-600 border-t-transparent" />
@@ -304,7 +343,7 @@ export default function ControlLibraryPage() {
onEdit={() => setMode('edit')} onEdit={() => setMode('edit')}
onDelete={handleDelete} onDelete={handleDelete}
onReview={handleReview} onReview={handleReview}
onRefresh={loadData} onRefresh={fullReload}
reviewMode={reviewMode} reviewMode={reviewMode}
reviewIndex={reviewIndex} reviewIndex={reviewIndex}
reviewTotal={reviewItems.length} reviewTotal={reviewItems.length}
@@ -336,19 +375,18 @@ export default function ControlLibraryPage() {
<div> <div>
<h1 className="text-lg font-semibold text-gray-900">Canonical Control Library</h1> <h1 className="text-lg font-semibold text-gray-900">Canonical Control Library</h1>
<p className="text-xs text-gray-500"> <p className="text-xs text-gray-500">
{controls.length} unabhaengig formulierte Security Controls {' '} {meta?.total ?? totalCount} Security Controls
{controls.reduce((sum, c) => sum + c.open_anchors.length, 0)} Open-Source-Referenzen
</p> </p>
</div> </div>
</div> </div>
<div className="flex items-center gap-2"> <div className="flex items-center gap-2">
{reviewItems.length > 0 && ( {reviewCount > 0 && (
<button <button
onClick={enterReviewMode} onClick={enterReviewMode}
className="flex items-center gap-1.5 px-3 py-2 text-sm text-white bg-yellow-600 rounded-lg hover:bg-yellow-700" className="flex items-center gap-1.5 px-3 py-2 text-sm text-white bg-yellow-600 rounded-lg hover:bg-yellow-700"
> >
<ListChecks className="w-4 h-4" /> <ListChecks className="w-4 h-4" />
Review ({reviewItems.length}) Review ({reviewCount})
</button> </button>
)} )}
<button <button
@@ -394,12 +432,19 @@ export default function ControlLibraryPage() {
<Search className="absolute left-3 top-1/2 -translate-y-1/2 w-4 h-4 text-gray-400" /> <Search className="absolute left-3 top-1/2 -translate-y-1/2 w-4 h-4 text-gray-400" />
<input <input
type="text" type="text"
placeholder="Controls durchsuchen..." placeholder="Controls durchsuchen (ID, Titel, Objective)..."
value={searchQuery} value={searchQuery}
onChange={e => setSearchQuery(e.target.value)} onChange={e => setSearchQuery(e.target.value)}
className="w-full pl-9 pr-4 py-2 text-sm border border-gray-300 rounded-lg focus:outline-none focus:ring-2 focus:ring-purple-500" className="w-full pl-9 pr-4 py-2 text-sm border border-gray-300 rounded-lg focus:outline-none focus:ring-2 focus:ring-purple-500"
/> />
</div> </div>
<button
onClick={() => { loadControls(); loadMeta(); loadReviewCount() }}
className="p-2 text-gray-400 hover:text-purple-600"
title="Aktualisieren"
>
<RefreshCw className={`w-4 h-4 ${loading ? 'animate-spin' : ''}`} />
</button>
</div> </div>
<div className="flex items-center gap-2 flex-wrap"> <div className="flex items-center gap-2 flex-wrap">
<Filter className="w-4 h-4 text-gray-400" /> <Filter className="w-4 h-4 text-gray-400" />
@@ -420,8 +465,8 @@ export default function ControlLibraryPage() {
className="text-sm border border-gray-300 rounded-lg px-2 py-1.5 focus:outline-none focus:ring-2 focus:ring-purple-500" className="text-sm border border-gray-300 rounded-lg px-2 py-1.5 focus:outline-none focus:ring-2 focus:ring-purple-500"
> >
<option value="">Domain</option> <option value="">Domain</option>
{domains.map(d => ( {(meta?.domains || []).map(d => (
<option key={d} value={d}>{d}</option> <option key={d.domain} value={d.domain}>{d.domain} ({d.count})</option>
))} ))}
</select> </select>
<select <select
@@ -472,11 +517,23 @@ export default function ControlLibraryPage() {
className="text-sm border border-gray-300 rounded-lg px-2 py-1.5 focus:outline-none focus:ring-2 focus:ring-purple-500 max-w-[220px]" className="text-sm border border-gray-300 rounded-lg px-2 py-1.5 focus:outline-none focus:ring-2 focus:ring-purple-500 max-w-[220px]"
> >
<option value="">Dokumentenursprung</option> <option value="">Dokumentenursprung</option>
<option value="__none__">Ohne Quelle ({documentSources.noSourceCount})</option> {meta && <option value="__none__">Ohne Quelle ({meta.no_source_count})</option>}
{documentSources.sources.map(([src, count]) => ( {(meta?.sources || []).map(s => (
<option key={src} value={src}>{src} ({count})</option> <option key={s.source} value={s.source}>{s.source} ({s.count})</option>
))} ))}
</select> </select>
<span className="text-gray-300 mx-1">|</span>
<ArrowUpDown className="w-4 h-4 text-gray-400" />
<select
value={sortBy}
onChange={e => setSortBy(e.target.value as 'id' | 'newest' | 'oldest' | 'source')}
className="text-sm border border-gray-300 rounded-lg px-2 py-1.5 focus:outline-none focus:ring-2 focus:ring-purple-500"
>
<option value="id">Sortierung: ID</option>
<option value="source">Nach Quelle</option>
<option value="newest">Neueste zuerst</option>
<option value="oldest">Aelteste zuerst</option>
</select>
</div> </div>
</div> </div>
@@ -504,15 +561,16 @@ export default function ControlLibraryPage() {
{showGenerator && ( {showGenerator && (
<GeneratorModal <GeneratorModal
onClose={() => setShowGenerator(false)} onClose={() => setShowGenerator(false)}
onComplete={() => loadData()} onComplete={() => fullReload()}
/> />
)} )}
{/* Pagination Header */} {/* Pagination Header */}
<div className="px-6 py-2 bg-gray-50 border-b border-gray-200 flex items-center justify-between text-xs text-gray-500"> <div className="px-6 py-2 bg-gray-50 border-b border-gray-200 flex items-center justify-between text-xs text-gray-500">
<span> <span>
{filteredControls.length} Controls gefunden {totalCount} Controls gefunden
{filteredControls.length !== controls.length && ` (von ${controls.length} gesamt)`} {totalCount !== (meta?.total ?? totalCount) && ` (von ${meta?.total} gesamt)`}
{loading && <span className="ml-2 text-purple-500">Lade...</span>}
</span> </span>
<span>Seite {currentPage} von {totalPages}</span> <span>Seite {currentPage} von {totalPages}</span>
</div> </div>
@@ -520,9 +578,22 @@ export default function ControlLibraryPage() {
{/* Control List */} {/* Control List */}
<div className="flex-1 overflow-y-auto p-6"> <div className="flex-1 overflow-y-auto p-6">
<div className="space-y-3"> <div className="space-y-3">
{paginatedControls.map(ctrl => ( {controls.map((ctrl, idx) => {
// Show source group header when sorting by source
const prevSource = idx > 0 ? (controls[idx - 1].source_citation?.source || 'Ohne Quelle') : null
const curSource = ctrl.source_citation?.source || 'Ohne Quelle'
const showSourceHeader = sortBy === 'source' && curSource !== prevSource
return (
<div key={ctrl.control_id}>
{showSourceHeader && (
<div className="flex items-center gap-2 pt-3 pb-1">
<div className="h-px flex-1 bg-blue-200" />
<span className="text-xs font-semibold text-blue-700 bg-blue-50 px-2 py-0.5 rounded whitespace-nowrap">{curSource}</span>
<div className="h-px flex-1 bg-blue-200" />
</div>
)}
<button <button
key={ctrl.control_id}
onClick={() => { setSelectedControl(ctrl); setMode('detail') }} onClick={() => { setSelectedControl(ctrl); setMode('detail') }}
className="w-full text-left bg-white border border-gray-200 rounded-lg p-4 hover:border-purple-300 hover:shadow-sm transition-all group" className="w-full text-left bg-white border border-gray-200 rounded-lg p-4 hover:border-purple-300 hover:shadow-sm transition-all group"
> >
@@ -536,6 +607,7 @@ export default function ControlLibraryPage() {
<VerificationMethodBadge method={ctrl.verification_method} /> <VerificationMethodBadge method={ctrl.verification_method} />
<CategoryBadge category={ctrl.category} /> <CategoryBadge category={ctrl.category} />
<TargetAudienceBadge audience={ctrl.target_audience} /> <TargetAudienceBadge audience={ctrl.target_audience} />
<GenerationStrategyBadge strategy={ctrl.generation_strategy} />
{ctrl.risk_score !== null && ( {ctrl.risk_score !== null && (
<span className="text-xs text-gray-400">Score: {ctrl.risk_score}</span> <span className="text-xs text-gray-400">Score: {ctrl.risk_score}</span>
)} )}
@@ -543,7 +615,7 @@ export default function ControlLibraryPage() {
<h3 className="text-sm font-medium text-gray-900 group-hover:text-purple-700">{ctrl.title}</h3> <h3 className="text-sm font-medium text-gray-900 group-hover:text-purple-700">{ctrl.title}</h3>
<p className="text-xs text-gray-500 mt-1 line-clamp-2">{ctrl.objective}</p> <p className="text-xs text-gray-500 mt-1 line-clamp-2">{ctrl.objective}</p>
{/* Open anchors summary */} {/* Open anchors summary + timestamp */}
<div className="flex items-center gap-2 mt-2"> <div className="flex items-center gap-2 mt-2">
<BookOpen className="w-3 h-3 text-green-600" /> <BookOpen className="w-3 h-3 text-green-600" />
<span className="text-xs text-green-700"> <span className="text-xs text-green-700">
@@ -555,16 +627,23 @@ export default function ControlLibraryPage() {
<span className="text-xs text-blue-600">{ctrl.source_citation.source}</span> <span className="text-xs text-blue-600">{ctrl.source_citation.source}</span>
</> </>
)} )}
<span className="text-gray-300">|</span>
<Clock className="w-3 h-3 text-gray-400" />
<span className="text-xs text-gray-400" title={ctrl.created_at}>
{ctrl.created_at ? new Date(ctrl.created_at).toLocaleDateString('de-DE', { day: '2-digit', month: '2-digit', year: '2-digit', hour: '2-digit', minute: '2-digit' }) : ''}
</span>
</div> </div>
</div> </div>
<ChevronRight className="w-4 h-4 text-gray-300 group-hover:text-purple-500 flex-shrink-0 mt-1 ml-4" /> <ChevronRight className="w-4 h-4 text-gray-300 group-hover:text-purple-500 flex-shrink-0 mt-1 ml-4" />
</div> </div>
</button> </button>
))} </div>
)
})}
{filteredControls.length === 0 && ( {controls.length === 0 && !loading && (
<div className="text-center py-12 text-gray-400 text-sm"> <div className="text-center py-12 text-gray-400 text-sm">
{controls.length === 0 {totalCount === 0 && !debouncedSearch && !severityFilter && !domainFilter
? 'Noch keine Controls vorhanden. Klicke auf "Neues Control" um zu starten.' ? 'Noch keine Controls vorhanden. Klicke auf "Neues Control" um zu starten.'
: 'Keine Controls gefunden.'} : 'Keine Controls gefunden.'}
</div> </div>

View File

@@ -80,6 +80,7 @@ class ControlResponse(BaseModel):
category: Optional[str] = None category: Optional[str] = None
target_audience: Optional[str] = None target_audience: Optional[str] = None
generation_metadata: Optional[dict] = None generation_metadata: Optional[dict] = None
generation_strategy: Optional[str] = "ungrouped"
created_at: str created_at: str
updated_at: str updated_at: str
@@ -161,7 +162,7 @@ _CONTROL_COLS = """id, framework_id, control_id, title, objective, rationale,
evidence_confidence, open_anchors, release_state, tags, evidence_confidence, open_anchors, release_state, tags,
license_rule, source_original_text, source_citation, license_rule, source_original_text, source_citation,
customer_visible, verification_method, category, customer_visible, verification_method, category,
target_audience, generation_metadata, target_audience, generation_metadata, generation_strategy,
created_at, updated_at""" created_at, updated_at"""
@@ -297,8 +298,14 @@ async def list_controls(
verification_method: Optional[str] = Query(None), verification_method: Optional[str] = Query(None),
category: Optional[str] = Query(None), category: Optional[str] = Query(None),
target_audience: Optional[str] = Query(None), target_audience: Optional[str] = Query(None),
source: Optional[str] = Query(None, description="Filter by source_citation->source"),
search: Optional[str] = Query(None, description="Full-text search in control_id, title, objective"),
sort: Optional[str] = Query("control_id", description="Sort field: control_id, created_at, severity"),
order: Optional[str] = Query("asc", description="Sort order: asc or desc"),
limit: Optional[int] = Query(None, ge=1, le=5000, description="Max results"),
offset: Optional[int] = Query(None, ge=0, description="Offset for pagination"),
): ):
"""List all canonical controls, with optional filters.""" """List canonical controls with filters, search, sorting and pagination."""
query = f""" query = f"""
SELECT {_CONTROL_COLS} SELECT {_CONTROL_COLS}
FROM canonical_controls FROM canonical_controls
@@ -324,8 +331,35 @@ async def list_controls(
if target_audience: if target_audience:
query += " AND target_audience = :ta" query += " AND target_audience = :ta"
params["ta"] = target_audience params["ta"] = target_audience
if source:
if source == "__none__":
query += " AND (source_citation IS NULL OR source_citation->>'source' IS NULL OR source_citation->>'source' = '')"
else:
query += " AND source_citation->>'source' = :src"
params["src"] = source
if search:
query += " AND (control_id ILIKE :q OR title ILIKE :q OR objective ILIKE :q)"
params["q"] = f"%{search}%"
query += " ORDER BY control_id" # Sorting
sort_col = "control_id"
if sort in ("created_at", "updated_at", "severity", "control_id"):
sort_col = sort
elif sort == "source":
sort_col = "source_citation->>'source'"
sort_dir = "DESC" if order and order.lower() == "desc" else "ASC"
if sort == "source":
# Group by source first, then by control_id within each source
query += f" ORDER BY {sort_col} {sort_dir} NULLS LAST, control_id ASC"
else:
query += f" ORDER BY {sort_col} {sort_dir}"
if limit is not None:
query += " LIMIT :lim"
params["lim"] = limit
if offset is not None:
query += " OFFSET :off"
params["off"] = offset
with SessionLocal() as db: with SessionLocal() as db:
rows = db.execute(text(query), params).fetchall() rows = db.execute(text(query), params).fetchall()
@@ -333,6 +367,87 @@ async def list_controls(
return [_control_row(r) for r in rows] return [_control_row(r) for r in rows]
@router.get("/controls-count")
async def count_controls(
    severity: Optional[str] = Query(None),
    domain: Optional[str] = Query(None),
    release_state: Optional[str] = Query(None),
    verification_method: Optional[str] = Query(None),
    category: Optional[str] = Query(None),
    target_audience: Optional[str] = Query(None),
    source: Optional[str] = Query(None),
    search: Optional[str] = Query(None),
):
    """Return how many canonical controls match the supplied filters.

    Every filter is optional; an empty or missing value adds no condition.

    Args:
        severity: Exact match on the ``severity`` column.
        domain: Upper-cased and compared against the leading characters of
            ``control_id``.
        release_state: Exact match on ``release_state``.
        verification_method: Exact match on ``verification_method``.
        category: Exact match on ``category``.
        target_audience: Exact match on ``target_audience``.
        source: Exact match on ``source_citation->>'source'``; the sentinel
            ``"__none__"`` selects rows whose source is NULL or empty.
        search: Case-insensitive substring match (ILIKE) against
            ``control_id``, ``title`` and ``objective``.

    Returns:
        ``{"total": <row count>}``.
    """
    conditions: list[str] = []
    params: dict[str, Any] = {}

    def _where(clause: str, **binds: Any) -> None:
        # Keep the SQL fragment and its bind values registered together.
        conditions.append(clause)
        params.update(binds)

    if severity:
        _where(" AND severity = :sev", sev=severity)
    if domain:
        _where(" AND LEFT(control_id, LENGTH(:dom)) = :dom", dom=domain.upper())
    if release_state:
        _where(" AND release_state = :rs", rs=release_state)
    if verification_method:
        _where(" AND verification_method = :vm", vm=verification_method)
    if category:
        _where(" AND category = :cat", cat=category)
    if target_audience:
        _where(" AND target_audience = :ta", ta=target_audience)
    if source == "__none__":
        # Sentinel: controls that carry no usable source citation.
        _where(" AND (source_citation IS NULL OR source_citation->>'source' IS NULL OR source_citation->>'source' = '')")
    elif source:
        _where(" AND source_citation->>'source' = :src", src=source)
    if search:
        _where(
            " AND (control_id ILIKE :q OR title ILIKE :q OR objective ILIKE :q)",
            q=f"%{search}%",
        )

    # "WHERE 1=1" lets every condition be appended uniformly as " AND ...".
    query = "SELECT count(*) FROM canonical_controls WHERE 1=1" + "".join(conditions)

    with SessionLocal() as db:
        matched = db.execute(text(query), params).scalar()
    return {"total": matched}
@router.get("/controls-meta")
async def controls_meta():
    """Collect the aggregate figures used to populate the filter dropdowns.

    Issues four queries against ``canonical_controls`` in a fixed order:
    the overall row count, the number of controls per domain (the upper-cased
    part of ``control_id`` before the first ``-``), the number of controls per
    non-empty ``source_citation->>'source'`` (most frequent first), and the
    number of controls without a source.

    Returns:
        A dict with ``total``, ``domains`` (list of ``{"domain", "count"}``),
        ``sources`` (list of ``{"source", "count"}``) and ``no_source_count``.
    """
    total_sql = "SELECT count(*) FROM canonical_controls"
    domain_sql = (
        "\n"
        "SELECT UPPER(SPLIT_PART(control_id, '-', 1)) as domain, count(*) as cnt\n"
        "FROM canonical_controls\n"
        "GROUP BY domain ORDER BY domain\n"
    )
    source_sql = (
        "\n"
        "SELECT source_citation->>'source' as src, count(*) as cnt\n"
        "FROM canonical_controls\n"
        "WHERE source_citation->>'source' IS NOT NULL AND source_citation->>'source' != ''\n"
        "GROUP BY src ORDER BY cnt DESC\n"
    )
    no_source_sql = (
        "\n"
        "SELECT count(*) FROM canonical_controls\n"
        "WHERE source_citation IS NULL OR source_citation->>'source' IS NULL OR source_citation->>'source' = ''\n"
    )

    with SessionLocal() as session:
        overall = session.execute(text(total_sql)).scalar()
        domain_rows = session.execute(text(domain_sql)).fetchall()
        source_rows = session.execute(text(source_sql)).fetchall()
        unsourced = session.execute(text(no_source_sql)).scalar()

    # Rows are read positionally: column 0 is the group label, column 1 its count.
    domain_entries = []
    for row in domain_rows:
        domain_entries.append({"domain": row[0], "count": row[1]})
    source_entries = []
    for row in source_rows:
        source_entries.append({"source": row[0], "count": row[1]})

    return {
        "total": overall,
        "domains": domain_entries,
        "sources": source_entries,
        "no_source_count": unsourced,
    }
@router.get("/controls/{control_id}") @router.get("/controls/{control_id}")
async def get_control(control_id: str): async def get_control(control_id: str):
"""Get a single canonical control by its control_id (e.g. AUTH-001).""" """Get a single canonical control by its control_id (e.g. AUTH-001)."""
@@ -661,6 +776,7 @@ def _control_row(r) -> dict:
"category": r.category, "category": r.category,
"target_audience": r.target_audience, "target_audience": r.target_audience,
"generation_metadata": r.generation_metadata, "generation_metadata": r.generation_metadata,
"generation_strategy": getattr(r, "generation_strategy", "ungrouped"),
"created_at": r.created_at.isoformat() if r.created_at else None, "created_at": r.created_at.isoformat() if r.created_at else None,
"updated_at": r.updated_at.isoformat() if r.updated_at else None, "updated_at": r.updated_at.isoformat() if r.updated_at else None,
} }

View File

@@ -23,6 +23,7 @@ import logging
import os import os
import re import re
import uuid import uuid
from collections import defaultdict
from dataclasses import dataclass, field, asdict from dataclasses import dataclass, field, asdict
from datetime import datetime, timezone from datetime import datetime, timezone
from typing import Dict, List, Optional, Set from typing import Dict, List, Optional, Set
@@ -368,6 +369,7 @@ class GeneratedControl:
source_citation: Optional[dict] = None source_citation: Optional[dict] = None
customer_visible: bool = True customer_visible: bool = True
generation_metadata: dict = field(default_factory=dict) generation_metadata: dict = field(default_factory=dict)
generation_strategy: str = "ungrouped" # ungrouped | document_grouped
# Classification fields # Classification fields
verification_method: Optional[str] = None # code_review, document, tool, hybrid verification_method: Optional[str] = None # code_review, document, tool, hybrid
category: Optional[str] = None # one of 17 categories category: Optional[str] = None # one of 17 categories
@@ -940,6 +942,24 @@ Gib JSON zurück mit diesen Feldern:
license_infos: list[dict], license_infos: list[dict],
) -> list[Optional[GeneratedControl]]: ) -> list[Optional[GeneratedControl]]:
"""Structure multiple free-use/citation chunks in a single Anthropic call.""" """Structure multiple free-use/citation chunks in a single Anthropic call."""
# Build document context header if chunks share a regulation
regulations_in_batch = set(c.regulation_name for c in chunks)
doc_context = ""
if len(regulations_in_batch) == 1:
reg_name = next(iter(regulations_in_batch))
articles = sorted(set(c.article or "?" for c in chunks))
doc_context = (
f"\nDOKUMENTKONTEXT: Alle {len(chunks)} Chunks stammen aus demselben Gesetz: {reg_name}.\n"
f"Betroffene Artikel/Abschnitte: {', '.join(articles)}.\n"
f"Nutze diesen Zusammenhang fuer eine kohaerente, aufeinander abgestimmte Formulierung der Controls.\n"
f"Vermeide Redundanzen zwischen den Controls — jedes soll einen eigenen Aspekt abdecken.\n"
)
elif len(regulations_in_batch) <= 3:
doc_context = (
f"\nDOKUMENTKONTEXT: Die Chunks stammen aus {len(regulations_in_batch)} Gesetzen: "
f"{', '.join(regulations_in_batch)}.\n"
)
chunk_entries = [] chunk_entries = []
for idx, (chunk, lic) in enumerate(zip(chunks, license_infos)): for idx, (chunk, lic) in enumerate(zip(chunks, license_infos)):
source_name = lic.get("name", chunk.regulation_name) source_name = lic.get("name", chunk.regulation_name)
@@ -952,20 +972,21 @@ Gib JSON zurück mit diesen Feldern:
joined = "\n\n".join(chunk_entries) joined = "\n\n".join(chunk_entries)
prompt = f"""Strukturiere die folgenden {len(chunks)} Gesetzestexte jeweils als eigenstaendiges Security/Compliance Control. prompt = f"""Strukturiere die folgenden {len(chunks)} Gesetzestexte jeweils als eigenstaendiges Security/Compliance Control.
Du DARFST den Originaltext verwenden (Quellen sind jeweils angegeben). Du DARFST den Originaltext verwenden (Quellen sind jeweils angegeben).
{doc_context}
WICHTIG: WICHTIG:
- Erstelle fuer JEDEN Chunk ein separates Control mit verstaendlicher, praxisorientierter Formulierung. - Erstelle fuer JEDEN Chunk ein separates Control mit verstaendlicher, praxisorientierter Formulierung.
- Jedes Control muss eigenstaendig und vollstaendig sein — nicht auf andere Controls verweisen. - Jedes Control muss eigenstaendig und vollstaendig sein — nicht auf andere Controls verweisen.
- Qualitaet ist wichtiger als Geschwindigkeit. Jedes Control muss die gleiche Qualitaet haben wie ein einzeln erstelltes. - Qualitaet ist wichtiger als Geschwindigkeit. Jedes Control muss die gleiche Qualitaet haben wie ein einzeln erstelltes.
- Antworte IMMER auf Deutsch.
Gib ein JSON-Array zurueck mit GENAU {len(chunks)} Objekten. Jedes Objekt hat diese Felder: Gib ein JSON-Array zurueck mit GENAU {len(chunks)} Objekten. Jedes Objekt hat diese Felder:
- chunk_index: 1-basierter Index des Chunks (1, 2, 3, ...) - chunk_index: 1-basierter Index des Chunks (1, 2, 3, ...)
- title: Kurzer praegnanter Titel (max 100 Zeichen) - title: Kurzer praegnanter Titel auf Deutsch (max 100 Zeichen)
- objective: Was soll erreicht werden? (1-3 Saetze) - objective: Was soll erreicht werden? (1-3 Saetze, Deutsch)
- rationale: Warum ist das wichtig? (1-2 Saetze) - rationale: Warum ist das wichtig? (1-2 Saetze, Deutsch)
- requirements: Liste von konkreten Anforderungen (Strings) - requirements: Liste von konkreten Anforderungen (Strings, Deutsch)
- test_procedure: Liste von Pruefschritten (Strings) - test_procedure: Liste von Pruefschritten (Strings, Deutsch)
- evidence: Liste von Nachweisdokumenten (Strings) - evidence: Liste von Nachweisdokumenten (Strings, Deutsch)
- severity: low/medium/high/critical - severity: low/medium/high/critical
- tags: Liste von Tags - tags: Liste von Tags
@@ -1003,13 +1024,16 @@ Gib ein JSON-Array zurueck mit GENAU {len(chunks)} Objekten. Jedes Objekt hat di
control.customer_visible = True control.customer_visible = True
control.verification_method = _detect_verification_method(chunk.text) control.verification_method = _detect_verification_method(chunk.text)
control.category = _detect_category(chunk.text) control.category = _detect_category(chunk.text)
same_doc = len(set(c.regulation_code for c in chunks)) == 1
control.generation_metadata = { control.generation_metadata = {
"processing_path": "structured_batch", "processing_path": "structured_batch",
"license_rule": lic["rule"], "license_rule": lic["rule"],
"source_regulation": chunk.regulation_code, "source_regulation": chunk.regulation_code,
"source_article": chunk.article, "source_article": chunk.article,
"batch_size": len(chunks), "batch_size": len(chunks),
"document_grouped": same_doc,
} }
control.generation_strategy = "document_grouped" if same_doc else "ungrouped"
controls[idx] = control controls[idx] = control
return controls return controls
@@ -1369,7 +1393,7 @@ Gib ein JSON-Array zurueck mit GENAU {len(chunks)} Objekten. Jedes Objekt hat di
open_anchors, release_state, tags, open_anchors, release_state, tags,
license_rule, source_original_text, source_citation, license_rule, source_original_text, source_citation,
customer_visible, generation_metadata, customer_visible, generation_metadata,
verification_method, category verification_method, category, generation_strategy
) VALUES ( ) VALUES (
:framework_id, :control_id, :title, :objective, :rationale, :framework_id, :control_id, :title, :objective, :rationale,
:scope, :requirements, :test_procedure, :evidence, :scope, :requirements, :test_procedure, :evidence,
@@ -1377,7 +1401,7 @@ Gib ein JSON-Array zurueck mit GENAU {len(chunks)} Objekten. Jedes Objekt hat di
:open_anchors, :release_state, :tags, :open_anchors, :release_state, :tags,
:license_rule, :source_original_text, :source_citation, :license_rule, :source_original_text, :source_citation,
:customer_visible, :generation_metadata, :customer_visible, :generation_metadata,
:verification_method, :category :verification_method, :category, :generation_strategy
) )
ON CONFLICT (framework_id, control_id) DO NOTHING ON CONFLICT (framework_id, control_id) DO NOTHING
RETURNING id RETURNING id
@@ -1405,6 +1429,7 @@ Gib ein JSON-Array zurueck mit GENAU {len(chunks)} Objekten. Jedes Objekt hat di
"generation_metadata": json.dumps(control.generation_metadata) if control.generation_metadata else None, "generation_metadata": json.dumps(control.generation_metadata) if control.generation_metadata else None,
"verification_method": control.verification_method, "verification_method": control.verification_method,
"category": control.category, "category": control.category,
"generation_strategy": control.generation_strategy,
}, },
) )
self.db.commit() self.db.commit()
@@ -1479,21 +1504,48 @@ Gib ein JSON-Array zurueck mit GENAU {len(chunks)} Objekten. Jedes Objekt hat di
self._update_job(job_id, result) self._update_job(job_id, result)
return result return result
# ── Group chunks by document (regulation_code) for coherent batching ──
doc_groups: dict[str, list[RAGSearchResult]] = defaultdict(list)
for chunk in chunks:
group_key = chunk.regulation_code or "unknown"
doc_groups[group_key].append(chunk)
# Sort chunks within each group by article for sequential context
for key in doc_groups:
doc_groups[key].sort(key=lambda c: (c.article or "", c.paragraph or ""))
logger.info(
"Grouped %d chunks into %d document groups for coherent batching",
len(chunks), len(doc_groups),
)
# Flatten back: chunks from same document are now adjacent
chunks = []
for group_list in doc_groups.values():
chunks.extend(group_list)
# Process chunks — batch mode (N chunks per Anthropic API call) # Process chunks — batch mode (N chunks per Anthropic API call)
BATCH_SIZE = config.batch_size or 5 BATCH_SIZE = config.batch_size or 5
controls_count = 0 controls_count = 0
chunks_skipped_prefilter = 0 chunks_skipped_prefilter = 0
pending_batch: list[tuple[RAGSearchResult, dict]] = [] # (chunk, license_info) pending_batch: list[tuple[RAGSearchResult, dict]] = [] # (chunk, license_info)
current_batch_regulation: Optional[str] = None # Track regulation for group-aware flushing
async def _flush_batch(): async def _flush_batch():
"""Send pending batch to Anthropic and process results.""" """Send pending batch to Anthropic and process results."""
nonlocal controls_count nonlocal controls_count, current_batch_regulation
if not pending_batch: if not pending_batch:
return return
batch = pending_batch.copy() batch = pending_batch.copy()
pending_batch.clear() pending_batch.clear()
current_batch_regulation = None
logger.info("Processing batch of %d chunks via single API call...", len(batch)) # Log which document this batch belongs to
regs_in_batch = set(c.regulation_code for c, _ in batch)
logger.info(
"Processing batch of %d chunks (docs: %s) via single API call...",
len(batch), ", ".join(regs_in_batch),
)
try: try:
batch_controls = await self._process_batch(batch, config, job_id) batch_controls = await self._process_batch(batch, config, job_id)
except Exception as e: except Exception as e:
@@ -1514,6 +1566,9 @@ Gib ein JSON-Array zurueck mit GENAU {len(chunks)} Objekten. Jedes Objekt hat di
self._mark_chunk_processed(chunk, lic_info, "no_control", [], job_id) self._mark_chunk_processed(chunk, lic_info, "no_control", [], job_id)
continue continue
# Mark as document_grouped strategy
control.generation_strategy = "document_grouped"
# Count by state # Count by state
if control.release_state == "too_close": if control.release_state == "too_close":
result.controls_too_close += 1 result.controls_too_close += 1
@@ -1567,12 +1622,18 @@ Gib ein JSON-Array zurueck mit GENAU {len(chunks)} Objekten. Jedes Objekt hat di
# Classify license and add to batch # Classify license and add to batch
license_info = self._classify_license(chunk) license_info = self._classify_license(chunk)
pending_batch.append((chunk, license_info)) chunk_regulation = chunk.regulation_code or "unknown"
# Flush when batch is full # Flush when: batch is full OR regulation changes (group boundary)
if len(pending_batch) >= BATCH_SIZE: if pending_batch and (
len(pending_batch) >= BATCH_SIZE
or chunk_regulation != current_batch_regulation
):
await _flush_batch() await _flush_batch()
pending_batch.append((chunk, license_info))
current_batch_regulation = chunk_regulation
except Exception as e: except Exception as e:
error_msg = f"Error processing chunk {chunk.regulation_code}/{chunk.article}: {e}" error_msg = f"Error processing chunk {chunk.regulation_code}/{chunk.article}: {e}"
logger.error(error_msg) logger.error(error_msg)

View File

@@ -0,0 +1,23 @@
-- 057: Add batch processing paths to canonical_processed_chunks
-- New values: structured_batch, llm_reform_batch (used by batch control generation)
--
-- The CHECK constraint on processing_path is dropped (if present) and then
-- re-created with the complete list of allowed values, so the script can be
-- run again without failing on an already-existing constraint.
DO $$
BEGIN
-- Only touch the table when it exists; otherwise the whole block is a no-op.
-- NOTE(review): the lookup does not filter on table_schema — confirm that a
-- same-named table in another schema cannot make this guard pass by mistake.
IF EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = 'canonical_processed_chunks') THEN
-- Remove the previous definition first; IF EXISTS tolerates a missing constraint.
ALTER TABLE canonical_processed_chunks
DROP CONSTRAINT IF EXISTS canonical_processed_chunks_processing_path_check;
-- Re-add the constraint under the same name with the extended value list.
ALTER TABLE canonical_processed_chunks
ADD CONSTRAINT canonical_processed_chunks_processing_path_check
CHECK (processing_path IN (
'structured',
'llm_reform',
'skipped',
'prefilter_skip',
'no_control',
'store_failed',
'error',
-- Values introduced by this migration:
'structured_batch',
'llm_reform_batch'
));
END IF;
END $$;

View File

@@ -0,0 +1,8 @@
-- Migration 058: Add generation_strategy column to canonical_controls
-- Tracks whether a control was generated with document-grouped or ungrouped batching
--
-- ADD COLUMN IF NOT EXISTS makes the statement safe to re-run. The column is
-- NOT NULL with default 'ungrouped', so rows that exist before the migration
-- end up with 'ungrouped'.
-- NOTE(review): no CHECK constraint restricts the value set here — confirm
-- that free-form TEXT is intended.
ALTER TABLE canonical_controls
ADD COLUMN IF NOT EXISTS generation_strategy TEXT NOT NULL DEFAULT 'ungrouped';
-- Attach a catalog comment describing the two values named in this migration.
COMMENT ON COLUMN canonical_controls.generation_strategy IS
'How chunks were batched during generation: ungrouped (random), document_grouped (by regulation+article)';

View File

@@ -1,17 +1,36 @@
"""Tests for Canonical Control Library routes (canonical_control_routes.py).""" """Tests for Canonical Control Library routes (canonical_control_routes.py).
Includes:
- Model validation tests (FrameworkResponse, ControlResponse, etc.)
- _control_row conversion tests
- Server-side pagination, sorting, search, source filter tests
- /controls-count and /controls-meta endpoint tests
"""
import pytest import pytest
from unittest.mock import MagicMock, patch from unittest.mock import MagicMock, patch
from datetime import datetime, timezone from datetime import datetime, timezone
from fastapi import FastAPI
from fastapi.testclient import TestClient
from compliance.api.canonical_control_routes import ( from compliance.api.canonical_control_routes import (
FrameworkResponse, FrameworkResponse,
ControlResponse, ControlResponse,
SimilarityCheckRequest, SimilarityCheckRequest,
SimilarityCheckResponse, SimilarityCheckResponse,
_control_row, _control_row,
router,
) )
# ---------------------------------------------------------------------------
# TestClient setup for endpoint tests
# ---------------------------------------------------------------------------
_app = FastAPI()
_app.include_router(router, prefix="/api/compliance")
_client = TestClient(_app)
class TestFrameworkResponse: class TestFrameworkResponse:
"""Tests for FrameworkResponse model.""" """Tests for FrameworkResponse model."""
@@ -175,6 +194,7 @@ class TestControlRowConversion:
], ],
"release_state": "draft", "release_state": "draft",
"tags": ["mfa"], "tags": ["mfa"],
"generation_strategy": "ungrouped",
"created_at": now, "created_at": now,
"updated_at": now, "updated_at": now,
} }
@@ -223,3 +243,213 @@ class TestControlRowConversion:
result = _control_row(row) result = _control_row(row)
assert result["created_at"] is None assert result["created_at"] is None
assert result["updated_at"] is None assert result["updated_at"] is None
def test_generation_strategy_default(self):
    """A row built with the fixture defaults converts to strategy 'ungrouped'."""
    converted = _control_row(self._make_row())
    assert converted["generation_strategy"] == "ungrouped"
def test_generation_strategy_document_grouped(self):
    """An explicit 'document_grouped' strategy on the row is passed through unchanged."""
    grouped_row = self._make_row(generation_strategy="document_grouped")
    converted = _control_row(grouped_row)
    assert converted["generation_strategy"] == "document_grouped"
# =============================================================================
# ENDPOINT TESTS — Server-Side Pagination, Sort, Search, Source Filter
# =============================================================================
def _make_mock_row(**overrides):
"""Build a mock Row with all canonical_controls columns."""
now = datetime.now(timezone.utc)
defaults = {
"id": "uuid-ctrl-1",
"framework_id": "uuid-fw-1",
"control_id": "AUTH-001",
"title": "Test Control",
"objective": "Test obj",
"rationale": "Test rat",
"scope": {},
"requirements": ["Req 1"],
"test_procedure": ["Test 1"],
"evidence": [],
"severity": "high",
"risk_score": 3.0,
"implementation_effort": "m",
"evidence_confidence": None,
"open_anchors": [],
"release_state": "draft",
"tags": [],
"license_rule": 1,
"source_original_text": None,
"source_citation": None,
"customer_visible": True,
"verification_method": "automated",
"category": "authentication",
"target_audience": "developer",
"generation_metadata": {},
"generation_strategy": "ungrouped",
"created_at": now,
"updated_at": now,
}
defaults.update(overrides)
mock = MagicMock()
for k, v in defaults.items():
setattr(mock, k, v)
return mock
def _session_returning(rows=None, scalar=None):
"""Create a mock SessionLocal that returns rows or scalar."""
db = MagicMock()
result = MagicMock()
if rows is not None:
result.fetchall.return_value = rows
if scalar is not None:
result.scalar.return_value = scalar
db.execute.return_value = result
db.__enter__ = MagicMock(return_value=db)
db.__exit__ = MagicMock(return_value=False)
return db
class TestListControlsPagination:
    """Pagination behaviour of GET /controls (limit / offset)."""

    @patch("compliance.api.canonical_control_routes.SessionLocal")
    def test_limit_param_in_sql(self, mock_cls):
        """Passing limit and offset adds LIMIT and OFFSET to the executed SQL."""
        mock_cls.return_value = _session_returning(rows=[_make_mock_row()])
        resp = _client.get("/api/compliance/v1/canonical/controls?limit=10&offset=20")
        assert resp.status_code == 200
        executed = mock_cls.return_value.__enter__().execute.call_args[0]
        sql = str(executed[0].text)
        for keyword in ("LIMIT", "OFFSET"):
            assert keyword in sql

    @patch("compliance.api.canonical_control_routes.SessionLocal")
    def test_no_limit_by_default(self, mock_cls):
        """Without a limit parameter the query contains no LIMIT clause."""
        mock_cls.return_value = _session_returning(rows=[])
        resp = _client.get("/api/compliance/v1/canonical/controls")
        assert resp.status_code == 200
        executed = mock_cls.return_value.__enter__().execute.call_args[0]
        assert "LIMIT" not in str(executed[0].text)
class TestListControlsSorting:
    """Sorting behaviour of GET /controls (sort / order)."""

    @staticmethod
    def _executed_sql(mock_cls, url):
        """Request ``url`` against an empty result set and return the SQL text that was executed."""
        mock_cls.return_value = _session_returning(rows=[])
        resp = _client.get(url)
        assert resp.status_code == 200
        return str(mock_cls.return_value.__enter__().execute.call_args[0][0].text)

    @patch("compliance.api.canonical_control_routes.SessionLocal")
    def test_sort_created_at_desc(self, mock_cls):
        """sort=created_at with order=desc yields 'created_at DESC'."""
        sql = self._executed_sql(
            mock_cls, "/api/compliance/v1/canonical/controls?sort=created_at&order=desc"
        )
        assert "created_at DESC" in sql

    @patch("compliance.api.canonical_control_routes.SessionLocal")
    def test_default_sort_control_id_asc(self, mock_cls):
        """With no sort parameters the query orders by control_id ascending."""
        sql = self._executed_sql(mock_cls, "/api/compliance/v1/canonical/controls")
        assert "control_id ASC" in sql

    @patch("compliance.api.canonical_control_routes.SessionLocal")
    def test_sql_injection_in_sort_blocked(self, mock_cls):
        """An unrecognised sort value never reaches the SQL; control_id is used instead."""
        sql = self._executed_sql(
            mock_cls, "/api/compliance/v1/canonical/controls?sort=1;DROP+TABLE"
        )
        assert "DROP" not in sql
        assert "control_id" in sql

    @patch("compliance.api.canonical_control_routes.SessionLocal")
    def test_sort_by_source(self, mock_cls):
        """sort=source orders by the citation source, then by control_id."""
        sql = self._executed_sql(
            mock_cls, "/api/compliance/v1/canonical/controls?sort=source&order=asc"
        )
        assert "source_citation" in sql
        assert "control_id ASC" in sql  # secondary sort within source group
class TestListControlsSearch:
    """Free-text search behaviour of GET /controls."""

    @patch("compliance.api.canonical_control_routes.SessionLocal")
    def test_search_uses_ilike(self, mock_cls):
        """The search term is wrapped in % wildcards and matched with ILIKE."""
        mock_cls.return_value = _session_returning(rows=[])
        resp = _client.get("/api/compliance/v1/canonical/controls?search=encryption")
        assert resp.status_code == 200
        # Positional args of the execute() call: (statement, bind parameters).
        statement, binds = mock_cls.return_value.__enter__().execute.call_args[0][:2]
        assert "ILIKE" in str(statement.text)
        assert binds["q"] == "%encryption%"
class TestListControlsSourceFilter:
    """Source filtering behaviour of GET /controls."""

    @patch("compliance.api.canonical_control_routes.SessionLocal")
    def test_specific_source(self, mock_cls):
        """A concrete source value is bound as :src and compared against source_citation."""
        mock_cls.return_value = _session_returning(rows=[])
        resp = _client.get("/api/compliance/v1/canonical/controls?source=DSGVO")
        assert resp.status_code == 200
        # Positional args of the execute() call: (statement, bind parameters).
        statement, binds = mock_cls.return_value.__enter__().execute.call_args[0][:2]
        assert "source_citation" in str(statement.text)
        assert binds["src"] == "DSGVO"

    @patch("compliance.api.canonical_control_routes.SessionLocal")
    def test_no_source_filter(self, mock_cls):
        """The '__none__' sentinel turns into an IS NULL condition."""
        mock_cls.return_value = _session_returning(rows=[])
        resp = _client.get("/api/compliance/v1/canonical/controls?source=__none__")
        assert resp.status_code == 200
        statement = mock_cls.return_value.__enter__().execute.call_args[0][0]
        assert "IS NULL" in str(statement.text)
class TestControlsCount:
    """Tests for GET /controls-count."""

    @patch("compliance.api.canonical_control_routes.SessionLocal")
    def test_returns_total(self, mock_cls):
        """The scalar returned by the session is reported as ``total``."""
        mock_cls.return_value = _session_returning(scalar=42)

        response = _client.get("/api/compliance/v1/canonical/controls-count")

        assert response.status_code == 200
        assert response.json() == {"total": 42}

    @patch("compliance.api.canonical_control_routes.SessionLocal")
    def test_with_filters(self, mock_cls):
        """Severity and search filters both show up in the count query's SQL."""
        mock_cls.return_value = _session_returning(scalar=5)

        response = _client.get(
            "/api/compliance/v1/canonical/controls-count?severity=critical&search=mfa"
        )
        assert response.status_code == 200
        assert response.json() == {"total": 5}

        # Positional args of the last execute() call; the first one carries the SQL in .text.
        execute_args = mock_cls.return_value.__enter__().execute.call_args[0]
        rendered_sql = str(execute_args[0].text)
        assert "severity" in rendered_sql
        assert "ILIKE" in rendered_sql
class TestControlsMeta:
    """Tests for GET /controls-meta."""

    @patch("compliance.api.canonical_control_routes.SessionLocal")
    def test_returns_structure(self, mock_cls):
        """The response exposes total, no_source_count and list-valued domains/sources."""
        # Session double that hands itself back when used as a context manager.
        session = MagicMock()
        session.__enter__.return_value = session
        session.__exit__.return_value = False

        # One canned result per execute() call, consumed in this order.
        total_result = MagicMock()
        total_result.scalar.return_value = 100

        domain_result = MagicMock()
        domain_result.fetchall.return_value = []

        source_result = MagicMock()
        source_result.fetchall.return_value = []

        no_source_result = MagicMock()
        no_source_result.scalar.return_value = 20

        # 4 sequential execute() calls
        session.execute.side_effect = [
            total_result,
            domain_result,
            source_result,
            no_source_result,
        ]
        mock_cls.return_value = session

        response = _client.get("/api/compliance/v1/canonical/controls-meta")
        assert response.status_code == 200

        payload = response.json()
        assert payload["total"] == 100
        assert payload["no_source_count"] == 20
        assert isinstance(payload["domains"], list)
        assert isinstance(payload["sources"], list)