Compare commits

...

4 Commits

Author SHA1 Message Date
Benjamin Admin
295c18c6f7 feat: add DECOMPOSITION_LLM_MODEL env var for runtime model switching
All checks were successful
CI/CD / go-lint (push) Has been skipped
CI/CD / python-lint (push) Has been skipped
CI/CD / nodejs-lint (push) Has been skipped
CI/CD / test-go-ai-compliance (push) Successful in 46s
CI/CD / test-python-backend-compliance (push) Successful in 33s
CI/CD / test-python-document-crawler (push) Successful in 23s
CI/CD / test-python-dsms-gateway (push) Successful in 19s
CI/CD / validate-canonical-controls (push) Successful in 12s
CI/CD / Deploy (push) Successful in 6s
Allows switching between Haiku 4.5 and Sonnet 4.6 for Pass 0b
without rebuilding the backend container.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-23 09:20:10 +01:00
Benjamin Admin
649a3c5e4e perf: switch Pass 0b default model to Haiku 4.5
Benchmark shows Haiku is 2.5x faster than Sonnet at 5x lower cost
for this JSON structuring task. Quality is equivalent.
$142 vs $705 for 75K obligations, ~2.8 days vs ~7 days.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-23 09:12:01 +01:00
Benjamin Admin
bdd2f6fa0f fix: cap Anthropic max_tokens to 16384 for Pass 0b batches
Previous formula (batch_size * 1500) exceeded Claude's 16K output limit
for batch_size > 10, causing API failures and Ollama fallback.
New formula: min(16384, max(4096, batch_size * 500))

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-23 08:50:45 +01:00
Benjamin Admin
ac6134ce6d feat: control_parent_links population + traceability API + frontend
- _write_atomic_control() now uses RETURNING id and inserts into
  control_parent_links (M:N) with source_regulation, source_article,
  and obligation_candidate_id parsed from parent's source_citation
- New _parse_citation() helper for JSONB source_citation extraction
- New GET /controls/{id}/traceability endpoint returning full chain:
  parent links with obligations, child controls, source_count
- Backend: control_type filter (atomic/rich) for controls + count
- Frontend: Rechtsgrundlagen section in ControlDetail showing all
  parent links per source regulation with obligation text + strength
- Frontend: Atomic/Rich filter dropdown in Control Library list
- Frontend: GenerationStrategyBadge recognizes 'pass0b' strategy
- Tests: 3 new tests for parent_link creation + citation parsing,
  existing batch test mock updated for RETURNING clause

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-23 08:14:29 +01:00
8 changed files with 514 additions and 45 deletions

View File

@@ -27,7 +27,7 @@ export async function GET(request: NextRequest) {
case 'controls': { case 'controls': {
const controlParams = new URLSearchParams() const controlParams = new URLSearchParams()
const passthrough = ['severity', 'domain', 'release_state', 'verification_method', 'category', const passthrough = ['severity', 'domain', 'release_state', 'verification_method', 'category',
'target_audience', 'source', 'search', 'sort', 'order', 'limit', 'offset'] 'target_audience', 'source', 'search', 'control_type', 'sort', 'order', 'limit', 'offset']
for (const key of passthrough) { for (const key of passthrough) {
const val = searchParams.get(key) const val = searchParams.get(key)
if (val) controlParams.set(key, val) if (val) controlParams.set(key, val)
@@ -40,7 +40,7 @@ export async function GET(request: NextRequest) {
case 'controls-count': { case 'controls-count': {
const countParams = new URLSearchParams() const countParams = new URLSearchParams()
const countPassthrough = ['severity', 'domain', 'release_state', 'verification_method', 'category', const countPassthrough = ['severity', 'domain', 'release_state', 'verification_method', 'category',
'target_audience', 'source', 'search'] 'target_audience', 'source', 'search', 'control_type']
for (const key of countPassthrough) { for (const key of countPassthrough) {
const val = searchParams.get(key) const val = searchParams.get(key)
if (val) countParams.set(key, val) if (val) countParams.set(key, val)
@@ -99,6 +99,15 @@ export async function GET(request: NextRequest) {
backendPath = '/api/compliance/v1/canonical/categories' backendPath = '/api/compliance/v1/canonical/categories'
break break
case 'traceability': {
const traceId = searchParams.get('id')
if (!traceId) {
return NextResponse.json({ error: 'Missing control id' }, { status: 400 })
}
backendPath = `/api/compliance/v1/canonical/controls/${encodeURIComponent(traceId)}/traceability`
break
}
case 'similar': { case 'similar': {
const simControlId = searchParams.get('id') const simControlId = searchParams.get('id')
if (!simControlId) { if (!simControlId) {

View File

@@ -1,10 +1,10 @@
'use client' 'use client'
import { useState, useEffect } from 'react' import { useState, useEffect, useCallback } from 'react'
import { import {
ArrowLeft, ExternalLink, BookOpen, Scale, FileText, ArrowLeft, ExternalLink, BookOpen, Scale, FileText,
Eye, CheckCircle2, Trash2, Pencil, Clock, Eye, CheckCircle2, Trash2, Pencil, Clock,
ChevronLeft, SkipForward, GitMerge, Search, ChevronLeft, SkipForward, GitMerge, Search, Landmark,
} from 'lucide-react' } from 'lucide-react'
import { import {
CanonicalControl, EFFORT_LABELS, BACKEND_URL, CanonicalControl, EFFORT_LABELS, BACKEND_URL,
@@ -25,6 +25,37 @@ interface SimilarControl {
similarity: number similarity: number
} }
/**
 * One entry of the `parent_links` array in the traceability response:
 * a single parent control this control is linked to.
 */
interface ParentLink {
  /** `control_id` of the parent control. */
  parent_control_id: string;
  /** Title of the parent control. */
  parent_title: string;
  /** Kind of link; the UI has labels for 'decomposition' and 'dedup_merge'. */
  link_type: string;
  /** Link confidence; the backend falls back to 1.0 when it has no value. */
  confidence: number;
  /** Regulation recorded on the link, if any. */
  source_regulation: null | string;
  /** Article recorded on the link, if any. */
  source_article: null | string;
  /** The parent's source citation; the UI reads `source` and `article` from it. */
  parent_citation: null | Record<string, string>;
  /** Obligation attached to the link, or null when there is no obligation text. */
  obligation: null | {
    text: string;
    action: string;
    object: string;
    /** The UI renders 'must' as MUSS, 'should' as SOLL, anything else as KANN. */
    normative_strength: string;
  };
}
/**
 * Payload of the `traceability` endpoint for a single control: its parent
 * links, the controls derived from it, and a count of distinct sources.
 */
interface TraceabilityData {
  /** `control_id` of the control the chain was requested for. */
  control_id: string;
  title: string;
  /** True when the backend reports decomposition method 'pass0b'. */
  is_atomic: boolean;
  /** Parent relationships of this control. */
  parent_links: ParentLink[];
  /** Controls whose parent is this control. */
  children: {
    control_id: string;
    title: string;
    category: string;
    severity: string;
    decomposition_method: string;
  }[];
  /** Number of distinct non-empty `source_regulation` values in `parent_links`. */
  source_count: number;
}
interface ControlDetailProps { interface ControlDetailProps {
ctrl: CanonicalControl ctrl: CanonicalControl
onBack: () => void onBack: () => void
@@ -57,9 +88,23 @@ export function ControlDetail({
const [loadingSimilar, setLoadingSimilar] = useState(false) const [loadingSimilar, setLoadingSimilar] = useState(false)
const [selectedDuplicates, setSelectedDuplicates] = useState<Set<string>>(new Set()) const [selectedDuplicates, setSelectedDuplicates] = useState<Set<string>>(new Set())
const [merging, setMerging] = useState(false) const [merging, setMerging] = useState(false)
const [traceability, setTraceability] = useState<TraceabilityData | null>(null)
const [loadingTrace, setLoadingTrace] = useState(false)
/**
 * Loads the traceability chain for the control currently shown and stores it
 * in `traceability`. Best-effort: failures are not surfaced to the user, but
 * they clear the state so that data belonging to a previously shown control
 * is never rendered for this one.
 */
const loadTraceability = useCallback(async () => {
  setLoadingTrace(true)
  try {
    // The id ends up in a query string, so reserved characters must be escaped.
    const res = await fetch(
      `${BACKEND_URL}?endpoint=traceability&id=${encodeURIComponent(ctrl.control_id)}`
    )
    // A non-OK answer (e.g. 404) means there is nothing to show for this control;
    // reset instead of keeping whatever an earlier control left in state.
    setTraceability(res.ok ? await res.json() : null)
  } catch {
    // Network or JSON error: same reasoning, drop stale data.
    setTraceability(null)
  } finally {
    setLoadingTrace(false)
  }
}, [ctrl.control_id])
useEffect(() => { useEffect(() => {
loadSimilarControls() loadSimilarControls()
loadTraceability()
setSelectedDuplicates(new Set()) setSelectedDuplicates(new Set())
// eslint-disable-next-line react-hooks/exhaustive-deps // eslint-disable-next-line react-hooks/exhaustive-deps
}, [ctrl.control_id]) }, [ctrl.control_id])
@@ -242,8 +287,79 @@ export function ControlDetail({
</section> </section>
)} )}
{/* Parent Control (atomare Controls) */} {/* Rechtsgrundlagen / Traceability (atomic controls) */}
{ctrl.parent_control_uuid && ( {traceability && traceability.parent_links.length > 0 && (
<section className="bg-violet-50 border border-violet-200 rounded-lg p-4">
<div className="flex items-center gap-2 mb-3">
<Landmark className="w-4 h-4 text-violet-600" />
<h3 className="text-sm font-semibold text-violet-900">
Rechtsgrundlagen ({traceability.source_count} {traceability.source_count === 1 ? 'Quelle' : 'Quellen'})
</h3>
<ObligationTypeBadge type={ctrl.generation_metadata?.obligation_type as string} />
{loadingTrace && <span className="text-xs text-violet-400">Laden...</span>}
</div>
<div className="space-y-3">
{traceability.parent_links.map((link, i) => (
<div key={i} className="bg-white/60 border border-violet-100 rounded-lg p-3">
<div className="flex items-start gap-2">
<Scale className="w-4 h-4 text-violet-500 mt-0.5 flex-shrink-0" />
<div className="flex-1 min-w-0">
<div className="flex items-center gap-2 flex-wrap">
{link.source_regulation && (
<span className="text-sm font-semibold text-violet-900">{link.source_regulation}</span>
)}
{link.source_article && (
<span className="text-sm text-violet-700">{link.source_article}</span>
)}
{!link.source_regulation && link.parent_citation?.source && (
<span className="text-sm font-semibold text-violet-900">
{link.parent_citation.source}
{link.parent_citation.article && `${link.parent_citation.article}`}
</span>
)}
<span className={`text-xs px-1.5 py-0.5 rounded ${
link.link_type === 'decomposition' ? 'bg-violet-100 text-violet-600' :
link.link_type === 'dedup_merge' ? 'bg-blue-100 text-blue-600' :
'bg-gray-100 text-gray-600'
}`}>
{link.link_type === 'decomposition' ? 'Ableitung' :
link.link_type === 'dedup_merge' ? 'Dedup' :
link.link_type}
</span>
</div>
<p className="text-xs text-violet-600 mt-1">
via{' '}
<span className="font-mono font-medium text-purple-700 bg-purple-50 px-1 py-0.5 rounded">
{link.parent_control_id}
</span>
{link.parent_title && (
<span className="text-violet-500 ml-1"> {link.parent_title}</span>
)}
</p>
{link.obligation && (
<p className="text-xs text-violet-500 mt-1.5 bg-violet-50 rounded p-2">
<span className={`inline-block mr-1.5 px-1.5 py-0.5 rounded text-xs font-medium ${
link.obligation.normative_strength === 'must' ? 'bg-red-100 text-red-700' :
link.obligation.normative_strength === 'should' ? 'bg-amber-100 text-amber-700' :
'bg-green-100 text-green-700'
}`}>
{link.obligation.normative_strength === 'must' ? 'MUSS' :
link.obligation.normative_strength === 'should' ? 'SOLL' : 'KANN'}
</span>
{link.obligation.text.slice(0, 200)}
{link.obligation.text.length > 200 ? '...' : ''}
</p>
)}
</div>
</div>
</div>
))}
</div>
</section>
)}
{/* Fallback: simple parent display when loading has finished and traceability is unavailable or has no parent links */}
{ctrl.parent_control_uuid && (!traceability || traceability.parent_links.length === 0) && !loadingTrace && (
<section className="bg-violet-50 border border-violet-200 rounded-lg p-4"> <section className="bg-violet-50 border border-violet-200 rounded-lg p-4">
<div className="flex items-center gap-2 mb-1"> <div className="flex items-center gap-2 mb-1">
<GitMerge className="w-4 h-4 text-violet-600" /> <GitMerge className="w-4 h-4 text-violet-600" />
@@ -259,12 +375,27 @@ export function ControlDetail({
<span className="text-violet-700 ml-1"> {ctrl.parent_control_title}</span> <span className="text-violet-700 ml-1"> {ctrl.parent_control_title}</span>
)} )}
</p> </p>
{ctrl.generation_metadata?.obligation_text && ( </section>
<p className="text-xs text-violet-600 mt-2 bg-violet-100/50 rounded p-2"> )}
Obligation: {String(ctrl.generation_metadata.obligation_text).slice(0, 300)}
{String(ctrl.generation_metadata.obligation_text).length > 300 ? '...' : ''} {/* Child controls (rich controls that have atomic children) */}
</p> {traceability && traceability.children.length > 0 && (
)} <section className="bg-emerald-50 border border-emerald-200 rounded-lg p-4">
<div className="flex items-center gap-2 mb-3">
<GitMerge className="w-4 h-4 text-emerald-600" />
<h3 className="text-sm font-semibold text-emerald-900">
Abgeleitete Controls ({traceability.children.length})
</h3>
</div>
<div className="space-y-1.5">
{traceability.children.map((child) => (
<div key={child.control_id} className="flex items-center gap-2 text-sm">
<span className="font-mono text-xs text-purple-600 bg-purple-50 px-1.5 py-0.5 rounded">{child.control_id}</span>
<span className="text-gray-700 flex-1 truncate">{child.title}</span>
<SeverityBadge severity={child.severity} />
</div>
))}
</div>
</section> </section>
)} )}

View File

@@ -282,7 +282,7 @@ export function GenerationStrategyBadge({ strategy }: { strategy: string | null
if (strategy === 'phase74_gap_fill') { if (strategy === 'phase74_gap_fill') {
return <span className="inline-flex items-center px-1.5 py-0.5 rounded text-xs font-medium bg-blue-100 text-blue-700">v5 Gap</span> return <span className="inline-flex items-center px-1.5 py-0.5 rounded text-xs font-medium bg-blue-100 text-blue-700">v5 Gap</span>
} }
if (strategy === 'pass0b_atomic') { if (strategy === 'pass0b_atomic' || strategy === 'pass0b') {
return <span className="inline-flex items-center px-1.5 py-0.5 rounded text-xs font-medium bg-violet-100 text-violet-700">Atomar</span> return <span className="inline-flex items-center px-1.5 py-0.5 rounded text-xs font-medium bg-violet-100 text-violet-700">Atomar</span>
} }
return <span className="inline-flex items-center px-1.5 py-0.5 rounded text-xs font-medium bg-gray-100 text-gray-500">{strategy}</span> return <span className="inline-flex items-center px-1.5 py-0.5 rounded text-xs font-medium bg-gray-100 text-gray-500">{strategy}</span>

View File

@@ -53,6 +53,7 @@ export default function ControlLibraryPage() {
const [categoryFilter, setCategoryFilter] = useState<string>('') const [categoryFilter, setCategoryFilter] = useState<string>('')
const [audienceFilter, setAudienceFilter] = useState<string>('') const [audienceFilter, setAudienceFilter] = useState<string>('')
const [sourceFilter, setSourceFilter] = useState<string>('') const [sourceFilter, setSourceFilter] = useState<string>('')
const [typeFilter, setTypeFilter] = useState<string>('')
const [sortBy, setSortBy] = useState<'id' | 'newest' | 'oldest' | 'source'>('id') const [sortBy, setSortBy] = useState<'id' | 'newest' | 'oldest' | 'source'>('id')
// CRUD state // CRUD state
@@ -94,10 +95,11 @@ export default function ControlLibraryPage() {
if (categoryFilter) p.set('category', categoryFilter) if (categoryFilter) p.set('category', categoryFilter)
if (audienceFilter) p.set('target_audience', audienceFilter) if (audienceFilter) p.set('target_audience', audienceFilter)
if (sourceFilter) p.set('source', sourceFilter) if (sourceFilter) p.set('source', sourceFilter)
if (typeFilter) p.set('control_type', typeFilter)
if (debouncedSearch) p.set('search', debouncedSearch) if (debouncedSearch) p.set('search', debouncedSearch)
if (extra) for (const [k, v] of Object.entries(extra)) p.set(k, v) if (extra) for (const [k, v] of Object.entries(extra)) p.set(k, v)
return p.toString() return p.toString()
}, [severityFilter, domainFilter, stateFilter, verificationFilter, categoryFilter, audienceFilter, sourceFilter, debouncedSearch]) }, [severityFilter, domainFilter, stateFilter, verificationFilter, categoryFilter, audienceFilter, sourceFilter, typeFilter, debouncedSearch])
// Load metadata (domains, sources — once + on refresh) // Load metadata (domains, sources — once + on refresh)
const loadMeta = useCallback(async () => { const loadMeta = useCallback(async () => {
@@ -165,7 +167,7 @@ export default function ControlLibraryPage() {
useEffect(() => { loadControls() }, [loadControls]) useEffect(() => { loadControls() }, [loadControls])
// Reset page when filters change // Reset page when filters change
useEffect(() => { setCurrentPage(1) }, [severityFilter, domainFilter, stateFilter, verificationFilter, categoryFilter, audienceFilter, sourceFilter, debouncedSearch, sortBy]) useEffect(() => { setCurrentPage(1) }, [severityFilter, domainFilter, stateFilter, verificationFilter, categoryFilter, audienceFilter, sourceFilter, typeFilter, debouncedSearch, sortBy])
// Pagination // Pagination
const totalPages = Math.max(1, Math.ceil(totalCount / PAGE_SIZE)) const totalPages = Math.max(1, Math.ceil(totalCount / PAGE_SIZE))
@@ -664,6 +666,15 @@ export default function ControlLibraryPage() {
<option key={s.source} value={s.source}>{s.source} ({s.count})</option> <option key={s.source} value={s.source}>{s.source} ({s.count})</option>
))} ))}
</select> </select>
<select
value={typeFilter}
onChange={e => setTypeFilter(e.target.value)}
className="text-sm border border-gray-300 rounded-lg px-2 py-1.5 focus:outline-none focus:ring-2 focus:ring-purple-500"
>
<option value="">Alle Typen</option>
<option value="rich">Rich Controls</option>
<option value="atomic">Atomare Controls</option>
</select>
<span className="text-gray-300 mx-1">|</span> <span className="text-gray-300 mx-1">|</span>
<ArrowUpDown className="w-4 h-4 text-gray-400" /> <ArrowUpDown className="w-4 h-4 text-gray-400" />
<select <select

View File

@@ -10,6 +10,7 @@ Endpoints:
GET /v1/canonical/frameworks/{framework_id}/controls — Controls of a framework GET /v1/canonical/frameworks/{framework_id}/controls — Controls of a framework
GET /v1/canonical/controls — All controls (filterable) GET /v1/canonical/controls — All controls (filterable)
GET /v1/canonical/controls/{control_id} — Single control GET /v1/canonical/controls/{control_id} — Single control
GET /v1/canonical/controls/{control_id}/traceability — Traceability chain
GET /v1/canonical/controls/{control_id}/similar — Find similar controls GET /v1/canonical/controls/{control_id}/similar — Find similar controls
POST /v1/canonical/controls — Create a control POST /v1/canonical/controls — Create a control
PUT /v1/canonical/controls/{control_id} — Update a control PUT /v1/canonical/controls/{control_id} — Update a control
@@ -314,6 +315,7 @@ async def list_controls(
target_audience: Optional[str] = Query(None), target_audience: Optional[str] = Query(None),
source: Optional[str] = Query(None, description="Filter by source_citation->source"), source: Optional[str] = Query(None, description="Filter by source_citation->source"),
search: Optional[str] = Query(None, description="Full-text search in control_id, title, objective"), search: Optional[str] = Query(None, description="Full-text search in control_id, title, objective"),
control_type: Optional[str] = Query(None, description="Filter: atomic, rich, or all"),
sort: Optional[str] = Query("control_id", description="Sort field: control_id, created_at, severity"), sort: Optional[str] = Query("control_id", description="Sort field: control_id, created_at, severity"),
order: Optional[str] = Query("asc", description="Sort order: asc or desc"), order: Optional[str] = Query("asc", description="Sort order: asc or desc"),
limit: Optional[int] = Query(None, ge=1, le=5000, description="Max results"), limit: Optional[int] = Query(None, ge=1, le=5000, description="Max results"),
@@ -351,6 +353,10 @@ async def list_controls(
else: else:
query += " AND source_citation->>'source' = :src" query += " AND source_citation->>'source' = :src"
params["src"] = source params["src"] = source
if control_type == "atomic":
query += " AND decomposition_method = 'pass0b'"
elif control_type == "rich":
query += " AND (decomposition_method IS NULL OR decomposition_method != 'pass0b')"
if search: if search:
query += " AND (control_id ILIKE :q OR title ILIKE :q OR objective ILIKE :q)" query += " AND (control_id ILIKE :q OR title ILIKE :q OR objective ILIKE :q)"
params["q"] = f"%{search}%" params["q"] = f"%{search}%"
@@ -391,6 +397,7 @@ async def count_controls(
target_audience: Optional[str] = Query(None), target_audience: Optional[str] = Query(None),
source: Optional[str] = Query(None), source: Optional[str] = Query(None),
search: Optional[str] = Query(None), search: Optional[str] = Query(None),
control_type: Optional[str] = Query(None),
): ):
"""Count controls matching filters (for pagination).""" """Count controls matching filters (for pagination)."""
query = "SELECT count(*) FROM canonical_controls WHERE 1=1" query = "SELECT count(*) FROM canonical_controls WHERE 1=1"
@@ -420,6 +427,10 @@ async def count_controls(
else: else:
query += " AND source_citation->>'source' = :src" query += " AND source_citation->>'source' = :src"
params["src"] = source params["src"] = source
if control_type == "atomic":
query += " AND decomposition_method = 'pass0b'"
elif control_type == "rich":
query += " AND (decomposition_method IS NULL OR decomposition_method != 'pass0b')"
if search: if search:
query += " AND (control_id ILIKE :q OR title ILIKE :q OR objective ILIKE :q)" query += " AND (control_id ILIKE :q OR title ILIKE :q OR objective ILIKE :q)"
params["q"] = f"%{search}%" params["q"] = f"%{search}%"
@@ -481,6 +492,134 @@ async def get_control(control_id: str):
return _control_row(row) return _control_row(row)
@router.get("/controls/{control_id}/traceability")
async def get_control_traceability(control_id: str):
    """Get the full traceability chain for a control.

    For atomic controls: shows all parent links with source regulations,
    articles, and the obligation chain.
    For rich controls: shows child atomic controls derived from them.

    Args:
        control_id: Control identifier; it is upper-cased before the lookup.

    Returns:
        A dict with the keys ``control_id``, ``title``, ``is_atomic``,
        ``parent_links``, ``children`` and ``source_count``.

    Raises:
        HTTPException: 404 when no control with this ``control_id`` exists.
    """
    with SessionLocal() as db:
        # Resolve the control and its UUID, which the link tables are keyed on.
        ctrl = db.execute(
            text("""
                SELECT id, control_id, title, parent_control_uuid,
                       decomposition_method, source_citation
                FROM canonical_controls WHERE control_id = :cid
            """),
            {"cid": control_id.upper()},
        ).fetchone()

        if not ctrl:
            raise HTTPException(status_code=404, detail="Control not found")

        result: dict[str, Any] = {
            "control_id": ctrl.control_id,
            "title": ctrl.title,
            "is_atomic": ctrl.decomposition_method == "pass0b",
        }

        ctrl_uuid = str(ctrl.id)

        # Parent links (M:N) — for atomic controls
        parent_links = db.execute(
            text("""
                SELECT cpl.parent_control_uuid, cpl.link_type,
                       cpl.confidence, cpl.source_regulation,
                       cpl.source_article, cpl.obligation_candidate_id,
                       cc.control_id AS parent_control_id,
                       cc.title AS parent_title,
                       cc.source_citation AS parent_citation,
                       oc.obligation_text, oc.action, oc.object,
                       oc.normative_strength
                FROM control_parent_links cpl
                JOIN canonical_controls cc ON cc.id = cpl.parent_control_uuid
                LEFT JOIN obligation_candidates oc ON oc.id = cpl.obligation_candidate_id
                WHERE cpl.control_uuid = CAST(:uid AS uuid)
                ORDER BY cpl.source_regulation, cpl.source_article
            """),
            {"uid": ctrl_uuid},
        ).fetchall()

        result["parent_links"] = [
            {
                "parent_control_id": pl.parent_control_id,
                "parent_title": pl.parent_title,
                "link_type": pl.link_type,
                # Only a missing value defaults to 1.0; a stored confidence of 0
                # is a real value and must not be reported as full confidence.
                "confidence": float(pl.confidence) if pl.confidence is not None else 1.0,
                "source_regulation": pl.source_regulation,
                "source_article": pl.source_article,
                "parent_citation": pl.parent_citation,
                "obligation": {
                    "text": pl.obligation_text,
                    "action": pl.action,
                    "object": pl.object,
                    "normative_strength": pl.normative_strength,
                } if pl.obligation_text else None,
            }
            for pl in parent_links
        ]

        # Also include the 1:1 parent (backwards compat) if not already in links
        if ctrl.parent_control_uuid:
            parent_uuids_in_links = {
                str(pl.parent_control_uuid) for pl in parent_links
            }
            parent_uuid_str = str(ctrl.parent_control_uuid)
            if parent_uuid_str not in parent_uuids_in_links:
                legacy = db.execute(
                    text("""
                        SELECT control_id, title, source_citation
                        FROM canonical_controls WHERE id = CAST(:uid AS uuid)
                    """),
                    {"uid": parent_uuid_str},
                ).fetchone()
                if legacy:
                    # The legacy parent goes first and carries no link-level
                    # regulation/article/obligation data.
                    result["parent_links"].insert(0, {
                        "parent_control_id": legacy.control_id,
                        "parent_title": legacy.title,
                        "link_type": "decomposition",
                        "confidence": 1.0,
                        "source_regulation": None,
                        "source_article": None,
                        "parent_citation": legacy.source_citation,
                        "obligation": None,
                    })

        # Child controls — for rich controls
        children = db.execute(
            text("""
                SELECT control_id, title, category, severity,
                       decomposition_method
                FROM canonical_controls
                WHERE parent_control_uuid = CAST(:uid AS uuid)
                ORDER BY control_id
            """),
            {"uid": ctrl_uuid},
        ).fetchall()

        result["children"] = [
            {
                "control_id": ch.control_id,
                "title": ch.title,
                "category": ch.category,
                "severity": ch.severity,
                "decomposition_method": ch.decomposition_method,
            }
            for ch in children
        ]

        # Unique source regulations count (empty / missing values are ignored)
        regs = {
            pl["source_regulation"]
            for pl in result["parent_links"]
            if pl.get("source_regulation")
        }
        result["source_count"] = len(regs)

        return result
# ============================================================================= # =============================================================================
# CONTROL CRUD (CREATE / UPDATE / DELETE) # CONTROL CRUD (CREATE / UPDATE / DELETE)
# ============================================================================= # =============================================================================

View File

@@ -39,7 +39,7 @@ logger = logging.getLogger(__name__)
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY", "") ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY", "")
ANTHROPIC_MODEL = os.getenv("DECOMPOSITION_LLM_MODEL", "claude-sonnet-4-6") ANTHROPIC_MODEL = os.getenv("DECOMPOSITION_LLM_MODEL", "claude-haiku-4-5-20251001")
DECOMPOSITION_BATCH_SIZE = int(os.getenv("DECOMPOSITION_BATCH_SIZE", "5")) DECOMPOSITION_BATCH_SIZE = int(os.getenv("DECOMPOSITION_BATCH_SIZE", "5"))
LLM_TIMEOUT = float(os.getenv("DECOMPOSITION_LLM_TIMEOUT", "120")) LLM_TIMEOUT = float(os.getenv("DECOMPOSITION_LLM_TIMEOUT", "120"))
ANTHROPIC_API_URL = "https://api.anthropic.com/v1" ANTHROPIC_API_URL = "https://api.anthropic.com/v1"
@@ -955,6 +955,12 @@ class DecompositionPass:
logger.info("Pass 0a: %s", stats) logger.info("Pass 0a: %s", stats)
return stats return stats
_NORMATIVE_STRENGTH_MAP = {
"muss": "must", "must": "must",
"soll": "should", "should": "should",
"kann": "may", "may": "may",
}
def _process_pass0a_obligations( def _process_pass0a_obligations(
self, self,
raw_obligations: list[dict], raw_obligations: list[dict],
@@ -964,6 +970,10 @@ class DecompositionPass:
) -> None: ) -> None:
"""Validate and write obligation candidates from LLM output.""" """Validate and write obligation candidates from LLM output."""
for idx, raw in enumerate(raw_obligations): for idx, raw in enumerate(raw_obligations):
raw_strength = raw.get("normative_strength", "must").lower().strip()
normative_strength = self._NORMATIVE_STRENGTH_MAP.get(
raw_strength, "must"
)
cand = ObligationCandidate( cand = ObligationCandidate(
candidate_id=f"OC-{control_id}-{idx + 1:02d}", candidate_id=f"OC-{control_id}-{idx + 1:02d}",
parent_control_uuid=control_uuid, parent_control_uuid=control_uuid,
@@ -971,7 +981,7 @@ class DecompositionPass:
action=raw.get("action", ""), action=raw.get("action", ""),
object_=raw.get("object", ""), object_=raw.get("object", ""),
condition=raw.get("condition"), condition=raw.get("condition"),
normative_strength=raw.get("normative_strength", "must"), normative_strength=normative_strength,
is_test_obligation=bool(raw.get("is_test_obligation", False)), is_test_obligation=bool(raw.get("is_test_obligation", False)),
is_reporting_obligation=bool(raw.get("is_reporting_obligation", False)), is_reporting_obligation=bool(raw.get("is_reporting_obligation", False)),
) )
@@ -1091,7 +1101,7 @@ class DecompositionPass:
llm_response = await _llm_anthropic( llm_response = await _llm_anthropic(
prompt=prompt, prompt=prompt,
system_prompt=_PASS0B_SYSTEM_PROMPT, system_prompt=_PASS0B_SYSTEM_PROMPT,
max_tokens=max(8192, len(batch) * 1500), max_tokens=min(16384, max(4096, len(batch) * 500)),
) )
stats["llm_calls"] += 1 stats["llm_calls"] += 1
results_by_id = _parse_json_object(llm_response) results_by_id = _parse_json_object(llm_response)
@@ -1246,9 +1256,7 @@ class DecompositionPass:
seq = self._next_atomic_seq(obl["parent_control_id"]) seq = self._next_atomic_seq(obl["parent_control_id"])
atomic.candidate_id = f"{obl['parent_control_id']}-A{seq:02d}" atomic.candidate_id = f"{obl['parent_control_id']}-A{seq:02d}"
self._write_atomic_control( new_uuid = self._write_atomic_control(atomic, obl)
atomic, obl["parent_uuid"], obl["candidate_id"]
)
self.db.execute( self.db.execute(
text(""" text("""
@@ -1260,7 +1268,7 @@ class DecompositionPass:
) )
# Index in Qdrant for future dedup checks # Index in Qdrant for future dedup checks
if self._dedup: if self._dedup and new_uuid:
pattern_id_val = None pattern_id_val = None
pid_row2 = self.db.execute(text( pid_row2 = self.db.execute(text(
"SELECT pattern_id FROM canonical_controls WHERE id = CAST(:uid AS uuid)" "SELECT pattern_id FROM canonical_controls WHERE id = CAST(:uid AS uuid)"
@@ -1268,13 +1276,9 @@ class DecompositionPass:
if pid_row2: if pid_row2:
pattern_id_val = pid_row2[0] pattern_id_val = pid_row2[0]
# Get the UUID of the newly inserted control if pattern_id_val:
new_row = self.db.execute(text(
"SELECT id::text FROM canonical_controls WHERE control_id = :cid ORDER BY created_at DESC LIMIT 1"
), {"cid": atomic.candidate_id}).fetchone()
if new_row and pattern_id_val:
await self._dedup.index_control( await self._dedup.index_control(
control_uuid=new_row[0], control_uuid=new_uuid,
control_id=atomic.candidate_id, control_id=atomic.candidate_id,
title=atomic.title, title=atomic.title,
action=obl.get("action", ""), action=obl.get("action", ""),
@@ -1505,43 +1509,88 @@ class DecompositionPass:
) )
def _write_atomic_control( def _write_atomic_control(
self, atomic: AtomicControlCandidate, self, atomic: AtomicControlCandidate, obl: dict,
parent_uuid: str, candidate_id: str, ) -> Optional[str]:
) -> None: """Insert an atomic control and create parent link.
"""Insert an atomic control into canonical_controls."""
self.db.execute( Returns the UUID of the newly created control, or None on failure.
"""
parent_uuid = obl["parent_uuid"]
candidate_id = obl["candidate_id"]
result = self.db.execute(
text(""" text("""
INSERT INTO canonical_controls ( INSERT INTO canonical_controls (
control_id, title, objective, requirements, control_id, title, objective, rationale,
test_procedure, evidence, severity, category, scope, requirements,
test_procedure, evidence, severity,
open_anchors, category,
release_state, parent_control_uuid, release_state, parent_control_uuid,
decomposition_method, decomposition_method,
generation_metadata generation_metadata,
framework_id,
generation_strategy, pipeline_version
) VALUES ( ) VALUES (
:control_id, :title, :objective, :control_id, :title, :objective, :rationale,
:requirements, :test_procedure, :evidence, :scope, :requirements,
:severity, :category, 'draft', :test_procedure, :evidence,
:severity, :open_anchors, :category,
'draft',
CAST(:parent_uuid AS uuid), 'pass0b', CAST(:parent_uuid AS uuid), 'pass0b',
:gen_meta :gen_meta,
CAST(:framework_id AS uuid),
'pass0b', 2
) )
RETURNING id::text
"""), """),
{ {
"control_id": atomic.candidate_id, "control_id": atomic.candidate_id,
"title": atomic.title, "title": atomic.title,
"objective": atomic.objective, "objective": atomic.objective,
"rationale": getattr(atomic, "rationale", None) or "Aus Obligation abgeleitet.",
"scope": json.dumps({}),
"requirements": json.dumps(atomic.requirements), "requirements": json.dumps(atomic.requirements),
"test_procedure": json.dumps(atomic.test_procedure), "test_procedure": json.dumps(atomic.test_procedure),
"evidence": json.dumps(atomic.evidence), "evidence": json.dumps(atomic.evidence),
"severity": atomic.severity, "severity": atomic.severity,
"open_anchors": json.dumps([]),
"category": atomic.category, "category": atomic.category,
"parent_uuid": parent_uuid, "parent_uuid": parent_uuid,
"gen_meta": json.dumps({ "gen_meta": json.dumps({
"decomposition_source": candidate_id, "decomposition_source": candidate_id,
"decomposition_method": "pass0b", "decomposition_method": "pass0b",
}), }),
"framework_id": "14b1bdd2-abc7-4a43-adae-14471ee5c7cf",
}, },
) )
row = result.fetchone()
new_uuid = row[0] if row else None
# Create M:N parent link (control_parent_links)
if new_uuid:
citation = _parse_citation(obl.get("parent_citation", ""))
self.db.execute(
text("""
INSERT INTO control_parent_links
(control_uuid, parent_control_uuid, link_type, confidence,
source_regulation, source_article, obligation_candidate_id)
VALUES
(CAST(:cu AS uuid), CAST(:pu AS uuid), 'decomposition', 1.0,
:sr, :sa, CAST(:oci AS uuid))
ON CONFLICT (control_uuid, parent_control_uuid) DO NOTHING
"""),
{
"cu": new_uuid,
"pu": parent_uuid,
"sr": citation.get("source", ""),
"sa": citation.get("article", ""),
"oci": obl["oc_id"],
},
)
return new_uuid
def _next_atomic_seq(self, parent_control_id: str) -> int: def _next_atomic_seq(self, parent_control_id: str) -> int:
"""Get the next sequence number for atomic controls under a parent.""" """Get the next sequence number for atomic controls under a parent."""
result = self.db.execute( result = self.db.execute(
@@ -2004,6 +2053,22 @@ def _format_citation(citation) -> str:
return str(citation) return str(citation)
def _parse_citation(citation) -> dict:
"""Parse source_citation JSONB into a dict with source/article/paragraph."""
if not citation:
return {}
if isinstance(citation, dict):
return citation
if isinstance(citation, str):
try:
c = json.loads(citation)
if isinstance(c, dict):
return c
except (json.JSONDecodeError, TypeError):
pass
return {}
def _compute_extraction_confidence(flags: dict) -> float: def _compute_extraction_confidence(flags: dict) -> float:
"""Compute confidence score from quality flags.""" """Compute confidence score from quality flags."""
score = 0.0 score = 0.0

View File

@@ -1118,10 +1118,15 @@ class TestDecompositionPassAnthropicBatch:
call_count[0] += 1 call_count[0] += 1
if call_count[0] == 1: if call_count[0] == 1:
return mock_rows # SELECT candidates return mock_rows # SELECT candidates
# _next_atomic_seq calls (every 3rd after first: 2, 5, 8, ...) # _next_atomic_seq calls: call 2 (control 1), call 6 (control 2)
if call_count[0] in (2, 5): if call_count[0] in (2, 6):
return mock_seq return mock_seq
return MagicMock() # INSERT/UPDATE # INSERT RETURNING calls: call 3 (control 1), call 7 (control 2)
if call_count[0] in (3, 7):
mock_insert = MagicMock()
mock_insert.fetchone.return_value = (f"new-uuid-{call_count[0]}",)
return mock_insert
return MagicMock() # parent_links INSERT / UPDATE
mock_db.execute.side_effect = side_effect mock_db.execute.side_effect = side_effect
batched_response = json.dumps({ batched_response = json.dumps({
@@ -1608,12 +1613,16 @@ class TestPass0bWithEnrichment:
mock_db = MagicMock() mock_db = MagicMock()
mock_seq = MagicMock() mock_seq = MagicMock()
mock_seq.fetchone.return_value = (0,) mock_seq.fetchone.return_value = (0,)
mock_insert = MagicMock()
mock_insert.fetchone.return_value = ("new-uuid-1",)
call_count = [0] call_count = [0]
def side_effect(*args, **kwargs): def side_effect(*args, **kwargs):
call_count[0] += 1 call_count[0] += 1
if call_count[0] == 1: if call_count[0] == 1:
return mock_seq # _next_atomic_seq return mock_seq # _next_atomic_seq
if call_count[0] == 2:
return mock_insert # INSERT RETURNING id
return MagicMock() return MagicMock()
mock_db.execute.side_effect = side_effect mock_db.execute.side_effect = side_effect
@@ -1623,12 +1632,20 @@ class TestPass0bWithEnrichment:
decomp._process_pass0b_control(obl, parsed, stats) decomp._process_pass0b_control(obl, parsed, stats)
) )
# _write_atomic_control is call #2: db.execute(text(...), {params}) # _write_atomic_control INSERT is call #2: db.execute(text(...), {params})
insert_call = mock_db.execute.call_args_list[1] insert_call = mock_db.execute.call_args_list[1]
# positional args: (text_obj, params_dict) # positional args: (text_obj, params_dict)
insert_params = insert_call[0][1] insert_params = insert_call[0][1]
assert insert_params["severity"] == "medium" assert insert_params["severity"] == "medium"
# parent_link INSERT is call #3
link_call = mock_db.execute.call_args_list[2]
link_query = str(link_call[0][0])
assert "control_parent_links" in link_query
link_params = link_call[0][1]
assert link_params["cu"] == "new-uuid-1"
assert link_params["pu"] == "p-uuid"
def test_test_obligation_gets_testing_category(self): def test_test_obligation_gets_testing_category(self):
"""Test obligations should get category='testing'.""" """Test obligations should get category='testing'."""
obl = { obl = {
@@ -1664,12 +1681,16 @@ class TestPass0bWithEnrichment:
mock_db = MagicMock() mock_db = MagicMock()
mock_seq = MagicMock() mock_seq = MagicMock()
mock_seq.fetchone.return_value = (0,) mock_seq.fetchone.return_value = (0,)
mock_insert = MagicMock()
mock_insert.fetchone.return_value = ("new-uuid-2",)
call_count = [0] call_count = [0]
def side_effect(*args, **kwargs): def side_effect(*args, **kwargs):
call_count[0] += 1 call_count[0] += 1
if call_count[0] == 1: if call_count[0] == 1:
return mock_seq return mock_seq
if call_count[0] == 2:
return mock_insert # INSERT RETURNING id
return MagicMock() return MagicMock()
mock_db.execute.side_effect = side_effect mock_db.execute.side_effect = side_effect
@@ -1679,7 +1700,99 @@ class TestPass0bWithEnrichment:
decomp._process_pass0b_control(obl, parsed, stats) decomp._process_pass0b_control(obl, parsed, stats)
) )
# _write_atomic_control is call #2: db.execute(text(...), {params}) # _write_atomic_control INSERT is call #2: db.execute(text(...), {params})
insert_call = mock_db.execute.call_args_list[1] insert_call = mock_db.execute.call_args_list[1]
insert_params = insert_call[0][1] insert_params = insert_call[0][1]
assert insert_params["category"] == "testing" assert insert_params["category"] == "testing"
def test_parent_link_created_with_source_citation(self):
    """Writing an atomic control also records a control_parent_links row.

    The link row must carry the new control's UUID, the parent UUID, the
    obligation candidate id, and the regulation/article taken from the
    JSON-encoded ``parent_citation`` of the obligation.
    """
    import asyncio
    import json as _json

    parent_citation = _json.dumps({
        "source": "DSGVO",
        "article": "Art. 5 Abs. 1 lit. c",
        "paragraph": "",
    })
    obl = {
        "oc_id": "oc-link-1",
        "candidate_id": "OC-DSGVO-01",
        "parent_uuid": "p-uuid-dsgvo",
        "obligation_text": "Daten minimieren",
        "action": "minimieren",
        "object": "personenbezogene Daten",
        "is_test": False,
        "is_reporting": False,
        "parent_title": "Datenminimierung",
        "parent_category": "privacy",
        "parent_citation": parent_citation,
        "parent_severity": "high",
        "parent_control_id": "PRIV-001",
        "source_ref": "DSGVO Art. 5 Abs. 1 lit. c",
        "trigger_type": "continuous",
        "is_implementation_specific": False,
    }
    parsed = {
        "title": "Personenbezogene Daten minimieren",
        "objective": "Nur erforderliche Daten erheben",
        "requirements": ["Datenminimierung"],
        "test_procedure": ["Audit"],
        "evidence": ["Protokoll"],
        "severity": "high",
        "category": "privacy",
    }
    stats = {
        "controls_created": 0,
        "candidates_processed": 0,
        "llm_failures": 0,
        "dedup_linked": 0,
        "dedup_review": 0,
    }

    # Canned results keyed by the 1-based index of the db.execute() call:
    # call 1 is the sequence lookup, call 2 is the INSERT ... RETURNING id.
    seq_result = MagicMock()
    seq_result.fetchone.return_value = (0,)
    insert_result = MagicMock()
    insert_result.fetchone.return_value = ("new-uuid-dsgvo",)
    canned = {1: seq_result, 2: insert_result}
    seen_calls = []

    def fake_execute(*args, **kwargs):
        seen_calls.append(args)
        call_no = len(seen_calls)
        if call_no in canned:
            return canned[call_no]
        return MagicMock()

    mock_db = MagicMock()
    mock_db.execute.side_effect = fake_execute

    # Drive the coroutine to completion on the current event loop.
    decomp = DecompositionPass(db=mock_db)
    loop = asyncio.get_event_loop()
    loop.run_until_complete(
        decomp._process_pass0b_control(obl, parsed, stats)
    )

    # The third execute() call is the control_parent_links INSERT.
    link_positional = mock_db.execute.call_args_list[2][0]
    assert "control_parent_links" in str(link_positional[0])

    link_params = link_positional[1]
    assert link_params["cu"] == "new-uuid-dsgvo"
    assert link_params["pu"] == "p-uuid-dsgvo"
    assert link_params["sr"] == "DSGVO"
    assert link_params["sa"] == "Art. 5 Abs. 1 lit. c"
    assert link_params["oci"] == "oc-link-1"
def test_parse_citation_handles_formats(self):
    """_parse_citation accepts a JSON string or a dict and falls back to {}.

    Covers four input shapes: JSON-encoded object, ready-made dict,
    empty/None, and a string that is not valid JSON.
    """
    import json as _json
    from compliance.services.decomposition_pass import _parse_citation

    # JSON-encoded object is decoded into a dict.
    from_json = _parse_citation(
        _json.dumps({"source": "NIS2", "article": "Art. 21"})
    )
    assert from_json["source"] == "NIS2"
    assert from_json["article"] == "Art. 21"

    # A dict is passed through.
    from_dict = _parse_citation({"source": "DSGVO", "article": "Art. 5"})
    assert from_dict["source"] == "DSGVO"

    # Empty string and None both yield an empty dict.
    for blank in ("", None):
        assert _parse_citation(blank) == {}

    # Unparseable text yields an empty dict instead of raising.
    assert _parse_citation("not json") == {}

View File

@@ -107,6 +107,7 @@ services:
COMPLIANCE_LLM_TEMPERATURE: ${COMPLIANCE_LLM_TEMPERATURE:-0.3} COMPLIANCE_LLM_TEMPERATURE: ${COMPLIANCE_LLM_TEMPERATURE:-0.3}
COMPLIANCE_LLM_TIMEOUT: ${COMPLIANCE_LLM_TIMEOUT:-120} COMPLIANCE_LLM_TIMEOUT: ${COMPLIANCE_LLM_TIMEOUT:-120}
ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY:-} ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY:-}
DECOMPOSITION_LLM_MODEL: ${DECOMPOSITION_LLM_MODEL:-claude-haiku-4-5-20251001}
SMTP_HOST: ${SMTP_HOST:-bp-core-mailpit} SMTP_HOST: ${SMTP_HOST:-bp-core-mailpit}
SMTP_PORT: ${SMTP_PORT:-1025} SMTP_PORT: ${SMTP_PORT:-1025}
SMTP_USERNAME: ${SMTP_USERNAME:-} SMTP_USERNAME: ${SMTP_USERNAME:-}