feat: control_parent_links population + traceability API + frontend

- _write_atomic_control() now uses RETURNING id and inserts into
  control_parent_links (M:N) with source_regulation, source_article,
  and obligation_candidate_id parsed from parent's source_citation
- New _parse_citation() helper for JSONB source_citation extraction
- New GET /controls/{id}/traceability endpoint returning full chain:
  parent links with obligations, child controls, source_count
- Backend: control_type filter (atomic/rich) for controls + count
- Frontend: Rechtsgrundlagen section in ControlDetail showing all
  parent links per source regulation with obligation text + strength
- Frontend: Atomic/Rich filter dropdown in Control Library list
- Frontend: GenerationStrategyBadge recognizes 'pass0b' strategy
- Tests: 3 new tests for parent_link creation + citation parsing,
  existing batch test mock updated for RETURNING clause

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-03-23 08:14:29 +01:00
parent 0027f78fc5
commit ac6134ce6d
7 changed files with 511 additions and 43 deletions

View File

@@ -27,7 +27,7 @@ export async function GET(request: NextRequest) {
case 'controls': {
const controlParams = new URLSearchParams()
const passthrough = ['severity', 'domain', 'release_state', 'verification_method', 'category',
'target_audience', 'source', 'search', 'sort', 'order', 'limit', 'offset']
'target_audience', 'source', 'search', 'control_type', 'sort', 'order', 'limit', 'offset']
for (const key of passthrough) {
const val = searchParams.get(key)
if (val) controlParams.set(key, val)
@@ -40,7 +40,7 @@ export async function GET(request: NextRequest) {
case 'controls-count': {
const countParams = new URLSearchParams()
const countPassthrough = ['severity', 'domain', 'release_state', 'verification_method', 'category',
'target_audience', 'source', 'search']
'target_audience', 'source', 'search', 'control_type']
for (const key of countPassthrough) {
const val = searchParams.get(key)
if (val) countParams.set(key, val)
@@ -99,6 +99,15 @@ export async function GET(request: NextRequest) {
backendPath = '/api/compliance/v1/canonical/categories'
break
case 'traceability': {
const traceId = searchParams.get('id')
if (!traceId) {
return NextResponse.json({ error: 'Missing control id' }, { status: 400 })
}
backendPath = `/api/compliance/v1/canonical/controls/${encodeURIComponent(traceId)}/traceability`
break
}
case 'similar': {
const simControlId = searchParams.get('id')
if (!simControlId) {

View File

@@ -1,10 +1,10 @@
'use client'
import { useState, useEffect } from 'react'
import { useState, useEffect, useCallback } from 'react'
import {
ArrowLeft, ExternalLink, BookOpen, Scale, FileText,
Eye, CheckCircle2, Trash2, Pencil, Clock,
ChevronLeft, SkipForward, GitMerge, Search,
ChevronLeft, SkipForward, GitMerge, Search, Landmark,
} from 'lucide-react'
import {
CanonicalControl, EFFORT_LABELS, BACKEND_URL,
@@ -25,6 +25,37 @@ interface SimilarControl {
similarity: number
}
/**
 * One entry of `parent_links` as returned by the traceability endpoint:
 * a single parent control this control is linked to, plus the legal source
 * recorded for that link.
 */
interface ParentLink {
  /** Human-readable id of the parent control (not its UUID). */
  parent_control_id: string
  /** Title of the parent control. */
  parent_title: string
  /** Kind of link; 'decomposition' and 'dedup_merge' get dedicated labels in the UI. */
  link_type: string
  /** Link confidence as a number; the backend sends 1.0 when none is stored. */
  confidence: number
  /** Regulation recorded on the link itself, or null when the link carries none. */
  source_regulation: null | string
  /** Article recorded on the link itself, or null when the link carries none. */
  source_article: null | string
  /** The parent's own source_citation, used as a display fallback for the regulation. */
  parent_citation: null | Record<string, string>
  /** Obligation the link was derived from; null when no obligation text is attached. */
  obligation: null | {
    text: string
    action: string
    object: string
    normative_strength: string
  }
}
/**
 * Response body of the traceability endpoint for a single control.
 */
interface TraceabilityData {
  /** Human-readable id of the control the chain was requested for. */
  control_id: string
  /** Title of that control. */
  title: string
  /** True when the backend reports decomposition_method 'pass0b' for the control. */
  is_atomic: boolean
  /** Parent controls this control is linked to (empty when there are none). */
  parent_links: ParentLink[]
  /** Controls whose parent is this control (empty when there are none). */
  children: {
    control_id: string
    title: string
    category: string
    severity: string
    decomposition_method: string
  }[]
  /** Number of distinct, non-empty source regulations across parent_links. */
  source_count: number
}
interface ControlDetailProps {
ctrl: CanonicalControl
onBack: () => void
@@ -57,9 +88,23 @@ export function ControlDetail({
const [loadingSimilar, setLoadingSimilar] = useState(false)
const [selectedDuplicates, setSelectedDuplicates] = useState<Set<string>>(new Set())
const [merging, setMerging] = useState(false)
const [traceability, setTraceability] = useState<TraceabilityData | null>(null)
const [loadingTrace, setLoadingTrace] = useState(false)
// Loads the traceability chain (parent links, children, source count) for the
// control currently shown. Best-effort: a failed or non-OK request leaves the
// traceability state empty so the simple parent fallback can render instead.
const loadTraceability = useCallback(async () => {
  // Drop the chain of the previously shown control first; otherwise its legal
  // bases would stay on screen for the new control while the request is pending
  // or if it fails.
  setTraceability(null)
  setLoadingTrace(true)
  try {
    // The id ends up in a query string, so it must be URL-encoded.
    const controlId = encodeURIComponent(ctrl.control_id)
    const res = await fetch(`${BACKEND_URL}?endpoint=traceability&id=${controlId}`)
    if (res.ok) {
      setTraceability(await res.json())
    }
  } catch {
    /* ignore: traceability is optional decoration of the detail view */
  } finally {
    setLoadingTrace(false)
  }
}, [ctrl.control_id])
useEffect(() => {
loadSimilarControls()
loadTraceability()
setSelectedDuplicates(new Set())
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [ctrl.control_id])
@@ -242,8 +287,79 @@ export function ControlDetail({
</section>
)}
{/* Parent Control (atomare Controls) */}
{ctrl.parent_control_uuid && (
{/* Rechtsgrundlagen / Traceability (atomic controls) */}
{traceability && traceability.parent_links.length > 0 && (
<section className="bg-violet-50 border border-violet-200 rounded-lg p-4">
<div className="flex items-center gap-2 mb-3">
<Landmark className="w-4 h-4 text-violet-600" />
<h3 className="text-sm font-semibold text-violet-900">
Rechtsgrundlagen ({traceability.source_count} {traceability.source_count === 1 ? 'Quelle' : 'Quellen'})
</h3>
<ObligationTypeBadge type={ctrl.generation_metadata?.obligation_type as string} />
{loadingTrace && <span className="text-xs text-violet-400">Laden...</span>}
</div>
<div className="space-y-3">
{traceability.parent_links.map((link, i) => (
<div key={i} className="bg-white/60 border border-violet-100 rounded-lg p-3">
<div className="flex items-start gap-2">
<Scale className="w-4 h-4 text-violet-500 mt-0.5 flex-shrink-0" />
<div className="flex-1 min-w-0">
<div className="flex items-center gap-2 flex-wrap">
{link.source_regulation && (
<span className="text-sm font-semibold text-violet-900">{link.source_regulation}</span>
)}
{link.source_article && (
<span className="text-sm text-violet-700">{link.source_article}</span>
)}
{!link.source_regulation && link.parent_citation?.source && (
<span className="text-sm font-semibold text-violet-900">
{link.parent_citation.source}
{link.parent_citation.article && `${link.parent_citation.article}`}
</span>
)}
<span className={`text-xs px-1.5 py-0.5 rounded ${
link.link_type === 'decomposition' ? 'bg-violet-100 text-violet-600' :
link.link_type === 'dedup_merge' ? 'bg-blue-100 text-blue-600' :
'bg-gray-100 text-gray-600'
}`}>
{link.link_type === 'decomposition' ? 'Ableitung' :
link.link_type === 'dedup_merge' ? 'Dedup' :
link.link_type}
</span>
</div>
<p className="text-xs text-violet-600 mt-1">
via{' '}
<span className="font-mono font-medium text-purple-700 bg-purple-50 px-1 py-0.5 rounded">
{link.parent_control_id}
</span>
{link.parent_title && (
<span className="text-violet-500 ml-1"> {link.parent_title}</span>
)}
</p>
{link.obligation && (
<p className="text-xs text-violet-500 mt-1.5 bg-violet-50 rounded p-2">
<span className={`inline-block mr-1.5 px-1.5 py-0.5 rounded text-xs font-medium ${
link.obligation.normative_strength === 'must' ? 'bg-red-100 text-red-700' :
link.obligation.normative_strength === 'should' ? 'bg-amber-100 text-amber-700' :
'bg-green-100 text-green-700'
}`}>
{link.obligation.normative_strength === 'must' ? 'MUSS' :
link.obligation.normative_strength === 'should' ? 'SOLL' : 'KANN'}
</span>
{link.obligation.text.slice(0, 200)}
{link.obligation.text.length > 200 ? '...' : ''}
</p>
)}
</div>
</div>
</div>
))}
</div>
</section>
)}
{/* Fallback: simple parent display when traceability not loaded yet */}
{ctrl.parent_control_uuid && (!traceability || traceability.parent_links.length === 0) && !loadingTrace && (
<section className="bg-violet-50 border border-violet-200 rounded-lg p-4">
<div className="flex items-center gap-2 mb-1">
<GitMerge className="w-4 h-4 text-violet-600" />
@@ -259,12 +375,27 @@ export function ControlDetail({
<span className="text-violet-700 ml-1"> {ctrl.parent_control_title}</span>
)}
</p>
{ctrl.generation_metadata?.obligation_text && (
<p className="text-xs text-violet-600 mt-2 bg-violet-100/50 rounded p-2">
Obligation: {String(ctrl.generation_metadata.obligation_text).slice(0, 300)}
{String(ctrl.generation_metadata.obligation_text).length > 300 ? '...' : ''}
</p>
)}
</section>
)}
{/* Child controls (rich controls that have atomic children) */}
{traceability && traceability.children.length > 0 && (
<section className="bg-emerald-50 border border-emerald-200 rounded-lg p-4">
<div className="flex items-center gap-2 mb-3">
<GitMerge className="w-4 h-4 text-emerald-600" />
<h3 className="text-sm font-semibold text-emerald-900">
Abgeleitete Controls ({traceability.children.length})
</h3>
</div>
<div className="space-y-1.5">
{traceability.children.map((child) => (
<div key={child.control_id} className="flex items-center gap-2 text-sm">
<span className="font-mono text-xs text-purple-600 bg-purple-50 px-1.5 py-0.5 rounded">{child.control_id}</span>
<span className="text-gray-700 flex-1 truncate">{child.title}</span>
<SeverityBadge severity={child.severity} />
</div>
))}
</div>
</section>
)}

View File

@@ -282,7 +282,7 @@ export function GenerationStrategyBadge({ strategy }: { strategy: string | null
if (strategy === 'phase74_gap_fill') {
return <span className="inline-flex items-center px-1.5 py-0.5 rounded text-xs font-medium bg-blue-100 text-blue-700">v5 Gap</span>
}
if (strategy === 'pass0b_atomic') {
if (strategy === 'pass0b_atomic' || strategy === 'pass0b') {
return <span className="inline-flex items-center px-1.5 py-0.5 rounded text-xs font-medium bg-violet-100 text-violet-700">Atomar</span>
}
return <span className="inline-flex items-center px-1.5 py-0.5 rounded text-xs font-medium bg-gray-100 text-gray-500">{strategy}</span>

View File

@@ -53,6 +53,7 @@ export default function ControlLibraryPage() {
const [categoryFilter, setCategoryFilter] = useState<string>('')
const [audienceFilter, setAudienceFilter] = useState<string>('')
const [sourceFilter, setSourceFilter] = useState<string>('')
const [typeFilter, setTypeFilter] = useState<string>('')
const [sortBy, setSortBy] = useState<'id' | 'newest' | 'oldest' | 'source'>('id')
// CRUD state
@@ -94,10 +95,11 @@ export default function ControlLibraryPage() {
if (categoryFilter) p.set('category', categoryFilter)
if (audienceFilter) p.set('target_audience', audienceFilter)
if (sourceFilter) p.set('source', sourceFilter)
if (typeFilter) p.set('control_type', typeFilter)
if (debouncedSearch) p.set('search', debouncedSearch)
if (extra) for (const [k, v] of Object.entries(extra)) p.set(k, v)
return p.toString()
}, [severityFilter, domainFilter, stateFilter, verificationFilter, categoryFilter, audienceFilter, sourceFilter, debouncedSearch])
}, [severityFilter, domainFilter, stateFilter, verificationFilter, categoryFilter, audienceFilter, sourceFilter, typeFilter, debouncedSearch])
// Load metadata (domains, sources — once + on refresh)
const loadMeta = useCallback(async () => {
@@ -165,7 +167,7 @@ export default function ControlLibraryPage() {
useEffect(() => { loadControls() }, [loadControls])
// Reset page when filters change
useEffect(() => { setCurrentPage(1) }, [severityFilter, domainFilter, stateFilter, verificationFilter, categoryFilter, audienceFilter, sourceFilter, debouncedSearch, sortBy])
useEffect(() => { setCurrentPage(1) }, [severityFilter, domainFilter, stateFilter, verificationFilter, categoryFilter, audienceFilter, sourceFilter, typeFilter, debouncedSearch, sortBy])
// Pagination
const totalPages = Math.max(1, Math.ceil(totalCount / PAGE_SIZE))
@@ -664,6 +666,15 @@ export default function ControlLibraryPage() {
<option key={s.source} value={s.source}>{s.source} ({s.count})</option>
))}
</select>
<select
value={typeFilter}
onChange={e => setTypeFilter(e.target.value)}
className="text-sm border border-gray-300 rounded-lg px-2 py-1.5 focus:outline-none focus:ring-2 focus:ring-purple-500"
>
<option value="">Alle Typen</option>
<option value="rich">Rich Controls</option>
<option value="atomic">Atomare Controls</option>
</select>
<span className="text-gray-300 mx-1">|</span>
<ArrowUpDown className="w-4 h-4 text-gray-400" />
<select

View File

@@ -10,6 +10,7 @@ Endpoints:
GET /v1/canonical/frameworks/{framework_id}/controls — Controls of a framework
GET /v1/canonical/controls — All controls (filterable)
GET /v1/canonical/controls/{control_id} — Single control
GET /v1/canonical/controls/{control_id}/traceability — Traceability chain
GET /v1/canonical/controls/{control_id}/similar — Find similar controls
POST /v1/canonical/controls — Create a control
PUT /v1/canonical/controls/{control_id} — Update a control
@@ -314,6 +315,7 @@ async def list_controls(
target_audience: Optional[str] = Query(None),
source: Optional[str] = Query(None, description="Filter by source_citation->source"),
search: Optional[str] = Query(None, description="Full-text search in control_id, title, objective"),
control_type: Optional[str] = Query(None, description="Filter: atomic, rich, or all"),
sort: Optional[str] = Query("control_id", description="Sort field: control_id, created_at, severity"),
order: Optional[str] = Query("asc", description="Sort order: asc or desc"),
limit: Optional[int] = Query(None, ge=1, le=5000, description="Max results"),
@@ -351,6 +353,10 @@ async def list_controls(
else:
query += " AND source_citation->>'source' = :src"
params["src"] = source
if control_type == "atomic":
query += " AND decomposition_method = 'pass0b'"
elif control_type == "rich":
query += " AND (decomposition_method IS NULL OR decomposition_method != 'pass0b')"
if search:
query += " AND (control_id ILIKE :q OR title ILIKE :q OR objective ILIKE :q)"
params["q"] = f"%{search}%"
@@ -391,6 +397,7 @@ async def count_controls(
target_audience: Optional[str] = Query(None),
source: Optional[str] = Query(None),
search: Optional[str] = Query(None),
control_type: Optional[str] = Query(None),
):
"""Count controls matching filters (for pagination)."""
query = "SELECT count(*) FROM canonical_controls WHERE 1=1"
@@ -420,6 +427,10 @@ async def count_controls(
else:
query += " AND source_citation->>'source' = :src"
params["src"] = source
if control_type == "atomic":
query += " AND decomposition_method = 'pass0b'"
elif control_type == "rich":
query += " AND (decomposition_method IS NULL OR decomposition_method != 'pass0b')"
if search:
query += " AND (control_id ILIKE :q OR title ILIKE :q OR objective ILIKE :q)"
params["q"] = f"%{search}%"
@@ -481,6 +492,134 @@ async def get_control(control_id: str):
return _control_row(row)
@router.get("/controls/{control_id}/traceability")
async def get_control_traceability(control_id: str):
    """Get the full traceability chain for a control.

    For atomic controls: shows all parent links with source regulations,
    articles, and the obligation chain.
    For rich controls: shows child atomic controls derived from them.

    Args:
        control_id: Human-readable control id; it is upper-cased before lookup.

    Returns:
        A dict with the keys ``control_id``, ``title``, ``is_atomic``,
        ``parent_links``, ``children`` and ``source_count``.

    Raises:
        HTTPException: 404 when no control with that id exists.
    """
    with SessionLocal() as db:
        # Resolve the human-readable id to the row (and its UUID primary key).
        ctrl = db.execute(
            text("""
                SELECT id, control_id, title, parent_control_uuid,
                       decomposition_method, source_citation
                FROM canonical_controls WHERE control_id = :cid
            """),
            {"cid": control_id.upper()},
        ).fetchone()

        if not ctrl:
            raise HTTPException(status_code=404, detail="Control not found")

        result: dict[str, Any] = {
            "control_id": ctrl.control_id,
            "title": ctrl.title,
            "is_atomic": ctrl.decomposition_method == "pass0b",
        }

        ctrl_uuid = str(ctrl.id)

        # Parent links (M:N) — for atomic controls
        parent_links = db.execute(
            text("""
                SELECT cpl.parent_control_uuid, cpl.link_type,
                       cpl.confidence, cpl.source_regulation,
                       cpl.source_article, cpl.obligation_candidate_id,
                       cc.control_id AS parent_control_id,
                       cc.title AS parent_title,
                       cc.source_citation AS parent_citation,
                       oc.obligation_text, oc.action, oc.object,
                       oc.normative_strength
                FROM control_parent_links cpl
                JOIN canonical_controls cc ON cc.id = cpl.parent_control_uuid
                LEFT JOIN obligation_candidates oc ON oc.id = cpl.obligation_candidate_id
                WHERE cpl.control_uuid = CAST(:uid AS uuid)
                ORDER BY cpl.source_regulation, cpl.source_article
            """),
            {"uid": ctrl_uuid},
        ).fetchall()

        result["parent_links"] = [
            {
                "parent_control_id": pl.parent_control_id,
                "parent_title": pl.parent_title,
                "link_type": pl.link_type,
                # Only a missing confidence falls back to 1.0; a stored 0.0 is a
                # real value and must not be reported as full confidence.
                "confidence": float(pl.confidence) if pl.confidence is not None else 1.0,
                "source_regulation": pl.source_regulation,
                "source_article": pl.source_article,
                "parent_citation": pl.parent_citation,
                # The obligation comes from a LEFT JOIN, so it may be absent.
                "obligation": {
                    "text": pl.obligation_text,
                    "action": pl.action,
                    "object": pl.object,
                    "normative_strength": pl.normative_strength,
                } if pl.obligation_text else None,
            }
            for pl in parent_links
        ]

        # Also include the 1:1 parent (backwards compat) if not already in links
        if ctrl.parent_control_uuid:
            parent_uuids_in_links = {
                str(pl.parent_control_uuid) for pl in parent_links
            }
            parent_uuid_str = str(ctrl.parent_control_uuid)
            if parent_uuid_str not in parent_uuids_in_links:
                legacy = db.execute(
                    text("""
                        SELECT control_id, title, source_citation
                        FROM canonical_controls WHERE id = CAST(:uid AS uuid)
                    """),
                    {"uid": parent_uuid_str},
                ).fetchone()
                if legacy:
                    # No link row exists for the legacy parent, so regulation,
                    # article and obligation are unknown here.
                    result["parent_links"].insert(0, {
                        "parent_control_id": legacy.control_id,
                        "parent_title": legacy.title,
                        "link_type": "decomposition",
                        "confidence": 1.0,
                        "source_regulation": None,
                        "source_article": None,
                        "parent_citation": legacy.source_citation,
                        "obligation": None,
                    })

        # Child controls — for rich controls
        children = db.execute(
            text("""
                SELECT control_id, title, category, severity,
                       decomposition_method
                FROM canonical_controls
                WHERE parent_control_uuid = CAST(:uid AS uuid)
                ORDER BY control_id
            """),
            {"uid": ctrl_uuid},
        ).fetchall()

        result["children"] = [
            {
                "control_id": ch.control_id,
                "title": ch.title,
                "category": ch.category,
                "severity": ch.severity,
                "decomposition_method": ch.decomposition_method,
            }
            for ch in children
        ]

        # Unique source regulations count (empty/None regulations are not counted)
        regs = {
            pl["source_regulation"]
            for pl in result["parent_links"]
            if pl.get("source_regulation")
        }
        result["source_count"] = len(regs)

    return result
# =============================================================================
# CONTROL CRUD (CREATE / UPDATE / DELETE)
# =============================================================================

View File

@@ -955,6 +955,12 @@ class DecompositionPass:
logger.info("Pass 0a: %s", stats)
return stats
_NORMATIVE_STRENGTH_MAP = {
"muss": "must", "must": "must",
"soll": "should", "should": "should",
"kann": "may", "may": "may",
}
def _process_pass0a_obligations(
self,
raw_obligations: list[dict],
@@ -964,6 +970,10 @@ class DecompositionPass:
) -> None:
"""Validate and write obligation candidates from LLM output."""
for idx, raw in enumerate(raw_obligations):
raw_strength = raw.get("normative_strength", "must").lower().strip()
normative_strength = self._NORMATIVE_STRENGTH_MAP.get(
raw_strength, "must"
)
cand = ObligationCandidate(
candidate_id=f"OC-{control_id}-{idx + 1:02d}",
parent_control_uuid=control_uuid,
@@ -971,7 +981,7 @@ class DecompositionPass:
action=raw.get("action", ""),
object_=raw.get("object", ""),
condition=raw.get("condition"),
normative_strength=raw.get("normative_strength", "must"),
normative_strength=normative_strength,
is_test_obligation=bool(raw.get("is_test_obligation", False)),
is_reporting_obligation=bool(raw.get("is_reporting_obligation", False)),
)
@@ -1246,9 +1256,7 @@ class DecompositionPass:
seq = self._next_atomic_seq(obl["parent_control_id"])
atomic.candidate_id = f"{obl['parent_control_id']}-A{seq:02d}"
self._write_atomic_control(
atomic, obl["parent_uuid"], obl["candidate_id"]
)
new_uuid = self._write_atomic_control(atomic, obl)
self.db.execute(
text("""
@@ -1260,7 +1268,7 @@ class DecompositionPass:
)
# Index in Qdrant for future dedup checks
if self._dedup:
if self._dedup and new_uuid:
pattern_id_val = None
pid_row2 = self.db.execute(text(
"SELECT pattern_id FROM canonical_controls WHERE id = CAST(:uid AS uuid)"
@@ -1268,13 +1276,9 @@ class DecompositionPass:
if pid_row2:
pattern_id_val = pid_row2[0]
# Get the UUID of the newly inserted control
new_row = self.db.execute(text(
"SELECT id::text FROM canonical_controls WHERE control_id = :cid ORDER BY created_at DESC LIMIT 1"
), {"cid": atomic.candidate_id}).fetchone()
if new_row and pattern_id_val:
if pattern_id_val:
await self._dedup.index_control(
control_uuid=new_row[0],
control_uuid=new_uuid,
control_id=atomic.candidate_id,
title=atomic.title,
action=obl.get("action", ""),
@@ -1505,43 +1509,88 @@ class DecompositionPass:
)
def _write_atomic_control(
self, atomic: AtomicControlCandidate,
parent_uuid: str, candidate_id: str,
) -> None:
"""Insert an atomic control into canonical_controls."""
self.db.execute(
self, atomic: AtomicControlCandidate, obl: dict,
) -> Optional[str]:
"""Insert an atomic control and create parent link.
Returns the UUID of the newly created control, or None on failure.
"""
parent_uuid = obl["parent_uuid"]
candidate_id = obl["candidate_id"]
result = self.db.execute(
text("""
INSERT INTO canonical_controls (
control_id, title, objective, requirements,
test_procedure, evidence, severity, category,
control_id, title, objective, rationale,
scope, requirements,
test_procedure, evidence, severity,
open_anchors, category,
release_state, parent_control_uuid,
decomposition_method,
generation_metadata
generation_metadata,
framework_id,
generation_strategy, pipeline_version
) VALUES (
:control_id, :title, :objective,
:requirements, :test_procedure, :evidence,
:severity, :category, 'draft',
:control_id, :title, :objective, :rationale,
:scope, :requirements,
:test_procedure, :evidence,
:severity, :open_anchors, :category,
'draft',
CAST(:parent_uuid AS uuid), 'pass0b',
:gen_meta
:gen_meta,
CAST(:framework_id AS uuid),
'pass0b', 2
)
RETURNING id::text
"""),
{
"control_id": atomic.candidate_id,
"title": atomic.title,
"objective": atomic.objective,
"rationale": getattr(atomic, "rationale", None) or "Aus Obligation abgeleitet.",
"scope": json.dumps({}),
"requirements": json.dumps(atomic.requirements),
"test_procedure": json.dumps(atomic.test_procedure),
"evidence": json.dumps(atomic.evidence),
"severity": atomic.severity,
"open_anchors": json.dumps([]),
"category": atomic.category,
"parent_uuid": parent_uuid,
"gen_meta": json.dumps({
"decomposition_source": candidate_id,
"decomposition_method": "pass0b",
}),
"framework_id": "14b1bdd2-abc7-4a43-adae-14471ee5c7cf",
},
)
row = result.fetchone()
new_uuid = row[0] if row else None
# Create M:N parent link (control_parent_links)
if new_uuid:
citation = _parse_citation(obl.get("parent_citation", ""))
self.db.execute(
text("""
INSERT INTO control_parent_links
(control_uuid, parent_control_uuid, link_type, confidence,
source_regulation, source_article, obligation_candidate_id)
VALUES
(CAST(:cu AS uuid), CAST(:pu AS uuid), 'decomposition', 1.0,
:sr, :sa, CAST(:oci AS uuid))
ON CONFLICT (control_uuid, parent_control_uuid) DO NOTHING
"""),
{
"cu": new_uuid,
"pu": parent_uuid,
"sr": citation.get("source", ""),
"sa": citation.get("article", ""),
"oci": obl["oc_id"],
},
)
return new_uuid
def _next_atomic_seq(self, parent_control_id: str) -> int:
"""Get the next sequence number for atomic controls under a parent."""
result = self.db.execute(
@@ -2004,6 +2053,22 @@ def _format_citation(citation) -> str:
return str(citation)
def _parse_citation(citation) -> dict:
"""Parse source_citation JSONB into a dict with source/article/paragraph."""
if not citation:
return {}
if isinstance(citation, dict):
return citation
if isinstance(citation, str):
try:
c = json.loads(citation)
if isinstance(c, dict):
return c
except (json.JSONDecodeError, TypeError):
pass
return {}
def _compute_extraction_confidence(flags: dict) -> float:
"""Compute confidence score from quality flags."""
score = 0.0

View File

@@ -1118,10 +1118,15 @@ class TestDecompositionPassAnthropicBatch:
call_count[0] += 1
if call_count[0] == 1:
return mock_rows # SELECT candidates
# _next_atomic_seq calls (every 3rd after first: 2, 5, 8, ...)
if call_count[0] in (2, 5):
# _next_atomic_seq calls: call 2 (control 1), call 6 (control 2)
if call_count[0] in (2, 6):
return mock_seq
return MagicMock() # INSERT/UPDATE
# INSERT RETURNING calls: call 3 (control 1), call 7 (control 2)
if call_count[0] in (3, 7):
mock_insert = MagicMock()
mock_insert.fetchone.return_value = (f"new-uuid-{call_count[0]}",)
return mock_insert
return MagicMock() # parent_links INSERT / UPDATE
mock_db.execute.side_effect = side_effect
batched_response = json.dumps({
@@ -1608,12 +1613,16 @@ class TestPass0bWithEnrichment:
mock_db = MagicMock()
mock_seq = MagicMock()
mock_seq.fetchone.return_value = (0,)
mock_insert = MagicMock()
mock_insert.fetchone.return_value = ("new-uuid-1",)
call_count = [0]
def side_effect(*args, **kwargs):
call_count[0] += 1
if call_count[0] == 1:
return mock_seq # _next_atomic_seq
if call_count[0] == 2:
return mock_insert # INSERT RETURNING id
return MagicMock()
mock_db.execute.side_effect = side_effect
@@ -1623,12 +1632,20 @@ class TestPass0bWithEnrichment:
decomp._process_pass0b_control(obl, parsed, stats)
)
# _write_atomic_control is call #2: db.execute(text(...), {params})
# _write_atomic_control INSERT is call #2: db.execute(text(...), {params})
insert_call = mock_db.execute.call_args_list[1]
# positional args: (text_obj, params_dict)
insert_params = insert_call[0][1]
assert insert_params["severity"] == "medium"
# parent_link INSERT is call #3
link_call = mock_db.execute.call_args_list[2]
link_query = str(link_call[0][0])
assert "control_parent_links" in link_query
link_params = link_call[0][1]
assert link_params["cu"] == "new-uuid-1"
assert link_params["pu"] == "p-uuid"
def test_test_obligation_gets_testing_category(self):
"""Test obligations should get category='testing'."""
obl = {
@@ -1664,12 +1681,16 @@ class TestPass0bWithEnrichment:
mock_db = MagicMock()
mock_seq = MagicMock()
mock_seq.fetchone.return_value = (0,)
mock_insert = MagicMock()
mock_insert.fetchone.return_value = ("new-uuid-2",)
call_count = [0]
def side_effect(*args, **kwargs):
call_count[0] += 1
if call_count[0] == 1:
return mock_seq
if call_count[0] == 2:
return mock_insert # INSERT RETURNING id
return MagicMock()
mock_db.execute.side_effect = side_effect
@@ -1679,7 +1700,99 @@ class TestPass0bWithEnrichment:
decomp._process_pass0b_control(obl, parsed, stats)
)
# _write_atomic_control is call #2: db.execute(text(...), {params})
# _write_atomic_control INSERT is call #2: db.execute(text(...), {params})
insert_call = mock_db.execute.call_args_list[1]
insert_params = insert_call[0][1]
assert insert_params["category"] == "testing"
def test_parent_link_created_with_source_citation(self):
    """The third db.execute call made while processing a pass 0b control
    is the control_parent_links INSERT, carrying the new control's UUID,
    the parent UUID, the obligation id and the regulation/article taken
    from the JSON-encoded parent_citation."""
    import asyncio
    import json as _json

    parent_citation = _json.dumps({
        "source": "DSGVO",
        "article": "Art. 5 Abs. 1 lit. c",
        "paragraph": "",
    })
    obl = {
        "oc_id": "oc-link-1",
        "candidate_id": "OC-DSGVO-01",
        "parent_uuid": "p-uuid-dsgvo",
        "obligation_text": "Daten minimieren",
        "action": "minimieren",
        "object": "personenbezogene Daten",
        "is_test": False,
        "is_reporting": False,
        "parent_title": "Datenminimierung",
        "parent_category": "privacy",
        "parent_citation": parent_citation,
        "parent_severity": "high",
        "parent_control_id": "PRIV-001",
        "source_ref": "DSGVO Art. 5 Abs. 1 lit. c",
        "trigger_type": "continuous",
        "is_implementation_specific": False,
    }
    parsed = {
        "title": "Personenbezogene Daten minimieren",
        "objective": "Nur erforderliche Daten erheben",
        "requirements": ["Datenminimierung"],
        "test_procedure": ["Audit"],
        "evidence": ["Protokoll"],
        "severity": "high",
        "category": "privacy",
    }
    stats = {
        "controls_created": 0,
        "candidates_processed": 0,
        "llm_failures": 0,
        "dedup_linked": 0,
        "dedup_review": 0,
    }

    # Result of the sequence lookup (first execute call).
    seq_result = MagicMock()
    seq_result.fetchone.return_value = (0,)
    # Result of the INSERT ... RETURNING (second execute call).
    insert_result = MagicMock()
    insert_result.fetchone.return_value = ("new-uuid-dsgvo",)

    executed = 0

    def fake_execute(*args, **kwargs):
        nonlocal executed
        executed += 1
        if executed == 1:
            return seq_result
        if executed == 2:
            return insert_result
        return MagicMock()

    mock_db = MagicMock()
    mock_db.execute.side_effect = fake_execute

    decomp = DecompositionPass(db=mock_db)
    asyncio.get_event_loop().run_until_complete(
        decomp._process_pass0b_control(obl, parsed, stats)
    )

    # Call #3 is the parent_link INSERT: positional args are (text_obj, params)
    link_statement, link_params = mock_db.execute.call_args_list[2][0][:2]
    assert "control_parent_links" in str(link_statement)
    assert link_params["cu"] == "new-uuid-dsgvo"
    assert link_params["pu"] == "p-uuid-dsgvo"
    assert link_params["sr"] == "DSGVO"
    assert link_params["sa"] == "Art. 5 Abs. 1 lit. c"
    assert link_params["oci"] == "oc-link-1"
def test_parse_citation_handles_formats(self):
    """_parse_citation accepts a JSON string or a dict and returns an
    empty dict for empty, None and non-JSON input."""
    import json as _json
    from compliance.services.decomposition_pass import _parse_citation

    # JSON-encoded object is decoded
    from_json = _parse_citation(
        _json.dumps({"source": "NIS2", "article": "Art. 21"})
    )
    assert from_json["source"] == "NIS2"
    assert from_json["article"] == "Art. 21"

    # A dict is passed through
    from_dict = _parse_citation({"source": "DSGVO", "article": "Art. 5"})
    assert from_dict["source"] == "DSGVO"

    # Empty string, None and invalid JSON all collapse to {}
    assert _parse_citation("") == {}
    assert _parse_citation(None) == {}
    assert _parse_citation("not json") == {}