Compare commits
12 Commits
cb034b8009
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
712fa8cb74 | ||
|
|
447ec08509 | ||
|
|
8cb1dc1108 | ||
|
|
f8d9919b97 | ||
|
|
fb2cf29b34 | ||
|
|
f39e5a71af | ||
|
|
ac42a0aaa0 | ||
|
|
52e463a7c8 | ||
|
|
2dee62fa6f | ||
|
|
3fb07e201f | ||
|
|
81c9ce5de3 | ||
|
|
db7c207464 |
@@ -50,9 +50,18 @@ export async function GET(request: NextRequest) {
|
||||
break
|
||||
}
|
||||
|
||||
case 'controls-meta':
|
||||
backendPath = '/api/compliance/v1/canonical/controls-meta'
|
||||
case 'controls-meta': {
|
||||
const metaParams = new URLSearchParams()
|
||||
const metaPassthrough = ['severity', 'domain', 'release_state', 'verification_method', 'category', 'evidence_type',
|
||||
'target_audience', 'source', 'search', 'control_type', 'exclude_duplicates']
|
||||
for (const key of metaPassthrough) {
|
||||
const val = searchParams.get(key)
|
||||
if (val) metaParams.set(key, val)
|
||||
}
|
||||
const metaQs = metaParams.toString()
|
||||
backendPath = `/api/compliance/v1/canonical/controls-meta${metaQs ? `?${metaQs}` : ''}`
|
||||
break
|
||||
}
|
||||
|
||||
case 'control': {
|
||||
const controlId = searchParams.get('id')
|
||||
@@ -135,6 +144,23 @@ export async function GET(request: NextRequest) {
|
||||
backendPath = '/api/compliance/v1/canonical/blocked-sources'
|
||||
break
|
||||
|
||||
case 'v1-matches': {
|
||||
const matchId = searchParams.get('id')
|
||||
if (!matchId) {
|
||||
return NextResponse.json({ error: 'Missing control id' }, { status: 400 })
|
||||
}
|
||||
backendPath = `/api/compliance/v1/canonical/controls/${encodeURIComponent(matchId)}/v1-matches`
|
||||
break
|
||||
}
|
||||
|
||||
case 'v1-enrichment-stats':
|
||||
backendPath = '/api/compliance/v1/canonical/controls/v1-enrichment-stats'
|
||||
break
|
||||
|
||||
case 'obligation-dedup-stats':
|
||||
backendPath = '/api/compliance/v1/canonical/obligations/dedup-stats'
|
||||
break
|
||||
|
||||
case 'controls-customer': {
|
||||
const custSeverity = searchParams.get('severity')
|
||||
const custDomain = searchParams.get('domain')
|
||||
@@ -201,6 +227,16 @@ export async function POST(request: NextRequest) {
|
||||
backendPath = '/api/compliance/v1/canonical/generate/bulk-review'
|
||||
} else if (endpoint === 'blocked-sources-cleanup') {
|
||||
backendPath = '/api/compliance/v1/canonical/blocked-sources/cleanup'
|
||||
} else if (endpoint === 'enrich-v1-matches') {
|
||||
const dryRun = searchParams.get('dry_run') ?? 'true'
|
||||
const batchSize = searchParams.get('batch_size') ?? '100'
|
||||
const enrichOffset = searchParams.get('offset') ?? '0'
|
||||
backendPath = `/api/compliance/v1/canonical/controls/enrich-v1-matches?dry_run=${dryRun}&batch_size=${batchSize}&offset=${enrichOffset}`
|
||||
} else if (endpoint === 'obligation-dedup') {
|
||||
const dryRun = searchParams.get('dry_run') ?? 'true'
|
||||
const batchSize = searchParams.get('batch_size') ?? '0'
|
||||
const dedupOffset = searchParams.get('offset') ?? '0'
|
||||
backendPath = `/api/compliance/v1/canonical/obligations/dedup?dry_run=${dryRun}&batch_size=${batchSize}&offset=${dedupOffset}`
|
||||
} else if (endpoint === 'similarity-check') {
|
||||
const controlId = searchParams.get('id')
|
||||
if (!controlId) {
|
||||
|
||||
@@ -308,7 +308,7 @@ export default function AtomicControlsPage() {
|
||||
<StateBadge state={ctrl.release_state} />
|
||||
<CategoryBadge category={ctrl.category} />
|
||||
<TargetAudienceBadge audience={ctrl.target_audience} />
|
||||
<GenerationStrategyBadge strategy={ctrl.generation_strategy} />
|
||||
<GenerationStrategyBadge strategy={ctrl.generation_strategy} pipelineInfo={ctrl} />
|
||||
<ObligationTypeBadge type={ctrl.generation_metadata?.obligation_type as string} />
|
||||
</div>
|
||||
<h3 className="text-sm font-medium text-gray-900 group-hover:text-violet-700">{ctrl.title}</h3>
|
||||
|
||||
@@ -9,7 +9,7 @@ import {
|
||||
import {
|
||||
CanonicalControl, EFFORT_LABELS, BACKEND_URL,
|
||||
SeverityBadge, StateBadge, LicenseRuleBadge, VerificationMethodBadge, CategoryBadge, EvidenceTypeBadge, TargetAudienceBadge,
|
||||
ObligationTypeBadge, GenerationStrategyBadge,
|
||||
ObligationTypeBadge, GenerationStrategyBadge, isEigenentwicklung,
|
||||
ExtractionMethodBadge, RegulationCountBadge,
|
||||
VERIFICATION_METHODS, CATEGORY_OPTIONS, EVIDENCE_TYPE_OPTIONS,
|
||||
ObligationInfo, DocumentReference, MergedDuplicate, RegulationSummary,
|
||||
@@ -65,6 +65,20 @@ interface TraceabilityData {
|
||||
regulations_summary?: RegulationSummary[]
|
||||
}
|
||||
|
||||
interface V1Match {
|
||||
matched_control_id: string
|
||||
matched_title: string
|
||||
matched_objective: string
|
||||
matched_severity: string
|
||||
matched_category: string
|
||||
matched_source: string | null
|
||||
matched_article: string | null
|
||||
matched_source_citation: Record<string, string> | null
|
||||
similarity_score: number
|
||||
match_rank: number
|
||||
match_method: string
|
||||
}
|
||||
|
||||
interface ControlDetailProps {
|
||||
ctrl: CanonicalControl
|
||||
onBack: () => void
|
||||
@@ -73,6 +87,7 @@ interface ControlDetailProps {
|
||||
onReview: (controlId: string, action: string) => void
|
||||
onRefresh?: () => void
|
||||
onNavigateToControl?: (controlId: string) => void
|
||||
onCompare?: (ctrl: CanonicalControl, matches: V1Match[]) => void
|
||||
// Review mode navigation
|
||||
reviewMode?: boolean
|
||||
reviewIndex?: number
|
||||
@@ -89,6 +104,7 @@ export function ControlDetail({
|
||||
onReview,
|
||||
onRefresh,
|
||||
onNavigateToControl,
|
||||
onCompare,
|
||||
reviewMode,
|
||||
reviewIndex = 0,
|
||||
reviewTotal = 0,
|
||||
@@ -101,6 +117,9 @@ export function ControlDetail({
|
||||
const [merging, setMerging] = useState(false)
|
||||
const [traceability, setTraceability] = useState<TraceabilityData | null>(null)
|
||||
const [loadingTrace, setLoadingTrace] = useState(false)
|
||||
const [v1Matches, setV1Matches] = useState<V1Match[]>([])
|
||||
const [loadingV1, setLoadingV1] = useState(false)
|
||||
const eigenentwicklung = isEigenentwicklung(ctrl)
|
||||
|
||||
const loadTraceability = useCallback(async () => {
|
||||
setLoadingTrace(true)
|
||||
@@ -117,9 +136,21 @@ export function ControlDetail({
|
||||
finally { setLoadingTrace(false) }
|
||||
}, [ctrl.control_id])
|
||||
|
||||
const loadV1Matches = useCallback(async () => {
|
||||
if (!eigenentwicklung) { setV1Matches([]); return }
|
||||
setLoadingV1(true)
|
||||
try {
|
||||
const res = await fetch(`${BACKEND_URL}?endpoint=v1-matches&id=${ctrl.control_id}`)
|
||||
if (res.ok) setV1Matches(await res.json())
|
||||
else setV1Matches([])
|
||||
} catch { setV1Matches([]) }
|
||||
finally { setLoadingV1(false) }
|
||||
}, [ctrl.control_id, eigenentwicklung])
|
||||
|
||||
useEffect(() => {
|
||||
loadSimilarControls()
|
||||
loadTraceability()
|
||||
loadV1Matches()
|
||||
setSelectedDuplicates(new Set())
|
||||
// eslint-disable-next-line react-hooks/exhaustive-deps
|
||||
}, [ctrl.control_id])
|
||||
@@ -187,7 +218,7 @@ export function ControlDetail({
|
||||
<CategoryBadge category={ctrl.category} />
|
||||
<EvidenceTypeBadge type={ctrl.evidence_type} />
|
||||
<TargetAudienceBadge audience={ctrl.target_audience} />
|
||||
<GenerationStrategyBadge strategy={ctrl.generation_strategy} />
|
||||
<GenerationStrategyBadge strategy={ctrl.generation_strategy} pipelineInfo={ctrl} />
|
||||
<ObligationTypeBadge type={ctrl.generation_metadata?.obligation_type as string} />
|
||||
</div>
|
||||
<h2 className="text-lg font-semibold text-gray-900 mt-1">{ctrl.title}</h2>
|
||||
@@ -303,6 +334,75 @@ export function ControlDetail({
|
||||
</section>
|
||||
)}
|
||||
|
||||
{/* Regulatorische Abdeckung (Eigenentwicklung) */}
|
||||
{eigenentwicklung && (
|
||||
<section className="bg-orange-50 border border-orange-200 rounded-lg p-4">
|
||||
<div className="flex items-center gap-2 mb-3">
|
||||
<Scale className="w-4 h-4 text-orange-600" />
|
||||
<h3 className="text-sm font-semibold text-orange-900">
|
||||
Regulatorische Abdeckung
|
||||
</h3>
|
||||
{loadingV1 && <span className="text-xs text-orange-400">Laden...</span>}
|
||||
</div>
|
||||
{v1Matches.length > 0 ? (
|
||||
<div className="space-y-2">
|
||||
{v1Matches.map((match, i) => (
|
||||
<div key={i} className="bg-white/60 border border-orange-100 rounded-lg p-3">
|
||||
<div className="flex items-start justify-between gap-2">
|
||||
<div className="flex-1 min-w-0">
|
||||
<div className="flex items-center gap-2 flex-wrap mb-1">
|
||||
{match.matched_source && (
|
||||
<span className="text-xs font-semibold text-blue-800 bg-blue-100 px-1.5 py-0.5 rounded">
|
||||
{match.matched_source}
|
||||
</span>
|
||||
)}
|
||||
{match.matched_article && (
|
||||
<span className="text-xs text-blue-700 bg-blue-50 px-1.5 py-0.5 rounded">
|
||||
{match.matched_article}
|
||||
</span>
|
||||
)}
|
||||
<span className={`text-xs font-medium px-1.5 py-0.5 rounded ${
|
||||
match.similarity_score >= 0.85 ? 'bg-green-100 text-green-700' :
|
||||
match.similarity_score >= 0.80 ? 'bg-yellow-100 text-yellow-700' :
|
||||
'bg-gray-100 text-gray-600'
|
||||
}`}>
|
||||
{(match.similarity_score * 100).toFixed(0)}%
|
||||
</span>
|
||||
</div>
|
||||
<p className="text-sm text-gray-800">
|
||||
{onNavigateToControl ? (
|
||||
<button
|
||||
onClick={() => onNavigateToControl(match.matched_control_id)}
|
||||
className="font-mono text-xs text-purple-600 bg-purple-50 px-1.5 py-0.5 rounded hover:bg-purple-100 hover:underline mr-1.5"
|
||||
>
|
||||
{match.matched_control_id}
|
||||
</button>
|
||||
) : (
|
||||
<span className="font-mono text-xs text-purple-600 bg-purple-50 px-1.5 py-0.5 rounded mr-1.5">
|
||||
{match.matched_control_id}
|
||||
</span>
|
||||
)}
|
||||
{match.matched_title}
|
||||
</p>
|
||||
</div>
|
||||
{onCompare && (
|
||||
<button
|
||||
onClick={() => onCompare(ctrl, v1Matches)}
|
||||
className="text-xs text-orange-600 border border-orange-300 rounded px-2 py-1 hover:bg-orange-100 whitespace-nowrap flex-shrink-0"
|
||||
>
|
||||
Vergleichen
|
||||
</button>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
) : !loadingV1 ? (
|
||||
<p className="text-sm text-orange-600">Keine regulatorische Abdeckung gefunden. Dieses Control ist eine reine Eigenentwicklung.</p>
|
||||
) : null}
|
||||
</section>
|
||||
)}
|
||||
|
||||
{/* Rechtsgrundlagen / Traceability (atomic controls) */}
|
||||
{traceability && traceability.parent_links.length > 0 && (
|
||||
<section className="bg-violet-50 border border-violet-200 rounded-lg p-4">
|
||||
|
||||
@@ -15,7 +15,7 @@ import {
|
||||
// Compact Control Panel (used on both sides of the comparison)
|
||||
// =============================================================================
|
||||
|
||||
function ControlPanel({ ctrl, label, highlight }: { ctrl: CanonicalControl; label: string; highlight?: boolean }) {
|
||||
export function ControlPanel({ ctrl, label, highlight }: { ctrl: CanonicalControl; label: string; highlight?: boolean }) {
|
||||
return (
|
||||
<div className={`flex flex-col h-full overflow-y-auto ${highlight ? 'bg-yellow-50' : 'bg-white'}`}>
|
||||
{/* Panel Header */}
|
||||
|
||||
@@ -0,0 +1,155 @@
|
||||
'use client'
|
||||
|
||||
import { useState, useEffect } from 'react'
|
||||
import {
|
||||
ArrowLeft, ChevronLeft, SkipForward, Scale,
|
||||
} from 'lucide-react'
|
||||
import { CanonicalControl, BACKEND_URL } from './helpers'
|
||||
import { ControlPanel } from './ReviewCompare'
|
||||
|
||||
interface V1Match {
|
||||
matched_control_id: string
|
||||
matched_title: string
|
||||
matched_objective: string
|
||||
matched_severity: string
|
||||
matched_category: string
|
||||
matched_source: string | null
|
||||
matched_article: string | null
|
||||
matched_source_citation: Record<string, string> | null
|
||||
similarity_score: number
|
||||
match_rank: number
|
||||
match_method: string
|
||||
}
|
||||
|
||||
interface V1CompareViewProps {
|
||||
v1Control: CanonicalControl
|
||||
matches: V1Match[]
|
||||
onBack: () => void
|
||||
onNavigateToControl?: (controlId: string) => void
|
||||
}
|
||||
|
||||
export function V1CompareView({ v1Control, matches, onBack, onNavigateToControl }: V1CompareViewProps) {
|
||||
const [currentMatchIndex, setCurrentMatchIndex] = useState(0)
|
||||
const [matchedControl, setMatchedControl] = useState<CanonicalControl | null>(null)
|
||||
const [loading, setLoading] = useState(false)
|
||||
|
||||
const currentMatch = matches[currentMatchIndex]
|
||||
|
||||
// Load the full matched control when index changes
|
||||
useEffect(() => {
|
||||
if (!currentMatch) return
|
||||
const load = async () => {
|
||||
setLoading(true)
|
||||
try {
|
||||
const res = await fetch(`${BACKEND_URL}?endpoint=control&id=${encodeURIComponent(currentMatch.matched_control_id)}`)
|
||||
if (res.ok) {
|
||||
setMatchedControl(await res.json())
|
||||
} else {
|
||||
setMatchedControl(null)
|
||||
}
|
||||
} catch {
|
||||
setMatchedControl(null)
|
||||
} finally {
|
||||
setLoading(false)
|
||||
}
|
||||
}
|
||||
load()
|
||||
}, [currentMatch])
|
||||
|
||||
return (
|
||||
<div className="flex flex-col h-full">
|
||||
{/* Header */}
|
||||
<div className="border-b border-gray-200 bg-white px-6 py-3 flex items-center justify-between">
|
||||
<div className="flex items-center gap-3">
|
||||
<button onClick={onBack} className="text-gray-400 hover:text-gray-600">
|
||||
<ArrowLeft className="w-5 h-5" />
|
||||
</button>
|
||||
<div>
|
||||
<div className="flex items-center gap-2">
|
||||
<Scale className="w-4 h-4 text-orange-500" />
|
||||
<span className="text-sm font-semibold text-gray-900">V1-Vergleich</span>
|
||||
{currentMatch && (
|
||||
<span className={`text-xs font-medium px-2 py-0.5 rounded-full ${
|
||||
currentMatch.similarity_score >= 0.85 ? 'bg-green-100 text-green-700' :
|
||||
currentMatch.similarity_score >= 0.80 ? 'bg-yellow-100 text-yellow-700' :
|
||||
'bg-gray-100 text-gray-600'
|
||||
}`}>
|
||||
{(currentMatch.similarity_score * 100).toFixed(1)}% Aehnlichkeit
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div className="flex items-center gap-2">
|
||||
{/* Navigation */}
|
||||
<div className="flex items-center gap-1">
|
||||
<button
|
||||
onClick={() => setCurrentMatchIndex(Math.max(0, currentMatchIndex - 1))}
|
||||
disabled={currentMatchIndex === 0}
|
||||
className="p-1 text-gray-400 hover:text-gray-600 disabled:opacity-30"
|
||||
>
|
||||
<ChevronLeft className="w-4 h-4" />
|
||||
</button>
|
||||
<span className="text-xs text-gray-500 font-medium">
|
||||
{currentMatchIndex + 1} / {matches.length}
|
||||
</span>
|
||||
<button
|
||||
onClick={() => setCurrentMatchIndex(Math.min(matches.length - 1, currentMatchIndex + 1))}
|
||||
disabled={currentMatchIndex >= matches.length - 1}
|
||||
className="p-1 text-gray-400 hover:text-gray-600 disabled:opacity-30"
|
||||
>
|
||||
<SkipForward className="w-4 h-4" />
|
||||
</button>
|
||||
</div>
|
||||
|
||||
{/* Navigate to matched control */}
|
||||
{onNavigateToControl && matchedControl && (
|
||||
<button
|
||||
onClick={() => { onBack(); onNavigateToControl(matchedControl.control_id) }}
|
||||
className="px-3 py-1.5 text-sm text-purple-600 border border-purple-300 rounded-lg hover:bg-purple-50"
|
||||
>
|
||||
Zum Control
|
||||
</button>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Source info bar */}
|
||||
{currentMatch && (currentMatch.matched_source || currentMatch.matched_article) && (
|
||||
<div className="px-6 py-2 bg-blue-50 border-b border-blue-200 flex items-center gap-2 text-sm">
|
||||
<Scale className="w-3.5 h-3.5 text-blue-600" />
|
||||
{currentMatch.matched_source && (
|
||||
<span className="font-semibold text-blue-900">{currentMatch.matched_source}</span>
|
||||
)}
|
||||
{currentMatch.matched_article && (
|
||||
<span className="text-blue-700">{currentMatch.matched_article}</span>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Side-by-Side Panels */}
|
||||
<div className="flex-1 flex overflow-hidden">
|
||||
{/* Left: V1 Eigenentwicklung */}
|
||||
<div className="w-1/2 border-r border-gray-200 overflow-y-auto">
|
||||
<ControlPanel ctrl={v1Control} label="Eigenentwicklung" highlight />
|
||||
</div>
|
||||
|
||||
{/* Right: Regulatory match */}
|
||||
<div className="w-1/2 overflow-y-auto">
|
||||
{loading ? (
|
||||
<div className="flex items-center justify-center h-full">
|
||||
<div className="animate-spin rounded-full h-6 w-6 border-2 border-purple-600 border-t-transparent" />
|
||||
</div>
|
||||
) : matchedControl ? (
|
||||
<ControlPanel ctrl={matchedControl} label="Regulatorisch gedeckt" />
|
||||
) : (
|
||||
<div className="flex items-center justify-center h-full text-gray-400 text-sm">
|
||||
Control konnte nicht geladen werden
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
@@ -52,6 +52,7 @@ export interface CanonicalControl {
|
||||
parent_control_id?: string | null
|
||||
parent_control_title?: string | null
|
||||
decomposition_method?: string | null
|
||||
pipeline_version?: number | string | null
|
||||
created_at: string
|
||||
updated_at: string
|
||||
}
|
||||
@@ -293,7 +294,29 @@ export function TargetAudienceBadge({ audience }: { audience: string | string[]
|
||||
)
|
||||
}
|
||||
|
||||
export function GenerationStrategyBadge({ strategy }: { strategy: string | null | undefined }) {
|
||||
export interface CanonicalControlPipelineInfo {
|
||||
pipeline_version?: number | string | null
|
||||
source_citation?: Record<string, string> | null
|
||||
parent_control_uuid?: string | null
|
||||
}
|
||||
|
||||
export function isEigenentwicklung(ctrl: CanonicalControlPipelineInfo & { generation_strategy?: string | null }): boolean {
|
||||
return (
|
||||
(!ctrl.generation_strategy || ctrl.generation_strategy === 'ungrouped') &&
|
||||
(!ctrl.pipeline_version || String(ctrl.pipeline_version) === '1') &&
|
||||
!ctrl.source_citation &&
|
||||
!ctrl.parent_control_uuid
|
||||
)
|
||||
}
|
||||
|
||||
export function GenerationStrategyBadge({ strategy, pipelineInfo }: {
|
||||
strategy: string | null | undefined
|
||||
pipelineInfo?: CanonicalControlPipelineInfo & { generation_strategy?: string | null }
|
||||
}) {
|
||||
// Eigenentwicklung detection: v1 + no source + no parent
|
||||
if (pipelineInfo && isEigenentwicklung(pipelineInfo)) {
|
||||
return <span className="inline-flex items-center px-1.5 py-0.5 rounded text-xs font-medium bg-orange-100 text-orange-700">Eigenentwicklung</span>
|
||||
}
|
||||
if (!strategy || strategy === 'ungrouped') {
|
||||
return <span className="inline-flex items-center px-1.5 py-0.5 rounded text-xs font-medium bg-gray-100 text-gray-500">v1</span>
|
||||
}
|
||||
|
||||
@@ -15,6 +15,7 @@ import {
|
||||
import { ControlForm } from './components/ControlForm'
|
||||
import { ControlDetail } from './components/ControlDetail'
|
||||
import { ReviewCompare } from './components/ReviewCompare'
|
||||
import { V1CompareView } from './components/V1CompareView'
|
||||
import { GeneratorModal } from './components/GeneratorModal'
|
||||
|
||||
// =============================================================================
|
||||
@@ -26,6 +27,16 @@ interface ControlsMeta {
|
||||
domains: Array<{ domain: string; count: number }>
|
||||
sources: Array<{ source: string; count: number }>
|
||||
no_source_count: number
|
||||
type_counts?: {
|
||||
rich: number
|
||||
atomic: number
|
||||
eigenentwicklung: number
|
||||
}
|
||||
severity_counts?: Record<string, number>
|
||||
verification_method_counts?: Record<string, number>
|
||||
category_counts?: Record<string, number>
|
||||
evidence_type_counts?: Record<string, number>
|
||||
release_state_counts?: Record<string, number>
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
@@ -79,6 +90,21 @@ export default function ControlLibraryPage() {
|
||||
const [reviewDuplicates, setReviewDuplicates] = useState<CanonicalControl[]>([])
|
||||
const [reviewRule3, setReviewRule3] = useState<CanonicalControl[]>([])
|
||||
|
||||
// V1 Compare mode
|
||||
const [compareMode, setCompareMode] = useState(false)
|
||||
const [compareV1Control, setCompareV1Control] = useState<CanonicalControl | null>(null)
|
||||
const [compareMatches, setCompareMatches] = useState<Array<{
|
||||
matched_control_id: string; matched_title: string; matched_objective: string
|
||||
matched_severity: string; matched_category: string
|
||||
matched_source: string | null; matched_article: string | null
|
||||
matched_source_citation: Record<string, string> | null
|
||||
similarity_score: number; match_rank: number; match_method: string
|
||||
}>>([])
|
||||
|
||||
// Abort controllers for cancelling stale requests
|
||||
const metaAbortRef = useRef<AbortController | null>(null)
|
||||
const controlsAbortRef = useRef<AbortController | null>(null)
|
||||
|
||||
// Debounce search
|
||||
const searchTimer = useRef<ReturnType<typeof setTimeout> | null>(null)
|
||||
useEffect(() => {
|
||||
@@ -105,20 +131,33 @@ export default function ControlLibraryPage() {
|
||||
return p.toString()
|
||||
}, [severityFilter, domainFilter, stateFilter, verificationFilter, categoryFilter, evidenceTypeFilter, audienceFilter, sourceFilter, typeFilter, hideDuplicates, debouncedSearch])
|
||||
|
||||
// Load metadata (domains, sources — once + on refresh)
|
||||
const loadMeta = useCallback(async () => {
|
||||
// Load frameworks (once)
|
||||
const loadFrameworks = useCallback(async () => {
|
||||
try {
|
||||
const [fwRes, metaRes] = await Promise.all([
|
||||
fetch(`${BACKEND_URL}?endpoint=frameworks`),
|
||||
fetch(`${BACKEND_URL}?endpoint=controls-meta`),
|
||||
])
|
||||
if (fwRes.ok) setFrameworks(await fwRes.json())
|
||||
if (metaRes.ok) setMeta(await metaRes.json())
|
||||
const res = await fetch(`${BACKEND_URL}?endpoint=frameworks`)
|
||||
if (res.ok) setFrameworks(await res.json())
|
||||
} catch { /* ignore */ }
|
||||
}, [])
|
||||
|
||||
// Load controls page
|
||||
// Load faceted metadata (reloads when filters change, cancels stale requests)
|
||||
const loadMeta = useCallback(async () => {
|
||||
if (metaAbortRef.current) metaAbortRef.current.abort()
|
||||
const controller = new AbortController()
|
||||
metaAbortRef.current = controller
|
||||
try {
|
||||
const qs = buildParams()
|
||||
const res = await fetch(`${BACKEND_URL}?endpoint=controls-meta${qs ? `&${qs}` : ''}`, { signal: controller.signal })
|
||||
if (res.ok && !controller.signal.aborted) setMeta(await res.json())
|
||||
} catch (e) {
|
||||
if (e instanceof DOMException && e.name === 'AbortError') return
|
||||
}
|
||||
}, [buildParams])
|
||||
|
||||
// Load controls page (cancels stale requests)
|
||||
const loadControls = useCallback(async () => {
|
||||
if (controlsAbortRef.current) controlsAbortRef.current.abort()
|
||||
const controller = new AbortController()
|
||||
controlsAbortRef.current = controller
|
||||
try {
|
||||
setLoading(true)
|
||||
|
||||
@@ -137,19 +176,22 @@ export default function ControlLibraryPage() {
|
||||
const countQs = buildParams()
|
||||
|
||||
const [ctrlRes, countRes] = await Promise.all([
|
||||
fetch(`${BACKEND_URL}?endpoint=controls&${qs}`),
|
||||
fetch(`${BACKEND_URL}?endpoint=controls-count&${countQs}`),
|
||||
fetch(`${BACKEND_URL}?endpoint=controls&${qs}`, { signal: controller.signal }),
|
||||
fetch(`${BACKEND_URL}?endpoint=controls-count&${countQs}`, { signal: controller.signal }),
|
||||
])
|
||||
|
||||
if (ctrlRes.ok) setControls(await ctrlRes.json())
|
||||
if (countRes.ok) {
|
||||
const data = await countRes.json()
|
||||
setTotalCount(data.total || 0)
|
||||
if (!controller.signal.aborted) {
|
||||
if (ctrlRes.ok) setControls(await ctrlRes.json())
|
||||
if (countRes.ok) {
|
||||
const data = await countRes.json()
|
||||
setTotalCount(data.total || 0)
|
||||
}
|
||||
}
|
||||
} catch (err) {
|
||||
if (err instanceof DOMException && err.name === 'AbortError') return
|
||||
setError(err instanceof Error ? err.message : 'Fehler beim Laden')
|
||||
} finally {
|
||||
setLoading(false)
|
||||
if (!controller.signal.aborted) setLoading(false)
|
||||
}
|
||||
}, [buildParams, sortBy, currentPage])
|
||||
|
||||
@@ -164,8 +206,11 @@ export default function ControlLibraryPage() {
|
||||
} catch { /* ignore */ }
|
||||
}, [])
|
||||
|
||||
// Initial load
|
||||
useEffect(() => { loadMeta(); loadReviewCount() }, [loadMeta, loadReviewCount])
|
||||
// Initial load (frameworks only once)
|
||||
useEffect(() => { loadFrameworks(); loadReviewCount() }, [loadFrameworks, loadReviewCount])
|
||||
|
||||
// Load faceted meta when filters change
|
||||
useEffect(() => { loadMeta() }, [loadMeta])
|
||||
|
||||
// Load controls when filters/page/sort change
|
||||
useEffect(() => { loadControls() }, [loadControls])
|
||||
@@ -178,8 +223,8 @@ export default function ControlLibraryPage() {
|
||||
|
||||
// Full reload (after CRUD)
|
||||
const fullReload = useCallback(async () => {
|
||||
await Promise.all([loadControls(), loadMeta(), loadReviewCount()])
|
||||
}, [loadControls, loadMeta, loadReviewCount])
|
||||
await Promise.all([loadControls(), loadMeta(), loadFrameworks(), loadReviewCount()])
|
||||
}, [loadControls, loadMeta, loadFrameworks, loadReviewCount])
|
||||
|
||||
// CRUD handlers
|
||||
const handleCreate = async (data: typeof EMPTY_CONTROL) => {
|
||||
@@ -398,6 +443,27 @@ export default function ControlLibraryPage() {
|
||||
)
|
||||
}
|
||||
|
||||
// V1 COMPARE MODE
|
||||
if (compareMode && compareV1Control) {
|
||||
return (
|
||||
<V1CompareView
|
||||
v1Control={compareV1Control}
|
||||
matches={compareMatches}
|
||||
onBack={() => { setCompareMode(false) }}
|
||||
onNavigateToControl={async (controlId: string) => {
|
||||
try {
|
||||
const res = await fetch(`${BACKEND_URL}?endpoint=control&id=${controlId}`)
|
||||
if (res.ok) {
|
||||
setCompareMode(false)
|
||||
setSelectedControl(await res.json())
|
||||
setMode('detail')
|
||||
}
|
||||
} catch { /* ignore */ }
|
||||
}}
|
||||
/>
|
||||
)
|
||||
}
|
||||
|
||||
// DETAIL MODE
|
||||
if (mode === 'detail' && selectedControl) {
|
||||
const isDuplicateReview = reviewMode && reviewTab === 'duplicates'
|
||||
@@ -467,6 +533,11 @@ export default function ControlLibraryPage() {
|
||||
onDelete={handleDelete}
|
||||
onReview={handleReview}
|
||||
onRefresh={fullReload}
|
||||
onCompare={(ctrl, matches) => {
|
||||
setCompareV1Control(ctrl)
|
||||
setCompareMatches(matches)
|
||||
setCompareMode(true)
|
||||
}}
|
||||
onNavigateToControl={async (controlId: string) => {
|
||||
try {
|
||||
const res = await fetch(`${BACKEND_URL}?endpoint=control&id=${controlId}`)
|
||||
@@ -584,7 +655,7 @@ export default function ControlLibraryPage() {
|
||||
/>
|
||||
</div>
|
||||
<button
|
||||
onClick={() => { loadControls(); loadMeta(); loadReviewCount() }}
|
||||
onClick={() => { loadControls(); loadMeta(); loadFrameworks(); loadReviewCount() }}
|
||||
className="p-2 text-gray-400 hover:text-purple-600"
|
||||
title="Aktualisieren"
|
||||
>
|
||||
@@ -599,10 +670,10 @@ export default function ControlLibraryPage() {
|
||||
className="text-sm border border-gray-300 rounded-lg px-2 py-1.5 focus:outline-none focus:ring-2 focus:ring-purple-500"
|
||||
>
|
||||
<option value="">Schweregrad</option>
|
||||
<option value="critical">Kritisch</option>
|
||||
<option value="high">Hoch</option>
|
||||
<option value="medium">Mittel</option>
|
||||
<option value="low">Niedrig</option>
|
||||
<option value="critical">Kritisch{meta?.severity_counts?.critical ? ` (${meta.severity_counts.critical})` : ''}</option>
|
||||
<option value="high">Hoch{meta?.severity_counts?.high ? ` (${meta.severity_counts.high})` : ''}</option>
|
||||
<option value="medium">Mittel{meta?.severity_counts?.medium ? ` (${meta.severity_counts.medium})` : ''}</option>
|
||||
<option value="low">Niedrig{meta?.severity_counts?.low ? ` (${meta.severity_counts.low})` : ''}</option>
|
||||
</select>
|
||||
<select
|
||||
value={domainFilter}
|
||||
@@ -620,12 +691,12 @@ export default function ControlLibraryPage() {
|
||||
className="text-sm border border-gray-300 rounded-lg px-2 py-1.5 focus:outline-none focus:ring-2 focus:ring-purple-500"
|
||||
>
|
||||
<option value="">Status</option>
|
||||
<option value="draft">Draft</option>
|
||||
<option value="approved">Approved</option>
|
||||
<option value="needs_review">Review noetig</option>
|
||||
<option value="too_close">Zu aehnlich</option>
|
||||
<option value="duplicate">Duplikat</option>
|
||||
<option value="deprecated">Deprecated</option>
|
||||
<option value="draft">Draft{meta?.release_state_counts?.draft ? ` (${meta.release_state_counts.draft})` : ''}</option>
|
||||
<option value="approved">Approved{meta?.release_state_counts?.approved ? ` (${meta.release_state_counts.approved})` : ''}</option>
|
||||
<option value="needs_review">Review noetig{meta?.release_state_counts?.needs_review ? ` (${meta.release_state_counts.needs_review})` : ''}</option>
|
||||
<option value="too_close">Zu aehnlich{meta?.release_state_counts?.too_close ? ` (${meta.release_state_counts.too_close})` : ''}</option>
|
||||
<option value="duplicate">Duplikat{meta?.release_state_counts?.duplicate ? ` (${meta.release_state_counts.duplicate})` : ''}</option>
|
||||
<option value="deprecated">Deprecated{meta?.release_state_counts?.deprecated ? ` (${meta.release_state_counts.deprecated})` : ''}</option>
|
||||
</select>
|
||||
<label className="flex items-center gap-1.5 text-sm text-gray-600 cursor-pointer whitespace-nowrap">
|
||||
<input
|
||||
@@ -643,8 +714,9 @@ export default function ControlLibraryPage() {
|
||||
>
|
||||
<option value="">Nachweis</option>
|
||||
{Object.entries(VERIFICATION_METHODS).map(([k, v]) => (
|
||||
<option key={k} value={k}>{v.label}</option>
|
||||
<option key={k} value={k}>{v.label}{meta?.verification_method_counts?.[k] ? ` (${meta.verification_method_counts[k]})` : ''}</option>
|
||||
))}
|
||||
{meta?.verification_method_counts?.['__none__'] ? <option value="__none__">Ohne Nachweis ({meta.verification_method_counts['__none__']})</option> : null}
|
||||
</select>
|
||||
<select
|
||||
value={categoryFilter}
|
||||
@@ -653,8 +725,9 @@ export default function ControlLibraryPage() {
|
||||
>
|
||||
<option value="">Kategorie</option>
|
||||
{CATEGORY_OPTIONS.map(c => (
|
||||
<option key={c.value} value={c.value}>{c.label}</option>
|
||||
<option key={c.value} value={c.value}>{c.label}{meta?.category_counts?.[c.value] ? ` (${meta.category_counts[c.value]})` : ''}</option>
|
||||
))}
|
||||
{meta?.category_counts?.['__none__'] ? <option value="__none__">Ohne Kategorie ({meta.category_counts['__none__']})</option> : null}
|
||||
</select>
|
||||
<select
|
||||
value={evidenceTypeFilter}
|
||||
@@ -663,8 +736,9 @@ export default function ControlLibraryPage() {
|
||||
>
|
||||
<option value="">Nachweisart</option>
|
||||
{EVIDENCE_TYPE_OPTIONS.map(c => (
|
||||
<option key={c.value} value={c.value}>{c.label}</option>
|
||||
<option key={c.value} value={c.value}>{c.label}{meta?.evidence_type_counts?.[c.value] ? ` (${meta.evidence_type_counts[c.value]})` : ''}</option>
|
||||
))}
|
||||
{meta?.evidence_type_counts?.['__none__'] ? <option value="__none__">Ohne Nachweisart ({meta.evidence_type_counts['__none__']})</option> : null}
|
||||
</select>
|
||||
<select
|
||||
value={audienceFilter}
|
||||
@@ -705,8 +779,9 @@ export default function ControlLibraryPage() {
|
||||
className="text-sm border border-gray-300 rounded-lg px-2 py-1.5 focus:outline-none focus:ring-2 focus:ring-purple-500"
|
||||
>
|
||||
<option value="">Alle Typen</option>
|
||||
<option value="rich">Rich Controls</option>
|
||||
<option value="atomic">Atomare Controls</option>
|
||||
<option value="rich">Rich Controls{meta?.type_counts ? ` (${meta.type_counts.rich})` : ''}</option>
|
||||
<option value="atomic">Atomare Controls{meta?.type_counts ? ` (${meta.type_counts.atomic})` : ''}</option>
|
||||
<option value="eigenentwicklung">Eigenentwicklung{meta?.type_counts ? ` (${meta.type_counts.eigenentwicklung})` : ''}</option>
|
||||
</select>
|
||||
<span className="text-gray-300 mx-1">|</span>
|
||||
<ArrowUpDown className="w-4 h-4 text-gray-400" />
|
||||
@@ -806,7 +881,7 @@ export default function ControlLibraryPage() {
|
||||
<CategoryBadge category={ctrl.category} />
|
||||
<EvidenceTypeBadge type={ctrl.evidence_type} />
|
||||
<TargetAudienceBadge audience={ctrl.target_audience} />
|
||||
<GenerationStrategyBadge strategy={ctrl.generation_strategy} />
|
||||
<GenerationStrategyBadge strategy={ctrl.generation_strategy} pipelineInfo={ctrl} />
|
||||
<ObligationTypeBadge type={ctrl.generation_metadata?.obligation_type as string} />
|
||||
{ctrl.risk_score !== null && (
|
||||
<span className="text-xs text-gray-400">Score: {ctrl.risk_score}</span>
|
||||
|
||||
@@ -347,14 +347,23 @@ async def list_controls(
|
||||
query += " AND release_state = :rs"
|
||||
params["rs"] = release_state
|
||||
if verification_method:
|
||||
query += " AND verification_method = :vm"
|
||||
params["vm"] = verification_method
|
||||
if verification_method == "__none__":
|
||||
query += " AND verification_method IS NULL"
|
||||
else:
|
||||
query += " AND verification_method = :vm"
|
||||
params["vm"] = verification_method
|
||||
if category:
|
||||
query += " AND category = :cat"
|
||||
params["cat"] = category
|
||||
if category == "__none__":
|
||||
query += " AND category IS NULL"
|
||||
else:
|
||||
query += " AND category = :cat"
|
||||
params["cat"] = category
|
||||
if evidence_type:
|
||||
query += " AND evidence_type = :et"
|
||||
params["et"] = evidence_type
|
||||
if evidence_type == "__none__":
|
||||
query += " AND evidence_type IS NULL"
|
||||
else:
|
||||
query += " AND evidence_type = :et"
|
||||
params["et"] = evidence_type
|
||||
if target_audience:
|
||||
query += " AND target_audience LIKE :ta_pattern"
|
||||
params["ta_pattern"] = f'%"{target_audience}"%'
|
||||
@@ -368,6 +377,11 @@ async def list_controls(
|
||||
query += " AND decomposition_method = 'pass0b'"
|
||||
elif control_type == "rich":
|
||||
query += " AND (decomposition_method IS NULL OR decomposition_method != 'pass0b')"
|
||||
elif control_type == "eigenentwicklung":
|
||||
query += """ AND generation_strategy = 'ungrouped'
|
||||
AND (pipeline_version = '1' OR pipeline_version IS NULL)
|
||||
AND source_citation IS NULL
|
||||
AND parent_control_uuid IS NULL"""
|
||||
if search:
|
||||
query += " AND (control_id ILIKE :q OR title ILIKE :q OR objective ILIKE :q)"
|
||||
params["q"] = f"%{search}%"
|
||||
@@ -429,14 +443,23 @@ async def count_controls(
|
||||
query += " AND release_state = :rs"
|
||||
params["rs"] = release_state
|
||||
if verification_method:
|
||||
query += " AND verification_method = :vm"
|
||||
params["vm"] = verification_method
|
||||
if verification_method == "__none__":
|
||||
query += " AND verification_method IS NULL"
|
||||
else:
|
||||
query += " AND verification_method = :vm"
|
||||
params["vm"] = verification_method
|
||||
if category:
|
||||
query += " AND category = :cat"
|
||||
params["cat"] = category
|
||||
if category == "__none__":
|
||||
query += " AND category IS NULL"
|
||||
else:
|
||||
query += " AND category = :cat"
|
||||
params["cat"] = category
|
||||
if evidence_type:
|
||||
query += " AND evidence_type = :et"
|
||||
params["et"] = evidence_type
|
||||
if evidence_type == "__none__":
|
||||
query += " AND evidence_type IS NULL"
|
||||
else:
|
||||
query += " AND evidence_type = :et"
|
||||
params["et"] = evidence_type
|
||||
if target_audience:
|
||||
query += " AND target_audience LIKE :ta_pattern"
|
||||
params["ta_pattern"] = f'%"{target_audience}"%'
|
||||
@@ -450,6 +473,11 @@ async def count_controls(
|
||||
query += " AND decomposition_method = 'pass0b'"
|
||||
elif control_type == "rich":
|
||||
query += " AND (decomposition_method IS NULL OR decomposition_method != 'pass0b')"
|
||||
elif control_type == "eigenentwicklung":
|
||||
query += """ AND generation_strategy = 'ungrouped'
|
||||
AND (pipeline_version = '1' OR pipeline_version IS NULL)
|
||||
AND source_citation IS NULL
|
||||
AND parent_control_uuid IS NULL"""
|
||||
if search:
|
||||
query += " AND (control_id ILIKE :q OR title ILIKE :q OR objective ILIKE :q)"
|
||||
params["q"] = f"%{search}%"
|
||||
@@ -461,34 +489,189 @@ async def count_controls(
|
||||
|
||||
|
||||
@router.get("/controls-meta")
|
||||
async def controls_meta():
|
||||
"""Return aggregated metadata for filter dropdowns (domains, sources, counts)."""
|
||||
async def controls_meta(
|
||||
severity: Optional[str] = Query(None),
|
||||
domain: Optional[str] = Query(None),
|
||||
release_state: Optional[str] = Query(None),
|
||||
verification_method: Optional[str] = Query(None),
|
||||
category: Optional[str] = Query(None),
|
||||
evidence_type: Optional[str] = Query(None),
|
||||
target_audience: Optional[str] = Query(None),
|
||||
source: Optional[str] = Query(None),
|
||||
search: Optional[str] = Query(None),
|
||||
control_type: Optional[str] = Query(None),
|
||||
exclude_duplicates: bool = Query(False),
|
||||
):
|
||||
"""Return faceted metadata for filter dropdowns.
|
||||
|
||||
Each facet's counts respect ALL active filters EXCEPT the facet's own,
|
||||
so dropdowns always show how many items each option would yield.
|
||||
"""
|
||||
|
||||
def _build_where(skip: Optional[str] = None) -> tuple[str, dict[str, Any]]:
|
||||
clauses = ["1=1"]
|
||||
p: dict[str, Any] = {}
|
||||
|
||||
if exclude_duplicates:
|
||||
clauses.append("release_state != 'duplicate'")
|
||||
if severity and skip != "severity":
|
||||
clauses.append("severity = :sev")
|
||||
p["sev"] = severity
|
||||
if domain and skip != "domain":
|
||||
clauses.append("LEFT(control_id, LENGTH(:dom)) = :dom")
|
||||
p["dom"] = domain.upper()
|
||||
if release_state and skip != "release_state":
|
||||
clauses.append("release_state = :rs")
|
||||
p["rs"] = release_state
|
||||
if verification_method and skip != "verification_method":
|
||||
if verification_method == "__none__":
|
||||
clauses.append("verification_method IS NULL")
|
||||
else:
|
||||
clauses.append("verification_method = :vm")
|
||||
p["vm"] = verification_method
|
||||
if category and skip != "category":
|
||||
if category == "__none__":
|
||||
clauses.append("category IS NULL")
|
||||
else:
|
||||
clauses.append("category = :cat")
|
||||
p["cat"] = category
|
||||
if evidence_type and skip != "evidence_type":
|
||||
if evidence_type == "__none__":
|
||||
clauses.append("evidence_type IS NULL")
|
||||
else:
|
||||
clauses.append("evidence_type = :et")
|
||||
p["et"] = evidence_type
|
||||
if target_audience and skip != "target_audience":
|
||||
clauses.append("target_audience LIKE :ta_pattern")
|
||||
p["ta_pattern"] = f'%"{target_audience}"%'
|
||||
if source and skip != "source":
|
||||
if source == "__none__":
|
||||
clauses.append("(source_citation IS NULL OR source_citation->>'source' IS NULL OR source_citation->>'source' = '')")
|
||||
else:
|
||||
clauses.append("source_citation->>'source' = :src")
|
||||
p["src"] = source
|
||||
if control_type and skip != "control_type":
|
||||
if control_type == "atomic":
|
||||
clauses.append("decomposition_method = 'pass0b'")
|
||||
elif control_type == "rich":
|
||||
clauses.append("(decomposition_method IS NULL OR decomposition_method != 'pass0b')")
|
||||
elif control_type == "eigenentwicklung":
|
||||
clauses.append("""generation_strategy = 'ungrouped'
|
||||
AND (pipeline_version = '1' OR pipeline_version IS NULL)
|
||||
AND source_citation IS NULL
|
||||
AND parent_control_uuid IS NULL""")
|
||||
if search and skip != "search":
|
||||
clauses.append("(control_id ILIKE :q OR title ILIKE :q OR objective ILIKE :q)")
|
||||
p["q"] = f"%{search}%"
|
||||
|
||||
return " AND ".join(clauses), p
|
||||
|
||||
with SessionLocal() as db:
|
||||
total = db.execute(text("SELECT count(*) FROM canonical_controls")).scalar()
|
||||
# Total with ALL filters
|
||||
w_all, p_all = _build_where()
|
||||
total = db.execute(text(f"SELECT count(*) FROM canonical_controls WHERE {w_all}"), p_all).scalar()
|
||||
|
||||
domains = db.execute(text("""
|
||||
# Domain facet (skip domain filter so user sees all domains)
|
||||
w_dom, p_dom = _build_where(skip="domain")
|
||||
domains = db.execute(text(f"""
|
||||
SELECT UPPER(SPLIT_PART(control_id, '-', 1)) as domain, count(*) as cnt
|
||||
FROM canonical_controls
|
||||
FROM canonical_controls WHERE {w_dom}
|
||||
GROUP BY domain ORDER BY domain
|
||||
""")).fetchall()
|
||||
"""), p_dom).fetchall()
|
||||
|
||||
sources = db.execute(text("""
|
||||
# Source facet (skip source filter)
|
||||
w_src, p_src = _build_where(skip="source")
|
||||
sources = db.execute(text(f"""
|
||||
SELECT source_citation->>'source' as src, count(*) as cnt
|
||||
FROM canonical_controls
|
||||
WHERE source_citation->>'source' IS NOT NULL AND source_citation->>'source' != ''
|
||||
WHERE {w_src}
|
||||
AND source_citation->>'source' IS NOT NULL AND source_citation->>'source' != ''
|
||||
GROUP BY src ORDER BY cnt DESC
|
||||
""")).fetchall()
|
||||
"""), p_src).fetchall()
|
||||
|
||||
no_source = db.execute(text("""
|
||||
no_source = db.execute(text(f"""
|
||||
SELECT count(*) FROM canonical_controls
|
||||
WHERE source_citation IS NULL OR source_citation->>'source' IS NULL OR source_citation->>'source' = ''
|
||||
""")).scalar()
|
||||
WHERE {w_src}
|
||||
AND (source_citation IS NULL OR source_citation->>'source' IS NULL OR source_citation->>'source' = '')
|
||||
"""), p_src).scalar()
|
||||
|
||||
# Type facet (skip control_type filter)
|
||||
w_typ, p_typ = _build_where(skip="control_type")
|
||||
atomic_count = db.execute(text(f"""
|
||||
SELECT count(*) FROM canonical_controls
|
||||
WHERE {w_typ} AND decomposition_method = 'pass0b'
|
||||
"""), p_typ).scalar() or 0
|
||||
|
||||
eigenentwicklung_count = db.execute(text(f"""
|
||||
SELECT count(*) FROM canonical_controls
|
||||
WHERE {w_typ}
|
||||
AND generation_strategy = 'ungrouped'
|
||||
AND (pipeline_version = '1' OR pipeline_version IS NULL)
|
||||
AND source_citation IS NULL
|
||||
AND parent_control_uuid IS NULL
|
||||
"""), p_typ).scalar() or 0
|
||||
|
||||
rich_count = db.execute(text(f"""
|
||||
SELECT count(*) FROM canonical_controls
|
||||
WHERE {w_typ}
|
||||
AND (decomposition_method IS NULL OR decomposition_method != 'pass0b')
|
||||
"""), p_typ).scalar() or 0
|
||||
|
||||
# Severity facet (skip severity filter)
|
||||
w_sev, p_sev = _build_where(skip="severity")
|
||||
severity_counts = db.execute(text(f"""
|
||||
SELECT severity, count(*) as cnt
|
||||
FROM canonical_controls WHERE {w_sev}
|
||||
GROUP BY severity ORDER BY severity
|
||||
"""), p_sev).fetchall()
|
||||
|
||||
# Verification method facet (include NULLs as __none__)
|
||||
w_vm, p_vm = _build_where(skip="verification_method")
|
||||
vm_counts = db.execute(text(f"""
|
||||
SELECT COALESCE(verification_method, '__none__') as vm, count(*) as cnt
|
||||
FROM canonical_controls WHERE {w_vm}
|
||||
GROUP BY vm ORDER BY vm
|
||||
"""), p_vm).fetchall()
|
||||
|
||||
# Category facet (include NULLs as __none__)
|
||||
w_cat, p_cat = _build_where(skip="category")
|
||||
cat_counts = db.execute(text(f"""
|
||||
SELECT COALESCE(category, '__none__') as cat, count(*) as cnt
|
||||
FROM canonical_controls WHERE {w_cat}
|
||||
GROUP BY cat ORDER BY cnt DESC
|
||||
"""), p_cat).fetchall()
|
||||
|
||||
# Evidence type facet (include NULLs as __none__)
|
||||
w_et, p_et = _build_where(skip="evidence_type")
|
||||
et_counts = db.execute(text(f"""
|
||||
SELECT COALESCE(evidence_type, '__none__') as et, count(*) as cnt
|
||||
FROM canonical_controls WHERE {w_et}
|
||||
GROUP BY et ORDER BY et
|
||||
"""), p_et).fetchall()
|
||||
|
||||
# Release state facet
|
||||
w_rs, p_rs = _build_where(skip="release_state")
|
||||
rs_counts = db.execute(text(f"""
|
||||
SELECT release_state, count(*) as cnt
|
||||
FROM canonical_controls WHERE {w_rs}
|
||||
GROUP BY release_state ORDER BY release_state
|
||||
"""), p_rs).fetchall()
|
||||
|
||||
return {
|
||||
"total": total,
|
||||
"domains": [{"domain": r[0], "count": r[1]} for r in domains],
|
||||
"sources": [{"source": r[0], "count": r[1]} for r in sources],
|
||||
"no_source_count": no_source,
|
||||
"type_counts": {
|
||||
"rich": rich_count,
|
||||
"atomic": atomic_count,
|
||||
"eigenentwicklung": eigenentwicklung_count,
|
||||
},
|
||||
"severity_counts": {r[0]: r[1] for r in severity_counts},
|
||||
"verification_method_counts": {r[0]: r[1] for r in vm_counts},
|
||||
"category_counts": {r[0]: r[1] for r in cat_counts},
|
||||
"evidence_type_counts": {r[0]: r[1] for r in et_counts},
|
||||
"release_state_counts": {r[0]: r[1] for r in rs_counts},
|
||||
}
|
||||
|
||||
|
||||
@@ -547,6 +730,15 @@ async def atomic_stats():
|
||||
}
|
||||
|
||||
|
||||
@router.get("/controls/v1-enrichment-stats")
|
||||
async def v1_enrichment_stats_endpoint():
|
||||
"""
|
||||
Uebersicht: Wie viele v1 Controls haben regulatorische Abdeckung?
|
||||
"""
|
||||
from compliance.services.v1_enrichment import get_v1_enrichment_stats
|
||||
return await get_v1_enrichment_stats()
|
||||
|
||||
|
||||
@router.get("/controls/{control_id}")
|
||||
async def get_control(control_id: str):
|
||||
"""Get a single canonical control by its control_id (e.g. AUTH-001)."""
|
||||
@@ -823,7 +1015,7 @@ async def get_control_provenance(control_id: str):
|
||||
normative_strength, release_state
|
||||
FROM obligation_candidates
|
||||
WHERE parent_control_uuid = CAST(:uid AS uuid)
|
||||
AND release_state NOT IN ('rejected', 'merged')
|
||||
AND release_state NOT IN ('rejected', 'merged', 'duplicate')
|
||||
ORDER BY candidate_id
|
||||
"""),
|
||||
{"uid": ctrl_uuid},
|
||||
@@ -958,7 +1150,7 @@ async def backfill_normative_strength(
|
||||
cc.source_citation->>'source' AS parent_source
|
||||
FROM obligation_candidates oc
|
||||
JOIN canonical_controls cc ON cc.id = oc.parent_control_uuid
|
||||
WHERE oc.release_state NOT IN ('rejected', 'merged')
|
||||
WHERE oc.release_state NOT IN ('rejected', 'merged', 'duplicate')
|
||||
AND oc.normative_strength IS NOT NULL
|
||||
ORDER BY oc.candidate_id
|
||||
""")).fetchall()
|
||||
@@ -1009,6 +1201,162 @@ async def backfill_normative_strength(
|
||||
}
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# OBLIGATION DEDUPLICATION
|
||||
# =============================================================================
|
||||
|
||||
@router.post("/obligations/dedup")
|
||||
async def dedup_obligations(
|
||||
dry_run: bool = Query(True, description="Nur zaehlen, nicht aendern"),
|
||||
batch_size: int = Query(0, description="0 = alle auf einmal"),
|
||||
offset: int = Query(0, description="Offset fuer Batch-Verarbeitung"),
|
||||
):
|
||||
"""
|
||||
Markiert doppelte obligation_candidates als 'duplicate'.
|
||||
|
||||
Duplikate = mehrere Eintraege mit gleichem candidate_id.
|
||||
Pro candidate_id wird der aelteste Eintrag (MIN(created_at)) behalten,
|
||||
alle anderen erhalten release_state='duplicate' und merged_into_id
|
||||
zeigt auf den behaltenen Eintrag.
|
||||
"""
|
||||
with SessionLocal() as db:
|
||||
# 1. Finde alle candidate_ids mit mehr als einem Eintrag
|
||||
# (nur noch nicht-deduplizierte beruecksichtigen)
|
||||
dup_query = """
|
||||
SELECT candidate_id, count(*) as cnt
|
||||
FROM obligation_candidates
|
||||
WHERE release_state NOT IN ('rejected', 'merged', 'duplicate')
|
||||
GROUP BY candidate_id
|
||||
HAVING count(*) > 1
|
||||
ORDER BY candidate_id
|
||||
"""
|
||||
if batch_size > 0:
|
||||
dup_query += f" LIMIT {batch_size} OFFSET {offset}"
|
||||
|
||||
dup_groups = db.execute(text(dup_query)).fetchall()
|
||||
|
||||
total_groups = db.execute(text("""
|
||||
SELECT count(*) FROM (
|
||||
SELECT candidate_id
|
||||
FROM obligation_candidates
|
||||
WHERE release_state NOT IN ('rejected', 'merged', 'duplicate')
|
||||
GROUP BY candidate_id
|
||||
HAVING count(*) > 1
|
||||
) sub
|
||||
""")).scalar()
|
||||
|
||||
# 2. Pro Gruppe: aeltesten behalten, Rest als duplicate markieren
|
||||
kept_count = 0
|
||||
duplicate_count = 0
|
||||
sample_changes: list[dict[str, Any]] = []
|
||||
|
||||
for grp in dup_groups:
|
||||
cid = grp.candidate_id
|
||||
|
||||
# Alle Eintraege fuer dieses candidate_id holen
|
||||
entries = db.execute(text("""
|
||||
SELECT id, candidate_id, obligation_text, release_state, created_at
|
||||
FROM obligation_candidates
|
||||
WHERE candidate_id = :cid
|
||||
AND release_state NOT IN ('rejected', 'merged', 'duplicate')
|
||||
ORDER BY created_at ASC, id ASC
|
||||
"""), {"cid": cid}).fetchall()
|
||||
|
||||
if len(entries) < 2:
|
||||
continue
|
||||
|
||||
keeper = entries[0] # aeltester Eintrag
|
||||
duplicates = entries[1:]
|
||||
kept_count += 1
|
||||
duplicate_count += len(duplicates)
|
||||
|
||||
if len(sample_changes) < 20:
|
||||
sample_changes.append({
|
||||
"candidate_id": cid,
|
||||
"kept_id": str(keeper.id),
|
||||
"kept_text": keeper.obligation_text[:100],
|
||||
"duplicate_count": len(duplicates),
|
||||
"duplicate_ids": [str(d.id) for d in duplicates],
|
||||
})
|
||||
|
||||
if not dry_run:
|
||||
for dup in duplicates:
|
||||
db.execute(text("""
|
||||
UPDATE obligation_candidates
|
||||
SET release_state = 'duplicate',
|
||||
merged_into_id = CAST(:keeper_id AS uuid),
|
||||
quality_flags = COALESCE(quality_flags, '{}'::jsonb)
|
||||
|| jsonb_build_object(
|
||||
'dedup_reason', 'duplicate of ' || :keeper_cid,
|
||||
'dedup_kept_id', :keeper_id_str,
|
||||
'dedup_at', NOW()::text
|
||||
)
|
||||
WHERE id = CAST(:dup_id AS uuid)
|
||||
"""), {
|
||||
"keeper_id": str(keeper.id),
|
||||
"keeper_cid": cid,
|
||||
"keeper_id_str": str(keeper.id),
|
||||
"dup_id": str(dup.id),
|
||||
})
|
||||
|
||||
if not dry_run and duplicate_count > 0:
|
||||
db.commit()
|
||||
|
||||
return {
|
||||
"dry_run": dry_run,
|
||||
"stats": {
|
||||
"total_duplicate_groups": total_groups,
|
||||
"processed_groups": len(dup_groups),
|
||||
"kept": kept_count,
|
||||
"marked_duplicate": duplicate_count,
|
||||
},
|
||||
"sample_changes": sample_changes,
|
||||
}
|
||||
|
||||
|
||||
@router.get("/obligations/dedup-stats")
|
||||
async def dedup_obligations_stats():
|
||||
"""Statistiken ueber den aktuellen Dedup-Status der Obligations."""
|
||||
with SessionLocal() as db:
|
||||
total = db.execute(text(
|
||||
"SELECT count(*) FROM obligation_candidates"
|
||||
)).scalar()
|
||||
|
||||
by_state = db.execute(text("""
|
||||
SELECT release_state, count(*) as cnt
|
||||
FROM obligation_candidates
|
||||
GROUP BY release_state
|
||||
ORDER BY release_state
|
||||
""")).fetchall()
|
||||
|
||||
dup_groups = db.execute(text("""
|
||||
SELECT count(*) FROM (
|
||||
SELECT candidate_id
|
||||
FROM obligation_candidates
|
||||
WHERE release_state NOT IN ('rejected', 'merged', 'duplicate')
|
||||
GROUP BY candidate_id
|
||||
HAVING count(*) > 1
|
||||
) sub
|
||||
""")).scalar()
|
||||
|
||||
removable = db.execute(text("""
|
||||
SELECT COALESCE(sum(cnt - 1), 0) FROM (
|
||||
SELECT candidate_id, count(*) as cnt
|
||||
FROM obligation_candidates
|
||||
WHERE release_state NOT IN ('rejected', 'merged', 'duplicate')
|
||||
GROUP BY candidate_id
|
||||
HAVING count(*) > 1
|
||||
) sub
|
||||
""")).scalar()
|
||||
|
||||
return {
|
||||
"total_obligations": total,
|
||||
"by_state": {r.release_state: r.cnt for r in by_state},
|
||||
"pending_duplicate_groups": dup_groups,
|
||||
"pending_removable_duplicates": removable,
|
||||
}
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# EVIDENCE TYPE BACKFILL
|
||||
# =============================================================================
|
||||
@@ -1567,6 +1915,57 @@ async def list_licenses():
|
||||
return get_license_matrix(db)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# V1 ENRICHMENT (Eigenentwicklung → Regulatorische Abdeckung)
|
||||
# =============================================================================
|
||||
|
||||
@router.post("/controls/enrich-v1-matches")
|
||||
async def enrich_v1_matches_endpoint(
|
||||
dry_run: bool = Query(True, description="Nur zaehlen, nicht schreiben"),
|
||||
batch_size: int = Query(100, description="Controls pro Durchlauf"),
|
||||
offset: int = Query(0, description="Offset fuer Paginierung"),
|
||||
):
|
||||
"""
|
||||
Findet regulatorische Abdeckung fuer v1 Eigenentwicklung Controls.
|
||||
|
||||
Eigenentwicklung = generation_strategy='ungrouped', pipeline_version=1,
|
||||
source_citation IS NULL, parent_control_uuid IS NULL.
|
||||
|
||||
Workflow:
|
||||
1. dry_run=true → Statistiken anzeigen
|
||||
2. dry_run=false&batch_size=100&offset=0 → Erste 100 verarbeiten
|
||||
3. Wiederholen mit next_offset bis fertig
|
||||
"""
|
||||
from compliance.services.v1_enrichment import enrich_v1_matches
|
||||
return await enrich_v1_matches(
|
||||
dry_run=dry_run,
|
||||
batch_size=batch_size,
|
||||
offset=offset,
|
||||
)
|
||||
|
||||
|
||||
@router.get("/controls/{control_id}/v1-matches")
|
||||
async def get_v1_matches_endpoint(control_id: str):
|
||||
"""
|
||||
Gibt regulatorische Matches fuer ein v1 Control zurueck.
|
||||
|
||||
Returns:
|
||||
Liste von Matches mit Control-Details, Source, Score.
|
||||
"""
|
||||
from compliance.services.v1_enrichment import get_v1_matches
|
||||
|
||||
# Resolve control_id to UUID
|
||||
with SessionLocal() as db:
|
||||
row = db.execute(text("""
|
||||
SELECT id FROM canonical_controls WHERE control_id = :cid
|
||||
"""), {"cid": control_id}).fetchone()
|
||||
|
||||
if not row:
|
||||
raise HTTPException(status_code=404, detail=f"Control {control_id} not found")
|
||||
|
||||
return await get_v1_matches(str(row.id))
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# INTERNAL HELPERS
|
||||
# =============================================================================
|
||||
|
||||
@@ -459,7 +459,9 @@ def _split_compound_action(action: str) -> list[str]:
|
||||
# ── 2. Action Type Classification (18 types) ────────────────────────────
|
||||
|
||||
_ACTION_PRIORITY = [
|
||||
"prevent", "exclude", "forbid",
|
||||
"implement", "configure", "encrypt", "restrict_access",
|
||||
"enforce", "invalidate", "issue", "rotate",
|
||||
"monitor", "review", "assess", "audit",
|
||||
"test", "verify", "validate",
|
||||
"report", "notify", "train",
|
||||
@@ -470,7 +472,41 @@ _ACTION_PRIORITY = [
|
||||
]
|
||||
|
||||
_ACTION_KEYWORDS: list[tuple[str, str]] = [
|
||||
# Multi-word patterns first (longest match wins)
|
||||
# ── Negative / prohibitive actions (highest priority) ────
|
||||
("dürfen keine", "prevent"),
|
||||
("dürfen nicht", "prevent"),
|
||||
("darf keine", "prevent"),
|
||||
("darf nicht", "prevent"),
|
||||
("nicht zulässig", "forbid"),
|
||||
("nicht erlaubt", "forbid"),
|
||||
("nicht gestattet", "forbid"),
|
||||
("untersagt", "forbid"),
|
||||
("verboten", "forbid"),
|
||||
("nicht enthalten", "exclude"),
|
||||
("nicht übertragen", "prevent"),
|
||||
("nicht übermittelt", "prevent"),
|
||||
("nicht wiederverwendet", "prevent"),
|
||||
("nicht gespeichert", "prevent"),
|
||||
("verhindern", "prevent"),
|
||||
("unterbinden", "prevent"),
|
||||
("ausschließen", "exclude"),
|
||||
("vermeiden", "prevent"),
|
||||
("ablehnen", "exclude"),
|
||||
("zurückweisen", "exclude"),
|
||||
# ── Session / lifecycle actions ──────────────────────────
|
||||
("ungültig machen", "invalidate"),
|
||||
("invalidieren", "invalidate"),
|
||||
("widerrufen", "invalidate"),
|
||||
("session beenden", "invalidate"),
|
||||
("vergeben", "issue"),
|
||||
("ausstellen", "issue"),
|
||||
("erzeugen", "issue"),
|
||||
("generieren", "issue"),
|
||||
("rotieren", "rotate"),
|
||||
("erneuern", "rotate"),
|
||||
("durchsetzen", "enforce"),
|
||||
("erzwingen", "enforce"),
|
||||
# ── Multi-word patterns (longest match wins) ─────────────
|
||||
("aktuell halten", "maintain"),
|
||||
("aufrechterhalten", "maintain"),
|
||||
("sicherstellen", "ensure"),
|
||||
@@ -565,6 +601,15 @@ _ACTION_KEYWORDS: list[tuple[str, str]] = [
|
||||
("remediate", "remediate"),
|
||||
("perform", "perform"),
|
||||
("obtain", "obtain"),
|
||||
("prevent", "prevent"),
|
||||
("forbid", "forbid"),
|
||||
("exclude", "exclude"),
|
||||
("invalidate", "invalidate"),
|
||||
("revoke", "invalidate"),
|
||||
("issue", "issue"),
|
||||
("generate", "issue"),
|
||||
("rotate", "rotate"),
|
||||
("enforce", "enforce"),
|
||||
]
|
||||
|
||||
|
||||
@@ -627,11 +672,29 @@ _OBJECT_CLASS_KEYWORDS: dict[str, list[str]] = {
|
||||
"access_control": [
|
||||
"authentifizierung", "autorisierung", "zugriff",
|
||||
"berechtigung", "passwort", "kennwort", "anmeldung",
|
||||
"sso", "rbac", "session",
|
||||
"sso", "rbac",
|
||||
],
|
||||
"session": [
|
||||
"session", "sitzung", "sitzungsverwaltung", "session management",
|
||||
"session-id", "session-token", "idle timeout",
|
||||
"inaktivitäts-timeout", "inaktivitätszeitraum",
|
||||
"logout", "abmeldung",
|
||||
],
|
||||
"cookie": [
|
||||
"cookie", "session-cookie", "secure-flag", "httponly",
|
||||
"samesite", "cookie-attribut",
|
||||
],
|
||||
"jwt": [
|
||||
"jwt", "json web token", "bearer token",
|
||||
"jwt-algorithmus", "jwt-signatur",
|
||||
],
|
||||
"federated_assertion": [
|
||||
"assertion", "saml", "oidc", "openid",
|
||||
"föderiert", "federated", "identity provider",
|
||||
],
|
||||
"cryptographic_control": [
|
||||
"schlüssel", "zertifikat", "signatur", "kryptographi",
|
||||
"cipher", "hash", "token",
|
||||
"cipher", "hash", "token", "entropie",
|
||||
],
|
||||
"configuration": [
|
||||
"konfiguration", "einstellung", "parameter",
|
||||
@@ -1030,6 +1093,85 @@ _ACTION_TEMPLATES: dict[str, dict[str, list[str]]] = {
|
||||
"Gültigkeitsprüfung mit Zeitstempeln",
|
||||
],
|
||||
},
|
||||
# ── Prevent / Exclude / Forbid (negative norms) ────────────
|
||||
"prevent": {
|
||||
"test_procedure": [
|
||||
"Prüfung, dass {object} technisch verhindert wird",
|
||||
"Stichprobe: Versuch der verbotenen Aktion schlägt fehl",
|
||||
"Review der Konfiguration und Zugriffskontrollen",
|
||||
],
|
||||
"evidence": [
|
||||
"Konfigurationsnachweis der Präventionsmassnahme",
|
||||
"Testprotokoll der Negativtests",
|
||||
],
|
||||
},
|
||||
"exclude": {
|
||||
"test_procedure": [
|
||||
"Prüfung, dass {object} ausgeschlossen ist",
|
||||
"Stichprobe: Verbotene Inhalte/Aktionen sind nicht vorhanden",
|
||||
"Automatisierter Scan oder manuelle Prüfung",
|
||||
],
|
||||
"evidence": [
|
||||
"Scan-Ergebnis oder Prüfprotokoll",
|
||||
"Konfigurationsnachweis",
|
||||
],
|
||||
},
|
||||
"forbid": {
|
||||
"test_procedure": [
|
||||
"Prüfung, dass {object} untersagt und technisch blockiert ist",
|
||||
"Verifizierung der Richtlinie und technischen Durchsetzung",
|
||||
"Stichprobe: Versuch der untersagten Aktion wird abgelehnt",
|
||||
],
|
||||
"evidence": [
|
||||
"Richtlinie mit explizitem Verbot",
|
||||
"Technischer Nachweis der Blockierung",
|
||||
],
|
||||
},
|
||||
# ── Enforce / Invalidate / Issue / Rotate ────────────────
|
||||
"enforce": {
|
||||
"test_procedure": [
|
||||
"Prüfung der technischen Durchsetzung von {object}",
|
||||
"Stichprobe: Nicht-konforme Konfigurationen werden automatisch korrigiert oder abgelehnt",
|
||||
"Review der Enforcement-Regeln und Ausnahmen",
|
||||
],
|
||||
"evidence": [
|
||||
"Enforcement-Policy mit technischer Umsetzung",
|
||||
"Protokoll erzwungener Korrekturen oder Ablehnungen",
|
||||
],
|
||||
},
|
||||
"invalidate": {
|
||||
"test_procedure": [
|
||||
"Prüfung, dass {object} korrekt ungültig gemacht wird",
|
||||
"Stichprobe: Nach Invalidierung kein Zugriff mehr möglich",
|
||||
"Verifizierung der serverseitigen Bereinigung",
|
||||
],
|
||||
"evidence": [
|
||||
"Protokoll der Invalidierungsaktionen",
|
||||
"Testnachweis der Zugriffsverweigerung nach Invalidierung",
|
||||
],
|
||||
},
|
||||
"issue": {
|
||||
"test_procedure": [
|
||||
"Prüfung des Vergabeprozesses für {object}",
|
||||
"Verifizierung der kryptographischen Sicherheit und Entropie",
|
||||
"Stichprobe: Korrekte Vergabe unter definierten Bedingungen",
|
||||
],
|
||||
"evidence": [
|
||||
"Prozessdokumentation der Vergabe",
|
||||
"Nachweis der Entropie-/Sicherheitseigenschaften",
|
||||
],
|
||||
},
|
||||
"rotate": {
|
||||
"test_procedure": [
|
||||
"Prüfung des Rotationsprozesses für {object}",
|
||||
"Verifizierung der Rotationsfrequenz und automatischen Auslöser",
|
||||
"Stichprobe: Alte Artefakte nach Rotation ungültig",
|
||||
],
|
||||
"evidence": [
|
||||
"Rotationsrichtlinie mit Frequenz",
|
||||
"Rotationsprotokoll mit Zeitstempeln",
|
||||
],
|
||||
},
|
||||
# ── Approve / Remediate ───────────────────────────────────
|
||||
"approve": {
|
||||
"test_procedure": [
|
||||
@@ -1415,20 +1557,127 @@ _OBJECT_SYNONYMS: dict[str, str] = {
|
||||
"zugriff": "access_control",
|
||||
"einwilligung": "consent",
|
||||
"zustimmung": "consent",
|
||||
# Near-synonym expansions found via heavy-control analysis (2026-03-28)
|
||||
"erkennung": "detection",
|
||||
"früherkennung": "detection",
|
||||
"frühzeitige erkennung": "detection",
|
||||
"frühzeitigen erkennung": "detection",
|
||||
"detektion": "detection",
|
||||
"eskalation": "escalation",
|
||||
"eskalationsprozess": "escalation",
|
||||
"eskalationsverfahren": "escalation",
|
||||
"benachrichtigungsprozess": "notification",
|
||||
"benachrichtigungsverfahren": "notification",
|
||||
"meldeprozess": "notification",
|
||||
"meldeverfahren": "notification",
|
||||
"meldesystem": "notification",
|
||||
"benachrichtigungssystem": "notification",
|
||||
"überwachung": "monitoring",
|
||||
"monitoring": "monitoring",
|
||||
"kontinuierliche überwachung": "monitoring",
|
||||
"laufende überwachung": "monitoring",
|
||||
"prüfung": "audit",
|
||||
"überprüfung": "audit",
|
||||
"kontrolle": "control_check",
|
||||
"sicherheitskontrolle": "control_check",
|
||||
"dokumentation": "documentation",
|
||||
"aufzeichnungspflicht": "documentation",
|
||||
"protokollierung": "logging",
|
||||
"logführung": "logging",
|
||||
"logmanagement": "logging",
|
||||
"wiederherstellung": "recovery",
|
||||
"notfallwiederherstellung": "recovery",
|
||||
"disaster recovery": "recovery",
|
||||
"notfallplan": "contingency_plan",
|
||||
"notfallplanung": "contingency_plan",
|
||||
"wiederanlaufplan": "contingency_plan",
|
||||
"klassifizierung": "classification",
|
||||
"kategorisierung": "classification",
|
||||
"einstufung": "classification",
|
||||
"segmentierung": "segmentation",
|
||||
"netzwerksegmentierung": "segmentation",
|
||||
"netzwerk-segmentierung": "segmentation",
|
||||
"trennung": "segmentation",
|
||||
"isolierung": "isolation",
|
||||
"patch": "patch_mgmt",
|
||||
"patchmanagement": "patch_mgmt",
|
||||
"patch-management": "patch_mgmt",
|
||||
"aktualisierung": "patch_mgmt",
|
||||
"softwareaktualisierung": "patch_mgmt",
|
||||
"härtung": "hardening",
|
||||
"systemhärtung": "hardening",
|
||||
"härtungsmaßnahme": "hardening",
|
||||
"löschung": "deletion",
|
||||
"datenlöschung": "deletion",
|
||||
"löschkonzept": "deletion",
|
||||
"anonymisierung": "anonymization",
|
||||
"pseudonymisierung": "pseudonymization",
|
||||
"zugangssteuerung": "access_control",
|
||||
"zugangskontrolle": "access_control",
|
||||
"zugriffssteuerung": "access_control",
|
||||
"zugriffskontrolle": "access_control",
|
||||
"schlüsselmanagement": "key_mgmt",
|
||||
"schlüsselverwaltung": "key_mgmt",
|
||||
"key management": "key_mgmt",
|
||||
"zertifikatsverwaltung": "cert_mgmt",
|
||||
"zertifikatsmanagement": "cert_mgmt",
|
||||
"lieferant": "vendor",
|
||||
"dienstleister": "vendor",
|
||||
"auftragsverarbeiter": "vendor",
|
||||
"drittanbieter": "vendor",
|
||||
# Session management synonyms (2026-03-28)
|
||||
"sitzung": "session",
|
||||
"sitzungsverwaltung": "session_mgmt",
|
||||
"session management": "session_mgmt",
|
||||
"session-id": "session_token",
|
||||
"sitzungstoken": "session_token",
|
||||
"session-token": "session_token",
|
||||
"idle timeout": "session_timeout",
|
||||
"inaktivitäts-timeout": "session_timeout",
|
||||
"inaktivitätszeitraum": "session_timeout",
|
||||
"abmeldung": "logout",
|
||||
"cookie-attribut": "cookie_security",
|
||||
"secure-flag": "cookie_security",
|
||||
"httponly": "cookie_security",
|
||||
"samesite": "cookie_security",
|
||||
"json web token": "jwt",
|
||||
"bearer token": "jwt",
|
||||
"föderierte assertion": "federated_assertion",
|
||||
"saml assertion": "federated_assertion",
|
||||
}
|
||||
|
||||
|
||||
def _truncate_title(title: str, max_len: int = 80) -> str:
|
||||
"""Truncate title at word boundary to avoid mid-word cuts."""
|
||||
if len(title) <= max_len:
|
||||
return title
|
||||
truncated = title[:max_len]
|
||||
# Cut at last space to avoid mid-word truncation
|
||||
last_space = truncated.rfind(" ")
|
||||
if last_space > max_len // 2:
|
||||
return truncated[:last_space]
|
||||
return truncated
|
||||
|
||||
|
||||
def _normalize_object(object_raw: str) -> str:
|
||||
"""Normalize object text to a snake_case key for merge hints.
|
||||
|
||||
Applies synonym mapping to collapse German terms to canonical forms
|
||||
(e.g., 'Richtlinie' -> 'policy', 'Verzeichnis' -> 'register').
|
||||
Then strips qualifying prepositional phrases that would create
|
||||
near-duplicate keys (e.g., 'bei Schwellenwertüberschreitung').
|
||||
Truncates to 40 chars to collapse overly specific variants.
|
||||
"""
|
||||
if not object_raw:
|
||||
return "unknown"
|
||||
|
||||
obj_lower = object_raw.strip().lower()
|
||||
|
||||
# Strip qualifying prepositional phrases that don't change core identity.
|
||||
# These create near-duplicate keys like "eskalationsprozess" vs
|
||||
# "eskalationsprozess bei schwellenwertüberschreitung".
|
||||
obj_lower = _QUALIFYING_PHRASE_RE.sub("", obj_lower).strip()
|
||||
|
||||
# Synonym mapping — find the longest matching synonym
|
||||
best_match = ""
|
||||
best_canonical = ""
|
||||
@@ -1444,7 +1693,54 @@ def _normalize_object(object_raw: str) -> str:
|
||||
for src, dst in [("ä", "ae"), ("ö", "oe"), ("ü", "ue"), ("ß", "ss")]:
|
||||
obj = obj.replace(src, dst)
|
||||
obj = re.sub(r"[^a-z0-9_]", "", obj)
|
||||
return obj[:80] or "unknown"
|
||||
|
||||
# Strip trailing noise tokens (articles/prepositions stuck at the end)
|
||||
obj = re.sub(r"(_(?:der|die|das|des|dem|den|fuer|bei|von|zur|zum|mit|auf|in|und|oder|aus|an|ueber|nach|gegen|unter|vor|zwischen|als|durch|ohne|wie))+$", "", obj)
|
||||
|
||||
# Truncate at 40 chars (at underscore boundary) to collapse
|
||||
# overly specific suffixes that create near-duplicate keys.
|
||||
obj = _truncate_at_boundary(obj, 40)
|
||||
|
||||
return obj or "unknown"
|
||||
|
||||
|
||||
# Regex to strip German qualifying prepositional phrases from object text.
# Matches patterns like "bei schwellenwertüberschreitung",
# "für kritische systeme", "gemäß artikel 32" etc.
# NOTE: the trailing ".*$" removes everything from the first matched
# preposition through the end of the string, not just the phrase itself —
# "prozess bei X und Y" collapses to "prozess".
_QUALIFYING_PHRASE_RE = re.compile(
    r"\s+(?:"
    r"bei\s+\w+"
    r"|für\s+(?:die\s+|den\s+|das\s+|kritische\s+)?\w+"
    r"|gemäß\s+\w+"
    r"|nach\s+\w+"
    r"|von\s+\w+"
    r"|im\s+(?:falle?\s+|rahmen\s+)?\w+"
    r"|mit\s+(?:den\s+|der\s+|dem\s+)?\w+"
    r"|auf\s+(?:basis|grundlage)\s+\w+"
    r"|zur\s+(?:einhaltung|sicherstellung|gewährleistung|vermeidung|erfüllung)\s*\w*"
    r"|durch\s+(?:den\s+|die\s+|das\s+)?\w+"
    r"|über\s+(?:den\s+|die\s+|das\s+)?\w+"
    r"|unter\s+\w+"
    r"|zwischen\s+\w+"
    r"|innerhalb\s+\w+"
    r"|gegenüber\s+\w+"
    r"|hinsichtlich\s+\w+"
    r"|bezüglich\s+\w+"
    r").*$",
    re.IGNORECASE,
)
|
||||
|
||||
|
||||
def _truncate_at_boundary(text: str, max_len: int) -> str:
|
||||
"""Truncate text at the last underscore boundary within max_len."""
|
||||
if len(text) <= max_len:
|
||||
return text
|
||||
truncated = text[:max_len]
|
||||
last_sep = truncated.rfind("_")
|
||||
if last_sep > max_len // 2:
|
||||
return truncated[:last_sep]
|
||||
return truncated
|
||||
|
||||
|
||||
# ── 7b. Framework / Composite Detection ──────────────────────────────────
|
||||
@@ -1461,11 +1757,33 @@ _COMPOSITE_OBJECT_KEYWORDS: list[str] = [
|
||||
"soc 2", "soc2", "enisa", "kritis",
|
||||
]
|
||||
|
||||
# Container objects that are too broad for atomic controls.
# These produce titles like "Sichere Sitzungsverwaltung umgesetzt" which
# are not auditable — they encompass multiple sub-requirements.
_CONTAINER_OBJECT_KEYWORDS: list[str] = [
    "sitzungsverwaltung", "session management", "session-management",
    "token-schutz", "tokenschutz",
    "authentifizierungsmechanismen", "authentifizierungsmechanismus",
    "sicherheitsmaßnahmen", "sicherheitsmassnahmen",
    "schutzmaßnahmen", "schutzmassnahmen",
    "zugriffskontrollmechanismen",
    "sicherheitsarchitektur",
    "sicherheitskontrollen",
    "datenschutzmaßnahmen", "datenschutzmassnahmen",
    "compliance-anforderungen",
    "risikomanagementprozess",
]

# Alternation over all framework + composite keywords; used as a plain
# case-insensitive substring search (no word boundaries).
# NOTE(review): keywords are joined without re.escape() — none of the
# visible entries contain regex metacharacters, but new entries must stay
# metachar-free or this pattern silently changes meaning. Verify before
# adding entries with '.', '(', '+', etc.
_COMPOSITE_RE = re.compile(
    "|".join(_FRAMEWORK_KEYWORDS + _COMPOSITE_OBJECT_KEYWORDS),
    re.IGNORECASE,
)

# Same construction for container-object detection (see note above).
_CONTAINER_RE = re.compile(
    "|".join(_CONTAINER_OBJECT_KEYWORDS),
    re.IGNORECASE,
)
|
||||
|
||||
|
||||
def _is_composite_obligation(obligation_text: str, object_: str) -> bool:
|
||||
"""Detect framework-level / composite obligations that are NOT atomic.
|
||||
@@ -1477,6 +1795,17 @@ def _is_composite_obligation(obligation_text: str, object_: str) -> bool:
|
||||
return bool(_COMPOSITE_RE.search(combined))
|
||||
|
||||
|
||||
def _is_container_object(object_: str) -> bool:
    """Return True when *object_* names an overly broad container concept.

    Container objects (e.g. 'Sitzungsverwaltung', 'Token-Schutz') bundle
    several sub-requirements, so a control built on them is not auditable
    as a single atomic check. Empty input is never a container.
    """
    return bool(object_) and _CONTAINER_RE.search(object_) is not None
|
||||
|
||||
|
||||
# ── 7c. Output Validator (Negativregeln) ─────────────────────────────────
|
||||
|
||||
def _validate_atomic_control(
|
||||
@@ -1613,11 +1942,11 @@ def _compose_deterministic(
|
||||
# ── Title: "{Object} {Zustand}" ───────────────────────────
|
||||
state = _ACTION_STATE_SUFFIX.get(action_type, "umgesetzt")
|
||||
if object_:
|
||||
title = f"{object_.strip()} {state}"[:80]
|
||||
title = _truncate_title(f"{object_.strip()} {state}")
|
||||
elif action:
|
||||
title = f"{action.strip().capitalize()} {state}"[:80]
|
||||
title = _truncate_title(f"{action.strip().capitalize()} {state}")
|
||||
else:
|
||||
title = f"{parent_title} {state}"[:80]
|
||||
title = _truncate_title(f"{parent_title} {state}")
|
||||
|
||||
# ── Objective = obligation text (the normative statement) ─
|
||||
objective = obligation_text.strip()[:2000]
|
||||
@@ -1678,7 +2007,7 @@ def _compose_deterministic(
|
||||
requirements=requirements,
|
||||
test_procedure=test_procedure,
|
||||
evidence=evidence,
|
||||
severity=_normalize_severity(parent_severity),
|
||||
severity=_calibrate_severity(parent_severity, action_type),
|
||||
category=parent_category or "governance",
|
||||
)
|
||||
# Attach extra metadata (stored in generation_metadata)
|
||||
@@ -1690,11 +2019,17 @@ def _compose_deterministic(
|
||||
atomic._deadline_hours = deadline_hours # type: ignore[attr-defined]
|
||||
atomic._frequency = frequency # type: ignore[attr-defined]
|
||||
|
||||
# ── Composite / Framework detection ───────────────────────
|
||||
# ── Composite / Framework / Container detection ────────────
|
||||
is_composite = _is_composite_obligation(obligation_text, object_)
|
||||
atomic._is_composite = is_composite # type: ignore[attr-defined]
|
||||
atomic._atomicity = "composite" if is_composite else "atomic" # type: ignore[attr-defined]
|
||||
atomic._requires_decomposition = is_composite # type: ignore[attr-defined]
|
||||
is_container = _is_container_object(object_)
|
||||
atomic._is_composite = is_composite or is_container # type: ignore[attr-defined]
|
||||
if is_composite:
|
||||
atomic._atomicity = "composite" # type: ignore[attr-defined]
|
||||
elif is_container:
|
||||
atomic._atomicity = "container" # type: ignore[attr-defined]
|
||||
else:
|
||||
atomic._atomicity = "atomic" # type: ignore[attr-defined]
|
||||
atomic._requires_decomposition = is_composite or is_container # type: ignore[attr-defined]
|
||||
|
||||
# ── Validate (log issues, never reject) ───────────────────
|
||||
validation_issues = _validate_atomic_control(atomic, action_type, object_class)
|
||||
@@ -2315,6 +2650,7 @@ class DecompositionPass:
|
||||
SELECT 1 FROM canonical_controls ac
|
||||
WHERE ac.parent_control_uuid = oc.parent_control_uuid
|
||||
AND ac.decomposition_method = 'pass0b'
|
||||
AND ac.release_state NOT IN ('deprecated', 'duplicate')
|
||||
AND ac.title LIKE '%' || LEFT(oc.action, 20) || '%'
|
||||
)
|
||||
"""
|
||||
@@ -2877,10 +3213,31 @@ class DecompositionPass:
|
||||
"""Insert an atomic control and create parent link.
|
||||
|
||||
Returns the UUID of the newly created control, or None on failure.
|
||||
Checks merge_hint to prevent duplicate controls under the same parent.
|
||||
"""
|
||||
parent_uuid = obl["parent_uuid"]
|
||||
candidate_id = obl["candidate_id"]
|
||||
|
||||
# ── Duplicate Guard: skip if same merge_hint already exists ──
|
||||
merge_hint = getattr(atomic, "source_regulation", "") or ""
|
||||
if merge_hint:
|
||||
existing = self.db.execute(
|
||||
text("""
|
||||
SELECT id::text FROM canonical_controls
|
||||
WHERE parent_control_uuid = CAST(:parent AS uuid)
|
||||
AND generation_metadata->>'merge_group_hint' = :hint
|
||||
AND release_state NOT IN ('rejected', 'deprecated', 'duplicate')
|
||||
LIMIT 1
|
||||
"""),
|
||||
{"parent": parent_uuid, "hint": merge_hint},
|
||||
).fetchone()
|
||||
if existing:
|
||||
logger.debug(
|
||||
"Duplicate guard: skipping %s — merge_hint %s already exists as %s",
|
||||
candidate_id, merge_hint, existing[0],
|
||||
)
|
||||
return existing[0]
|
||||
|
||||
result = self.db.execute(
|
||||
text("""
|
||||
INSERT INTO canonical_controls (
|
||||
@@ -3135,6 +3492,7 @@ class DecompositionPass:
|
||||
SELECT 1 FROM canonical_controls ac
|
||||
WHERE ac.parent_control_uuid = oc.parent_control_uuid
|
||||
AND ac.decomposition_method = 'pass0b'
|
||||
AND ac.release_state NOT IN ('deprecated', 'duplicate')
|
||||
AND ac.title LIKE '%' || LEFT(oc.action, 20) || '%'
|
||||
)
|
||||
"""
|
||||
@@ -3475,4 +3833,45 @@ def _normalize_severity(val: str) -> str:
|
||||
return "medium"
|
||||
|
||||
|
||||
# Action-type-based severity calibration: not every atomic control
# inherits the parent's severity. Definition and review controls are
# typically medium, while implementation controls stay high.
# Values are *caps*: the effective severity is the lower of the parent
# severity and this cap — see _calibrate_severity().
_ACTION_SEVERITY_CAP: dict[str, str] = {
    # Governance / paperwork actions — capped at medium
    "define": "medium",
    "review": "medium",
    "document": "medium",
    "report": "medium",
    "test": "medium",
    # Operational / enforcement actions — capped at high
    # (critical is reserved for parent-level controls)
    "implement": "high",
    "configure": "high",
    "monitor": "high",
    "enforce": "high",
    "prevent": "high",
    "exclude": "high",
    "forbid": "high",
    "invalidate": "high",
    "issue": "high",
    # Routine rotation is maintenance-level work
    "rotate": "medium",
}

# Severity ordering for cap comparison (higher number = more severe)
_SEVERITY_ORDER = {"low": 0, "medium": 1, "high": 2, "critical": 3}
|
||||
|
||||
|
||||
def _calibrate_severity(parent_severity: str, action_type: str) -> str:
    """Derive an atomic control's severity from its parent and action type.

    Implementation/enforcement actions inherit the parent severity;
    definition/review/test/documentation actions are capped at medium
    (per _ACTION_SEVERITY_CAP). Unknown action types pass the normalized
    parent severity through unchanged.
    """
    inherited = _normalize_severity(parent_severity)
    ceiling = _ACTION_SEVERITY_CAP.get(action_type)
    if not ceiling:
        return inherited
    # Take whichever of the parent severity and the cap ranks lower.
    if _SEVERITY_ORDER.get(inherited, 1) > _SEVERITY_ORDER.get(ceiling, 1):
        return ceiling
    return inherited
|
||||
|
||||
|
||||
# _template_fallback removed — replaced by _compose_deterministic engine
|
||||
|
||||
331
backend-compliance/compliance/services/v1_enrichment.py
Normal file
331
backend-compliance/compliance/services/v1_enrichment.py
Normal file
@@ -0,0 +1,331 @@
|
||||
"""V1 Control Enrichment Service — Match Eigenentwicklung controls to regulations.
|
||||
|
||||
Finds regulatory coverage for v1 controls (generation_strategy='ungrouped',
|
||||
pipeline_version=1, no source_citation) by embedding similarity search.
|
||||
|
||||
Reuses embedding + Qdrant helpers from control_dedup.py.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Optional
|
||||
|
||||
from sqlalchemy import text
|
||||
|
||||
from database import SessionLocal
|
||||
from compliance.services.control_dedup import (
|
||||
get_embedding,
|
||||
qdrant_search_cross_regulation,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Similarity threshold — lower than dedup (0.85) since we want informational matches
|
||||
# Typical top scores for v1 controls are 0.70-0.77
|
||||
V1_MATCH_THRESHOLD = 0.70
|
||||
V1_MAX_MATCHES = 5
|
||||
|
||||
|
||||
def _is_eigenentwicklung_query() -> str:
    """SQL WHERE clause identifying v1 Eigenentwicklung controls.

    v1 controls are: ungrouped generation strategy, pipeline version 1 (or
    unset), no source citation, no parent link, and a release state that is
    still active. Column names are unqualified — callers that join tables
    must alias them (see get_v1_enrichment_stats).
    """
    return """
        generation_strategy = 'ungrouped'
        AND (pipeline_version = '1' OR pipeline_version IS NULL)
        AND source_citation IS NULL
        AND parent_control_uuid IS NULL
        AND release_state NOT IN ('rejected', 'merged', 'deprecated')
    """
|
||||
|
||||
|
||||
async def count_v1_controls() -> int:
    """Return the number of v1 Eigenentwicklung controls in the database."""
    count_sql = text(f"""
        SELECT COUNT(*) AS cnt
        FROM canonical_controls
        WHERE {_is_eigenentwicklung_query()}
    """)
    with SessionLocal() as db:
        result = db.execute(count_sql).fetchone()
    return result.cnt if result else 0
|
||||
|
||||
|
||||
async def enrich_v1_matches(
    dry_run: bool = True,
    batch_size: int = 100,
    offset: int = 0,
) -> dict:
    """Find regulatory matches for v1 Eigenentwicklung controls.

    For each v1 control in the batch: embed "{title} — {objective}", search
    the Qdrant collection 'atomic_controls_dedup' cross-regulation, resolve
    each hit to a regulatory control carrying a source_citation (the hit
    itself or its parent), and upsert up to V1_MAX_MATCHES ranked matches
    into v1_control_matches. Per-control failures are collected in
    `errors`; they never abort the batch.

    Args:
        dry_run: If True, only count — don't write matches.
        batch_size: Number of v1 controls to process per call.
        offset: Pagination offset (v1 control index).

    Returns:
        Stats dict with counts, sample matches, and pagination info.
    """
    with SessionLocal() as db:
        # 1. Load v1 controls (paginated, stable order by control_id)
        v1_controls = db.execute(text(f"""
            SELECT id, control_id, title, objective, category
            FROM canonical_controls
            WHERE {_is_eigenentwicklung_query()}
            ORDER BY control_id
            LIMIT :limit OFFSET :offset
        """), {"limit": batch_size, "offset": offset}).fetchall()

        # Count total for pagination
        total_row = db.execute(text(f"""
            SELECT COUNT(*) AS cnt
            FROM canonical_controls
            WHERE {_is_eigenentwicklung_query()}
        """)).fetchone()
        total_v1 = total_row.cnt if total_row else 0

        if not v1_controls:
            # Offset is past the end — every v1 control has been processed.
            return {
                "dry_run": dry_run,
                "processed": 0,
                "total_v1": total_v1,
                "message": "Kein weiterer Batch — alle v1 Controls verarbeitet.",
            }

        if dry_run:
            # Preview only: report the batch without embedding or writing.
            return {
                "dry_run": True,
                "total_v1": total_v1,
                "offset": offset,
                "batch_size": batch_size,
                "sample_controls": [
                    {
                        "control_id": r.control_id,
                        "title": r.title,
                        "category": r.category,
                    }
                    for r in v1_controls[:20]
                ],
            }

        # 2. Process each v1 control
        processed = 0
        matches_inserted = 0
        errors = []
        sample_matches = []

        for v1 in v1_controls:
            try:
                # Build search text from title + objective
                search_text = f"{v1.title} — {v1.objective}"

                # Get embedding; a falsy result is logged as an error and skipped
                embedding = await get_embedding(search_text)
                if not embedding:
                    errors.append({
                        "control_id": v1.control_id,
                        "error": "Embedding fehlgeschlagen",
                    })
                    continue

                # Search Qdrant (cross-regulation, no pattern filter)
                # Collection is atomic_controls_dedup (contains ~51k atomare Controls)
                results = await qdrant_search_cross_regulation(
                    embedding, top_k=20,
                    collection="atomic_controls_dedup",
                )

                # For each hit: resolve to a regulatory parent with source_citation.
                # Atomic controls in Qdrant usually have parent_control_uuid → parent
                # has the source_citation. We deduplicate by parent to avoid
                # listing the same regulation multiple times.
                rank = 0
                seen_parents: set[str] = set()

                for hit in results:
                    # Below-threshold hits are skipped, not a loop exit:
                    # hit order from Qdrant is not relied upon here.
                    score = hit.get("score", 0)
                    if score < V1_MATCH_THRESHOLD:
                        continue

                    payload = hit.get("payload", {})
                    matched_uuid = payload.get("control_uuid")
                    # Skip self-matches and hits without a control reference
                    if not matched_uuid or matched_uuid == str(v1.id):
                        continue

                    # Try the matched control itself first, then its parent
                    matched_row = db.execute(text("""
                        SELECT c.id, c.control_id, c.title, c.source_citation,
                               c.severity, c.category, c.parent_control_uuid
                        FROM canonical_controls c
                        WHERE c.id = CAST(:uuid AS uuid)
                    """), {"uuid": matched_uuid}).fetchone()

                    if not matched_row:
                        continue

                    # Resolve to regulatory control (one with source_citation)
                    reg_row = matched_row
                    if not reg_row.source_citation and reg_row.parent_control_uuid:
                        # Look up parent — the parent has the source_citation
                        parent_row = db.execute(text("""
                            SELECT id, control_id, title, source_citation,
                                   severity, category, parent_control_uuid
                            FROM canonical_controls
                            WHERE id = CAST(:uuid AS uuid)
                              AND source_citation IS NOT NULL
                        """), {"uuid": str(reg_row.parent_control_uuid)}).fetchone()
                        if parent_row:
                            reg_row = parent_row

                    # Hits that never resolve to a citation are dropped
                    if not reg_row.source_citation:
                        continue

                    # Deduplicate by parent UUID so one regulation appears once
                    parent_key = str(reg_row.id)
                    if parent_key in seen_parents:
                        continue
                    seen_parents.add(parent_key)

                    # rank counts accepted matches; stop after V1_MAX_MATCHES
                    rank += 1
                    if rank > V1_MAX_MATCHES:
                        break

                    # Extract source info (source_citation is expected to be a
                    # JSON dict; non-dict values yield NULL source/article)
                    source_citation = reg_row.source_citation or {}
                    matched_source = source_citation.get("source") if isinstance(source_citation, dict) else None
                    matched_article = source_citation.get("article") if isinstance(source_citation, dict) else None

                    # Insert match — link to the regulatory parent (not the atomic child).
                    # Upsert: re-running enrichment refreshes score and rank.
                    db.execute(text("""
                        INSERT INTO v1_control_matches
                            (v1_control_uuid, matched_control_uuid, similarity_score,
                             match_rank, matched_source, matched_article, match_method)
                        VALUES
                            (CAST(:v1_uuid AS uuid), CAST(:matched_uuid AS uuid), :score,
                             :rank, :source, :article, 'embedding')
                        ON CONFLICT (v1_control_uuid, matched_control_uuid) DO UPDATE
                        SET similarity_score = EXCLUDED.similarity_score,
                            match_rank = EXCLUDED.match_rank
                    """), {
                        "v1_uuid": str(v1.id),
                        "matched_uuid": str(reg_row.id),
                        "score": round(score, 3),
                        "rank": rank,
                        "source": matched_source,
                        "article": matched_article,
                    })
                    matches_inserted += 1

                    # Collect up to 20 sample matches for the response payload
                    if len(sample_matches) < 20:
                        sample_matches.append({
                            "v1_control_id": v1.control_id,
                            "v1_title": v1.title,
                            "matched_control_id": reg_row.control_id,
                            "matched_title": reg_row.title,
                            "matched_source": matched_source,
                            "matched_article": matched_article,
                            "similarity_score": round(score, 3),
                            "match_rank": rank,
                        })

                processed += 1

            except Exception as e:
                # Best-effort batch: record the failure and continue with
                # the next v1 control.
                logger.warning("V1 enrichment error for %s: %s", v1.control_id, e)
                errors.append({
                    "control_id": v1.control_id,
                    "error": str(e),
                })

        # Single commit for the whole batch
        db.commit()

        # Pagination: a full batch implies there may be more to process
        next_offset = offset + batch_size if len(v1_controls) == batch_size else None

        return {
            "dry_run": False,
            "offset": offset,
            "batch_size": batch_size,
            "next_offset": next_offset,
            "total_v1": total_v1,
            "processed": processed,
            "matches_inserted": matches_inserted,
            "errors": errors[:10],
            "sample_matches": sample_matches,
        }
|
||||
|
||||
|
||||
async def get_v1_matches(control_uuid: str) -> list[dict]:
    """Get all regulatory matches for a specific v1 control.

    Args:
        control_uuid: The UUID of the v1 control.

    Returns:
        List of match dicts with control details, ordered by match rank.
    """
    match_sql = text("""
        SELECT
            m.similarity_score,
            m.match_rank,
            m.matched_source,
            m.matched_article,
            m.match_method,
            c.control_id AS matched_control_id,
            c.title AS matched_title,
            c.objective AS matched_objective,
            c.severity AS matched_severity,
            c.category AS matched_category,
            c.source_citation AS matched_source_citation
        FROM v1_control_matches m
        JOIN canonical_controls c ON c.id = m.matched_control_uuid
        WHERE m.v1_control_uuid = CAST(:uuid AS uuid)
        ORDER BY m.match_rank
    """)

    with SessionLocal() as db:
        records = db.execute(match_sql, {"uuid": control_uuid}).fetchall()

    matches: list[dict] = []
    for rec in records:
        matches.append({
            "matched_control_id": rec.matched_control_id,
            "matched_title": rec.matched_title,
            "matched_objective": rec.matched_objective,
            "matched_severity": rec.matched_severity,
            "matched_category": rec.matched_category,
            "matched_source": rec.matched_source,
            "matched_article": rec.matched_article,
            "matched_source_citation": rec.matched_source_citation,
            # NUMERIC comes back as Decimal — expose it as float
            "similarity_score": float(rec.similarity_score),
            "match_rank": rec.match_rank,
            "match_method": rec.match_method,
        })
    return matches
|
||||
|
||||
|
||||
async def get_v1_enrichment_stats() -> dict:
    """Get overview stats for v1 enrichment.

    Returns:
        Dict with the total v1 control count, how many have at least one
        match, how many have none, the total match count, and the average
        similarity score (None when no matches exist).
    """
    # The shared WHERE clause uses bare column names; the matched_v1 query
    # joins two tables, so every v1 column must be qualified with alias 'c'.
    # Build the aliased clause from a column list instead of the previous
    # fragile chain of five inline str.replace() calls.
    v1_where = _is_eigenentwicklung_query()
    v1_where_aliased = v1_where
    for col in (
        "generation_strategy",
        "pipeline_version",
        "source_citation",
        "parent_control_uuid",
        "release_state",
    ):
        v1_where_aliased = v1_where_aliased.replace(col, f"c.{col}")

    with SessionLocal() as db:
        total_v1 = db.execute(text(f"""
            SELECT COUNT(*) AS cnt FROM canonical_controls
            WHERE {v1_where}
        """)).fetchone()

        matched_v1 = db.execute(text(f"""
            SELECT COUNT(DISTINCT m.v1_control_uuid) AS cnt
            FROM v1_control_matches m
            JOIN canonical_controls c ON c.id = m.v1_control_uuid
            WHERE {v1_where_aliased}
        """)).fetchone()

        total_matches = db.execute(text("""
            SELECT COUNT(*) AS cnt FROM v1_control_matches
        """)).fetchone()

        avg_score = db.execute(text("""
            SELECT AVG(similarity_score) AS avg_score FROM v1_control_matches
        """)).fetchone()

    total = total_v1.cnt if total_v1 else 0
    matched = matched_v1.cnt if matched_v1 else 0
    return {
        "total_v1_controls": total,
        "v1_with_matches": matched,
        "v1_without_matches": total - matched,
        "total_matches": total_matches.cnt if total_matches else 0,
        # AVG returns NULL (→ None) when the table is empty
        "avg_similarity_score": round(float(avg_score.avg_score), 3) if avg_score and avg_score.avg_score else None,
    }
|
||||
18
backend-compliance/migrations/080_v1_control_matches.sql
Normal file
18
backend-compliance/migrations/080_v1_control_matches.sql
Normal file
@@ -0,0 +1,18 @@
|
||||
-- V1 Control Enrichment: Cross-reference table for matching
-- Eigenentwicklung (v1, ungrouped, no source) → regulatorische Controls.
-- Populated by compliance/services/v1_enrichment.py (embedding search);
-- re-runs upsert via the unique constraint below.

CREATE TABLE IF NOT EXISTS v1_control_matches (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    v1_control_uuid UUID NOT NULL REFERENCES canonical_controls(id) ON DELETE CASCADE,
    matched_control_uuid UUID NOT NULL REFERENCES canonical_controls(id) ON DELETE CASCADE,
    similarity_score NUMERIC(4,3) NOT NULL,  -- embedding similarity, 3 decimal places
    match_rank SMALLINT NOT NULL DEFAULT 1,  -- 1 = best match per v1 control
    matched_source TEXT,   -- e.g. "DSGVO (EU) 2016/679"
    matched_article TEXT,  -- e.g. "Art. 32"
    match_method VARCHAR(30) NOT NULL DEFAULT 'embedding',
    created_at TIMESTAMPTZ DEFAULT NOW(),
    -- One row per (v1 control, regulatory control) pair; enables upsert
    CONSTRAINT uq_v1_match UNIQUE (v1_control_uuid, matched_control_uuid)
);

CREATE INDEX IF NOT EXISTS idx_v1m_v1 ON v1_control_matches(v1_control_uuid);
CREATE INDEX IF NOT EXISTS idx_v1m_matched ON v1_control_matches(matched_control_uuid);
|
||||
11
backend-compliance/migrations/081_obligation_dedup_state.sql
Normal file
11
backend-compliance/migrations/081_obligation_dedup_state.sql
Normal file
@@ -0,0 +1,11 @@
|
||||
-- Migration 081: Add 'duplicate' release_state for obligation deduplication
--
-- Allows marking duplicate obligation_candidates as 'duplicate' instead of
-- deleting them, preserving traceability via merged_into_id.
-- Pattern: drop-and-recreate the CHECK constraint with the extended value set.

ALTER TABLE obligation_candidates
    DROP CONSTRAINT IF EXISTS obligation_candidates_release_state_check;

ALTER TABLE obligation_candidates
    ADD CONSTRAINT obligation_candidates_release_state_check
    CHECK (release_state IN ('extracted', 'validated', 'rejected', 'composed', 'merged', 'duplicate'));
|
||||
@@ -0,0 +1,4 @@
|
||||
-- Widen source_article and source_regulation to TEXT to handle long NIST references
-- e.g. "SC-22 (und weitere redaktionelle Änderungen SC-7, SC-14, SC-17, ...)"
-- TEXT has no length limit in PostgreSQL; existing values are preserved.
ALTER TABLE control_parent_links ALTER COLUMN source_article TYPE TEXT;
ALTER TABLE control_parent_links ALTER COLUMN source_regulation TYPE TEXT;
|
||||
@@ -443,18 +443,105 @@ class TestControlsMeta:
|
||||
db.__enter__ = MagicMock(return_value=db)
|
||||
db.__exit__ = MagicMock(return_value=False)
|
||||
|
||||
# 4 sequential execute() calls
|
||||
total_r = MagicMock(); total_r.scalar.return_value = 100
|
||||
domain_r = MagicMock(); domain_r.fetchall.return_value = []
|
||||
source_r = MagicMock(); source_r.fetchall.return_value = []
|
||||
nosrc_r = MagicMock(); nosrc_r.scalar.return_value = 20
|
||||
db.execute.side_effect = [total_r, domain_r, source_r, nosrc_r]
|
||||
# Faceted meta does many execute() calls — use a default mock
|
||||
scalar_r = MagicMock()
|
||||
scalar_r.scalar.return_value = 100
|
||||
scalar_r.fetchall.return_value = []
|
||||
db.execute.return_value = scalar_r
|
||||
mock_cls.return_value = db
|
||||
|
||||
resp = _client.get("/api/compliance/v1/canonical/controls-meta")
|
||||
assert resp.status_code == 200
|
||||
data = resp.json()
|
||||
assert data["total"] == 100
|
||||
assert data["no_source_count"] == 20
|
||||
assert isinstance(data["domains"], list)
|
||||
assert isinstance(data["sources"], list)
|
||||
assert "type_counts" in data
|
||||
assert "severity_counts" in data
|
||||
assert "verification_method_counts" in data
|
||||
assert "category_counts" in data
|
||||
assert "evidence_type_counts" in data
|
||||
assert "release_state_counts" in data
|
||||
|
||||
|
||||
class TestObligationDedup:
    """Tests for obligation deduplication endpoints.

    NOTE: both tests feed db.execute.side_effect lists, so they are tightly
    coupled to the number and order of execute() calls in the route handler.
    """

    @patch("compliance.api.canonical_control_routes.SessionLocal")
    def test_dedup_dry_run(self, mock_cls):
        """Dry-run dedup reports kept/marked counts without committing."""
        db = MagicMock()
        db.__enter__ = MagicMock(return_value=db)
        db.__exit__ = MagicMock(return_value=False)
        mock_cls.return_value = db

        # Mock: 2 duplicate groups (candidate_id shared by cnt rows each)
        dup_row1 = MagicMock(candidate_id="OC-AUTH-001-01", cnt=3)
        dup_row2 = MagicMock(candidate_id="OC-AUTH-001-02", cnt=2)

        # Entries for group 1 — oldest (entry1) is expected to be kept
        import uuid
        uid1 = uuid.uuid4()
        uid2 = uuid.uuid4()
        uid3 = uuid.uuid4()
        entry1 = MagicMock(id=uid1, candidate_id="OC-AUTH-001-01", obligation_text="Text A", release_state="composed", created_at=datetime(2026, 1, 1, tzinfo=timezone.utc))
        entry2 = MagicMock(id=uid2, candidate_id="OC-AUTH-001-01", obligation_text="Text B", release_state="composed", created_at=datetime(2026, 1, 2, tzinfo=timezone.utc))
        entry3 = MagicMock(id=uid3, candidate_id="OC-AUTH-001-01", obligation_text="Text C", release_state="composed", created_at=datetime(2026, 1, 3, tzinfo=timezone.utc))

        # Entries for group 2
        uid4 = uuid.uuid4()
        uid5 = uuid.uuid4()
        entry4 = MagicMock(id=uid4, candidate_id="OC-AUTH-001-02", obligation_text="Text D", release_state="composed", created_at=datetime(2026, 1, 1, tzinfo=timezone.utc))
        entry5 = MagicMock(id=uid5, candidate_id="OC-AUTH-001-02", obligation_text="Text E", release_state="composed", created_at=datetime(2026, 1, 2, tzinfo=timezone.utc))

        # Side effects: 1) dup groups, 2) total count, 3) entries grp1, 4) entries grp2
        mock_result_groups = MagicMock()
        mock_result_groups.fetchall.return_value = [dup_row1, dup_row2]
        mock_result_total = MagicMock()
        mock_result_total.scalar.return_value = 2
        mock_result_entries1 = MagicMock()
        mock_result_entries1.fetchall.return_value = [entry1, entry2, entry3]
        mock_result_entries2 = MagicMock()
        mock_result_entries2.fetchall.return_value = [entry4, entry5]

        db.execute.side_effect = [mock_result_groups, mock_result_total, mock_result_entries1, mock_result_entries2]

        resp = _client.post("/api/compliance/v1/canonical/obligations/dedup?dry_run=true")
        assert resp.status_code == 200
        data = resp.json()
        assert data["dry_run"] is True
        assert data["stats"]["total_duplicate_groups"] == 2
        # One entry kept per group
        assert data["stats"]["kept"] == 2
        assert data["stats"]["marked_duplicate"] == 3  # 2 from grp1 + 1 from grp2
        # Dry run: no commit
        db.commit.assert_not_called()

    @patch("compliance.api.canonical_control_routes.SessionLocal")
    def test_dedup_stats(self, mock_cls):
        """dedup-stats endpoint aggregates totals, per-state counts and pending work."""
        db = MagicMock()
        db.__enter__ = MagicMock(return_value=db)
        db.__exit__ = MagicMock(return_value=False)
        mock_cls.return_value = db

        # Query order expected by the route: total, by_state, dup_groups, removable
        mock_total = MagicMock()
        mock_total.scalar.return_value = 76046
        mock_states = MagicMock()
        mock_states.fetchall.return_value = [
            MagicMock(release_state="composed", cnt=41217),
            MagicMock(release_state="duplicate", cnt=34829),
        ]
        mock_dup_groups = MagicMock()
        mock_dup_groups.scalar.return_value = 0
        mock_removable = MagicMock()
        mock_removable.scalar.return_value = 0

        db.execute.side_effect = [mock_total, mock_states, mock_dup_groups, mock_removable]

        resp = _client.get("/api/compliance/v1/canonical/obligations/dedup-stats")
        assert resp.status_code == 200
        data = resp.json()
        assert data["total_obligations"] == 76046
        assert data["by_state"]["composed"] == 41217
        assert data["by_state"]["duplicate"] == 34829
        assert data["pending_duplicate_groups"] == 0
        assert data["pending_removable_duplicates"] == 0
||||
|
||||
@@ -40,6 +40,8 @@ from compliance.services.decomposition_pass import (
|
||||
_format_citation,
|
||||
_compute_extraction_confidence,
|
||||
_normalize_severity,
|
||||
_calibrate_severity,
|
||||
_truncate_title,
|
||||
_compose_deterministic,
|
||||
_classify_action,
|
||||
_classify_object,
|
||||
@@ -63,6 +65,9 @@ from compliance.services.decomposition_pass import (
|
||||
_PATTERN_CANDIDATES_MAP,
|
||||
_PATTERN_CANDIDATES_BY_ACTION,
|
||||
_is_composite_obligation,
|
||||
_is_container_object,
|
||||
_ACTION_TEMPLATES,
|
||||
_ACTION_SEVERITY_CAP,
|
||||
)
|
||||
|
||||
|
||||
@@ -704,7 +709,8 @@ class TestComposeDeterministic:
|
||||
# Object placeholder should use parent_title
|
||||
assert "System Security" in ac.test_procedure[0]
|
||||
|
||||
def test_severity_inherited(self):
|
||||
def test_severity_calibrated(self):
|
||||
# implement caps at high — critical is reserved for parent-level controls
|
||||
ac = _compose_deterministic(
|
||||
obligation_text="Kritische Pflicht",
|
||||
action="implementieren",
|
||||
@@ -715,7 +721,7 @@ class TestComposeDeterministic:
|
||||
is_test=False,
|
||||
is_reporting=False,
|
||||
)
|
||||
assert ac.severity == "critical"
|
||||
assert ac.severity == "high"
|
||||
|
||||
def test_category_inherited(self):
|
||||
ac = _compose_deterministic(
|
||||
@@ -971,6 +977,76 @@ class TestObjectNormalization:
|
||||
assert "ue" in result
|
||||
assert "ä" not in result
|
||||
|
||||
# --- New tests for improved normalization (2026-03-28) ---
|
||||
|
||||
def test_qualifying_phrase_stripped(self):
|
||||
"""Prepositional qualifiers like 'bei X' are stripped."""
|
||||
base = _normalize_object("Eskalationsprozess")
|
||||
qualified = _normalize_object(
|
||||
"Eskalationsprozess bei Schwellenwertüberschreitung"
|
||||
)
|
||||
assert base == qualified
|
||||
|
||||
def test_fuer_phrase_stripped(self):
|
||||
"""'für kritische Systeme' qualifier is stripped."""
|
||||
base = _normalize_object("Backup-Verfahren")
|
||||
qualified = _normalize_object("Backup-Verfahren für kritische Systeme")
|
||||
assert base == qualified
|
||||
|
||||
def test_gemaess_phrase_stripped(self):
|
||||
"""'gemäß Artikel 32' qualifier is stripped."""
|
||||
base = _normalize_object("Verschlüsselung")
|
||||
qualified = _normalize_object("Verschlüsselung gemäß Artikel 32")
|
||||
assert base == qualified
|
||||
|
||||
def test_truncation_at_40_chars(self):
|
||||
"""Objects truncated at 40 chars at word boundary."""
|
||||
long_obj = "interner_eskalationsprozess_bei_schwellenwertueberschreitung_und_mehr"
|
||||
result = _normalize_object(long_obj)
|
||||
assert len(result) <= 40
|
||||
|
||||
def test_near_synonym_erkennung(self):
|
||||
"""'Früherkennung' and 'frühzeitige Erkennung' collapse."""
|
||||
a = _normalize_object("Früherkennung von Anomalien")
|
||||
b = _normalize_object("frühzeitige Erkennung von Angriffen")
|
||||
assert a == b
|
||||
|
||||
def test_near_synonym_eskalation(self):
|
||||
"""'Eskalationsprozess' and 'Eskalationsverfahren' collapse."""
|
||||
a = _normalize_object("Eskalationsprozess")
|
||||
b = _normalize_object("Eskalationsverfahren")
|
||||
assert a == b
|
||||
|
||||
def test_near_synonym_meldeprozess(self):
|
||||
"""'Meldeprozess' and 'Meldeverfahren' collapse to notification."""
|
||||
a = _normalize_object("Meldeprozess")
|
||||
b = _normalize_object("Meldeverfahren")
|
||||
assert a == b
|
||||
|
||||
def test_near_synonym_ueberwachung(self):
|
||||
"""'Überwachung' and 'Monitoring' collapse."""
|
||||
a = _normalize_object("Überwachung")
|
||||
b = _normalize_object("Monitoring")
|
||||
assert a == b
|
||||
|
||||
def test_trailing_noise_stripped(self):
|
||||
"""Trailing articles/prepositions are stripped."""
|
||||
result = _normalize_object("Schutz der")
|
||||
assert not result.endswith("_der")
|
||||
|
||||
def test_vendor_synonyms(self):
|
||||
"""Lieferant/Dienstleister/Auftragsverarbeiter collapse to vendor."""
|
||||
a = _normalize_object("Lieferant")
|
||||
b = _normalize_object("Dienstleister")
|
||||
c = _normalize_object("Auftragsverarbeiter")
|
||||
assert a == b == c
|
||||
|
||||
def test_patch_mgmt_synonyms(self):
|
||||
"""Patchmanagement/Aktualisierung collapse."""
|
||||
a = _normalize_object("Patchmanagement")
|
||||
b = _normalize_object("Softwareaktualisierung")
|
||||
assert a == b
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# GAP 5: OUTPUT VALIDATOR TESTS
|
||||
@@ -2431,3 +2507,444 @@ class TestPass0bWithEnrichment:
|
||||
|
||||
# Invalid JSON
|
||||
assert _parse_citation("not json") == {}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# TRUNCATE TITLE TESTS
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestTruncateTitle:
|
||||
"""Tests for _truncate_title — word-boundary truncation."""
|
||||
|
||||
def test_short_title_unchanged(self):
|
||||
assert _truncate_title("Rate-Limiting umgesetzt") == "Rate-Limiting umgesetzt"
|
||||
|
||||
def test_exactly_80_unchanged(self):
|
||||
title = "A" * 80
|
||||
assert _truncate_title(title) == title
|
||||
|
||||
def test_long_title_cuts_at_word_boundary(self):
|
||||
title = "Maximale Payload-Groessen fuer API-Anfragen und API-Antworten definiert und technisch durchgesetzt"
|
||||
result = _truncate_title(title)
|
||||
assert len(result) <= 80
|
||||
assert not result.endswith(" ")
|
||||
# Should not cut mid-word
|
||||
assert result[-1].isalpha() or result[-1] in ("-", ")")
|
||||
|
||||
def test_no_mid_word_cut(self):
|
||||
# "definieren" would be cut to "defin" with naive [:80]
|
||||
title = "x" * 75 + " definieren"
|
||||
result = _truncate_title(title)
|
||||
assert "defin" not in result or "definieren" in result
|
||||
|
||||
def test_custom_max_len(self):
|
||||
result = _truncate_title("Rate-Limiting fuer alle Endpunkte", max_len=20)
|
||||
assert len(result) <= 20
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# SEVERITY CALIBRATION TESTS
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestCalibrateSeverity:
|
||||
"""Tests for _calibrate_severity — action-type-based severity."""
|
||||
|
||||
def test_implement_keeps_high(self):
|
||||
assert _calibrate_severity("high", "implement") == "high"
|
||||
|
||||
def test_define_caps_to_medium(self):
|
||||
assert _calibrate_severity("high", "define") == "medium"
|
||||
|
||||
def test_review_caps_to_medium(self):
|
||||
assert _calibrate_severity("high", "review") == "medium"
|
||||
|
||||
def test_test_caps_to_medium(self):
|
||||
assert _calibrate_severity("high", "test") == "medium"
|
||||
|
||||
def test_document_caps_to_medium(self):
|
||||
assert _calibrate_severity("high", "document") == "medium"
|
||||
|
||||
def test_monitor_keeps_high(self):
|
||||
assert _calibrate_severity("high", "monitor") == "high"
|
||||
|
||||
def test_low_parent_stays_low(self):
|
||||
# Even for implement, if parent is low, stays low
|
||||
assert _calibrate_severity("low", "implement") == "low"
|
||||
|
||||
def test_medium_parent_define_stays_medium(self):
|
||||
assert _calibrate_severity("medium", "define") == "medium"
|
||||
|
||||
def test_unknown_action_inherits_parent(self):
|
||||
assert _calibrate_severity("high", "unknown_action") == "high"
|
||||
|
||||
def test_critical_implement_caps_to_high(self):
|
||||
# implement caps at high — critical is reserved for parent-level controls
|
||||
assert _calibrate_severity("critical", "implement") == "high"
|
||||
|
||||
def test_critical_define_caps_to_medium(self):
|
||||
assert _calibrate_severity("critical", "define") == "medium"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# COMPOSE DETERMINISTIC — SEVERITY CALIBRATION INTEGRATION
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestComposeDeterministicSeverity:
|
||||
"""Verify _compose_deterministic uses calibrated severity."""
|
||||
|
||||
def test_define_action_gets_medium(self):
|
||||
atomic = _compose_deterministic(
|
||||
obligation_text="Payload-Grenzen sind verbindlich festzulegen.",
|
||||
action="definieren",
|
||||
object_="Payload-Grenzen",
|
||||
parent_title="API Ressourcen",
|
||||
parent_severity="high",
|
||||
parent_category="security",
|
||||
is_test=False,
|
||||
is_reporting=False,
|
||||
)
|
||||
assert atomic.severity == "medium"
|
||||
|
||||
def test_implement_action_keeps_high(self):
|
||||
atomic = _compose_deterministic(
|
||||
obligation_text="Rate-Limiting muss technisch umgesetzt werden.",
|
||||
action="implementieren",
|
||||
object_="Rate-Limiting",
|
||||
parent_title="API Ressourcen",
|
||||
parent_severity="high",
|
||||
parent_category="security",
|
||||
is_test=False,
|
||||
is_reporting=False,
|
||||
)
|
||||
assert atomic.severity == "high"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# ERROR CLASS 1: NEGATIVE / PROHIBITIVE ACTION CLASSIFICATION
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestNegativeActions:
|
||||
"""Tests for prohibitive action keywords → prevent/exclude/forbid."""
|
||||
|
||||
def test_duerfen_keine_maps_to_prevent(self):
|
||||
assert _classify_action("dürfen keine") == "prevent"
|
||||
|
||||
def test_duerfen_nicht_maps_to_prevent(self):
|
||||
assert _classify_action("dürfen nicht") == "prevent"
|
||||
|
||||
def test_darf_keine_maps_to_prevent(self):
|
||||
assert _classify_action("darf keine") == "prevent"
|
||||
|
||||
def test_verboten_maps_to_forbid(self):
|
||||
assert _classify_action("verboten") == "forbid"
|
||||
|
||||
def test_untersagt_maps_to_forbid(self):
|
||||
assert _classify_action("untersagt") == "forbid"
|
||||
|
||||
def test_nicht_zulaessig_maps_to_forbid(self):
|
||||
assert _classify_action("nicht zulässig") == "forbid"
|
||||
|
||||
def test_nicht_erlaubt_maps_to_forbid(self):
|
||||
assert _classify_action("nicht erlaubt") == "forbid"
|
||||
|
||||
def test_nicht_enthalten_maps_to_exclude(self):
|
||||
assert _classify_action("nicht enthalten") == "exclude"
|
||||
|
||||
def test_ausschliessen_maps_to_exclude(self):
|
||||
assert _classify_action("ausschließen") == "exclude"
|
||||
|
||||
def test_verhindern_maps_to_prevent(self):
|
||||
assert _classify_action("verhindern") == "prevent"
|
||||
|
||||
def test_unterbinden_maps_to_prevent(self):
|
||||
assert _classify_action("unterbinden") == "prevent"
|
||||
|
||||
def test_ablehnen_maps_to_exclude(self):
|
||||
assert _classify_action("ablehnen") == "exclude"
|
||||
|
||||
def test_nicht_uebertragen_maps_to_prevent(self):
|
||||
assert _classify_action("nicht übertragen") == "prevent"
|
||||
|
||||
def test_nicht_gespeichert_maps_to_prevent(self):
|
||||
assert _classify_action("nicht gespeichert") == "prevent"
|
||||
|
||||
def test_negative_action_has_higher_priority_than_implement(self):
|
||||
"""Negative keywords at start of ACTION_PRIORITY → picked over lower ones."""
|
||||
result = _classify_action("verhindern und dokumentieren")
|
||||
assert result == "prevent"
|
||||
|
||||
def test_prevent_template_exists(self):
|
||||
assert "prevent" in _ACTION_TEMPLATES
|
||||
assert "test_procedure" in _ACTION_TEMPLATES["prevent"]
|
||||
assert "evidence" in _ACTION_TEMPLATES["prevent"]
|
||||
|
||||
def test_exclude_template_exists(self):
|
||||
assert "exclude" in _ACTION_TEMPLATES
|
||||
assert "test_procedure" in _ACTION_TEMPLATES["exclude"]
|
||||
|
||||
def test_forbid_template_exists(self):
|
||||
assert "forbid" in _ACTION_TEMPLATES
|
||||
assert "test_procedure" in _ACTION_TEMPLATES["forbid"]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# ERROR CLASS 1b: SESSION / LIFECYCLE ACTIONS
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestSessionActions:
|
||||
"""Tests for session lifecycle action keywords."""
|
||||
|
||||
def test_ungueltig_machen_maps_to_invalidate(self):
|
||||
assert _classify_action("ungültig machen") == "invalidate"
|
||||
|
||||
def test_invalidieren_maps_to_invalidate(self):
|
||||
assert _classify_action("invalidieren") == "invalidate"
|
||||
|
||||
def test_widerrufen_maps_to_invalidate(self):
|
||||
assert _classify_action("widerrufen") == "invalidate"
|
||||
|
||||
def test_session_beenden_maps_to_invalidate(self):
|
||||
assert _classify_action("session beenden") == "invalidate"
|
||||
|
||||
def test_vergeben_maps_to_issue(self):
|
||||
assert _classify_action("vergeben") == "issue"
|
||||
|
||||
def test_erzeugen_maps_to_issue(self):
|
||||
assert _classify_action("erzeugen") == "issue"
|
||||
|
||||
def test_rotieren_maps_to_rotate(self):
|
||||
assert _classify_action("rotieren") == "rotate"
|
||||
|
||||
def test_erneuern_maps_to_rotate(self):
|
||||
assert _classify_action("erneuern") == "rotate"
|
||||
|
||||
def test_durchsetzen_maps_to_enforce(self):
|
||||
assert _classify_action("durchsetzen") == "enforce"
|
||||
|
||||
def test_erzwingen_maps_to_enforce(self):
|
||||
assert _classify_action("erzwingen") == "enforce"
|
||||
|
||||
def test_invalidate_template_exists(self):
|
||||
assert "invalidate" in _ACTION_TEMPLATES
|
||||
assert "test_procedure" in _ACTION_TEMPLATES["invalidate"]
|
||||
|
||||
def test_issue_template_exists(self):
|
||||
assert "issue" in _ACTION_TEMPLATES
|
||||
|
||||
def test_rotate_template_exists(self):
|
||||
assert "rotate" in _ACTION_TEMPLATES
|
||||
|
||||
def test_enforce_template_exists(self):
|
||||
assert "enforce" in _ACTION_TEMPLATES
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# ERROR CLASS 2: CONTAINER OBJECT DETECTION
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestContainerObjectDetection:
|
||||
"""Tests for _is_container_object — broad objects that need decomposition."""
|
||||
|
||||
def test_sitzungsverwaltung_is_container(self):
|
||||
assert _is_container_object("Sitzungsverwaltung") is True
|
||||
|
||||
def test_session_management_is_container(self):
|
||||
assert _is_container_object("Session Management") is True
|
||||
|
||||
def test_token_schutz_is_container(self):
|
||||
assert _is_container_object("Token-Schutz") is True
|
||||
|
||||
def test_authentifizierungsmechanismen_is_container(self):
|
||||
assert _is_container_object("Authentifizierungsmechanismen") is True
|
||||
|
||||
def test_sicherheitsmassnahmen_is_container(self):
|
||||
assert _is_container_object("Sicherheitsmaßnahmen") is True
|
||||
|
||||
def test_zugriffskontrollmechanismen_is_container(self):
|
||||
assert _is_container_object("Zugriffskontrollmechanismen") is True
|
||||
|
||||
def test_sicherheitsarchitektur_is_container(self):
|
||||
assert _is_container_object("Sicherheitsarchitektur") is True
|
||||
|
||||
def test_compliance_anforderungen_is_container(self):
|
||||
assert _is_container_object("Compliance-Anforderungen") is True
|
||||
|
||||
def test_session_id_is_not_container(self):
|
||||
"""Specific objects like Session-ID are NOT containers."""
|
||||
assert _is_container_object("Session-ID") is False
|
||||
|
||||
def test_firewall_is_not_container(self):
|
||||
assert _is_container_object("Firewall") is False
|
||||
|
||||
def test_mfa_is_not_container(self):
|
||||
assert _is_container_object("MFA") is False
|
||||
|
||||
def test_verschluesselung_is_not_container(self):
|
||||
assert _is_container_object("Verschlüsselung") is False
|
||||
|
||||
def test_cookie_is_not_container(self):
|
||||
assert _is_container_object("Session-Cookie") is False
|
||||
|
||||
def test_empty_string_is_not_container(self):
|
||||
assert _is_container_object("") is False
|
||||
|
||||
def test_none_is_not_container(self):
|
||||
assert _is_container_object(None) is False
|
||||
|
||||
def test_container_in_compose_sets_atomicity(self):
|
||||
"""Container objects set _atomicity='container' and _requires_decomposition."""
|
||||
ac = _compose_deterministic(
|
||||
obligation_text="Sitzungsverwaltung muss abgesichert werden",
|
||||
action="implementieren",
|
||||
object_="Sitzungsverwaltung",
|
||||
parent_title="Session Security",
|
||||
parent_severity="high",
|
||||
parent_category="security",
|
||||
is_test=False,
|
||||
is_reporting=False,
|
||||
)
|
||||
assert ac._atomicity == "container"
|
||||
assert ac._requires_decomposition is True
|
||||
|
||||
def test_specific_object_is_atomic(self):
|
||||
"""Specific objects like Session-ID stay atomic."""
|
||||
ac = _compose_deterministic(
|
||||
obligation_text="Session-ID muss nach Logout gelöscht werden",
|
||||
action="implementieren",
|
||||
object_="Session-ID",
|
||||
parent_title="Session Security",
|
||||
parent_severity="high",
|
||||
parent_category="security",
|
||||
is_test=False,
|
||||
is_reporting=False,
|
||||
)
|
||||
assert ac._atomicity == "atomic"
|
||||
assert ac._requires_decomposition is False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# ERROR CLASS 3: SESSION-SPECIFIC OBJECT CLASSES
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestSessionObjectClasses:
|
||||
"""Tests for session/cookie/jwt/federated_assertion object classification."""
|
||||
|
||||
def test_session_class(self):
|
||||
assert _classify_object("Session") == "session"
|
||||
|
||||
def test_sitzung_class(self):
|
||||
assert _classify_object("Sitzung") == "session"
|
||||
|
||||
def test_session_id_class(self):
|
||||
assert _classify_object("Session-ID") == "session"
|
||||
|
||||
def test_session_token_class(self):
|
||||
assert _classify_object("Session-Token") == "session"
|
||||
|
||||
def test_idle_timeout_class(self):
|
||||
assert _classify_object("Idle Timeout") == "session"
|
||||
|
||||
def test_logout_matches_record_via_log(self):
|
||||
"""'Logout' matches 'log' in record class (checked before session)."""
|
||||
# Ordering: record class checked before session — "log" substring matches
|
||||
assert _classify_object("Logout") == "record"
|
||||
|
||||
def test_abmeldung_matches_report_via_meldung(self):
|
||||
"""'Abmeldung' matches 'meldung' in report class (checked before session)."""
|
||||
assert _classify_object("Abmeldung") == "report"
|
||||
|
||||
def test_cookie_class(self):
|
||||
assert _classify_object("Cookie") == "cookie"
|
||||
|
||||
def test_session_cookie_matches_session_first(self):
|
||||
"""'Session-Cookie' matches 'session' in session class (checked before cookie)."""
|
||||
assert _classify_object("Session-Cookie") == "session"
|
||||
|
||||
def test_secure_flag_class(self):
|
||||
assert _classify_object("Secure-Flag") == "cookie"
|
||||
|
||||
def test_httponly_class(self):
|
||||
assert _classify_object("HttpOnly") == "cookie"
|
||||
|
||||
def test_samesite_class(self):
|
||||
assert _classify_object("SameSite") == "cookie"
|
||||
|
||||
def test_jwt_class(self):
|
||||
assert _classify_object("JWT") == "jwt"
|
||||
|
||||
def test_json_web_token_class(self):
|
||||
assert _classify_object("JSON Web Token") == "jwt"
|
||||
|
||||
def test_bearer_token_class(self):
|
||||
assert _classify_object("Bearer Token") == "jwt"
|
||||
|
||||
def test_saml_assertion_class(self):
|
||||
assert _classify_object("SAML Assertion") == "federated_assertion"
|
||||
|
||||
def test_oidc_class(self):
|
||||
assert _classify_object("OIDC Provider") == "federated_assertion"
|
||||
|
||||
def test_openid_class(self):
|
||||
assert _classify_object("OpenID Connect") == "federated_assertion"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# ERROR CLASS 4: SEVERITY CAPS FOR NEW ACTION TYPES
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestNewActionSeverityCaps:
|
||||
"""Tests for _ACTION_SEVERITY_CAP on new action types."""
|
||||
|
||||
def test_prevent_capped_at_high(self):
|
||||
assert _ACTION_SEVERITY_CAP.get("prevent") == "high"
|
||||
|
||||
def test_exclude_capped_at_high(self):
|
||||
assert _ACTION_SEVERITY_CAP.get("exclude") == "high"
|
||||
|
||||
def test_forbid_capped_at_high(self):
|
||||
assert _ACTION_SEVERITY_CAP.get("forbid") == "high"
|
||||
|
||||
def test_invalidate_capped_at_high(self):
|
||||
assert _ACTION_SEVERITY_CAP.get("invalidate") == "high"
|
||||
|
||||
def test_issue_capped_at_high(self):
|
||||
assert _ACTION_SEVERITY_CAP.get("issue") == "high"
|
||||
|
||||
def test_rotate_capped_at_medium(self):
|
||||
assert _ACTION_SEVERITY_CAP.get("rotate") == "medium"
|
||||
|
||||
def test_enforce_capped_at_high(self):
|
||||
assert _ACTION_SEVERITY_CAP.get("enforce") == "high"
|
||||
|
||||
def test_prevent_action_severity_in_compose(self):
|
||||
"""prevent + critical parent → capped to high."""
|
||||
ac = _compose_deterministic(
|
||||
obligation_text="Session-Tokens dürfen nicht im Klartext gespeichert werden",
|
||||
action="verhindern",
|
||||
object_="Klartextspeicherung",
|
||||
parent_title="Token Security",
|
||||
parent_severity="critical",
|
||||
parent_category="security",
|
||||
is_test=False,
|
||||
is_reporting=False,
|
||||
)
|
||||
assert ac.severity == "high"
|
||||
|
||||
def test_rotate_action_severity_in_compose(self):
|
||||
"""rotate + high parent → capped to medium."""
|
||||
ac = _compose_deterministic(
|
||||
obligation_text="Session-Tokens müssen regelmäßig rotiert werden",
|
||||
action="rotieren",
|
||||
object_="Session-Token",
|
||||
parent_title="Token Lifecycle",
|
||||
parent_severity="high",
|
||||
parent_category="security",
|
||||
is_test=False,
|
||||
is_reporting=False,
|
||||
)
|
||||
assert ac.severity == "medium"
|
||||
|
||||
234
backend-compliance/tests/test_v1_enrichment.py
Normal file
234
backend-compliance/tests/test_v1_enrichment.py
Normal file
@@ -0,0 +1,234 @@
|
||||
"""Tests for V1 Control Enrichment (Eigenentwicklung matching)."""
|
||||
import sys
|
||||
sys.path.insert(0, ".")
|
||||
|
||||
import pytest
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
from compliance.services.v1_enrichment import (
|
||||
enrich_v1_matches,
|
||||
get_v1_matches,
|
||||
count_v1_controls,
|
||||
)
|
||||
|
||||
|
||||
class TestV1EnrichmentDryRun:
|
||||
"""Dry-run mode should return statistics without touching DB."""
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_dry_run_returns_stats(self):
|
||||
mock_v1 = [
|
||||
MagicMock(
|
||||
id="uuid-v1-1",
|
||||
control_id="ACC-013",
|
||||
title="Zugriffskontrolle",
|
||||
objective="Zugriff einschraenken",
|
||||
category="access",
|
||||
),
|
||||
MagicMock(
|
||||
id="uuid-v1-2",
|
||||
control_id="SEC-005",
|
||||
title="Verschluesselung",
|
||||
objective="Daten verschluesseln",
|
||||
category="encryption",
|
||||
),
|
||||
]
|
||||
|
||||
mock_count = MagicMock(cnt=863)
|
||||
|
||||
with patch("compliance.services.v1_enrichment.SessionLocal") as mock_session:
|
||||
db = MagicMock()
|
||||
mock_session.return_value.__enter__ = MagicMock(return_value=db)
|
||||
mock_session.return_value.__exit__ = MagicMock(return_value=False)
|
||||
# First call: v1 controls, second call: count
|
||||
db.execute.return_value.fetchall.return_value = mock_v1
|
||||
db.execute.return_value.fetchone.return_value = mock_count
|
||||
|
||||
result = await enrich_v1_matches(dry_run=True, batch_size=100, offset=0)
|
||||
|
||||
assert result["dry_run"] is True
|
||||
assert result["total_v1"] == 863
|
||||
assert len(result["sample_controls"]) == 2
|
||||
assert result["sample_controls"][0]["control_id"] == "ACC-013"
|
||||
|
||||
|
||||
class TestV1EnrichmentExecution:
|
||||
"""Execution mode should find matches and insert them."""
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_processes_and_inserts_matches(self):
|
||||
mock_v1 = [
|
||||
MagicMock(
|
||||
id="uuid-v1-1",
|
||||
control_id="ACC-013",
|
||||
title="Zugriffskontrolle",
|
||||
objective="Zugriff auf Systeme einschraenken",
|
||||
category="access",
|
||||
),
|
||||
]
|
||||
|
||||
mock_count = MagicMock(cnt=1)
|
||||
|
||||
# Atomic control found in Qdrant (has parent, no source_citation)
|
||||
mock_atomic_row = MagicMock(
|
||||
id="uuid-atomic-1",
|
||||
control_id="SEC-042-A01",
|
||||
title="Verschluesselung (atomar)",
|
||||
source_citation=None, # Atomic controls don't have source_citation
|
||||
parent_control_uuid="uuid-reg-1",
|
||||
severity="high",
|
||||
category="encryption",
|
||||
)
|
||||
# Parent control (has source_citation)
|
||||
mock_parent_row = MagicMock(
|
||||
id="uuid-reg-1",
|
||||
control_id="SEC-042",
|
||||
title="Verschluesselung personenbezogener Daten",
|
||||
source_citation={"source": "DSGVO (EU) 2016/679", "article": "Art. 32"},
|
||||
parent_control_uuid=None,
|
||||
severity="high",
|
||||
category="encryption",
|
||||
)
|
||||
|
||||
mock_qdrant_results = [
|
||||
{
|
||||
"score": 0.89,
|
||||
"payload": {
|
||||
"control_uuid": "uuid-atomic-1",
|
||||
"control_id": "SEC-042-A01",
|
||||
"title": "Verschluesselung (atomar)",
|
||||
},
|
||||
},
|
||||
{
|
||||
"score": 0.65, # Below threshold
|
||||
"payload": {
|
||||
"control_uuid": "uuid-reg-2",
|
||||
"control_id": "SEC-100",
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
with patch("compliance.services.v1_enrichment.SessionLocal") as mock_session:
|
||||
db = MagicMock()
|
||||
mock_session.return_value.__enter__ = MagicMock(return_value=db)
|
||||
mock_session.return_value.__exit__ = MagicMock(return_value=False)
|
||||
|
||||
# Route queries to correct mock data
|
||||
def side_effect_execute(query, params=None):
|
||||
result = MagicMock()
|
||||
query_str = str(query)
|
||||
result.fetchall.return_value = mock_v1
|
||||
if "COUNT" in query_str:
|
||||
result.fetchone.return_value = mock_count
|
||||
elif "source_citation IS NOT NULL" in query_str:
|
||||
# Parent lookup
|
||||
result.fetchone.return_value = mock_parent_row
|
||||
elif "c.id = CAST" in query_str or "canonical_controls c" in query_str:
|
||||
# Direct atomic control lookup
|
||||
result.fetchone.return_value = mock_atomic_row
|
||||
else:
|
||||
result.fetchone.return_value = mock_count
|
||||
return result
|
||||
|
||||
db.execute.side_effect = side_effect_execute
|
||||
|
||||
with patch("compliance.services.v1_enrichment.get_embedding") as mock_embed, \
|
||||
patch("compliance.services.v1_enrichment.qdrant_search_cross_regulation") as mock_qdrant:
|
||||
mock_embed.return_value = [0.1] * 1024
|
||||
mock_qdrant.return_value = mock_qdrant_results
|
||||
|
||||
result = await enrich_v1_matches(dry_run=False, batch_size=100, offset=0)
|
||||
|
||||
assert result["dry_run"] is False
|
||||
assert result["processed"] == 1
|
||||
assert result["matches_inserted"] == 1
|
||||
assert len(result["sample_matches"]) == 1
|
||||
assert result["sample_matches"][0]["matched_control_id"] == "SEC-042"
|
||||
assert result["sample_matches"][0]["similarity_score"] == 0.89
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_empty_batch_returns_done(self):
|
||||
mock_count = MagicMock(cnt=863)
|
||||
|
||||
with patch("compliance.services.v1_enrichment.SessionLocal") as mock_session:
|
||||
db = MagicMock()
|
||||
mock_session.return_value.__enter__ = MagicMock(return_value=db)
|
||||
mock_session.return_value.__exit__ = MagicMock(return_value=False)
|
||||
db.execute.return_value.fetchall.return_value = []
|
||||
db.execute.return_value.fetchone.return_value = mock_count
|
||||
|
||||
result = await enrich_v1_matches(dry_run=False, batch_size=100, offset=9999)
|
||||
|
||||
assert result["processed"] == 0
|
||||
assert "alle v1 Controls verarbeitet" in result["message"]
|
||||
|
||||
|
||||
class TestV1MatchesEndpoint:
|
||||
"""Test the matches retrieval."""
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_returns_matches(self):
|
||||
mock_rows = [
|
||||
MagicMock(
|
||||
matched_control_id="SEC-042",
|
||||
matched_title="Verschluesselung",
|
||||
matched_objective="Daten verschluesseln",
|
||||
matched_severity="high",
|
||||
matched_category="encryption",
|
||||
matched_source="DSGVO (EU) 2016/679",
|
||||
matched_article="Art. 32",
|
||||
matched_source_citation={"source": "DSGVO (EU) 2016/679"},
|
||||
similarity_score=0.89,
|
||||
match_rank=1,
|
||||
match_method="embedding",
|
||||
),
|
||||
]
|
||||
|
||||
with patch("compliance.services.v1_enrichment.SessionLocal") as mock_session:
|
||||
db = MagicMock()
|
||||
mock_session.return_value.__enter__ = MagicMock(return_value=db)
|
||||
mock_session.return_value.__exit__ = MagicMock(return_value=False)
|
||||
db.execute.return_value.fetchall.return_value = mock_rows
|
||||
|
||||
result = await get_v1_matches("uuid-v1-1")
|
||||
|
||||
assert len(result) == 1
|
||||
assert result[0]["matched_control_id"] == "SEC-042"
|
||||
assert result[0]["similarity_score"] == 0.89
|
||||
assert result[0]["matched_source"] == "DSGVO (EU) 2016/679"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_empty_matches(self):
|
||||
with patch("compliance.services.v1_enrichment.SessionLocal") as mock_session:
|
||||
db = MagicMock()
|
||||
mock_session.return_value.__enter__ = MagicMock(return_value=db)
|
||||
mock_session.return_value.__exit__ = MagicMock(return_value=False)
|
||||
db.execute.return_value.fetchall.return_value = []
|
||||
|
||||
result = await get_v1_matches("uuid-nonexistent")
|
||||
|
||||
assert result == []
|
||||
|
||||
|
||||
class TestEigenentwicklungDetection:
|
||||
"""Verify the Eigenentwicklung detection query."""
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_count_v1_controls(self):
|
||||
mock_count = MagicMock(cnt=863)
|
||||
|
||||
with patch("compliance.services.v1_enrichment.SessionLocal") as mock_session:
|
||||
db = MagicMock()
|
||||
mock_session.return_value.__enter__ = MagicMock(return_value=db)
|
||||
mock_session.return_value.__exit__ = MagicMock(return_value=False)
|
||||
db.execute.return_value.fetchone.return_value = mock_count
|
||||
|
||||
result = await count_v1_controls()
|
||||
|
||||
assert result == 863
|
||||
# Verify the query includes all conditions
|
||||
call_args = db.execute.call_args[0][0]
|
||||
query_str = str(call_args)
|
||||
assert "generation_strategy = 'ungrouped'" in query_str
|
||||
assert "source_citation IS NULL" in query_str
|
||||
assert "parent_control_uuid IS NULL" in query_str
|
||||
@@ -152,6 +152,8 @@ erDiagram
|
||||
| `POST` | `/v1/canonical/generate/backfill-domain` | Domain/Category/Target-Audience nachpflegen (Anthropic) |
|
||||
| `GET` | `/v1/canonical/blocked-sources` | Gesperrte Quellen (Rule 3) |
|
||||
| `POST` | `/v1/canonical/blocked-sources/cleanup` | Cleanup-Workflow starten |
|
||||
| `POST` | `/v1/canonical/obligations/dedup` | Obligation-Duplikate markieren (dry_run, batch_size, offset) |
|
||||
| `GET` | `/v1/canonical/obligations/dedup-stats` | Dedup-Statistik (total, by_state, pending) |
|
||||
|
||||
### Beispiel: Control abrufen
|
||||
|
||||
@@ -984,6 +986,37 @@ vom Parent-Obligation uebernommen.
|
||||
**Datei:** `compliance/services/decomposition_pass.py`
|
||||
**Test-Script:** `scripts/qa/test_pass0a.py` (standalone, speichert JSON)
|
||||
|
||||
#### Obligation Deduplizierung
|
||||
|
||||
Die Decomposition-Pipeline erzeugt pro Rich Control mehrere Obligation Candidates.
|
||||
Durch Wiederholungen in der Pipeline koennen identische `candidate_id`-Eintraege
|
||||
mehrfach existieren (z.B. 5x `OC-AUTH-839-01` mit leicht unterschiedlichem Text).
|
||||
|
||||
**Dedup-Strategie:** Pro `candidate_id` wird der aelteste Eintrag (`MIN(created_at)`)
|
||||
behalten. Alle anderen erhalten:
|
||||
|
||||
- `release_state = 'duplicate'`
|
||||
- `merged_into_id` → UUID des behaltenen Eintrags
|
||||
- `quality_flags.dedup_reason` → z.B. `"duplicate of OC-AUTH-839-01"`
|
||||
|
||||
**Endpunkte:**
|
||||
|
||||
```bash
|
||||
# Dry Run — zaehlt betroffene Duplikat-Gruppen
|
||||
curl -X POST "https://macmini:8002/api/compliance/v1/canonical/obligations/dedup?dry_run=true"
|
||||
|
||||
# Ausfuehren — markiert alle Duplikate
|
||||
curl -X POST "https://macmini:8002/api/compliance/v1/canonical/obligations/dedup?dry_run=false"
|
||||
|
||||
# Statistiken
|
||||
curl "https://macmini:8002/api/compliance/v1/canonical/obligations/dedup-stats"
|
||||
```
|
||||
|
||||
**Stand (2026-03-26):** 76.046 Obligations gesamt, davon 34.617 als `duplicate` markiert.
|
||||
41.043 aktive Obligations verbleiben (composed + validated).
|
||||
|
||||
**Migration:** `081_obligation_dedup_state.sql` — Fuegt `'duplicate'` zum `release_state` Constraint hinzu.
|
||||
|
||||
---
|
||||
|
||||
### Migration Passes (1-5)
|
||||
@@ -1033,6 +1066,9 @@ Die Crosswalk-Matrix bildet diese N:M-Beziehung ab.
|
||||
|---------|-------------|
|
||||
| `obligation_candidates` | Extrahierte atomare Pflichten aus Rich Controls |
|
||||
| `obligation_candidates.obligation_type` | `pflicht` / `empfehlung` / `kann` (3-Tier-Klassifizierung) |
|
||||
| `obligation_candidates.release_state` | `extracted` / `validated` / `rejected` / `composed` / `merged` / `duplicate` |
|
||||
| `obligation_candidates.merged_into_id` | UUID des behaltenen Eintrags (bei `duplicate`/`merged`) |
|
||||
| `obligation_candidates.quality_flags` | JSONB mit Metadaten (u.a. `dedup_reason`, `dedup_kept_id`) |
|
||||
| `canonical_controls.parent_control_uuid` | Self-Referenz zum Rich Control (neues Feld) |
|
||||
| `canonical_controls.decomposition_method` | Zerlegungsmethode (neues Feld) |
|
||||
| `canonical_controls.obligation_type` | Uebernommen von Obligation: pflicht/empfehlung/kann |
|
||||
|
||||
Reference in New Issue
Block a user