feat: V1 Control Enrichment — Eigenentwicklung-Label, regulatorisches Matching & Vergleichsansicht
All checks were successful
CI/CD / go-lint (push) Has been skipped
CI/CD / python-lint (push) Has been skipped
CI/CD / nodejs-lint (push) Has been skipped
CI/CD / test-go-ai-compliance (push) Successful in 39s
CI/CD / test-python-backend-compliance (push) Successful in 32s
CI/CD / test-python-document-crawler (push) Successful in 20s
CI/CD / test-python-dsms-gateway (push) Successful in 16s
CI/CD / validate-canonical-controls (push) Successful in 9s
CI/CD / Deploy (push) Successful in 4s
All checks were successful
CI/CD / go-lint (push) Has been skipped
CI/CD / python-lint (push) Has been skipped
CI/CD / nodejs-lint (push) Has been skipped
CI/CD / test-go-ai-compliance (push) Successful in 39s
CI/CD / test-python-backend-compliance (push) Successful in 32s
CI/CD / test-python-document-crawler (push) Successful in 20s
CI/CD / test-python-dsms-gateway (push) Successful in 16s
CI/CD / validate-canonical-controls (push) Successful in 9s
CI/CD / Deploy (push) Successful in 4s
863 v1-Controls (manuell geschrieben, ohne Rechtsgrundlage) werden als "Eigenentwicklung" gekennzeichnet und automatisch per Embedding-Similarity mit regulatorischen Controls (DSGVO, NIS2, OWASP etc.) abgeglichen.

Backend:
- Migration 080: Tabelle v1_control_matches (Cross-Reference)
- v1_enrichment.py: Batch-Matching via BGE-M3 + Qdrant (Threshold 0.75)
- 3 neue API-Endpoints: enrich-v1-matches, v1-matches, v1-enrichment-stats
- 6 Tests (dry-run, execution, matches, pagination, detection)

Frontend:
- Orange "Eigenentwicklung"-Badge statt grauem "v1" (wenn kein Source)
- "Regulatorische Abdeckung"-Sektion im ControlDetail mit Match-Karten
- Side-by-Side V1CompareView (Eigenentwicklung vs. regulatorisch gedeckt)
- Prev/Next-Navigation durch alle Matches

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -135,6 +135,19 @@ export async function GET(request: NextRequest) {
|
|||||||
backendPath = '/api/compliance/v1/canonical/blocked-sources'
|
backendPath = '/api/compliance/v1/canonical/blocked-sources'
|
||||||
break
|
break
|
||||||
|
|
||||||
|
case 'v1-matches': {
|
||||||
|
const matchId = searchParams.get('id')
|
||||||
|
if (!matchId) {
|
||||||
|
return NextResponse.json({ error: 'Missing control id' }, { status: 400 })
|
||||||
|
}
|
||||||
|
backendPath = `/api/compliance/v1/canonical/controls/${encodeURIComponent(matchId)}/v1-matches`
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
case 'v1-enrichment-stats':
|
||||||
|
backendPath = '/api/compliance/v1/canonical/controls/v1-enrichment-stats'
|
||||||
|
break
|
||||||
|
|
||||||
case 'controls-customer': {
|
case 'controls-customer': {
|
||||||
const custSeverity = searchParams.get('severity')
|
const custSeverity = searchParams.get('severity')
|
||||||
const custDomain = searchParams.get('domain')
|
const custDomain = searchParams.get('domain')
|
||||||
@@ -201,6 +214,11 @@ export async function POST(request: NextRequest) {
|
|||||||
backendPath = '/api/compliance/v1/canonical/generate/bulk-review'
|
backendPath = '/api/compliance/v1/canonical/generate/bulk-review'
|
||||||
} else if (endpoint === 'blocked-sources-cleanup') {
|
} else if (endpoint === 'blocked-sources-cleanup') {
|
||||||
backendPath = '/api/compliance/v1/canonical/blocked-sources/cleanup'
|
backendPath = '/api/compliance/v1/canonical/blocked-sources/cleanup'
|
||||||
|
} else if (endpoint === 'enrich-v1-matches') {
|
||||||
|
// Forward the paging/dry-run options to the backend. The values come straight from the
// incoming request, so they are serialized through URLSearchParams: this percent-encodes
// them and prevents a crafted value such as "true&offset=999" from injecting additional
// query parameters into the backend call. Defaults are unchanged (dry_run=true,
// batch_size=100, offset=0).
const enrichParams = new URLSearchParams({
  dry_run: searchParams.get('dry_run') ?? 'true',
  batch_size: searchParams.get('batch_size') ?? '100',
  offset: searchParams.get('offset') ?? '0',
})
backendPath = `/api/compliance/v1/canonical/controls/enrich-v1-matches?${enrichParams.toString()}`
|
||||||
} else if (endpoint === 'similarity-check') {
|
} else if (endpoint === 'similarity-check') {
|
||||||
const controlId = searchParams.get('id')
|
const controlId = searchParams.get('id')
|
||||||
if (!controlId) {
|
if (!controlId) {
|
||||||
|
|||||||
@@ -308,7 +308,7 @@ export default function AtomicControlsPage() {
|
|||||||
<StateBadge state={ctrl.release_state} />
|
<StateBadge state={ctrl.release_state} />
|
||||||
<CategoryBadge category={ctrl.category} />
|
<CategoryBadge category={ctrl.category} />
|
||||||
<TargetAudienceBadge audience={ctrl.target_audience} />
|
<TargetAudienceBadge audience={ctrl.target_audience} />
|
||||||
<GenerationStrategyBadge strategy={ctrl.generation_strategy} />
|
<GenerationStrategyBadge strategy={ctrl.generation_strategy} pipelineInfo={ctrl} />
|
||||||
<ObligationTypeBadge type={ctrl.generation_metadata?.obligation_type as string} />
|
<ObligationTypeBadge type={ctrl.generation_metadata?.obligation_type as string} />
|
||||||
</div>
|
</div>
|
||||||
<h3 className="text-sm font-medium text-gray-900 group-hover:text-violet-700">{ctrl.title}</h3>
|
<h3 className="text-sm font-medium text-gray-900 group-hover:text-violet-700">{ctrl.title}</h3>
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ import {
|
|||||||
import {
|
import {
|
||||||
CanonicalControl, EFFORT_LABELS, BACKEND_URL,
|
CanonicalControl, EFFORT_LABELS, BACKEND_URL,
|
||||||
SeverityBadge, StateBadge, LicenseRuleBadge, VerificationMethodBadge, CategoryBadge, EvidenceTypeBadge, TargetAudienceBadge,
|
SeverityBadge, StateBadge, LicenseRuleBadge, VerificationMethodBadge, CategoryBadge, EvidenceTypeBadge, TargetAudienceBadge,
|
||||||
ObligationTypeBadge, GenerationStrategyBadge,
|
ObligationTypeBadge, GenerationStrategyBadge, isEigenentwicklung,
|
||||||
ExtractionMethodBadge, RegulationCountBadge,
|
ExtractionMethodBadge, RegulationCountBadge,
|
||||||
VERIFICATION_METHODS, CATEGORY_OPTIONS, EVIDENCE_TYPE_OPTIONS,
|
VERIFICATION_METHODS, CATEGORY_OPTIONS, EVIDENCE_TYPE_OPTIONS,
|
||||||
ObligationInfo, DocumentReference, MergedDuplicate, RegulationSummary,
|
ObligationInfo, DocumentReference, MergedDuplicate, RegulationSummary,
|
||||||
@@ -65,6 +65,20 @@ interface TraceabilityData {
|
|||||||
regulations_summary?: RegulationSummary[]
|
regulations_summary?: RegulationSummary[]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * One regulatory match for a v1 "Eigenentwicklung" control, as returned by the
 * `v1-matches` proxy endpoint.
 */
interface V1Match {
  // --- matched control ---
  /** Human-readable id of the matched control; used for navigation and display. */
  matched_control_id: string
  matched_title: string
  matched_objective: string
  matched_severity: string
  matched_category: string

  // --- regulatory provenance (may be absent) ---
  matched_source: string | null
  matched_article: string | null
  matched_source_citation: Record<string, string> | null

  // --- match quality ---
  /** Rendered as a percentage via `* 100`; presumably in the range 0..1 — confirm with backend. */
  similarity_score: number
  /** Position of this match among the control's matches — presumably 1-based; verify. */
  match_rank: number
  match_method: string
}
|
||||||
|
|
||||||
interface ControlDetailProps {
|
interface ControlDetailProps {
|
||||||
ctrl: CanonicalControl
|
ctrl: CanonicalControl
|
||||||
onBack: () => void
|
onBack: () => void
|
||||||
@@ -73,6 +87,7 @@ interface ControlDetailProps {
|
|||||||
onReview: (controlId: string, action: string) => void
|
onReview: (controlId: string, action: string) => void
|
||||||
onRefresh?: () => void
|
onRefresh?: () => void
|
||||||
onNavigateToControl?: (controlId: string) => void
|
onNavigateToControl?: (controlId: string) => void
|
||||||
|
onCompare?: (ctrl: CanonicalControl, matches: V1Match[]) => void
|
||||||
// Review mode navigation
|
// Review mode navigation
|
||||||
reviewMode?: boolean
|
reviewMode?: boolean
|
||||||
reviewIndex?: number
|
reviewIndex?: number
|
||||||
@@ -89,6 +104,7 @@ export function ControlDetail({
|
|||||||
onReview,
|
onReview,
|
||||||
onRefresh,
|
onRefresh,
|
||||||
onNavigateToControl,
|
onNavigateToControl,
|
||||||
|
onCompare,
|
||||||
reviewMode,
|
reviewMode,
|
||||||
reviewIndex = 0,
|
reviewIndex = 0,
|
||||||
reviewTotal = 0,
|
reviewTotal = 0,
|
||||||
@@ -101,6 +117,9 @@ export function ControlDetail({
|
|||||||
const [merging, setMerging] = useState(false)
|
const [merging, setMerging] = useState(false)
|
||||||
const [traceability, setTraceability] = useState<TraceabilityData | null>(null)
|
const [traceability, setTraceability] = useState<TraceabilityData | null>(null)
|
||||||
const [loadingTrace, setLoadingTrace] = useState(false)
|
const [loadingTrace, setLoadingTrace] = useState(false)
|
||||||
|
const [v1Matches, setV1Matches] = useState<V1Match[]>([])
|
||||||
|
const [loadingV1, setLoadingV1] = useState(false)
|
||||||
|
const eigenentwicklung = isEigenentwicklung(ctrl)
|
||||||
|
|
||||||
const loadTraceability = useCallback(async () => {
|
const loadTraceability = useCallback(async () => {
|
||||||
setLoadingTrace(true)
|
setLoadingTrace(true)
|
||||||
@@ -117,9 +136,21 @@ export function ControlDetail({
|
|||||||
finally { setLoadingTrace(false) }
|
finally { setLoadingTrace(false) }
|
||||||
}, [ctrl.control_id])
|
}, [ctrl.control_id])
|
||||||
|
|
||||||
|
// Loads the regulatory matches for the current control through the `v1-matches` proxy
// endpoint. Only Eigenentwicklung controls have matches; for every other control the
// list is simply cleared. Any failure (network error, non-OK status, unexpected payload)
// results in an empty list rather than an exception.
const loadV1Matches = useCallback(async () => {
  if (!eigenentwicklung) { setV1Matches([]); return }
  setLoadingV1(true)
  try {
    // Encode the id: it is user-visible data and must not be able to alter the query string.
    const res = await fetch(`${BACKEND_URL}?endpoint=v1-matches&id=${encodeURIComponent(ctrl.control_id)}`)
    const payload: unknown = res.ok ? await res.json() : []
    // The render path calls `v1Matches.map(...)`, so never store a non-array payload.
    setV1Matches(Array.isArray(payload) ? (payload as V1Match[]) : [])
  } catch {
    setV1Matches([])
  } finally {
    setLoadingV1(false)
  }
}, [ctrl.control_id, eigenentwicklung])
|
||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
loadSimilarControls()
|
loadSimilarControls()
|
||||||
loadTraceability()
|
loadTraceability()
|
||||||
|
loadV1Matches()
|
||||||
setSelectedDuplicates(new Set())
|
setSelectedDuplicates(new Set())
|
||||||
// eslint-disable-next-line react-hooks/exhaustive-deps
|
// eslint-disable-next-line react-hooks/exhaustive-deps
|
||||||
}, [ctrl.control_id])
|
}, [ctrl.control_id])
|
||||||
@@ -187,7 +218,7 @@ export function ControlDetail({
|
|||||||
<CategoryBadge category={ctrl.category} />
|
<CategoryBadge category={ctrl.category} />
|
||||||
<EvidenceTypeBadge type={ctrl.evidence_type} />
|
<EvidenceTypeBadge type={ctrl.evidence_type} />
|
||||||
<TargetAudienceBadge audience={ctrl.target_audience} />
|
<TargetAudienceBadge audience={ctrl.target_audience} />
|
||||||
<GenerationStrategyBadge strategy={ctrl.generation_strategy} />
|
<GenerationStrategyBadge strategy={ctrl.generation_strategy} pipelineInfo={ctrl} />
|
||||||
<ObligationTypeBadge type={ctrl.generation_metadata?.obligation_type as string} />
|
<ObligationTypeBadge type={ctrl.generation_metadata?.obligation_type as string} />
|
||||||
</div>
|
</div>
|
||||||
<h2 className="text-lg font-semibold text-gray-900 mt-1">{ctrl.title}</h2>
|
<h2 className="text-lg font-semibold text-gray-900 mt-1">{ctrl.title}</h2>
|
||||||
@@ -303,6 +334,75 @@ export function ControlDetail({
|
|||||||
</section>
|
</section>
|
||||||
)}
|
)}
|
||||||
|
|
||||||
|
{/* Regulatorische Abdeckung (Eigenentwicklung) */}
|
||||||
|
{eigenentwicklung && (
|
||||||
|
<section className="bg-orange-50 border border-orange-200 rounded-lg p-4">
|
||||||
|
<div className="flex items-center gap-2 mb-3">
|
||||||
|
<Scale className="w-4 h-4 text-orange-600" />
|
||||||
|
<h3 className="text-sm font-semibold text-orange-900">
|
||||||
|
Regulatorische Abdeckung
|
||||||
|
</h3>
|
||||||
|
{loadingV1 && <span className="text-xs text-orange-400">Laden...</span>}
|
||||||
|
</div>
|
||||||
|
{v1Matches.length > 0 ? (
|
||||||
|
<div className="space-y-2">
|
||||||
|
{v1Matches.map((match, i) => (
|
||||||
|
<div key={i} className="bg-white/60 border border-orange-100 rounded-lg p-3">
|
||||||
|
<div className="flex items-start justify-between gap-2">
|
||||||
|
<div className="flex-1 min-w-0">
|
||||||
|
<div className="flex items-center gap-2 flex-wrap mb-1">
|
||||||
|
{match.matched_source && (
|
||||||
|
<span className="text-xs font-semibold text-blue-800 bg-blue-100 px-1.5 py-0.5 rounded">
|
||||||
|
{match.matched_source}
|
||||||
|
</span>
|
||||||
|
)}
|
||||||
|
{match.matched_article && (
|
||||||
|
<span className="text-xs text-blue-700 bg-blue-50 px-1.5 py-0.5 rounded">
|
||||||
|
{match.matched_article}
|
||||||
|
</span>
|
||||||
|
)}
|
||||||
|
<span className={`text-xs font-medium px-1.5 py-0.5 rounded ${
|
||||||
|
match.similarity_score >= 0.85 ? 'bg-green-100 text-green-700' :
|
||||||
|
match.similarity_score >= 0.80 ? 'bg-yellow-100 text-yellow-700' :
|
||||||
|
'bg-gray-100 text-gray-600'
|
||||||
|
}`}>
|
||||||
|
{(match.similarity_score * 100).toFixed(0)}%
|
||||||
|
</span>
|
||||||
|
</div>
|
||||||
|
<p className="text-sm text-gray-800">
|
||||||
|
{onNavigateToControl ? (
|
||||||
|
<button
|
||||||
|
onClick={() => onNavigateToControl(match.matched_control_id)}
|
||||||
|
className="font-mono text-xs text-purple-600 bg-purple-50 px-1.5 py-0.5 rounded hover:bg-purple-100 hover:underline mr-1.5"
|
||||||
|
>
|
||||||
|
{match.matched_control_id}
|
||||||
|
</button>
|
||||||
|
) : (
|
||||||
|
<span className="font-mono text-xs text-purple-600 bg-purple-50 px-1.5 py-0.5 rounded mr-1.5">
|
||||||
|
{match.matched_control_id}
|
||||||
|
</span>
|
||||||
|
)}
|
||||||
|
{match.matched_title}
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
|
{onCompare && (
|
||||||
|
<button
|
||||||
|
onClick={() => onCompare(ctrl, v1Matches)}
|
||||||
|
className="text-xs text-orange-600 border border-orange-300 rounded px-2 py-1 hover:bg-orange-100 whitespace-nowrap flex-shrink-0"
|
||||||
|
>
|
||||||
|
Vergleichen
|
||||||
|
</button>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
))}
|
||||||
|
</div>
|
||||||
|
) : !loadingV1 ? (
|
||||||
|
<p className="text-sm text-orange-600">Keine regulatorische Abdeckung gefunden. Dieses Control ist eine reine Eigenentwicklung.</p>
|
||||||
|
) : null}
|
||||||
|
</section>
|
||||||
|
)}
|
||||||
|
|
||||||
{/* Rechtsgrundlagen / Traceability (atomic controls) */}
|
{/* Rechtsgrundlagen / Traceability (atomic controls) */}
|
||||||
{traceability && traceability.parent_links.length > 0 && (
|
{traceability && traceability.parent_links.length > 0 && (
|
||||||
<section className="bg-violet-50 border border-violet-200 rounded-lg p-4">
|
<section className="bg-violet-50 border border-violet-200 rounded-lg p-4">
|
||||||
|
|||||||
@@ -15,7 +15,7 @@ import {
|
|||||||
// Compact Control Panel (used on both sides of the comparison)
|
// Compact Control Panel (used on both sides of the comparison)
|
||||||
// =============================================================================
|
// =============================================================================
|
||||||
|
|
||||||
function ControlPanel({ ctrl, label, highlight }: { ctrl: CanonicalControl; label: string; highlight?: boolean }) {
|
export function ControlPanel({ ctrl, label, highlight }: { ctrl: CanonicalControl; label: string; highlight?: boolean }) {
|
||||||
return (
|
return (
|
||||||
<div className={`flex flex-col h-full overflow-y-auto ${highlight ? 'bg-yellow-50' : 'bg-white'}`}>
|
<div className={`flex flex-col h-full overflow-y-auto ${highlight ? 'bg-yellow-50' : 'bg-white'}`}>
|
||||||
{/* Panel Header */}
|
{/* Panel Header */}
|
||||||
|
|||||||
@@ -0,0 +1,155 @@
|
|||||||
|
'use client'
|
||||||
|
|
||||||
|
import { useState, useEffect } from 'react'
|
||||||
|
import {
|
||||||
|
ArrowLeft, ChevronLeft, SkipForward, Scale,
|
||||||
|
} from 'lucide-react'
|
||||||
|
import { CanonicalControl, BACKEND_URL } from './helpers'
|
||||||
|
import { ControlPanel } from './ReviewCompare'
|
||||||
|
|
||||||
|
/**
 * One regulatory match for a v1 "Eigenentwicklung" control, as returned by the
 * `v1-matches` proxy endpoint.
 *
 * Exported so that other modules can import this shape instead of re-declaring it.
 */
export interface V1Match {
  /** Human-readable id of the matched control; used to load the full control for comparison. */
  matched_control_id: string
  matched_title: string
  matched_objective: string
  matched_severity: string
  matched_category: string
  /** Name of the regulatory source, when known. */
  matched_source: string | null
  /** Article/section within the source, when known. */
  matched_article: string | null
  matched_source_citation: Record<string, string> | null
  /** Rendered as a percentage via `* 100`; presumably in the range 0..1 — confirm with backend. */
  similarity_score: number
  /** Position of this match among the control's matches — presumably 1-based; verify. */
  match_rank: number
  match_method: string
}
|
||||||
|
|
||||||
|
/** Props accepted by the side-by-side V1 comparison view. */
interface V1CompareViewProps {
  /** The Eigenentwicklung control shown in the left panel. */
  v1Control: CanonicalControl
  /** Regulatory matches the user can step through; the right panel shows the current one. */
  matches: V1Match[]
  /** Invoked when the user leaves the comparison view. */
  onBack: () => void
  /** Optional: open the given control's detail view. When omitted, the "Zum Control" button is hidden. */
  onNavigateToControl?: (controlId: string) => void
}
|
||||||
|
|
||||||
|
/**
 * Side-by-side comparison of a v1 "Eigenentwicklung" control (left) with one of its
 * regulatory matches (right).
 *
 * The match list is supplied by the caller; the full matched control is fetched on demand
 * via the `control` proxy endpoint whenever the selected match changes. Prev/next buttons
 * step through `matches`.
 *
 * @param v1Control            Control rendered in the left panel.
 * @param matches              Matches to step through; may be empty.
 * @param onBack               Called when the user leaves the view.
 * @param onNavigateToControl  Optional; when given, a "Zum Control" button opens the matched control.
 */
export function V1CompareView({ v1Control, matches, onBack, onNavigateToControl }: V1CompareViewProps) {
  const [currentMatchIndex, setCurrentMatchIndex] = useState(0)
  const [matchedControl, setMatchedControl] = useState<CanonicalControl | null>(null)
  const [loading, setLoading] = useState(false)

  // Clamp the index so that a shorter `matches` array never leaves us pointing past the end.
  const lastIndex = Math.max(0, matches.length - 1)
  const safeIndex = Math.min(currentMatchIndex, lastIndex)
  const currentMatch = matches[safeIndex]
  // Key the fetch on the id (a primitive), not on the match object, so a re-created but
  // identical `matches` array does not trigger a refetch.
  const matchedControlId = currentMatch?.matched_control_id

  // Load the full matched control whenever the selected match changes.
  useEffect(() => {
    if (!matchedControlId) {
      setMatchedControl(null)
      setLoading(false)
      return
    }
    // Guards against out-of-order responses: when the user steps quickly through matches,
    // a slow earlier request must not overwrite the newer result or clear its spinner.
    let cancelled = false
    const load = async () => {
      setLoading(true)
      try {
        const res = await fetch(`${BACKEND_URL}?endpoint=control&id=${encodeURIComponent(matchedControlId)}`)
        const data = res.ok ? await res.json() : null
        if (!cancelled) setMatchedControl(data)
      } catch {
        if (!cancelled) setMatchedControl(null)
      } finally {
        if (!cancelled) setLoading(false)
      }
    }
    load()
    return () => { cancelled = true }
  }, [matchedControlId])

  return (
    <div className="flex flex-col h-full">
      {/* Header */}
      <div className="border-b border-gray-200 bg-white px-6 py-3 flex items-center justify-between">
        <div className="flex items-center gap-3">
          <button onClick={onBack} aria-label="Zurueck" className="text-gray-400 hover:text-gray-600">
            <ArrowLeft className="w-5 h-5" />
          </button>
          <div>
            <div className="flex items-center gap-2">
              <Scale className="w-4 h-4 text-orange-500" />
              <span className="text-sm font-semibold text-gray-900">V1-Vergleich</span>
              {currentMatch && (
                <span className={`text-xs font-medium px-2 py-0.5 rounded-full ${similarityBadgeClass(currentMatch.similarity_score)}`}>
                  {(currentMatch.similarity_score * 100).toFixed(1)}% Aehnlichkeit
                </span>
              )}
            </div>
          </div>
        </div>

        <div className="flex items-center gap-2">
          {/* Navigation */}
          <div className="flex items-center gap-1">
            <button
              onClick={() => setCurrentMatchIndex(Math.max(0, safeIndex - 1))}
              disabled={safeIndex === 0}
              aria-label="Vorheriger Match"
              className="p-1 text-gray-400 hover:text-gray-600 disabled:opacity-30"
            >
              <ChevronLeft className="w-4 h-4" />
            </button>
            <span className="text-xs text-gray-500 font-medium">
              {/* Show "0 / 0" rather than "1 / 0" when there is nothing to step through. */}
              {matches.length === 0 ? 0 : safeIndex + 1} / {matches.length}
            </span>
            <button
              onClick={() => setCurrentMatchIndex(Math.min(lastIndex, safeIndex + 1))}
              disabled={safeIndex >= matches.length - 1}
              aria-label="Naechster Match"
              className="p-1 text-gray-400 hover:text-gray-600 disabled:opacity-30"
            >
              <SkipForward className="w-4 h-4" />
            </button>
          </div>

          {/* Navigate to matched control */}
          {onNavigateToControl && matchedControl && (
            <button
              onClick={() => { onBack(); onNavigateToControl(matchedControl.control_id) }}
              className="px-3 py-1.5 text-sm text-purple-600 border border-purple-300 rounded-lg hover:bg-purple-50"
            >
              Zum Control
            </button>
          )}
        </div>
      </div>

      {/* Source info bar */}
      {currentMatch && (currentMatch.matched_source || currentMatch.matched_article) && (
        <div className="px-6 py-2 bg-blue-50 border-b border-blue-200 flex items-center gap-2 text-sm">
          <Scale className="w-3.5 h-3.5 text-blue-600" />
          {currentMatch.matched_source && (
            <span className="font-semibold text-blue-900">{currentMatch.matched_source}</span>
          )}
          {currentMatch.matched_article && (
            <span className="text-blue-700">{currentMatch.matched_article}</span>
          )}
        </div>
      )}

      {/* Side-by-Side Panels */}
      <div className="flex-1 flex overflow-hidden">
        {/* Left: V1 Eigenentwicklung */}
        <div className="w-1/2 border-r border-gray-200 overflow-y-auto">
          <ControlPanel ctrl={v1Control} label="Eigenentwicklung" highlight />
        </div>

        {/* Right: Regulatory match */}
        <div className="w-1/2 overflow-y-auto">
          {loading ? (
            <div className="flex items-center justify-center h-full">
              <div className="animate-spin rounded-full h-6 w-6 border-2 border-purple-600 border-t-transparent" />
            </div>
          ) : matchedControl ? (
            <ControlPanel ctrl={matchedControl} label="Regulatorisch gedeckt" />
          ) : (
            <div className="flex items-center justify-center h-full text-gray-400 text-sm">
              Control konnte nicht geladen werden
            </div>
          )}
        </div>
      </div>
    </div>
  )
}

/** Maps a similarity score to the Tailwind colour classes of its badge (green >= 0.85, yellow >= 0.80, else gray). */
function similarityBadgeClass(score: number): string {
  if (score >= 0.85) return 'bg-green-100 text-green-700'
  if (score >= 0.80) return 'bg-yellow-100 text-yellow-700'
  return 'bg-gray-100 text-gray-600'
}
|
||||||
@@ -52,6 +52,7 @@ export interface CanonicalControl {
|
|||||||
parent_control_id?: string | null
|
parent_control_id?: string | null
|
||||||
parent_control_title?: string | null
|
parent_control_title?: string | null
|
||||||
decomposition_method?: string | null
|
decomposition_method?: string | null
|
||||||
|
pipeline_version?: number | string | null
|
||||||
created_at: string
|
created_at: string
|
||||||
updated_at: string
|
updated_at: string
|
||||||
}
|
}
|
||||||
@@ -293,7 +294,29 @@ export function TargetAudienceBadge({ audience }: { audience: string | string[]
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
export function GenerationStrategyBadge({ strategy }: { strategy: string | null | undefined }) {
|
/**
 * The subset of control fields that `isEigenentwicklung` inspects. Every field is
 * optional, so any object carrying (some of) these fields can be passed.
 */
export interface CanonicalControlPipelineInfo {
  /** Pipeline generation that produced the control; compared against '1' after string conversion. */
  pipeline_version?: number | string | null
  /** Regulatory source citation; any non-null object counts as "has a source". */
  source_citation?: Record<string, string> | null
  /** UUID of the parent control, if the control was derived from another one. */
  parent_control_uuid?: string | null
}
|
||||||
|
|
||||||
|
/**
 * Decides whether a control counts as an "Eigenentwicklung".
 *
 * A control qualifies only when all of the following hold:
 *  - its generation strategy is missing/empty or exactly 'ungrouped',
 *  - its pipeline version is missing/falsy or stringifies to '1',
 *  - it has no source citation (any object, even an empty one, counts as a citation),
 *  - it has no parent control UUID.
 *
 * @param ctrl  Control (or any object exposing the inspected fields).
 * @returns `true` when every condition above is met, otherwise `false`.
 */
export function isEigenentwicklung(ctrl: CanonicalControlPipelineInfo & { generation_strategy?: string | null }): boolean {
  const {
    generation_strategy: strategy,
    pipeline_version: version,
    source_citation: citation,
    parent_control_uuid: parentUuid,
  } = ctrl

  // A real (non-'ungrouped') strategy means the control came out of a generation pipeline.
  if (strategy && strategy !== 'ungrouped') return false
  // Any explicit version other than 1 rules the control out.
  if (version && String(version) !== '1') return false
  // A citation ties the control to a regulatory source.
  if (citation) return false
  // Derived controls (with a parent) are not standalone developments.
  return !parentUuid
}
|
||||||
|
|
||||||
|
export function GenerationStrategyBadge({ strategy, pipelineInfo }: {
|
||||||
|
strategy: string | null | undefined
|
||||||
|
pipelineInfo?: CanonicalControlPipelineInfo & { generation_strategy?: string | null }
|
||||||
|
}) {
|
||||||
|
// Eigenentwicklung detection: v1 + no source + no parent
|
||||||
|
if (pipelineInfo && isEigenentwicklung(pipelineInfo)) {
|
||||||
|
return <span className="inline-flex items-center px-1.5 py-0.5 rounded text-xs font-medium bg-orange-100 text-orange-700">Eigenentwicklung</span>
|
||||||
|
}
|
||||||
if (!strategy || strategy === 'ungrouped') {
|
if (!strategy || strategy === 'ungrouped') {
|
||||||
return <span className="inline-flex items-center px-1.5 py-0.5 rounded text-xs font-medium bg-gray-100 text-gray-500">v1</span>
|
return <span className="inline-flex items-center px-1.5 py-0.5 rounded text-xs font-medium bg-gray-100 text-gray-500">v1</span>
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -15,6 +15,7 @@ import {
|
|||||||
import { ControlForm } from './components/ControlForm'
|
import { ControlForm } from './components/ControlForm'
|
||||||
import { ControlDetail } from './components/ControlDetail'
|
import { ControlDetail } from './components/ControlDetail'
|
||||||
import { ReviewCompare } from './components/ReviewCompare'
|
import { ReviewCompare } from './components/ReviewCompare'
|
||||||
|
import { V1CompareView } from './components/V1CompareView'
|
||||||
import { GeneratorModal } from './components/GeneratorModal'
|
import { GeneratorModal } from './components/GeneratorModal'
|
||||||
|
|
||||||
// =============================================================================
|
// =============================================================================
|
||||||
@@ -79,6 +80,17 @@ export default function ControlLibraryPage() {
|
|||||||
const [reviewDuplicates, setReviewDuplicates] = useState<CanonicalControl[]>([])
|
const [reviewDuplicates, setReviewDuplicates] = useState<CanonicalControl[]>([])
|
||||||
const [reviewRule3, setReviewRule3] = useState<CanonicalControl[]>([])
|
const [reviewRule3, setReviewRule3] = useState<CanonicalControl[]>([])
|
||||||
|
|
||||||
|
// V1 Compare mode
|
||||||
|
const [compareMode, setCompareMode] = useState(false)
|
||||||
|
const [compareV1Control, setCompareV1Control] = useState<CanonicalControl | null>(null)
|
||||||
|
const [compareMatches, setCompareMatches] = useState<Array<{
|
||||||
|
matched_control_id: string; matched_title: string; matched_objective: string
|
||||||
|
matched_severity: string; matched_category: string
|
||||||
|
matched_source: string | null; matched_article: string | null
|
||||||
|
matched_source_citation: Record<string, string> | null
|
||||||
|
similarity_score: number; match_rank: number; match_method: string
|
||||||
|
}>>([])
|
||||||
|
|
||||||
// Debounce search
|
// Debounce search
|
||||||
const searchTimer = useRef<ReturnType<typeof setTimeout> | null>(null)
|
const searchTimer = useRef<ReturnType<typeof setTimeout> | null>(null)
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
@@ -398,6 +410,27 @@ export default function ControlLibraryPage() {
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// V1 COMPARE MODE
|
||||||
|
if (compareMode && compareV1Control) {
|
||||||
|
return (
|
||||||
|
<V1CompareView
|
||||||
|
v1Control={compareV1Control}
|
||||||
|
matches={compareMatches}
|
||||||
|
onBack={() => { setCompareMode(false) }}
|
||||||
|
onNavigateToControl={async (controlId: string) => {
|
||||||
|
try {
|
||||||
|
const res = await fetch(`${BACKEND_URL}?endpoint=control&id=${controlId}`)
|
||||||
|
if (res.ok) {
|
||||||
|
setCompareMode(false)
|
||||||
|
setSelectedControl(await res.json())
|
||||||
|
setMode('detail')
|
||||||
|
}
|
||||||
|
} catch { /* ignore */ }
|
||||||
|
}}
|
||||||
|
/>
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
// DETAIL MODE
|
// DETAIL MODE
|
||||||
if (mode === 'detail' && selectedControl) {
|
if (mode === 'detail' && selectedControl) {
|
||||||
const isDuplicateReview = reviewMode && reviewTab === 'duplicates'
|
const isDuplicateReview = reviewMode && reviewTab === 'duplicates'
|
||||||
@@ -467,6 +500,11 @@ export default function ControlLibraryPage() {
|
|||||||
onDelete={handleDelete}
|
onDelete={handleDelete}
|
||||||
onReview={handleReview}
|
onReview={handleReview}
|
||||||
onRefresh={fullReload}
|
onRefresh={fullReload}
|
||||||
|
onCompare={(ctrl, matches) => {
|
||||||
|
setCompareV1Control(ctrl)
|
||||||
|
setCompareMatches(matches)
|
||||||
|
setCompareMode(true)
|
||||||
|
}}
|
||||||
onNavigateToControl={async (controlId: string) => {
|
onNavigateToControl={async (controlId: string) => {
|
||||||
try {
|
try {
|
||||||
const res = await fetch(`${BACKEND_URL}?endpoint=control&id=${controlId}`)
|
const res = await fetch(`${BACKEND_URL}?endpoint=control&id=${controlId}`)
|
||||||
@@ -806,7 +844,7 @@ export default function ControlLibraryPage() {
|
|||||||
<CategoryBadge category={ctrl.category} />
|
<CategoryBadge category={ctrl.category} />
|
||||||
<EvidenceTypeBadge type={ctrl.evidence_type} />
|
<EvidenceTypeBadge type={ctrl.evidence_type} />
|
||||||
<TargetAudienceBadge audience={ctrl.target_audience} />
|
<TargetAudienceBadge audience={ctrl.target_audience} />
|
||||||
<GenerationStrategyBadge strategy={ctrl.generation_strategy} />
|
<GenerationStrategyBadge strategy={ctrl.generation_strategy} pipelineInfo={ctrl} />
|
||||||
<ObligationTypeBadge type={ctrl.generation_metadata?.obligation_type as string} />
|
<ObligationTypeBadge type={ctrl.generation_metadata?.obligation_type as string} />
|
||||||
{ctrl.risk_score !== null && (
|
{ctrl.risk_score !== null && (
|
||||||
<span className="text-xs text-gray-400">Score: {ctrl.risk_score}</span>
|
<span className="text-xs text-gray-400">Score: {ctrl.risk_score}</span>
|
||||||
|
|||||||
@@ -547,6 +547,15 @@ async def atomic_stats():
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/controls/v1-enrichment-stats")
async def v1_enrichment_stats_endpoint():
    """Overview: how many v1 controls have regulatory coverage?

    Thin wrapper that returns the result of the enrichment service's
    ``get_v1_enrichment_stats`` unchanged. The service module is imported
    inside the handler rather than at module load time.
    """
    # NOTE(review): this static path shares its prefix with `/controls/{control_id}`;
    # presumably it has to stay registered ahead of that route — confirm.
    from compliance.services import v1_enrichment

    stats = await v1_enrichment.get_v1_enrichment_stats()
    return stats
|
||||||
|
|
||||||
|
|
||||||
@router.get("/controls/{control_id}")
|
@router.get("/controls/{control_id}")
|
||||||
async def get_control(control_id: str):
|
async def get_control(control_id: str):
|
||||||
"""Get a single canonical control by its control_id (e.g. AUTH-001)."""
|
"""Get a single canonical control by its control_id (e.g. AUTH-001)."""
|
||||||
@@ -1567,6 +1576,57 @@ async def list_licenses():
|
|||||||
return get_license_matrix(db)
|
return get_license_matrix(db)
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# V1 ENRICHMENT (Eigenentwicklung → Regulatorische Abdeckung)
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
@router.post("/controls/enrich-v1-matches")
async def enrich_v1_matches_endpoint(
    dry_run: bool = Query(True, description="Nur zaehlen, nicht schreiben"),
    # ge=1 / ge=0: reject non-positive batch sizes and negative offsets at the API
    # boundary instead of passing them on to the batch query.
    batch_size: int = Query(100, ge=1, description="Controls pro Durchlauf"),
    offset: int = Query(0, ge=0, description="Offset fuer Paginierung"),
):
    """Find regulatory coverage for v1 "Eigenentwicklung" controls.

    Eigenentwicklung = generation_strategy='ungrouped', pipeline_version=1,
    source_citation IS NULL, parent_control_uuid IS NULL.

    Workflow:
      1. dry_run=true -> show statistics only
      2. dry_run=false&batch_size=100&offset=0 -> process the first 100 controls
      3. Repeat with the returned next_offset until done

    Args:
        dry_run: When true (the default), only count; nothing is written.
        batch_size: Number of controls per run; must be >= 1.
        offset: Pagination offset; must be >= 0.

    Returns:
        Whatever the enrichment service's ``enrich_v1_matches`` returns.
    """
    from compliance.services.v1_enrichment import enrich_v1_matches

    return await enrich_v1_matches(
        dry_run=dry_run,
        batch_size=batch_size,
        offset=offset,
    )
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/controls/{control_id}/v1-matches")
async def get_v1_matches_endpoint(control_id: str):
    """Return the regulatory matches for a v1 control.

    The public ``control_id`` is first resolved to the control's internal
    row id, which is then handed to the enrichment service as a string.

    Returns:
        List of matches with control details, source and score.

    Raises:
        HTTPException: 404 when no control with this ``control_id`` exists.
    """
    from compliance.services.v1_enrichment import get_v1_matches

    # Resolve control_id to UUID
    lookup_sql = text("""
        SELECT id FROM canonical_controls WHERE control_id = :cid
    """)
    with SessionLocal() as session:
        control_row = session.execute(lookup_sql, {"cid": control_id}).fetchone()

    if not control_row:
        raise HTTPException(status_code=404, detail=f"Control {control_id} not found")

    control_uuid = str(control_row.id)
    return await get_v1_matches(control_uuid)
|
||||||
|
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# INTERNAL HELPERS
|
# INTERNAL HELPERS
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
|
|||||||
301
backend-compliance/compliance/services/v1_enrichment.py
Normal file
301
backend-compliance/compliance/services/v1_enrichment.py
Normal file
@@ -0,0 +1,301 @@
|
|||||||
|
"""V1 Control Enrichment Service — Match Eigenentwicklung controls to regulations.
|
||||||
|
|
||||||
|
Finds regulatory coverage for v1 controls (generation_strategy='ungrouped',
|
||||||
|
pipeline_version=1, no source_citation) by embedding similarity search.
|
||||||
|
|
||||||
|
Reuses embedding + Qdrant helpers from control_dedup.py.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from sqlalchemy import text
|
||||||
|
|
||||||
|
from database import SessionLocal
|
||||||
|
from compliance.services.control_dedup import (
|
||||||
|
get_embedding,
|
||||||
|
qdrant_search_cross_regulation,
|
||||||
|
)
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Similarity threshold — lower than dedup (0.85) since we want informational matches
|
||||||
|
V1_MATCH_THRESHOLD = 0.75
|
||||||
|
V1_MAX_MATCHES = 5
|
||||||
|
|
||||||
|
|
||||||
|
def _is_eigenentwicklung_query() -> str:
|
||||||
|
"""SQL WHERE clause identifying v1 Eigenentwicklung controls."""
|
||||||
|
return """
|
||||||
|
generation_strategy = 'ungrouped'
|
||||||
|
AND (pipeline_version = '1' OR pipeline_version IS NULL)
|
||||||
|
AND source_citation IS NULL
|
||||||
|
AND parent_control_uuid IS NULL
|
||||||
|
AND release_state NOT IN ('rejected', 'merged', 'deprecated')
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
async def count_v1_controls() -> int:
    """Return the number of v1 Eigenentwicklung controls in ``canonical_controls``.

    Returns:
        The row count, or 0 when the query yields no row.
    """
    count_sql = text(f"""
        SELECT COUNT(*) AS cnt
        FROM canonical_controls
        WHERE {_is_eigenentwicklung_query()}
    """)

    with SessionLocal() as session:
        count_row = session.execute(count_sql).fetchone()

    if not count_row:
        return 0
    return count_row.cnt
|
||||||
|
|
||||||
|
|
||||||
|
def _build_v1_search_text(v1) -> str:
    """Join title and objective for embedding, skipping missing parts."""
    # A NULL objective must not be embedded as the literal text "None".
    return " — ".join(part for part in (v1.title, v1.objective) if part)


def _store_v1_matches(db, v1, hits) -> list[dict]:
    """Upsert the qualifying hits for one v1 control and return them as dicts.

    A hit qualifies when its score reaches ``V1_MATCH_THRESHOLD``, it refers
    to a different control, and that control has a ``source_citation``. At
    most ``V1_MAX_MATCHES`` matches are stored, ranked in the order the hits
    were given.
    """
    stored: list[dict] = []
    rank = 0

    for hit in hits or []:
        # Stop before issuing another lookup once the cap is reached.
        if rank >= V1_MAX_MATCHES:
            break

        score = hit.get("score") or 0
        if score < V1_MATCH_THRESHOLD:
            continue

        payload = hit.get("payload") or {}
        matched_uuid = payload.get("control_uuid")
        if not matched_uuid or matched_uuid == str(v1.id):
            continue

        # Check if matched control has source_citation
        matched_row = db.execute(text("""
            SELECT id, control_id, title, source_citation, severity, category
            FROM canonical_controls
            WHERE id = CAST(:uuid AS uuid)
              AND source_citation IS NOT NULL
        """), {"uuid": matched_uuid}).fetchone()

        if not matched_row:
            continue

        rank += 1

        # Extract source info (only when the citation is a dict)
        source_citation = matched_row.source_citation or {}
        is_mapping = isinstance(source_citation, dict)
        matched_source = source_citation.get("source") if is_mapping else None
        matched_article = source_citation.get("article") if is_mapping else None

        # Insert match; an existing pair gets its score and rank refreshed
        db.execute(text("""
            INSERT INTO v1_control_matches
                (v1_control_uuid, matched_control_uuid, similarity_score,
                 match_rank, matched_source, matched_article, match_method)
            VALUES
                (CAST(:v1_uuid AS uuid), CAST(:matched_uuid AS uuid), :score,
                 :rank, :source, :article, 'embedding')
            ON CONFLICT (v1_control_uuid, matched_control_uuid) DO UPDATE
            SET similarity_score = EXCLUDED.similarity_score,
                match_rank = EXCLUDED.match_rank
        """), {
            "v1_uuid": str(v1.id),
            "matched_uuid": str(matched_row.id),
            "score": round(score, 3),
            "rank": rank,
            "source": matched_source,
            "article": matched_article,
        })

        stored.append({
            "v1_control_id": v1.control_id,
            "v1_title": v1.title,
            "matched_control_id": matched_row.control_id,
            "matched_title": matched_row.title,
            "matched_source": matched_source,
            "matched_article": matched_article,
            "similarity_score": round(score, 3),
            "match_rank": rank,
        })

    return stored


async def enrich_v1_matches(
    dry_run: bool = True,
    batch_size: int = 100,
    offset: int = 0,
) -> dict:
    """Find regulatory matches for v1 Eigenentwicklung controls.

    Each control in the batch is embedded, searched against Qdrant and its
    qualifying hits are upserted into ``v1_control_matches``. The database
    work for each control runs inside its own SAVEPOINT, so a failure for one
    control is recorded in ``errors`` without aborting the transaction for
    the rest of the batch.

    Args:
        dry_run: If True, only report counts and a sample; write nothing.
        batch_size: Number of v1 controls to process per call.
        offset: Pagination offset (v1 control index, ordered by control_id).

    Returns:
        Stats dict with counts, sample matches, and pagination info.
        ``next_offset`` is None when the batch was not full.
    """
    with SessionLocal() as db:
        # 1. Load v1 controls (paginated)
        v1_controls = db.execute(text(f"""
            SELECT id, control_id, title, objective, category
            FROM canonical_controls
            WHERE {_is_eigenentwicklung_query()}
            ORDER BY control_id
            LIMIT :limit OFFSET :offset
        """), {"limit": batch_size, "offset": offset}).fetchall()

        # Count total for pagination
        total_row = db.execute(text(f"""
            SELECT COUNT(*) AS cnt
            FROM canonical_controls
            WHERE {_is_eigenentwicklung_query()}
        """)).fetchone()
        total_v1 = total_row.cnt if total_row else 0

        if not v1_controls:
            return {
                "dry_run": dry_run,
                "processed": 0,
                "total_v1": total_v1,
                "message": "Kein weiterer Batch — alle v1 Controls verarbeitet.",
            }

        if dry_run:
            return {
                "dry_run": True,
                "total_v1": total_v1,
                "offset": offset,
                "batch_size": batch_size,
                "sample_controls": [
                    {
                        "control_id": r.control_id,
                        "title": r.title,
                        "category": r.category,
                    }
                    for r in v1_controls[:20]
                ],
            }

        # 2. Process each v1 control
        processed = 0
        matches_inserted = 0
        errors = []
        sample_matches = []

        for v1 in v1_controls:
            try:
                embedding = await get_embedding(_build_v1_search_text(v1))
                if not embedding:
                    errors.append({
                        "control_id": v1.control_id,
                        "error": "Embedding fehlgeschlagen",
                    })
                    continue

                # Search Qdrant (cross-regulation, no pattern filter)
                results = await qdrant_search_cross_regulation(
                    embedding, top_k=10,
                )

                # SAVEPOINT: a failed statement rolls back only this
                # control's writes and leaves the outer transaction usable.
                with db.begin_nested():
                    stored = _store_v1_matches(db, v1, results)

            except Exception as e:
                logger.warning("V1 enrichment error for %s: %s", v1.control_id, e)
                errors.append({
                    "control_id": v1.control_id,
                    "error": str(e),
                })
                continue

            # Counters are only updated once the control's writes succeeded.
            matches_inserted += len(stored)
            remaining_samples = 20 - len(sample_matches)
            if remaining_samples > 0:
                sample_matches.extend(stored[:remaining_samples])
            processed += 1

        db.commit()

    # Pagination: a short batch means there is nothing left to fetch
    next_offset = offset + batch_size if len(v1_controls) == batch_size else None

    return {
        "dry_run": False,
        "offset": offset,
        "batch_size": batch_size,
        "next_offset": next_offset,
        "total_v1": total_v1,
        "processed": processed,
        "matches_inserted": matches_inserted,
        "errors": errors[:10],
        "sample_matches": sample_matches,
    }
|
||||||
|
|
||||||
|
|
||||||
|
async def get_v1_matches(control_uuid: str) -> list[dict]:
    """Fetch the stored regulatory matches of one v1 control.

    Args:
        control_uuid: The UUID of the v1 control.

    Returns:
        One dict per match, ordered by ``match_rank``, combining the match
        record with details of the matched control.
    """
    matches_sql = text("""
        SELECT
            m.similarity_score,
            m.match_rank,
            m.matched_source,
            m.matched_article,
            m.match_method,
            c.control_id AS matched_control_id,
            c.title AS matched_title,
            c.objective AS matched_objective,
            c.severity AS matched_severity,
            c.category AS matched_category,
            c.source_citation AS matched_source_citation
        FROM v1_control_matches m
        JOIN canonical_controls c ON c.id = m.matched_control_uuid
        WHERE m.v1_control_uuid = CAST(:uuid AS uuid)
        ORDER BY m.match_rank
    """)

    with SessionLocal() as session:
        match_rows = session.execute(matches_sql, {"uuid": control_uuid}).fetchall()

    matches: list[dict] = []
    for row in match_rows:
        matches.append({
            "matched_control_id": row.matched_control_id,
            "matched_title": row.matched_title,
            "matched_objective": row.matched_objective,
            "matched_severity": row.matched_severity,
            "matched_category": row.matched_category,
            "matched_source": row.matched_source,
            "matched_article": row.matched_article,
            "matched_source_citation": row.matched_source_citation,
            # Converted to a plain float for the response
            "similarity_score": float(row.similarity_score),
            "match_rank": row.match_rank,
            "match_method": row.match_method,
        })
    return matches
|
||||||
|
|
||||||
|
|
||||||
|
async def get_v1_enrichment_stats() -> dict:
    """Get overview stats for v1 enrichment.

    Returns:
        Dict with the number of v1 controls, how many of them have at least
        one stored match, how many have none, the total number of match rows
        and the average similarity score (None when there are no matches).
    """
    v1_filter = _is_eigenentwicklung_query()

    with SessionLocal() as db:
        total_v1 = db.execute(text(f"""
            SELECT COUNT(*) AS cnt FROM canonical_controls
            WHERE {v1_filter}
        """)).fetchone()

        # The filter is applied in a subquery on canonical_controls, so its
        # column names need no alias-qualifying string surgery.
        matched_v1 = db.execute(text(f"""
            SELECT COUNT(DISTINCT m.v1_control_uuid) AS cnt
            FROM v1_control_matches m
            WHERE m.v1_control_uuid IN (
                SELECT id FROM canonical_controls
                WHERE {v1_filter}
            )
        """)).fetchone()

        # Both aggregates over the match table in a single round trip
        match_agg = db.execute(text("""
            SELECT COUNT(*) AS cnt, AVG(similarity_score) AS avg_score
            FROM v1_control_matches
        """)).fetchone()

    total_count = total_v1.cnt if total_v1 else 0
    matched_count = matched_v1.cnt if matched_v1 else 0
    avg_value = match_agg.avg_score if match_agg else None

    return {
        "total_v1_controls": total_count,
        "v1_with_matches": matched_count,
        "v1_without_matches": total_count - matched_count,
        "total_matches": match_agg.cnt if match_agg else 0,
        # 'is not None' so a legitimate average of 0 is not reported as missing
        "avg_similarity_score": round(float(avg_value), 3) if avg_value is not None else None,
    }
|
||||||
18
backend-compliance/migrations/080_v1_control_matches.sql
Normal file
18
backend-compliance/migrations/080_v1_control_matches.sql
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
-- V1 Control Enrichment: cross-reference table that links
-- Eigenentwicklung controls (v1, ungrouped, no source) to the regulatory
-- controls they were matched with.

CREATE TABLE IF NOT EXISTS v1_control_matches (
    id                   UUID         PRIMARY KEY DEFAULT gen_random_uuid(),
    -- Both sides reference canonical_controls; a match row disappears
    -- together with either control.
    v1_control_uuid      UUID         NOT NULL REFERENCES canonical_controls(id) ON DELETE CASCADE,
    matched_control_uuid UUID         NOT NULL REFERENCES canonical_controls(id) ON DELETE CASCADE,
    similarity_score     NUMERIC(4,3) NOT NULL,
    match_rank           SMALLINT     NOT NULL DEFAULT 1,
    matched_source       TEXT,        -- e.g. "DSGVO (EU) 2016/679"
    matched_article      TEXT,        -- e.g. "Art. 32"
    match_method         VARCHAR(30)  NOT NULL DEFAULT 'embedding',
    created_at           TIMESTAMPTZ  DEFAULT NOW(),
    -- One row per (v1 control, matched control) pair
    CONSTRAINT uq_v1_match UNIQUE (v1_control_uuid, matched_control_uuid)
);

-- NOTE(review): the index backing uq_v1_match already leads with
-- v1_control_uuid, so idx_v1m_v1 may be redundant — confirm before dropping.
CREATE INDEX IF NOT EXISTS idx_v1m_v1 ON v1_control_matches(v1_control_uuid);
CREATE INDEX IF NOT EXISTS idx_v1m_matched ON v1_control_matches(matched_control_uuid);
|
||||||
220
backend-compliance/tests/test_v1_enrichment.py
Normal file
220
backend-compliance/tests/test_v1_enrichment.py
Normal file
@@ -0,0 +1,220 @@
|
|||||||
|
"""Tests for V1 Control Enrichment (Eigenentwicklung matching)."""
|
||||||
|
import sys
|
||||||
|
sys.path.insert(0, ".")
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from unittest.mock import AsyncMock, MagicMock, patch
|
||||||
|
|
||||||
|
from compliance.services.v1_enrichment import (
|
||||||
|
enrich_v1_matches,
|
||||||
|
get_v1_matches,
|
||||||
|
count_v1_controls,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TestV1EnrichmentDryRun:
    """Dry-run mode should return statistics without touching DB."""

    @pytest.mark.asyncio
    async def test_dry_run_returns_stats(self):
        # Two v1 controls as the paginated SELECT would return them
        access_control = MagicMock(
            id="uuid-v1-1",
            control_id="ACC-013",
            title="Zugriffskontrolle",
            objective="Zugriff einschraenken",
            category="access",
        )
        encryption_control = MagicMock(
            id="uuid-v1-2",
            control_id="SEC-005",
            title="Verschluesselung",
            objective="Daten verschluesseln",
            category="encryption",
        )
        v1_rows = [access_control, encryption_control]
        total_row = MagicMock(cnt=863)

        with patch("compliance.services.v1_enrichment.SessionLocal") as session_factory:
            db = MagicMock()
            session_ctx = session_factory.return_value
            session_ctx.__enter__ = MagicMock(return_value=db)
            session_ctx.__exit__ = MagicMock(return_value=False)

            # Every execute() shares one result object: fetchall serves the
            # control list, fetchone serves the count.
            query_result = db.execute.return_value
            query_result.fetchall.return_value = v1_rows
            query_result.fetchone.return_value = total_row

            outcome = await enrich_v1_matches(dry_run=True, batch_size=100, offset=0)

        assert outcome["dry_run"] is True
        assert outcome["total_v1"] == 863
        assert len(outcome["sample_controls"]) == 2
        assert outcome["sample_controls"][0]["control_id"] == "ACC-013"
|
||||||
|
|
||||||
|
|
||||||
|
class TestV1EnrichmentExecution:
    """Execution mode should find matches and insert them."""

    @pytest.mark.asyncio
    async def test_processes_and_inserts_matches(self):
        mock_v1 = [
            MagicMock(
                id="uuid-v1-1",
                control_id="ACC-013",
                title="Zugriffskontrolle",
                objective="Zugriff auf Systeme einschraenken",
                category="access",
            ),
        ]

        mock_count = MagicMock(cnt=1)
        mock_matched_row = MagicMock(
            id="uuid-reg-1",
            control_id="SEC-042",
            title="Verschluesselung personenbezogener Daten",
            source_citation={"source": "DSGVO (EU) 2016/679", "article": "Art. 32"},
            severity="high",
            category="encryption",
        )

        mock_qdrant_results = [
            {
                "score": 0.89,
                "payload": {
                    "control_uuid": "uuid-reg-1",
                    "control_id": "SEC-042",
                    "title": "Verschluesselung",
                },
            },
            {
                "score": 0.65,  # Below threshold, must be ignored
                "payload": {
                    "control_uuid": "uuid-reg-2",
                    "control_id": "SEC-100",
                },
            },
        ]

        with patch("compliance.services.v1_enrichment.SessionLocal") as mock_session:
            db = MagicMock()
            mock_session.return_value.__enter__ = MagicMock(return_value=db)
            mock_session.return_value.__exit__ = MagicMock(return_value=False)

            # execute() is called for: v1 list, count, matched-row lookup, insert
            def side_effect_execute(query, params=None):
                result = MagicMock()
                # fetchall for v1 controls list
                result.fetchall.return_value = mock_v1
                # fetchone: the regulatory lookup gets the matched row,
                # everything else (the COUNT query) gets the count row
                if "source_citation IS NOT NULL" in str(query):
                    result.fetchone.return_value = mock_matched_row
                else:
                    result.fetchone.return_value = mock_count
                return result

            db.execute.side_effect = side_effect_execute

            # Both collaborators are awaited by the service, so patch them
            # with AsyncMock explicitly instead of relying on auto-detection.
            with patch(
                "compliance.services.v1_enrichment.get_embedding",
                new_callable=AsyncMock,
            ) as mock_embed, patch(
                "compliance.services.v1_enrichment.qdrant_search_cross_regulation",
                new_callable=AsyncMock,
            ) as mock_qdrant:
                mock_embed.return_value = [0.1] * 1024
                mock_qdrant.return_value = mock_qdrant_results

                result = await enrich_v1_matches(dry_run=False, batch_size=100, offset=0)

        assert result["dry_run"] is False
        assert result["processed"] == 1
        assert result["matches_inserted"] == 1
        assert len(result["sample_matches"]) == 1
        assert result["sample_matches"][0]["matched_control_id"] == "SEC-042"
        assert result["sample_matches"][0]["similarity_score"] == 0.89
        assert result["sample_matches"][0]["match_rank"] == 1
        # The batch must be persisted exactly once
        db.commit.assert_called_once()

    @pytest.mark.asyncio
    async def test_empty_batch_returns_done(self):
        mock_count = MagicMock(cnt=863)

        with patch("compliance.services.v1_enrichment.SessionLocal") as mock_session:
            db = MagicMock()
            mock_session.return_value.__enter__ = MagicMock(return_value=db)
            mock_session.return_value.__exit__ = MagicMock(return_value=False)
            db.execute.return_value.fetchall.return_value = []
            db.execute.return_value.fetchone.return_value = mock_count

            result = await enrich_v1_matches(dry_run=False, batch_size=100, offset=9999)

        assert result["processed"] == 0
        assert "alle v1 Controls verarbeitet" in result["message"]
|
||||||
|
|
||||||
|
|
||||||
|
class TestV1MatchesEndpoint:
    """Test the matches retrieval."""

    @pytest.mark.asyncio
    async def test_returns_matches(self):
        # One joined row as the match query would return it
        stored_match = MagicMock(
            matched_control_id="SEC-042",
            matched_title="Verschluesselung",
            matched_objective="Daten verschluesseln",
            matched_severity="high",
            matched_category="encryption",
            matched_source="DSGVO (EU) 2016/679",
            matched_article="Art. 32",
            matched_source_citation={"source": "DSGVO (EU) 2016/679"},
            similarity_score=0.89,
            match_rank=1,
            match_method="embedding",
        )

        with patch("compliance.services.v1_enrichment.SessionLocal") as session_factory:
            db = MagicMock()
            session_ctx = session_factory.return_value
            session_ctx.__enter__ = MagicMock(return_value=db)
            session_ctx.__exit__ = MagicMock(return_value=False)
            db.execute.return_value.fetchall.return_value = [stored_match]

            matches = await get_v1_matches("uuid-v1-1")

        assert len(matches) == 1
        first = matches[0]
        assert first["matched_control_id"] == "SEC-042"
        assert first["similarity_score"] == 0.89
        assert first["matched_source"] == "DSGVO (EU) 2016/679"

    @pytest.mark.asyncio
    async def test_empty_matches(self):
        with patch("compliance.services.v1_enrichment.SessionLocal") as session_factory:
            db = MagicMock()
            session_ctx = session_factory.return_value
            session_ctx.__enter__ = MagicMock(return_value=db)
            session_ctx.__exit__ = MagicMock(return_value=False)
            db.execute.return_value.fetchall.return_value = []

            matches = await get_v1_matches("uuid-nonexistent")

        assert matches == []
|
||||||
|
|
||||||
|
|
||||||
|
class TestEigenentwicklungDetection:
    """Verify the Eigenentwicklung detection query."""

    @pytest.mark.asyncio
    async def test_count_v1_controls(self):
        count_row = MagicMock(cnt=863)

        with patch("compliance.services.v1_enrichment.SessionLocal") as session_factory:
            db = MagicMock()
            session_ctx = session_factory.return_value
            session_ctx.__enter__ = MagicMock(return_value=db)
            session_ctx.__exit__ = MagicMock(return_value=False)
            db.execute.return_value.fetchone.return_value = count_row

            total = await count_v1_controls()

        assert total == 863

        # Verify the executed query includes all conditions
        executed_sql = str(db.execute.call_args[0][0])
        expected_conditions = (
            "generation_strategy = 'ungrouped'",
            "source_citation IS NULL",
            "parent_control_uuid IS NULL",
        )
        for condition in expected_conditions:
            assert condition in executed_sql
|
||||||
Reference in New Issue
Block a user