feat: V1 Control Enrichment — Eigenentwicklung-Label, regulatorisches Matching & Vergleichsansicht

863 v1-Controls (manuell geschrieben, ohne Rechtsgrundlage) werden als "Eigenentwicklung" gekennzeichnet und automatisch mit regulatorischen Controls (DSGVO, NIS2, OWASP etc.) per Embedding-Similarity abgeglichen. Backend: - Migration 080: v1_control_matches Tabelle (Cross-Reference) - v1_enrichment.py: Batch-Matching via BGE-M3 + Qdrant (Threshold 0.75) - 3 neue API-Endpoints: enrich-v1-matches, v1-matches, v1-enrichment-stats - 6 Tests (dry-run, execution, matches, pagination, detection) Frontend: - Orange "Eigenentwicklung"-Badge statt grauem "v1" (wenn kein Source) - "Regulatorische Abdeckung"-Sektion im ControlDetail mit Match-Karten - Side-by-Side V1CompareView (Eigenentwicklung vs. regulatorisch gedeckt) - Prev/Next Navigation durch alle Matches Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-26 10:32:08 +01:00
parent cb034b8009
commit db7c207464
11 changed files with 939 additions and 6 deletions
@@ -135,6 +135,19 @@ export async function GET(request: NextRequest) {
        backendPath = '/api/compliance/v1/canonical/blocked-sources'
        break
      case 'v1-matches': {
        const matchId = searchParams.get('id')
        if (!matchId) {
          return NextResponse.json({ error: 'Missing control id' }, { status: 400 })
        }
        backendPath = `/api/compliance/v1/canonical/controls/${encodeURIComponent(matchId)}/v1-matches`
        break
      }
      case 'v1-enrichment-stats':
        backendPath = '/api/compliance/v1/canonical/controls/v1-enrichment-stats'
        break
      case 'controls-customer': {
        const custSeverity = searchParams.get('severity')
        const custDomain = searchParams.get('domain')
@@ -201,6 +214,11 @@ export async function POST(request: NextRequest) {
      backendPath = '/api/compliance/v1/canonical/generate/bulk-review'
    } else if (endpoint === 'blocked-sources-cleanup') {
      backendPath = '/api/compliance/v1/canonical/blocked-sources/cleanup'
    } else if (endpoint === 'enrich-v1-matches') {
      // searchParams.get() returns decoded values; re-encode each one so a crafted
      // value (e.g. "true&batch_size=99999") cannot append extra arguments to the
      // backend query string. Defaults mirror the backend's own defaults.
      const dryRun = encodeURIComponent(searchParams.get('dry_run') ?? 'true')
      const batchSize = encodeURIComponent(searchParams.get('batch_size') ?? '100')
      const enrichOffset = encodeURIComponent(searchParams.get('offset') ?? '0')
      backendPath = `/api/compliance/v1/canonical/controls/enrich-v1-matches?dry_run=${dryRun}&batch_size=${batchSize}&offset=${enrichOffset}`
    } else if (endpoint === 'similarity-check') {
      const controlId = searchParams.get('id')
      if (!controlId) {
@@ -308,7 +308,7 @@ export default function AtomicControlsPage() {
                    <StateBadge state={ctrl.release_state} />
                    <CategoryBadge category={ctrl.category} />
                    <TargetAudienceBadge audience={ctrl.target_audience} />
-                    <GenerationStrategyBadge strategy={ctrl.generation_strategy} />
+                    <GenerationStrategyBadge strategy={ctrl.generation_strategy} pipelineInfo={ctrl} />
                    <ObligationTypeBadge type={ctrl.generation_metadata?.obligation_type as string} />
                  </div>
                  <h3 className="text-sm font-medium text-gray-900 group-hover:text-violet-700">{ctrl.title}</h3>
@@ -9,7 +9,7 @@ import {
 import {
  CanonicalControl, EFFORT_LABELS, BACKEND_URL,
  SeverityBadge, StateBadge, LicenseRuleBadge, VerificationMethodBadge, CategoryBadge, EvidenceTypeBadge, TargetAudienceBadge,
-  ObligationTypeBadge, GenerationStrategyBadge,
+  ObligationTypeBadge, GenerationStrategyBadge, isEigenentwicklung,
  ExtractionMethodBadge, RegulationCountBadge,
  VERIFICATION_METHODS, CATEGORY_OPTIONS, EVIDENCE_TYPE_OPTIONS,
  ObligationInfo, DocumentReference, MergedDuplicate, RegulationSummary,
@@ -65,6 +65,20 @@ interface TraceabilityData {
  regulations_summary?: RegulationSummary[]
 }
 /**
  * One regulatory match for a v1 "Eigenentwicklung" control, as returned by the
  * `v1-matches` endpoint. The `matched_*` fields describe the regulatory control
  * that was found, not the v1 control itself.
  */
 interface V1Match {
  // Human-readable control_id of the matched control (used for navigation)
  matched_control_id: string
  matched_title: string
  matched_objective: string
  matched_severity: string
  matched_category: string
  // Regulation name and article stored with the match; either may be absent
  matched_source: string | null
  matched_article: string | null
  // Full source citation of the matched control, if it has one
  matched_source_citation: Record<string, string> | null
  // Similarity as a fraction; rendered as a percentage and colour-coded at 0.80 / 0.85
  similarity_score: number
  // Rank of the match for this control; the backend returns matches ordered by it
  match_rank: number
  match_method: string
 }
 interface ControlDetailProps {
  ctrl: CanonicalControl
  onBack: () => void
@@ -73,6 +87,7 @@ interface ControlDetailProps {
  onReview: (controlId: string, action: string) => void
  onRefresh?: () => void
  onNavigateToControl?: (controlId: string) => void
  onCompare?: (ctrl: CanonicalControl, matches: V1Match[]) => void
  // Review mode navigation
  reviewMode?: boolean
  reviewIndex?: number
@@ -89,6 +104,7 @@ export function ControlDetail({
  onReview,
  onRefresh,
  onNavigateToControl,
  onCompare,
  reviewMode,
  reviewIndex = 0,
  reviewTotal = 0,
@@ -101,6 +117,9 @@ export function ControlDetail({
  const [merging, setMerging] = useState(false)
  const [traceability, setTraceability] = useState<TraceabilityData | null>(null)
  const [loadingTrace, setLoadingTrace] = useState(false)
  const [v1Matches, setV1Matches] = useState<V1Match[]>([])
  const [loadingV1, setLoadingV1] = useState(false)
  const eigenentwicklung = isEigenentwicklung(ctrl)
  const loadTraceability = useCallback(async () => {
    setLoadingTrace(true)
@@ -117,9 +136,21 @@ export function ControlDetail({
    finally { setLoadingTrace(false) }
  }, [ctrl.control_id])
  // Fetches the stored regulatory matches for the current control.
  // Controls that are not "Eigenentwicklung" never have matches, so the list is
  // cleared without a request. Any failure degrades to an empty list.
  const loadV1Matches = useCallback(async () => {
    if (!eigenentwicklung) { setV1Matches([]); return }
    setLoadingV1(true)
    try {
      // Encode the id: it is placed in a query string and must not break out of it
      const res = await fetch(`${BACKEND_URL}?endpoint=v1-matches&id=${encodeURIComponent(ctrl.control_id)}`)
      const payload: unknown = res.ok ? await res.json() : []
      // The render path calls .map() on this state, so only accept real arrays
      setV1Matches(Array.isArray(payload) ? (payload as V1Match[]) : [])
    } catch { setV1Matches([]) }
    finally { setLoadingV1(false) }
  }, [ctrl.control_id, eigenentwicklung])
  useEffect(() => {
    loadSimilarControls()
    loadTraceability()
    loadV1Matches()
    setSelectedDuplicates(new Set())
  // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [ctrl.control_id])
@@ -187,7 +218,7 @@ export function ControlDetail({
              <CategoryBadge category={ctrl.category} />
              <EvidenceTypeBadge type={ctrl.evidence_type} />
              <TargetAudienceBadge audience={ctrl.target_audience} />
-              <GenerationStrategyBadge strategy={ctrl.generation_strategy} />
+              <GenerationStrategyBadge strategy={ctrl.generation_strategy} pipelineInfo={ctrl} />
              <ObligationTypeBadge type={ctrl.generation_metadata?.obligation_type as string} />
            </div>
            <h2 className="text-lg font-semibold text-gray-900 mt-1">{ctrl.title}</h2>
@@ -303,6 +334,75 @@ export function ControlDetail({
          </section>
        )}
        {/* Regulatorische Abdeckung (Eigenentwicklung) */}
        {eigenentwicklung && (
          <section className="bg-orange-50 border border-orange-200 rounded-lg p-4">
            <div className="flex items-center gap-2 mb-3">
              <Scale className="w-4 h-4 text-orange-600" />
              <h3 className="text-sm font-semibold text-orange-900">
                Regulatorische Abdeckung
              </h3>
              {loadingV1 && <span className="text-xs text-orange-400">Laden...</span>}
            </div>
            {v1Matches.length > 0 ? (
              <div className="space-y-2">
                {/* Keyed by the matched control id (stable across reloads) instead of the array index */}
                {v1Matches.map((match) => (
                  <div key={match.matched_control_id} className="bg-white/60 border border-orange-100 rounded-lg p-3">
                    <div className="flex items-start justify-between gap-2">
                      <div className="flex-1 min-w-0">
                        <div className="flex items-center gap-2 flex-wrap mb-1">
                          {match.matched_source && (
                            <span className="text-xs font-semibold text-blue-800 bg-blue-100 px-1.5 py-0.5 rounded">
                              {match.matched_source}
                            </span>
                          )}
                          {match.matched_article && (
                            <span className="text-xs text-blue-700 bg-blue-50 px-1.5 py-0.5 rounded">
                              {match.matched_article}
                            </span>
                          )}
                          {/* Colour bands: >= 85% green, >= 80% yellow, otherwise grey */}
                          <span className={`text-xs font-medium px-1.5 py-0.5 rounded ${
                            match.similarity_score >= 0.85 ? 'bg-green-100 text-green-700' :
                            match.similarity_score >= 0.80 ? 'bg-yellow-100 text-yellow-700' :
                            'bg-gray-100 text-gray-600'
                          }`}>
                            {(match.similarity_score * 100).toFixed(0)}%
                          </span>
                        </div>
                        <p className="text-sm text-gray-800">
                          {onNavigateToControl ? (
                            <button
                              onClick={() => onNavigateToControl(match.matched_control_id)}
                              className="font-mono text-xs text-purple-600 bg-purple-50 px-1.5 py-0.5 rounded hover:bg-purple-100 hover:underline mr-1.5"
                            >
                              {match.matched_control_id}
                            </button>
                          ) : (
                            <span className="font-mono text-xs text-purple-600 bg-purple-50 px-1.5 py-0.5 rounded mr-1.5">
                              {match.matched_control_id}
                            </span>
                          )}
                          {match.matched_title}
                        </p>
                      </div>
                      {/* NOTE(review): every card passes the full list, so the compare view does not know which card was clicked — confirm this is intended */}
                      {onCompare && (
                        <button
                          onClick={() => onCompare(ctrl, v1Matches)}
                          className="text-xs text-orange-600 border border-orange-300 rounded px-2 py-1 hover:bg-orange-100 whitespace-nowrap flex-shrink-0"
                        >
                          Vergleichen
                        </button>
                      )}
                    </div>
                  </div>
                ))}
              </div>
            ) : !loadingV1 ? (
              <p className="text-sm text-orange-600">Keine regulatorische Abdeckung gefunden. Dieses Control ist eine reine Eigenentwicklung.</p>
            ) : null}
          </section>
        )}
        {/* Rechtsgrundlagen / Traceability (atomic controls) */}
        {traceability && traceability.parent_links.length > 0 && (
          <section className="bg-violet-50 border border-violet-200 rounded-lg p-4">
@@ -15,7 +15,7 @@ import {
 // Compact Control Panel (used on both sides of the comparison)
 // =============================================================================
-function ControlPanel({ ctrl, label, highlight }: { ctrl: CanonicalControl; label: string; highlight?: boolean }) {
+export function ControlPanel({ ctrl, label, highlight }: { ctrl: CanonicalControl; label: string; highlight?: boolean }) {
  return (
    <div className={`flex flex-col h-full overflow-y-auto ${highlight ? 'bg-yellow-50' : 'bg-white'}`}>
      {/* Panel Header */}
@@ -0,0 +1,155 @@
 'use client'
 import { useState, useEffect } from 'react'
 import {
  ArrowLeft, ChevronLeft, SkipForward, Scale,
 } from 'lucide-react'
 import { CanonicalControl, BACKEND_URL } from './helpers'
 import { ControlPanel } from './ReviewCompare'
 /**
  * One regulatory match for a v1 "Eigenentwicklung" control.
  * Same shape as the `V1Match` interface declared in ControlDetail; the two
  * declarations must be kept in sync by hand.
  */
 interface V1Match {
  // control_id of the matched control; used to fetch the full control for the right-hand panel
  matched_control_id: string
  matched_title: string
  matched_objective: string
  matched_severity: string
  matched_category: string
  // Regulation name and article shown in the source info bar; either may be absent
  matched_source: string | null
  matched_article: string | null
  matched_source_citation: Record<string, string> | null
  // Similarity as a fraction; rendered as a percentage and colour-coded at 0.80 / 0.85
  similarity_score: number
  match_rank: number
  match_method: string
 }
 /** Props of the side-by-side V1 comparison view. */
 interface V1CompareViewProps {
  // The v1 control shown in the left panel
  v1Control: CanonicalControl
  // Matches to step through; the view starts at the first entry
  matches: V1Match[]
  // Invoked by the back arrow, and before navigating to a matched control
  onBack: () => void
  // Optional; when present, a "Zum Control" button is shown for the loaded match
  onNavigateToControl?: (controlId: string) => void
 }
 /**
  * Side-by-side comparison of a v1 "Eigenentwicklung" control (left) with one of
  * its regulatory matches (right). The user steps through `matches` with the
  * previous/next buttons; the full matched control is fetched on every change.
  *
  * @param v1Control            control rendered in the left panel
  * @param matches              matches to step through
  * @param onBack               called by the back arrow and before navigating away
  * @param onNavigateToControl  optional; enables the "Zum Control" button
  */
 export function V1CompareView({ v1Control, matches, onBack, onNavigateToControl }: V1CompareViewProps) {
  const [currentMatchIndex, setCurrentMatchIndex] = useState(0)
  const [matchedControl, setMatchedControl] = useState<CanonicalControl | null>(null)
  const [loading, setLoading] = useState(false)
  const currentMatch = matches[currentMatchIndex]
  // Depend on the id (a primitive) so a re-created but identical match object does not refetch
  const currentMatchId = currentMatch?.matched_control_id
  // Load the full matched control when the selected match changes
  useEffect(() => {
    if (!currentMatchId) {
      setMatchedControl(null)
      setLoading(false)
      return
    }
    // Set by the cleanup below: a response that arrives after the user has
    // moved on must not overwrite the panel of the newer selection.
    let cancelled = false
    const load = async () => {
      setLoading(true)
      // Drop the previous control so "Zum Control" cannot target a stale match while loading
      setMatchedControl(null)
      try {
        const res = await fetch(`${BACKEND_URL}?endpoint=control&id=${encodeURIComponent(currentMatchId)}`)
        const loaded: CanonicalControl | null = res.ok ? await res.json() : null
        if (!cancelled) setMatchedControl(loaded)
      } catch {
        if (!cancelled) setMatchedControl(null)
      } finally {
        if (!cancelled) setLoading(false)
      }
    }
    load()
    return () => { cancelled = true }
  }, [currentMatchId])
  return (
    <div className="flex flex-col h-full">
      {/* Header */}
      <div className="border-b border-gray-200 bg-white px-6 py-3 flex items-center justify-between">
        <div className="flex items-center gap-3">
          <button onClick={onBack} aria-label="Zurueck" className="text-gray-400 hover:text-gray-600">
            <ArrowLeft className="w-5 h-5" />
          </button>
          <div>
            <div className="flex items-center gap-2">
              <Scale className="w-4 h-4 text-orange-500" />
              <span className="text-sm font-semibold text-gray-900">V1-Vergleich</span>
              {currentMatch && (
                <span className={`text-xs font-medium px-2 py-0.5 rounded-full ${
                  currentMatch.similarity_score >= 0.85 ? 'bg-green-100 text-green-700' :
                  currentMatch.similarity_score >= 0.80 ? 'bg-yellow-100 text-yellow-700' :
                  'bg-gray-100 text-gray-600'
                }`}>
                  {(currentMatch.similarity_score * 100).toFixed(1)}% Aehnlichkeit
                </span>
              )}
            </div>
          </div>
        </div>
        <div className="flex items-center gap-2">
          {/* Navigation */}
          <div className="flex items-center gap-1">
            <button
              onClick={() => setCurrentMatchIndex((index) => Math.max(0, index - 1))}
              disabled={currentMatchIndex === 0}
              aria-label="Vorheriger Match"
              className="p-1 text-gray-400 hover:text-gray-600 disabled:opacity-30"
            >
              <ChevronLeft className="w-4 h-4" />
            </button>
            <span className="text-xs text-gray-500 font-medium">
              {/* Show "0 / 0" rather than "1 / 0" when there is nothing to compare */}
              {matches.length === 0 ? 0 : currentMatchIndex + 1} / {matches.length}
            </span>
            <button
              onClick={() => setCurrentMatchIndex((index) => Math.min(matches.length - 1, index + 1))}
              disabled={currentMatchIndex >= matches.length - 1}
              aria-label="Naechster Match"
              className="p-1 text-gray-400 hover:text-gray-600 disabled:opacity-30"
            >
              <SkipForward className="w-4 h-4" />
            </button>
          </div>
          {/* Navigate to matched control */}
          {onNavigateToControl && matchedControl && (
            <button
              onClick={() => { onBack(); onNavigateToControl(matchedControl.control_id) }}
              className="px-3 py-1.5 text-sm text-purple-600 border border-purple-300 rounded-lg hover:bg-purple-50"
            >
              Zum Control
            </button>
          )}
        </div>
      </div>
      {/* Source info bar */}
      {currentMatch && (currentMatch.matched_source || currentMatch.matched_article) && (
        <div className="px-6 py-2 bg-blue-50 border-b border-blue-200 flex items-center gap-2 text-sm">
          <Scale className="w-3.5 h-3.5 text-blue-600" />
          {currentMatch.matched_source && (
            <span className="font-semibold text-blue-900">{currentMatch.matched_source}</span>
          )}
          {currentMatch.matched_article && (
            <span className="text-blue-700">{currentMatch.matched_article}</span>
          )}
        </div>
      )}
      {/* Side-by-Side Panels */}
      <div className="flex-1 flex overflow-hidden">
        {/* Left: V1 Eigenentwicklung */}
        <div className="w-1/2 border-r border-gray-200 overflow-y-auto">
          <ControlPanel ctrl={v1Control} label="Eigenentwicklung" highlight />
        </div>
        {/* Right: Regulatory match */}
        <div className="w-1/2 overflow-y-auto">
          {loading ? (
            <div className="flex items-center justify-center h-full">
              <div className="animate-spin rounded-full h-6 w-6 border-2 border-purple-600 border-t-transparent" />
            </div>
          ) : matchedControl ? (
            <ControlPanel ctrl={matchedControl} label="Regulatorisch gedeckt" />
          ) : (
            <div className="flex items-center justify-center h-full text-gray-400 text-sm">
              Control konnte nicht geladen werden
            </div>
          )}
        </div>
      </div>
    </div>
  )
 }
@@ -52,6 +52,7 @@ export interface CanonicalControl {
  parent_control_id?: string | null
  parent_control_title?: string | null
  decomposition_method?: string | null
  pipeline_version?: number | string | null
  created_at: string
  updated_at: string
 }
@@ -293,7 +294,29 @@ export function TargetAudienceBadge({ audience }: { audience: string | string[]
  )
 }
-export function GenerationStrategyBadge({ strategy }: { strategy: string | null | undefined }) {
+export interface CanonicalControlPipelineInfo {
  pipeline_version?: number | string | null
  source_citation?: Record<string, string> | null
  parent_control_uuid?: string | null
 }
 /**
  * Tells whether a control counts as an "Eigenentwicklung": a v1 control with
  * no legal source and no parent.
  *
  * All four conditions must hold:
  *  - `generation_strategy` is missing/empty or `'ungrouped'`
  *  - `pipeline_version` is missing/falsy or stringifies to `'1'`
  *  - `source_citation` is not set
  *  - `parent_control_uuid` is not set
  *
  * @returns `true` only when every condition above is met
  */
 export function isEigenentwicklung(ctrl: CanonicalControlPipelineInfo & { generation_strategy?: string | null }): boolean {
  const { generation_strategy: strategy, pipeline_version: version } = ctrl
  // A named strategy other than 'ungrouped' means the control was generated, not hand-written
  if (strategy && strategy !== 'ungrouped') return false
  // Any explicit pipeline version other than 1 rules the control out
  if (version && String(version) !== '1') return false
  // A source citation means the control already has a legal basis
  if (ctrl.source_citation) return false
  return !ctrl.parent_control_uuid
 }
 export function GenerationStrategyBadge({ strategy, pipelineInfo }: {
  strategy: string | null | undefined
  pipelineInfo?: CanonicalControlPipelineInfo & { generation_strategy?: string | null }
 }) {
  // Eigenentwicklung detection: v1 + no source + no parent
  if (pipelineInfo && isEigenentwicklung(pipelineInfo)) {
    return <span className="inline-flex items-center px-1.5 py-0.5 rounded text-xs font-medium bg-orange-100 text-orange-700">Eigenentwicklung</span>
  }
  if (!strategy || strategy === 'ungrouped') {
    return <span className="inline-flex items-center px-1.5 py-0.5 rounded text-xs font-medium bg-gray-100 text-gray-500">v1</span>
  }
@@ -15,6 +15,7 @@ import {
 import { ControlForm } from './components/ControlForm'
 import { ControlDetail } from './components/ControlDetail'
 import { ReviewCompare } from './components/ReviewCompare'
 import { V1CompareView } from './components/V1CompareView'
 import { GeneratorModal } from './components/GeneratorModal'
 // =============================================================================
@@ -79,6 +80,17 @@ export default function ControlLibraryPage() {
  const [reviewDuplicates, setReviewDuplicates] = useState<CanonicalControl[]>([])
  const [reviewRule3, setReviewRule3] = useState<CanonicalControl[]>([])
  // V1 Compare mode
  const [compareMode, setCompareMode] = useState(false)
  const [compareV1Control, setCompareV1Control] = useState<CanonicalControl | null>(null)
  const [compareMatches, setCompareMatches] = useState<Array<{
    matched_control_id: string; matched_title: string; matched_objective: string
    matched_severity: string; matched_category: string
    matched_source: string | null; matched_article: string | null
    matched_source_citation: Record<string, string> | null
    similarity_score: number; match_rank: number; match_method: string
  }>>([])
  // Debounce search
  const searchTimer = useRef<ReturnType<typeof setTimeout> | null>(null)
  useEffect(() => {
@@ -398,6 +410,27 @@ export default function ControlLibraryPage() {
    )
  }
  // V1 COMPARE MODE
  // Rendered instead of the detail view while a v1 control is compared with its matches
  if (compareMode && compareV1Control) {
    return (
      <V1CompareView
        v1Control={compareV1Control}
        matches={compareMatches}
        onBack={() => { setCompareMode(false) }}
        onNavigateToControl={async (controlId: string) => {
          try {
            // Encode the id before placing it in the query string
            const res = await fetch(`${BACKEND_URL}?endpoint=control&id=${encodeURIComponent(controlId)}`)
            if (res.ok) {
              // Parse first: if the body is unreadable we stay in compare mode
              // instead of leaving it with no control selected.
              const loadedControl = await res.json()
              setCompareMode(false)
              setSelectedControl(loadedControl)
              setMode('detail')
            }
          } catch { /* ignore */ }
        }}
      />
    )
  }
  // DETAIL MODE
  if (mode === 'detail' && selectedControl) {
    const isDuplicateReview = reviewMode && reviewTab === 'duplicates'
@@ -467,6 +500,11 @@ export default function ControlLibraryPage() {
            onDelete={handleDelete}
            onReview={handleReview}
            onRefresh={fullReload}
            onCompare={(ctrl, matches) => {
              setCompareV1Control(ctrl)
              setCompareMatches(matches)
              setCompareMode(true)
            }}
            onNavigateToControl={async (controlId: string) => {
              try {
                const res = await fetch(`${BACKEND_URL}?endpoint=control&id=${controlId}`)
@@ -806,7 +844,7 @@ export default function ControlLibraryPage() {
                    <CategoryBadge category={ctrl.category} />
                    <EvidenceTypeBadge type={ctrl.evidence_type} />
                    <TargetAudienceBadge audience={ctrl.target_audience} />
-                    <GenerationStrategyBadge strategy={ctrl.generation_strategy} />
+                    <GenerationStrategyBadge strategy={ctrl.generation_strategy} pipelineInfo={ctrl} />
                    <ObligationTypeBadge type={ctrl.generation_metadata?.obligation_type as string} />
                    {ctrl.risk_score !== null && (
                      <span className="text-xs text-gray-400">Score: {ctrl.risk_score}</span>
@@ -547,6 +547,15 @@ async def atomic_stats():
    }
@router.get("/controls/v1-enrichment-stats")
 async def v1_enrichment_stats_endpoint():
    """
    Overview: how many v1 controls have regulatory coverage?

    Thin wrapper that returns the dict produced by ``get_v1_enrichment_stats``.
    """
    # Imported inside the handler rather than at module level.
    # NOTE(review): presumably to avoid import cost or cycles at router import time — confirm.
    from compliance.services.v1_enrichment import get_v1_enrichment_stats
    return await get_v1_enrichment_stats()
@router.get("/controls/{control_id}")
 async def get_control(control_id: str):
    """Get a single canonical control by its control_id (e.g. AUTH-001)."""
@@ -1567,6 +1576,57 @@ async def list_licenses():
        return get_license_matrix(db)
 # =============================================================================
 # V1 ENRICHMENT (Eigenentwicklung → Regulatorische Abdeckung)
 # =============================================================================
@router.post("/controls/enrich-v1-matches")
 async def enrich_v1_matches_endpoint(
    dry_run: bool = Query(True, description="Nur zaehlen, nicht schreiben"),
    batch_size: int = Query(100, description="Controls pro Durchlauf"),
    offset: int = Query(0, description="Offset fuer Paginierung"),
 ):
    """
    Findet regulatorische Abdeckung fuer v1 Eigenentwicklung Controls.
    Eigenentwicklung = generation_strategy='ungrouped', pipeline_version=1,
    source_citation IS NULL, parent_control_uuid IS NULL.
    Workflow:
      1. dry_run=true → Statistiken anzeigen
      2. dry_run=false&batch_size=100&offset=0 → Erste 100 verarbeiten
      3. Wiederholen mit next_offset bis fertig
    """
    from compliance.services.v1_enrichment import enrich_v1_matches
    return await enrich_v1_matches(
        dry_run=dry_run,
        batch_size=batch_size,
        offset=offset,
    )
@router.get("/controls/{control_id}/v1-matches")
 async def get_v1_matches_endpoint(control_id: str):
    """
    Return the regulatory matches stored for a v1 control.

    Args:
        control_id: Human-readable control id from the URL path.

    Returns:
        List of matches with control details, source and score.

    Raises:
        HTTPException: 404 when no control with this ``control_id`` exists.
    """
    from compliance.services.v1_enrichment import get_v1_matches
    # Resolve control_id to UUID (the match table is keyed by UUID)
    with SessionLocal() as db:
        row = db.execute(text("""
            SELECT id FROM canonical_controls WHERE control_id = :cid
        """), {"cid": control_id}).fetchone()
    if not row:
        raise HTTPException(status_code=404, detail=f"Control {control_id} not found")
    return await get_v1_matches(str(row.id))
 # =============================================================================
 # INTERNAL HELPERS
 # =============================================================================
@@ -0,0 +1,301 @@
 """V1 Control Enrichment Service — Match Eigenentwicklung controls to regulations.
 Finds regulatory coverage for v1 controls (generation_strategy='ungrouped',
 pipeline_version=1, no source_citation) by embedding similarity search.
 Reuses embedding + Qdrant helpers from control_dedup.py.
 """
 import logging
 from typing import Optional
 from sqlalchemy import text
 from database import SessionLocal
 from compliance.services.control_dedup import (
    get_embedding,
    qdrant_search_cross_regulation,
 )
 logger = logging.getLogger(__name__)
 # Similarity threshold — lower than dedup (0.85) since we want informational matches
 V1_MATCH_THRESHOLD = 0.75
 V1_MAX_MATCHES = 5
 def _is_eigenentwicklung_query() -> str:
    """SQL WHERE clause identifying v1 Eigenentwicklung controls.

    Returns a fragment with unqualified column names, meant to be embedded
    after ``WHERE`` in a query on ``canonical_controls``. It selects rows
    that are 'ungrouped', have pipeline version '1' or NULL, have neither a
    source citation nor a parent control, and are not in a rejected, merged
    or deprecated release state. The text is constant and takes no input.
    """
    return """
        generation_strategy = 'ungrouped'
        AND (pipeline_version = '1' OR pipeline_version IS NULL)
        AND source_citation IS NULL
        AND parent_control_uuid IS NULL
        AND release_state NOT IN ('rejected', 'merged', 'deprecated')
    """
 async def count_v1_controls() -> int:
    """Return the number of controls that pass the Eigenentwicklung filter.

    Runs one ``COUNT(*)`` over ``canonical_controls`` restricted by
    ``_is_eigenentwicklung_query()``. Yields 0 when the query returns no row.
    """
    count_sql = text(f"""
            SELECT COUNT(*) AS cnt
            FROM canonical_controls
            WHERE {_is_eigenentwicklung_query()}
        """)
    with SessionLocal() as session:
        result_row = session.execute(count_sql).fetchone()
        if not result_row:
            return 0
        return result_row.cnt
 async def enrich_v1_matches(
    dry_run: bool = True,
    batch_size: int = 100,
    offset: int = 0,
 ) -> dict:
    """Find regulatory matches for v1 Eigenentwicklung controls.

    For each v1 control in the requested page, the title and objective are
    embedded, similar controls are searched in Qdrant, and hits that score at
    least ``V1_MATCH_THRESHOLD`` and carry a ``source_citation`` are upserted
    into ``v1_control_matches`` (at most ``V1_MAX_MATCHES`` per control).

    Each control is written inside its own savepoint: if a statement fails,
    only that control's rows are rolled back and the rest of the batch is
    still committed. Counters and samples only reflect controls whose
    savepoint was released successfully.

    Args:
        dry_run: If True, only count — don't write matches.
        batch_size: Number of v1 controls to process per call.
        offset: Pagination offset (v1 control index).

    Returns:
        Stats dict with counts, sample matches, and pagination info.
        ``next_offset`` is None when the page was not full (last batch).
    """
    with SessionLocal() as db:
        # 1. Load v1 controls (paginated, stable order by control_id)
        v1_controls = db.execute(text(f"""
            SELECT id, control_id, title, objective, category
            FROM canonical_controls
            WHERE {_is_eigenentwicklung_query()}
            ORDER BY control_id
            LIMIT :limit OFFSET :offset
        """), {"limit": batch_size, "offset": offset}).fetchall()
        # Count total for pagination
        total_row = db.execute(text(f"""
            SELECT COUNT(*) AS cnt
            FROM canonical_controls
            WHERE {_is_eigenentwicklung_query()}
        """)).fetchone()
        total_v1 = total_row.cnt if total_row else 0
        if not v1_controls:
            return {
                "dry_run": dry_run,
                "processed": 0,
                "total_v1": total_v1,
                "message": "Kein weiterer Batch — alle v1 Controls verarbeitet.",
            }
        if dry_run:
            return {
                "dry_run": True,
                "total_v1": total_v1,
                "offset": offset,
                "batch_size": batch_size,
                "sample_controls": [
                    {
                        "control_id": r.control_id,
                        "title": r.title,
                        "category": r.category,
                    }
                    for r in v1_controls[:20]
                ],
            }
        # 2. Process each v1 control
        processed = 0
        matches_inserted = 0
        errors = []
        sample_matches = []
        for v1 in v1_controls:
            try:
                # Build search text
                search_text = f"{v1.title} — {v1.objective}"
                # Get embedding
                embedding = await get_embedding(search_text)
                if not embedding:
                    errors.append({
                        "control_id": v1.control_id,
                        "error": "Embedding fehlgeschlagen",
                    })
                    continue
                # Search Qdrant (cross-regulation, no pattern filter)
                results = await qdrant_search_cross_regulation(
                    embedding, top_k=10,
                )
                # Per-control tallies; merged into the batch totals only after
                # the savepoint below has been released without error.
                control_inserted = 0
                control_samples = []
                rank = 0
                # Savepoint: a failing statement must not abort the whole
                # batch transaction and take earlier controls down with it.
                with db.begin_nested():
                    # Filter: only regulatory controls (with source_citation)
                    # and above threshold
                    for hit in results:
                        score = hit.get("score", 0)
                        if score < V1_MATCH_THRESHOLD:
                            continue
                        payload = hit.get("payload", {})
                        matched_uuid = payload.get("control_uuid")
                        # Skip hits without a UUID and the control matching itself
                        if not matched_uuid or matched_uuid == str(v1.id):
                            continue
                        # Check if matched control has source_citation
                        matched_row = db.execute(text("""
                            SELECT id, control_id, title, source_citation, severity, category
                            FROM canonical_controls
                            WHERE id = CAST(:uuid AS uuid)
                              AND source_citation IS NOT NULL
                        """), {"uuid": matched_uuid}).fetchone()
                        if not matched_row:
                            continue
                        rank += 1
                        if rank > V1_MAX_MATCHES:
                            break
                        # Extract source info
                        source_citation = matched_row.source_citation or {}
                        is_mapping = isinstance(source_citation, dict)
                        matched_source = source_citation.get("source") if is_mapping else None
                        matched_article = source_citation.get("article") if is_mapping else None
                        # Upsert match: on re-runs refresh score, rank and the
                        # denormalised source/article so they cannot go stale.
                        db.execute(text("""
                            INSERT INTO v1_control_matches
                                (v1_control_uuid, matched_control_uuid, similarity_score,
                                 match_rank, matched_source, matched_article, match_method)
                            VALUES
                                (CAST(:v1_uuid AS uuid), CAST(:matched_uuid AS uuid), :score,
                                 :rank, :source, :article, 'embedding')
                            ON CONFLICT (v1_control_uuid, matched_control_uuid) DO UPDATE
                            SET similarity_score = EXCLUDED.similarity_score,
                                match_rank = EXCLUDED.match_rank,
                                matched_source = EXCLUDED.matched_source,
                                matched_article = EXCLUDED.matched_article
                        """), {
                            "v1_uuid": str(v1.id),
                            "matched_uuid": str(matched_row.id),
                            "score": round(score, 3),
                            "rank": rank,
                            "source": matched_source,
                            "article": matched_article,
                        })
                        control_inserted += 1
                        control_samples.append({
                            "v1_control_id": v1.control_id,
                            "v1_title": v1.title,
                            "matched_control_id": matched_row.control_id,
                            "matched_title": matched_row.title,
                            "matched_source": matched_source,
                            "matched_article": matched_article,
                            "similarity_score": round(score, 3),
                            "match_rank": rank,
                        })
                matches_inserted += control_inserted
                # Collect samples (capped at 20 for the whole batch)
                free_slots = 20 - len(sample_matches)
                if free_slots > 0:
                    sample_matches.extend(control_samples[:free_slots])
                processed += 1
            except Exception as e:
                # Best effort: record the failure and continue with the next control
                logger.warning("V1 enrichment error for %s: %s", v1.control_id, e)
                errors.append({
                    "control_id": v1.control_id,
                    "error": str(e),
                })
        db.commit()
    # Pagination: a full page means there may be more to process
    next_offset = offset + batch_size if len(v1_controls) == batch_size else None
    return {
        "dry_run": False,
        "offset": offset,
        "batch_size": batch_size,
        "next_offset": next_offset,
        "total_v1": total_v1,
        "processed": processed,
        "matches_inserted": matches_inserted,
        "errors": errors[:10],
        "sample_matches": sample_matches,
    }
 async def get_v1_matches(control_uuid: str) -> list[dict]:
    """Fetch the stored regulatory matches of one v1 control.

    Joins ``v1_control_matches`` with ``canonical_controls`` to attach the
    matched control's details, ordered by ``match_rank``.

    Args:
        control_uuid: The UUID of the v1 control.

    Returns:
        One dict per match; ``similarity_score`` is converted to ``float``.
    """
    # Output keys, in the order they appear in each result dict
    field_names = (
        "matched_control_id",
        "matched_title",
        "matched_objective",
        "matched_severity",
        "matched_category",
        "matched_source",
        "matched_article",
        "matched_source_citation",
        "similarity_score",
        "match_rank",
        "match_method",
    )
    match_sql = text("""
            SELECT
                m.similarity_score,
                m.match_rank,
                m.matched_source,
                m.matched_article,
                m.match_method,
                c.control_id AS matched_control_id,
                c.title AS matched_title,
                c.objective AS matched_objective,
                c.severity AS matched_severity,
                c.category AS matched_category,
                c.source_citation AS matched_source_citation
            FROM v1_control_matches m
            JOIN canonical_controls c ON c.id = m.matched_control_uuid
            WHERE m.v1_control_uuid = CAST(:uuid AS uuid)
            ORDER BY m.match_rank
        """)
    with SessionLocal() as session:
        records = session.execute(match_sql, {"uuid": control_uuid}).fetchall()
        matches = []
        for record in records:
            entry = {name: getattr(record, name) for name in field_names}
            # The score column is NUMERIC; hand out a plain float instead
            entry["similarity_score"] = float(record.similarity_score)
            matches.append(entry)
        return matches
 async def get_v1_enrichment_stats() -> dict:
    """Get overview stats for v1 enrichment.

    Returns:
        Dict with the number of v1 Eigenentwicklung controls, how many of
        them have / lack at least one stored match, the total number of match
        rows, and the average similarity score (None when there are no rows).
    """
    v1_filter = _is_eigenentwicklung_query()
    with SessionLocal() as db:
        total_v1 = db.execute(text(f"""
            SELECT COUNT(*) AS cnt FROM canonical_controls
            WHERE {v1_filter}
        """)).fetchone()
        # The filter uses unqualified column names, so it is applied in a
        # subquery on canonical_controls alone. This avoids rewriting the
        # clause with string replacement to add a table alias.
        matched_v1 = db.execute(text(f"""
            SELECT COUNT(DISTINCT m.v1_control_uuid) AS cnt
            FROM v1_control_matches m
            WHERE m.v1_control_uuid IN (
                SELECT id FROM canonical_controls
                WHERE {v1_filter}
            )
        """)).fetchone()
        total_matches = db.execute(text("""
            SELECT COUNT(*) AS cnt FROM v1_control_matches
        """)).fetchone()
        avg_score = db.execute(text("""
            SELECT AVG(similarity_score) AS avg_score FROM v1_control_matches
        """)).fetchone()
        total_count = total_v1.cnt if total_v1 else 0
        matched_count = matched_v1.cnt if matched_v1 else 0
        # AVG() is NULL on an empty table; test for None so 0.0 is still reported
        has_avg = avg_score is not None and avg_score.avg_score is not None
        return {
            "total_v1_controls": total_count,
            "v1_with_matches": matched_count,
            "v1_without_matches": total_count - matched_count,
            "total_matches": total_matches.cnt if total_matches else 0,
            "avg_similarity_score": round(float(avg_score.avg_score), 3) if has_avg else None,
        }
@@ -0,0 +1,18 @@
 -- V1 Control Enrichment: cross-reference table for matching
 -- in-house developed controls (v1, ungrouped, no source) → regulatory controls.
 -- Each row links one v1 control to one matched control; both sides live in
 -- canonical_controls. IF NOT EXISTS lets the statement be re-run without
 -- failing when the table is already present.
 CREATE TABLE IF NOT EXISTS v1_control_matches (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    -- Both references cascade: deleting either control removes its match rows.
    v1_control_uuid UUID NOT NULL REFERENCES canonical_controls(id) ON DELETE CASCADE,
    matched_control_uuid UUID NOT NULL REFERENCES canonical_controls(id) ON DELETE CASCADE,
    -- Four digits in total, three of them after the decimal point.
    similarity_score NUMERIC(4,3) NOT NULL,
    -- NOTE(review): presumably 1 is the best match for a v1 control - confirm
    -- against the enrichment service that writes this column.
    match_rank SMALLINT NOT NULL DEFAULT 1,
    matched_source TEXT,           -- e.g. "DSGVO (EU) 2016/679"
    matched_article TEXT,          -- e.g. "Art. 32"
    match_method VARCHAR(30) NOT NULL DEFAULT 'embedding',
    created_at TIMESTAMPTZ DEFAULT NOW(),
    -- A given pair of controls can be stored at most once.
    CONSTRAINT uq_v1_match UNIQUE (v1_control_uuid, matched_control_uuid)
 );
 -- Lookup indexes for each side of the relation.
 CREATE INDEX IF NOT EXISTS idx_v1m_v1 ON v1_control_matches(v1_control_uuid);
 CREATE INDEX IF NOT EXISTS idx_v1m_matched ON v1_control_matches(matched_control_uuid);
@@ -0,0 +1,220 @@
 """Tests for V1 Control Enrichment (Eigenentwicklung matching)."""
 import sys
 sys.path.insert(0, ".")
 import pytest
 from unittest.mock import AsyncMock, MagicMock, patch
 from compliance.services.v1_enrichment import (
    enrich_v1_matches,
    get_v1_matches,
    count_v1_controls,
 )
 class TestV1EnrichmentDryRun:
    """``enrich_v1_matches(dry_run=True)`` reports statistics and a sample."""

    @pytest.mark.asyncio
    async def test_dry_run_returns_stats(self):
        """Two mocked v1 rows and a mocked count of 863 show up in the result."""
        access_control = MagicMock(
            id="uuid-v1-1",
            control_id="ACC-013",
            title="Zugriffskontrolle",
            objective="Zugriff einschraenken",
            category="access",
        )
        encryption_control = MagicMock(
            id="uuid-v1-2",
            control_id="SEC-005",
            title="Verschluesselung",
            objective="Daten verschluesseln",
            category="encryption",
        )
        count_row = MagicMock(cnt=863)

        with patch("compliance.services.v1_enrichment.SessionLocal") as session_factory:
            session = MagicMock()
            # Make ``with SessionLocal() as db`` hand out the mocked session.
            session_ctx = session_factory.return_value
            session_ctx.__enter__.return_value = session
            session_ctx.__exit__.return_value = False

            # Every execute() shares one cursor: fetchall() serves the
            # control list, fetchone() serves the count.
            cursor = session.execute.return_value
            cursor.fetchall.return_value = [access_control, encryption_control]
            cursor.fetchone.return_value = count_row

            result = await enrich_v1_matches(dry_run=True, batch_size=100, offset=0)

        assert result["dry_run"] is True
        assert result["total_v1"] == 863
        sample = result["sample_controls"]
        assert len(sample) == 2
        assert sample[0]["control_id"] == "ACC-013"
 class TestV1EnrichmentExecution:
    """Non-dry-run behaviour of ``enrich_v1_matches`` with DB, embedding and Qdrant mocked."""

    @pytest.mark.asyncio
    async def test_processes_and_inserts_matches(self):
        """One v1 control with one sufficiently similar hit yields one match.

        Qdrant returns two hits; the second (score 0.65) is expected to be
        dropped as below threshold, so exactly one match is reported.
        """
        mock_v1 = [
            MagicMock(
                id="uuid-v1-1",
                control_id="ACC-013",
                title="Zugriffskontrolle",
                objective="Zugriff auf Systeme einschraenken",
                category="access",
            ),
        ]
        mock_count = MagicMock(cnt=1)
        mock_matched_row = MagicMock(
            id="uuid-reg-1",
            control_id="SEC-042",
            title="Verschluesselung personenbezogener Daten",
            source_citation={"source": "DSGVO (EU) 2016/679", "article": "Art. 32"},
            severity="high",
            category="encryption",
        )
        mock_qdrant_results = [
            {
                "score": 0.89,
                "payload": {
                    "control_uuid": "uuid-reg-1",
                    "control_id": "SEC-042",
                    "title": "Verschluesselung",
                },
            },
            {
                "score": 0.65,  # Below threshold
                "payload": {
                    "control_uuid": "uuid-reg-2",
                    "control_id": "SEC-100",
                },
            },
        ]

        def fake_execute(query, params=None):
            """Route each statement to a canned result based on its SQL text."""
            sql = str(query)
            cursor = MagicMock()
            # fetchall() is only consumed by the v1 control listing.
            cursor.fetchall.return_value = mock_v1
            # COUNT statements take precedence; only the lookup of the
            # matched regulatory control returns the matched row, every
            # other statement (e.g. the insert) falls back to the count row.
            is_match_lookup = "COUNT" not in sql and "source_citation IS NOT NULL" in sql
            cursor.fetchone.return_value = mock_matched_row if is_match_lookup else mock_count
            return cursor

        with patch("compliance.services.v1_enrichment.SessionLocal") as mock_session:
            db = MagicMock()
            mock_session.return_value.__enter__ = MagicMock(return_value=db)
            mock_session.return_value.__exit__ = MagicMock(return_value=False)
            db.execute.side_effect = fake_execute
            with patch("compliance.services.v1_enrichment.get_embedding") as mock_embed, \
                 patch("compliance.services.v1_enrichment.qdrant_search_cross_regulation") as mock_qdrant:
                mock_embed.return_value = [0.1] * 1024
                mock_qdrant.return_value = mock_qdrant_results
                result = await enrich_v1_matches(dry_run=False, batch_size=100, offset=0)

        assert result["dry_run"] is False
        assert result["processed"] == 1
        assert result["matches_inserted"] == 1
        assert len(result["sample_matches"]) == 1
        assert result["sample_matches"][0]["matched_control_id"] == "SEC-042"
        assert result["sample_matches"][0]["similarity_score"] == 0.89

    @pytest.mark.asyncio
    async def test_empty_batch_returns_done(self):
        """An offset past the last control processes nothing and reports completion."""
        mock_count = MagicMock(cnt=863)
        with patch("compliance.services.v1_enrichment.SessionLocal") as mock_session:
            db = MagicMock()
            mock_session.return_value.__enter__ = MagicMock(return_value=db)
            mock_session.return_value.__exit__ = MagicMock(return_value=False)
            # No rows left in this batch; the total count is still available.
            db.execute.return_value.fetchall.return_value = []
            db.execute.return_value.fetchone.return_value = mock_count
            result = await enrich_v1_matches(dry_run=False, batch_size=100, offset=9999)

        assert result["processed"] == 0
        assert "alle v1 Controls verarbeitet" in result["message"]
 class TestV1MatchesEndpoint:
    """Retrieval of stored matches through ``get_v1_matches``."""

    @pytest.mark.asyncio
    async def test_returns_matches(self):
        """A single stored row is returned as one dict with its fields intact."""
        stored_match = MagicMock(
            matched_control_id="SEC-042",
            matched_title="Verschluesselung",
            matched_objective="Daten verschluesseln",
            matched_severity="high",
            matched_category="encryption",
            matched_source="DSGVO (EU) 2016/679",
            matched_article="Art. 32",
            matched_source_citation={"source": "DSGVO (EU) 2016/679"},
            similarity_score=0.89,
            match_rank=1,
            match_method="embedding",
        )

        with patch("compliance.services.v1_enrichment.SessionLocal") as session_factory:
            session = MagicMock()
            session_ctx = session_factory.return_value
            session_ctx.__enter__.return_value = session
            session_ctx.__exit__.return_value = False
            session.execute.return_value.fetchall.return_value = [stored_match]

            matches = await get_v1_matches("uuid-v1-1")

        assert len(matches) == 1
        first = matches[0]
        assert first["matched_control_id"] == "SEC-042"
        assert first["similarity_score"] == 0.89
        assert first["matched_source"] == "DSGVO (EU) 2016/679"

    @pytest.mark.asyncio
    async def test_empty_matches(self):
        """No stored rows for the control produce an empty list."""
        with patch("compliance.services.v1_enrichment.SessionLocal") as session_factory:
            session = MagicMock()
            session_ctx = session_factory.return_value
            session_ctx.__enter__.return_value = session
            session_ctx.__exit__.return_value = False
            session.execute.return_value.fetchall.return_value = []

            matches = await get_v1_matches("uuid-nonexistent")

        assert matches == []
 class TestEigenentwicklungDetection:
    """Checks the SQL predicate used to detect Eigenentwicklung controls."""

    @pytest.mark.asyncio
    async def test_count_v1_controls(self):
        """The mocked count is returned and the query carries all expected conditions."""
        count_row = MagicMock(cnt=863)

        with patch("compliance.services.v1_enrichment.SessionLocal") as session_factory:
            session = MagicMock()
            session_ctx = session_factory.return_value
            session_ctx.__enter__.return_value = session
            session_ctx.__exit__.return_value = False
            session.execute.return_value.fetchone.return_value = count_row

            total = await count_v1_controls()

        assert total == 863

        # Inspect the statement passed to the most recent execute() call.
        executed_sql = str(session.execute.call_args.args[0])
        expected_fragments = (
            "generation_strategy = 'ungrouped'",
            "source_citation IS NULL",
            "parent_control_uuid IS NULL",
        )
        for fragment in expected_fragments:
            assert fragment in executed_sql