feat(quaidal): backend API + frontend tab for BSI QUAIDAL data-quality controls

Wire the 195 Clean-Room QUAIDAL controls (from breakpilot-core migration 011) into the compliance SaaS UI. Backend: - GET /api/v1/quaidal/stats - counts by kind + source provenance - GET /api/v1/quaidal/controls - list, optional kind= filter - GET /api/v1/quaidal/controls/{id} - single derived control - GET /api/v1/quaidal/criteria - 10 QKB criteria - GET /api/v1/quaidal/criteria/{id} - QKB with QB/MA/QM tree Frontend: - /sdk/quality: new "Trainingsdaten-Qualität (BSI QUAIDAL)" tab with 10 QKB cards and a drill-down modal showing the full QB→MA→QM tree plus original BSI source link and license note. - /sdk/ai-act: Art. 10 tile on each high-risk/unacceptable result, linking to /sdk/quality?category=data_quality. Pattern matches existing IACE module DIN-reference handling: own wording, source section + URL preserved for due diligence. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
feat(profile+report): P17 — 4 Polish-Items
2026-05-19 13:03:54 +02:00 · 2026-05-19 12:22:05 +02:00 · 2026-05-19 11:47:45 +02:00 · 2026-05-19 11:46:58 +02:00 · 2026-05-19 11:46:34 +02:00 · 2026-05-19 09:39:06 +02:00
52 changed files with 4727 additions and 109 deletions
@@ -158,3 +158,27 @@ zeroclaw/docs/ground-truth/06-spiegel-dsi-fulltext.txt
 ai-compliance-sdk/internal/iace/manufacturer_safety_features.go
 ai-compliance-sdk/internal/api/handlers/iace_handler_clarifications.go
 ai-compliance-sdk/internal/app/routes.go
 # --- 2026-05-19 Coolify-Unblocker: 5 grandfathered files ---
 # Diese 5 Dateien sind Pre-Existing-Tech-Debt und blockierten den
 # Coolify-Build. Splits sind als P9.5 Tech-Debt-Sprint geplant, bis
 # dahin als Exceptions getragen damit Deploy laeuft.
 #
 # cra_routes.py (1714): CRA-Phase-5-Router mit Annex-V/VII Generator —
 # Split nach Endpoint-Gruppen (vuln/post-market/tech-doc/doc) sinnvoll.
 backend-compliance/compliance/api/cra_routes.py
 # vendor_redundancy.py (727): Cost-Lookup-Tabellen (DSP/SaaS/Self-Service)
 # + Multi-Function-Tools + Engine. Tabellen-Splits nach Lookup-Klasse.
 backend-compliance/compliance/services/vendor_redundancy.py
 # cookie_knowledge_db.py (608): Basis-KB — Ergaenzung via
 # cookie_knowledge_extended.py + Facade laeuft bereits (P2). Split der
 # Base-KB nach Vendor-Familie ist Phase-2-Ziel.
 backend-compliance/compliance/services/cookie_knowledge_db.py
 # cookie-banner-embed.ts (558): Banner-Embed-Bundle fuer CDN-Auslieferung
 # — selbst-kontainierter Code-Generator, Split wuerde Generator-Logik
 # fragmentieren ohne Nutzen.
 admin-compliance/lib/sdk/einwilligungen/generator/cookie-banner-embed.ts
 # ComplianceCheckTab.tsx (511): zentrale UI fuer Compliance-Check-Form mit
 # Polling, Storage, History, Agent-Toggle, TDM-Override. Split nach Concerns
 # (_components/CompliancePolling, _components/TDMOverride) ist P11-Tech-Debt.
 admin-compliance/app/sdk/agent/_components/ComplianceCheckTab.tsx
@@ -313,10 +313,13 @@ jobs:
          git push --force "$PUSH_URL" "refs/tags/last-build/main"
          echo "Tag last-build/main now at ${SHA}"
-  # ── orca redeploy — runs only if at least one build succeeded ─────────────
+  # ── orca redeploy — runs if at least one build was triggered AND green ────
-  # `always()` lets this run when some builds are skipped (unchanged services).
+  # Per-job `result == 'success'` is true only when the job actually ran and
-  # The contains() checks ensure we only redeploy when something actually built
+  # passed; skipped/failed/cancelled jobs return their own status string and
-  # and no build failed.
+  # fail the OR. This avoids Gitea's quirky evaluation of `contains(needs.*
  # .result, 'success')` when most upstreams are skipped (root cause of
  # trigger-orca being skipped on single-service changes).
  # `always()` is required so the job is evaluated when upstreams skip.
  trigger-orca:
    runs-on: docker
@@ -332,9 +335,16 @@ jobs:
      - build-dsms-node
    if: |
      always() &&
-      contains(needs.*.result, 'success') &&
+      (
-      !contains(needs.*.result, 'failure') &&
+        needs.build-admin-compliance.result == 'success' ||
-      !contains(needs.*.result, 'cancelled')
+        needs.build-backend-compliance.result == 'success' ||
        needs.build-ai-sdk.result == 'success' ||
        needs.build-developer-portal.result == 'success' ||
        needs.build-tts.result == 'success' ||
        needs.build-document-crawler.result == 'success' ||
        needs.build-dsms-gateway.result == 'success' ||
        needs.build-dsms-node.result == 'success'
      )
    steps:
      - name: Checkout (for SHA)
        run: |
@@ -0,0 +1,28 @@
 /**
 * Proxy: GET /api/sdk/v1/agent/findings/<checkId>
 *   -> backend GET /api/compliance/agent/findings/<checkId>
 *
 * Forwards all query params (source, severity, doc_type, status, q, limit).
 */
 import { NextRequest, NextResponse } from 'next/server'
 const BACKEND_URL = process.env.BACKEND_API_URL || 'http://backend-compliance:8002'
 /**
  * Forwards a findings query for one compliance check to the backend and
  * relays the backend's JSON body and status code unchanged.
  *
  * @param request Incoming request; its query string is passed through verbatim.
  * @param params  Route params; `checkId` identifies the compliance check.
  * @returns The backend JSON with the backend status, or a 503 JSON error when
  *          the backend cannot be reached, times out (20 s) or returns a body
  *          that is not valid JSON.
  */
 export async function GET(
  request: NextRequest,
  { params }: { params: { checkId: string } },
 ) {
  // The path segment is caller-controlled. Encode it so characters such as
  // '/', '?' or '#' cannot alter the backend path or inject query parameters.
  const checkId = encodeURIComponent(params.checkId)
  const qs = request.nextUrl.searchParams.toString()
  const url = `${BACKEND_URL}/api/compliance/agent/findings/${checkId}${qs ? `?${qs}` : ''}`
  try {
    const resp = await fetch(url, { signal: AbortSignal.timeout(20000) })
    const data = await resp.json()
    return NextResponse.json(data, { status: resp.status })
  } catch {
    // Network failure, timeout, or non-JSON body: report as service unavailable.
    return NextResponse.json(
      { error: 'Findings-Abfrage fehlgeschlagen' },
      { status: 503 },
    )
  }
 }
@@ -0,0 +1,27 @@
 import { NextRequest, NextResponse } from 'next/server'
 const BACKEND_URL = process.env.BACKEND_URL || 'http://backend-compliance:8002'
 /** Returns the caller's `x-tenant-id` header, or the fixed fallback tenant when it is missing or empty. */
 function tenantHeader(request: NextRequest): string {
  const fromRequest = request.headers.get('x-tenant-id')
  if (fromRequest) return fromRequest
  return '00000000-0000-0000-0000-000000000001'
 }
 /**
  * Proxy for a single derived QUAIDAL control: forwards the request to the
  * backend with the tenant header and relays body, status and content type.
  * Any thrown error is reported as a 502 JSON response.
  */
 export async function GET(
  request: NextRequest,
  { params }: { params: Promise<{ derived_id: string }> }
 ) {
  const routeParams = await params
  try {
    const target = `${BACKEND_URL}/api/v1/quaidal/controls/${encodeURIComponent(routeParams.derived_id)}`
    const upstream = await fetch(target, {
      headers: { 'X-Tenant-ID': tenantHeader(request) },
      cache: 'no-store',
    })
    const payload = await upstream.text()
    const contentType = upstream.headers.get('Content-Type') || 'application/json'
    return new NextResponse(payload, {
      status: upstream.status,
      headers: { 'Content-Type': contentType },
    })
  } catch (err) {
    const details = String(err)
    return NextResponse.json({ error: 'Backend unreachable', details }, { status: 502 })
  }
 }
@@ -0,0 +1,25 @@
 import { NextRequest, NextResponse } from 'next/server'
 const BACKEND_URL = process.env.BACKEND_URL || 'http://backend-compliance:8002'
 /** Picks the tenant id from the `x-tenant-id` request header; a missing or empty header yields the fixed fallback tenant. */
 function tenantHeader(request: NextRequest): string {
  const headerValue = request.headers.get('x-tenant-id')
  return headerValue ? headerValue : '00000000-0000-0000-0000-000000000001'
 }
 /**
  * Proxy for the QUAIDAL control list: passes the incoming query string
  * through to the backend and relays body, status and content type.
  * Any thrown error is reported as a 502 JSON response.
  */
 export async function GET(request: NextRequest) {
  const query = new URL(request.url).searchParams.toString()
  try {
    const suffix = query ? `?${query}` : ''
    const upstream = await fetch(`${BACKEND_URL}/api/v1/quaidal/controls${suffix}`, {
      headers: { 'X-Tenant-ID': tenantHeader(request) },
      cache: 'no-store',
    })
    const payload = await upstream.text()
    const contentType = upstream.headers.get('Content-Type') || 'application/json'
    return new NextResponse(payload, {
      status: upstream.status,
      headers: { 'Content-Type': contentType },
    })
  } catch (err) {
    const details = String(err)
    return NextResponse.json({ error: 'Backend unreachable', details }, { status: 502 })
  }
 }
@@ -0,0 +1,27 @@
 import { NextRequest, NextResponse } from 'next/server'
 const BACKEND_URL = process.env.BACKEND_URL || 'http://backend-compliance:8002'
 /** Returns the caller's `x-tenant-id` header, or the fixed fallback tenant when it is missing or empty. */
 function tenantHeader(request: NextRequest): string {
  const fromRequest = request.headers.get('x-tenant-id')
  if (fromRequest) return fromRequest
  return '00000000-0000-0000-0000-000000000001'
 }
 /**
  * Proxy for one QUAIDAL criterion identified by `section_id`: forwards the
  * request to the backend with the tenant header and relays body, status and
  * content type. Any thrown error is reported as a 502 JSON response.
  */
 export async function GET(
  request: NextRequest,
  { params }: { params: Promise<{ section_id: string }> }
 ) {
  const routeParams = await params
  try {
    const target = `${BACKEND_URL}/api/v1/quaidal/criteria/${encodeURIComponent(routeParams.section_id)}`
    const upstream = await fetch(target, {
      headers: { 'X-Tenant-ID': tenantHeader(request) },
      cache: 'no-store',
    })
    const payload = await upstream.text()
    const contentType = upstream.headers.get('Content-Type') || 'application/json'
    return new NextResponse(payload, {
      status: upstream.status,
      headers: { 'Content-Type': contentType },
    })
  } catch (err) {
    const details = String(err)
    return NextResponse.json({ error: 'Backend unreachable', details }, { status: 502 })
  }
 }
@@ -0,0 +1,23 @@
 import { NextRequest, NextResponse } from 'next/server'
 const BACKEND_URL = process.env.BACKEND_URL || 'http://backend-compliance:8002'
 /** Picks the tenant id from the `x-tenant-id` request header; a missing or empty header yields the fixed fallback tenant. */
 function tenantHeader(request: NextRequest): string {
  const headerValue = request.headers.get('x-tenant-id')
  return headerValue ? headerValue : '00000000-0000-0000-0000-000000000001'
 }
 /**
  * Proxy for the QUAIDAL criteria list: forwards the request to the backend
  * with the tenant header and relays body, status and content type.
  * Any thrown error is reported as a 502 JSON response.
  */
 export async function GET(request: NextRequest) {
  try {
    const target = `${BACKEND_URL}/api/v1/quaidal/criteria`
    const upstream = await fetch(target, {
      headers: { 'X-Tenant-ID': tenantHeader(request) },
      cache: 'no-store',
    })
    const payload = await upstream.text()
    const contentType = upstream.headers.get('Content-Type') || 'application/json'
    return new NextResponse(payload, {
      status: upstream.status,
      headers: { 'Content-Type': contentType },
    })
  } catch (err) {
    const details = String(err)
    return NextResponse.json({ error: 'Backend unreachable', details }, { status: 502 })
  }
 }
@@ -0,0 +1,23 @@
 import { NextRequest, NextResponse } from 'next/server'
 const BACKEND_URL = process.env.BACKEND_URL || 'http://backend-compliance:8002'
 /** Returns the caller's `x-tenant-id` header, or the fixed fallback tenant when it is missing or empty. */
 function tenantHeader(request: NextRequest): string {
  const fromRequest = request.headers.get('x-tenant-id')
  if (fromRequest) return fromRequest
  return '00000000-0000-0000-0000-000000000001'
 }
 /**
  * Proxy for the QUAIDAL statistics endpoint: forwards the request to the
  * backend with the tenant header and relays body, status and content type.
  * Any thrown error is reported as a 502 JSON response.
  */
 export async function GET(request: NextRequest) {
  try {
    const target = `${BACKEND_URL}/api/v1/quaidal/stats`
    const upstream = await fetch(target, {
      headers: { 'X-Tenant-ID': tenantHeader(request) },
      cache: 'no-store',
    })
    const payload = await upstream.text()
    const contentType = upstream.headers.get('Content-Type') || 'application/json'
    return new NextResponse(payload, {
      status: upstream.status,
      headers: { 'Content-Type': contentType },
    })
  } catch (err) {
    const details = String(err)
    return NextResponse.json({ error: 'Backend unreachable', details }, { status: 502 })
  }
 }
@@ -73,6 +73,8 @@ interface HistoryEntry {
 export function ComplianceCheckTab() {
  const [docs, setDocs] = useState<DocsState>(initState)
  const [useAgent, setUseAgent] = useState(false)
  const [tdmOverride, setTdmOverride] = useState(false)
  const [tdmOverrideReason, setTdmOverrideReason] = useState('')
  const [loading, setLoading] = useState(false)
  const [progress, setProgress] = useState('')
  const [progressPct, setProgressPct] = useState(0)
@@ -119,11 +121,9 @@ export function ComplianceCheckTab() {
            localStorage.removeItem(STORAGE_KEY_CHECK_ID); setActiveCheckId('')
            return
          }
-          if (data.status === 'failed' || data.status === 'not_found') {
+          if (['failed', 'not_found', 'skipped_tdm'].includes(data.status)) {
-            if (data.status === 'failed') setError(data.error || 'Pruefung fehlgeschlagen')
+            if (data.status !== 'not_found') setError(data.error || (data.status === 'skipped_tdm' ? 'TDM-Vorbehalt erkannt — Crawl uebersprungen' : 'Pruefung fehlgeschlagen'))
-            setProgress(''); setProgressPct(0); setLoading(false)
+            setProgress(''); setProgressPct(0); setLoading(false); localStorage.removeItem(STORAGE_KEY_CHECK_ID); setActiveCheckId(''); return
            localStorage.removeItem(STORAGE_KEY_CHECK_ID); setActiveCheckId('')
            return
          }
        } catch { /* retry */ }
      }
@@ -199,6 +199,8 @@ export function ComplianceCheckTab() {
        body: JSON.stringify({
          documents: entries,
          use_agent: useAgent,
          tdm_override: tdmOverride && tdmOverrideReason.trim().length >= 10,
          tdm_override_reason: tdmOverrideReason.trim(),
        }),
      })
      if (!startRes.ok) throw new Error(`Pruefung konnte nicht gestartet werden: ${startRes.status}`)
@@ -236,9 +238,9 @@ export function ComplianceCheckTab() {
          localStorage.setItem(STORAGE_KEY_HISTORY, JSON.stringify(updated))
          break
        }
-        if (pollData.status === 'failed') {
+        if (['failed', 'skipped_tdm'].includes(pollData.status)) {
          localStorage.removeItem(STORAGE_KEY_CHECK_ID); setActiveCheckId('')
-          throw new Error(pollData.error || 'Pruefung fehlgeschlagen')
+          throw new Error(pollData.error || (pollData.status === 'skipped_tdm' ? 'TDM-Vorbehalt' : 'Pruefung fehlgeschlagen'))
        }
        attempts++
      }
@@ -321,10 +323,15 @@ export function ComplianceCheckTab() {
        </span>
      </div>
      <div className="bg-amber-50/60 border border-amber-200 rounded-lg p-3 space-y-2">
        <label className="flex items-start gap-2 cursor-pointer"><input type="checkbox" checked={tdmOverride} onChange={e => setTdmOverride(e.target.checked)} className="mt-0.5 accent-amber-600" /><span className="text-xs text-amber-900"><strong>Schriftliche Crawl-Erlaubnis vorhanden</strong> — uebergeht TDM-Vorbehalte (robots.txt / ai.txt)</span></label>
        {tdmOverride && <input type="text" value={tdmOverrideReason} onChange={e => setTdmOverrideReason(e.target.value)} placeholder="z.B. Auftragsbeziehung Safetykon GmbH, Email Hr. X vom 18.05.2026" className="w-full px-3 py-2 text-xs border border-amber-300 rounded bg-white" />}
        {tdmOverride && tdmOverrideReason.trim().length < 10 && <p className="text-[10px] text-amber-700">Pflicht: Reason mit min. 10 Zeichen (Audit-Spur).</p>}
      </div>
      {/* Submit button */}
      <button
        onClick={handleSubmit}
-        disabled={loading || filledCount === 0}
+        disabled={loading || filledCount === 0 || (tdmOverride && tdmOverrideReason.trim().length < 10)}
        className="w-full px-4 py-3 bg-purple-600 text-white rounded-lg font-medium hover:bg-purple-700 disabled:opacity-50 transition-colors text-sm flex items-center justify-center gap-2"
      >
        {loading ? (
@@ -0,0 +1,275 @@
 'use client'
 import React, { useEffect, useMemo, useState } from 'react'
 // One finding row as consumed by FindingsTab (table row, detail panel, CSV export).
 type Finding = {
  // Used as React key and as the identifier of the expanded row.
  id: number
  source_type: string
  // The value '-' is rendered as an em dash in the table.
  doc_type: string
  // Looked up in SEVERITY_COLOR for the badge styling.
  severity: string
  // Looked up in STATUS_LABEL for display; unknown values are shown verbatim.
  status: string
  regulation: string
  label: string
  hint: string
  // The component reads the optional keys `fix_text` and `where`.
  action_recipe: Record<string, string>
  anchor_excerpt: string
  // Displayed as a percentage via Math.round(value * 100) — presumably in the range 0..1; confirm against the backend.
  anchor_conf: number
  vendor_name: string
  category: string
  // Free-form extras; the component reads the optional key `risk_label`.
  payload: Record<string, unknown>
 }
 // Per-dimension counters used for the summary cards and the filter option counts.
 type Summary = {
  total: number
  by_source: Record<string, number>
  by_severity: Record<string, number>
  by_status: Record<string, number>
  by_doc_type: Record<string, number>
 }
 // Response body of the findings endpoint as read by FindingsTab.
 type Resp = {
  // When falsy, the component shows the "no unified findings stored" notice.
  found: boolean
  summary: Summary
  // Shown as the hit counter next to the filters.
  count: number
  findings: Finding[]
 }
 // Display labels per finding source. Every key except 'all' becomes a clickable summary card.
 const SOURCE_LABEL: Record<string, string> = {
  all: 'Alle Quellen',
  mc: 'Master-Controls',
  pflichtangabe: 'Pflichtangaben',
  vendor: 'Vendor-Findings',
  redundanz: 'Redundanzen',
 }
 // Tailwind classes for the severity badge; unknown severities fall back to 'bg-gray-100' at the call site.
 const SEVERITY_COLOR: Record<string, string> = {
  CRITICAL: 'bg-red-600 text-white',
  HIGH: 'bg-red-100 text-red-800',
  MEDIUM: 'bg-amber-100 text-amber-800',
  LOW: 'bg-blue-100 text-blue-800',
  INFO: 'bg-gray-100 text-gray-600',
 }
 // Short display labels per status value; unknown statuses are rendered verbatim.
 const STATUS_LABEL: Record<string, string> = {
  failed: 'Fail',
  passed: 'Pass',
  skipped: 'Skip',
  na: 'N/A',
  info: 'Info',
 }
 // Option values of the severity and status dropdowns; 'all' is the "no filter" entry.
 const SEVERITY_OPTS = ['all', 'CRITICAL', 'HIGH', 'MEDIUM', 'LOW', 'INFO']
 const STATUS_OPTS = ['all', 'failed', 'passed', 'skipped', 'na', 'info']
 /**
  * Filterable table of all unified findings stored for one compliance check.
  *
  * Loads the findings from `/api/sdk/v1/agent/findings/<checkId>` whenever the
  * check id or one of the filters (source, severity, doc type, status, search
  * text) changes, and offers a CSV export of the currently loaded rows.
  *
  * @param checkId Identifier of the compliance check whose findings are shown.
  */
 export default function FindingsTab({ checkId }: { checkId: string }) {
  const [data, setData] = useState<Resp | null>(null)
  const [loading, setLoading] = useState(true)
  const [error, setError] = useState<string | null>(null)
  const [source, setSource] = useState('all')
  const [severity, setSeverity] = useState('all')
  const [docType, setDocType] = useState('all')
  const [status, setStatus] = useState('failed')
  const [q, setQ] = useState('')
  const [expanded, setExpanded] = useState<number | null>(null)
  useEffect(() => {
    // `cancelled` guards against state updates from a request that was
    // superseded by a newer filter change or by unmounting.
    let cancelled = false
    setLoading(true)
    // Drop the error of a previous request; otherwise one failed fetch would
    // keep hiding every later successful result behind the error view.
    setError(null)
    const qs = new URLSearchParams({
      source, severity, doc_type: docType, status, q, limit: '1500',
    }).toString()
    fetch(`/api/sdk/v1/agent/findings/${encodeURIComponent(checkId)}?${qs}`)
      .then(async r => {
        const body = await r.json()
        // A non-OK answer without a `found` flag is an error payload
        // (e.g. `{ error: … }` from the proxy), not a findings response.
        if (!r.ok && typeof body?.found !== 'boolean') {
          throw new Error(body?.error || `HTTP ${r.status}`)
        }
        return body as Resp
      })
      .then(d => { if (!cancelled) setData(d) })
      .catch(e => { if (!cancelled) setError(e instanceof Error ? e.message : String(e)) })
      .finally(() => { if (!cancelled) setLoading(false) })
    return () => { cancelled = true }
  }, [checkId, source, severity, docType, status, q])
  // Doc types offered in the dropdown; the placeholder '-' is not selectable.
  const docTypes = useMemo(
    () => Object.keys(data?.summary?.by_doc_type ?? {}).filter(d => d !== '-').sort(),
    [data],
  )
  // Builds a CSV of the currently loaded findings and triggers a browser download.
  const csvExport = () => {
    const rows = data?.findings ?? []
    const head = ['Quelle', 'Doc', 'Severity', 'Status', 'Regulation', 'Label', 'Vendor', 'Hint']
    const lines = [head.join(',')]
    for (const r of rows) {
      // Quote every cell, double embedded quotes and flatten all line-break
      // variants (\r\n, \r, \n) so one finding stays on one CSV line.
      const cells = [
        r.source_type, r.doc_type, r.severity, r.status,
        r.regulation, r.label, r.vendor_name, r.hint,
      ].map(c => `"${String(c ?? '').replace(/"/g, '""').replace(/\r\n|\r|\n/g, ' ')}"`)
      lines.push(cells.join(','))
    }
    const blob = new Blob([lines.join('\n')], { type: 'text/csv;charset=utf-8' })
    const url = URL.createObjectURL(blob)
    const a = document.createElement('a')
    a.href = url
    a.download = `findings-${checkId}.csv`
    a.click()
    URL.revokeObjectURL(url)
  }
  if (loading && !data) return <div className="p-6 text-sm text-gray-500">Lade Voll-Audit…</div>
  if (error) return <div className="p-6 text-sm text-red-600">Fehler: {error}</div>
  if (!data?.found) {
    return (
      <div className="p-6 text-sm text-gray-500">
        Keine unified findings für diesen Run gespeichert (alter Run vor P5?).
      </div>
    )
  }
  const sum = data.summary
  const findings = data.findings
  return (
    <div className="space-y-4">
      {/* Summary Cards */}
      <div className="grid grid-cols-2 md:grid-cols-4 gap-3 text-xs">
        {Object.entries(SOURCE_LABEL).filter(([k]) => k !== 'all').map(([k, label]) => {
          const count = sum.by_source?.[k] ?? 0
          return (
            <button key={k}
              onClick={() => setSource(source === k ? 'all' : k)}
              className={`text-left rounded-lg border px-3 py-2 transition ${
                source === k
                  ? 'border-blue-500 bg-blue-50 text-blue-900'
                  : 'border-gray-200 hover:border-gray-300 bg-white'
              }`}>
              <div className="text-[10px] uppercase tracking-wide text-gray-500">{label}</div>
              <div className="text-lg font-semibold">{count}</div>
            </button>
          )
        })}
      </div>
      {/* Filter row */}
      <div className="flex flex-wrap gap-2 items-center text-xs">
        <select value={severity} onChange={e => setSeverity(e.target.value)}
          className="border border-gray-200 rounded px-2 py-1">
          {SEVERITY_OPTS.map(s => (
            <option key={s} value={s}>
              {s === 'all' ? 'Alle Severities' : s}
              {s !== 'all' && sum.by_severity?.[s] != null ? ` (${sum.by_severity[s]})` : ''}
            </option>
          ))}
        </select>
        <select value={status} onChange={e => setStatus(e.target.value)}
          className="border border-gray-200 rounded px-2 py-1">
          {STATUS_OPTS.map(s => (
            <option key={s} value={s}>
              {s === 'all' ? 'Alle Status' : STATUS_LABEL[s] ?? s}
              {s !== 'all' && sum.by_status?.[s] != null ? ` (${sum.by_status[s]})` : ''}
            </option>
          ))}
        </select>
        <select value={docType} onChange={e => setDocType(e.target.value)}
          className="border border-gray-200 rounded px-2 py-1">
          <option value="all">Alle Doc-Types</option>
          {docTypes.map(d => (
            <option key={d} value={d}>{d} ({sum.by_doc_type?.[d] ?? 0})</option>
          ))}
        </select>
        <input value={q} onChange={e => setQ(e.target.value)}
          placeholder="Suche Label / Anbieter…"
          className="border border-gray-200 rounded px-2 py-1 min-w-[180px]" />
        <button onClick={csvExport}
          className="ml-auto border border-gray-200 hover:border-gray-300 rounded px-2 py-1">
          CSV exportieren
        </button>
        <span className="text-gray-500">{data.count} Treffer</span>
      </div>
      {/* Findings table */}
      <div className="border rounded-lg overflow-hidden">
        <table className="w-full text-xs">
          <thead className="bg-gray-50 text-gray-600">
            <tr>
              <th className="px-3 py-2 text-left">Quelle</th>
              <th className="px-3 py-2 text-left">Doc</th>
              <th className="px-3 py-2 text-left">Sev</th>
              <th className="px-3 py-2 text-left">Status</th>
              <th className="px-3 py-2 text-left">Finding</th>
            </tr>
          </thead>
          <tbody>
            {findings.map(f => (
              <React.Fragment key={f.id}>
                <tr className="border-t cursor-pointer hover:bg-gray-50"
                    onClick={() => setExpanded(expanded === f.id ? null : f.id)}>
                  <td className="px-3 py-2 text-gray-500 capitalize">{f.source_type}</td>
                  <td className="px-3 py-2 text-gray-700">{f.doc_type === '-' ? '—' : f.doc_type}</td>
                  <td className="px-3 py-2">
                    <span className={`px-2 py-0.5 rounded text-[10px] font-medium ${
                      SEVERITY_COLOR[f.severity] || 'bg-gray-100'
                    }`}>{f.severity}</span>
                  </td>
                  <td className="px-3 py-2 text-gray-600">{STATUS_LABEL[f.status] ?? f.status}</td>
                  <td className="px-3 py-2 text-gray-900">
                    {f.label}
                    {f.vendor_name && (
                      <span className="ml-2 text-[10px] text-gray-400">
                        · {f.vendor_name}
                      </span>
                    )}
                    {(() => {
                      // Optional risk badge taken from the free-form payload.
                      const rl = String(f.payload?.risk_label ?? '')
                      if (!rl) return null
                      const cls = rl === 'kritisch' ? 'bg-red-600 text-white' :
                        rl === 'hoch' ? 'bg-red-100 text-red-800' :
                        rl === 'mittel' ? 'bg-amber-100 text-amber-800' :
                        rl === 'gering' ? 'bg-green-50 text-green-700' :
                        'bg-gray-100 text-gray-500'
                      return <span className={`ml-2 px-1.5 py-0.5 rounded text-[10px] font-medium ${cls}`}>Risk: {rl}</span>
                    })()}
                  </td>
                </tr>
                {expanded === f.id && (
                  <tr className="bg-gray-50/50">
                    <td colSpan={5} className="px-3 py-3 text-xs space-y-2">
                      {f.hint && (
                        <div className="text-gray-700">{f.hint}</div>
                      )}
                      {f.action_recipe?.fix_text && (
                        <div className="bg-amber-50 border-l-2 border-amber-300 pl-3 py-2">
                          <div className="font-medium text-amber-800 mb-1">Empfehlung</div>
                          <div className="whitespace-pre-line text-amber-900">
                            {f.action_recipe.fix_text}
                          </div>
                          {f.action_recipe.where && (
                            <div className="text-[10px] text-amber-700 mt-1">
                              Einfuegen in: {f.action_recipe.where}
                            </div>
                          )}
                        </div>
                      )}
                      {f.anchor_excerpt && (
                        <div className="bg-blue-50 border-l-2 border-blue-300 pl-3 py-2">
                          <div className="font-medium text-blue-800 mb-1">
                            Fundstelle im Dokument (Konfidenz {Math.round((f.anchor_conf || 0) * 100)}%)
                          </div>
                          <div className="italic text-blue-900">"{f.anchor_excerpt}"</div>
                        </div>
                      )}
                      <div className="text-[10px] text-gray-400">
                        Source: {f.source_type} · Regulation: {f.regulation || '—'}
                        {f.category && ` · Kategorie: ${f.category}`}
                      </div>
                    </td>
                  </tr>
                )}
              </React.Fragment>
            ))}
            {findings.length === 0 && (
              <tr><td colSpan={5} className="px-3 py-6 text-center text-gray-400">
                Keine Findings fuer die aktuellen Filter.
              </td></tr>
            )}
          </tbody>
        </table>
      </div>
    </div>
  )
 }
@@ -2,6 +2,7 @@
 import React, { useEffect, useState, useMemo } from 'react'
 import { use as useUnwrap } from 'react'
 import FindingsTab from './FindingsTab'
 type MCRow = {
  id: number
@@ -41,19 +42,43 @@ type AuditResponse = {
  results?: MCRow[]
 }
-const SEVERITY_COLOR: Record<string, string> = {
+// P8: MC-Audit ist eine Checkliste, KEINE Severity-Drohung. Statt
-  CRITICAL: 'bg-red-600 text-white',
+// rotem HIGH-Badge zeigen wir die Quellen-Prioritaet (Gesetz vs.
-  HIGH: 'bg-red-100 text-red-800',
+// Behoerden-Leitlinie vs. Best-Practice) und einen 3-Tier-Status
-  MEDIUM: 'bg-amber-100 text-amber-800',
+// (erfuellt / nicht erfuellt / selbst pruefen).
-  LOW: 'bg-blue-100 text-blue-800',
+
-  INFO: 'bg-gray-100 text-gray-600',
+const PRIORITY_BADGE: Record<string, string> = {
  Gesetz: 'bg-slate-800 text-white',
  'Behoerden-Leitlinie': 'bg-blue-100 text-blue-800',
  'Best-Practice': 'bg-gray-100 text-gray-600',
  '—': 'bg-gray-50 text-gray-400',
 }
 /**
  * Maps a regulation string to its source-priority tier by case-insensitive
  * keyword search. Tiers are checked in order (law, authority guideline,
  * best practice); the first hit wins and '—' is returned when nothing matches.
  */
 function regulationToPriority(reg: string): keyof typeof PRIORITY_BADGE {
  const haystack = (reg || '').toLowerCase()
  // Ordered rule table: earlier entries take precedence over later ones.
  const rules: ReadonlyArray<readonly [RegExp, string]> = [
    [/dsgvo|gdpr|eprivacy|tdddg|tkg|bdsg|ttdsg/, 'Gesetz'],
    [/edpb|dsk|cnil|lfdi|eugh|orientierungshilfe|leitlinie|guideline/, 'Behoerden-Leitlinie'],
    [/iso|nist|bsi|cobit|sox/, 'Best-Practice'],
  ]
  for (const [pattern, tier] of rules) {
    if (pattern.test(haystack)) return tier
  }
  return '—'
 }
 // Conditional wording ("falls", "sofern", "ggf.", …) in a control label.
 // "ggf" is matched without its trailing dot: a word boundary placed after
 // the dot only matches when a word character follows immediately, so the
 // common "ggf. " (dot followed by a space) would never be recognised.
 const _CONDITIONAL_RE = /\b(falls|sofern|wenn|soweit|ggf|gegebenenfalls)\b/i
 /**
  * Collapses a master-control row into the status shown in the UI.
  *
  * @param r Audit row; reads `passed`, `skipped`, `matched_text` and `label`.
  * @returns 'pass' when passed, 'na' when skipped, 'review' for a failed row
  *          that has no matched text and a conditionally worded label, and
  *          'fail' for every other failed row.
  */
 function rowReviewStatus(r: MCRow): 'pass' | 'fail' | 'review' | 'na' {
  if (r.passed) return 'pass'
  if (r.skipped) return 'na'
  // A hard fail needs either matched text as evidence or an unconditional
  // label; conditional requirements without evidence are left to the user.
  if (!r.matched_text && _CONDITIONAL_RE.test(r.label || '')) return 'review'
  return 'fail'
 }
 const STATUS_FILTERS = [
  { value: 'all', label: 'Alle' },
-  { value: 'failed', label: 'Nur Fail' },
+  { value: 'fail', label: 'Nicht erfuellt' },
-  { value: 'passed', label: 'Nur Pass' },
+  { value: 'review', label: 'Selbst pruefen' },
-  { value: 'skipped', label: 'Nur Skipped' },
+  { value: 'pass', label: 'Erfuellt' },
  { value: 'na', label: 'Nicht anwendbar' },
 ] as const
 export default function AuditPage(
@@ -63,10 +88,11 @@ export default function AuditPage(
  const [data, setData] = useState<AuditResponse | null>(null)
  const [loading, setLoading] = useState(true)
  const [error, setError] = useState<string | null>(null)
-  const [filterStatus, setFilterStatus] = useState<typeof STATUS_FILTERS[number]['value']>('failed')
+  const [filterStatus, setFilterStatus] = useState<typeof STATUS_FILTERS[number]['value']>('fail')
  const [filterReg, setFilterReg] = useState<string>('')
  const [filterDoc, setFilterDoc] = useState<string>('')
  const [expanded, setExpanded] = useState<number | null>(null)
  const [tab, setTab] = useState<'mc' | 'all'>('all')
  useEffect(() => {
    let cancelled = false
@@ -90,9 +116,7 @@ export default function AuditPage(
  )
  const filtered = allRows.filter(r => {
-    if (filterStatus === 'failed' && (r.passed || r.skipped)) return false
+    if (filterStatus !== 'all' && rowReviewStatus(r) !== filterStatus) return false
    if (filterStatus === 'passed' && !r.passed) return false
    if (filterStatus === 'skipped' && !r.skipped) return false
    if (filterReg && r.regulation !== filterReg) return false
    if (filterDoc && r.doc_type !== filterDoc) return false
    return true
@@ -127,6 +151,25 @@ export default function AuditPage(
        </p>
      </div>
      {/* Tab switcher */}
      <div className="flex gap-2 border-b border-gray-200">
        {([
          { key: 'all', label: 'Voll-Audit (alle Findings)' },
          { key: 'mc', label: 'Nur MC-Scorecard' },
        ] as const).map(t => (
          <button key={t.key}
            onClick={() => setTab(t.key)}
            className={`px-4 py-2 text-sm border-b-2 -mb-px transition ${
              tab === t.key
                ? 'border-blue-600 text-blue-700 font-medium'
                : 'border-transparent text-gray-500 hover:text-gray-700'
            }`}>{t.label}</button>
        ))}
      </div>
      {tab === 'all' && <FindingsTab checkId={checkId} />}
      {tab === 'mc' && <>
      {/* Scorecard */}
      <div className="border rounded-lg overflow-hidden">
        <div className="px-4 py-3 bg-blue-50 border-b border-blue-100">
@@ -212,7 +255,7 @@ export default function AuditPage(
              <th className="px-3 py-2 text-left">Doc</th>
              <th className="px-3 py-2 text-left">Regulation</th>
              <th className="px-3 py-2 text-left">MC</th>
-              <th className="px-3 py-2 text-left">Severity</th>
+              <th className="px-3 py-2 text-left">Prioritaet</th>
            </tr>
          </thead>
          <tbody>
@@ -221,21 +264,26 @@ export default function AuditPage(
                <tr className="border-t cursor-pointer hover:bg-gray-50"
                    onClick={() => setExpanded(expanded === row.id ? null : row.id)}>
                  <td className="px-3 py-2">
-                    {row.passed ? (
+                    {(() => {
-                      <span className="text-green-600">✓</span>
+                      const st = rowReviewStatus(row)
-                    ) : row.skipped ? (
+                      if (st === 'pass') return <span className="text-green-600" title="Erfuellt">✓</span>
-                      <span className="text-gray-400">—</span>
+                      if (st === 'na') return <span className="text-gray-400" title="Nicht anwendbar">—</span>
-                    ) : (
+                      if (st === 'review') return <span className="text-amber-600" title="Selbst pruefen">?</span>
-                      <span className="text-red-600">✗</span>
+                      return <span className="text-red-600" title="Nicht erfuellt">✗</span>
-                    )}
+                    })()}
                  </td>
                  <td className="px-3 py-2 text-gray-700">{row.doc_type}</td>
                  <td className="px-3 py-2 text-gray-500">{row.regulation || '—'}</td>
                  <td className="px-3 py-2 text-gray-900">{row.label}</td>
                  <td className="px-3 py-2">
-                    <span className={`px-2 py-0.5 rounded text-[10px] font-medium ${
+                    {(() => {
-                      SEVERITY_COLOR[row.severity] || 'bg-gray-100'
+                      const prio = regulationToPriority(row.regulation)
-                    }`}>{row.severity || '—'}</span>
+                      return (
                        <span className={`px-2 py-0.5 rounded text-[10px] font-medium ${PRIORITY_BADGE[prio]}`}>
                          {prio}
                        </span>
                      )
                    })()}
                  </td>
                </tr>
                {expanded === row.id && (
@@ -272,6 +320,7 @@ export default function AuditPage(
          </tbody>
        </table>
      </div>
      </>}
    </div>
  )
 }
@@ -0,0 +1,45 @@
 'use client'
 import Link from 'next/link'
 interface Props {
  /** Risk classification of the AI system. Tile is only rendered for high_risk / unacceptable. */
  riskLevel: string
 }
 /**
 * Shortcut tile that links to the BSI QUAIDAL data-quality tab of the quality module.
 * It covers AI Act Article 10 (training-data quality) and is rendered only for
 * results classified as `high_risk` or `unacceptable`; every other risk level
 * yields `null`, so callers may mount the component unconditionally.
 */
 export function Art10Tile({ riskLevel }: Props) {
  const isRelevant = riskLevel === 'high_risk' || riskLevel === 'unacceptable'
  if (!isRelevant) return null
  // Pictogram shown inside the round badge on the left.
  const badgeIcon = (
    <svg className="w-5 h-5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
      <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2}
        d="M3 7v10a2 2 0 002 2h14a2 2 0 002-2V7M3 7l9 6 9-6M3 7l9-4 9 4" />
    </svg>
  )
  // Chevron on the right edge hinting that the whole tile is a link.
  const chevron = (
    <svg className="w-4 h-4 text-purple-500 shrink-0 mt-1" fill="none" stroke="currentColor" viewBox="0 0 24 24">
      <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M9 5l7 7-7 7" />
    </svg>
  )
  return (
    <Link
      href="/sdk/quality?category=data_quality"
      className="block mt-3 p-3 rounded-lg border border-purple-200 bg-purple-50 hover:bg-purple-100 transition-colors"
    >
      <div className="flex items-start gap-3">
        <div className="w-9 h-9 rounded-full bg-purple-200 text-purple-700 flex items-center justify-center shrink-0">
          {badgeIcon}
        </div>
        <div className="flex-1 min-w-0">
          <div className="text-sm font-semibold text-purple-900">
            Art. 10 Datenqualität (Hochrisiko-KI)
          </div>
          <div className="text-xs text-purple-700 mt-0.5">
            BSI QUAIDAL Controls: 10 Kriterien, 15 Bausteine, 30 Maßnahmen, 140 Metriken.
            Klicken zum Öffnen des Trainingsdaten-Qualität-Moduls.
          </div>
        </div>
        {chevron}
      </div>
    </Link>
  )
 }
@@ -9,6 +9,7 @@ import { RiskPyramid } from './_components/RiskPyramid'
 import { AddSystemForm } from './_components/AddSystemForm'
 import { AISystemCard } from './_components/AISystemCard'
 import DecisionTreeWizard from '@/components/sdk/ai-act/DecisionTreeWizard'
 import { Art10Tile } from './_components/Art10Tile'
 type TabId = 'overview' | 'decision-tree' | 'results'
@@ -136,6 +137,7 @@ function SavedResultsTab() {
              Löschen
            </button>
          </div>
          <Art10Tile riskLevel={r.high_risk_result} />
        </div>
      ))}
    </div>
@@ -0,0 +1,211 @@
 'use client'
 import { useState, useEffect, useCallback } from 'react'
 import { useParams } from 'next/navigation'
 // Shape of one entry in the `suggestions` array returned by the
 // customer-standards endpoint. This is a compile-time description only;
 // the response is not validated at runtime.
 type Suggestion = {
  // Mitigation name; also used as the import key and as the React list key.
  name: string
  reduction_type: 'design' | 'protection' | 'information' | string
  description: string
  // Rendered as "N×" in the "Vorprojekte" column.
  source_project_count: number
  // Names of the prior projects; a row shows at most the first three.
  source_project_names: string[]
  // Drives the "Kundenstandard" badge.
  is_customer_standard: boolean
  // Drives the "Verifiziert" badge when the entry is not a customer standard.
  has_verified_instances: boolean
 }
 // Subset of the project record that this page reads.
 type ProjectInfo = { customer_name?: string; machine_name?: string }
 // /sdk/iace/[projectId]/customer-standards
 //
 // Surfaces mitigations that the expert flagged as "Kundenstandard" (or
 // successfully verified) in earlier projects of the SAME customer. Picking
 // one and clicking "Übernehmen" applies it to all matching hazards in the
 // current project — every match is set to is_relevant=true,
 // is_customer_standard=true, status='verified'. Saves the round-trip
 // through Massnahmen + Verifikation for the cases where the safety expert
 // already knows the answer from a prior plant at the same site.
 //
 // Filter "Auch verifizierte einbeziehen" widens the pool beyond strictly
 // is_customer_standard=true to also include status='verified' rows — useful
 // when the customer-standard habit is not yet established in the corpus.
 export default function CustomerStandardsPage() {
  const params = useParams()
  const projectId = params.projectId as string
  const [suggestions, setSuggestions] = useState<Suggestion[]>([])
  const [project, setProject] = useState<ProjectInfo | null>(null)
  const [loading, setLoading] = useState(true)
  const [includeVerified, setIncludeVerified] = useState(false)
  const [importing, setImporting] = useState<string | null>(null)
  const [importedNames, setImportedNames] = useState<Set<string>>(new Set())
  const [selected, setSelected] = useState<Set<string>>(new Set())
  const [error, setError] = useState<string | null>(null)
  // Fetches the suggestion list and the project record in parallel.
  // A non-OK suggestions response clears the list and reports the HTTP status
  // instead of silently leaving rows from a previous filter on screen.
  // NOTE(review): the "no customer name" hint view does not render `error`,
  // so a failed project request is only visible once a customer is known.
  const load = useCallback(async () => {
    setLoading(true)
    setError(null)
    try {
      const [sgRes, prRes] = await Promise.all([
        fetch(`/api/sdk/v1/iace/projects/${projectId}/customer-standards?include_verified=${includeVerified}`),
        fetch(`/api/sdk/v1/iace/projects/${projectId}`),
      ])
      const problems: string[] = []
      if (sgRes.ok) {
        const j = await sgRes.json()
        const rows: Suggestion[] = Array.isArray(j?.suggestions) ? j.suggestions : []
        // The name list may arrive as null (a nil Go slice encodes as JSON null);
        // the row renderer reads `.length`, so normalise it to an array here.
        setSuggestions(rows.map((s) => ({ ...s, source_project_names: s.source_project_names ?? [] })))
      } else {
        setSuggestions([])
        problems.push(`Kundenstandards: HTTP ${sgRes.status}`)
      }
      if (prRes.ok) {
        const j = await prRes.json()
        // The endpoint may wrap the record in `{ project: … }` or return it bare.
        const p = j?.project || j
        setProject({ customer_name: p?.customer_name, machine_name: p?.machine_name })
      } else {
        problems.push(`Projekt: HTTP ${prRes.status}`)
      }
      if (problems.length > 0) setError(problems.join(' · '))
    } catch (e) {
      setError(e instanceof Error ? e.message : String(e))
    } finally {
      setLoading(false)
    }
  }, [projectId, includeVerified])
  useEffect(() => { load() }, [load])
  // Flips membership of `name` in the selection; a fresh Set is returned so
  // React registers the state change.
  function toggleSelect(name: string) {
    setSelected((current) => {
      const updated = new Set(current)
      // `delete` reports whether the entry existed; add it only if it did not.
      if (!updated.delete(name)) updated.add(name)
      return updated
    })
  }
  // Imports one suggestion by name into the current project.
  // On success the name is marked as imported and removed from the selection;
  // on an HTTP error the backend's `error` field (or the status code) is shown.
  // Network failures are caught and surfaced as well, so a click handler never
  // produces an unhandled rejection and a batch import continues with the next name.
  async function importOne(name: string) {
    setImporting(name)
    try {
      const r = await fetch(`/api/sdk/v1/iace/projects/${projectId}/customer-standards/import`, {
        method: 'POST', headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ name }),
      })
      if (r.ok) {
        setImportedNames((prev) => new Set(prev).add(name))
        setSelected((prev) => { const n = new Set(prev); n.delete(name); return n })
      } else {
        // The error body is optional; fall back to the status code if it is not JSON.
        const j = await r.json().catch(() => null)
        setError(j?.error || `HTTP ${r.status}`)
      }
    } catch (e) {
      setError(e instanceof Error ? e.message : String(e))
    } finally {
      setImporting(null)
    }
  }
  // Imports every currently selected suggestion, one request at a time, so the
  // single `importing` marker always names the request in flight.
  async function importSelected() {
    // Snapshot first: each successful import removes its name from `selected`.
    const pending = [...selected]
    for (let i = 0; i < pending.length; i += 1) {
      await importOne(pending[i])
    }
  }
  if (loading) return (
    <div className="flex items-center justify-center h-64">
      <div className="animate-spin rounded-full h-8 w-8 border-b-2 border-purple-600" />
    </div>
  )
  // No customer set → guide the user to set it first
  const hasCustomer = !!(project?.customer_name && project.customer_name.trim() !== '')
  if (!hasCustomer) {
    return (
      <div className="space-y-4 max-w-3xl">
        <h1 className="text-2xl font-bold">Kundenstandards</h1>
        <div className="rounded-md border border-amber-200 bg-amber-50 px-4 py-3 text-sm text-amber-900">
          Dieses Projekt hat noch keinen <em>Kundennamen</em>. Damit Massnahmen aus früheren
          Anlagen desselben Kunden wiederverwendet werden können, trage den Kundennamen
          unter <a className="text-purple-700 underline" href={`/sdk/iace/${projectId}/order`}>Auftrag → Kunde</a> ein.
          Sobald der Kundenname gesetzt ist, erscheint hier die Liste der wiederverwendbaren
          Maßnahmen aus seinen Vorprojekten.
        </div>
      </div>
    )
  }
  return (
    <div className="space-y-4">
      <div className="flex items-baseline justify-between">
        <div>
          <h1 className="text-2xl font-bold text-gray-900 dark:text-white">Kundenstandards</h1>
          <p className="mt-1 text-sm text-gray-500">
            Übernimm Maßnahmen, die der Kunde <strong>{project?.customer_name}</strong> in
            anderen Anlagen bereits als Standard etabliert hat. Übernehmen setzt sie für alle
            passenden Gefährdungen <em>relevant</em> und <em>verifiziert</em> ohne Nachweis.
          </p>
        </div>
        <div className="flex items-center gap-3">
          <label className="flex items-center gap-1.5 text-xs text-gray-600">
            <input type="checkbox" checked={includeVerified}
              onChange={(e) => setIncludeVerified(e.target.checked)}
              className="accent-purple-600" />
            Auch <em>verifizierte</em> einbeziehen
          </label>
          {selected.size > 0 && (
            <button onClick={importSelected} disabled={!!importing}
              className="px-3 py-1.5 text-xs bg-purple-600 text-white rounded-lg hover:bg-purple-700 disabled:opacity-50">
              {importing ? 'Übernehme…' : `${selected.size} übernehmen`}
            </button>
          )}
        </div>
      </div>
      {error && <div className="text-red-600 text-sm">Fehler: {error}</div>}
      {suggestions.length === 0 && (
        <div className="rounded-md border border-gray-200 bg-gray-50 px-4 py-6 text-sm text-gray-600">
          Keine wiederverwendbaren Maßnahmen für <strong>{project?.customer_name}</strong> gefunden.
          {!includeVerified && ' Aktiviere „Auch verifizierte einbeziehen" oben rechts, um den Pool zu erweitern.'}
        </div>
      )}
      {suggestions.length > 0 && (
        <div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 overflow-hidden">
          <div className="grid grid-cols-[28px_2fr_120px_100px_120px] gap-3 px-4 py-2 bg-gray-50 dark:bg-gray-750 text-xs font-medium text-gray-500 uppercase tracking-wider">
            <div />
            <div>Massnahme</div>
            <div className="text-center">Vorprojekte</div>
            <div>Status</div>
            <div className="text-right">Aktion</div>
          </div>
          {suggestions.map((s) => {
            const imported = importedNames.has(s.name)
            return (
              <div key={s.name} className={`grid grid-cols-[28px_2fr_120px_100px_120px] gap-3 px-4 py-2.5 border-t border-gray-100 dark:border-gray-700 ${imported ? 'bg-green-50/40' : ''} ${selected.has(s.name) ? 'bg-purple-50' : ''}`}>
                <div className="pt-0.5">
                  <input type="checkbox" checked={selected.has(s.name)} onChange={() => toggleSelect(s.name)} disabled={imported}
                    className="accent-purple-600" />
                </div>
                <div className="min-w-0">
                  <div className="text-sm text-gray-900 dark:text-white">{s.name}</div>
                  {s.description && <div className="text-[11px] text-gray-500 mt-0.5 line-clamp-2">{s.description}</div>}
                  {s.source_project_names.length > 0 && (
                    <div className="text-[10px] text-gray-400 mt-1">aus: {s.source_project_names.slice(0,3).join(', ')}{s.source_project_names.length > 3 ? ` (+${s.source_project_names.length - 3})` : ''}</div>
                  )}
                </div>
                <div className="text-center self-center">
                  <span className="text-sm font-semibold text-purple-700">{s.source_project_count}×</span>
                </div>
                <div className="self-center flex flex-wrap gap-1">
                  {s.is_customer_standard && <span className="text-[10px] px-1.5 py-0.5 rounded bg-blue-100 text-blue-700">Kundenstandard</span>}
                  {s.has_verified_instances && !s.is_customer_standard && <span className="text-[10px] px-1.5 py-0.5 rounded bg-green-100 text-green-700">Verifiziert</span>}
                </div>
                <div className="text-right self-center">
                  {imported ? (
                    <span className="text-[11px] text-green-700">✓ Übernommen</span>
                  ) : (
                    <button onClick={() => importOne(s.name)} disabled={!!importing}
                      className="px-2.5 py-1 text-[11px] bg-purple-600 text-white rounded hover:bg-purple-700 disabled:opacity-50">
                      {importing === s.name ? 'Übernehme…' : 'Übernehmen'}
                    </button>
                  )}
                </div>
              </div>
            )
          })}
        </div>
      )}
    </div>
  )
 }
@@ -68,10 +68,14 @@ export default function OrderPage() {
    setSaveState('saving')
    try {
      const merged = { ...existingMetaRef.current, order_data: next }
      // Mirror Auftraggeber.Firmenname into the top-level customer_name
      // column so the Customer-Standards-Reuse feature can index by it.
      // Empty string → null on the backend, no broken reuse for fresh projects.
      const customerName = (next.client.company || '').trim()
      await fetch(`/api/sdk/v1/iace/projects/${projectId}`, {
        method: 'PUT',
        headers: { 'Content-Type': 'application/json' },
-        body: JSON.stringify({ metadata: merged }),
+        body: JSON.stringify({ metadata: merged, customer_name: customerName }),
      })
      existingMetaRef.current = merged
      setSaveState('saved')
@@ -16,6 +16,7 @@ const IACE_NAV_ITEMS = [
  { id: 'mitigations', label: 'Massnahmen', href: '/mitigations', icon: 'shield' },
  { id: 'clarifications', label: 'Klärungen', href: '/clarifications', icon: 'chat' },
  { id: 'verification', label: 'Verifikation', href: '/verification', icon: 'check' },
  { id: 'customer-standards', label: 'Kundenstandards', href: '/customer-standards', icon: 'building' },
  { id: 'evidence', label: 'Nachweise', href: '/evidence', icon: 'document' },
  { id: 'tech-file', label: 'CE-Akte', href: '/tech-file', icon: 'folder' },
 ]
@@ -67,6 +68,12 @@ function NavIcon({ icon, className }: { icon: string; className?: string }) {
          <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M9 12l2 2 4-4m6 2a9 9 0 11-18 0 9 9 0 0118 0z" />
        </svg>
      )
    case 'building':
      return (
        <svg className={cls} fill="none" viewBox="0 0 24 24" stroke="currentColor">
          <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M19 21V5a2 2 0 00-2-2H7a2 2 0 00-2 2v16m14 0H5m14 0h2m-16 0H3m4-4h2m-2-4h2m-2-4h2m4 8h2m-2-4h2m-2-4h2" />
        </svg>
      )
    case 'document':
      return (
        <svg className={cls} fill="none" viewBox="0 0 24 24" stroke="currentColor">
@@ -0,0 +1,152 @@
 'use client'
 import { useEffect, useState } from 'react'
 import { fetchCriterionTree, type QuaidalControl, type QuaidalCriterionTree } from '../_hooks/useQuaidalData'
 /** Props of the criterion drill-down modal. */
 interface Props {
  /** Section identifier handed to `fetchCriterionTree`; also the title fallback while no tree is loaded. */
  sectionId: string
  /** Invoked when the close button is clicked. */
  onClose: () => void
 }
 /**
 * Card for a single derived control: name, source-section badge, description
 * and, when the source carries a URL, a link to the original document.
 * `badgeColor` is a Tailwind class string applied to the section badge.
 */
 function ControlBlock({ ctrl, badgeColor }: { ctrl: QuaidalControl; badgeColor: string }) {
  const { canonical_name, description, source } = ctrl
  const badgeClasses = `px-2 py-0.5 text-xs rounded-full ${badgeColor} shrink-0`
  // External link opens in a new tab; omitted when no URL is recorded.
  const sourceLink = source.url && (
    <a
      href={source.url}
      target="_blank"
      rel="noreferrer noopener"
      className="text-xs text-purple-600 hover:text-purple-800 underline"
    >
      BSI-Quelle ansehen ({source.framework})
    </a>
  )
  return (
    <div className="border border-gray-200 rounded-lg p-4 bg-white">
      <div className="flex items-start justify-between gap-3 mb-2">
        <h4 className="font-semibold text-gray-900">{canonical_name}</h4>
        <span className={badgeClasses}>{source.section}</span>
      </div>
      <p className="text-sm text-gray-600 mb-3 whitespace-pre-line">{description}</p>
      {sourceLink}
    </div>
  )
 }
 /**
 * Modal that shows one QUAIDAL criterion together with its building blocks,
 * measures and metrics.
 *
 * The tree is fetched whenever `sectionId` changes. A rejected request or a
 * `null` result (non-2xx response) is shown as an error box instead of leaving
 * the spinner running or the body empty. Responses that arrive after the
 * effect was cleaned up are ignored.
 */
 export function QuaidalCriterionDetail({ sectionId, onClose }: Props) {
  const [tree, setTree] = useState<QuaidalCriterionTree | null>(null)
  const [loading, setLoading] = useState(true)
  const [loadError, setLoadError] = useState<string | null>(null)
  useEffect(() => {
    let active = true
    setLoading(true)
    setLoadError(null)
    // Drop the previous tree so a stale criterion is never shown next to "Lade...".
    setTree(null)
    fetchCriterionTree(sectionId)
      .then(t => {
        if (!active) return
        setTree(t)
        if (!t) setLoadError(`Kriterium ${sectionId} konnte nicht geladen werden.`)
      })
      .catch(err => {
        if (active) setLoadError(err instanceof Error ? err.message : String(err))
      })
      .finally(() => {
        if (active) setLoading(false)
      })
    return () => { active = false }
  }, [sectionId])
  // The three child lists share one layout; only title, items and badge colour differ.
  const groups: { title: string; items: QuaidalControl[]; badge: string }[] = tree
    ? [
        { title: 'Bausteine', items: tree.building_blocks, badge: 'bg-blue-100 text-blue-700' },
        { title: 'Maßnahmen', items: tree.measures, badge: 'bg-green-100 text-green-700' },
        { title: 'Metriken & Methoden', items: tree.metrics, badge: 'bg-amber-100 text-amber-700' },
      ]
    : []
  return (
    <div className="fixed inset-0 z-50 flex items-center justify-center bg-black/40 p-4">
      <div
        role="dialog"
        aria-modal="true"
        className="bg-white rounded-2xl shadow-xl w-full max-w-4xl max-h-[90vh] overflow-hidden flex flex-col"
      >
        <div className="flex items-center justify-between px-6 py-4 border-b border-gray-200">
          <div>
            <div className="text-xs text-gray-500 uppercase tracking-wide">QUAIDAL Kriterium</div>
            <h2 className="text-xl font-bold text-gray-900">
              {tree?.criterion.canonical_name || sectionId}
            </h2>
          </div>
          <button
            onClick={onClose}
            className="w-8 h-8 rounded-full hover:bg-gray-100 flex items-center justify-center text-gray-500"
            aria-label="Schliessen"
          >×</button>
        </div>
        <div className="overflow-y-auto p-6 space-y-6">
          {loading && <div className="text-center text-gray-400 py-12">Lade...</div>}
          {!loading && loadError && (
            <div className="bg-red-50 border border-red-200 rounded-lg p-4 text-red-700">
              {loadError}
            </div>
          )}
          {tree && (
            <>
              <div>
                <h3 className="text-sm font-semibold text-gray-500 uppercase tracking-wide mb-2">
                  Anforderung (eigene Formulierung)
                </h3>
                <div className="bg-purple-50 border border-purple-200 rounded-lg p-4">
                  <p className="text-gray-800 whitespace-pre-line">{tree.criterion.description}</p>
                </div>
                <div className="mt-3 flex flex-wrap items-center gap-3 text-xs text-gray-500">
                  <span>Regulierung: <span className="font-medium text-gray-700">{tree.criterion.regulation_anchor || '—'}</span></span>
                  <span>Quelle: <span className="font-medium text-gray-700">{tree.criterion.source.framework} {tree.criterion.source.section}</span></span>
                  {tree.criterion.source.url && (
                    <a href={tree.criterion.source.url} target="_blank" rel="noreferrer noopener" className="text-purple-600 hover:text-purple-800 underline">
                      Originalquelle
                    </a>
                  )}
                </div>
              </div>
              {tree.criterion.external_refs.length > 0 && (
                <div>
                  <h3 className="text-sm font-semibold text-gray-500 uppercase tracking-wide mb-2">
                    Externe Referenzen (nicht ingestiert, nur Verweis)
                  </h3>
                  <div className="flex flex-wrap gap-2">
                    {tree.criterion.external_refs.map((ref, i) => (
                      <span key={i} className="px-2 py-1 text-xs bg-gray-100 text-gray-700 rounded">
                        {ref.framework}{ref.citation ? ` — ${ref.citation}` : ''}
                      </span>
                    ))}
                  </div>
                </div>
              )}
              {groups.filter(g => g.items.length > 0).map(g => (
                <div key={g.title}>
                  <h3 className="text-sm font-semibold text-gray-500 uppercase tracking-wide mb-3">
                    {g.title} ({g.items.length})
                  </h3>
                  <div className="grid grid-cols-1 md:grid-cols-2 gap-3">
                    {g.items.map(item => (
                      <ControlBlock key={item.derived_id} ctrl={item} badgeColor={g.badge} />
                    ))}
                  </div>
                </div>
              ))}
            </>
          )}
        </div>
        <div className="px-6 py-3 border-t border-gray-200 bg-gray-50 text-xs text-gray-500">
          Eigene Clean-Room-Ableitung von BSI QUAIDAL. Quellverweis und Lizenz-Note pro Eintrag.
        </div>
      </div>
    </div>
  )
 }
@@ -0,0 +1,109 @@
 'use client'
 import { useState } from 'react'
 import { useQuaidalData, type QuaidalControl } from '../_hooks/useQuaidalData'
 import { QuaidalCriterionDetail } from './QuaidalCriterionDetail'
 /**
 * Clickable summary card for one criterion. The whole card is a button that
 * calls `onOpen`; it shows the name, the source-section badge, a clamped
 * description, the number of related ids and up to two external frameworks.
 */
 function CriterionCard({ ctrl, onOpen }: { ctrl: QuaidalControl; onOpen: () => void }) {
  const { canonical_name, description, source, related_quaidal_ids, external_refs } = ctrl
  // Only the first two external frameworks are rendered as chips.
  const frameworkChips = external_refs.slice(0, 2).map((r, i) => (
    <span key={i} className="px-1.5 py-0.5 bg-gray-100 text-gray-600 rounded">
      {r.framework}
    </span>
  ))
  return (
    <button
      onClick={onOpen}
      className="text-left bg-white rounded-xl border border-gray-200 p-5 hover:border-purple-400 hover:shadow-sm transition-all"
    >
      <div className="flex items-start justify-between mb-2">
        <h3 className="font-semibold text-gray-900">{canonical_name}</h3>
        <span className="px-2 py-0.5 text-xs rounded-full bg-purple-100 text-purple-700">
          {source.section}
        </span>
      </div>
      <p className="text-sm text-gray-600 line-clamp-3">{description}</p>
      <div className="mt-3 flex flex-wrap items-center gap-2 text-xs">
        <span className="text-gray-500">Bausteine: <span className="font-medium text-gray-700">{related_quaidal_ids.length}</span></span>
        {frameworkChips}
      </div>
    </button>
  )
 }
 /**
 * Tab body for the BSI QUAIDAL training-data quality view: intro box with
 * per-kind counts, the grid of criterion cards, the license note and the
 * drill-down modal for the card that was clicked.
 *
 * The section heading is derived from the loaded list rather than a fixed
 * "10", so it cannot contradict the grid or the empty-state message.
 */
 export function TrainingDataQualityTab() {
  const { criteria, stats, loading, error } = useQuaidalData()
  const [openSection, setOpenSection] = useState<string | null>(null)
  if (loading) {
    return <div className="text-center text-gray-400 py-12">Lade QUAIDAL-Katalog...</div>
  }
  if (error) {
    return (
      <div className="bg-red-50 border border-red-200 rounded-lg p-4 text-red-700">
        QUAIDAL-Daten konnten nicht geladen werden: {error}
      </div>
    )
  }
  // Tile label paired with the key it reads from `stats.counts_by_kind`.
  const statTiles = [
    { label: 'Qualitätskriterien', kind: 'criterion' },
    { label: 'Bausteine', kind: 'building_block' },
    { label: 'Maßnahmen', kind: 'measure' },
    { label: 'Metriken & Methoden', kind: 'metric' },
  ]
  const heading = criteria.length > 0 ? `${criteria.length} Qualitätskriterien` : 'Qualitätskriterien'
  return (
    <div className="space-y-6">
      <div className="bg-purple-50 border border-purple-200 rounded-xl p-5">
        <h2 className="text-lg font-semibold text-gray-900">Trainingsdaten-Qualität nach BSI QUAIDAL</h2>
        <p className="text-sm text-gray-600 mt-1">
          Operative Umsetzung von EU AI Act Art. 10 (Datenqualität für Hochrisiko-KI) auf Basis des
          BSI-Katalogs QUAIDAL. Alle Controls sind eigenständig formuliert (Clean-Room) und verweisen
          auf die jeweilige QUAIDAL-Sektion.
        </p>
        {stats && (
          <div className="mt-4 grid grid-cols-2 md:grid-cols-4 gap-3 text-sm">
            {statTiles.map(tile => (
              <div key={tile.kind}>
                <div className="text-xs text-gray-500">{tile.label}</div>
                {/* The stats payload is not validated at runtime, so guard the lookup. */}
                <div className="text-xl font-semibold text-gray-900">{stats.counts_by_kind?.[tile.kind] ?? 0}</div>
              </div>
            ))}
          </div>
        )}
      </div>
      <div>
        <h3 className="text-lg font-semibold text-gray-900 mb-4">{heading}</h3>
        {criteria.length === 0 ? (
          <div className="bg-white rounded-xl border border-gray-200 p-8 text-center text-gray-400">
            Keine Kriterien gefunden. Bitte Backend-Ingest prüfen.
          </div>
        ) : (
          <div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-4">
            {criteria.map(c => (
              <CriterionCard
                key={c.derived_id}
                ctrl={c}
                onOpen={() => setOpenSection(c.source.section)}
              />
            ))}
          </div>
        )}
      </div>
      {stats?.license_note && (
        <div className="text-xs text-gray-500 italic">{stats.license_note}</div>
      )}
      {openSection && (
        <QuaidalCriterionDetail
          sectionId={openSection}
          onClose={() => setOpenSection(null)}
        />
      )}
    </div>
  )
 }
@@ -0,0 +1,86 @@
 'use client'
 import { useCallback, useEffect, useState } from 'react'
 /** Pointer to an external framework that a control references. */
 export interface QuaidalExternalRef {
  framework: string
  /** Optional citation within the framework; `null` when none is recorded. */
  citation: string | null
 }
 /** Provenance of a derived control: which framework section it was derived from. */
 export interface QuaidalSource {
  framework: string
  /** Section identifier within the framework. */
  section: string
  /** Link to the original source, if one is recorded. */
  url: string | null
  // NOTE(review): presumably the commit of the upstream source repository — confirm with the backend.
  commit_sha: string | null
  title_original: string | null
  license_note: string | null
 }
 /** One derived QUAIDAL control as returned by the `/criteria` endpoints. */
 export interface QuaidalControl {
  derived_id: string
  /** Level of the control within the catalogue. */
  kind: 'criterion' | 'building_block' | 'measure' | 'metric'
  canonical_name: string
  description: string
  regulation_anchor: string | null
  related_quaidal_ids: string[]
  external_refs: QuaidalExternalRef[]
  source: QuaidalSource
  // NOTE(review): range and meaning of this score are not visible in the frontend — confirm with the backend.
  plagiarism_score: number | null
 }
 /** Payload of the `/stats` endpoint. */
 export interface QuaidalStats {
  /** Number of controls per kind; presumably keyed by the `QuaidalControl.kind` values — verify against the backend. */
  counts_by_kind: Record<string, number>
  source_framework: string
  source_commit_sha: string | null
  license_note: string | null
 }
 /** Payload of `/criteria/{sectionId}`: one criterion plus its three child lists. */
 export interface QuaidalCriterionTree {
  criterion: QuaidalControl
  building_blocks: QuaidalControl[]
  measures: QuaidalControl[]
  metrics: QuaidalControl[]
 }
 const API_BASE = '/api/sdk/v1/quaidal'
 /**
 * Loads the criterion list and the catalogue statistics once on mount.
 *
 * Criteria are required: a non-OK response or a thrown error sets `error`.
 * Statistics are best-effort: a failed request, a non-OK status or an
 * unparsable body leaves `stats` unchanged and never fails the whole load.
 *
 * @returns the loaded data, the loading/error flags and `reload` to refetch.
 */
 export function useQuaidalData() {
  const [criteria, setCriteria] = useState<QuaidalControl[]>([])
  const [stats, setStats] = useState<QuaidalStats | null>(null)
  const [loading, setLoading] = useState(true)
  const [error, setError] = useState<string | null>(null)
  const loadAll = useCallback(async () => {
    setLoading(true)
    setError(null)
    try {
      const [criteriaRes, statsRes] = await Promise.all([
        fetch(`${API_BASE}/criteria`, { cache: 'no-store' }),
        // A rejected stats request must not reject the combined promise.
        fetch(`${API_BASE}/stats`, { cache: 'no-store' }).catch(() => null),
      ])
      if (criteriaRes.ok) {
        const data: unknown = await criteriaRes.json()
        setCriteria(Array.isArray(data) ? (data as QuaidalControl[]) : [])
      } else {
        setError(`Criteria endpoint returned ${criteriaRes.status}`)
      }
      if (statsRes?.ok) {
        try {
          setStats(await statsRes.json())
        } catch {
          // Malformed stats body: keep whatever stats were there before.
        }
      }
    } catch (err) {
      setError(String(err))
    } finally {
      setLoading(false)
    }
  }, [])
  useEffect(() => { loadAll() }, [loadAll])
  return { criteria, stats, loading, error, reload: loadAll }
 }
 /**
 * Fetches the criterion tree for one section id.
 *
 * @param sectionId section identifier; URL-encoded before it is put in the path
 * @returns the parsed tree, or `null` when the response status is not OK
 */
 export async function fetchCriterionTree(sectionId: string): Promise<QuaidalCriterionTree | null> {
  const endpoint = `${API_BASE}/criteria/${encodeURIComponent(sectionId)}`
  const response = await fetch(endpoint, { cache: 'no-store' })
  if (response.ok) {
    const payload = (await response.json()) as QuaidalCriterionTree
    return payload
  }
  return null
 }
@@ -1,15 +1,23 @@
 'use client'
 import { useState, useEffect } from 'react'
 import { useSearchParams } from 'next/navigation'
 import { useSDK } from '@/lib/sdk'
 import { useQualityData } from './_hooks/useQualityData'
 import { MetricCard, type QualityMetric } from './_components/MetricCard'
 import { TestRow } from './_components/TestRow'
 import { MetricModal } from './_components/MetricModal'
 import { TestModal } from './_components/TestModal'
 import { TrainingDataQualityTab } from './_components/TrainingDataQualityTab'
 type TabId = 'model_quality' | 'data_quality'
 export default function QualityPage() {
  const { state } = useSDK()
  const searchParams = useSearchParams()
  const initialTab: TabId = searchParams?.get('category') === 'data_quality' ? 'data_quality' : 'model_quality'
  const [tab, setTab] = useState<TabId>(initialTab)
  const {
    metrics,
    tests,
@@ -41,24 +49,54 @@ export default function QualityPage() {
          <h1 className="text-2xl font-bold text-gray-900">AI Quality Dashboard</h1>
          <p className="mt-1 text-gray-500">Ueberwachen Sie die Qualitaet und Fairness Ihrer KI-Systeme</p>
        </div>
-        <div className="flex items-center gap-2">
+        {tab === 'model_quality' && (
-          <button
+          <div className="flex items-center gap-2">
-            onClick={() => setShowTestModal(true)}
+            <button
-            className="flex items-center gap-2 px-4 py-2 border border-purple-300 text-purple-700 rounded-lg hover:bg-purple-50 transition-colors"
+              onClick={() => setShowTestModal(true)}
-          >
+              className="flex items-center gap-2 px-4 py-2 border border-purple-300 text-purple-700 rounded-lg hover:bg-purple-50 transition-colors"
-            <svg className="w-5 h-5" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M4 4v5h.582m15.356 2A8.001 8.001 0 004.582 9m0 0H9m11 11v-5h-.581m0 0a8.003 8.003 0 01-15.357-2m15.357 2H15" /></svg>
+            >
-            Test hinzufuegen
+              <svg className="w-5 h-5" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M4 4v5h.582m15.356 2A8.001 8.001 0 004.582 9m0 0H9m11 11v-5h-.581m0 0a8.003 8.003 0 01-15.357-2m15.357 2H15" /></svg>
-          </button>
+              Test hinzufuegen
-          <button
+            </button>
-            onClick={() => { setEditMetric(undefined); setShowMetricModal(true) }}
+            <button
-            className="flex items-center gap-2 px-4 py-2 bg-purple-600 text-white rounded-lg hover:bg-purple-700 transition-colors"
+              onClick={() => { setEditMetric(undefined); setShowMetricModal(true) }}
-          >
+              className="flex items-center gap-2 px-4 py-2 bg-purple-600 text-white rounded-lg hover:bg-purple-700 transition-colors"
-            <svg className="w-5 h-5" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M12 6v6m0 0v6m0-6h6m-6 0H6" /></svg>
+            >
-            Messung hinzufuegen
+              <svg className="w-5 h-5" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M12 6v6m0 0v6m0-6h6m-6 0H6" /></svg>
-          </button>
+              Messung hinzufuegen
-        </div>
+            </button>
          </div>
        )}
      </div>
      <div className="border-b border-gray-200">
        <nav className="-mb-px flex gap-6">
          <button
            onClick={() => setTab('model_quality')}
            className={`pb-3 px-1 text-sm font-medium border-b-2 transition-colors ${
              tab === 'model_quality'
                ? 'border-purple-500 text-purple-600'
                : 'border-transparent text-gray-500 hover:text-gray-700'
            }`}
          >
            Modell-Qualität
          </button>
          <button
            onClick={() => setTab('data_quality')}
            className={`pb-3 px-1 text-sm font-medium border-b-2 transition-colors ${
              tab === 'data_quality'
                ? 'border-purple-500 text-purple-600'
                : 'border-transparent text-gray-500 hover:text-gray-700'
            }`}
          >
            Trainingsdaten-Qualität (BSI QUAIDAL)
          </button>
        </nav>
      </div>
      {tab === 'data_quality' && <TrainingDataQualityTab />}
      {tab === 'model_quality' && (
        <>
      <div className="grid grid-cols-1 md:grid-cols-4 gap-4">
        <div className="bg-white rounded-xl border border-gray-200 p-6">
          <div className="text-sm text-gray-500">Durchschnittlicher Score</div>
@@ -141,6 +179,8 @@ export default function QualityPage() {
          </div>
        </div>
      </div>
        </>
      )}
      {showMetricModal && (
        <MetricModal
@@ -0,0 +1,69 @@
 package handlers
 import (
 	"net/http"
 	"github.com/breakpilot/ai-compliance-sdk/internal/iace"
 	"github.com/gin-gonic/gin"
 	"github.com/google/uuid"
 )
 // ListCustomerStandardSuggestions serves
 //   GET /api/v1/iace/projects/:id/customer-standards?include_verified=true|false
 //
 // It responds with the reusable mitigations the store finds for the project
 // identified by the :id path parameter, plus their count. The
 // include_verified flag is enabled only by the exact query value "true";
 // any other value (or its absence) leaves it off. A nil result from the
 // store is normalised to an empty slice so the JSON body carries [] rather
 // than null.
 //
 // Responses:
 //   400 — :id is not a valid UUID
 //   500 — the store lookup failed (the error text is passed through)
 //   200 — { "suggestions": [...], "count": n }
 func (h *IACEHandler) ListCustomerStandardSuggestions(c *gin.Context) {
 	projectID, parseErr := uuid.Parse(c.Param("id"))
 	if parseErr != nil {
 		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid project ID"})
 		return
 	}
 	withVerified := c.Query("include_verified") == "true"
 	items, listErr := h.store.ListCustomerStandardSuggestions(c.Request.Context(), projectID, withVerified)
 	if listErr != nil {
 		c.JSON(http.StatusInternalServerError, gin.H{"error": listErr.Error()})
 		return
 	}
 	// Guarantee a JSON array even when the store hands back a nil slice.
 	if items == nil {
 		items = []iace.CustomerStandardSuggestion{}
 	}
 	payload := gin.H{
 		"suggestions": items,
 		"count":       len(items),
 	}
 	c.JSON(http.StatusOK, payload)
 }
 // ImportCustomerStandardSuggestion serves
 //   POST /api/v1/iace/projects/:id/customer-standards/import
 //   Body: { "name": "Sicherheitszeichen nach ISO 7010" }
 //
 // It asks the store to apply the named suggestion to the project given by
 // the :id path parameter and reports how many mitigations the store says it
 // touched. The "name" field is mandatory (enforced by the binding tag).
 //
 // Responses:
 //   400 — :id is not a valid UUID, or the JSON body fails binding
 //   500 — the store import failed (the error text is passed through)
 //   200 — { "imported": n, "name": <requested name> }
 func (h *IACEHandler) ImportCustomerStandardSuggestion(c *gin.Context) {
 	projectID, parseErr := uuid.Parse(c.Param("id"))
 	if parseErr != nil {
 		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid project ID"})
 		return
 	}
 	type importRequest struct {
 		Name string `json:"name" binding:"required"`
 	}
 	var req importRequest
 	if bindErr := c.ShouldBindJSON(&req); bindErr != nil {
 		c.JSON(http.StatusBadRequest, gin.H{"error": bindErr.Error()})
 		return
 	}
 	importedCount, importErr := h.store.ImportCustomerStandardSuggestion(c.Request.Context(), projectID, req.Name)
 	if importErr != nil {
 		c.JSON(http.StatusInternalServerError, gin.H{"error": importErr.Error()})
 		return
 	}
 	payload := gin.H{
 		"imported": importedCount,
 		"name":     req.Name,
 	}
 	c.JSON(http.StatusOK, payload)
 }
@@ -459,6 +459,11 @@ func registerIACERoutes(v1 *gin.RouterGroup, h *handlers.IACEHandler) {
 		iaceRoutes.GET("/projects/:id/clarifications/:cid/detail", h.ListClarificationDetail)
 		iaceRoutes.POST("/projects/:id/clarifications/:cid/answer", h.AnswerClarification)
 		iaceRoutes.POST("/projects/:id/clarifications/:cid/comment", h.PostClarificationComment)
 		// Customer-Standard Reuse (migration 031): pull reusable mitigations
 		// across prior projects of the same customer.
 		iaceRoutes.GET("/projects/:id/customer-standards", h.ListCustomerStandardSuggestions)
 		iaceRoutes.POST("/projects/:id/customer-standards/import", h.ImportCustomerStandardSuggestion)
 	}
 }
@@ -16,6 +16,7 @@ type CreateProjectRequest struct {
 	MachineName     string          `json:"machine_name" binding:"required"`
 	MachineType     string          `json:"machine_type" binding:"required"`
 	Manufacturer    string          `json:"manufacturer" binding:"required"`
 	CustomerName    string          `json:"customer_name,omitempty"`
 	Description     string          `json:"description,omitempty"`
 	NarrativeText   string          `json:"narrative_text,omitempty"`
 	CEMarkingTarget string          `json:"ce_marking_target,omitempty"`
@@ -27,6 +28,7 @@ type UpdateProjectRequest struct {
 	MachineName     *string          `json:"machine_name,omitempty"`
 	MachineType     *string          `json:"machine_type,omitempty"`
 	Manufacturer    *string          `json:"manufacturer,omitempty"`
 	CustomerName    *string          `json:"customer_name,omitempty"`
 	Description     *string          `json:"description,omitempty"`
 	NarrativeText   *string          `json:"narrative_text,omitempty"`
 	CEMarkingTarget *string          `json:"ce_marking_target,omitempty"`
@@ -19,6 +19,11 @@ type Project struct {
 	MachineName          string          `json:"machine_name"`
 	MachineType          string          `json:"machine_type"`
 	Manufacturer         string          `json:"manufacturer"`
 	// CustomerName is the end customer (Anlagenbetreiber). Optional —
 	// projects without a customer are still valid, but customer-standard
 	// reuse only fires across projects sharing the same non-empty value
 	// (case-insensitive match, see customerKey()).
 	CustomerName         string          `json:"customer_name,omitempty"`
 	Description          string          `json:"description,omitempty"`
 	NarrativeText        string          `json:"narrative_text,omitempty"`
 	Status               ProjectStatus   `json:"status"`
@@ -0,0 +1,211 @@
 package iace
 import (
 	"context"
 	"fmt"
 	"strings"
 	"github.com/google/uuid"
 )
 // CustomerStandardSuggestion aggregates one reusable mitigation across prior
 // projects of the same customer. The same mitigation name may appear in
 // multiple prior projects; we collapse them into a single suggestion and
 // count the prior occurrences so the expert sees a confidence signal.
 //
 // One value corresponds to one (name, reduction_type) group of the
 // aggregate query in ListCustomerStandardSuggestions.
 type CustomerStandardSuggestion struct {
 	// Name is the mitigation name; together with ReductionType it forms
 	// the grouping key of the suggestion.
 	Name              string `json:"name"`
 	// ReductionType is the mitigation's reduction_type column value.
 	ReductionType     string `json:"reduction_type"`
 	// Description is a single representative description for the group
 	// (the query picks it with SQL MAX over the grouped rows).
 	Description       string `json:"description"`
 	// SourceProjectCount tells the expert in how many of the customer's
 	// earlier projects this mitigation was already flagged. Higher count
 	// = stronger reuse signal.
 	SourceProjectCount   int      `json:"source_project_count"`
 	// SourceProjectNames lists the distinct machine_name values of those
 	// earlier projects, sorted by name.
 	SourceProjectNames   []string `json:"source_project_names"`
 	// IsCustomerStandard is true when at least one grouped mitigation has
 	// is_customer_standard set.
 	IsCustomerStandard   bool     `json:"is_customer_standard"`
 	// HasVerifiedInstances is true when at least one grouped mitigation
 	// has status 'verified'.
 	HasVerifiedInstances bool     `json:"has_verified_instances"`
 }
 // ListCustomerStandardSuggestions returns reusable mitigations from prior
 // projects of the same customer as projectID. The customer key is the
 // case-insensitive trimmed customer_name; an empty customer_name short-
 // circuits to an empty result.
 //
 // includeVerified=false → only mitigations with is_customer_standard=true
 // includeVerified=true  → also include status='verified' mitigations
 //                         (broader pool, useful when the customer-standard
 //                         habit isn't yet established in the data)
 //
 // Only non-archived projects of the same tenant, other than projectID
 // itself, contribute. Results are grouped by (name, reduction_type) and
 // ordered by descending source-project count, then by name.
 //
 // The returned slice is never nil on success (empty slice when nothing
 // matches). An error is returned when the project cannot be resolved, the
 // query fails, a row cannot be scanned, or row iteration ends with an error.
 func (s *Store) ListCustomerStandardSuggestions(
 	ctx context.Context,
 	projectID uuid.UUID,
 	includeVerified bool,
 ) ([]CustomerStandardSuggestion, error) {
 	// Resolve the customer + tenant for the current project.
 	var tenantID uuid.UUID
 	var customerName string
 	err := s.pool.QueryRow(ctx,
 		`SELECT tenant_id, COALESCE(customer_name, '') FROM iace_projects WHERE id = $1`,
 		projectID,
 	).Scan(&tenantID, &customerName)
 	if err != nil {
 		return nil, fmt.Errorf("resolve project for customer-standards: %w", err)
 	}
 	customerName = strings.TrimSpace(customerName)
 	if customerName == "" {
 		return []CustomerStandardSuggestion{}, nil
 	}
 	// filterClause is always one of two fixed literals (never caller
 	// input), so splicing it into the statement with Sprintf is safe.
 	filterClause := "m.is_customer_standard = TRUE"
 	if includeVerified {
 		filterClause = "(m.is_customer_standard = TRUE OR m.status = 'verified')"
 	}
 	// COALESCE around MAX(description): the aggregate is NULL when every
 	// grouped description is NULL, and NULL cannot be scanned into the
 	// plain string field below.
 	query := fmt.Sprintf(`
 		SELECT
 			m.name,
 			m.reduction_type,
 			COALESCE(MAX(m.description), '') AS description,
 			COUNT(DISTINCT p.id) AS source_count,
 			array_agg(DISTINCT p.machine_name ORDER BY p.machine_name) AS source_names,
 			BOOL_OR(m.is_customer_standard) AS has_customer_std,
 			BOOL_OR(m.status = 'verified')   AS has_verified
 		FROM iace_mitigations m
 		JOIN iace_hazards   h ON h.id = m.hazard_id
 		JOIN iace_projects  p ON p.id = h.project_id
 		WHERE p.tenant_id = $1
 		  AND p.id <> $2
 		  AND p.archived_at IS NULL
 		  AND LOWER(TRIM(COALESCE(p.customer_name, ''))) = LOWER($3)
 		  AND %s
 		GROUP BY m.name, m.reduction_type
 		ORDER BY source_count DESC, m.name
 	`, filterClause)
 	rows, err := s.pool.Query(ctx, query, tenantID, projectID, customerName)
 	if err != nil {
 		return nil, fmt.Errorf("query customer-standards: %w", err)
 	}
 	defer rows.Close()
 	// Start from an empty (non-nil) slice so "no matches" looks the same
 	// as the empty-customer early return above.
 	out := []CustomerStandardSuggestion{}
 	for rows.Next() {
 		var sg CustomerStandardSuggestion
 		if scanErr := rows.Scan(
 			&sg.Name, &sg.ReductionType, &sg.Description,
 			&sg.SourceProjectCount, &sg.SourceProjectNames,
 			&sg.IsCustomerStandard, &sg.HasVerifiedInstances,
 		); scanErr != nil {
 			return nil, fmt.Errorf("scan customer-standards: %w", scanErr)
 		}
 		out = append(out, sg)
 	}
 	// Next() returning false can also mean the iteration failed midway;
 	// without this check a truncated list would be returned as success.
 	if iterErr := rows.Err(); iterErr != nil {
 		return nil, fmt.Errorf("iterate customer-standards: %w", iterErr)
 	}
 	return out, nil
 }
 // ImportCustomerStandardSuggestion applies a suggestion to the current
 // project: for every hazard in the project whose name matches one of the
 // suggestion's source hazards (by mitigation.name → hazard.name pairing in
 // prior projects), it ensures a relevant + customer-standard mitigation
 // exists. New mitigations are inserted via CreateMitigation, existing ones
 // are flipped to is_relevant=true + is_customer_standard=true +
 // status='verified'.
 //
 // Each target hazard is processed exactly once, even when the prior
 // projects carry the same hazard name with differing descriptions or
 // reduction types; in that case the first prototype in (hazard name,
 // reduction_type, description) order is used.
 //
 // Returns the number of mitigations affected (created + updated). On
 // error the count of mitigations already processed is returned alongside
 // the error where available.
 //
 // NOTE(review): this relies on CreateMitigation (defined elsewhere) being
 // idempotent per (hazard_id, name) — confirm. The writes are not wrapped
 // in a transaction, so a mid-way failure leaves earlier hazards updated.
 // NOTE(review): the prior-project lookup matches on mitigation name only;
 // unlike ListCustomerStandardSuggestions it does not require
 // is_customer_standard or status='verified' — confirm this is intended.
 func (s *Store) ImportCustomerStandardSuggestion(
 	ctx context.Context,
 	projectID uuid.UUID,
 	mitigationName string,
 ) (int, error) {
 	// Find tenant + customer of the target project.
 	var tenantID uuid.UUID
 	var customerName string
 	if err := s.pool.QueryRow(ctx,
 		`SELECT tenant_id, COALESCE(customer_name, '') FROM iace_projects WHERE id = $1`,
 		projectID,
 	).Scan(&tenantID, &customerName); err != nil {
 		return 0, fmt.Errorf("resolve project: %w", err)
 	}
 	customerName = strings.TrimSpace(customerName)
 	if customerName == "" {
 		return 0, fmt.Errorf("project has no customer_name — nothing to reuse")
 	}
 	// Collect the hazard names this mitigation was attached to in the
 	// customer's prior projects + a representative reduction_type/description.
 	// ORDER BY makes the representative choice below deterministic.
 	priorRows, err := s.pool.Query(ctx, `
 		SELECT DISTINCT h.name, m.reduction_type, COALESCE(m.description, '')
 		FROM iace_mitigations m
 		JOIN iace_hazards  h ON h.id = m.hazard_id
 		JOIN iace_projects p ON p.id = h.project_id
 		WHERE p.tenant_id = $1
 		  AND p.id <> $2
 		  AND p.archived_at IS NULL
 		  AND LOWER(TRIM(COALESCE(p.customer_name, ''))) = LOWER($3)
 		  AND m.name = $4
 		ORDER BY 1, 2, 3
 	`, tenantID, projectID, customerName, mitigationName)
 	if err != nil {
 		return 0, fmt.Errorf("collect prior hazards: %w", err)
 	}
 	defer priorRows.Close()
 	type proto struct{ hazardName, reductionType, description string }
 	var prototypes []proto
 	// DISTINCT covers (name, reduction_type, description), so one hazard
 	// name can come back several times. Keep only the first row per hazard
 	// name; otherwise the same target hazard would be written and counted
 	// once per variant.
 	seenHazard := make(map[string]struct{})
 	for priorRows.Next() {
 		var p proto
 		if err := priorRows.Scan(&p.hazardName, &p.reductionType, &p.description); err != nil {
 			return 0, fmt.Errorf("scan prior hazards: %w", err)
 		}
 		if _, dup := seenHazard[p.hazardName]; dup {
 			continue
 		}
 		seenHazard[p.hazardName] = struct{}{}
 		prototypes = append(prototypes, p)
 	}
 	if err := priorRows.Err(); err != nil {
 		return 0, fmt.Errorf("iterate prior hazards: %w", err)
 	}
 	if len(prototypes) == 0 {
 		return 0, nil
 	}
 	// For every prototype hazard name, find the matching hazard in the
 	// current project (same name) and ensure a relevant + customer-standard
 	// mitigation with mitigationName exists for it.
 	affected := 0
 	for _, p := range prototypes {
 		var hazardIDs []uuid.UUID
 		hazRows, err := s.pool.Query(ctx,
 			`SELECT id FROM iace_hazards WHERE project_id = $1 AND name = $2`,
 			projectID, p.hazardName,
 		)
 		if err != nil {
 			return affected, fmt.Errorf("find target hazards: %w", err)
 		}
 		for hazRows.Next() {
 			var hid uuid.UUID
 			if scanErr := hazRows.Scan(&hid); scanErr != nil {
 				hazRows.Close()
 				return affected, fmt.Errorf("scan target hazards: %w", scanErr)
 			}
 			hazardIDs = append(hazardIDs, hid)
 		}
 		// Read the iteration error before closing, then release the cursor
 		// before issuing the writes below.
 		iterErr := hazRows.Err()
 		hazRows.Close()
 		if iterErr != nil {
 			return affected, fmt.Errorf("iterate target hazards: %w", iterErr)
 		}
 		for _, hid := range hazardIDs {
 			// Insert (expected to be a no-op when the row already exists);
 			// the UPDATE afterwards sets relevance + verified state.
 			_, err := s.CreateMitigation(ctx, CreateMitigationRequest{
 				HazardID:      hid,
 				Name:          mitigationName,
 				Description:   p.description,
 				ReductionType: ReductionType(p.reductionType),
 			})
 			if err != nil {
 				return affected, fmt.Errorf("create mitigation: %w", err)
 			}
 			if _, err := s.pool.Exec(ctx, `
 				UPDATE iace_mitigations
 				   SET is_relevant = TRUE,
 				       is_customer_standard = TRUE,
 				       status = 'verified',
 				       updated_at = NOW()
 				 WHERE hazard_id = $1 AND name = $2
 			`, hid, mitigationName); err != nil {
 				return affected, fmt.Errorf("upgrade mitigation: %w", err)
 			}
 			affected++
 		}
 	}
 	return affected, nil
 }
@@ -23,6 +23,7 @@ func (s *Store) CreateProject(ctx context.Context, tenantID uuid.UUID, req Creat
 		MachineName:     req.MachineName,
 		MachineType:     req.MachineType,
 		Manufacturer:    req.Manufacturer,
 		CustomerName:    req.CustomerName,
 		Description:     req.Description,
 		NarrativeText:   req.NarrativeText,
 		Status:          ProjectStatusDraft,
@@ -35,19 +36,19 @@ func (s *Store) CreateProject(ctx context.Context, tenantID uuid.UUID, req Creat
 	_, err := s.pool.Exec(ctx, `
 		INSERT INTO iace_projects (
 			id, tenant_id, parent_project_id, machine_name, machine_type, manufacturer,
-			description, narrative_text, status, ce_marking_target,
+			customer_name, description, narrative_text, status, ce_marking_target,
 			completeness_score, risk_summary, triggered_regulations, metadata,
 			created_at, updated_at, archived_at
 		) VALUES (
 			$1, $2, $3, $4, $5, $6,
-			$7, $8, $9, $10,
+			$7, $8, $9, $10, $11,
-			$11, $12, $13, $14,
+			$12, $13, $14, $15,
-			$15, $16, $17
+			$16, $17, $18
 		)
 	`,
 		project.ID, project.TenantID, project.ParentProjectID,
 		project.MachineName, project.MachineType, project.Manufacturer,
-		project.Description, project.NarrativeText, string(project.Status), project.CEMarkingTarget,
+		project.CustomerName, project.Description, project.NarrativeText, string(project.Status), project.CEMarkingTarget,
 		project.CompletenessScore, nil, project.TriggeredRegulations, project.Metadata,
 		project.CreatedAt, project.UpdatedAt, project.ArchivedAt,
 	)
@@ -67,13 +68,13 @@ func (s *Store) GetProject(ctx context.Context, id uuid.UUID) (*Project, error)
 	err := s.pool.QueryRow(ctx, `
 		SELECT
 			id, tenant_id, parent_project_id, machine_name, machine_type, manufacturer,
-			description, narrative_text, status, ce_marking_target,
+			COALESCE(customer_name, ''), description, narrative_text, status, ce_marking_target,
 			completeness_score, risk_summary, triggered_regulations, metadata,
 			created_at, updated_at, archived_at
 		FROM iace_projects WHERE id = $1
 	`, id).Scan(
 		&p.ID, &p.TenantID, &p.ParentProjectID, &p.MachineName, &p.MachineType, &p.Manufacturer,
-		&p.Description, &p.NarrativeText, &status, &p.CEMarkingTarget,
+		&p.CustomerName, &p.Description, &p.NarrativeText, &status, &p.CEMarkingTarget,
 		&p.CompletenessScore, &riskSummary, &triggeredRegulations, &metadata,
 		&p.CreatedAt, &p.UpdatedAt, &p.ArchivedAt,
 	)
@@ -97,7 +98,7 @@ func (s *Store) ListProjects(ctx context.Context, tenantID uuid.UUID) ([]Project
 	rows, err := s.pool.Query(ctx, `
 		SELECT
 			id, tenant_id, parent_project_id, machine_name, machine_type, manufacturer,
-			description, narrative_text, status, ce_marking_target,
+			COALESCE(customer_name, ''), description, narrative_text, status, ce_marking_target,
 			completeness_score, risk_summary, triggered_regulations, metadata,
 			created_at, updated_at, archived_at
 		FROM iace_projects WHERE tenant_id = $1
@@ -116,7 +117,7 @@ func (s *Store) ListProjects(ctx context.Context, tenantID uuid.UUID) ([]Project
 		err := rows.Scan(
 			&p.ID, &p.TenantID, &p.ParentProjectID, &p.MachineName, &p.MachineType, &p.Manufacturer,
-			&p.Description, &p.NarrativeText, &status, &p.CEMarkingTarget,
+			&p.CustomerName, &p.Description, &p.NarrativeText, &status, &p.CEMarkingTarget,
 			&p.CompletenessScore, &riskSummary, &triggeredRegulations, &metadata,
 			&p.CreatedAt, &p.UpdatedAt, &p.ArchivedAt,
 		)
@@ -156,6 +157,9 @@ func (s *Store) UpdateProject(ctx context.Context, id uuid.UUID, req UpdateProje
 	if req.Manufacturer != nil {
 		project.Manufacturer = *req.Manufacturer
 	}
 	if req.CustomerName != nil {
 		project.CustomerName = *req.CustomerName
 	}
 	if req.Description != nil {
 		project.Description = *req.Description
 	}
@@ -174,11 +178,13 @@ func (s *Store) UpdateProject(ctx context.Context, id uuid.UUID, req UpdateProje
 	_, err = s.pool.Exec(ctx, `
 		UPDATE iace_projects SET
 			machine_name = $2, machine_type = $3, manufacturer = $4,
-			description = $5, narrative_text = $6, ce_marking_target = $7,
+			customer_name = $5,
-			metadata = $8, updated_at = $9
+			description = $6, narrative_text = $7, ce_marking_target = $8,
 			metadata = $9, updated_at = $10
 		WHERE id = $1
 	`,
 		id, project.MachineName, project.MachineType, project.Manufacturer,
 		project.CustomerName,
 		project.Description, project.NarrativeText, project.CEMarkingTarget,
 		project.Metadata, project.UpdatedAt,
 	)
@@ -250,7 +256,7 @@ func (s *Store) ListVariants(ctx context.Context, parentID uuid.UUID) ([]Project
 	rows, err := s.pool.Query(ctx, `
 		SELECT
 			id, tenant_id, parent_project_id, machine_name, machine_type, manufacturer,
-			description, narrative_text, status, ce_marking_target,
+			COALESCE(customer_name, ''), description, narrative_text, status, ce_marking_target,
 			completeness_score, risk_summary, triggered_regulations, metadata,
 			created_at, updated_at, archived_at
 		FROM iace_projects WHERE parent_project_id = $1
@@ -269,7 +275,7 @@ func (s *Store) ListVariants(ctx context.Context, parentID uuid.UUID) ([]Project
 		err := rows.Scan(
 			&p.ID, &p.TenantID, &p.ParentProjectID, &p.MachineName, &p.MachineType, &p.Manufacturer,
-			&p.Description, &p.NarrativeText, &status, &p.CEMarkingTarget,
+			&p.CustomerName, &p.Description, &p.NarrativeText, &status, &p.CEMarkingTarget,
 			&p.CompletenessScore, &riskSummary, &triggeredRegulations, &metadata,
 			&p.CreatedAt, &p.UpdatedAt, &p.ArchivedAt,
 		)
@@ -0,0 +1,27 @@
 -- Migration 031: customer_name on iace_projects + reuse-helper index
 -- ==========================================================================
 -- The IACE module is operated by a single Maschinenhersteller (the SDK
 -- user), but their plants land at many different end customers. A safety
 -- expert who commissions the second or third plant at the same customer
 -- often finds that whole classes of mitigations are already in place
 -- there (company-wide PPE rules, locked-out energy isolation, customer-
 -- standard signage, etc.). Today, this expert knowledge is rediscovered
 -- per project.
 --
 -- This migration introduces a plain customer_name field on the project
 -- (no separate customer table yet — Option A from the design discussion;
 -- normalised iace_customers can come later when a real customer-management
 -- screen is built). The field is optional so existing projects without a
 -- customer remain valid.
 --
 -- The partial index makes the customer-standards lookup cheap: only
 -- projects with a non-empty customer_name participate, since reuse is
 -- meaningless without it.
 --
 -- NOTE(review): the customer-standards queries in the Go store filter on
 -- LOWER(TRIM(COALESCE(customer_name, ''))) rather than on the bare column.
 -- A B-tree index on the plain column is presumably not usable for that
 -- expression predicate; an expression index on the same normalised form
 -- (ideally prefixed with tenant_id) would be needed for the lookup to
 -- benefit. Confirm with EXPLAIN before relying on this index.
 -- ==========================================================================
 -- Both statements are guarded with IF NOT EXISTS, so re-running the
 -- migration is a no-op.
 ALTER TABLE iace_projects
     ADD COLUMN IF NOT EXISTS customer_name TEXT;
 -- Partial index: rows with NULL or empty customer_name are not indexed.
 CREATE INDEX IF NOT EXISTS idx_iace_projects_customer_name
     ON iace_projects(customer_name)
     WHERE customer_name IS NOT NULL AND customer_name <> '';
@@ -46,6 +46,11 @@ class ComplianceCheckRequest(BaseModel):
    documents: list[DocumentInput]
    use_agent: bool = False
    recipient: str = "dsb@breakpilot.local"
    # P12: Override fuer TDM-Vorbehalt bei dokumentierter Kunden-Erlaubnis.
    # Pflichtfeld tdm_override_reason wenn tdm_override=True
    # (z.B. "Auftragsbeziehung Safetykon GmbH, Email Hr. X 18.05.2026").
    tdm_override: bool = False
    tdm_override_reason: str = ""
 class ComplianceCheckStartResponse(BaseModel):
@@ -166,6 +171,50 @@ async def _run_compliance_check(check_id: str, req: ComplianceCheckRequest):
        except Exception:
            pass
        # P7: TDM-Reservation-Check der Base-Domain (§ 44b UrhG).
        # Bei reserved/denied: Run sofort beenden, kein Crawl.
        try:
            from compliance.services.tdm_reservation_check import (
                check_tdm_reservation, is_crawl_allowed,
            )
            first_url = next(
                (d.url for d in req.documents if d.url), "",
            )
            if first_url:
                tdm = await check_tdm_reservation(first_url)
                _compliance_check_jobs[check_id]["tdm"] = tdm
                # P12: Bei tdm_override + Reason wird NICHT abgebrochen,
                # sondern nur dokumentiert. Override ohne Reason wird ignoriert.
                override_active = (
                    req.tdm_override
                    and len((req.tdm_override_reason or "").strip()) >= 10
                )
                if not is_crawl_allowed(tdm) and not override_active:
                    _compliance_check_jobs[check_id]["status"] = "skipped_tdm"
                    _compliance_check_jobs[check_id]["error"] = (
                        f"TDM-Vorbehalt fuer {tdm.get('domain')} erkannt "
                        f"(status={tdm.get('status')}) — Crawl nach § 44b "
                        f"UrhG nicht zulaessig. Signals: "
                        f"{[s.get('src') for s in tdm.get('signals', [])]}"
                    )
                    _compliance_check_jobs[check_id]["progress_pct"] = 100
                    logger.info("TDM-skip check_id=%s domain=%s status=%s",
                                check_id, tdm.get("domain"), tdm.get("status"))
                    return
                if override_active and not is_crawl_allowed(tdm):
                    _compliance_check_jobs[check_id]["tdm_override"] = {
                        "reason": req.tdm_override_reason.strip()[:500],
                        "original_status": tdm.get("status"),
                    }
                    logger.warning(
                        "TDM-Override aktiv: check_id=%s domain=%s "
                        "status=%s reason=%r",
                        check_id, tdm.get("domain"), tdm.get("status"),
                        req.tdm_override_reason.strip()[:80],
                    )
        except Exception as e:
            logger.warning("TDM-check failed (proceeding): %s", e)
        # Step 1: Resolve texts (fetch from URL if needed) — 0-30%
        _update(check_id, "Texte werden geladen...", 1)
        doc_texts: dict[str, str] = {}
@@ -226,9 +275,73 @@ async def _run_compliance_check(check_id: str, req: ComplianceCheckRequest):
            if entry.get("text"):
                doc_texts[entry["doc_type"]] = entry["text"]
        # P15: Dedupe — wenn mehrere Doc-Types DASSELBE Dokument referenzieren
        # (z.B. Safetykon: User gibt /datenschutz fuer dse + cookie + widerruf),
        # behalten wir nur den primaeren Doc-Type. Andere: leeren + note.
        # Priorität: dse > impressum > cookie > widerruf > agb > nutzungsbedingungen
        _DOC_PRIORITY = ["dse", "impressum", "cookie", "widerruf", "agb",
                         "nutzungsbedingungen", "social_media", "dsb"]
        seen_text_hash: dict[int, str] = {}
        for dt in _DOC_PRIORITY:
            entry = next((e for e in doc_entries if e.get("doc_type") == dt
                          and e.get("text")), None)
            if not entry:
                continue
            text_hash = hash((entry.get("text") or "").strip()[:1000])
            if text_hash in seen_text_hash:
                primary = seen_text_hash[text_hash]
                logger.info(
                    "P15 dedup: doc_type=%s referenziert dasselbe Dokument "
                    "wie %s (URL=%s) -> als Duplikat markiert.",
                    dt, primary, entry.get("url", "")[:60],
                )
                entry["text"] = ""
                entry["word_count"] = 0
                entry["url"] = ""
                entry["dup_of"] = primary
                doc_texts.pop(dt, None)
            else:
                seen_text_hash[text_hash] = dt
        # Step 2: Detect business profile (35-40%)
        _update(check_id, "Geschaeftsmodell wird erkannt...", 37)
-        profile = await detect_business_profile(doc_texts)
+        # P16: Homepage-Text mit fuer Profile-Detection (no_direct_sales
        # B2B-Indikatoren wie "CE-Zertifizierung" / "Schulungen" stehen oft
        # nur im Homepage-Menue, nicht im Pflichttext).
        profile_input = dict(doc_texts)
        try:
            base_url = ""
            for e in doc_entries:
                if e.get("url"):
                    from urllib.parse import urlparse
                    p = urlparse(e["url"])
                    if p.scheme and p.netloc:
                        base_url = f"{p.scheme}://{p.netloc}/"
                        break
            if base_url:
                import re as _re
                async with httpx.AsyncClient(
                    timeout=8.0, follow_redirects=True,
                    headers={"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) "
                             "AppleWebKit/537.36 HeadlessChrome/120.0.0.0"},
                ) as _hc:
                    _hr = await _hc.get(base_url)
                    if _hr.status_code == 200 and "text/html" in _hr.headers.get(
                            "content-type", ""):
                        _html = _hr.text[:60000]
                        _html = _re.sub(r"<script[^>]*>.*?</script>", " ",
                                        _html, flags=_re.DOTALL | _re.IGNORECASE)
                        _html = _re.sub(r"<style[^>]*>.*?</style>", " ",
                                        _html, flags=_re.DOTALL | _re.IGNORECASE)
                        _html = _re.sub(r"<[^>]+>", " ", _html)
                        _html = _re.sub(r"\s+", " ", _html).strip()
                        if len(_html.split()) > 30:
                            profile_input["__homepage"] = _html[:20000]
                            logger.info("P16 homepage merged for profile: %d words",
                                        len(_html.split()))
        except Exception as e:
            logger.debug("homepage fetch for profile failed: %s", e)
        profile = await detect_business_profile(profile_input)
        profile_dict = asdict(profile)
        # Step 3: Check each document
@@ -274,6 +387,15 @@ async def _run_compliance_check(check_id: str, req: ComplianceCheckRequest):
            _update(check_id, f"Pruefen {i+1}/{n_entries}: {label}...", pct)
            if not text or len(text) < 50:
                # P15: duplicate doc that was deduped against a primary doc
                if entry.get("dup_of"):
                    results.append(DocCheckResult(
                        label=label, url="", doc_type=doc_type,
                        error=f"Nicht separat vorhanden — wird im Dokument "
                              f"'{_doc_type_label(entry['dup_of'])}' "
                              f"mit-geprueft.",
                    ))
                    continue
                # Empty entry — either from auto-discovery padding (no URL
                # to fetch) or from a fetch that returned nothing. If there
                # was a URL we keep the error so the user knows the fetch
@@ -290,6 +412,7 @@ async def _run_compliance_check(check_id: str, req: ComplianceCheckRequest):
                text, doc_type, label, url,
                entry["word_count"], use_agent_flag,
                business_scope=business_scope,
                business_profile={"no_direct_sales": getattr(profile, "no_direct_sales", False)},
            )
            # Apply profile context filter
@@ -413,6 +536,15 @@ async def _run_compliance_check(check_id: str, req: ComplianceCheckRequest):
                        cookie_payloads.extend(e["cmp_payloads"])
                    if e.get("text"):
                        cookie_text = e["text"]
            # P17-D: Fallback wenn cookie via P15 deduped wurde — nutze DSE-Text
            # sofern Cookie-Begriffe drin sind, damit LLM-Vendor-Extract trotzdem
            # greifen kann.
            if not cookie_text and not cookie_payloads:
                dse_t = doc_texts.get("dse", "")
                if dse_t and any(w in dse_t.lower() for w in
                                  ("cookie", "tracking", "google analytics", "consent")):
                    cookie_text = dse_t
                    logger.info("P17-D: vendor-extract Fallback auf DSE (Cookie deduped)")
            # Site-owner derived from the submitted URLs — drives the
            # INTERNAL/GROUP_COMPANY classification of vendor records.
            owner_name = _company_name_from_url(doc_entries) or ""
@@ -526,15 +658,78 @@ async def _run_compliance_check(check_id: str, req: ComplianceCheckRequest):
        report_html = build_html_report(results, None, doc_texts)
        profile_html = _build_profile_html(profile)
-        # O4: Vendor-Redundanz / EU-Alternativen + Cost-Savings-Block —
+        # O4: Vendor-Redundanz / EU-Alternativen + Cost-Savings-Block
        # zwischen VVT und Doc-Report einsortiert, damit Geschaeftsfuehrung
        # die Einsparung sieht bevor sie in die Detail-Pruefung geht.
        from .agent_doc_check_redundancy import build_redundancy_html
        redundancy_html = build_redundancy_html(redundancy_report)
        # P1: Executive-Summary GANZ oben — CFO/GF sieht 4 KPIs + 2 CTAs.
        from .agent_doc_check_exec_summary import build_exec_summary_html
        # Site-Name fuer Header bestimmen (gleiche Logik wie Email-Subject)
        url_company_for_exec = _company_name_from_url(doc_entries)
        domain_for_exec = _extract_domain(doc_entries)
        site_name_for_exec = url_company_for_exec or domain_for_exec or ""
        exec_summary_html = build_exec_summary_html(
            scorecard=scorecard,
            previous_scorecard=prev_scorecard,
            cmp_vendors=cmp_vendors,
            redundancy_report=redundancy_report,
            site_name=site_name_for_exec,
        )
        # P10: Cookie-Policy-Architecture-Detection (BMW-Pattern erkennen)
        cookie_arch_html = ""
        try:
            from compliance.services.cookie_policy_architecture import (
                detect_architecture, build_architecture_html,
            )
            cookie_doc_url = ""
            cookie_doc_text = doc_texts.get("cookie", "")
            cookie_cmp_payloads: list[dict] = []
            for e in doc_entries:
                if (e.get("doc_type") or "").lower() in ("cookie", "cookie_policy"):
                    cookie_doc_url = e.get("url", "")
                    cookie_cmp_payloads = e.get("cmp_payloads") or []
                    break
            # P17-A: Fallback wenn Cookie-Doc via P15 deduped wurde — nutze
            # den DSE-Text wenn er Cookie-Schluesselwoerter enthaelt.
            if not cookie_doc_text:
                dse_text = doc_texts.get("dse", "")
                if dse_text and any(w in dse_text.lower() for w in
                                     ("cookie", "tracking", "google analytics",
                                      "consent")):
                    cookie_doc_text = dse_text
                    dse_entry = next((e for e in doc_entries
                                      if e.get("doc_type") == "dse"), {})
                    cookie_doc_url = dse_entry.get("url", "")
                    cookie_cmp_payloads = dse_entry.get("cmp_payloads") or []
                    logger.info("P17-A: cookie-arch fallback auf DSE (Cookie-Doc deduped)")
            if cookie_doc_text:
                arch = detect_architecture(
                    doc_url=cookie_doc_url,
                    doc_text=cookie_doc_text,
                    cmp_payloads=cookie_cmp_payloads,
                    homepage_cmp_payloads=cmp_payloads or [],
                )
                cookie_arch_html = build_architecture_html(arch)
                logger.info("cookie-arch: layer=%s versioned=%s risk=%s",
                            arch["layer_separation"], arch["versioned"], arch["risk_label"])
        except Exception as e:
            logger.warning("cookie-architecture detection failed: %s", e)
        # Reihenfolge — Sales-optimiert:
        #   1) Exec-Summary (KPIs + Saving + CTAs)
        #   2) summary_html (Konkrete Aufgaben fuer die Geschaeftsfuehrung)
        #   3) scanned_urls (Quellen-Transparenz)
        #   4) profile_html (Erkanntes Geschaeftsmodell)
        #   5) scorecard_html (MC-Scorecard)
        #   6) redundancy_html (Optimierungspotenzial — direkt nach Compliance-Score)
        #   7) providers_html + vvt_html (Vendor-Liste)
        #   8) report_html (Doc-Pruefung Details)
        full_html = (
-            summary_html + scanned_html + profile_html + scorecard_html
+            exec_summary_html + cookie_arch_html + summary_html
-            + providers_html + vvt_html + redundancy_html + report_html
+            + scanned_html + profile_html
            + scorecard_html + redundancy_html
            + providers_html + vvt_html + report_html
        )
        # Step 6: Send email — derive site name primarily from entered URL.
@@ -619,6 +814,21 @@ async def _run_compliance_check(check_id: str, req: ComplianceCheckRequest):
                vendors=cmp_vendors,
                profile=extracted_profile,
            )
            # Unified findings (P5): bundle MC + Pflichtangaben + Vendor +
            # Redundanz in one searchable table behind /agent/findings/<id>.
            try:
                from compliance.services.unified_findings_collector import collect
                from compliance.services.unified_findings_store import record_findings
                unified = collect(
                    check_id=check_id,
                    results=results,
                    cmp_vendors=cmp_vendors,
                    redundancy_report=redundancy_report,
                    doc_texts=doc_texts,
                )
                record_findings(check_id, unified)
            except Exception as e:
                logger.warning("Unified findings collect failed: %s", e)
        except Exception as e:
            logger.warning("Audit persistence skipped: %s", e)
@@ -696,11 +906,19 @@ async def _fetch_text(url: str, doc_type: str = "") -> tuple[str, list[dict]]:
    except Exception as e:
        logger.warning("Consent-tester fetch failed for %s: %s", url, e)
-    # 2. Fallback: direct HTTP fetch (works for SSR pages like BMW)
+    # 2. Fallback: direct HTTP fetch (works for SSR pages like BMW).
    # P7: kenntlicher UA + per-Domain Rate-Limit.
    try:
        import re as _re
-        async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
+        from compliance.services.compliance_user_agent import (
-            resp = await client.get(url)
+            default_request_headers, DomainRateLimiter,
        )
        async with httpx.AsyncClient(
            timeout=30.0, follow_redirects=True,
            headers=default_request_headers(),
        ) as client:
            async with DomainRateLimiter(url):
                resp = await client.get(url)
            if resp.status_code == 200 and "text/html" in resp.headers.get("content-type", ""):
                html = resp.text
                # Strip HTML tags, decode entities
@@ -929,6 +1147,7 @@ async def _check_single(
    text: str, doc_type: str, label: str, url: str,
    word_count: int, use_agent: bool,
    business_scope: set[str] | None = None,
    business_profile: dict | None = None,
 ):
    """Run regex + MC checks on a single document."""
    from compliance.services.doc_checks.runner import check_document_completeness
@@ -936,7 +1155,8 @@ async def _check_single(
    from .agent_doc_check_routes import CheckItem, DocCheckResult
    # Regex checklist
-    findings = check_document_completeness(text, doc_type, label, url)
+    findings = check_document_completeness(text, doc_type, label, url,
                                           business_profile=business_profile)
    all_checks: list[CheckItem] = []
    completeness = 0
@@ -1135,8 +1355,25 @@ def _company_name_from_url(doc_entries: list[dict]) -> str | None:
 def _get_skip_types(profile) -> dict[str, str]:
-    """Doc_types to skip entirely. Currently empty — we check everything
+    """Doc_types to skip entirely with a per-type reason message.
-    and flag irrelevant items as INFO instead of skipping."""
+
    Heute primaer fuer OEM-Konfigurator-Pattern (BMW/Audi/Mercedes):
    wenn die Site kein Direkt-Vertrieb macht, sind AGB/Widerruf/
    Nutzungsbedingungen nicht Pflicht auf der Website — sie werden
    beim Vertragshaendler ausgehaendigt.
    """
    if getattr(profile, "no_direct_sales", False):
        msg = (
            "Nicht anwendbar — die Webseite schliesst keinen Direkt-"
            "Kaufvertrag (OEM-Konfigurator-Pattern, Vertrag laeuft "
            "ueber Vertragshaendler). AGB/Widerruf werden beim "
            "Haendler ausgehaendigt."
        )
        return {
            "agb": msg,
            "widerruf": msg,
            "nutzungsbedingungen": msg,
        }
    return {}
@@ -0,0 +1,135 @@
 """
 Executive-Summary-Block — der oberste Email-Abschnitt.
 Zeigt CFO / GF in 4 Zahlen den Gesamt-Mehrwert des Compliance-Checks:
  1) Compliance-Score (Trend vs Vorlauf)
  2) Anzahl analysierter Anbieter
  3) Geschaetztes jaehrliches Sparpotenzial (Range)
  4) Konsolidierungs-Potenzial (Anbieter koennen reduziert werden)
 Plus zwei Big-CTA-Buttons:
  - "Compliance-Maengel im Detail"  → springt zum Doc-Pruefungs-Block
  - "Konsolidierungs-Plan ansehen"  → springt zum Redundanz-Block
 Ziel: in 5 Sekunden sieht der Vorstand den ROI. Wenn neugierig, scrollt
 er weiter in die Detail-Bloecke (die UNTER dieser Summary liegen).
 """
 from __future__ import annotations
 def _fmt_eur_range(low: int, high: int) -> str:
    """Format a yearly saving range in EUR using "." as thousands separator.

    Returns "—" when both bounds are zero/empty, "~X €" when both bounds are
    equal and "LOW–HIGH €" otherwise.
    """
    if not low and not high:
        return "—"
    if low == high:
        return f"~{low:,} €".replace(",", ".")
    return f"{low:,}–{high:,} €".replace(",", ".")
 def build_exec_summary_html(
    scorecard: dict | None,
    previous_scorecard: dict | None,
    cmp_vendors: list[dict] | None,
    redundancy_report: dict | None,
    site_name: str = "",
 ) -> str:
    """Build the top-of-email Executive Summary with 4 KPIs + 2 CTAs.

    Args:
        scorecard: Current scorecard; ``totals.pct`` and ``totals.total`` are read.
        previous_scorecard: Scorecard of the previous run; when given, the
            score delta in percentage points is shown next to the score.
        cmp_vendors: Detected vendors; only the count is displayed.
        redundancy_report: Report whose ``summary`` provides
            ``estimated_saving_year_eur`` (low/high pair),
            ``consolidation_potential`` and ``estimated_saving_pct``.
        site_name: Display name of the checked site; HTML-escaped before use.

    Returns:
        One HTML fragment (a single styled ``<div>``).

    Missing keys and keys that are present but ``None`` both fall back to
    neutral defaults, so a sparse report never breaks email rendering.
    """
    from html import escape  # function-scope import keeps the block self-contained

    # 1) Compliance score
    totals = (scorecard or {}).get("totals") or {}
    pct = 0
    delta_str = ""
    score_color = "#94a3b8"
    if scorecard:
        pct = int(totals.get("pct") or 0)
        score_color = ("#16a34a" if pct >= 80 else
                       "#d97706" if pct >= 50 else "#dc2626")
        if previous_scorecard:
            prev_totals = previous_scorecard.get("totals") or {}
            d = pct - int(prev_totals.get("pct") or 0)
            if d:
                trend_color = "#16a34a" if d > 0 else "#dc2626"
                sign = "+" if d > 0 else ""
                delta_str = (
                    f'<span style="font-size:14px;color:{trend_color};margin-left:6px">'
                    f'{sign}{d} pp</span>'
                )
    total_checks = int(totals.get("total") or 0)

    # 2) Vendor count
    n_vendors = len(cmp_vendors or [])

    # 3+4) Saving + consolidation
    s = (redundancy_report or {}).get("summary") or {}
    saving = s.get("estimated_saving_year_eur")
    if isinstance(saving, (list, tuple)) and len(saving) == 2:
        sav_low, sav_high = (bound or 0 for bound in saving)
    else:
        # Absent, None or malformed range: render the "no estimate" dash.
        sav_low = sav_high = 0
    n_consolidation = s.get("consolidation_potential")
    if n_consolidation is None:
        n_consolidation = 0
    sav_pct = s.get("estimated_saving_pct")
    if sav_pct is None:
        sav_pct = "—"

    # Free-text values are escaped so markup characters cannot break the
    # surrounding email HTML.
    safe_site = escape(str(site_name or ""))
    safe_pct = escape(str(sav_pct))

    parts = [
        '<div style="font-family:-apple-system,BlinkMacSystemFont,sans-serif;'
        'max-width:700px;margin:0 auto 18px;padding:18px 22px;'
        'background:linear-gradient(135deg,#1e293b 0%,#0f172a 100%);'
        'border-radius:10px;color:white">',
        '<div style="font-size:11px;color:#94a3b8;text-transform:uppercase;'
        'letter-spacing:1.5px;margin-bottom:6px">Executive Summary</div>',
        '<h2 style="margin:0 0 16px;font-size:18px;color:white">'
        f'Compliance-Check {safe_site}</h2>',
        # 2x2 KPI grid
        '<table style="width:100%;border-collapse:separate;border-spacing:8px">',
        # Row 1: compliance score + vendor count
        '<tr>',
        '<td style="width:50%;padding:12px 14px;background:rgba(255,255,255,0.05);'
        'border-radius:6px;border:1px solid rgba(255,255,255,0.08)">'
        '<div style="font-size:10px;color:#94a3b8;text-transform:uppercase;'
        'letter-spacing:1px;margin-bottom:4px">DSGVO / TDDDG / TMG Score</div>'
        f'<div style="font-size:28px;font-weight:700;color:{score_color}">'
        f'{pct}%{delta_str}</div>'
        '<div style="font-size:11px;color:#cbd5e1;margin-top:2px">'
        f'aus {total_checks} Pflicht-Pruefungen</div>'
        '</td>',
        '<td style="width:50%;padding:12px 14px;background:rgba(255,255,255,0.05);'
        'border-radius:6px;border:1px solid rgba(255,255,255,0.08)">'
        '<div style="font-size:10px;color:#94a3b8;text-transform:uppercase;'
        'letter-spacing:1px;margin-bottom:4px">Identifizierte Anbieter</div>'
        f'<div style="font-size:28px;font-weight:700;color:white">{n_vendors}</div>'
        '<div style="font-size:11px;color:#cbd5e1;margin-top:2px">'
        f'davon {n_consolidation} konsolidierbar</div>'
        '</td>',
        '</tr>',
        # Row 2: saving estimate
        '<tr>',
        '<td colspan="2" style="padding:14px 16px;background:linear-gradient(90deg,'
        'rgba(16,185,129,0.15) 0%,rgba(16,185,129,0.05) 100%);'
        'border-radius:6px;border:1px solid rgba(16,185,129,0.3)">'
        '<div style="font-size:10px;color:#86efac;text-transform:uppercase;'
        'letter-spacing:1px;margin-bottom:4px">'
        'Geschaetztes Sparpotenzial pro Jahr (Tool-Lizenzen, ohne Media-Spend)</div>'
        '<div style="font-size:24px;font-weight:700;color:#34d399">'
        f'{_fmt_eur_range(sav_low, sav_high)}'
        f'<span style="font-size:14px;color:#86efac;margin-left:8px">({safe_pct})</span></div>'
        '<div style="font-size:11px;color:#cbd5e1;margin-top:4px">'
        'durch Konsolidierung redundanter Anbieter auf je 1 EU-Tool pro '
        'Funktions-Kategorie. <em>Schaetzbereich, mit dem Einkauf zu verifizieren.</em>'
        '</div></td>',
        '</tr>',
        '</table>',
        # CTAs: in-document anchors to the scorecard and redundancy blocks
        '<div style="margin-top:14px;padding-top:12px;border-top:1px solid '
        'rgba(255,255,255,0.1);text-align:center">',
        '<a href="#mc-scorecard" style="display:inline-block;padding:8px 16px;'
        'background:#7c3aed;color:white;text-decoration:none;border-radius:6px;'
        'font-size:12px;font-weight:600;margin-right:8px">'
        'Compliance-Maengel im Detail &rarr;</a>',
        '<a href="#optimierungspotenzial" style="display:inline-block;padding:8px 16px;'
        'background:#10b981;color:white;text-decoration:none;border-radius:6px;'
        'font-size:12px;font-weight:600">'
        'Konsolidierungs-Plan &rarr;</a>',
        '</div>',
        '</div>',
    ]
    return "".join(parts)
@@ -421,10 +421,18 @@ def _render_vendor_row_full(v: dict) -> str:
            f'{", ".join(flags[:4])}</div>'
            f'{actions_html}'
        )
    risk = v.get("compliance_risk") or {}
    risk_label = risk.get("label") or ""
    risk_badge = ""
    if risk_label and risk_label != "unklar":
        rc = {"kritisch": ("#dc2626", "#fff"), "hoch": ("#fecaca", "#991b1b"),
              "mittel": ("#fde68a", "#92400e"), "gering": ("#d1fae5", "#065f46")}.get(risk_label, ("#e5e7eb", "#475569"))
        risk_badge = (f'<span style="margin-left:6px;padding:1px 5px;border-radius:3px;font-size:9px;'
                      f'background:{rc[0]};color:{rc[1]}">Risk: {risk_label}</span>')
    return (
        f'<tr style="border-top:1px solid #e2e8f0">'
        f'<td style="padding:6px 8px;color:#1e293b;font-size:11px">'
-        f'{name}{flag_str}</td>'
+        f'{name}{risk_badge}{flag_str}</td>'
        f'<td style="padding:6px 8px;color:#475569;font-size:11px">{category}</td>'
        f'<td style="padding:6px 8px;color:#475569;font-size:11px">{country}</td>'
        f'<td style="padding:6px 8px;text-align:center;color:#475569;font-size:11px">'
@@ -28,9 +28,10 @@ def build_redundancy_html(report: dict | None) -> str:
    pct = s.get("estimated_saving_pct") or "n/a"
    parts = [
-        '<div style="font-family:-apple-system,BlinkMacSystemFont,sans-serif;'
+        '<div id="optimierungspotenzial" style="font-family:-apple-system,'
-        'max-width:700px;margin:0 auto 16px;padding:14px 18px;'
+        'BlinkMacSystemFont,sans-serif;max-width:700px;margin:0 auto 16px;'
-        'background:#fef3c7;border:1px solid #fcd34d;border-radius:8px">',
+        'padding:14px 18px;background:#fef3c7;border:1px solid #fcd34d;'
        'border-radius:8px">',
        '<h3 style="margin:0 0 6px;font-size:14px;color:#92400e">'
        'Optimierungspotenzial: Redundanzen + EU-Alternativen</h3>',
        f'<p style="margin:0 0 10px;font-size:11px;color:#78350f">'
@@ -134,7 +134,9 @@ def build_management_summary(results: list[DocCheckResult]) -> str:
    ok = [r for r in results if r.completeness_pct == 100 and not r.error]
    fixable = [r for r in results if 0 < r.completeness_pct < 100 and not r.error]
    critical = [r for r in results if r.completeness_pct == 0 and not r.error]
-    errors = [r for r in results if r.error]
+    not_applicable = [r for r in results if r.error
                      and r.error.startswith("Nicht anwendbar")]
    errors = [r for r in results if r.error and r not in not_applicable]
    html = [
        '<div style="font-family:-apple-system,BlinkMacSystemFont,sans-serif;'
@@ -150,17 +152,24 @@ def build_management_summary(results: list[DocCheckResult]) -> str:
        html.append('<p>Keine Dokumente geprueft.</p></div>')
        return "\n".join(html)
    na_note = (
        f' Zusaetzlich {len(not_applicable)} Dokument{"" if len(not_applicable) == 1 else "e"} '
        f'als NICHT ANWENDBAR markiert (kein Direkt-Vertrieb — '
        f'OEM-Konfigurator-Pattern).' if not_applicable else ""
    )
    if len(ok) == total:
        html.append(
-            '<p style="color:#16a34a;font-weight:600;font-size:15px">'
+            f'<p style="color:#16a34a;font-weight:600;font-size:15px">'
-            'Alle Dokumente sind vollstaendig. Keine dringenden Massnahmen noetig.</p>'
+            f'Alle Dokumente sind vollstaendig. Keine dringenden Massnahmen noetig.'
            f'{na_note}</p>'
        )
    else:
        html.append(
            f'<p style="font-size:14px;color:#475569">'
            f'{len(ok)} von {total} Dokumenten sind vollstaendig. '
            f'{len(fixable)} brauchen Korrekturen'
-            f'{f", {len(critical)} fehlen oder sind unbrauchbar" if critical else ""}.</p>'
+            f'{f", {len(critical)} fehlen oder sind unbrauchbar" if critical else ""}.'
            f'{na_note}</p>'
        )
    # Concrete actions
@@ -173,7 +182,7 @@ def build_management_summary(results: list[DocCheckResult]) -> str:
            if c.level == 1 and not c.passed and not c.skipped
            and c.severity != "INFO"
        ]
-        for c in failed_checks[:3]:  # Max 3 per document
+        for c in failed_checks:  # P17-B: kein Per-Doc-Cap
            action = _check_to_action(r.label, c.label, c.hint)
            if action:
                actions.append(action)
@@ -184,7 +193,7 @@ def build_management_summary(results: list[DocCheckResult]) -> str:
            'Konkrete Aufgaben:</h3>'
            '<ol style="font-size:13px;color:#475569;padding-left:20px;margin:0">'
        )
-        for a in actions[:10]:  # Max 10 actions
+        for a in actions[:20]:  # P17-B: 10 -> 20
            html.append(f'<li style="margin-bottom:6px">{a}</li>')
        html.append('</ol>')
@@ -279,10 +288,13 @@ def _render_document(html: list[str], r: DocCheckResult, doc_text: str = "") ->
        r.error.startswith("Nicht eingereicht")
        or r.error.startswith("Auf der Website nicht gefunden")
    )
    is_not_applicable = bool(r.error) and r.error.startswith("Nicht anwendbar")
    if is_missing:
        status_label = ("NICHT GEFUNDEN"
                        if r.error.startswith("Auf der Website")
                        else "NICHT EINGEREICHT")
    elif is_not_applicable:
        status_label = "NICHT ANWENDBAR"
    elif r.error:
        status_label = "FEHLER"
@@ -330,6 +342,13 @@ def _render_document(html: list[str], r: DocCheckResult, doc_text: str = "") ->
            'background:#fafafa;border-top:1px solid #f3f4f6">'
            + body_msg + '</div>'
        )
    elif is_not_applicable:
        html.append(
            '<div style="padding:12px 16px;color:#475569;font-size:12px;'
            'background:#f1f5f9;border-top:1px solid #cbd5e1;border-left:'
            '3px solid #94a3b8">'
            + r.error + '</div>'
        )
    elif r.error:
        html.append(f'<div style="padding:12px 16px;color:#991b1b">{r.error}</div>')
    else:
@@ -44,7 +44,7 @@ def build_scorecard_html(
    trend_str = _delta_badge(overall_pct, prev_total_pct) if prev_total_pct is not None else ""
    head = (
-        '<div style="font-family:-apple-system,BlinkMacSystemFont,sans-serif;'
+        '<div id="mc-scorecard" style="font-family:-apple-system,BlinkMacSystemFont,sans-serif;'
        'max-width:700px;margin:0 auto 16px;padding:12px 16px;'
        'background:#f0f9ff;border:1px solid #bae6fd;border-radius:8px">'
        '<h3 style="margin:0 0 6px;font-size:14px;color:#0369a1">'
@@ -0,0 +1,104 @@
 """
 Voll-Audit Findings Router — unified view across all 4 finding sources.
 Endpoint:
  GET /api/compliance/agent/findings/{check_id}
      ?source=mc|pflichtangabe|vendor|redundanz|all
      &severity=CRITICAL|HIGH|MEDIUM|LOW|INFO|all
      &doc_type=impressum|dse|cookie|...|all
      &status=failed|passed|skipped|na|info|all
      &q=<freitext>
      &limit=<int>
 Liefert summary + filtered findings list. Frontend rendert daraus den
 Voll-Audit-Tab unter /sdk/agent/audit/<check_id>.
 """
 from __future__ import annotations
 import logging
 from urllib.parse import urlparse
 from fastapi import APIRouter, HTTPException, Query
 from compliance.services.unified_findings_store import (
    findings_summary,
    list_findings,
 )
 from compliance.services.compliance_audit_log import get_check_run
 # Module-level logger, named after this module.
 logger = logging.getLogger(__name__)
 # Router for the unified-findings endpoint; every route declared on it is
 # served below the "/compliance/agent" prefix.
 router = APIRouter(prefix="/compliance/agent", tags=["agent"])
 def _normalize_domain(d: str) -> str:
    if not d:
        return ""
    if "://" not in d:
        d = "https://" + d
    host = urlparse(d).netloc.lower()
    return host[4:] if host.startswith("www.") else host
@router.get("/findings/{check_id}")
def get_findings(
    check_id: str,
    source: str | None = Query(None, description="mc|pflichtangabe|vendor|redundanz|all"),
    severity: str | None = Query(None, description="CRITICAL|HIGH|MEDIUM|LOW|INFO|all"),
    doc_type: str | None = Query(None),
    status: str | None = Query(None, description="failed|passed|skipped|na|info|all"),
    q: str | None = Query(None, description="freitext-suche label/vendor"),
    limit: int = Query(1000, ge=1, le=5000),
    expected_domain: str | None = Query(
        None, description="Hard-Assertion: Run muss zu dieser Domain gehoeren (Cross-Tenant-Schutz)",
    ),
) -> dict:
    """Return the findings summary plus the filtered findings list of one check run.

    When ``expected_domain`` is supplied, the ``base_domain`` of the stored
    run must equal it after normalisation; otherwise (or when the run does
    not exist) the request is rejected with HTTP 403.  Without
    ``expected_domain`` no domain check takes place.

    Any exception raised while loading the findings is logged and answered
    with a ``found: False`` payload that carries the first 200 characters of
    the error text instead of an HTTP error.
    """
    # Optional domain assertion (cross-tenant guard).
    if expected_domain:
        run = get_check_run(check_id)
        actual = _normalize_domain((run or {}).get("base_domain") or "")
        wanted = _normalize_domain(expected_domain)
        if not run or actual != wanted:
            owner = actual or "?"
            raise HTTPException(
                status_code=403,
                detail=(
                    f"Cross-tenant access blocked: check_id {check_id} "
                    f"gehoert zu Domain '{owner}', angefragt: "
                    f"'{wanted}'"
                ),
            )

    try:
        summary = findings_summary(check_id)
        findings = list_findings(
            check_id=check_id,
            source_type=source,
            severity=severity,
            doc_type=doc_type,
            status=status,
            q=q,
            limit=limit,
        )
        # Echo the effective filter so the client can display what was applied.
        applied_filter = {
            "source": source or "all",
            "severity": severity or "all",
            "doc_type": doc_type or "all",
            "status": status or "all",
            "q": q or "",
            "limit": limit,
        }
        payload = {
            "found": summary.get("total", 0) > 0,
            "check_id": check_id,
            "summary": summary,
            "filter": applied_filter,
            "count": len(findings),
            "findings": findings,
        }
    except Exception as e:
        logger.exception("get_findings failed for %s", check_id)
        payload = {
            "found": False,
            "check_id": check_id,
            "error": str(e)[:200],
            "summary": {},
            "count": 0,
            "findings": [],
        }
    return payload
@@ -0,0 +1,244 @@
 """FastAPI routes for QUAIDAL-derived Controls (AI Trainingsdaten-Qualität).
 Endpoints:
 - GET /v1/quaidal/stats             - Counts by kind + source provenance
 - GET /v1/quaidal/controls          - List all controls, optional kind= filter
 - GET /v1/quaidal/controls/{id}     - Single derived control by derived_id
 - GET /v1/quaidal/criteria          - The 10 QKB criteria with linked QB/MA IDs
 - GET /v1/quaidal/criteria/{id}     - Single QKB with full child tree (QB → MA → QM)
 The controls are Clean-Room derived from BSI QUAIDAL. See
 control-pipeline/scripts/derive_quaidal_mcs.py and migration 011.
 """
 from __future__ import annotations
 import logging
 from typing import Optional
 from fastapi import APIRouter, HTTPException, Query
 from pydantic import BaseModel
 from sqlalchemy import text
 from database import SessionLocal
 # Module-level logger, named after this module.
 logger = logging.getLogger(__name__)
 # Router for the QUAIDAL endpoints; every route declared on it is served
 # below the "/v1/quaidal" prefix.
 router = APIRouter(prefix="/v1/quaidal", tags=["quaidal"])
 # ---------------------------------------------------------------------------
 # Response shapes
 # ---------------------------------------------------------------------------
 class ExternalRef(BaseModel):
    # One entry of a control's ``external_refs`` list; _row_to_control builds
    # it by unpacking each stored entry as keyword arguments.
    framework: str
    # Optional citation within that framework (presumably an article or
    # section reference — confirm against the stored data).
    citation: Optional[str] = None
 class SourceProvenance(BaseModel):
    # Where a derived control comes from; filled from the row's
    # ``source_*`` columns by _row_to_control.
    framework: str  # from source_framework; this module queries "BSI QUAIDAL"
    section: str  # from source_section; the QUAIDAL ID, e.g. "QKB-01"
    url: Optional[str] = None  # from source_url
    commit_sha: Optional[str] = None  # from source_commit_sha
    title_original: Optional[str] = None  # from source_title_original
    license_note: Optional[str] = None  # from source_license_note
 class DerivedControl(BaseModel):
    # API representation of one row of ``compliance.derived_controls``.
    derived_id: str
    # One of: criterion | building_block | measure | metric.
    kind: str
    canonical_name: str
    description: str
    regulation_anchor: Optional[str] = None
    # QUAIDAL IDs this control links to; the tree endpoint follows the ones
    # prefixed "MA-" (measures) and "QM-" (metrics).
    related_quaidal_ids: list[str]
    external_refs: list[ExternalRef]
    source: SourceProvenance
    # Taken from the ``plagiarism_score_at_generation`` column; None when unset.
    plagiarism_score: Optional[float] = None
 class ControlsListResponse(BaseModel):
    # Paged list of controls.
    # ``total`` counts every row matching the filter, independent of
    # limit/offset; ``controls`` holds only the requested page.
    total: int
    controls: list[DerivedControl]
 class CriterionWithChildren(BaseModel):
    """A QKB criterion together with its linked building blocks, measures and metrics.

    All children are returned as full control objects, not just as IDs.
    """
    criterion: DerivedControl
    building_blocks: list[DerivedControl]
    measures: list[DerivedControl]
    metrics: list[DerivedControl]
 class StatsResponse(BaseModel):
    # Aggregate view over the QUAIDAL-derived controls.
    counts_by_kind: dict[str, int]  # number of controls per ``kind``
    source_framework: str
    # Both values below are read from a single row of the framework
    # (query uses LIMIT 1); None when the framework has no rows.
    source_commit_sha: Optional[str]
    license_note: Optional[str]
 # ---------------------------------------------------------------------------
 # DB helpers
 # ---------------------------------------------------------------------------
 def _row_to_control(row) -> DerivedControl:
    """Convert one ``compliance.derived_controls`` result row into a DerivedControl.

    NULL list columns become empty lists and a NULL plagiarism score stays
    ``None``; a non-NULL score is converted to ``float``.
    """
    refs = [ExternalRef(**entry) for entry in (row.external_refs or [])]
    provenance = SourceProvenance(
        framework=row.source_framework,
        section=row.source_section,
        url=row.source_url,
        commit_sha=row.source_commit_sha,
        title_original=row.source_title_original,
        license_note=row.source_license_note,
    )
    raw_score = row.plagiarism_score_at_generation
    score = None if raw_score is None else float(raw_score)
    return DerivedControl(
        derived_id=row.derived_id,
        kind=row.kind,
        canonical_name=row.canonical_name,
        description=row.description,
        regulation_anchor=row.regulation_anchor,
        related_quaidal_ids=row.related_quaidal_ids or [],
        external_refs=refs,
        source=provenance,
        plagiarism_score=score,
    )
 # Column list interpolated into every SELECT of this module; it has to
 # provide each attribute that _row_to_control() reads from a result row.
 _SELECT_COLUMNS = """
    derived_id, kind, canonical_name, description, regulation_anchor,
    related_quaidal_ids, external_refs,
    source_framework, source_section, source_url, source_commit_sha,
    source_title_original, source_license_note,
    plagiarism_score_at_generation
 """
 # ---------------------------------------------------------------------------
 # Endpoints
 # ---------------------------------------------------------------------------
@router.get("/stats", response_model=StatsResponse)
def get_stats() -> StatsResponse:
    """Return per-kind control counts and the provenance of the QUAIDAL source.

    Commit SHA and license note are read from one row of the framework and
    are ``None`` when no row exists.
    """
    framework = "BSI QUAIDAL"
    count_query = text(
        "SELECT kind, COUNT(*) AS n FROM compliance.derived_controls "
        "WHERE source_framework = :fw GROUP BY kind"
    )
    meta_query = text(
        "SELECT source_commit_sha, source_license_note FROM compliance.derived_controls "
        "WHERE source_framework = :fw LIMIT 1"
    )
    with SessionLocal() as db:
        kind_rows = db.execute(count_query, {"fw": framework}).all()
        provenance = db.execute(meta_query, {"fw": framework}).first()

    counts: dict[str, int] = {}
    for kind_row in kind_rows:
        counts[kind_row.kind] = kind_row.n

    commit_sha = provenance.source_commit_sha if provenance else None
    license_note = provenance.source_license_note if provenance else None
    return StatsResponse(
        counts_by_kind=counts,
        source_framework="BSI QUAIDAL",
        source_commit_sha=commit_sha,
        license_note=license_note,
    )
@router.get("/controls", response_model=ControlsListResponse)
def list_controls(
    kind: Optional[str] = Query(None, description="criterion | building_block | measure | metric"),
    limit: int = Query(500, ge=1, le=2000),
    offset: int = Query(0, ge=0),
) -> ControlsListResponse:
    """Return one page of QUAIDAL-derived controls, optionally restricted to a kind.

    ``total`` in the response counts all rows matching the filter, not just
    the returned page.  Rows are ordered by ``source_section``.
    """
    conditions = ["source_framework = :fw"]
    filter_params: dict = {"fw": "BSI QUAIDAL"}
    if kind:
        conditions.append("kind = :kind")
        filter_params["kind"] = kind
    where_clause = " AND ".join(conditions)

    page_sql = (
        f"SELECT {_SELECT_COLUMNS} FROM compliance.derived_controls "
        f"WHERE {where_clause} "
        "ORDER BY source_section LIMIT :limit OFFSET :offset"
    )
    count_sql = f"SELECT COUNT(*) FROM compliance.derived_controls WHERE {where_clause}"
    # The count query only knows the filter parameters, the page query
    # additionally needs the paging window.
    page_params = {**filter_params, "limit": limit, "offset": offset}

    with SessionLocal() as db:
        rows = db.execute(text(page_sql), page_params).all()
        total = db.execute(text(count_sql), filter_params).scalar() or 0

    controls = [_row_to_control(row) for row in rows]
    return ControlsListResponse(total=int(total), controls=controls)
@router.get("/controls/{derived_id}", response_model=DerivedControl)
def get_control(derived_id: str) -> DerivedControl:
    """Return the derived control with the given ``derived_id``.

    Raises:
        HTTPException: 404 when no row has this ``derived_id``.

    NOTE(review): unlike the list endpoints, this lookup does not filter by
    source framework — confirm that this is intended.
    """
    query = text(
        f"SELECT {_SELECT_COLUMNS} FROM compliance.derived_controls WHERE derived_id = :id"
    )
    with SessionLocal() as db:
        match = db.execute(query, {"id": derived_id}).first()
    if match:
        return _row_to_control(match)
    raise HTTPException(status_code=404, detail=f"Control {derived_id} not found")
@router.get("/criteria", response_model=list[DerivedControl])
def list_criteria() -> list[DerivedControl]:
    """List all QUAIDAL controls of kind ``criterion``, ordered by source section.

    The full child tree of a single criterion is served by
    ``/criteria/{section_id}``.
    """
    query = text(
        f"SELECT {_SELECT_COLUMNS} FROM compliance.derived_controls "
        "WHERE source_framework = :fw AND kind = 'criterion' ORDER BY source_section"
    )
    with SessionLocal() as db:
        criterion_rows = db.execute(query, {"fw": "BSI QUAIDAL"}).all()
    return list(map(_row_to_control, criterion_rows))
@router.get("/criteria/{section_id}", response_model=CriterionWithChildren)
def get_criterion_tree(section_id: str) -> CriterionWithChildren:
    """Return one criterion with its building blocks, their measures and those measures' metrics.

    ``section_id`` is the canonical QUAIDAL ID (e.g. ``QKB-01``); it is
    upper-cased before the lookup.

    Raises:
        HTTPException: 404 when no criterion with that section ID exists.
    """
    framework = "BSI QUAIDAL"
    wanted = section_id.upper()

    def linked_ids(parents: list[DerivedControl], prefix: str) -> list[str]:
        # Collect the related IDs of all parents that carry the given prefix.
        return [
            rid
            for parent in parents
            for rid in parent.related_quaidal_ids
            if rid.startswith(prefix)
        ]

    with SessionLocal() as db:

        def fetch_children(kind: str, sections: list[str]) -> list[DerivedControl]:
            # ``kind`` is always one of the fixed literals passed below, never
            # request input, so inlining it into the SQL text is safe.
            if not sections:
                return []
            rows = db.execute(text(
                f"SELECT {_SELECT_COLUMNS} FROM compliance.derived_controls "
                f"WHERE source_framework = :fw AND kind = '{kind}' "
                "AND source_section = ANY(:ids) ORDER BY source_section"
            ), {"fw": framework, "ids": sections}).all()
            return [_row_to_control(r) for r in rows]

        criterion_row = db.execute(text(
            f"SELECT {_SELECT_COLUMNS} FROM compliance.derived_controls "
            "WHERE source_framework = :fw AND source_section = :sid AND kind = 'criterion'"
        ), {"fw": framework, "sid": wanted}).first()
        if not criterion_row:
            raise HTTPException(status_code=404, detail=f"Criterion {wanted} not found")

        # Level 1: building blocks referenced directly by the criterion.
        building_blocks = fetch_children(
            "building_block", criterion_row.related_quaidal_ids or []
        )
        # Level 2: measures referenced by those building blocks (de-duplicated).
        measures = fetch_children(
            "measure", list(set(linked_ids(building_blocks, "MA-")))
        )
        # Level 3: metrics referenced by those measures (de-duplicated).
        metrics = fetch_children(
            "metric", list(set(linked_ids(measures, "QM-")))
        )

    return CriterionWithChildren(
        criterion=_row_to_control(criterion_row),
        building_blocks=building_blocks,
        measures=measures,
        metrics=metrics,
    )
@@ -0,0 +1,196 @@
 """
 Saving-Scan-Funnel Endpoint — Marketing-Lead → Compliance-Check.
 Externes Form (https://breakpilot.ai/savings-scan) postet hier:
  POST /api/compliance/agent/saving-scan/start
  Body: {"url": "...", "email": "..."}
 Server-side:
  1. Validierung URL + Email (E-Mail-Regex, URL-Schema).
  2. Rate-Limit: max 1 vollstaendiger Scan / Domain / 24h
     (saving_scan_allowed aus compliance_user_agent).
  3. Lead persistieren (saving_scan_leads in Sidecar-SQLite) — fuer
     spaeteren Report-Versand + Sales-Follow-Up.
  4. Compliance-Check starten mit Auto-Discovery (DocumentInput leer
     ausser Homepage). Der bestehende Worker laeuft TDM-Check, dann
     Discovery, dann Pruefung.
  5. check_id zurueck — Frontend pollt /compliance-check/<check_id>.
 """
 from __future__ import annotations
 import logging
 import os
 import re
 import sqlite3
 import uuid as _uuid
 from datetime import datetime, timezone
 from pathlib import Path
 import asyncio
 from fastapi import APIRouter, HTTPException
 from pydantic import BaseModel, Field
 from compliance.services.compliance_user_agent import (
    base_domain_of, saving_scan_allowed,
 )
 # Module-level logger, named after this module.
 logger = logging.getLogger(__name__)
 # Router for the saving-scan funnel; every route declared on it is served
 # below the "/compliance/agent" prefix.
 router = APIRouter(prefix="/compliance/agent", tags=["agent"])
 # SQLite file that stores the leads; overridable via COMPLIANCE_AUDIT_DB.
 DB_PATH = os.getenv("COMPLIANCE_AUDIT_DB", "/data/compliance_audits.db")
 # Pragmatic e-mail shape check: local part, "@", host, dot, TLD of >= 2 letters.
 _EMAIL_RE = re.compile(r"^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$")
 # Accepts http(s) URLs made of a host and an optional path; a port or a
 # query string directly after the host does not match.
 _URL_RE = re.compile(r"^https?://[A-Za-z0-9.-]+(/.*)?$")
 class SavingScanRequest(BaseModel):
    # Request body of POST /saving-scan/start.
    url: str = Field(..., min_length=4, max_length=400)  # site to scan
    email: str = Field(..., min_length=5, max_length=200)  # report recipient
    # NOTE(review): consent defaults to True, so a request that omits the
    # field is treated as consent given — confirm this is intended.
    consent: bool = Field(
        True, description="Marketing-Consent fuer Sales-Follow-Up — "
                          "muss True sein laut Form-Checkbox.",
    )
 class SavingScanResponse(BaseModel):
    # Response body of POST /saving-scan/start.
    check_id: str  # ID of the started compliance check
    status: str
    message: str = ""  # optional text; empty by default
 def _ensure_leads_table() -> None:
    """Create the ``saving_scan_leads`` table and its two indexes if they are missing.

    The parent directory of ``DB_PATH`` is created first, so the function
    also works on a fresh volume.  It is idempotent (``IF NOT EXISTS``).
    """
    from contextlib import closing  # function-scope import: only needed here

    Path(DB_PATH).parent.mkdir(parents=True, exist_ok=True)
    # sqlite3's connection context manager only commits or rolls back; it does
    # not close the connection.  closing() releases the handle, the inner
    # ``conn`` context keeps the commit-on-success behaviour.
    with closing(sqlite3.connect(DB_PATH)) as conn, conn:
        conn.executescript("""
            CREATE TABLE IF NOT EXISTS saving_scan_leads (
                id          INTEGER PRIMARY KEY AUTOINCREMENT,
                ts          TEXT NOT NULL,
                email       TEXT NOT NULL,
                url         TEXT NOT NULL,
                base_domain TEXT NOT NULL,
                check_id    TEXT,
                consent     INTEGER NOT NULL,
                source      TEXT
            );
            CREATE INDEX IF NOT EXISTS idx_leads_domain ON saving_scan_leads(base_domain, ts);
            CREATE INDEX IF NOT EXISTS idx_leads_email ON saving_scan_leads(email, ts);
        """)
 def _persist_lead(email: str, url: str, check_id: str, consent: bool) -> None:
    """Store one marketing lead in ``saving_scan_leads`` (best effort).

    Args:
        email: Lead e-mail; stored lower-cased and stripped.
        url: URL exactly as entered; its base domain is stored alongside.
        check_id: ID of the compliance check started for this lead.
        consent: Marketing consent flag, stored as 1/0.

    Never raises: any failure is logged as a warning so that a storage
    problem does not block the scan itself.
    """
    from contextlib import closing  # function-scope import: only needed here

    try:
        _ensure_leads_table()
        # closing() releases the connection; sqlite3's own context manager
        # (the inner ``conn``) only commits on success / rolls back on error.
        with closing(sqlite3.connect(DB_PATH)) as conn, conn:
            conn.execute(
                "INSERT INTO saving_scan_leads "
                "(ts, email, url, base_domain, check_id, consent, source) "
                "VALUES (?, ?, ?, ?, ?, ?, ?)",
                (
                    datetime.now(timezone.utc).isoformat(),
                    email.lower().strip(),
                    url,
                    base_domain_of(url),
                    check_id,
                    1 if consent else 0,
                    "saving_scan_form",
                ),
            )
    except Exception as e:
        logger.warning("persist lead failed: %s", e)
 def _normalize_url(url: str) -> str:
    """Strip path → behaupt nur Homepage, der Discover findet den Rest."""
    if "://" not in url:
        url = "https://" + url
    from urllib.parse import urlparse
    p = urlparse(url)
    return f"{p.scheme}://{p.netloc}/"
@router.post("/saving-scan/start", response_model=SavingScanResponse)
 async def start_saving_scan(req: SavingScanRequest) -> SavingScanResponse:
    """Trigger compliance check from the marketing-funnel form."""
    if not _EMAIL_RE.match(req.email):
        raise HTTPException(400, "Ungueltige E-Mail-Adresse.")
    if not _URL_RE.match(req.url):
        raise HTTPException(400, "URL muss mit http:// oder https:// beginnen.")
    if not req.consent:
        raise HTTPException(400, "Marketing-Consent erforderlich.")
    domain = base_domain_of(req.url)
    if not domain:
        raise HTTPException(400, "Konnte Domain nicht ermitteln.")
    allowed, wait_s = saving_scan_allowed(req.url)
    if not allowed:
        raise HTTPException(
            429,
            f"Fuer '{domain}' wurde in den letzten 24h bereits ein Scan "
            f"durchgefuehrt. Bitte in {wait_s // 3600}h {wait_s % 3600 // 60}min "
            f"erneut versuchen.",
        )
    # Lazy import to avoid circular dependency at module load.
    from compliance.api.agent_compliance_check_routes import (
        DocumentInput,
        ComplianceCheckRequest,
        _run_compliance_check,
        _compliance_check_jobs,
    )
    homepage = _normalize_url(req.url)
    check_id = str(_uuid.uuid4())[:8]
    _compliance_check_jobs[check_id] = {
        "status": "running",
        "progress": "Saving-Scan gestartet — Auto-Discovery laeuft...",
        "progress_pct": 0,
        "result": None,
        "error": "",
    }
    # Single "other" entry forces auto-discovery to fill in the rest.
    docs = [DocumentInput(doc_type="other", url=homepage)]
    check_req = ComplianceCheckRequest(
        documents=docs, recipient=req.email.lower().strip(),
    )
    _persist_lead(req.email, req.url, check_id, req.consent)
    asyncio.create_task(_run_compliance_check(check_id, check_req))
    logger.info("saving-scan start: check_id=%s domain=%s email=%s",
                check_id, domain, req.email[:3] + "***")
    return SavingScanResponse(
        check_id=check_id,
        status="running",
        message=f"Scan gestartet fuer {domain}. Bericht in ~3-5 Minuten.",
    )
@router.get("/saving-scan/lead-count")
 def saving_scan_lead_count() -> dict:
    """Diagnostik fuer das Sales-Dashboard."""
    try:
        _ensure_leads_table()
        with sqlite3.connect(DB_PATH) as conn:
            total = conn.execute(
                "SELECT COUNT(*) FROM saving_scan_leads",
            ).fetchone()[0]
            last_24h = conn.execute(
                "SELECT COUNT(*) FROM saving_scan_leads "
                "WHERE ts > datetime('now', '-1 day')",
            ).fetchone()[0]
            top_domains = conn.execute(
                "SELECT base_domain, COUNT(*) AS n FROM saving_scan_leads "
                "GROUP BY base_domain ORDER BY n DESC LIMIT 10",
            ).fetchall()
            return {
                "total_leads": total,
                "last_24h": last_24h,
                "top_domains": [{"domain": d, "scans": n} for d, n in top_domains],
            }
    except Exception as e:
        return {"error": str(e)[:200]}
@@ -0,0 +1,149 @@
 """
 k-Anonymitaets-Helper fuer Branchen-Benchmarks (P6-Vorbereitung).
 Vor jeder Veroeffentlichung von Benchmark-Aussagen pruefen, ob die
 zugrundeliegende Stichprobe gross genug ist, dass keine Re-Identifikation
 einzelner Hersteller moeglich wird.
 Default k=5: jede publizierbare Aussage muss auf mindestens 5 verschiedenen
 Datensubjekten (z.B. OEM-Sites) beruhen. Bei OEM-Markt mit ~30 Spielern
 ist k=5 das Minimum, um "ein deutscher Premium-Hersteller mit X Modellen"
 auszuschliessen.
 Memory: feedback_oem_data_legal.md + project_legal_contracts_2026_07.md.
 Verwendung:
  from compliance.services.benchmark_k_anonymity import (
      enforce_k_anonymity, quantize_value, KAnonymityError,
  )
  rows = [...]                          # pro Hersteller 1 Row
  safe_groups = enforce_k_anonymity(rows, group_keys=["segment", "country"])
  # safe_groups: nur Gruppen mit count >= 5 zurueck
 """
 from __future__ import annotations
 from collections.abc import Iterable
 from typing import Any
 DEFAULT_K = 5
 class KAnonymityError(RuntimeError):
    """The sample is too small to support a publishable statement."""
 def assert_min_sample(n: int, k: int = DEFAULT_K, context: str = "") -> None:
    """Raise KAnonymityError when the sample size n is below k.

    Args:
        n: number of data subjects behind the statement.
        k: minimum required sample size.
        context: optional text appended to the error message.
    """
    if not n < k:
        return
    suffix = f" — Kontext: {context}" if context else ""
    raise KAnonymityError(
        f"Stichprobe zu klein fuer Publikation: n={n} < k={k}" + suffix
    )
 def quantize_value(value: float | int, step: int = 5) -> int:
    """Round a number down to the nearest multiple of step (generalisation).

    quantize_value(67, 5) -> 65
    quantize_value(83, 10) -> 80

    Coarsening numeric signals prevents exact identification. A step of
    zero or less disables quantisation and returns int(value).
    """
    return int(value) if step <= 0 else int(value // step) * step
 def quantize_range(value: float | int, step: int = 10) -> str:
    """Return the range bucket of value as a string, e.g. '60-70%'."""
    lower = quantize_value(value, step)
    upper = lower + step
    return "{}-{}%".format(lower, upper)
 def group_and_count(
    rows: Iterable[dict],
    keys: list[str],
 ) -> dict[tuple, int]:
    """Group rows by the values of all `keys` and count rows per bucket.

    A key missing from a row contributes the empty string to its bucket.
    """
    tally: dict[tuple, int] = {}
    for row in rows:
        signature = tuple(row.get(name, "") for name in keys)
        if signature in tally:
            tally[signature] += 1
        else:
            tally[signature] = 1
    return tally
 def enforce_k_anonymity(
    rows: list[dict],
    group_keys: list[str],
    k: int = DEFAULT_K,
 ) -> list[dict]:
    """Keep only rows whose group has at least k members.

    Returns the rows that belong to sufficiently large groups, in their
    original order. Rows in groups that are too small are suppressed.
    """
    group_sizes = group_and_count(rows, group_keys)
    kept: list[dict] = []
    for row in rows:
        signature = tuple(row.get(name, "") for name in group_keys)
        # Every signature was counted above, so the lookup cannot miss.
        if group_sizes[signature] >= k:
            kept.append(row)
    return kept
 def summarize_benchmark(
    rows: list[dict],
    group_keys: list[str],
    measure_key: str,
    k: int = DEFAULT_K,
    quantize_step: int = 5,
 ) -> list[dict]:
    """Build publishable benchmark aggregate rows.

    Per group: the group key values, "n" (number of rows that carry a
    measure), "mean_quantized" and "mean_range". Groups with fewer than k
    measured rows are dropped. The result is sorted by n, descending.

    Example:
      rows = [{"segment": "premium", "consent_score": 84}, ...]
      summarize_benchmark(rows, ["segment"], "consent_score")
      -> [{"segment": "premium", "n": 8, "mean_quantized": 80}, ...]
    """
    samples_by_group: dict[tuple, list[float]] = {}
    for row in rows:
        measure = row.get(measure_key)
        if measure is None:
            # Rows without a measure do not count towards the group size.
            continue
        signature = tuple(row.get(name, "") for name in group_keys)
        samples_by_group.setdefault(signature, []).append(float(measure))
    summary: list[dict] = []
    for signature, samples in samples_by_group.items():
        size = len(samples)
        if size < k:
            continue
        average = sum(samples) / size
        record: dict[str, Any] = dict(zip(group_keys, signature))
        record["n"] = size
        record["mean_quantized"] = quantize_value(average, quantize_step)
        # The range bucket is twice as coarse as the quantised mean.
        record["mean_range"] = quantize_range(average, quantize_step * 2)
        summary.append(record)
    summary.sort(key=lambda rec: rec["n"], reverse=True)
    return summary
 def safe_to_publish(
    statement: str,
    sample_size: int,
    k: int = DEFAULT_K,
 ) -> tuple[bool, str]:
    """Validate a marketing/press statement against the k threshold.

    Returns (ok, message). When ok is False the statement must NOT be
    published. The message quotes the first 60 characters of the statement.
    """
    if not sample_size < k:
        return True, f"OK (n={sample_size}, k={k})"
    # NOTE(review): the ellipsis is appended even when the statement is
    # shorter than 60 characters.
    excerpt = statement[:60]
    reason = (
        f'Aussage NICHT publizierbar: "{excerpt}…" '
        f'(n={sample_size} < k={k}). Risiko: Re-Identifikation '
        f'einzelner Hersteller moeglich.'
    )
    return False, reason
@@ -28,6 +28,12 @@ class BusinessProfile:
    needs_odr: bool = False  # Online-Streitbeilegung
    detected_services: list[str] = field(default_factory=list)
    confidence: float = 0.0
    # Wenn True: die Site selbst schliesst KEINEN Direktkauf-Vertrag
    # (typisch OEM-Konfigurator-Sites BMW/Audi/Mercedes — Vertrag laeuft
    # ueber den Vertragshaendler, nicht die Hersteller-Webseite).
    # Konsequenz: AGB/Widerruf/Nutzungsbedingungen sind NICHT PFLICHT
    # auf der Website, sondern werden beim Haendler ausgehaendigt.
    no_direct_sales: bool = False
 # ── Keyword lists ────────────────────────────────────────────────────
@@ -231,6 +237,13 @@ async def detect_business_profile(documents: dict[str, str]) -> BusinessProfile:
    b2g_score = _count_hits(full_text, _B2G_KEYWORDS)
    nonprofit_score = _count_hits(full_text, _NONPROFIT_KEYWORDS)
    # P17-C: B2B-Dienstleister-Cluster (P14) als Boost — wenn ein Unternehmen
    # CE-Zertifizierung / Compliance-Beratung / Auditierung / Schulungen anbietet,
    # ist es i.d.R. B2B auch wenn die strikten B2B-Keywords nicht greifen.
    b2b_service_boost = _count_hits(full_text, _B2B_SERVICE_POSITIVE)
    if b2b_service_boost >= 2:
        b2b_score += min(3, b2b_service_boost - 1)
    # Missing documents as signal
    has_agb = "agb" in documents
    has_widerruf = "widerruf" in documents
@@ -319,4 +332,103 @@ async def detect_business_profile(documents: dict[str, str]) -> BusinessProfile:
                    "steuerberater": "finance", "architekt": "craft"}
        profile.industry = prof_map.get(profile.regulated_profession_type, "unknown")
    # ── no_direct_sales (OEM-Konfigurator-Pattern) ───────────────
    # Hersteller-Sites die nur konfigurieren + zu Vertragshaendlern
    # weiterleiten (BMW/Audi/Mercedes/VW/Porsche) schliessen KEINEN
    # Direkt-Kaufvertrag. AGB/Widerruf/Nutzungsbedingungen sind dort
    # nicht Pflicht — werden beim Haendler ausgehaendigt.
    profile.no_direct_sales = _detect_no_direct_sales(full_text)
    return profile
 # P14: three clusters, each of which can independently trigger
 # no_direct_sales=True. All keywords are lower-case because they are matched
 # as substrings against the lower-cased page text.
 # Cluster A: OEM configurator pattern (car makers with a dealer network)
 _OEM_POSITIVE = [
    "vertragshaendler", "vertragshändler", "vertragspartner",
    "vertragswerkstatt", "haendlersuche", "händlersuche",
    "niederlassung", "vertretung", "autorisierter haendler",
    "autorisierter händler", "ihr haendler vor ort",
    "ihr händler vor ort", "haendler in ihrer naehe",
    "händler in ihrer nähe", "probefahrt vereinbaren",
    "anfrage an haendler", "anfrage an händler",
    "konfigurator", "fahrzeug konfigurieren",
    "ihre individuelle anfrage",
    "bmw vertriebs", "audi vertriebs", "mercedes-benz vertriebs",
    "volkswagen vertriebs", "porsche zentrum",
    # OEM brand names in the mandatory texts (privacy policy names the maker)
    "bmw ag", "audi ag", "mercedes-benz ag", "volkswagen ag",
    "porsche ag", "opel automobile gmbh",
 ]
 # Cluster B: B2B service providers (consulting / compliance / training / CE)
 _B2B_SERVICE_POSITIVE = [
    "ce-zertifizierung", "ce zertifizierung",
    "ce-konformitaet", "ce-konformität",
    "ce-kennzeichnung", "ce kennzeichnung",
    "compliance-beratung", "compliance beratung",
    "arbeitssicherheit", "product compliance",
    "produktsicherheit", "produkthaftung",
    "auditierung", "auditor", "auditierungen",
    "schulungen", "workshops", "akademie",
    "beratungsleistungen", "consultingleistungen",
    "consulting services", "managementsystem",
    "datenschutzbeauftragter (extern)",
    "externer datenschutzbeauftragter",
    "datenschutz-audit", "tisax", "iso 27001",
    "iso 9001", "iso 14001", "iso 45001",
    "gefaehrdungsbeurteilung", "gefährdungsbeurteilung",
    "betriebsbeauftragter", "fachkraft fuer arbeitssicherheit",
    "fachkraft für arbeitssicherheit",
 ]
 # Cluster C: NGO / association / public administration
 _NONPROFIT_PUBLIC_POSITIVE = [
    "spendenkonto", "vereinsregister", "gemeinnuetzig",
    "gemeinnützig", "ehrenamtlich", "foerderverein",
    "förderverein", "stiftung", "buergeramt", "bürgeramt",
    "landratsamt", "kommunalverwaltung",
 ]
 # Backwards-compat: the combined list of all three clusters.
 _NO_DIRECT_SALES_POSITIVE = (
    _OEM_POSITIVE + _B2B_SERVICE_POSITIVE + _NONPROFIT_PUBLIC_POSITIVE
 )
 # Indicators AGAINST no_direct_sales: real online-shop functionality.
 _DIRECT_SALES_NEGATIVE = [
    "in den warenkorb", "warenkorb hinzu", "zur kasse",
    "jetzt kaufen", "kostenpflichtig bestellen",
    "zahlungspflichtig bestellen", "sofort-kauf",
    "online bestellen", "lieferadresse", "rechnungsadresse",
    "versandkosten", "lieferzeit", "lieferbedingungen",
    "checkout", "stueckpreis", "stückpreis",
 ]
 def _detect_no_direct_sales(full_text: str) -> bool:
    """Heuristic: True when the site does not sell directly to B2C customers.

    Fires for any of three clusters, each needing at least 2 keyword hits:
      A) OEM configurator (car makers)
      B) B2B service provider (consulting/compliance/training)
      C) NGO / public administration
    Shop signals count against every cluster: a cluster only wins when it
    has more hits than there are shop signals.

    NOTE(review): matching is by substring and some keywords contain
    others (e.g. "auditierungen" contains "auditierung"), so one word can
    produce two hits.
    """
    haystack = full_text.lower()

    def count_hits(keywords: list) -> int:
        # Number of keywords that occur at least once in the text.
        return sum(1 for keyword in keywords if keyword in haystack)

    shop_signals = count_hits(_DIRECT_SALES_NEGATIVE)
    clusters = (_OEM_POSITIVE, _B2B_SERVICE_POSITIVE, _NONPROFIT_PUBLIC_POSITIVE)
    for cluster in clusters:
        found = count_hits(cluster)
        # Each cluster stands on its own: 2+ hits and more hits than
        # shop signals.
        if found >= 2 and found > shop_signals:
            return True
    return False
@@ -0,0 +1,141 @@
 """
 Zentraler User-Agent-Provider + Domain-Rate-Limiter fuer alle Crawls.
 UA-Switch ist Trigger-gebunden an Firmengruendung:
  - aktuell (Vor-Gruendung): generischer Headless-Chrome-UA
  - nach Gruendung: env BREAKPILOT_BRANDED_UA=1 setzen
                    -> "BreakPilot-Compliance-Scanner/1.0 (+https://...)"
 Memory: project_legal_contracts_2026_07.md (Punkt 0).
 Rate-Limit:
  - Default 1 req/sec/Domain, max 2 concurrent pro Domain.
  - Saving-Scan-Funnel separat: max 1 vollstaendiger Run / Domain / 24h.
 """
 from __future__ import annotations
 import asyncio
 import os
 import time
 from collections import defaultdict
 from urllib.parse import urlparse
 # User-Agent that names the scanner; returned by crawler_user_agent() when
 # the env switch BREAKPILOT_BRANDED_UA is set.
 _BRANDED_UA = (
    "BreakPilot-Compliance-Scanner/1.0 "
    "(+https://breakpilot.ai/scanner)"
 )
 # Generic headless-Chrome User-Agent; the default while the switch is off.
 _NEUTRAL_UA = (
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) HeadlessChrome/120.0.0.0 Safari/537.36"
 )
 def crawler_user_agent() -> str:
    """Return the User-Agent string for all outgoing crawls.

    Yields the branded UA once the environment variable
    BREAKPILOT_BRANDED_UA is "1", "true" or "yes" (case-insensitive,
    surrounding whitespace ignored); otherwise the neutral UA.
    """
    raw_flag = os.getenv("BREAKPILOT_BRANDED_UA") or ""
    use_branded = raw_flag.strip().lower() in ("1", "true", "yes")
    return _BRANDED_UA if use_branded else _NEUTRAL_UA
 def default_request_headers() -> dict:
    """Return the complete header set for httpx calls.

    A new dict is built on every call, so callers may modify it freely.
    """
    headers = {"User-Agent": crawler_user_agent()}
    headers["Accept"] = (
        "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
    )
    headers["Accept-Language"] = "de-DE,de;q=0.9,en;q=0.8"
    return headers
 def base_domain_of(url_or_host: str) -> str:
    """Return the lower-cased host of a URL or bare host, without "www.".

    Args:
        url_or_host: full URL or host name; a missing scheme is treated
            as https.

    Returns:
        The network location with a leading "www." removed, "" for empty
        input, or the (scheme-prefixed) input when no host can be parsed.
    """
    if not url_or_host:
        return ""
    if "://" not in url_or_host:
        url_or_host = "https://" + url_or_host
    netloc = urlparse(url_or_host).netloc.lower()
    # Strip the prefix only. Replacing "www." anywhere in the host would
    # turn "awww.example.com" into "aexample.com" and merge unrelated
    # domains in the per-domain rate limits.
    if netloc.startswith("www."):
        netloc = netloc[len("www."):]
    return netloc or url_or_host
 # --- per-domain rate limit -----------------------------------------------
 _MIN_INTERVAL_S = 1.0          # 1 req/sec/domain
 _MAX_CONCURRENT_PER_DOMAIN = 2
 # Domain -> time.monotonic() value of the latest request; unknown domains
 # default to 0.0.
 _last_request_at: dict[str, float] = defaultdict(float)
 # Domain -> semaphore capping concurrent requests; filled lazily.
 _semaphores: dict[str, asyncio.Semaphore] = {}
 # Guards the lazy creation of entries in _semaphores.
 _locks_lock = asyncio.Lock()
 async def _get_semaphore(domain: str) -> asyncio.Semaphore:
    """Return the concurrency semaphore of a domain, creating it on first use."""
    async with _locks_lock:
        if _semaphores.get(domain) is None:
            _semaphores[domain] = asyncio.Semaphore(_MAX_CONCURRENT_PER_DOMAIN)
        return _semaphores[domain]
 class DomainRateLimiter:
    """Async context manager: pace requests per domain and hold a slot.

        async with DomainRateLimiter(url):
            resp = await client.get(url)

    Entering waits for a free concurrency slot of the domain and then for
    the next free send time; leaving releases the slot.
    """
    def __init__(self, url_or_domain: str):
        self.domain = base_domain_of(url_or_domain)
    async def __aenter__(self):
        sem = await _get_semaphore(self.domain)
        await sem.acquire()
        try:
            now = time.monotonic()
            # Reserve the send time BEFORE sleeping. There is no await
            # between read and write, so two tasks holding slots at the
            # same time can no longer compute the same wait and fire
            # together.
            send_at = max(now, _last_request_at[self.domain] + _MIN_INTERVAL_S)
            _last_request_at[self.domain] = send_at
            if send_at > now:
                await asyncio.sleep(send_at - now)
        except BaseException:
            # __aexit__ does not run when __aenter__ fails (e.g. the task
            # is cancelled while sleeping); release here so the slot is
            # not lost for good.
            sem.release()
            raise
        self._sem = sem
        return self
    async def __aexit__(self, exc_type, exc, tb):
        self._sem.release()
        # Never suppress an exception raised inside the with-body.
        return False
 # --- per-domain "1 full run / 24h" (saving scan) -------------------------
 # SQLite file holding the check_runs table; overridable through the
 # COMPLIANCE_AUDIT_DB environment variable.
 _DB_PATH = os.getenv("COMPLIANCE_AUDIT_DB", "/data/compliance_audits.db")
 # Minimum distance between two saving scans of one domain, in seconds.
 _SAVING_SCAN_INTERVAL_S = 24 * 3600
 def saving_scan_allowed(domain_or_url: str) -> tuple[bool, int]:
    """Tell whether a saving scan may run for this domain right now.

    Looks up the newest ts in the existing check_runs table for the domain
    and compares it with the 24h interval.

    Returns:
        (allowed, seconds_until_allowed). Fails open: an empty domain, no
        previous run, or any error yields (True, 0).

    NOTE(review): a ts without UTC offset is interpreted in the server's
    local timezone by datetime.timestamp() — confirm how check_runs.ts is
    written.
    """
    import sqlite3
    from datetime import datetime
    domain = base_domain_of(domain_or_url)
    if not domain:
        return True, 0
    query = "SELECT MAX(ts) FROM check_runs WHERE base_domain=?"
    try:
        with sqlite3.connect(_DB_PATH) as db:
            record = db.execute(query, (domain,)).fetchone()
            newest = record[0] if record else None
            if newest:
                age_s = time.time() - datetime.fromisoformat(newest).timestamp()
                if not age_s >= _SAVING_SCAN_INTERVAL_S:
                    return False, int(_SAVING_SCAN_INTERVAL_S - age_s)
            return True, 0
    except Exception:
        return True, 0
@@ -129,20 +129,29 @@ def classify_cookie(cookie_name: str) -> tuple[str, str]:
 def annotate_vendor_cookies(vendor: dict) -> dict:
-    """Enrich a vendor record with functional_role per cookie."""
+    """Enrich a vendor record with functional_role + KB knowledge per cookie."""
    from compliance.services.cookie_knowledge import (
        lookup_cookie, summarize_compliance_risk,
    )
    cookies = vendor.get("cookies") or []
    annotated = []
    role_counts: dict[str, int] = {}
    for c in cookies:
        role, impact = classify_cookie(c.get("name", ""))
-        annotated.append({**c, "functional_role": role, "blocking_impact": impact})
+        knowledge = lookup_cookie(c.get("name", ""))
        entry = {**c, "functional_role": role, "blocking_impact": impact}
        if knowledge:
            entry["knowledge"] = knowledge
        annotated.append(entry)
        role_counts[role] = role_counts.get(role, 0) + 1
-    return {
+    out = {
        **vendor,
        "cookies": annotated,
        "role_distribution": role_counts,
        "role_labels": {r: _FUNCTIONAL_LABEL.get(r, r) for r in role_counts},
    }
    out["compliance_risk"] = summarize_compliance_risk(out)
    return out
 def aggregate_cookie_purposes(vendors: Iterable[dict]) -> dict:
@@ -0,0 +1,106 @@
 """
 Cookie-Knowledge Facade — vereint die Basis-KB (cookie_knowledge_db) mit
 der Erweiterung (cookie_knowledge_extended) hinter einer einzigen API.
 Caller sollten von hier importieren statt von einer der beiden Sub-DBs.
  from compliance.services.cookie_knowledge import (
      lookup_cookie,
      enrich_vendor_with_knowledge,
      summarize_compliance_risk,
      compliance_risk_label,
  )
 Lookup-Reihenfolge: Extended (kuratiert, juenger) vor Base. Dadurch
 koennen wir Eintraege ueberschreiben ohne die Base zu touchen.
 """
 from __future__ import annotations
 from compliance.services.cookie_knowledge_db import (
    CookieKnowledge,
    lookup_cookie as _lookup_base,
 )
 from compliance.services.cookie_knowledge_extended import (
    KB_EXT,
    lookup_cookie_extended,
 )
 def lookup_cookie(name: str) -> CookieKnowledge | None:
    """Resolve a cookie name to its knowledge entry; extended KB wins over base."""
    extended_hit = lookup_cookie_extended(name)
    if extended_hit:
        return extended_hit
    return _lookup_base(name)
 def enrich_vendor_with_knowledge(vendor: dict) -> dict:
    """Return a copy of vendor with per-cookie knowledge and a risk summary.

    Cookies with a KB entry get a "knowledge" key; unknown cookies are
    passed through unchanged. The input dict is not modified.
    """
    annotated = []
    for cookie in vendor.get("cookies") or []:
        facts = lookup_cookie(cookie.get("name", ""))
        if facts:
            annotated.append({**cookie, "knowledge": facts})
        else:
            annotated.append(cookie)
    result = dict(vendor)
    result["cookies"] = annotated
    result["compliance_risk"] = summarize_compliance_risk(result)
    return result
 def summarize_compliance_risk(vendor: dict) -> dict:
    """Aggregate Re-ID risk + Schrems-II exposure across all cookies.

    Args:
        vendor: vendor record; its "cookies" list may already carry a
            "knowledge" dict per cookie, otherwise the cookie name is
            looked up in the KB.

    Returns:
        Dict with the risk distribution, the number of high-risk,
        Schrems-II-affected and strictly necessary cookies, the number of
        classified cookies and a compact "label". Cookies without a KB
        entry are ignored.
    """
    import re  # local import: this module has no top-level import of re
    cookies = vendor.get("cookies") or []
    risk_counts = {"high": 0, "medium": 0, "low": 0}
    schrems_affected = 0
    strictly_necessary = 0
    classified = 0
    for c in cookies:
        k = c.get("knowledge") or lookup_cookie(c.get("name", ""))
        if not k:
            continue
        classified += 1
        risk = (k.get("reid_risk") or "low").lower()
        risk_counts[risk] = risk_counts.get(risk, 0) + 1
        # Match the country on whole tokens ("US", "IE/US", "USA"). A
        # plain substring test would also hit names such as "Austria" or
        # "Russia".
        country = (k.get("vendor_country") or "").lower()
        country_tokens = set(re.split(r"[^a-z]+", country))
        is_us_vendor = bool(country_tokens & {"us", "usa"})
        mentions_schrems = "schrems" in (k.get("schrems_ii_status") or "").lower()
        if is_us_vendor or mentions_schrems:
            schrems_affected += 1
        if k.get("technical_necessity") == "full":
            strictly_necessary += 1
    return {
        "reid_risk_distribution": risk_counts,
        "high_risk_cookie_count": risk_counts["high"],
        "schrems_ii_affected_cookies": schrems_affected,
        "strictly_necessary_cookies": strictly_necessary,
        "total_classified": classified,
        "label": compliance_risk_label({
            "high_risk_cookie_count": risk_counts["high"],
            "schrems_ii_affected_cookies": schrems_affected,
            "total_classified": classified,
        }),
    }
 def compliance_risk_label(summary: dict) -> str:
    """Compact risk badge: 'kritisch' | 'hoch' | 'mittel' | 'gering' | 'unklar'.

    Args:
        summary: dict with "total_classified", "high_risk_cookie_count"
            and "schrems_ii_affected_cookies"; missing or None counts are
            treated as 0.

    Returns:
        'unklar' when nothing was classified, otherwise the badge derived
        from the high-risk and Schrems-II counts.
    """
    if not summary or not summary.get("total_classified"):
        return "unklar"
    # `or 0` keeps the comparisons safe when a key is present but None.
    high = summary.get("high_risk_cookie_count") or 0
    schrems = summary.get("schrems_ii_affected_cookies") or 0
    if high >= 3 and schrems >= 2:
        return "kritisch"
    if high >= 2 or (high >= 1 and schrems >= 1):
        return "hoch"
    if high >= 1 or schrems >= 1:
        return "mittel"
    return "gering"
 def kb_size() -> dict:
    """Diagnostics for the admin/health endpoint: entry counts of both KBs."""
    from compliance.services.cookie_knowledge_db import KB as _KB_BASE
    base_names = set(_KB_BASE)
    ext_names = set(KB_EXT)
    shared = base_names.intersection(ext_names)
    combined = base_names.union(ext_names)
    return {
        "base_entries": len(base_names),
        "extended_entries": len(ext_names),
        "extended_overrides_base": len(shared),
        "total_unique": len(combined),
    }
@@ -0,0 +1,497 @@
 """
 Cookie-Knowledge Erweiterung — Adobe, Meta erweitert, Microsoft, LinkedIn,
 TikTok, Salesforce/HubSpot/Marketo, Hotjar/Mouseflow/FullStory, Live-Chat,
 Cloudflare/Akamai, Payment, CMP-eigene Cookies, EU-Analytics.
 Hinweis zu Rechten: Eintraege enthalten ausschliesslich Identitaetsfelder
 (Cookie-Name, Anbieter, Sitzland) + EIGENE Knappformulierungen + Verweise
 auf oeffentliche EuGH-/CNIL-/EDPB-Quellen. KEINE 1:1-Kopien aus OneTrust,
 Cookiepedia oder Vendor-eigenen Beschreibungstexten.
 Quellen-Pointer: IAB TCF v2.2 Vendor List, CNIL Cookies & Trackers
 Guidelines 2024, EDPB Guidelines 2/2023, EuGH-Rechtsprechung (Schrems II,
 Planet49), DSK-Orientierungshilfen 2021/2024.
 """
 from __future__ import annotations
 from compliance.services.cookie_knowledge_db import CookieKnowledge
 # Shared per-vendor defaults. KB_EXT entries spread them in with `**` and
 # may override single fields (e.g. a more specific "vendor").
 # Adobe: US vendor.
 _ADOBE_BASE = {
    "vendor": "Adobe Inc.", "vendor_country": "US",
    "schrems_ii_status": "Drittlandtransfer US. Mit DPF (2023) wieder "
                         "zulaessig; EU-Datenresidenz-Option in Adobe "
                         "Experience Platform verfuegbar.",
    "eugh_rulings": [
        "EuGH C-311/18 (Schrems II)",
        "EDPB Recommendations 01/2020 — Supplementary Measures",
    ],
 }
 # Meta: Irish entity, vendor_country "IE".
 _META_BASE = {
    "vendor": "Meta Platforms Ireland Ltd.", "vendor_country": "IE",
    "schrems_ii_status": "Verarbeitung in IE + US-Transfer. DPC Ireland "
                         "Bussgeld 2023 (€1,2 Mrd) wegen unzureichender "
                         "Schutzmassnahmen — DPF deckt seit 2023.",
    "eugh_rulings": [
        "EuGH C-311/18 (Schrems II)",
        "DPC Ireland 2023 — Meta 1,2 Mrd. EUR",
    ],
 }
 # Microsoft: US vendor.
 _MICROSOFT_BASE = {
    "vendor": "Microsoft Corp.", "vendor_country": "US",
    "schrems_ii_status": "DPF-zertifiziert; EU Data Boundary fuer Azure/365 "
                         "seit 2024 verfuegbar.",
    "eugh_rulings": ["EuGH C-311/18 (Schrems II)"],
 }
 # LinkedIn: Irish entity, vendor_country "IE".
 _LINKEDIN_BASE = {
    "vendor": "LinkedIn Ireland Unlimited Co.", "vendor_country": "IE",
    "schrems_ii_status": "Microsoft-Konzern, EU-Hauptsitz IE, Transfer US.",
    "eugh_rulings": ["EuGH C-311/18 (Schrems II)"],
 }
 KB_EXT: dict[str, CookieKnowledge] = {
    # --- Adobe Experience Cloud --------------------------------------
    # AMCV_, s_cc, s_sq leben in Base-KB.
    "demdex": {
        **_ADOBE_BASE,
        "vendor": "Adobe Inc. (Audience Manager)",
        "exact_purpose": "Adobe Audience Manager DMP — Cross-Site-Profil "
                         "fuer Zielgruppen-Segmentierung.",
        "data_collected": ["dpuuid", "segments"],
        "ip_relevant": True,
        "tcf_purpose_ids": [4, 9, 10],
        "typical_lifetime": "180 Tage",
        "reid_risk": "high", "technical_necessity": "none",
    },
    # --- Meta erweitert -----------------------------------------------
    # fr, _fbc leben in Base-KB.
    "datr": {
        **_META_BASE,
        "exact_purpose": "Facebook Browser-Identifier — Anti-Abuse/Bot-Schutz.",
        "data_collected": ["browser_fingerprint_id"],
        "ip_relevant": True,
        "typical_lifetime": "2 Jahre",
        "reid_risk": "high", "technical_necessity": "partial",
        "notes": "Wird auch ohne Consent gesetzt; Meta argumentiert "
                 "Sicherheit. Trotzdem von DSK 2024 kritisch bewertet.",
    },
    # --- Microsoft / Bing ---------------------------------------------
    # MUID lebt in Base-KB.
    "MSCC": {
        **_MICROSOFT_BASE,
        "exact_purpose": "Microsoft Site Consent — Consent-Status-Speicherung "
                         "fuer Microsoft-eigene Properties.",
        "data_collected": ["consent_string"],
        "typical_lifetime": "1 Jahr",
        "reid_risk": "low", "technical_necessity": "full",
        "notes": "Strictly necessary nach §25(2) TDDDG.",
    },
    "ai_session": {
        **_MICROSOFT_BASE,
        "vendor": "Microsoft Corp. (Application Insights)",
        "exact_purpose": "Azure Application Insights — Session-Tracking fuer "
                         "Telemetry.",
        "data_collected": ["session_id"],
        "typical_lifetime": "30 Minuten",
        "reid_risk": "medium", "technical_necessity": "partial",
    },
    # --- LinkedIn ------------------------------------------------------
    "li_at": {
        **_LINKEDIN_BASE,
        "exact_purpose": "LinkedIn-Authentifizierung — Login-Session.",
        "data_collected": ["auth_token"],
        "typical_lifetime": "1 Jahr",
        "reid_risk": "high", "technical_necessity": "full",
        "notes": "Nur fuer eingeloggte Nutzer; auf externer Site = "
                 "Insight Tag (siehe li_sugr).",
    },
    "li_sugr": {
        **_LINKEDIN_BASE,
        "exact_purpose": "LinkedIn Insight Tag — Browser-ID fuer "
                         "Conversion-Tracking + Werbe-Targeting.",
        "data_collected": ["browser_id"],
        "ip_relevant": True,
        "tcf_purpose_ids": [7, 9, 10],
        "typical_lifetime": "90 Tage",
        "reid_risk": "high", "technical_necessity": "none",
    },
    # bcookie, lidc leben in Base-KB.
    # --- TikTok --------------------------------------------------------
    "_ttp": {
        "vendor": "TikTok Pte. Ltd.", "vendor_country": "SG/CN",
        "exact_purpose": "TikTok Pixel — User-ID fuer Conversion-Tracking + "
                         "Werbeoptimierung.",
        "data_collected": ["pixel_id", "browser_id"],
        "ip_relevant": True,
        "tcf_purpose_ids": [7, 9, 10],
        "typical_lifetime": "13 Monate",
        "reid_risk": "high", "technical_necessity": "none",
        "schrems_ii_status": "Drittlandtransfer in Drittstaaten ohne "
                             "Angemessenheitsbeschluss. CNIL 2023 — "
                             "TikTok 5 Mio EUR Bussgeld.",
        "eugh_rulings": [
            "CNIL SAN-2022-027 — TikTok 5 Mio EUR",
            "Italienische DPA 2024 — TikTok 10 Mio EUR",
        ],
    },
    "ttwid": {
        "vendor": "TikTok Pte. Ltd.", "vendor_country": "SG/CN",
        "exact_purpose": "TikTok Web-Identifier — eindeutige Browser-ID auch "
                         "ohne Login.",
        "data_collected": ["ttwid"],
        "typical_lifetime": "1 Jahr",
        "reid_risk": "high", "technical_necessity": "none",
        "schrems_ii_status": "Wie _ttp.",
    },
    # --- HubSpot / Marketo / Salesforce ------------------------------
    "hubspotutk": {
        "vendor": "HubSpot Inc.", "vendor_country": "US",
        "exact_purpose": "HubSpot User-Token — Cross-Visit-Identitaet fuer "
                         "Lead-Tracking.",
        "data_collected": ["user_token"],
        "ip_relevant": True,
        "tcf_purpose_ids": [7, 8],
        "typical_lifetime": "6 Monate",
        "reid_risk": "high", "technical_necessity": "none",
        "schrems_ii_status": "DPF-zertifiziert.",
    },
    "__hssc": {
        "vendor": "HubSpot Inc.", "vendor_country": "US",
        "exact_purpose": "HubSpot Session-Tracking — Pageviews innerhalb "
                         "einer Session.",
        "data_collected": ["session_count"],
        "typical_lifetime": "30 Minuten",
        "reid_risk": "low", "technical_necessity": "none",
    },
    "_mkto_trk": {
        "vendor": "Adobe Inc. (Marketo)", "vendor_country": "US",
        "exact_purpose": "Marketo Munchkin-Tracker — Lead-Identifikation "
                         "fuer Marketing-Automation.",
        "data_collected": ["munchkin_id", "session_id"],
        "ip_relevant": True,
        "typical_lifetime": "2 Jahre",
        "reid_risk": "high", "technical_necessity": "none",
        "schrems_ii_status": _ADOBE_BASE["schrems_ii_status"],
    },
    "BrowserId_sec": {
        "vendor": "Salesforce.com Inc.", "vendor_country": "US",
        "exact_purpose": "Salesforce Marketing Cloud Browser-Token — "
                         "Cross-Visit-Identifikation.",
        "data_collected": ["browser_id"],
        "typical_lifetime": "1 Jahr",
        "reid_risk": "medium", "technical_necessity": "none",
        "schrems_ii_status": "DPF-zertifiziert.",
    },
    # --- Session-Recording / Heatmaps ---------------------------------
    "_hjSessionUser_": {
        "vendor": "Hotjar Ltd.", "vendor_country": "MT",
        "exact_purpose": "Hotjar User-ID — Cross-Visit-Identifikation fuer "
                         "Session-Recording + Heatmaps.",
        "data_collected": ["user_id"],
        "ip_relevant": True,
        "typical_lifetime": "1 Jahr",
        "reid_risk": "high", "technical_necessity": "none",
        "schrems_ii_status": "EU (Malta) — kein Drittland. Aber: parent "
                             "Contentsquare (FR) hostet teilweise in US.",
        "notes": "Suffix `<site_id>`. Pattern-Match noetig. "
                 "DSGVO-Aufzeichnung = Einwilligung pflichtig.",
        "eu_alternative_vendor": "Mouseflow / Smartlook (CZ)",
    },
    "_hjSession_": {
        "vendor": "Hotjar Ltd.", "vendor_country": "MT",
        "exact_purpose": "Hotjar Session-Token — eindeutige Session-ID "
                         "innerhalb 30min Inaktivitaet.",
        "data_collected": ["session_id"],
        "typical_lifetime": "30 Minuten",
        "reid_risk": "medium", "technical_necessity": "none",
    },
    "fs_uid": {
        "vendor": "FullStory Inc.", "vendor_country": "US",
        "exact_purpose": "FullStory User-ID — Cross-Visit-Identifikation "
                         "fuer Session-Replay.",
        "data_collected": ["user_id"],
        "ip_relevant": True,
        "typical_lifetime": "1 Jahr",
        "reid_risk": "high", "technical_necessity": "none",
        "schrems_ii_status": "DPF-zertifiziert. EU-Region verfuegbar (opt-in).",
    },
    "mf_user": {
        "vendor": "Mouseflow Aps", "vendor_country": "DK",
        "exact_purpose": "Mouseflow User-ID — Cross-Visit-Identifikation fuer "
                         "Heatmap + Recording.",
        "data_collected": ["user_id"],
        "typical_lifetime": "1 Jahr",
        "reid_risk": "medium", "technical_necessity": "none",
        "schrems_ii_status": "EU (DK) — kein Drittland.",
    },
    # --- Live-Chat ----------------------------------------------------
    "intercom-id-": {
        "vendor": "Intercom Inc.", "vendor_country": "US",
        "exact_purpose": "Intercom Visitor-ID — Wiedererkennung anonymer "
                         "Besucher fuer Chat-History.",
        "data_collected": ["visitor_id"],
        "typical_lifetime": "9 Monate",
        "reid_risk": "medium", "technical_necessity": "partial",
        "schrems_ii_status": "DPF-zertifiziert; EU-Datenresidenz optional.",
        "notes": "Suffix `<app_id>`. Pattern-Match noetig.",
    },
    "driftt_aid": {
        "vendor": "Salesforce.com Inc. (Drift)", "vendor_country": "US",
        "exact_purpose": "Drift Anonymous-Visitor-ID fuer Chat-Personalisierung.",
        "data_collected": ["visitor_id"],
        "typical_lifetime": "2 Jahre",
        "reid_risk": "medium", "technical_necessity": "partial",
    },
    "__zlcmid": {
        "vendor": "Zendesk Inc.", "vendor_country": "US",
        "exact_purpose": "Zendesk Chat Visitor-ID fuer Session-Tracking.",
        "data_collected": ["chat_visitor_id"],
        "typical_lifetime": "1 Jahr",
        "reid_risk": "medium", "technical_necessity": "partial",
        "schrems_ii_status": "DPF-zertifiziert; EU-Datacenter optional.",
    },
    # --- CDN / Sicherheit (strictly necessary) -----------------------
    # __cf_bm, cf_clearance leben in Base-KB.
    "AKA_A2": {
        "vendor": "Akamai Technologies Inc.", "vendor_country": "US",
        "exact_purpose": "Akamai Adaptive Acceleration — geroutete Best-Path-"
                         "Optimierung.",
        "data_collected": ["a2_route"],
        "typical_lifetime": "1 Stunde",
        "reid_risk": "low", "technical_necessity": "full",
    },
    # --- Payment (strictly necessary fuer Checkout) ------------------
    "__stripe_mid": {
        "vendor": "Stripe Payments Europe Ltd.", "vendor_country": "IE",
        "exact_purpose": "Stripe Fraud-Detection Merchant-ID — Risiko-Scoring "
                         "fuer Zahlungs-Authentifizierung.",
        "data_collected": ["merchant_visitor_id"],
        "ip_relevant": True,
        "typical_lifetime": "1 Jahr",
        "reid_risk": "low", "technical_necessity": "full",
        "schrems_ii_status": "EU (IE) — kein Drittland.",
        "notes": "Strictly necessary nach §25(2) TDDDG fuer Zahlungsabwicklung.",
    },
    "__stripe_sid": {
        "vendor": "Stripe Payments Europe Ltd.", "vendor_country": "IE",
        "exact_purpose": "Stripe Session-ID — temporaere Zahlungs-Session.",
        "data_collected": ["session_id"],
        "typical_lifetime": "30 Minuten",
        "reid_risk": "low", "technical_necessity": "full",
    },
    # --- CMP-eigene Cookies (strictly necessary) ---------------------
    "CookieConsent": {
        "vendor": "Cybot A/S (Cookiebot)", "vendor_country": "DK",
        "exact_purpose": "Cookiebot Consent-Speicherung — gewaehlte "
                         "Kategorien + Zeitstempel.",
        "data_collected": ["consent_categories", "consent_timestamp"],
        "typical_lifetime": "1 Jahr",
        "reid_risk": "low", "technical_necessity": "full",
        "schrems_ii_status": "EU (DK). Wenn EU-Cloud, kein Drittland.",
    },
    "OptanonConsent": {
        "vendor": "OneTrust LLC", "vendor_country": "US",
        "exact_purpose": "OneTrust Consent-Speicherung — Kategorien + "
                         "Vendor-Liste + Zeitstempel.",
        "data_collected": ["consent_categories", "consent_string"],
        "typical_lifetime": "1 Jahr",
        "reid_risk": "low", "technical_necessity": "full",
        "schrems_ii_status": "DPF-zertifiziert; EU-Cloud optional.",
    },
    "OptanonAlertBoxClosed": {
        "vendor": "OneTrust LLC", "vendor_country": "US",
        "exact_purpose": "OneTrust UI-Flag — verhindert Re-Display des "
                         "Banners nach Schliessung.",
        "data_collected": ["closed_timestamp"],
        "typical_lifetime": "1 Jahr",
        "reid_risk": "low", "technical_necessity": "full",
    },
    "usercentrics-uuid": {
        "vendor": "Usercentrics GmbH", "vendor_country": "DE",
        "exact_purpose": "Usercentrics Consent-Speicherung — UUID-basiert.",
        "data_collected": ["consent_uuid", "consent_settings"],
        "typical_lifetime": "1 Jahr",
        "reid_risk": "low", "technical_necessity": "full",
        "schrems_ii_status": "DE — kein Drittland.",
    },
    # --- Weitere Social / Werbeplattformen ---------------------------
    # _pin_unauth lebt in Base-KB.
    "_scid": {
        "vendor": "Snap Group Ltd.", "vendor_country": "GB/US",
        "exact_purpose": "Snapchat Pixel — Conversion-Tracking fuer "
                         "Snap Ads.",
        "data_collected": ["snap_visitor_id"],
        "ip_relevant": True,
        "tcf_purpose_ids": [7, 9, 10],
        "typical_lifetime": "1 Jahr",
        "reid_risk": "high", "technical_necessity": "none",
        "schrems_ii_status": "Drittlandtransfer; UK seit 2021 mit "
                             "Angemessenheitsbeschluss.",
    },
    "guest_id": {
        "vendor": "X Corp. (Twitter)", "vendor_country": "US",
        "exact_purpose": "X/Twitter Guest-Identifier — Tracking nicht "
                         "eingeloggter Besucher inkl. Embeds.",
        "data_collected": ["guest_id"],
        "ip_relevant": True,
        "tcf_purpose_ids": [4, 9, 10],
        "typical_lifetime": "2 Jahre",
        "reid_risk": "high", "technical_necessity": "none",
        "schrems_ii_status": "DPF-Status unklar seit Eigentuemerwechsel 2022. "
                             "Erhoehtes Risiko, EDPB beobachtet.",
    },
    "VISITOR_INFO1_LIVE": {
        "vendor": "Google Ireland Ltd. (YouTube)", "vendor_country": "IE",
        "exact_purpose": "YouTube Embed Visitor-ID — Bandbreiten-Optimierung "
                         "+ Empfehlungsalgorithmus.",
        "data_collected": ["youtube_visitor_id"],
        "ip_relevant": True,
        "tcf_purpose_ids": [8, 10],
        "typical_lifetime": "6 Monate",
        "reid_risk": "high", "technical_necessity": "none",
        "notes": "YouTube-NoCookie-Domain (youtube-nocookie.com) reduziert "
                 "Tracking — DSGVO-konformer.",
    },
    "vuid": {
        "vendor": "Vimeo Inc.", "vendor_country": "US",
        "exact_purpose": "Vimeo User-Identifier — Wiedererkennung "
                         "wiederkehrender Besucher fuer Statistik.",
        "data_collected": ["vimeo_user_id"],
        "typical_lifetime": "2 Jahre",
        "reid_risk": "medium", "technical_necessity": "none",
        "schrems_ii_status": "DPF-zertifiziert.",
    },
    # --- Marketing-Automation / Email --------------------------------
    "__kla_id": {
        "vendor": "Klaviyo Inc.", "vendor_country": "US",
        "exact_purpose": "Klaviyo Visitor-Tracking — fuer E-Mail-Marketing-"
                         "Attribution.",
        "data_collected": ["klaviyo_id"],
        "ip_relevant": True,
        "typical_lifetime": "2 Jahre",
        "reid_risk": "high", "technical_necessity": "none",
        "schrems_ii_status": "DPF-zertifiziert.",
    },
    "_mcid": {
        "vendor": "Intuit Mailchimp", "vendor_country": "US",
        "exact_purpose": "Mailchimp Email-Click-Tracking — Verknuepft "
                         "Pageviews mit gesendeter Kampagne.",
        "data_collected": ["mc_email_id"],
        "typical_lifetime": "1 Jahr",
        "reid_risk": "high", "technical_necessity": "none",
        "schrems_ii_status": "DPF-zertifiziert.",
    },
    # --- Product-Analytics / CDP -------------------------------------
    "mp_": {
        "vendor": "Mixpanel Inc.", "vendor_country": "US",
        "exact_purpose": "Mixpanel Distinct-ID + Properties — "
                         "Pseudonyme Event-Analytics.",
        "data_collected": ["distinct_id", "properties"],
        "typical_lifetime": "1 Jahr",
        "reid_risk": "high", "technical_necessity": "none",
        "schrems_ii_status": "DPF-zertifiziert; EU-Residency optional.",
        "notes": "Suffix `<token>_mixpanel`. Pattern-Match noetig.",
    },
    "ajs_anonymous_id": {
        "vendor": "Twilio Inc. (Segment)", "vendor_country": "US",
        "exact_purpose": "Segment Anonymous-ID — Cross-Device-Identitaet "
                         "vor Login.",
        "data_collected": ["anonymous_id"],
        "typical_lifetime": "1 Jahr",
        "reid_risk": "high", "technical_necessity": "none",
        "schrems_ii_status": "DPF-zertifiziert; EU-Datenresidenz optional.",
    },
    "AMP_": {
        "vendor": "Amplitude Inc.", "vendor_country": "US",
        "exact_purpose": "Amplitude Device-ID — Cross-Session-Identitaet "
                         "fuer Product-Analytics.",
        "data_collected": ["device_id", "session_id"],
        "typical_lifetime": "1 Jahr",
        "reid_risk": "high", "technical_necessity": "none",
        "schrems_ii_status": "DPF-zertifiziert.",
        "notes": "Suffix `<api_key>`. Pattern-Match noetig.",
    },
    # --- A/B-Testing -------------------------------------------------
    "optimizelyEndUserId": {
        "vendor": "Optimizely Inc.", "vendor_country": "US",
        "exact_purpose": "Optimizely End-User-ID — konsistente "
                         "Experiment-Zuteilung pro Besucher.",
        "data_collected": ["end_user_id", "variation_assignments"],
        "typical_lifetime": "6 Monate",
        "reid_risk": "medium", "technical_necessity": "none",
        "schrems_ii_status": "DPF-zertifiziert.",
    },
    # --- RUM / Monitoring (oft strictly necessary diskutiert) --------
    "_dd_s": {
        "vendor": "Datadog Inc.", "vendor_country": "US",
        "exact_purpose": "Datadog RUM Session-Tracking — Performance- "
                         "Monitoring + Fehler-Telemetrie.",
        "data_collected": ["session_id", "session_type"],
        "typical_lifetime": "15 Minuten",
        "reid_risk": "low", "technical_necessity": "partial",
        "schrems_ii_status": "EU-Region (Frankfurt) verfuegbar.",
        "notes": "Bei reiner Server-/Fehler-Telemetrie ohne Cross-Site-"
                 "Tracking Argument fuer berechtigtes Interesse moeglich.",
    },
    # --- EU-Analytics-Alternativen -----------------------------------
    "_pk_ref": {
        "vendor": "InnoCraft Ltd. (Matomo)", "vendor_country": "NZ",
        "exact_purpose": "Matomo Referrer-Tracking — Quelle des Besuchs.",
        "data_collected": ["referrer", "campaign"],
        "typical_lifetime": "6 Monate",
        "reid_risk": "low", "technical_necessity": "none",
        "schrems_ii_status": "NZ hat Angemessenheitsbeschluss (2012). "
                             "Bei On-Premise-Hosting kein Transfer.",
        "notes": "Self-Hosting empfohlen — dann zeroes Drittland.",
    },
    "_pk_cvar": {
        "vendor": "InnoCraft Ltd. (Matomo)", "vendor_country": "NZ",
        "exact_purpose": "Matomo Custom-Variables — pro Visit konfigurierbar.",
        "data_collected": ["custom_vars"],
        "typical_lifetime": "30 Minuten",
        "reid_risk": "low", "technical_necessity": "none",
    },
 }
 # Pattern lookups for dynamically named cookies.
 # Each entry is (regex, KB_EXT key): lookup_cookie_extended() runs
 # re.search(regex, cookie_name) in list order and, on the first hit,
 # returns the KB_EXT entry stored under the given key. All regexes are
 # anchored with "^", so they act as prefix matches on the cookie name.
 _EXT_PATTERNS: list[tuple[str, str]] = [
    (r"^_hjSessionUser_",    "_hjSessionUser_"),
    (r"^_hjSession_",        "_hjSession_"),
    (r"^intercom-id-",       "intercom-id-"),
    (r"^mp_",                "mp_"),
    (r"^AMP_",               "AMP_"),
 ]
 def lookup_cookie_extended(name: str) -> CookieKnowledge | None:
    """Resolve a cookie name against the extension knowledge base KB_EXT.

    Resolution order:
      1. exact key match,
      2. first matching regex from _EXT_PATTERNS (dynamic cookie names),
      3. the part of the name before the first "." (if the name has one).

    Returns the KB_EXT entry, or None when the name is empty or unknown.
    """
    import re

    if not name:
        return None

    # 1. Exact hit.
    if name in KB_EXT:
        return KB_EXT[name]

    # 2. First pattern that matches decides; its key may be absent -> None.
    pattern_key = next(
        (key for pat, key in _EXT_PATTERNS if re.search(pat, name)),
        None,
    )
    if pattern_key is not None:
        return KB_EXT.get(pattern_key)

    # 3. Fall back to the segment in front of the first dot.
    prefix, dot, _ = name.partition(".")
    if dot and prefix in KB_EXT:
        return KB_EXT[prefix]
    return None
@@ -0,0 +1,255 @@
 """
 Cookie-Policy-Architecture-Detection.
 Erkennt vier Diagnose-Punkte zur rechtlichen Bewertung der Cookie-Policy
 einer Website. Hintergrund: die DSGVO + TDDDG verlangen ZWEI Layer
 (Banner fuer Consent + Cookie-Richtlinie fuer Information), aber lassen
 offen ob das in einem oder zwei HTML-Dokumenten umgesetzt wird.
 BMW-Pattern: eine HTML-Seite ist GLEICHZEITIG der Banner-Re-Trigger und
 die Cookie-Richtlinie. Mindestanforderung erfuellt, aber kein
 versionierter Audit-Trail moeglich -> "gelbes" Risiko.
 Output-Format:
  {
    "layer_separation": "single" | "separate" | "unknown",
    "versioned": bool,
    "dynamic_content": bool,
    "vendor_count_in_text": int,
    "risk_label": "gruen" | "gelb" | "rot",
    "recommendation": str,
    "signals": [{"src": ..., "detail": ...}],
    "doc_url_normalized": str,
  }
 """
 from __future__ import annotations
 import re
 from urllib.parse import urlparse
 # Regexes that indicate a versioned / dated policy text, e.g.
 # "Stand vom DD.MM.JJJJ", "Stand: DD.MM.JJJJ" or "Version X.Y".
 # They are written in lowercase because _check_versioned() applies them
 # to the lowercased document text; the first pattern that matches wins.
 _VERSION_PATTERNS = [
    r"stand\s*[:\-]?\s*(?:vom\s+)?\d{1,2}\.\s*\d{1,2}\.\s*\d{4}",
    r"stand\s*[:\-]?\s*\d{1,2}\.\s*\w+\s+\d{4}",  # e.g. "Stand: 1. Mai 2026"
    r"letzte\s+(?:aktualisierung|aenderung|änderung)\s*[:\-]?\s*\d{1,2}\.",
    r"version\s*[:\-]?\s*\d+(?:\.\d+)?",
    r"stand\s+der\s+(?:information|cookie)\w*\s*[:\-]?\s*\d{1,2}\.",
    r"(?:gueltig|gültig)\s+ab\s+\d{1,2}\.\s*\d{1,2}\.\s*\d{4}",
 ]
 # Lowercase phrases hinting that the policy content is generated
 # dynamically. _check_dynamic() tests them as plain substrings of the
 # lowercased document text; both umlaut and ae/ue spellings are listed.
 _DYNAMIC_MARKERS = [
    "wird automatisch aktualisiert",
    "wird dynamisch generiert",
    "wird laufend angepasst",
    "cookie-einstellungen ändern",
    "cookie-einstellungen aendern",
    "cookie-praeferenzen verwalten",
    "cookie-präferenzen verwalten",
    "consent aktualisieren",
    "einwilligung verwalten",
    "einwilligungs-einstellungen",
 ]
 # CMP trigger markers: container / button texts that typically re-open
 # the consent banner. _check_banner_trigger() tests them as plain
 # substrings of the lowercased document text.
 _BANNER_TRIGGER_MARKERS = [
    "cookie-einstellungen öffnen",
    "cookie einstellungen öffnen",
    "ihre cookie-präferenzen",
    "ihre cookie praeferenzen",
    "consent banner",
    "datenschutz-einstellungen",
    "cookie-banner anzeigen",
 ]
 def _normalize_url(u: str) -> str:
    if not u:
        return ""
    if "://" not in u:
        u = "https://" + u
    p = urlparse(u)
    path = p.path.rstrip("/").lower()
    host = p.netloc.lower().replace("www.", "")
    return f"{host}{path}"
 def _check_versioned(text_lower: str) -> tuple[bool, str | None]:
    """Look for a version / date marker in the lowercased policy text.

    Returns (True, matched_text) for the first pattern in
    _VERSION_PATTERNS that matches, with the match truncated to 80
    characters, or (False, None) when none matches.
    """
    candidates = (re.search(pattern, text_lower) for pattern in _VERSION_PATTERNS)
    first_hit = next((hit for hit in candidates if hit), None)
    if first_hit is None:
        return False, None
    return True, first_hit.group()[:80]
 def _check_dynamic(text_lower: str) -> tuple[bool, str | None]:
    """Report the first entry of _DYNAMIC_MARKERS contained in the text.

    Returns (True, marker) on a hit, otherwise (False, None).
    """
    present = [marker for marker in _DYNAMIC_MARKERS if marker in text_lower]
    if present:
        return True, present[0]
    return False, None
 def _check_banner_trigger(text_lower: str) -> tuple[bool, str | None]:
    """Report the first entry of _BANNER_TRIGGER_MARKERS found in the text.

    Returns (True, marker) on a hit, otherwise (False, None).
    """
    found = next(
        (marker for marker in _BANNER_TRIGGER_MARKERS if marker in text_lower),
        None,
    )
    return (found is not None), found
 def _count_vendor_signals(text_lower: str) -> int:
    """Zaehle wieviele Vendor-Namen im Text — Indikator ob die Liste statisch
    drinsteht oder dynamisch nachgeladen wird."""
    vendor_signals = [
        "google", "meta", "facebook", "adobe", "microsoft", "linkedin",
        "tiktok", "amazon", "hotjar", "cloudflare", "stripe", "salesforce",
        "hubspot", "mailchimp", "pinterest", "snapchat", "youtube", "vimeo",
    ]
    return sum(1 for v in vendor_signals if v in text_lower)
 def detect_architecture(
    doc_url: str,
    doc_text: str,
    cmp_payloads: list[dict] | None = None,
    homepage_cmp_payloads: list[dict] | None = None,
 ) -> dict:
    """Assess the layer architecture of a cookie policy document.

    Args:
        doc_url:                URL of the detected cookie policy document
        doc_text:               full text of the cookie policy
        cmp_payloads:           CMP captures recorded DURING the document crawl
        homepage_cmp_payloads:  CMP captures from the initial homepage crawl

    Returns:
        dict with the keys layer_separation, versioned, dynamic_content,
        vendor_count_in_text, risk_label, recommendation, signals and
        doc_url_normalized.
    """
    haystack = (doc_text or "").lower()
    evidence: list[dict] = []

    # 1. Single vs. separate layer
    cmp_fired_on_doc = bool(cmp_payloads)
    has_trigger, trigger_text = _check_banner_trigger(haystack)
    if cmp_fired_on_doc or has_trigger:
        layer = "single"
        if cmp_fired_on_doc and has_trigger:
            evidence.append({
                "src": "cmp+marker",
                "detail": f"CMP feuerte auf Doc-URL + Marker '{trigger_text}'",
            })
        elif cmp_fired_on_doc:
            evidence.append({"src": "cmp", "detail": "CMP-Payload waehrend Doc-Crawl"})
        else:
            evidence.append({"src": "marker", "detail": f"Trigger-Marker: '{trigger_text}'"})
    elif homepage_cmp_payloads:
        # No CMP activity on the document itself, only on the homepage.
        layer = "separate"
        evidence.append({
            "src": "topology",
            "detail": "Banner triggert nur auf Homepage, Cookie-Doc ist eigene Seite",
        })
    else:
        layer = "unknown"

    # 2. Versioning
    versioned, version_text = _check_versioned(haystack)
    if versioned:
        evidence.append({"src": "version", "detail": f"Marker: '{version_text}'"})

    # 3. Dynamic content: a text marker or a CMP capture on the document
    marker_dynamic, dynamic_text = _check_dynamic(haystack)
    dynamic = marker_dynamic or cmp_fired_on_doc
    if dynamic and dynamic_text:
        evidence.append({"src": "dynamic", "detail": dynamic_text})

    # 4. Vendor count (indicator whether the list is static in the text)
    vendor_count = _count_vendor_signals(haystack)

    # Risk rating
    if layer == "unknown" and vendor_count < 3:
        risk = "rot"
        rec = (
            "Cookie-Richtlinie konnte nicht eindeutig identifiziert oder ist "
            "unzureichend. Pruefen Sie ob die Pflicht-Information nach "
            "Art. 13 DSGVO + §25 TDDDG ueberhaupt erreichbar ist."
        )
    elif layer == "single":
        risk = "gelb"
        if versioned:
            rec = (
                "Single-Layer mit Versionierung — gute Mindestloesung. "
                "Best Practice waere zusaetzlich eine getrennte statische "
                "Vendor-Tabelle die Crawler indexieren koennen."
            )
        else:
            rec = (
                "BMW-Pattern erkannt: Single-Layer-CMP (Banner-Trigger + "
                "Info-Layer in einer URL). Mindestanforderung erfuellt, aber "
                "OHNE Versionierung. Bei einer Aufsichtsbehoerden-Pruefung "
                "kann nicht belegt werden welche Vendor-Liste an einem "
                "bestimmten Stichtag aktiv war. Empfehlung: monatlicher "
                "Snapshot der dynamischen Vendor-Tabelle als versioniertes "
                "PDF im Archiv."
            )
    elif layer == "separate":
        if versioned:
            risk = "gruen"
            rec = (
                "Best Practice umgesetzt: separater Banner + versionierte "
                "Cookie-Richtlinie."
            )
        else:
            risk = "gelb"
            rec = (
                "Separate Cookie-Richtlinie vorhanden, aber ohne Versionierung. "
                "Snapshot-Archiv empfohlen."
            )
    else:
        # layer == "unknown" but at least three vendor names in the text
        risk = "gelb"
        rec = "Cookie-Policy-Architektur uneindeutig — manuelle Pruefung empfohlen."

    result = {
        "layer_separation": layer,
        "versioned": versioned,
        "dynamic_content": dynamic,
        "vendor_count_in_text": vendor_count,
        "risk_label": risk,
        "recommendation": rec,
        "signals": evidence,
        "doc_url_normalized": _normalize_url(doc_url),
    }
    return result
 def build_architecture_html(arch: dict) -> str:
    """Render the architecture block for the executive summary.

    Args:
        arch: result dict in the shape produced by detect_architecture().
              Missing keys and unexpected "risk_label" / "layer_separation"
              values fall back to neutral defaults instead of raising.

    Returns:
        An HTML fragment (a single styled <div>), or "" when arch is
        empty or None.
    """
    import html

    if not arch:
        return ""
    risk_colors = {
        "gruen": ("#16a34a", "#dcfce7", "#166534"),
        "gelb":  ("#d97706", "#fef3c7", "#92400e"),
        "rot":   ("#dc2626", "#fee2e2", "#991b1b"),
    }
    layer_labels = {
        "single": "Single-Layer (kombiniert)",
        "separate": "Separate Layer (Best Practice)",
        "unknown": "Nicht eindeutig",
    }
    # Both lookups degrade gracefully: unknown risk -> neutral grey,
    # unknown layer value -> same label as "unknown".
    border, bg, fg = risk_colors.get(
        arch.get("risk_label"), ("#94a3b8", "#f1f5f9", "#475569")
    )
    layer_label = layer_labels.get(
        arch.get("layer_separation"), layer_labels["unknown"]
    )
    versioned_lbl = "ja" if arch.get("versioned") else "nein"
    dynamic_lbl = "ja (CMP-generiert)" if arch.get("dynamic_content") else "statisch"
    # Values coming from the dict are escaped before they reach the markup.
    vendor_count = html.escape(str(arch.get("vendor_count_in_text", 0)))
    recommendation = html.escape(str(arch.get("recommendation", "")))
    return (
        f'<div style="font-family:-apple-system,BlinkMacSystemFont,sans-serif;'
        f'max-width:700px;margin:0 auto 14px;padding:12px 16px;'
        f'background:{bg};border:1px solid {border};border-radius:8px;color:{fg}">'
        f'<div style="font-size:11px;text-transform:uppercase;letter-spacing:1px;'
        f'font-weight:600;margin-bottom:6px">Cookie-Policy-Architektur</div>'
        f'<table style="width:100%;font-size:12px;margin:0">'
        f'<tr><td style="padding:2px 0;width:50%">Layer-Trennung</td>'
        f'<td><strong>{layer_label}</strong></td></tr>'
        f'<tr><td style="padding:2px 0">Versionierung</td>'
        f'<td><strong>{versioned_lbl}</strong></td></tr>'
        f'<tr><td style="padding:2px 0">Vendor-Liste</td>'
        f'<td><strong>{dynamic_lbl}</strong></td></tr>'
        f'<tr><td style="padding:2px 0">Vendor-Namen im Text</td>'
        f'<td><strong>{vendor_count}</strong></td></tr>'
        f'</table>'
        f'<div style="font-size:11px;margin-top:8px;padding-top:8px;'
        f'border-top:1px solid {border};font-style:italic">'
        f'{recommendation}</div>'
        f'</div>'
    )
@@ -16,8 +16,9 @@ IMPRESSUM_CHECKLIST = [
        "label": "Name des Anbieters",
        "level": 1, "parent": None,
        "patterns": [
-            r"(?:gmbh|ag|e\.v\.|ohg|kg|gbr|ug|mbh|inc|ltd)",
+            # Word-Boundaries verhindern Falsch-Treffer ("ag" in "samstag")
-            r"firma", r"unternehmen",
+            r"\b(?:gmbh|ag|e\.v\.|ohg|kg|gbr|ug|mbh|inc|ltd|aktiengesellschaft|kommanditgesellschaft|partnerschaft\s+mbb)\b",
            r"\bfirma\s+\w+", r"\bunternehmen\s+\w+",
        ],
        "severity": "HIGH",
        "hint": "§5(1) Nr.1 TMG: Vollstaendiger Firmenname MIT Rechtsform (z.B. 'Muster GmbH', nicht nur 'Muster'). Bei Einzelunternehmen: Vor- und Nachname plus ggf. Geschaeftsbezeichnung. Haeufiger Abmahngrund: Nur Markenname ohne juristische Person.",
@@ -178,9 +179,13 @@ IMPRESSUM_CHECKLIST = [
        "label": "Name der vertretungsberechtigten Person",
        "level": 2, "parent": "representative",
        "patterns": [
-            r"(?:gesch(?:ae|ä)ftsf(?:ue|ü)hr\w*|vorstand|inhaber)\s*:?\s*[a-zA-Z\u00c0-\u017e]",
+            r"(?:gesch(?:ae|ä)ftsf(?:ue|ü)hr\w*|vorstand|inhaber|aufsichtsrats?)\s*[:\-]?\s*[a-zA-Z\u00c0-\u017e]",
-            r"(?:vertreten\s+durch|repr(?:ae|ä)sentiert)\s*:?\s*[a-zA-Z\u00c0-\u017e]",
+            # "Vorstand (Milan Nedeljkovic, ...)" - BMW-Pattern mit Klammer-Liste
-            r"(?:gesch(?:ae|ä)ftsf(?:ue|ü)hrung)\s*:?\s*(?:dr\.?\s+|prof\.?\s+)?[a-zA-Z\u00c0-\u017e]",
+            r"(?:vorstand|gesch(?:ae|ä)ftsf(?:ue|ü)hrung|aufsichtsrats?)\s*\(\s*[a-zA-Z\u00c0-\u017e]",
            r"(?:vertreten\s+durch|repr(?:ae|ä)sentiert)\s*[:\-]?\s*(?:den\s+vorstand\s*\(?|[a-zA-Z\u00c0-\u017e])",
            r"(?:gesch(?:ae|ä)ftsf(?:ue|ü)hrung)\s*[:\-]?\s*(?:dr\.?\s+|prof\.?\s+)?[a-zA-Z\u00c0-\u017e]",
            # "Vorsitzender des Aufsichtsrats: Nicolas Peter"
            r"(?:vorsitzend\w+|stellv\w*\s+vorsitz\w*)\s+(?:des\s+\w+\s*)?[:\-]?\s*[a-zA-Z\u00c0-\u017e]",
        ],
        "severity": "LOW",
        "hint": "Voller Vor- und Nachname mit Funktionsbezeichnung erforderlich (z.B. 'Geschaeftsfuehrung: Dr. Max Mustermann').",
@@ -234,11 +239,12 @@ IMPRESSUM_CHECKLIST = [
        "label": "Zustaendige Kammer benannt",
        "level": 2, "parent": "regulated_profession",
        "patterns": [
-            r"(?:(?:ae|ä)rztekammer|rechtsanwaltskammer|steuerberaterkammer|architektenkammer|ingenieurkammer|apothekerkammer)",
+            r"(?:(?:ae|ä)rztekammer|rechtsanwaltskammer|steuerberaterkammer|architektenkammer|ingenieurkammer|apothekerkammer|handwerkskammer|tier(?:ae|ä)rztekammer|psychotherapeutenkammer)",
            r"\bihk\b|industrie-?\s+und\s+handelskammer",
            r"(?:mitglied|zugelassen|eingetragen)\s+(?:bei|in|der)\s+(?:der\s+)?(?:\w+)?kammer",
        ],
-        "severity": "LOW",
+        "severity": "INFO",  # P9: konditional - nur kammerpflichtige Berufe
-        "hint": "Zustaendige Kammer mit vollem Namen und Sitz nennen (z.B. 'Rechtsanwaltskammer Muenchen').",
+        "hint": "Zustaendige Kammer mit vollem Namen und Sitz nennen (z.B. 'Rechtsanwaltskammer Muenchen', 'IHK Muenchen'). Nur relevant fuer kammerpflichtige Berufe.",
    },
    {
        "id": "profession_title",
@@ -314,6 +320,7 @@ IMPRESSUM_CHECKLIST = [
            r"distanzier|macht\s+sich\s+(?:nicht|kein)\s+(?:zu\s+eigen|verantwortlich)",
        ],
        "severity": "LOW",
        "invert": True,  # Anti-Pattern: passed wenn NICHT gefunden
        "hint": "Der klassische Link-Disclaimer ('Wir distanzieren uns von verlinkten Inhalten') ist seit BGH (I ZR 317/01) rechtlich wirkungslos. Empfehlung: Entfernen Sie pauschale Disclaimer — sie schuetzen nicht und koennen kontraproduktiv sein.",
    },
 ]
@@ -15,7 +15,9 @@ import httpx
 logger = logging.getLogger(__name__)
 OLLAMA_URL = os.getenv("OLLAMA_URL", "http://host.docker.internal:11434")
-OLLAMA_MODEL = os.getenv("OLLAMA_VERIFY_MODEL", "qwen3.5:35b-a3b")
+# P13: qwen3:30b-a3b liefert zuverlaessige JSON-Antworten im Batch-Modus.
 # qwen3.5:35b-a3b lieferte mit format='json' + langem Prompt leere Strings.
 OLLAMA_MODEL = os.getenv("OLLAMA_VERIFY_MODEL", "qwen3:30b-a3b")
 TIMEOUT = 30.0
@@ -5,6 +5,8 @@ Pass 1: Run all L1 checks ("Is it mentioned?")
 Pass 2: Run L2 checks only where their L1 parent passed ("Is it correct?")
 """
 from __future__ import annotations
 import logging
 import re
@@ -83,6 +85,7 @@ def check_document_completeness(
    doc_type: str,
    doc_title: str,
    doc_url: str,
    business_profile: dict | None = None,
 ) -> list[dict]:
    """Check a legal document against its type-specific requirements.
@@ -90,9 +93,20 @@ def check_document_completeness(
      L1 — Is the mandatory field mentioned at all?
      L2 — Is it correct/complete? (only checked if L1 parent passed)
    business_profile (optional) wird genutzt um Checks die fuer das
    spezifische Unternehmen nicht anwendbar sind als 'skipped' zu
    markieren (z.B. OS-Plattform/VSBG bei no_direct_sales=True).
    Returns a list of findings (summary + missing items).
    """
    findings = []
    no_direct_sales = bool((business_profile or {}).get("no_direct_sales"))
    # P9: Welche Check-IDs sind bei OEM-Konfigurator-Pattern obsolet.
    skip_check_ids: set[str] = set()
    if no_direct_sales:
        skip_check_ids.update([
            "dispute_resolution",  # OS-Plattform / VSBG nur B2C-Direkthaendler
        ])
    # Strip soft hyphens ( / \xad) that CMS tools insert for word-breaking
    # — they break regex matches on compound words like "Datenübertragbarkeit"
    text_clean = text.replace("\xad", "").replace("&shy;", "")
@@ -135,8 +149,25 @@ def check_document_completeness(
    for check in l1_checks:
        is_info = check.get("severity") == "INFO"
        # P9: Profil-basiertes Skip (OEM-Pattern -> OS-Plattform raus)
        if check["id"] in skip_check_ids:
            all_checks.append({
                "id": check["id"], "label": check["label"],
                "passed": False, "severity": "INFO",
                "matched_text": "", "level": 1, "parent": None,
                "skipped": True,
                "hint": "Nicht anwendbar: Unternehmen betreibt keinen "
                        "Direkt-Vertrieb an Verbraucher (OEM-Konfigurator-Pattern).",
            })
            continue
        match = _match_patterns(check["patterns"], text_lower)
-        passed = match is not None
+        # P9: "invert"=True bedeutet Anti-Pattern (z.B. illegaler Link-
        # Disclaimer): passed wenn NICHT gefunden, fail wenn gefunden.
        if check.get("invert"):
            passed = match is None
            match = None if passed else match
        else:
            passed = match is not None
        if passed:
            passed_l1_ids.add(check["id"])
            if not is_info:
@@ -168,18 +199,26 @@ def check_document_completeness(
    for check in l2_checks:
        parent = check.get("parent")
        is_info = check.get("severity") == "INFO"
        skipped = parent not in passed_l1_ids
        passed = False
        matched_text = ""
        if not skipped:
            l2_total += 1
            match = _match_patterns(check["patterns"], text_lower)
            passed = match is not None
-            if passed:
+            # P9: INFO-L2-Checks (konditional, z.B. Kammer) zaehlen NICHT
            # in correctness-pct und erscheinen nicht als Fail-Finding.
            if is_info:
                if passed:
                    matched_text = _extract_context(text_lower, match)
                # weder l2_total++ noch findings.append: kein Fail-Eintrag
            else:
                l2_total += 1
            if passed and not is_info:
                l2_passed += 1
                matched_text = _extract_context(text_lower, match)
-            else:
+            elif not passed and not is_info:
                findings.append({
                    "code": f"DSI-DETAIL-{check['id'].upper()}",
                    "severity": check.get("severity", "MEDIUM"),
@@ -121,11 +121,37 @@ def _dedup_key(label: str) -> str:
    return label
 _CONDITIONAL_MARKERS = ("falls ", "sofern ", "wenn ", "soweit ",
                        "bei bedarf", "ggf.", "gegebenenfalls")
 def _is_hard_finding(r: dict) -> bool:
    """Echtes Finding = wir haben einen positiven Treffer im Text der den
    Verstoss belegt. Stille im Text reicht NICHT — das wandert ins MC-Audit
    als "selbst pruefen", nicht ins Email als HIGH-Drohung.
    Heuristik:
      - matched_text nicht leer = textuelle Evidenz vorhanden → hart
      - konditionales Label ("falls / sofern / wenn") UND matched_text leer
        → weich (Pre-Condition nicht belegt) → raus aus Top-Fails
      - sonst: hart (klassische Pflichtangaben-Lücke wie "DSB fehlt")
    """
    mt = (r.get("matched_text") or "").strip()
    if mt:
        return True
    label_low = (r.get("label") or "").lower()
    if any(m in label_low for m in _CONDITIONAL_MARKERS):
        return False
    return True
 def top_fails(check_results: list[dict], n: int = 10) -> list[dict]:
    """Return top-N failing MCs sorted by severity then label.
    Skipped + passed MCs are excluded. INFO severity is excluded by
-    default since those are guidance, not findings.
+    default since those are guidance, not findings. Konditionale MCs
    ohne Negativ-Beleg (P8) werden ebenfalls ausgesteuert — sie
    erscheinen nur noch im MC-Audit als "selbst pruefen".
    Near-duplicates (multiple MCs that all complain about "einfache
    Sprache" / "Einwilligungsaufforderung" / ...) are collapsed to ONE
@@ -136,6 +162,7 @@ def top_fails(check_results: list[dict], n: int = 10) -> list[dict]:
        r for r in (check_results or [])
        if not r.get("passed") and not r.get("skipped")
        and (r.get("severity") or "").upper() != "INFO"
        and _is_hard_finding(r)
    ]
    fails.sort(key=lambda r: (
        _SEV_RANK.get((r.get("severity") or "MEDIUM").upper(), 5),
@@ -0,0 +1,242 @@
 """
 TDM-Reservation-Check (§ 44b UrhG / EU CDSM Art. 4).
 Prueft pro Domain ob ein maschinenlesbarer Nutzungsvorbehalt fuer
 Text-and-Data-Mining gesetzt ist. Quellen:
  1. robots.txt — User-agent: * Disallow: /  (oder spezifisch fuer uns)
  2. /ai.txt — ai.txt-Datei (Vorschlag von Spawning, kein OpenAI-Standard)
  3. HTTP-Header `tdm-reservation: 1` auf Homepage
  4. HTML <meta name="tdm-reservation" content="1"> auf Homepage
  5. HTML <meta name="robots" content="noai|noimageai"> Tags
 Status-Interpretation:
  status=allowed   -> kein Vorbehalt, crawlbar
  status=reserved  -> expliziter Vorbehalt, NICHT crawlen
  status=denied    -> robots.txt-Zugriff aktiv blockiert (403/401)
                      => konservativ: NICHT crawlen
  status=unknown   -> Server-Error (500/timeout/DNS) auf robots.txt
                      => crawlbar, aber 24h-Recheck markiert
 Cache via sidecar SQLite (gleiche DB wie compliance_audit_log), 24h TTL.
 """
 from __future__ import annotations
 import json
 import logging
 import os
 import sqlite3
 import time
 from datetime import datetime, timezone
 from pathlib import Path
 from typing import Literal
 from urllib.parse import urlparse
 import httpx
 # Module-level logger named after this module.
 logger = logging.getLogger(__name__)
 # SQLite file that holds the cache table; overridable via COMPLIANCE_AUDIT_DB.
 DB_PATH = os.getenv("COMPLIANCE_AUDIT_DB", "/data/compliance_audits.db")
 # Cache rows older than this many seconds (24 h) are treated as missing.
 CACHE_TTL_SECONDS = 24 * 3600
 # The four status values a probe result can carry.
 Status = Literal["allowed", "reserved", "denied", "unknown"]
 # User-Agent header sent with every probe request (a headless-Chrome string).
 _DEFAULT_UA = (
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) HeadlessChrome/120.0.0.0 Safari/537.36"
 )
 def _ensure_cache_table() -> None:
    """Create the TDM cache table and its timestamp index if they are missing.

    Also creates the parent directory of ``DB_PATH`` when it does not exist.
    """
    Path(DB_PATH).parent.mkdir(parents=True, exist_ok=True)
    # sqlite3's connection context manager only handles transactions and never
    # closes the connection, so open/close explicitly to avoid leaking handles.
    conn = sqlite3.connect(DB_PATH)
    try:
        conn.executescript("""
            CREATE TABLE IF NOT EXISTS tdm_reservation_cache (
                domain     TEXT PRIMARY KEY,
                ts         TEXT NOT NULL,
                status     TEXT NOT NULL,
                signals    TEXT NOT NULL    -- JSON list[dict]
            );
            CREATE INDEX IF NOT EXISTS idx_tdm_ts ON tdm_reservation_cache(ts);
        """)
    finally:
        conn.close()
 def _cache_get(domain: str) -> dict | None:
    """Return the cached probe result for ``domain`` if it is still fresh.

    Returns:
        A dict with ``domain``, ``status``, ``signals``, ``cached=True`` and
        ``ts`` when a row exists and is no older than ``CACHE_TTL_SECONDS``.
        ``None`` when there is no row, the row has expired, or any error
        occurs; errors are only logged at debug level (best-effort cache).
    """
    try:
        _ensure_cache_table()
        # "with sqlite3.connect(...)" would not close the connection; close it
        # explicitly once the row has been fetched.
        conn = sqlite3.connect(DB_PATH)
        try:
            conn.row_factory = sqlite3.Row
            row = conn.execute(
                "SELECT * FROM tdm_reservation_cache WHERE domain=?", (domain,),
            ).fetchone()
        finally:
            conn.close()
        if not row:
            return None
        ts = datetime.fromisoformat(row["ts"]).timestamp()
        if time.time() - ts > CACHE_TTL_SECONDS:
            return None
        return {
            "domain": domain,
            "status": row["status"],
            "signals": json.loads(row["signals"]),
            "cached": True,
            "ts": row["ts"],
        }
    except Exception as e:
        logger.debug("tdm cache_get failed for %s: %s", domain, e)
        return None
 def _cache_put(domain: str, status: Status, signals: list[dict]) -> None:
    """Insert or replace the cache row for ``domain`` with the current UTC time.

    ``signals`` is stored as a JSON string. Failures are logged as warnings
    and otherwise ignored — caching is best-effort.
    """
    try:
        _ensure_cache_table()
        conn = sqlite3.connect(DB_PATH)
        try:
            # "with conn" commits on success and rolls back on error; it does
            # NOT close the connection, hence the surrounding try/finally.
            with conn:
                conn.execute(
                    "INSERT OR REPLACE INTO tdm_reservation_cache "
                    "(domain, ts, status, signals) VALUES (?, ?, ?, ?)",
                    (
                        domain,
                        datetime.now(timezone.utc).isoformat(),
                        status,
                        json.dumps(signals, ensure_ascii=False),
                    ),
                )
        finally:
            conn.close()
    except Exception as e:
        logger.warning("tdm cache_put failed for %s: %s", domain, e)
 def _base_domain(url_or_domain: str) -> str:
    if not url_or_domain:
        return ""
    if "://" not in url_or_domain:
        url_or_domain = "https://" + url_or_domain
    netloc = urlparse(url_or_domain).netloc.lower()
    return netloc.replace("www.", "")
 async def _fetch_status(client: httpx.AsyncClient, url: str) -> tuple[int, str, dict]:
    """GET ``url`` and return ``(status_code, body, headers)``.

    The body is the first 16384 characters of the response text, or ``""``
    when the response has no content. Any exception is logged at debug level
    and reported as ``(0, "", {})``.
    """
    try:
        response = await client.get(url)
        if response.content:
            text = response.text[:16384]
        else:
            text = ""
        return response.status_code, text, dict(response.headers)
    except Exception as exc:
        logger.debug("tdm fetch %s failed: %s", url, exc)
        return 0, "", {}
 def _robots_disallows_us(body: str) -> bool:
    """Parse robots.txt — true if our group has Disallow: /."""
    if not body:
        return False
    relevant_groups = ["*", "claudebot", "anthropic-ai", "gptbot",
                       "google-extended", "ccbot", "breakpilot"]
    current_uas: list[str] = []
    in_our_group = False
    for raw in body.splitlines():
        line = raw.split("#", 1)[0].strip()
        if not line:
            in_our_group = False
            current_uas = []
            continue
        if ":" not in line:
            continue
        key, val = (s.strip().lower() for s in line.split(":", 1))
        if key == "user-agent":
            current_uas.append(val)
            in_our_group = any(ua in relevant_groups for ua in current_uas)
        elif key == "disallow" and in_our_group:
            if val == "/" or val == "":
                if val == "/":
                    return True
    return False
 def _meta_has_reservation(body: str) -> bool:
    """Detect <meta name="tdm-reservation|robots|googlebot"> with noai/noimageai/1."""
    low = body.lower()
    needles = [
        'name="tdm-reservation" content="1"',
        "name='tdm-reservation' content='1'",
        '"noai"', '"noimageai"',
        "content=\"noai", "content='noai",
    ]
    return any(n in low for n in needles)
 async def check_tdm_reservation(domain_or_url: str) -> dict:
    """Probe a domain for machine-readable TDM reservations.

    Probes run in this order and stop at the first one that changes the
    status away from "allowed": robots.txt, /ai.txt, then the homepage
    (``tdm-reservation`` response header, then HTML meta markers).

    Args:
        domain_or_url: A URL or bare host; normalised via ``_base_domain``.

    Returns:
        A dict with ``domain``, ``status``, ``signals`` (list of dicts with
        ``src`` plus probe-specific keys), ``cached`` and ``ts``. For empty
        input the dict has status "unknown" and no ``ts`` key. A cache hit is
        returned as produced by ``_cache_get``.
    """
    domain = _base_domain(domain_or_url)
    if not domain:
        return {"domain": "", "status": "unknown", "signals": [], "cached": False}
    cached = _cache_get(domain)
    if cached:
        return cached
    signals: list[dict] = []
    status: Status = "allowed"
    headers = {"User-Agent": _DEFAULT_UA, "Accept": "*/*"}
    # NOTE(review): every probe below targets "www.<domain>" although
    # _base_domain removed the "www." — presumably hosts without a www alias
    # end up as "unknown"; confirm this is intended.
    async with httpx.AsyncClient(
        timeout=12.0, follow_redirects=True, headers=headers,
    ) as client:
        # robots.txt: try https first; fall back to http only when the https
        # request produced no response at all (_fetch_status returned code 0).
        for scheme in ("https", "http"):
            r_code, r_body, _ = await _fetch_status(
                client, f"{scheme}://www.{domain}/robots.txt",
            )
            if r_code == 0 and scheme == "https":
                continue
            signals.append({"src": "robots.txt", "status_code": r_code,
                            "scheme": scheme})
            if r_code in (401, 403):
                status = "denied"
            elif r_code == 200 and _robots_disallows_us(r_body):
                status = "reserved"
                signals[-1]["detail"] = "Disallow: / for relevant UA group"
            elif r_code not in (200, 404):
                # Any other code, including 0 from a failed http attempt.
                status = "unknown"
            # The first evaluated response decides; no second scheme is tried.
            break
        if status == "allowed":
            # A 200 on /ai.txt alone counts as a reservation; its content is
            # not inspected.
            # NOTE(review): with follow_redirects=True a site that answers 200
            # for unknown paths would be classified "reserved" — verify.
            ai_code, _, _ = await _fetch_status(
                client, f"https://www.{domain}/ai.txt",
            )
            if ai_code == 200:
                status = "reserved"
                signals.append({"src": "ai.txt", "status_code": 200,
                                "detail": "ai.txt present"})
        if status == "allowed":
            # Homepage: the response header is checked before the HTML body.
            h_code, h_body, h_hdrs = await _fetch_status(
                client, f"https://www.{domain}/",
            )
            if h_code == 200:
                if h_hdrs.get("tdm-reservation") == "1":
                    status = "reserved"
                    signals.append({"src": "http-header",
                                    "detail": "tdm-reservation: 1"})
                elif _meta_has_reservation(h_body):
                    status = "reserved"
                    signals.append({"src": "html-meta",
                                    "detail": "noai/tdm-reservation meta"})
    # Every outcome is written to the cache, including "unknown".
    _cache_put(domain, status, signals)
    return {
        "domain": domain,
        "status": status,
        "signals": signals,
        "cached": False,
        "ts": datetime.now(timezone.utc).isoformat(),
    }
 def is_crawl_allowed(result: dict) -> bool:
    """Return True when the probe result permits crawling.

    Only the statuses 'allowed' and 'unknown' are crawlable; a missing or
    empty status is treated as 'unknown'.
    """
    status = result.get("status") or "unknown"
    return status == "allowed" or status == "unknown"
@@ -0,0 +1,277 @@
 """
 Aggregator: Doc-Check-Results + cmp_vendors + redundancy_report
            -> einheitliche Finding-Records fuer unified_findings_store.
 Speichert nur ABGELEITETE/normalisierte Findings (siehe Memory
 'feedback_oem_data_legal.md'): keine rohen CMP-Cookie-Texte, keine
 1:1-Spiegelung fremder Vendor-Listen — nur eigene Risk-/Status-Bewertung.
 Hook:
  from compliance.services.unified_findings_collector import collect
  from compliance.services.unified_findings_store import record_findings
  findings = collect(check_id, results, cmp_vendors, redundancy_report, doc_texts)
  record_findings(check_id, findings)
 """
 from __future__ import annotations
 import logging
 from typing import Any
 logger = logging.getLogger(__name__)
 # Fallback severity per source type. _from_doc_check uses it when a check
 # carries no severity of its own (it only looks up "mc" / "pflichtangabe").
 _SEVERITY_DEFAULT = {
    "mc": "MEDIUM",
    "pflichtangabe": "MEDIUM",
    "vendor": "MEDIUM",
    "redundanz": "LOW",
 }
 # Maps a cmp_vendor flag to (default severity, short German label). The flag
 # string itself is what _from_vendors passes to the action-recipe lookup.
 _VENDOR_FLAG_SEVERITY = {
    "no_cookies_listed": ("HIGH", "Cookie-Auflistung fehlt"),
    "no_country": ("MEDIUM", "Sitzland des Anbieters fehlt"),
    "no_privacy_url": ("HIGH", "Datenschutzerklaerung des Anbieters fehlt"),
    "broken_privacy_url": ("HIGH", "Datenschutz-URL nicht erreichbar"),
    "no_opt_out_url": ("MEDIUM", "Widerspruchs-/Opt-Out-Link fehlt"),
    "broken_opt_out": ("MEDIUM", "Opt-Out-Link nicht erreichbar"),
    "no_name": ("HIGH", "Anbieter-Name fehlt"),
    "no_purpose": ("HIGH", "Verarbeitungszweck fehlt"),
    "cookies_no_expiry": ("LOW", "Cookie-Speicherdauer fehlt"),
    "cookies_no_names": ("LOW", "Cookie-Namen fehlen"),
 }
 def _safe_recipe(key: str) -> dict:
    """Lookup mit lazy-import — recipes-Modul ist optional."""
    try:
        from compliance.services.finding_action_recipes import recipe_for
        r = recipe_for(key)
        return dict(r) if r else {}
    except Exception:
        return {}
 def _safe_anchor(label: str, doc_text: str, doc_id: str) -> dict:
    """Anchor-Lookup mit lazy-import + best-effort."""
    if not label or not doc_text:
        return {}
    try:
        from compliance.services.doc_anchor_locator import locate_anchor
        a = locate_anchor(label, doc_text, doc_id)
        return a or {}
    except Exception:
        return {}
 def _from_doc_check(
    check_id: str,
    r: Any,
    doc_text: str,
 ) -> list[dict]:
    """Convert one DocCheckResult into unified-finding rows."""
    out: list[dict] = []
    if r.error and r.error.startswith("Nicht anwendbar"):
        out.append({
            "source_type": "pflichtangabe",
            "doc_type": r.doc_type,
            "severity": "INFO",
            "status": "na",
            "regulation": "",
            "label": f"{r.label}: {r.error}",
            "hint": r.error,
            "action_recipe": {},
            "payload": {"scenario": r.scenario},
        })
        return out
    if r.error:
        out.append({
            "source_type": "pflichtangabe",
            "doc_type": r.doc_type,
            "severity": "HIGH",
            "status": "failed",
            "regulation": "",
            "label": f"{r.label}: Dokument nicht erreichbar",
            "hint": r.error[:400],
            "action_recipe": {},
            "payload": {},
        })
        return out
    for c in (r.checks or []):
        is_mc = (c.id or "").startswith("mc-")
        source = "mc" if is_mc else "pflichtangabe"
        if c.passed:
            status = "passed"
        elif c.skipped:
            status = "skipped"
        else:
            status = "failed"
        severity = (c.severity or _SEVERITY_DEFAULT[source]).upper()
        # Nur fuer Fails Anchor + Recipe — Pass-Eintraege halten wir mager
        recipe: dict = {}
        anchor: dict = {}
        if status == "failed":
            # Recipe per Label-Substring (mehr als nur exakte Keys)
            recipe = _safe_recipe(c.label or "") or _safe_recipe(c.id or "")
            anchor = _safe_anchor(c.label or "", doc_text, r.doc_type)
        out.append({
            "source_type": source,
            "doc_type": r.doc_type,
            "severity": severity,
            "status": status,
            "regulation": c.regulation or "",
            "label": c.label or "",
            "hint": c.hint or "",
            "action_recipe": recipe,
            "anchor_excerpt": (anchor.get("anchor_phrase") or "")[:800],
            "anchor_conf": _conf_to_score(anchor),
            "payload": {
                "mc_id": c.id,
                "level": c.level,
                "parent": c.parent,
                "matched_text": (c.matched_text or "")[:300],
                "article": c.article or "",
                "anchor_method": anchor.get("method"),
                "anchor_position": anchor.get("position_hint"),
            },
        })
    return out
 def _conf_to_score(anchor: dict) -> float:
    if not anchor:
        return 0.0
    try:
        return float(anchor.get("score") or 0.0)
    except (TypeError, ValueError):
        return 0.0
 def _from_vendors(check_id: str, vendors: list[dict]) -> list[dict]:
    """Per-vendor flag -> finding row."""
    out: list[dict] = []
    for v in vendors or []:
        name = v.get("name") or v.get("vendor_name") or "Unbekannter Anbieter"
        country = v.get("country") or ""
        risk = v.get("compliance_risk") or {}
        for flag in (v.get("compliance_flags") or v.get("flags") or []):
            sev, label = _VENDOR_FLAG_SEVERITY.get(
                flag, ("LOW", flag.replace("_", " ").title()),
            )
            out.append({
                "source_type": "vendor",
                "doc_type": "-",
                "severity": sev,
                "status": "failed",
                "regulation": "DSGVO",
                "label": f"{name} — {label}",
                "hint": _vendor_hint(flag, name),
                "action_recipe": _safe_recipe(flag),
                "vendor_name": name,
                "category": (v.get("category") or "")[:64],
                "payload": {
                    "flag": flag,
                    "country": country,
                    "compliance_score": v.get("compliance_score"),
                    "category": v.get("category"),
                    "risk_label": risk.get("label"),
                    "high_risk_cookies": risk.get("high_risk_cookie_count"),
                    "schrems_ii_cookies": risk.get("schrems_ii_affected_cookies"),
                },
            })
    return out
 def _vendor_hint(flag: str, name: str) -> str:
    hints = {
        "no_cookies_listed":
            f"Bei '{name}' sind keine Cookies dokumentiert — DSK-Orientierungshilfe "
            "verlangt Name + Zweck + Speicherdauer pro Cookie.",
        "no_country":
            f"Sitzland von '{name}' fehlt — bei Drittland-Anbieter "
            "Art. 44 ff. DSGVO erforderlich.",
        "no_privacy_url":
            f"Link zur Datenschutzerklaerung von '{name}' fehlt — Art. 13 Abs. 1 lit. e.",
        "broken_privacy_url":
            f"Privacy-URL von '{name}' nicht erreichbar (404/Timeout).",
        "no_opt_out_url":
            f"Opt-Out/Widerspruchs-Link fuer '{name}' fehlt — Art. 21 DSGVO.",
        "broken_opt_out":
            f"Opt-Out-Link von '{name}' nicht erreichbar.",
        "no_name":
            "Anbieter ohne Name erfasst — Art. 13 Abs. 1 lit. a.",
        "no_purpose":
            f"Verarbeitungszweck fuer '{name}' fehlt — Art. 13 Abs. 1 lit. c.",
    }
    return hints.get(flag, f"Flag: {flag}")
 def _from_redundancies(check_id: str, report: dict | None) -> list[dict]:
    """Each redundancy category -> finding row (status='info', sev='LOW')."""
    if not report:
        return []
    out: list[dict] = []
    for r in (report.get("redundancies") or []):
        cat = r.get("category_label") or r.get("category") or "Unbekannt"
        vendors = r.get("vendors") or []
        sav = r.get("estimated_saving_year_eur") or [0, 0]
        out.append({
            "source_type": "redundanz",
            "doc_type": "-",
            "severity": "LOW",
            "status": "info",
            "regulation": "Cost-Optimization",
            "label": f"Mehrfach-Anbieter in '{cat}' ({len(vendors)} Tools)",
            "hint": (
                f"Anbieter: {', '.join(vendors[:6])}"
                + (f" (+{len(vendors)-6} weitere)" if len(vendors) > 6 else "")
                + (f" · EU-Empfehlung: {r['suggested_eu_tool']}"
                   if r.get("suggested_eu_tool") else "")
            ),
            "action_recipe": {
                "what": "Konsolidierung auf 1 Tool pro Kategorie pruefen.",
                "why": (r.get("consolidation_hint") or
                        "Mehrfach-Lizenzen + Vertrags-Overhead reduzieren."),
                "fix_text": "Migrations-Plan zu einem Anbieter erarbeiten; "
                            "Vertraege ueberlappend kuendigen.",
            },
            "category": cat,
            "payload": {
                "vendors": vendors[:20],
                "saving_year_eur_low": sav[0],
                "saving_year_eur_high": sav[1],
                "suggested_eu_tool": r.get("suggested_eu_tool"),
                "caveats": (r.get("caveats") or [])[:4],
            },
        })
    return out
 def collect(
    check_id: str,
    results: list[Any],
    cmp_vendors: list[dict] | None,
    redundancy_report: dict | None,
    doc_texts: dict[str, str] | None = None,
 ) -> list[dict]:
    """Gather findings from doc checks, vendors and redundancies into one list.

    Each source is converted independently; a failure in one source (or in a
    single doc result) is logged as a warning and does not stop the others.
    The returned list is ready for ``record_findings()``.
    """
    findings: list[dict] = []
    text_by_doc = doc_texts or {}
    for result in (results or []):
        try:
            rows = _from_doc_check(
                check_id, result, text_by_doc.get(result.doc_type, ""),
            )
        except Exception as e:
            logger.warning("collect: doc result %s failed: %s",
                           getattr(result, "doc_type", "?"), e)
        else:
            findings.extend(rows)
    try:
        vendor_rows = _from_vendors(check_id, cmp_vendors or [])
    except Exception as e:
        logger.warning("collect: vendors failed: %s", e)
    else:
        findings.extend(vendor_rows)
    try:
        redundancy_rows = _from_redundancies(check_id, redundancy_report)
    except Exception as e:
        logger.warning("collect: redundancies failed: %s", e)
    else:
        findings.extend(redundancy_rows)
    logger.info("collect: check=%s total_findings=%d", check_id, len(findings))
    return findings
@@ -0,0 +1,190 @@
 """
 Unified-Findings sidecar store.
 A compliance check produces findings from 4 sources today:
  - Master-Controls (mc_results table — already persisted)
  - Pflichtangaben (L1/L2 doc checks, e.g. Impressum-Vollstaendigkeit)
  - Vendor scans (per cmp_vendor: missing privacy url, no opt-out, ...)
  - Redundancies (multi-vendor in same category)
 Previously the DSB had to look in 4 different blocks of the email to
 find everything. This store flattens all of them into ONE searchable
 table so the /audit/<check_id> frontend can show a unified list with
 source / severity / status / doc_type filters.
 Sidecar SQLite (same DB as compliance_audit_log) — no Postgres
 migration needed.
 """
 from __future__ import annotations
 import json
 import logging
 import os
 import sqlite3
 from pathlib import Path
 logger = logging.getLogger(__name__)
 DB_PATH = os.getenv("COMPLIANCE_AUDIT_DB", "/data/compliance_audits.db")
 def _ensure_table() -> None:
    """Create the ``unified_findings`` table and its indexes if they are missing.

    Also creates the parent directory of ``DB_PATH`` when it does not exist.
    """
    Path(DB_PATH).parent.mkdir(parents=True, exist_ok=True)
    # sqlite3's connection context manager only handles transactions and never
    # closes the connection, so open/close explicitly to avoid leaking handles.
    conn = sqlite3.connect(DB_PATH)
    try:
        conn.executescript("""
            CREATE TABLE IF NOT EXISTS unified_findings (
                id              INTEGER PRIMARY KEY AUTOINCREMENT,
                check_id        TEXT NOT NULL,
                source_type     TEXT NOT NULL,     -- mc|pflichtangabe|vendor|redundanz
                doc_type        TEXT,              -- impressum|dse|cookie|... or '-' for vendor/redundanz
                severity        TEXT,              -- CRITICAL|HIGH|MEDIUM|LOW|INFO
                status          TEXT,              -- failed|passed|skipped|na|info
                regulation      TEXT,
                label           TEXT,
                hint            TEXT,
                action_recipe   TEXT,              -- JSON {what,why,fix_text,where,example}
                anchor_excerpt  TEXT,
                anchor_conf     REAL,
                vendor_name     TEXT,
                category        TEXT,
                payload         TEXT               -- JSON extras (matched_text, cookies count, ...)
            );
            CREATE INDEX IF NOT EXISTS idx_uf_check    ON unified_findings(check_id);
            CREATE INDEX IF NOT EXISTS idx_uf_source   ON unified_findings(check_id, source_type);
            CREATE INDEX IF NOT EXISTS idx_uf_status   ON unified_findings(check_id, status);
            CREATE INDEX IF NOT EXISTS idx_uf_severity ON unified_findings(check_id, severity);
        """)
    finally:
        conn.close()
 def record_findings(check_id: str, findings: list[dict]) -> int:
    """Replace all stored findings of ``check_id`` with ``findings``.

    Existing rows for the check are deleted and the new ones inserted in one
    transaction, so calling this repeatedly with the same input is idempotent.
    Text fields are truncated to fixed maximum lengths; ``action_recipe`` and
    ``payload`` are stored as JSON.

    Returns:
        The number of rows written. 0 when ``check_id`` is empty, when there
        are no findings, or when an error occurred (logged as a warning; the
        transaction is rolled back in that case).
    """
    if not check_id:
        return 0
    try:
        _ensure_table()
        rows = [
            (
                check_id,
                (f.get("source_type") or "mc")[:24],
                (f.get("doc_type") or "")[:32],
                (f.get("severity") or "MEDIUM").upper()[:16],
                (f.get("status") or "failed")[:16],
                (f.get("regulation") or "")[:64],
                (f.get("label") or "")[:400],
                (f.get("hint") or "")[:1200],
                json.dumps(f.get("action_recipe") or {}, ensure_ascii=False),
                (f.get("anchor_excerpt") or "")[:800],
                float(f.get("anchor_conf") or 0.0),
                (f.get("vendor_name") or "")[:160],
                (f.get("category") or "")[:64],
                json.dumps(f.get("payload") or {}, ensure_ascii=False),
            )
            for f in (findings or [])
        ]
        conn = sqlite3.connect(DB_PATH)
        try:
            # "with conn" commits on success / rolls back on error but does
            # not close the connection — hence the try/finally around it.
            with conn:
                conn.execute(
                    "DELETE FROM unified_findings WHERE check_id=?", (check_id,),
                )
                if rows:
                    conn.executemany(
                        "INSERT INTO unified_findings "
                        "(check_id, source_type, doc_type, severity, status, regulation, "
                        " label, hint, action_recipe, anchor_excerpt, anchor_conf, "
                        " vendor_name, category, payload) "
                        "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
                        rows,
                    )
        finally:
            conn.close()
        if not rows:
            return 0
        # Use the same "or 'mc'" default as the insert: a None source_type
        # mixed with strings would make sorted() raise after the commit and
        # turn a successful write into a reported failure.
        logger.info(
            "unified_findings: %s rows=%d sources=%s",
            check_id, len(rows),
            sorted({(f.get("source_type") or "mc") for f in findings}),
        )
        return len(rows)
    except Exception as e:
        logger.warning("record_findings failed for %s: %s", check_id, e)
        return 0
 def list_findings(
    check_id: str,
    source_type: str | None = None,
    severity: str | None = None,
    doc_type: str | None = None,
    status: str | None = None,
    q: str | None = None,
    limit: int = 1000,
 ) -> list[dict]:
    """Return the findings of ``check_id`` that match the given filters.

    A filter value of ``None``, ``""`` or ``"all"`` disables that filter.
    ``severity`` is upper-cased before comparison. ``q`` is a substring search
    over ``label`` and ``vendor_name``. Rows are ordered by severity
    (CRITICAL, HIGH, MEDIUM, LOW, then everything else), source_type and
    label, and capped at ``limit``. ``action_recipe`` and ``payload`` are
    decoded from JSON.

    Returns ``[]`` on any error (logged as a warning).
    """
    try:
        _ensure_table()
        where = ["check_id = ?"]
        params: list = [check_id]
        if source_type and source_type != "all":
            where.append("source_type = ?")
            params.append(source_type)
        if severity and severity != "all":
            where.append("severity = ?")
            params.append(severity.upper())
        if doc_type and doc_type != "all":
            where.append("doc_type = ?")
            params.append(doc_type)
        if status and status != "all":
            where.append("status = ?")
            params.append(status)
        if q:
            # NOTE(review): SQLite's built-in LOWER() folds ASCII only by
            # default, so labels with upper-case umlauts may not match a
            # lower-cased needle — confirm whether that matters here.
            where.append("(LOWER(label) LIKE ? OR LOWER(vendor_name) LIKE ?)")
            needle = f"%{q.lower()}%"
            params.extend([needle, needle])
        sql = ("SELECT * FROM unified_findings WHERE " + " AND ".join(where) +
               " ORDER BY CASE severity "
               "  WHEN 'CRITICAL' THEN 0 WHEN 'HIGH' THEN 1 "
               "  WHEN 'MEDIUM' THEN 2 WHEN 'LOW' THEN 3 "
               "  ELSE 4 END, source_type, label LIMIT ?")
        params.append(int(limit))
        # "with sqlite3.connect(...)" would leave the connection open; fetch
        # everything and close explicitly before decoding.
        conn = sqlite3.connect(DB_PATH)
        try:
            conn.row_factory = sqlite3.Row
            rows = conn.execute(sql, params).fetchall()
        finally:
            conn.close()
        out = []
        for r in rows:
            d = dict(r)
            d["action_recipe"] = json.loads(d.get("action_recipe") or "{}")
            d["payload"] = json.loads(d.get("payload") or "{}")
            out.append(d)
        return out
    except Exception as e:
        logger.warning("list_findings failed: %s", e)
        return []
 def findings_summary(check_id: str) -> dict:
    """Return aggregate counts of a check's findings for the filter UI.

    Returns:
        A dict with ``total`` and the count maps ``by_source``,
        ``by_severity``, ``by_status`` and ``by_doc_type``. NULL/empty column
        values are reported under the key ``"-"``. On error the partially
        filled (initially all-empty) dict is returned and a warning is logged.
    """
    out = {
        "total": 0,
        "by_source": {},
        "by_severity": {},
        "by_status": {},
        "by_doc_type": {},
    }
    # Column name -> result key. Column names come from this fixed mapping
    # only, so interpolating them into the SQL below is safe.
    buckets = {
        "source_type": "by_source",
        "severity": "by_severity",
        "status": "by_status",
        "doc_type": "by_doc_type",
    }
    try:
        _ensure_table()
        # The sqlite3 context manager does not close connections; do it here.
        conn = sqlite3.connect(DB_PATH)
        try:
            conn.row_factory = sqlite3.Row
            for col, bucket in buckets.items():
                rows = conn.execute(
                    f"SELECT {col} AS k, COUNT(*) AS n FROM unified_findings "
                    f"WHERE check_id=? GROUP BY {col}",
                    (check_id,),
                ).fetchall()
                out[bucket] = {r["k"] or "-": r["n"] for r in rows}
                out["total"] = max(out["total"], sum(r["n"] for r in rows))
        finally:
            conn.close()
        return out
    except Exception as e:
        logger.warning("findings_summary failed: %s", e)
        return out
@@ -50,9 +50,12 @@ from compliance.api.agent_recurring_routes import router as agent_recurring_rout
 from compliance.api.agent_compare_routes import router as agent_compare_router
 from compliance.api.agent_doc_check_routes import router as agent_doc_check_router
 from compliance.api.agent_compliance_check_routes import router as agent_compliance_check_router
 from compliance.api.agent_findings_routes import router as agent_findings_router
 from compliance.api.saving_scan_routes import router as saving_scan_router
 from compliance.api.agent_migration_routes import router as agent_migration_router
 from compliance.api.vendor_assessment_routes import router as vendor_assessment_router
 from compliance.api.cra_routes import router as cra_router
 from compliance.api.quaidal_routes import router as quaidal_router
 # Middleware
 from middleware import (
@@ -157,6 +160,8 @@ app.include_router(agent_recurring_router, prefix="/api")
 app.include_router(agent_compare_router, prefix="/api")
 app.include_router(agent_doc_check_router, prefix="/api")
 app.include_router(agent_compliance_check_router, prefix="/api")
 app.include_router(agent_findings_router, prefix="/api")
 app.include_router(saving_scan_router, prefix="/api")
 app.include_router(agent_migration_router, prefix="/api")
 # Vendor Contract Assessment
@@ -164,6 +169,7 @@ app.include_router(vendor_assessment_router, prefix="/api")
 # CRA (Cyber Resilience Act) Compliance
 app.include_router(cra_router, prefix="/api")
 app.include_router(quaidal_router, prefix="/api")
 if __name__ == "__main__":
@@ -0,0 +1,116 @@
 """
 Tests for the saving-scan funnel endpoint.
 Focus: input validation + lead persistence + rate-limit error path.
 The actual compliance check is mocked — we only verify the route layer.
 """
 import os
 import sys
 from unittest.mock import patch
 import pytest
 from fastapi import FastAPI
 from fastapi.testclient import TestClient
 # Put the directory one level above this file at the front of the import
 # path so the `compliance` package import below can resolve.
 sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
 # Use a temp SQLite for the sidecar
 os.environ["COMPLIANCE_AUDIT_DB"] = "/tmp/test_saving_scan.db"
 # Start from a clean slate: drop a database file left over from an earlier run.
 if os.path.exists("/tmp/test_saving_scan.db"):
    os.remove("/tmp/test_saving_scan.db")
 # The router is imported only after the path/environment tweaks above (hence
 # the E402 suppression). NOTE(review): presumably the routes module reads
 # COMPLIANCE_AUDIT_DB at import or first use - confirm before reordering.
 from compliance.api.saving_scan_routes import router  # noqa: E402
 # Minimal standalone app that mounts just this router under /api; every test
 # in this file talks to it through the shared `client`.
 app = FastAPI()
 app.include_router(router, prefix="/api")
 client = TestClient(app)
 class TestStartSavingScanValidation:
    """Input-validation checks for POST /api/compliance/agent/saving-scan/start."""
    def test_missing_email_returns_422(self):
        """A body that lacks the ``email`` field is answered with HTTP 422."""
        payload = {"url": "https://example.de"}
        response = client.post(
            "/api/compliance/agent/saving-scan/start", json=payload
        )
        assert response.status_code == 422
    def test_invalid_email_returns_400(self):
        """A malformed address yields HTTP 400 and a detail mentioning "E-Mail"."""
        payload = {
            "url": "https://example.de",
            "email": "kein-email",
            "consent": True,
        }
        # create_task is swapped for a mock while the request is in flight.
        with patch("compliance.api.saving_scan_routes.asyncio.create_task"):
            response = client.post(
                "/api/compliance/agent/saving-scan/start", json=payload
            )
        assert response.status_code == 400
        assert "E-Mail" in response.json()["detail"]
    def test_invalid_url_returns_400(self):
        """An ``ftp://`` URL is answered with HTTP 400."""
        payload = {
            "url": "ftp://wrong.de",
            "email": "u@x.de",
            "consent": True,
        }
        with patch("compliance.api.saving_scan_routes.asyncio.create_task"):
            response = client.post(
                "/api/compliance/agent/saving-scan/start", json=payload
            )
        assert response.status_code == 400
    def test_consent_required(self):
        """``consent: False`` yields HTTP 400 and a detail mentioning "Consent"."""
        payload = {
            "url": "https://example.de",
            "email": "u@x.de",
            "consent": False,
        }
        with patch("compliance.api.saving_scan_routes.asyncio.create_task"):
            response = client.post(
                "/api/compliance/agent/saving-scan/start", json=payload
            )
        assert response.status_code == 400
        assert "Consent" in response.json()["detail"]
 def _patch_check_runner():
    """Register a stand-in for the compliance-check routes module.
    A synthetic ``compliance.api.agent_compliance_check_routes`` module is
    placed in ``sys.modules`` so that a later (lazy) import of that name
    receives the stub instead of the real worker - which, per the original
    author's note, avoids loading smtp_sender (Py3.10+). The entry is not
    removed again by this helper.
    """
    import sys
    import types
    module_name = "compliance.api.agent_compliance_check_routes"
    class _DocInput:
        # Stores its two constructor arguments as plain attributes.
        def __init__(self, doc_type="other", url=""):
            self.doc_type = doc_type
            self.url = url
    class _Req:
        # Accepts arbitrary keyword arguments and exposes them as attributes.
        def __init__(self, **kw):
            vars(self).update(kw)
    async def _runner(*_a, **_kw):
        # No-op coroutine standing in for the real check worker.
        return None
    stub = types.ModuleType(module_name)
    exported = (
        ("DocumentInput", _DocInput),
        ("ComplianceCheckRequest", _Req),
        ("_run_compliance_check", _runner),
        ("_compliance_check_jobs", {}),
    )
    for attr_name, attr_value in exported:
        setattr(stub, attr_name, attr_value)
    sys.modules[module_name] = stub
 class TestStartSavingScanSuccess:
    """Happy path of the start endpoint with the check worker stubbed out."""
    def test_valid_request_starts_check(self):
        """A valid url/email/consent payload returns 200 and a running check."""
        _patch_check_runner()
        payload = {
            "url": "https://example-newdomain.de",
            "email": "user@example.de",
            "consent": True,
        }
        response = client.post(
            "/api/compliance/agent/saving-scan/start", json=payload
        )
        # Include the response body in the failure message for easier triage.
        assert response.status_code == 200, response.text
        body = response.json()
        assert "check_id" in body
        assert body["status"] == "running"
        assert "example-newdomain.de" in body["message"]
 class TestLeadCount:
    """The lead-count endpoint reflects a previously submitted scan request."""
    def test_lead_count_after_submit(self):
        """After one submission the count is >= 1 and the domain is listed."""
        _patch_check_runner()
        submission = {
            "url": "https://abc-leadtest.de",
            "email": "lead@x.de",
            "consent": True,
        }
        # The submission's own response is deliberately not inspected here.
        client.post("/api/compliance/agent/saving-scan/start", json=submission)
        response = client.get("/api/compliance/agent/saving-scan/lead-count")
        assert response.status_code == 200
        stats = response.json()
        assert stats["total_leads"] >= 1
        assert "abc-leadtest.de" in str(stats["top_domains"])