refactor: split cookie_screenshot_ocr.py (642 → 290 + 353 LOC)

CI hard-cap 500 LOC. cookie_screenshot_ocr.py war auf 642 gewachsen, also gesplittet: - cookie_screenshot_ocr_engines.py (353 LOC, NEU) OCR-Engine-Funktionen: _slice_screenshot, Vision-LLM (qwen2.5vl), PaddleOCR, Tesseract, parse_ocr_cookie_table, parse_vision_response, Konstanten VISION_MODEL/OLLAMA_URL/VISION_PROMPT. - cookie_screenshot_ocr.py (290 LOC, REWRITE) Orchestration: capture_cookie_evidence_slices, _ocr_one_slice, ocr_slices_extract_cookies, capture_cookie_screenshot, extract_cookies_via_vision, cookies_to_vendor_records. Re-Exports der Engine-Funktionen für Backward-Kompat. Einziger externer Importer (_phase_d1_vendors_raw.py) braucht keinen Code-Change — Public-API stabil. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-06-06 23:35:33 +02:00
parent ff796fb480
commit 02879a2c3a
9 changed files with 1790 additions and 384 deletions
@@ -0,0 +1,232 @@
+'use client'
+
+/**
+ * Strukturierter Editor fuer JSONB-Conditions:
+ *   { kind: 'all'|'any', clauses: [{field, op, value}] }
+ *
+ * Wird im RuleEditor verwendet. Reine Praesentations-Komponente — Parent
+ * verwaltet State.
+ */
+
+import type {
+  ClauseOperator, RuleClause, RuleCondition,
+} from '../_types'
+import { OPERATOR_LABELS, PROFILE_FIELDS } from '../_types'
+
+/** Props of {@link ConditionBuilder}; the component is fully controlled. */
+interface Props {
+  /** Condition currently being edited. */
+  value: RuleCondition
+  /** Receives a freshly built condition for every edit; `value` itself is never mutated. */
+  onChange: (next: RuleCondition) => void
+  /** When true the selects are disabled and the add/remove buttons are hidden. */
+  readOnly?: boolean
+}
+
+/**
+ * Structured editor for a rule condition of the shape
+ * `{ kind: 'all' | 'any', clauses: [{ field, op, value }] }`.
+ *
+ * The component keeps no state of its own: every edit is reported through
+ * `onChange` and the parent is expected to pass the result back as `value`.
+ */
+export default function ConditionBuilder({ value, onChange, readOnly }: Props) {
+  const setKind = (kind: 'all' | 'any') => onChange({ ...value, kind })
+
+  // Replace exactly one clause; all other clauses keep their identity.
+  const setClause = (idx: number, clause: RuleClause) =>
+    onChange({
+      ...value,
+      clauses: value.clauses.map((existing, i) => (i === idx ? clause : existing)),
+    })
+
+  // New clauses start on the first profile field with an "eq" comparison.
+  const addClause = () =>
+    onChange({
+      ...value,
+      clauses: [
+        ...value.clauses,
+        { field: PROFILE_FIELDS[0].key, op: 'eq', value: '' },
+      ],
+    })
+
+  const removeClause = (idx: number) =>
+    onChange({ ...value, clauses: value.clauses.filter((_, i) => i !== idx) })
+
+  return (
+    <div className="space-y-2">
+      <div className="flex items-center gap-2">
+        <span className="text-xs text-gray-600">Bedingung:</span>
+        <select
+          className="text-xs px-2 py-1 border border-gray-300 rounded"
+          value={value.kind}
+          disabled={readOnly}
+          onChange={(e) => setKind(e.target.value as 'all' | 'any')}
+        >
+          <option value="all">ALLE Klauseln müssen zutreffen (AND)</option>
+          <option value="any">MIND. EINE Klausel trifft zu (OR)</option>
+        </select>
+      </div>
+
+      {value.clauses.length === 0 && (
+        <div className="text-xs text-gray-500 italic px-1">
+          Keine Klauseln — Regel gilt für jedes Profil.
+        </div>
+      )}
+
+      <ul className="space-y-1">
+        {/* Clauses carry no stable id, so the list position is used as key. */}
+        {value.clauses.map((clause, idx) => (
+          <li key={idx} className="flex items-start gap-1 p-1.5 bg-gray-50 rounded border border-gray-200">
+            <ClauseRow
+              clause={clause}
+              onChange={(c) => setClause(idx, c)}
+              readOnly={!!readOnly}
+            />
+            {!readOnly && (
+              <button
+                // Explicit type: a bare <button> would submit an enclosing form.
+                type="button"
+                className="text-xs px-1.5 py-0.5 text-rose-700 hover:bg-rose-50 rounded"
+                onClick={() => removeClause(idx)}
+                title="Klausel entfernen"
+              >
+                ×
+              </button>
+            )}
+          </li>
+        ))}
+      </ul>
+
+      {!readOnly && (
+        <button
+          type="button"
+          className="text-xs px-2 py-1 border border-gray-300 rounded text-gray-700 hover:bg-gray-50"
+          onClick={addClause}
+        >
+          + Klausel hinzufügen
+        </button>
+      )}
+    </div>
+  )
+}
+
+/** Operators offered in the operator select for a given profile field. */
+function operatorsFor(field: typeof PROFILE_FIELDS[number]): ClauseOperator[] {
+  switch (field.type) {
+    case 'enum':
+      return ['eq', 'neq', 'in', 'not_in', 'exists', 'truthy', 'falsy']
+    case 'boolean':
+      return ['truthy', 'falsy', 'eq', 'neq']
+    case 'number':
+      return ['eq', 'neq', 'gt', 'gte', 'lt', 'lte']
+    default:
+      return ['eq', 'neq', 'in', 'not_in', 'exists']
+  }
+}
+
+/** True for the operators that compare against a list of values. */
+function isListOperator(op: ClauseOperator): boolean {
+  return op === 'in' || op === 'not_in'
+}
+
+/** True for the operators that need a comparison value at all. */
+function operatorNeedsValue(op: ClauseOperator): boolean {
+  return !['exists', 'truthy', 'falsy'].includes(op)
+}
+
+/**
+ * Start value for a clause after its field changed. Chosen so that the stored
+ * value equals what the matching value input displays for it.
+ */
+function initialValueFor(field: typeof PROFILE_FIELDS[number], op: ClauseOperator): unknown {
+  if (field.type === 'enum' && field.options && isListOperator(op)) return []
+  if (field.type === 'number') return 0
+  if (field.type === 'boolean') return false
+  return ''
+}
+
+/**
+ * One editable clause: field select, operator select and (if the operator
+ * needs one) a value input.
+ *
+ * Unknown field keys fall back to the first profile field for rendering.
+ * Changing the field also repairs the operator (if the new field does not
+ * offer it) and resets the value, so the emitted clause never keeps an
+ * operator/value pair that the UI no longer shows.
+ */
+function ClauseRow({
+  clause, onChange, readOnly,
+}: {
+  clause: RuleClause
+  onChange: (c: RuleClause) => void
+  readOnly: boolean
+}) {
+  const field = PROFILE_FIELDS.find((f) => f.key === clause.field) || PROFILE_FIELDS[0]
+  const operators = operatorsFor(field)
+
+  const requiresValue = operatorNeedsValue(clause.op)
+  const multiValue = isListOperator(clause.op)
+
+  const changeField = (key: string) => {
+    const nextField = PROFILE_FIELDS.find((f) => f.key === key) || PROFILE_FIELDS[0]
+    const allowed = operatorsFor(nextField)
+    // Keep the operator when the new field supports it, otherwise use its first one.
+    const op = allowed.includes(clause.op) ? clause.op : (allowed[0] ?? 'eq')
+    // The old value belongs to the old field's domain (other options / other type).
+    onChange({ ...clause, field: key, op, value: initialValueFor(nextField, op) })
+  }
+
+  const changeOp = (op: ClauseOperator) => {
+    let value = clause.value
+    // Enum fields switch between a single select and a multi select; convert
+    // the value so that it matches the input that will be rendered.
+    if (field.type === 'enum' && field.options && operatorNeedsValue(op)) {
+      const wantsList = isListOperator(op)
+      if (wantsList && !Array.isArray(value)) {
+        value = typeof value === 'string' && value !== '' ? [value] : []
+      } else if (!wantsList && Array.isArray(value)) {
+        value = typeof value[0] === 'string' ? value[0] : ''
+      }
+    }
+    onChange({ ...clause, op, value })
+  }
+
+  return (
+    <div className="flex-1 grid grid-cols-12 gap-1 items-center text-xs">
+      <select
+        className="col-span-4 px-1 py-0.5 border border-gray-300 rounded bg-white truncate"
+        value={clause.field}
+        disabled={readOnly}
+        onChange={(e) => changeField(e.target.value)}
+      >
+        {PROFILE_FIELDS.map((f) => (
+          <option key={f.key} value={f.key}>{f.label} ({f.key})</option>
+        ))}
+      </select>
+
+      <select
+        className="col-span-3 px-1 py-0.5 border border-gray-300 rounded bg-white"
+        value={clause.op}
+        disabled={readOnly}
+        onChange={(e) => changeOp(e.target.value as ClauseOperator)}
+      >
+        {operators.map((op) => (
+          <option key={op} value={op}>{OPERATOR_LABELS[op]}</option>
+        ))}
+      </select>
+
+      <div className="col-span-5">
+        {requiresValue && (
+          <ValueInput
+            field={field}
+            multi={multiValue}
+            value={clause.value}
+            onChange={(v) => onChange({ ...clause, value: v })}
+            readOnly={readOnly}
+          />
+        )}
+      </div>
+    </div>
+  )
+}
+
+/**
+ * Value input for a single clause. The rendered control depends on the field:
+ * enum fields with options get a (multi) select, number fields a numeric
+ * input, boolean fields a true/false select and everything else a text input.
+ * Values of an unexpected runtime type are displayed as the control's empty
+ * default ('' / [] / 0 / 'false').
+ */
+function ValueInput({
+  field, multi, value, onChange, readOnly,
+}: {
+  field: typeof PROFILE_FIELDS[number]
+  multi: boolean
+  value: unknown
+  onChange: (v: unknown) => void
+  readOnly: boolean
+}) {
+  const inputClass = 'w-full px-1 py-0.5 border border-gray-300 rounded'
+  const selectClass = `${inputClass} bg-white`
+  const asText = typeof value === 'string' ? value : ''
+
+  if (field.type === 'enum' && field.options) {
+    const choices = field.options.map((opt) => (
+      <option key={opt.value} value={opt.value}>{opt.label}</option>
+    ))
+
+    if (!multi) {
+      return (
+        <select
+          className={selectClass}
+          value={asText}
+          disabled={readOnly}
+          onChange={(event) => onChange(event.target.value)}
+        >
+          <option value="">— wählen —</option>
+          {choices}
+        </select>
+      )
+    }
+
+    const picked = Array.isArray(value) ? (value as string[]) : []
+    return (
+      <select
+        multiple
+        className={`${selectClass} h-16`}
+        value={picked}
+        disabled={readOnly}
+        onChange={(event) => {
+          // Report every currently selected option as a string array.
+          onChange(Array.from(event.target.selectedOptions, (node) => node.value))
+        }}
+      >
+        {choices}
+      </select>
+    )
+  }
+
+  switch (field.type) {
+    case 'number': {
+      const asNumber = typeof value === 'number' ? value : 0
+      return (
+        <input
+          type="number"
+          className={inputClass}
+          value={asNumber}
+          disabled={readOnly}
+          onChange={(event) => onChange(Number(event.target.value))}
+        />
+      )
+    }
+    case 'boolean': {
+      const asFlag = value ? 'true' : 'false'
+      return (
+        <select
+          className={selectClass}
+          value={asFlag}
+          disabled={readOnly}
+          onChange={(event) => onChange(event.target.value === 'true')}
+        >
+          <option value="true">true</option>
+          <option value="false">false</option>
+        </select>
+      )
+    }
+    default:
+      // Also reached for enum fields that define no options.
+      return (
+        <input
+          type="text"
+          className={inputClass}
+          value={asText}
+          disabled={readOnly}
+          onChange={(event) => onChange(event.target.value)}
+        />
+      )
+  }
+}
@@ -0,0 +1,414 @@
+'use client'
+
+/**
+ * Rechte Spalte: Detail-Editor fuer die ausgewaehlte Regel.
+ *
+ * - Zeigt Live-Version + offenen Draft (falls vorhanden)
+ * - Erlaubt Draft-Edit (classification, conditions, source_citation, rationale)
+ * - Buttons: "Neuen Draft starten" (kopiert von Live), "Einreichen" (mit Pflicht
+ *   change_summary-Modal), "Intern freigeben" (DSB), "Publish" (= Mandanten-Freigabe)
+ * - Versionshistorie + Approval-Trail unten als Akkordeon
+ */
+
+import { useEffect, useMemo, useState } from 'react'
+import type {
+  ApprovalHistoryEntry, Classification, Rule, RuleCondition, RuleVersion,
+} from '../_types'
+import { CLASSIFICATION_LABELS, STATUS_LABELS } from '../_types'
+import ConditionBuilder from './ConditionBuilder'
+
+/** Props of {@link RuleEditor}. Persistence is delegated to the callbacks. */
+interface Props {
+  /** Rule whose title, document type and key are shown in the header. */
+  rule: Rule
+  /** Versions of the rule; the live version and the open draft/review version are picked from it. */
+  versions: RuleVersion[]
+  /** Approval trail entries shown in the history accordion. */
+  history: ApprovalHistoryEntry[]
+  /** Called by "Neuen Draft starten" with a copy of the live version's content. */
+  onCreateDraft: (payload: {
+    classification: Classification
+    conditions: RuleCondition
+    source_citation: string
+    rationale?: string | null
+  }) => Promise<void>
+  /** Called by "Draft speichern" with the current form content. */
+  onUpdateDraft: (versionId: string, patch: {
+    classification?: Classification
+    conditions?: RuleCondition
+    source_citation?: string
+    rationale?: string | null
+  }) => Promise<void>
+  /** Called from the submit dialog with the trimmed change summary. */
+  onSubmitForReview: (versionId: string, changeSummary: string) => Promise<void>
+  onApprove: (versionId: string) => Promise<void>
+  onPublish: (versionId: string) => Promise<void>
+  /** Called from the reject dialog with the trimmed rejection reason. */
+  onReject: (versionId: string, reason: string) => Promise<void>
+}
+
+/**
+ * Detail editor for the selected rule.
+ *
+ * Shows the live version and the open draft (status `draft` or `review`).
+ * The form is editable only while the open version has status `draft`; in
+ * `review` the footer offers approve / publish / reject instead.
+ *
+ * Callback failures are logged and leave the UI as it was, so text typed into
+ * the submit or reject dialog is not lost when the request fails.
+ */
+export default function RuleEditor({
+  rule, versions, history,
+  onCreateDraft, onUpdateDraft,
+  onSubmitForReview, onApprove, onPublish, onReject,
+}: Props) {
+  const liveVersion = useMemo(
+    () => versions.find((v) => v.is_live) || null,
+    [versions],
+  )
+  const draftVersion = useMemo(
+    () => versions.find((v) => ['draft', 'review'].includes(v.status)) || null,
+    [versions],
+  )
+
+  // Edit state
+  const [classification, setClassification] = useState<Classification>('required')
+  const [conditions, setConditions] = useState<RuleCondition>({ kind: 'all', clauses: [] })
+  const [sourceCitation, setSourceCitation] = useState('')
+  const [rationale, setRationale] = useState('')
+
+  // Dialog / accordion state
+  const [showSubmit, setShowSubmit] = useState(false)
+  const [changeSummary, setChangeSummary] = useState('')
+  const [showHistory, setShowHistory] = useState(false)
+  const [rejectReason, setRejectReason] = useState('')
+  const [showReject, setShowReject] = useState(false)
+
+  // Sync the form with the displayed version (an open draft wins over live).
+  const sourceVersion = draftVersion || liveVersion
+  useEffect(() => {
+    if (sourceVersion) {
+      setClassification(sourceVersion.classification)
+      setConditions(sourceVersion.conditions)
+      setSourceCitation(sourceVersion.source_citation)
+      setRationale(sourceVersion.rationale || '')
+    } else {
+      // No version at all: do not keep showing the previously selected rule's values.
+      setClassification('required')
+      setConditions({ kind: 'all', clauses: [] })
+      setSourceCitation('')
+      setRationale('')
+    }
+    // Keyed on the version id only, so unsaved edits survive re-renders in
+    // which the same version arrives as a new object.
+  }, [sourceVersion?.id])
+
+  const isDraftMode = !!draftVersion && draftVersion.status === 'draft'
+  const isReviewMode = !!draftVersion && draftVersion.status === 'review'
+  const readOnly = !isDraftMode
+
+  /**
+   * Awaits a parent callback and reports whether it succeeded. Failures are
+   * logged instead of surfacing as unhandled promise rejections.
+   */
+  const run = async (label: string, action: () => Promise<void>): Promise<boolean> => {
+    try {
+      await action()
+      return true
+    } catch (err: unknown) {
+      console.error(`RuleEditor: ${label} failed`, err)
+      return false
+    }
+  }
+
+  const handleCreateDraft = () =>
+    run('create draft', () =>
+      onCreateDraft({
+        classification: liveVersion?.classification || 'recommended',
+        conditions: liveVersion?.conditions || { kind: 'all', clauses: [] },
+        source_citation: liveVersion?.source_citation || '',
+        rationale: liveVersion?.rationale,
+      }),
+    )
+
+  const handleSaveDraft = async () => {
+    if (!draftVersion) return
+    const versionId = draftVersion.id
+    await run('save draft', () =>
+      onUpdateDraft(versionId, {
+        classification, conditions, source_citation: sourceCitation, rationale,
+      }),
+    )
+  }
+
+  const handleSubmit = async () => {
+    const summary = changeSummary.trim()
+    if (!draftVersion || !summary) return
+    const versionId = draftVersion.id
+    const ok = await run('submit for review', () => onSubmitForReview(versionId, summary))
+    // Close and clear only after success; on failure the text stays for a retry.
+    if (!ok) return
+    setShowSubmit(false)
+    setChangeSummary('')
+  }
+
+  const handleApprove = async () => {
+    if (!draftVersion) return
+    const versionId = draftVersion.id
+    await run('approve', () => onApprove(versionId))
+  }
+
+  const handlePublish = async () => {
+    if (!draftVersion) return
+    const versionId = draftVersion.id
+    await run('publish', () => onPublish(versionId))
+  }
+
+  const handleReject = async () => {
+    const reason = rejectReason.trim()
+    if (!draftVersion || !reason) return
+    const versionId = draftVersion.id
+    const ok = await run('reject', () => onReject(versionId, reason))
+    if (!ok) return
+    setShowReject(false)
+    setRejectReason('')
+  }
+
+  return (
+    <div className="h-full flex flex-col overflow-hidden bg-white">
+      <header className="px-5 py-3 border-b border-gray-200">
+        <div className="flex items-baseline justify-between gap-3">
+          <div className="min-w-0">
+            <h2 className="text-base font-semibold text-gray-800 truncate">{rule.title}</h2>
+            <div className="text-xs text-gray-500">
+              <code>{rule.document_type}</code> · {rule.rule_key}
+            </div>
+          </div>
+          <div className="flex items-center gap-2 text-xs text-gray-600">
+            {liveVersion && (
+              <span>
+                Live: v{liveVersion.version_number} (
+                <code>{liveVersion.classification}</code>)
+              </span>
+            )}
+            {draftVersion && (
+              <span className="px-1.5 py-0.5 bg-amber-100 text-amber-800 rounded border border-amber-300">
+                Draft v{draftVersion.version_number} · {STATUS_LABELS[draftVersion.status]}
+              </span>
+            )}
+          </div>
+        </div>
+      </header>
+
+      <div className="flex-1 overflow-y-auto p-5 space-y-4">
+        {!draftVersion && (
+          <div className="bg-amber-50 border border-amber-200 rounded p-3 flex items-center justify-between">
+            <span className="text-sm text-amber-800">
+              Kein offener Draft. Starte einen neuen Draft, um die Regel zu ändern.
+            </span>
+            <button
+              type="button"
+              className="px-3 py-1.5 text-sm bg-amber-600 text-white rounded hover:bg-amber-700"
+              onClick={() => { void handleCreateDraft() }}
+            >
+              + Neuen Draft starten
+            </button>
+          </div>
+        )}
+
+        {/* Classification */}
+        <section>
+          <label className="text-xs font-medium text-gray-700 block mb-1">
+            Klassifikation
+          </label>
+          <select
+            className="text-sm px-2 py-1 border border-gray-300 rounded"
+            value={classification}
+            disabled={readOnly}
+            onChange={(e) => setClassification(e.target.value as Classification)}
+          >
+            {(['required', 'recommended', 'optional'] as const).map((c) => (
+              <option key={c} value={c}>{CLASSIFICATION_LABELS[c]}</option>
+            ))}
+          </select>
+        </section>
+
+        {/* Condition */}
+        <section>
+          <label className="text-xs font-medium text-gray-700 block mb-1">
+            Bedingung
+          </label>
+          <ConditionBuilder
+            value={conditions}
+            onChange={setConditions}
+            readOnly={readOnly}
+          />
+        </section>
+
+        {/* Source citation (required before submitting for review) */}
+        <section>
+          <label className="text-xs font-medium text-gray-700 block mb-1">
+            Quelle / Norm-Citation <span className="text-rose-600">*</span>
+          </label>
+          <input
+            type="text"
+            className="w-full text-sm px-2 py-1.5 border border-gray-300 rounded"
+            placeholder="z.B. § 12 HinSchG, Art. 28 DSGVO, EuGH C-311/18"
+            value={sourceCitation}
+            disabled={readOnly}
+            onChange={(e) => setSourceCitation(e.target.value)}
+          />
+        </section>
+
+        {/* Rationale */}
+        <section>
+          <label className="text-xs font-medium text-gray-700 block mb-1">
+            Begründung / Rationale (optional)
+          </label>
+          <textarea
+            className="w-full text-sm px-2 py-1.5 border border-gray-300 rounded"
+            rows={3}
+            placeholder="Anwalts-Kommentar, warum die Regel so klassifiziert ist…"
+            value={rationale}
+            disabled={readOnly}
+            onChange={(e) => setRationale(e.target.value)}
+          />
+        </section>
+
+        {/* Version history */}
+        <section>
+          <button
+            type="button"
+            className="text-xs text-gray-600 hover:text-gray-800"
+            onClick={() => setShowHistory((v) => !v)}
+          >
+            {showHistory ? '▾' : '▸'} Versionshistorie + Approval-Trail ({versions.length} Versionen)
+          </button>
+          {showHistory && (
+            <HistoryList versions={versions} history={history} />
+          )}
+        </section>
+      </div>
+
+      {/* Footer actions */}
+      <footer className="px-5 py-3 border-t border-gray-200 bg-gray-50 flex items-center gap-2 flex-wrap">
+        {isDraftMode && (
+          <>
+            <button
+              type="button"
+              className="px-3 py-1.5 text-sm border border-gray-300 rounded text-gray-700 hover:bg-white"
+              onClick={() => { void handleSaveDraft() }}
+            >
+              Draft speichern
+            </button>
+            <button
+              type="button"
+              className="px-3 py-1.5 text-sm bg-amber-600 text-white rounded hover:bg-amber-700 disabled:opacity-50"
+              disabled={!sourceCitation.trim()}
+              onClick={() => setShowSubmit(true)}
+              title={!sourceCitation.trim() ? 'Source Citation ist Pflicht' : ''}
+            >
+              Zur internen Prüfung einreichen
+            </button>
+          </>
+        )}
+        {isReviewMode && (
+          <>
+            <button
+              type="button"
+              className="px-3 py-1.5 text-sm bg-emerald-600 text-white rounded hover:bg-emerald-700"
+              onClick={() => { void handleApprove() }}
+            >
+              Intern freigeben → Mandant
+            </button>
+            <button
+              type="button"
+              className="px-3 py-1.5 text-sm bg-blue-600 text-white rounded hover:bg-blue-700"
+              onClick={() => { void handlePublish() }}
+              title="Wird sofort live (Test-Modus)"
+            >
+              Publish (sofort live)
+            </button>
+            <button
+              type="button"
+              className="px-3 py-1.5 text-sm border border-rose-300 text-rose-700 rounded hover:bg-rose-50"
+              onClick={() => setShowReject(true)}
+            >
+              Ablehnen
+            </button>
+          </>
+        )}
+      </footer>
+
+      {showSubmit && (
+        <SubmitDialog
+          value={changeSummary}
+          onChange={setChangeSummary}
+          onCancel={() => setShowSubmit(false)}
+          onSubmit={() => { void handleSubmit() }}
+        />
+      )}
+
+      {showReject && (
+        <RejectDialog
+          value={rejectReason}
+          onChange={setRejectReason}
+          onCancel={() => { setShowReject(false); setRejectReason('') }}
+          onSubmit={() => { void handleReject() }}
+        />
+      )}
+    </div>
+  )
+}
+
+/**
+ * Read-only accordion content: one card per version followed by the
+ * approval trail. Timestamps are formatted with the 'de-DE' locale.
+ */
+function HistoryList({ versions, history }: { versions: RuleVersion[]; history: ApprovalHistoryEntry[] }) {
+  const formatTimestamp = (iso: string) => new Date(iso).toLocaleString('de-DE')
+
+  const versionCards = versions.map((version) => {
+    const { id, version_number, status, is_live, created_at, change_summary, source_citation } = version
+    return (
+      <li key={id} className="bg-white border border-gray-200 rounded p-2">
+        <div className="flex items-center gap-2">
+          <span className="font-medium">v{version_number}</span>
+          <span className="px-1.5 py-0.5 bg-gray-100 rounded">{STATUS_LABELS[status]}</span>
+          {is_live && <span className="text-emerald-700">● Live</span>}
+          <span className="text-gray-500 ml-auto">
+            {formatTimestamp(created_at)}
+          </span>
+        </div>
+        {change_summary && (
+          <div className="mt-1 text-gray-600">Änderung: {change_summary}</div>
+        )}
+        {source_citation && (
+          <div className="mt-0.5 text-gray-500">Quelle: {source_citation}</div>
+        )}
+      </li>
+    )
+  })
+
+  // Approver and comment are appended only when present.
+  const trailRows = history.map((entry) => (
+    <li key={entry.id} className="text-gray-600">
+      {formatTimestamp(entry.created_at)} · {entry.action}
+      {entry.approver && ` · ${entry.approver}`}
+      {entry.comment && ` — ${entry.comment}`}
+    </li>
+  ))
+
+  return (
+    <div className="mt-2 space-y-2 text-xs">
+      <div>
+        <div className="font-medium text-gray-700 mb-1">Versionen:</div>
+        <ul className="space-y-1">{versionCards}</ul>
+      </div>
+      <div>
+        <div className="font-medium text-gray-700 mb-1">Approval-Trail:</div>
+        <ul className="space-y-0.5">{trailRows}</ul>
+      </div>
+    </div>
+  )
+}
+
+/**
+ * Modal asking for the mandatory change summary before a draft is submitted
+ * for internal review. Fully controlled: the text lives in the parent.
+ * Clicking the backdrop cancels; the submit button stays disabled while the
+ * trimmed text is empty.
+ */
+function SubmitDialog({
+  value, onChange, onCancel, onSubmit,
+}: {
+  value: string
+  onChange: (s: string) => void
+  onCancel: () => void
+  onSubmit: () => void
+}) {
+  // Only one instance is rendered at a time, so static ids are sufficient.
+  const titleId = 'rule-submit-dialog-title'
+  const inputId = 'rule-submit-dialog-summary'
+  return (
+    <div className="fixed inset-0 bg-black/30 z-50 flex items-center justify-center" onClick={onCancel}>
+      <div
+        role="dialog"
+        aria-modal="true"
+        aria-labelledby={titleId}
+        className="bg-white rounded-lg shadow-xl w-[520px]"
+        // Clicks inside the panel must not reach the backdrop's cancel handler.
+        onClick={(e) => e.stopPropagation()}
+      >
+        <header className="px-5 py-3 border-b border-gray-200">
+          <h3 id={titleId} className="font-semibold">Zur internen Prüfung einreichen</h3>
+        </header>
+        <div className="p-5">
+          <label htmlFor={inputId} className="text-xs font-medium text-gray-700">
+            Was wurde geändert? <span className="text-rose-600">*</span>
+          </label>
+          <textarea
+            id={inputId}
+            autoFocus
+            rows={4}
+            className="w-full mt-1 text-sm px-2 py-1.5 border border-gray-300 rounded"
+            placeholder="z.B. Schwelle auf 50 MA angehoben (BAG-Urteil X)"
+            value={value}
+            onChange={(e) => onChange(e.target.value)}
+          />
+        </div>
+        <footer className="px-5 py-3 border-t border-gray-200 flex justify-end gap-2">
+          <button type="button" className="px-3 py-1.5 text-sm text-gray-600" onClick={onCancel}>Abbrechen</button>
+          <button
+            type="button"
+            className="px-4 py-1.5 text-sm bg-amber-600 text-white rounded disabled:opacity-50"
+            disabled={!value.trim()}
+            onClick={onSubmit}
+          >
+            Einreichen
+          </button>
+        </footer>
+      </div>
+    </div>
+  )
+}
+
+/**
+ * Modal asking for the mandatory rejection reason. Fully controlled: the
+ * text lives in the parent. Clicking the backdrop cancels; the reject button
+ * stays disabled while the trimmed text is empty.
+ */
+function RejectDialog({
+  value, onChange, onCancel, onSubmit,
+}: {
+  value: string
+  onChange: (s: string) => void
+  onCancel: () => void
+  onSubmit: () => void
+}) {
+  // Only one instance is rendered at a time, so static ids are sufficient.
+  const titleId = 'rule-reject-dialog-title'
+  const inputId = 'rule-reject-dialog-reason'
+  return (
+    <div className="fixed inset-0 bg-black/30 z-50 flex items-center justify-center" onClick={onCancel}>
+      <div
+        role="dialog"
+        aria-modal="true"
+        aria-labelledby={titleId}
+        className="bg-white rounded-lg shadow-xl w-[480px]"
+        // Clicks inside the panel must not reach the backdrop's cancel handler.
+        onClick={(e) => e.stopPropagation()}
+      >
+        <header className="px-5 py-3 border-b border-gray-200">
+          <h3 id={titleId} className="font-semibold">Draft ablehnen</h3>
+        </header>
+        <div className="p-5">
+          <label htmlFor={inputId} className="text-xs font-medium text-gray-700">
+            Ablehnungsgrund <span className="text-rose-600">*</span>
+          </label>
+          <textarea
+            id={inputId}
+            autoFocus
+            rows={3}
+            className="w-full mt-1 text-sm px-2 py-1.5 border border-gray-300 rounded"
+            value={value}
+            onChange={(e) => onChange(e.target.value)}
+          />
+        </div>
+        <footer className="px-5 py-3 border-t border-gray-200 flex justify-end gap-2">
+          <button type="button" className="px-3 py-1.5 text-sm text-gray-600" onClick={onCancel}>Abbrechen</button>
+          <button
+            type="button"
+            className="px-4 py-1.5 text-sm bg-rose-600 text-white rounded disabled:opacity-50"
+            disabled={!value.trim()}
+            onClick={onSubmit}
+          >
+            Ablehnen
+          </button>
+        </footer>
+      </div>
+    </div>
+  )
+}
@@ -0,0 +1,111 @@
+'use client'
+
+/**
+ * Linke Spalte: Liste der globalen Empfehlungs-Regeln.
+ *
+ * Filterbar nach document_type. Klassifikations-Chip + Live-Indikator.
+ */
+
+import { useMemo, useState } from 'react'
+import type { Rule, RuleVersion } from '../_types'
+import { CLASSIFICATION_LABELS, STATUS_LABELS } from '../_types'
+
+/** Props of {@link RuleList}. */
+interface Props {
+  /** All rules to list (before text filtering). */
+  rules: Rule[]
+  /** Version to display per rule id; a missing entry renders the "ohne Live-Version" chip. */
+  versionsByRule: Record<string, RuleVersion | undefined>
+  /** Id of the highlighted rule, or null when nothing is selected. */
+  selectedRuleId: string | null
+  /** Called with the rule id when a row is clicked. */
+  onSelectRule: (ruleId: string) => void
+}
+
+/**
+ * Searchable list of rules. The search text is matched case-insensitively
+ * as a substring of title, rule key and document type; surrounding
+ * whitespace in the search text is ignored.
+ */
+export default function RuleList({
+  rules, versionsByRule, selectedRuleId, onSelectRule,
+}: Props) {
+  const [filter, setFilter] = useState('')
+  const filtered = useMemo(() => {
+    // Trim once and use the same string for the emptiness check and for
+    // matching, so " dsgvo" finds the same rules as "dsgvo".
+    const q = filter.trim().toLowerCase()
+    if (!q) return rules
+    return rules.filter(
+      (r) =>
+        r.title.toLowerCase().includes(q) ||
+        r.rule_key.toLowerCase().includes(q) ||
+        r.document_type.toLowerCase().includes(q),
+    )
+  }, [rules, filter])
+
+  return (
+    <div className="h-full flex flex-col overflow-hidden border-r border-gray-200 bg-gray-50">
+      <div className="p-3 border-b border-gray-200 bg-white">
+        <input
+          type="text"
+          placeholder="Suchen (Titel, Key, Doc-Type)…"
+          value={filter}
+          onChange={(e) => setFilter(e.target.value)}
+          className="w-full text-sm px-2 py-1.5 border border-gray-300 rounded"
+        />
+        <div className="text-xs text-gray-500 mt-1">
+          {filtered.length} von {rules.length} Regeln
+        </div>
+      </div>
+
+      <ul className="flex-1 overflow-y-auto">
+        {filtered.map((rule) => {
+          const live = versionsByRule[rule.id]
+          const isSelected = rule.id === selectedRuleId
+          return (
+            <li key={rule.id}>
+              <button
+                // Explicit type: a bare <button> would submit an enclosing form.
+                type="button"
+                className={`w-full text-left px-3 py-2 border-b border-gray-100 hover:bg-white ${
+                  isSelected ? 'bg-white border-l-4 border-l-amber-500' : ''
+                }`}
+                onClick={() => onSelectRule(rule.id)}
+              >
+                <div className="flex items-center gap-2 mb-0.5">
+                  {live && (
+                    <ClassificationChip classification={live.classification} />
+                  )}
+                  {!live && (
+                    <span className="px-1.5 py-0.5 text-xs rounded bg-gray-200 text-gray-600">
+                      ohne Live-Version
+                    </span>
+                  )}
+                </div>
+                <div className="text-sm font-medium text-gray-800 truncate">
+                  {rule.title}
+                </div>
+                <div className="text-xs text-gray-500 truncate">
+                  <code>{rule.document_type}</code> · {rule.rule_key}
+                </div>
+                {live && (
+                  <div className="text-[10px] text-gray-500 mt-0.5">
+                    v{live.version_number} · {STATUS_LABELS[live.status]}
+                    {live.is_live && (
+                      <span className="ml-1 inline-block w-1.5 h-1.5 bg-emerald-500 rounded-full" />
+                    )}
+                  </div>
+                )}
+              </button>
+            </li>
+          )
+        })}
+        {filtered.length === 0 && (
+          <li className="px-3 py-4 text-sm text-gray-500 italic">
+            Keine Regeln gefunden.
+          </li>
+        )}
+      </ul>
+    </div>
+  )
+}
+
+/** Small colored badge showing the label of a classification. */
+function ClassificationChip({ classification }: { classification: 'required' | 'recommended' | 'optional' }) {
+  // Tailwind color classes per classification.
+  const tones = {
+    required: 'bg-rose-100 text-rose-800 border-rose-300',
+    recommended: 'bg-amber-100 text-amber-800 border-amber-300',
+    optional: 'bg-slate-100 text-slate-700 border-slate-300',
+  } as const
+  const tone = tones[classification]
+  const chipClass = `px-1.5 py-0.5 text-[10px] font-medium rounded border ${tone}`
+  const text = CLASSIFICATION_LABELS[classification]
+
+  return <span className={chipClass}>{text}</span>
+}
@@ -0,0 +1,183 @@
+/**
+ * Hook fuer Template-Rule-Editor: laedt Regeln/Versions/History und exponiert
+ * Lifecycle-Actions (submit/approve/publish/reject) + Tenant-Override-CRUD.
+ *
+ * Alle API-Calls gehen ueber /api/sdk/v1/compliance/* (Next.js-Proxy zum
+ * backend-compliance).
+ */
+
+import { useCallback } from 'react'
+import type {
+  ApprovalHistoryEntry,
+  Classification,
+  Rule,
+  RuleCondition,
+  RuleVersion,
+  TenantRuleOverride,
+} from '../_types'
+
+const API_BASE = '/api/sdk/v1/compliance'
+
+/**
+ * Thin JSON wrapper around `fetch`.
+ *
+ * Sends `Content-Type: application/json` unless the caller supplied its own
+ * content type. Caller headers may be given in any `HeadersInit` form (plain
+ * object, tuple array or `Headers` instance).
+ *
+ * @returns the parsed JSON body, or `undefined` for a 204 response.
+ * @throws Error with message `"<status>: <body text>"` for non-2xx responses.
+ */
+async function req<T>(url: string, init?: RequestInit): Promise<T> {
+  // Normalise through Headers: object-spreading a Headers instance or a tuple
+  // array would silently drop the caller's headers.
+  const headers = new Headers(init?.headers)
+  if (!headers.has('Content-Type')) {
+    headers.set('Content-Type', 'application/json')
+  }
+
+  const res = await fetch(url, { ...init, headers })
+  if (!res.ok) {
+    // Fall back to the status text if the error body cannot be read.
+    const text = await res.text().catch(() => res.statusText)
+    throw new Error(`${res.status}: ${text}`)
+  }
+  // 204 No Content has no body to parse.
+  if (res.status === 204) return undefined as T
+  return res.json() as Promise<T>
+}
+
+/**
+ * Hook exposing the template-rule API as stable callbacks (every callback is
+ * memoised with an empty dependency list).
+ *
+ * All ids are URL-encoded before they are placed into the request path, the
+ * same way the `document_type` query parameter is encoded. Each callback
+ * returns the promise of the underlying request and rejects on non-2xx
+ * responses.
+ */
+export function useRuleEditorActions() {
+  /** Lists rules, optionally restricted to one document type. */
+  const listRules = useCallback(
+    (documentType?: string) => {
+      const q = documentType ? `?document_type=${encodeURIComponent(documentType)}` : ''
+      return req<Rule[]>(`${API_BASE}/template-rules${q}`)
+    },
+    [],
+  )
+
+  const getRule = useCallback(
+    (ruleId: string) => req<Rule>(`${API_BASE}/template-rules/${encodeURIComponent(ruleId)}`),
+    [],
+  )
+
+  const listVersions = useCallback(
+    (ruleId: string) =>
+      req<RuleVersion[]>(`${API_BASE}/template-rules/${encodeURIComponent(ruleId)}/versions`),
+    [],
+  )
+
+  const getVersion = useCallback(
+    (versionId: string) =>
+      req<RuleVersion>(`${API_BASE}/template-rule-versions/${encodeURIComponent(versionId)}`),
+    [],
+  )
+
+  /** Creates a new version for a rule; `rule_id` is also sent in the body. */
+  const createDraftVersion = useCallback(
+    (
+      ruleId: string,
+      payload: {
+        classification: Classification
+        conditions: RuleCondition
+        source_citation: string
+        rationale?: string | null
+        created_by?: string | null
+      },
+    ) =>
+      req<RuleVersion>(`${API_BASE}/template-rules/${encodeURIComponent(ruleId)}/versions`, {
+        method: 'POST',
+        body: JSON.stringify({
+          rule_id: ruleId,
+          ...payload,
+        }),
+      }),
+    [],
+  )
+
+  /** Partially updates a version (PATCH with only the given fields). */
+  const updateDraftVersion = useCallback(
+    (
+      versionId: string,
+      patch: {
+        classification?: Classification
+        conditions?: RuleCondition
+        source_citation?: string
+        rationale?: string | null
+        change_summary?: string | null
+      },
+    ) =>
+      req<RuleVersion>(`${API_BASE}/template-rule-versions/${encodeURIComponent(versionId)}`, {
+        method: 'PATCH',
+        body: JSON.stringify(patch),
+      }),
+    [],
+  )
+
+  const submitForReview = useCallback(
+    (
+      versionId: string,
+      payload: { change_summary: string; submitter?: string; comment?: string },
+    ) =>
+      req<RuleVersion>(
+        `${API_BASE}/template-rule-versions/${encodeURIComponent(versionId)}/submit-review`,
+        {
+          method: 'POST',
+          body: JSON.stringify(payload),
+        },
+      ),
+    [],
+  )
+
+  const approveVersion = useCallback(
+    (versionId: string, payload: { approver?: string; comment?: string } = {}) =>
+      req<RuleVersion>(
+        `${API_BASE}/template-rule-versions/${encodeURIComponent(versionId)}/approve`,
+        {
+          method: 'POST',
+          body: JSON.stringify(payload),
+        },
+      ),
+    [],
+  )
+
+  const publishVersion = useCallback(
+    (versionId: string, payload: { approver?: string; comment?: string } = {}) =>
+      req<RuleVersion>(
+        `${API_BASE}/template-rule-versions/${encodeURIComponent(versionId)}/publish`,
+        {
+          method: 'POST',
+          body: JSON.stringify(payload),
+        },
+      ),
+    [],
+  )
+
+  const rejectVersion = useCallback(
+    (
+      versionId: string,
+      payload: { rejection_reason: string; rejector?: string; comment?: string },
+    ) =>
+      req<RuleVersion>(
+        `${API_BASE}/template-rule-versions/${encodeURIComponent(versionId)}/reject`,
+        {
+          method: 'POST',
+          body: JSON.stringify(payload),
+        },
+      ),
+    [],
+  )
+
+  const getApprovalHistory = useCallback(
+    (versionId: string) =>
+      req<ApprovalHistoryEntry[]>(
+        `${API_BASE}/template-rule-versions/${encodeURIComponent(versionId)}/approval-history`,
+      ),
+    [],
+  )
+
+  const listOverrides = useCallback(
+    () => req<TenantRuleOverride[]>(`${API_BASE}/tenant-rule-overrides`),
+    [],
+  )
+
+  /** Creates or updates a tenant override (POST to the collection endpoint). */
+  const upsertOverride = useCallback(
+    (payload: {
+      rule_id: string
+      override_classification: Classification | null
+      reason: string
+      created_by?: string
+    }) =>
+      req<TenantRuleOverride>(`${API_BASE}/tenant-rule-overrides`, {
+        method: 'POST',
+        body: JSON.stringify(payload),
+      }),
+    [],
+  )
+
+  const deleteOverride = useCallback(
+    (overrideId: string) =>
+      req<void>(`${API_BASE}/tenant-rule-overrides/${encodeURIComponent(overrideId)}`, {
+        method: 'DELETE',
+      }),
+    [],
+  )
+
+  return {
+    listRules, getRule,
+    listVersions, getVersion,
+    createDraftVersion, updateDraftVersion,
+    submitForReview, approveVersion, publishVersion, rejectVersion,
+    getApprovalHistory,
+    listOverrides, upsertOverride, deleteOverride,
+  }
+}
@@ -0,0 +1,246 @@
+/**
+ * Types fuer den Template-Rule-Editor (SDK).
+ *
+ * Spiegeln die Pydantic-Modelle aus
+ * backend-compliance/compliance/schemas/template_rule.py.
+ */
+
+/** Classification level of a rule version or tenant override (UI labels: CLASSIFICATION_LABELS). */
+export type Classification = 'required' | 'recommended' | 'optional'
+
+/** Status values a rule version can carry (UI labels: STATUS_LABELS). */
+export type RuleStatus =
+  | 'draft' | 'review' | 'approved' | 'published' | 'archived' | 'rejected'
+
+/** Operators a condition clause may use (UI labels: OPERATOR_LABELS). */
+export type ClauseOperator =
+  | 'eq' | 'neq' | 'in' | 'not_in'
+  | 'gt' | 'gte' | 'lt' | 'lte'
+  | 'exists' | 'truthy' | 'falsy'
+
+/** One clause of a rule condition: a field, an operator and an optional comparison value. */
+export interface RuleClause {
+  // Name of the field the clause tests (PROFILE_FIELDS lists the keys offered in the UI).
+  field: string
+  op: ClauseOperator
+  // Untyped on purpose; presumably omitted for exists/truthy/falsy — confirm against the backend schema.
+  value?: unknown
+}
+
+/** A condition: a list of clauses combined according to `kind`. */
+export interface RuleCondition {
+  // Presumably 'all' = every clause must match, 'any' = at least one — confirm against the backend evaluator.
+  kind: 'all' | 'any'
+  clauses: RuleClause[]
+}
+
+/** A rule record; its versioned content lives in RuleVersion. */
+export interface Rule {
+  id: string
+  rule_key: string
+  document_type: string
+  title: string
+  // null when the rule has no current version.
+  current_version_id: string | null
+  // Timestamps arrive as strings; format not visible here (presumably ISO 8601 — confirm).
+  created_at: string
+  updated_at: string | null
+}
+
+/**
+ * One version of a rule: its classification, conditions and citation, plus
+ * who/when fields for the submit, approve, publish and reject steps.
+ */
+export interface RuleVersion {
+  id: string
+  rule_id: string
+  version_number: number
+  status: RuleStatus
+  // Flag the editor page uses to pick the live version of a rule.
+  is_live: boolean
+  classification: Classification
+  conditions: RuleCondition
+  source_citation: string
+  rationale: string | null
+  change_summary: string | null
+  created_by: string | null
+  // Each *_by / *_at pair below is nullable; presumably null until that step happened — confirm.
+  submitted_by: string | null
+  submitted_at: string | null
+  approved_by: string | null
+  approved_at: string | null
+  published_by: string | null
+  published_at: string | null
+  rejected_by: string | null
+  rejected_at: string | null
+  rejection_reason: string | null
+  created_at: string
+  updated_at: string | null
+}
+
+/** One entry of a version's approval history. */
+export interface ApprovalHistoryEntry {
+  id: string
+  version_id: string
+  // Free-form string here; the set of possible action names is not visible in this file.
+  action: string
+  approver: string | null
+  comment: string | null
+  created_at: string
+}
+
+/** A tenant-specific override of a rule's classification, with the reason for it. */
+export interface TenantRuleOverride {
+  id: string
+  tenant_id: string
+  rule_id: string
+  // Nullable; the meaning of null is not visible here (NOTE(review): confirm against the backend).
+  override_classification: Classification | null
+  reason: string
+  created_by: string | null
+  created_at: string
+  updated_at: string | null
+}
+
+// ---- Profil-Felder fuer Condition-Builder ----
+
+/** Describes one profile field that can be chosen in the condition builder. */
+export interface ProfileFieldOption {
+  /** Key used in the profile */
+  key: string
+  /** Label shown in the UI */
+  label: string
+  /** Category used for grouping */
+  category: 'org' | 'proc' | 'prod' | 'comp' | 'tech' | 'compliance'
+  /** Expected data type */
+  type: 'string' | 'number' | 'boolean' | 'enum'
+  /** For enum fields: the possible values with their labels */
+  options?: { value: string; label: string }[]
+}
+
+/** Builds the two yes/no choices; a fresh array per call so no two fields share one options array. */
+const yesNoOptions = (): { value: string; label: string }[] => [
+  { value: 'yes', label: 'Ja' },
+  { value: 'no', label: 'Nein' },
+]
+
+/** Shorthand for an enum field whose only values are 'yes' and 'no'. */
+const yesNoField = (
+  key: string,
+  label: string,
+  category: ProfileFieldOption['category'],
+): ProfileFieldOption => ({ key, label, category, type: 'enum', options: yesNoOptions() })
+
+/**
+ * Profile fields offered in the condition builder, in display order.
+ *
+ * Per the original note these are the fields used by the 33 initial rules,
+ * ported from templateRecommendations.ts with compliance_depth_level added.
+ * The original note counted 17 fields; this array holds 18 entries
+ * (presumably 17 ported plus compliance_depth_level — confirm).
+ */
+export const PROFILE_FIELDS: ProfileFieldOption[] = [
+  {
+    key: 'compliance_depth_level',
+    label: 'Compliance-Tiefe',
+    category: 'compliance',
+    type: 'enum',
+    options: [
+      { value: 'L1', label: 'L1 — Lean Startup' },
+      { value: 'L2', label: 'L2 — Standard' },
+      { value: 'L3', label: 'L3 — Strict' },
+      { value: 'L4', label: 'L4 — Zertifizierungsbereit' },
+    ],
+  },
+  {
+    key: 'org_employee_count',
+    label: 'Mitarbeiterzahl',
+    category: 'org',
+    type: 'enum',
+    options: [
+      { value: 'none', label: 'Keine' },
+      { value: '1_9', label: '1–9' },
+      { value: '10_49', label: '10–49' },
+      { value: '50_249', label: '50–249' },
+      { value: '250_999', label: '250–999' },
+      { value: '1000_plus', label: '1000+' },
+    ],
+  },
+  yesNoField('org_has_employees', 'Hat Mitarbeiter', 'org'),
+  {
+    key: 'org_business_model',
+    label: 'Geschäftsmodell',
+    category: 'org',
+    type: 'enum',
+    options: [
+      { value: 'b2b_saas', label: 'B2B SaaS' },
+      { value: 'b2c_shop', label: 'B2C Shop' },
+      { value: 'platform', label: 'Plattform' },
+      { value: 'marketplace', label: 'Marktplatz' },
+      { value: 'social', label: 'Social Media' },
+      { value: 'saas', label: 'SaaS' },
+      { value: 'media', label: 'Media' },
+      { value: 'manufacturing', label: 'Maschinenbau' },
+      { value: 'other', label: 'Sonstiges' },
+    ],
+  },
+  yesNoField('org_has_social_media', 'Hat Social Media', 'org'),
+  yesNoField('org_has_video_conferencing', 'Hat Video-Konferenzen', 'org'),
+  {
+    key: 'org_cert_target',
+    label: 'Zertifizierungsziel',
+    category: 'org',
+    type: 'enum',
+    options: [
+      { value: 'none', label: 'Keines' },
+      { value: 'iso27001', label: 'ISO 27001' },
+      { value: 'iso27701', label: 'ISO 27701' },
+      { value: 'tisax', label: 'TISAX' },
+    ],
+  },
+  {
+    key: 'proc_ai_usage',
+    label: 'KI-Nutzung',
+    category: 'proc',
+    type: 'enum',
+    options: [
+      { value: 'none', label: 'Keine' },
+      { value: 'limited', label: 'Begrenzt' },
+      { value: 'extensive', label: 'Umfangreich' },
+    ],
+  },
+  // The only boolean-typed field; it carries no options list.
+  { key: 'proc_uses_ai_tools', label: 'Nutzt KI-Tools', category: 'proc', type: 'boolean' },
+  yesNoField('proc_byod_allowed', 'BYOD erlaubt', 'proc'),
+  yesNoField('proc_dsfa_required', 'DSFA erforderlich', 'proc'),
+  yesNoField('prod_webshop', 'Webshop', 'prod'),
+  yesNoField('prod_ugc_platform', 'UGC-Plattform', 'prod'),
+  yesNoField('prod_consent_management', 'Consent Management', 'prod'),
+  yesNoField('comp_has_processors', 'Auftragsverarbeiter', 'comp'),
+  yesNoField('comp_vendor_management', 'Vendor-Management', 'comp'),
+  {
+    key: 'comp_dsfa_processes',
+    label: 'DSFA-Prozesse',
+    category: 'comp',
+    type: 'enum',
+    options: [
+      { value: 'required', label: 'Erforderlich' },
+      { value: 'optional', label: 'Optional' },
+    ],
+  },
+  {
+    key: 'tech_third_country',
+    label: 'Drittland-Transfer',
+    category: 'tech',
+    type: 'enum',
+    options: [
+      { value: 'no', label: 'Nein' },
+      { value: 'us_dpf_only', label: 'Nur US-DPF' },
+      { value: 'adequate_only', label: 'Nur Angemessenheitsbeschluss' },
+      { value: 'yes_us', label: 'Ja, USA' },
+      { value: 'yes_other', label: 'Ja, Sonstige' },
+    ],
+  },
+]
+
+
+/** German UI label for each clause operator; the Record type forces an entry for every ClauseOperator. */
+export const OPERATOR_LABELS: Record<ClauseOperator, string> = {
+  eq: 'gleich (=)',
+  neq: 'ungleich (≠)',
+  in: 'in Liste',
+  not_in: 'nicht in Liste',
+  gt: 'größer (>)',
+  gte: 'größer/gleich (≥)',
+  lt: 'kleiner (<)',
+  lte: 'kleiner/gleich (≤)',
+  exists: 'existiert',
+  truthy: 'ist gesetzt',
+  falsy: 'ist leer',
+}
+
+/** German UI label for each Classification value. */
+export const CLASSIFICATION_LABELS: Record<Classification, string> = {
+  required: 'Pflicht',
+  recommended: 'Empfohlen',
+  optional: 'Optional',
+}
+
+/** German UI label for each RuleStatus value; note that 'published' is shown as 'Live'. */
+export const STATUS_LABELS: Record<RuleStatus, string> = {
+  draft: 'Entwurf',
+  review: 'In Prüfung',
+  approved: 'Freigegeben',
+  published: 'Live',
+  archived: 'Archiviert',
+  rejected: 'Abgelehnt',
+}
@@ -0,0 +1,205 @@
+'use client'
+
+/**
+ * Template Rule Editor — Editorial-UI fuer Anwaelte/DSBs.
+ *
+ * Architektur:
+ * - Links: RuleList mit Filter
+ * - Rechts: RuleEditor mit Klassifikation, Condition-Builder, Source-Citation,
+ *   Approval-Workflow (draft → review → approved → published)
+ *
+ * Backend: /api/sdk/v1/compliance/template-rules + /template-rule-versions/*
+ */
+
+import { useEffect, useRef, useState, useCallback } from 'react'
+import { useSDK } from '@/lib/sdk'
+import StepHeader from '@/components/sdk/StepHeader/StepHeader'
+import { useRuleEditorActions } from './_hooks/useRuleEditorActions'
+import type {
+  ApprovalHistoryEntry, Classification, Rule, RuleCondition, RuleVersion,
+} from './_types'
+import RuleList from './_components/RuleList'
+import RuleEditor from './_components/RuleEditor'
+
+/** Turn an unknown thrown value into a message for the error banner. */
+function toErrorMessage(e: unknown): string {
+  return e instanceof Error ? e.message : String(e)
+}
+
+/**
+ * Page component of the template rule editor.
+ *
+ * Loads all rules plus each rule's live version for the list on the left, and
+ * the versions and approval history of the selected rule for the editor on the
+ * right. Every workflow handler runs its request and then reloads the selected
+ * rule; failures are shown in the error banner instead of being thrown.
+ */
+export default function TemplateRuleEditorPage() {
+  useSDK()
+
+  const actions = useRuleEditorActions()
+
+  const [rules, setRules] = useState<Rule[]>([])
+  const [liveVersionsByRule, setLiveVersionsByRule] = useState<Record<string, RuleVersion | undefined>>({})
+  const [selectedRuleId, setSelectedRuleId] = useState<string | null>(null)
+  const [selectedVersions, setSelectedVersions] = useState<RuleVersion[]>([])
+  const [selectedHistory, setSelectedHistory] = useState<ApprovalHistoryEntry[]>([])
+  const [loading, setLoading] = useState(true)
+  const [error, setError] = useState<string | null>(null)
+
+  // Rule id of the most recent selection. In-flight loads compare against it
+  // so a slow response for a previously selected rule cannot overwrite the
+  // data of the rule that is selected now.
+  const latestRuleIdRef = useRef<string | null>(null)
+
+  // Load all rules and, in parallel, the live version of each rule.
+  const loadRules = useCallback(async () => {
+    setLoading(true)
+    setError(null)
+    try {
+      const list = await actions.listRules()
+      setRules(list)
+      const byRule: Record<string, RuleVersion | undefined> = {}
+      await Promise.all(
+        list.map(async (r) => {
+          try {
+            const versions = await actions.listVersions(r.id)
+            byRule[r.id] = versions.find((v) => v.is_live)
+          } catch {
+            // Best effort: one rule failing to load its versions must not
+            // fail the whole list; that rule just has no live version entry.
+            byRule[r.id] = undefined
+          }
+        }),
+      )
+      setLiveVersionsByRule(byRule)
+      if (list.length > 0 && !selectedRuleId) {
+        setSelectedRuleId(list[0].id)
+      }
+    } catch (e) {
+      setError(toErrorMessage(e))
+    } finally {
+      setLoading(false)
+    }
+  }, [actions, selectedRuleId])
+
+  // Load versions and approval history (of the live version) for the selected
+  // rule. Results and errors are dropped when the selection changed meanwhile.
+  const loadSelected = useCallback(async () => {
+    const ruleId = selectedRuleId
+    const isStale = () => latestRuleIdRef.current !== ruleId
+    if (!ruleId) {
+      setSelectedVersions([])
+      setSelectedHistory([])
+      return
+    }
+    try {
+      const versions = await actions.listVersions(ruleId)
+      if (isStale()) return
+      setSelectedVersions(versions)
+      const live = versions.find((v) => v.is_live)
+      const history = live ? await actions.getApprovalHistory(live.id) : []
+      if (isStale()) return
+      setSelectedHistory(history)
+    } catch (e) {
+      if (!isStale()) setError(toErrorMessage(e))
+    }
+  }, [actions, selectedRuleId])
+
+  // Mount-only load. NOTE(review): `actions` looks like a fresh object on
+  // every render, so listing loadRules/loadSelected as dependencies would
+  // re-run these effects after each render; the reduced deps are deliberate.
+  useEffect(() => { void loadRules() }, [])
+  useEffect(() => {
+    latestRuleIdRef.current = selectedRuleId
+    void loadSelected()
+  }, [selectedRuleId])
+
+  // Shared wrapper for all workflow handlers: clear the previous error, run
+  // the request, optionally reload the rule list, then reload the selection.
+  const runAction = async (action: () => unknown, reloadRules = false) => {
+    setError(null)
+    try {
+      await action()
+      if (reloadRules) await loadRules()
+      await loadSelected()
+    } catch (e) {
+      setError(toErrorMessage(e))
+    }
+  }
+
+  const handleCreateDraft = async (payload: {
+    classification: Classification
+    conditions: RuleCondition
+    source_citation: string
+    rationale?: string | null
+  }) => {
+    if (!selectedRuleId) return
+    await runAction(() => actions.createDraftVersion(selectedRuleId, payload))
+  }
+
+  const handleUpdateDraft = async (versionId: string, patch: {
+    classification?: Classification
+    conditions?: RuleCondition
+    source_citation?: string
+    rationale?: string | null
+  }) => {
+    await runAction(() => actions.updateDraftVersion(versionId, patch))
+  }
+
+  const handleSubmitForReview = async (versionId: string, changeSummary: string) => {
+    await runAction(() => actions.submitForReview(versionId, { change_summary: changeSummary }))
+  }
+
+  const handleApprove = async (versionId: string) => {
+    await runAction(() => actions.approveVersion(versionId))
+  }
+
+  // Publishing changes which version is live, so the rule list is reloaded too.
+  const handlePublish = async (versionId: string) => {
+    await runAction(() => actions.publishVersion(versionId), true)
+  }
+
+  const handleReject = async (versionId: string, reason: string) => {
+    await runAction(() => actions.rejectVersion(versionId, { rejection_reason: reason }))
+  }
+
+  const selectedRule = rules.find((r) => r.id === selectedRuleId)
+
+  return (
+    <div className="h-full flex flex-col bg-white">
+      <StepHeader
+        stepId="template-rule-editor"
+        title="Empfehlungs-Regeln"
+        description="Editorial-UI für profilbasierte Dokument-Empfehlungen. Anwälte/DSBs editieren globale Regeln mit Approval-Workflow + Quellen-Attribution."
+      />
+      {error && (
+        <div className="px-5 py-2 bg-rose-50 border-b border-rose-200 text-sm text-rose-800">
+          {error}
+        </div>
+      )}
+      {loading && (
+        <div className="p-5 text-sm text-gray-500">Lade Regeln…</div>
+      )}
+      {!loading && (
+        <div className="flex-1 grid grid-cols-[320px_1fr] overflow-hidden">
+          <RuleList
+            rules={rules}
+            versionsByRule={liveVersionsByRule}
+            selectedRuleId={selectedRuleId}
+            onSelectRule={setSelectedRuleId}
+          />
+          {selectedRule ? (
+            <RuleEditor
+              rule={selectedRule}
+              versions={selectedVersions}
+              history={selectedHistory}
+              onCreateDraft={handleCreateDraft}
+              onUpdateDraft={handleUpdateDraft}
+              onSubmitForReview={handleSubmitForReview}
+              onApprove={handleApprove}
+              onPublish={handlePublish}
+              onReject={handleReject}
+            />
+          ) : (
+            <div className="h-full grid place-items-center text-sm text-gray-500">
+              Wähle links eine Regel zum Bearbeiten.
+            </div>
+          )}
+        </div>
+      )}
+    </div>
+  )
+}
@@ -494,4 +494,18 @@ export const SDK_STEPS: SDKStep[] = [
    prerequisiteSteps: [],
    isOptional: true,
  },
+  // Step entry for the template rule editor page (/sdk/template-rule-editor):
+  // optional, no prerequisite steps.
+  {
+    id: 'template-rule-editor',
+    seq: 5000,
+    phase: 2,
+    package: 'betrieb',
+    order: 13,
+    name: 'Empfehlungs-Regeln',
+    nameShort: 'Regeln',
+    description: 'Editorial-UI fuer profilbasierte Dokument-Empfehlungen (Anwalt/DSB)',
+    url: '/sdk/template-rule-editor',
+    checkpointId: 'CP-RULES',
+    prerequisiteSteps: [],
+    isOptional: true,
+  },
 ]
@@ -1,336 +1,49 @@
-"""Screenshot-basierte Cookie-Extraktion mit Tesseract-OCR.
+"""Screenshot-basierte Cookie-Extraktion (Orchestration).

 Pipeline:
 1. consent-tester macht Full-Page-Screenshot (Banner akzeptiert,
   Accordions ausgeklappt, Timestamp eingebrannt) → PNG b64
 2. Tesseract OCR (lang=deu, psm=4) → Rohtext mit Tabellen-Reihen
-3. _parse_ocr_cookie_table(text) → strukturierte Liste {name, category,
-   purpose, duration, type, vendor}
+3. parse_ocr_cookie_table(text) → strukturierte Liste

-Funktioniert site-unabhaengig — egal welches CMP, egal welche Sprache
-(Tesseract kann viele), egal welches DOM-Layout. Timestamp im Bild =
-Beweis was wir zum Scan-Zeitpunkt wirklich gesehen haben.
+Phase-1-Split (2026-06-06): Engine-Funktionen
+(_slice_screenshot / vision-OCR / paddle / tesseract / parse) leben
+jetzt in `cookie_screenshot_ocr_engines.py`. Re-Exports halten die
+Public-API stabil — externe Importer (`_phase_d1_vendors_raw.py`)
+brauchen keinen Code-Change.
 """

 from __future__ import annotations

 import base64 as _b64
-import json
 import logging
 import os
-import re

 import httpx

-logger = logging.getLogger(__name__)
+from .cookie_screenshot_ocr_engines import (  # noqa: F401  (re-exports)
+    OLLAMA_URL,
+    VISION_MODEL,
+    VISION_PROMPT,
+    _PADDLE_OCR,
+    _call_vision_on_slice,
+    _slice_screenshot,
+    ocr_screenshot_via_paddle,
+    ocr_screenshot_via_tesseract,
+    ocr_screenshot_via_vision_slices,
+    parse_ocr_cookie_table,
+    parse_vision_response,
+)

+logger = logging.getLogger(__name__)

 CONSENT_TESTER_URL = os.getenv(
    "CONSENT_TESTER_URL", "http://bp-compliance-consent-tester:8094"
 )
-VISION_MODEL = os.getenv("COOKIE_VISION_MODEL", "qwen2.5vl:32b")
-OLLAMA_URL = os.getenv("OLLAMA_URL", "http://host.docker.internal:11434")


-def _slice_screenshot(png_bytes: bytes, slice_h: int = 1500,
-                      max_slices: int = 25) -> list[str]:
-    """Cut a tall full-page screenshot into 1280×slice_h slices and return
-    each as base64-encoded PNG. Vision models choke on 25k-tall images
-    (resampled down to ~1024 → unreadable text); slicing keeps DPI."""
-    if not png_bytes:
-        return []
-    try:
-        from PIL import Image
-        from io import BytesIO
-    except ImportError:
-        return []
-    img = Image.open(BytesIO(png_bytes)).convert("RGB")
-    w, h = img.size
-    n = min((h + slice_h - 1) // slice_h, max_slices)
-    out: list[str] = []
-    for i in range(n):
-        top = i * slice_h
-        bot = min((i + 1) * slice_h, h)
-        chunk = img.crop((0, top, w, bot))
-        buf = BytesIO()
-        chunk.save(buf, format="PNG", optimize=True)
-        out.append(_b64.b64encode(buf.getvalue()).decode("ascii"))
-    return out
-
-
-async def _call_vision_on_slice(b64_png: str, timeout_s: float = 240.0) -> str:
-    """Ask the vision model to dump all cookie-row text from one slice
-    as raw text (NOT JSON). We parse it downstream with parse_flat regex."""
-    prompt = (
-        "Du siehst einen Bildausschnitt einer Cookie-Richtlinien-Tabelle. "
-        "Liste ALLE Tabellen-Zeilen wortwoertlich auf, eine Zeile pro "
-        "Cookie. Jede Zeile soll enthalten: Cookie-Name, Kategorie, "
-        "Zweck, Speicherdauer, Art (Permanent/Session). "
-        "Format: '<Name> | <Kategorie> | <Zweck> | <Dauer> | <Art>'. "
-        "KEINE Cookies erfinden, nur was im Bild steht. Nur die Tabellen-"
-        "Zeilen, keine Erklaerungen."
-    )
-    payload = {
-        "model": VISION_MODEL,
-        "stream": False,
-        "messages": [{
-            "role": "user", "content": prompt, "images": [b64_png],
-        }],
-        "options": {"temperature": 0.05, "num_predict": 4000},
-    }
-    try:
-        async with httpx.AsyncClient(timeout=timeout_s) as c:
-            r = await c.post(f"{OLLAMA_URL.rstrip('/')}/api/chat", json=payload)
-            r.raise_for_status()
-        return (r.json().get("message") or {}).get("content", "") or ""
-    except Exception as e:
-        logger.debug("vision slice failed: %s", e)
-        return ""
-
-
-async def ocr_screenshot_via_vision_slices(png_bytes: bytes,
-                                            max_slices: int = 20) -> str:
-    """Slice + vision-OCR each slice + concatenate. Returns raw text that
-    can be fed to parse_flat_cookie_text."""
-    slices = _slice_screenshot(png_bytes, slice_h=1500, max_slices=max_slices)
-    if not slices:
-        return ""
-    logger.info("Vision-slicing: %d slices → vision-OCR (model=%s)",
-                 len(slices), VISION_MODEL)
-    import asyncio as _aio
-    # Run slices SEQUENTIALLY: ollama is single-GPU and loading the same
-    # model for parallel requests causes OOM + thrashing on Mac Mini.
-    parts: list[str] = []
-    for i, s in enumerate(slices):
-        txt = await _call_vision_on_slice(s)
-        if txt:
-            parts.append(txt)
-        logger.info("Vision-slice %d/%d: %d chars", i + 1, len(slices),
-                     len(txt))
-    full = "\n".join(parts)
-    logger.info("Vision-OCR slicing total: %d chars from %d slices",
-                 len(full), len(slices))
-    return full
-
-
-def ocr_screenshot_via_paddle(png_bytes: bytes) -> str:
-    """Run PaddleOCR over the full-page screenshot, returning the
-    concatenated text. Deterministic, no LLM halluzination.
-
-    Splits tall screenshots into 1280x3000 slices so OCR works in chunks
-    without OOM on large pages (VW cookie-page is ~25k px tall).
-    """
-    if not png_bytes:
-        return ""
-    try:
-        from PIL import Image
-        from io import BytesIO
-        from paddleocr import PaddleOCR
-    except ImportError as e:
-        logger.warning("PaddleOCR / PIL not available: %s", e)
-        return ""
-
-    try:
-        img = Image.open(BytesIO(png_bytes)).convert("RGB")
-    except Exception as e:
-        logger.warning("PIL open failed: %s", e)
-        return ""
-
-    w, h = img.size
-    slice_h = 3000
-    n_slices = (h + slice_h - 1) // slice_h
-    logger.info("PaddleOCR: %dx%d screenshot → %d slices of %d high",
-                 w, h, n_slices, slice_h)
-
-    # Global OCR instance reused — initial init is ~10s.
-    global _PADDLE_OCR
-    if "_PADDLE_OCR" not in globals() or _PADDLE_OCR is None:
-        try:
-            _PADDLE_OCR = PaddleOCR(use_angle_cls=False, lang="german",
-                                     show_log=False)
-        except Exception as e:
-            logger.warning("PaddleOCR init failed: %s", e)
-            return ""
-
-    parts: list[str] = []
-    import numpy as np
-    for i in range(n_slices):
-        top = i * slice_h
-        bot = min((i + 1) * slice_h, h)
-        crop = img.crop((0, top, w, bot))
-        arr = np.array(crop)
-        try:
-            result = _PADDLE_OCR.ocr(arr, cls=False)
-        except Exception as e:
-            logger.warning("PaddleOCR slice %d failed: %s", i, e)
-            continue
-        # PaddleOCR returns list-of-lines where each line is
-        # [bbox, (text, conf)] — variable nesting depending on version.
-        if not result:
-            continue
-        for page in result:
-            if not page: continue
-            for line in page:
-                if not line: continue
-                try:
-                    if isinstance(line, list) and len(line) >= 2:
-                        txt = line[1][0] if isinstance(line[1], (list, tuple)) else str(line[1])
-                    else:
-                        txt = str(line)
-                    if txt: parts.append(txt)
-                except Exception:
-                    continue
-
-    full_text = "\n".join(parts)
-    logger.info("PaddleOCR: extracted %d lines / %d chars from %d slices",
-                 len(parts), len(full_text), n_slices)
-    return full_text
-
-
-_PADDLE_OCR = None
-
-
-# ── Tesseract-based parser ────────────────────────────────────────────
-
-def ocr_screenshot_via_tesseract(png_bytes: bytes,
-                                  lang: str = "deu",
-                                  psm: int = 4) -> str:
-    """Run Tesseract OCR on a full-page screenshot. Returns normalized text
-    where multi-newline paragraphs are collapsed but blank lines preserved
-    (helps anchor-based parsing).
-
-    psm=4 means single column of text of variable sizes (cookie-tables).
-    """
-    if not png_bytes:
-        return ""
-    try:
-        import pytesseract
-        from PIL import Image
-        from io import BytesIO
-        import re as _re
-    except ImportError as e:
-        logger.warning("tesseract/PIL not available: %s", e)
-        return ""
-    try:
-        img = Image.open(BytesIO(png_bytes)).convert("RGB")
-        raw = pytesseract.image_to_string(img, lang=lang,
-                                            config=f"--psm {psm}")
-        # Collapse intra-paragraph newlines so OCR cells flow on one line.
-        norm = _re.sub(r"[ \t]+", " ", raw)
-        norm = _re.sub(r"\n(?!\s*\n)", " ", norm)
-        norm = _re.sub(r"\s{2,}", " ", norm)
-        logger.info(
-            "Tesseract OCR: %d chars / %d words (image %dx%d)",
-            len(norm), len(norm.split()), img.size[0], img.size[1],
-        )
-        return norm
-    except Exception as e:
-        logger.warning("Tesseract OCR failed: %s (%s)",
-                        str(e) or "(no msg)", type(e).__name__)
-        return ""
-
-
-# Kategorie-Anchor-Tokens that ALWAYS follow the Cookie-Name in the
-# typical column layout: [NAME] [KATEGORIE] [ZWECK] [DAUER] [ART]
-_CATEGORY_ANCHORS = (
-    r"Funktionscookie", r"Trackingcookie",
-    r"Tracking Cookies?", r"Session Cookies?",
-    r"Funktional", r"Marketing", r"Analytics", r"Necessary",
-    r"Werbung", r"Personalisierung", r"Statistik",
-    r"Notwendig", r"Erforderlich",
-)
-
-_CATEGORY_PATTERN = "(?:" + "|".join(_CATEGORY_ANCHORS) + r")(?:\s*\([^)]*\))?"
-
-# Cookie-Name: alphanum + underscore + dash + dot. Wir erlauben optional
-# einen Suffix-Underscore (Spalten-Umbruch bei VW: `VWD6_ENSIGHTEN_PRIVACY_`
-# als Name-Fragment). Mind. 3, max. 60 chars.
-_COOKIE_NAME_RE = (
-    r"(?:[A-Za-z][\w\-.]{2,60}|[A-Za-z][\w\-.]{2,60}<[^>]+>)"
-)
-
-
-def parse_ocr_cookie_table(text: str) -> list[dict]:
-    """Extract cookie-records from Tesseract-OCR text using anchor-based
-    pattern: <name> <category> <purpose...> <duration> <type>.
-
-    Returns list of {name, category, purpose, duration, type}. Vendor is
-    NOT inferred here — caller maps via _guess_vendor.
-
-    KEINE Cookie-Namens-Korrektur — `awsalb` bleibt `awsalb`, nicht
-    `awesome`. Falsche Korrektur waere ein Compliance-Verlust.
-    """
-    if not text or len(text) < 200:
-        return []
-    import re as _re
-    # Pattern: capture name + anchor category, then up to 250 chars
-    # forward to grab duration + type tokens.
-    pattern = _re.compile(
-        rf"(?P<name>{_COOKIE_NAME_RE})\s+"
-        rf"(?P<category>{_CATEGORY_PATTERN})"
-        rf"(?P<rest>[^A-Z]{{0,300}}?)"
-        rf"(?:(?P<duration>\d+(?:[.,]\s*)?\s*(?:Tage|Jahre?|Monate?|Minuten|Stunden|Sekunden)\.?)?\s*"
-        rf"(?P<type>Permanent/Protokoll|Session\s*Cookie|Persistent\s*Cookie|Persistent\s*cookie))?",
-        _re.IGNORECASE | _re.DOTALL,
-    )
-    seen_names: set[str] = set()
-    out: list[dict] = []
-    for m in pattern.finditer(text):
-        name = (m.group("name") or "").strip()
-        # Filter obvious garbage (UI strings, navigation, common words)
-        if not name or len(name) < 3:
-            continue
-        nl = name.lower()
-        if nl in seen_names:
-            continue
-        # Reject common non-cookie words. Cookie-Namen sind technische IDs:
-        # haben oft Unterstrich/Bindestrich/Camel-Case oder sind kurze IDs.
-        if nl in ("name", "art", "zweck", "dauer", "kategorie", "anbieter",
-                  "cookie", "cookies", "name des cookies",
-                  "this", "dieser", "diese", "alle", "und", "von", "der",
-                  "die", "das", "ein", "eine", "session", "permanent",
-                  "category"):
-            continue
-        # Cookie-Namen sollen kein reines Lower-Word sein OHNE _ oder -
-        # (z.B. "verwendet" wuerde sonst matchen)
-        has_marker = any(c in name for c in "_-.<>")
-        is_caps = name.upper() == name and len(name) >= 3
-        is_camel = any(c.isupper() for c in name[1:]) and any(c.islower() for c in name)
-        if not (has_marker or is_caps or is_camel):
-            # Lowercase word ohne Marker → vermutlich kein Cookie-Name
-            continue
-        seen_names.add(nl)
-        out.append({
-            "name": name[:80],
-            "category": (m.group("category") or "").strip()[:60],
-            "purpose": (m.group("rest") or "").strip()[:200],
-            "duration": (m.group("duration") or "").strip()[:60],
-            "type": (m.group("type") or "").strip()[:30],
-            "vendor": "",
-        })
-    logger.info("parse_ocr_cookie_table: %d unique cookies extracted", len(out))
-    return out
-
-
-_VISION_PROMPT = (
-    "Du analysierst einen Screenshot einer Cookie-Richtlinie. Auf der Seite "
-    "ist eine Tabelle mit Cookies aufgelistet. Spalten sind ueblicherweise: "
-    "Name des Cookies, Kategorie (z.B. 'Funktional', 'Marketing', "
-    "'Analytics'), Verwendungszweck, Speicherdauer, Art des Cookies "
-    "(z.B. 'Permanent', 'Session').\n\n"
-    "Extrahiere ALLE Cookies aus dem Bild. Wenn die Tabelle abgeschnitten "
-    "ist, extrahiere alles was sichtbar ist. KEINE Cookies erfinden, KEINE "
-    "Halluzinationen.\n\n"
-    "Antworte als reines JSON-Objekt im Format:\n"
-    '{"cookies": [\n'
-    '  {"name": "<Cookie-Name exakt>", "category": "<Kategorie>", '
-    '"purpose": "<Kurzfassung Zweck max 120 chars>", '
-    '"duration": "<Speicherdauer mit Einheit>", '
-    '"type": "<Permanent|Session|...>", '
-    '"vendor": "<Anbieter falls bekannt, sonst leer>"}\n'
-    "]}\n\n"
-    "Nur JSON, kein Erklaerungstext, keine Code-Fences."
-)
+# Backward-compat: some callers may import _parse_vision_response
+_parse_vision_response = parse_vision_response


 async def capture_cookie_evidence_slices(
@@ -414,9 +127,7 @@ async def capture_cookie_evidence_slices(


 def _ocr_one_slice(s: dict) -> tuple[dict, list[dict]]:
-    """Helper for parallel execution: tesseract + parse for one slice.
-    Returns (slice_metadata_summary, cookies)."""
-    import base64 as _b64
+    """Helper for parallel execution: tesseract + parse for one slice."""
    try:
        png = _b64.b64decode(s.get("png_b64", ""))
    except Exception:
@@ -440,10 +151,6 @@ def ocr_slices_extract_cookies(
    ThreadPoolExecutor with 4 workers yields ~4x speedup on multi-core
    machines (M4 Pro has plenty). Sequential 32 slices = ~60s, parallel
    ~15s.
-
-    Returns (cookies, stats) where stats has:
-      per_slice: [{idx, cookies_found, ts, top_y, bot_y}]
-      total_raw, total_unique, slices
    """
    from concurrent.futures import ThreadPoolExecutor

@@ -451,7 +158,6 @@ def ocr_slices_extract_cookies(
        return [], {"per_slice": [], "total_raw": 0,
                    "total_unique": 0, "slices": 0}

-    # Keep slice order so the per-slice report is sequential.
    with ThreadPoolExecutor(max_workers=max_workers) as ex:
        results = list(ex.map(_ocr_one_slice, slices))

@@ -474,7 +180,8 @@ def ocr_slices_extract_cookies(
    }
    logger.info(
        "ocr_slices_extract_cookies (parallel=%d): %d slices → %d raw → %d unique",
-        max_workers, stats["slices"], stats["total_raw"], stats["total_unique"],
+        max_workers, stats["slices"], stats["total_raw"],
+        stats["total_unique"],
    )
    return all_cookies, stats

@@ -482,11 +189,7 @@ def ocr_slices_extract_cookies(
 async def capture_cookie_screenshot(
    cookie_url: str, check_id: str = "", timeout_s: float = 60.0,
 ) -> dict:
-    """Trigger consent-tester to capture full-page screenshot of cookie URL.
-
-    Returns dict with png_b64, captured_at, url, width_px, height_px etc.
-    Empty png_b64 on error.
-    """
+    """Trigger consent-tester to capture full-page screenshot of cookie URL."""
    if not cookie_url:
        return {"png_b64": "", "error": "no url"}
    try:
@@ -514,11 +217,7 @@ async def capture_cookie_screenshot(
 async def extract_cookies_via_vision(
    png_b64: str, timeout_s: float = 240.0,
 ) -> list[dict]:
-    """Call Ollama llama3.2-vision with the screenshot + extraction prompt.
-
-    Returns list of {name, category, purpose, duration, type, vendor}.
-    Empty list on failure.
-    """
+    """Call Ollama vision model with the screenshot + extraction prompt."""
    if not png_b64:
        return []
    payload = {
@@ -527,13 +226,10 @@ async def extract_cookies_via_vision(
        "format": "json",
        "messages": [{
            "role": "user",
-            "content": _VISION_PROMPT,
+            "content": VISION_PROMPT,
            "images": [png_b64],
        }],
-        "options": {
-            "temperature": 0.05,
-            "num_predict": 8000,
-        },
+        "options": {"temperature": 0.05, "num_predict": 8000},
    }
    try:
        async with httpx.AsyncClient(timeout=timeout_s) as c:
@@ -543,7 +239,7 @@ async def extract_cookies_via_vision(
            )
            r.raise_for_status()
        content = (r.json().get("message") or {}).get("content", "") or ""
-        cookies = _parse_vision_response(content)
+        cookies = parse_vision_response(content)
        logger.info(
            "Vision-OCR extracted %d cookies (model=%s, response_len=%d)",
            len(cookies), VISION_MODEL, len(content),
@@ -557,59 +253,11 @@ async def extract_cookies_via_vision(
        return []


-def _parse_vision_response(content: str) -> list[dict]:
-    """Be lenient: code fences, leading prose, partial JSON."""
-    if not content:
-        return []
-    txt = content.strip()
-    if txt.startswith("```"):
-        lines = txt.split("\n")
-        if lines and lines[-1].strip().startswith("```"):
-            txt = "\n".join(lines[1:-1])
-        else:
-            txt = "\n".join(lines[1:])
-    a, b = txt.find("{"), txt.rfind("}")
-    if not (0 <= a < b):
-        return []
-    try:
-        obj = json.loads(txt[a:b + 1])
-    except json.JSONDecodeError:
-        return []
-    if not isinstance(obj, dict):
-        return []
-    arr = obj.get("cookies") or obj.get("Cookies") or []
-    if not isinstance(arr, list):
-        return []
-    out: list[dict] = []
-    for item in arr[:300]:  # cap to sanity
-        if not isinstance(item, dict):
-            continue
-        name = (item.get("name") or "").strip()
-        if not name or len(name) < 2 or len(name) > 80:
-            continue
-        # Strip obvious garbage
-        if re.fullmatch(r"[\s\-_.]+", name):
-            continue
-        out.append({
-            "name": name[:80],
-            "category": (item.get("category") or "").strip()[:60],
-            "purpose": (item.get("purpose") or "").strip()[:200],
-            "duration": (item.get("duration") or "").strip()[:60],
-            "type": (item.get("type") or "").strip()[:30],
-            "vendor": (item.get("vendor") or "").strip()[:80],
-        })
-    return out
-
-
 def cookies_to_vendor_records(
    cookies: list[dict], guess_vendor_fn=None,
 ) -> list[dict]:
    """Aggregate OCR-extracted cookies into vendor records compatible with
-    cmp_vendors-schema. guess_vendor_fn: optional callable name → vendor.
-
-    Each cookie's vendor field is used; if empty, we fall back to
-    guess_vendor_fn (e.g. _guess_vendor from cookies_table_parser).
-    """
+    cmp_vendors-schema. guess_vendor_fn: optional callable name → vendor."""
    by_vendor: dict[str, dict] = {}
    for c in cookies:
        v_name = (c.get("vendor") or "").strip()
@@ -0,0 +1,353 @@
+"""OCR-Engine-Funktionen für cookie_screenshot_ocr (Phase-1 Split).
+
+Aus dem Hauptmodul ausgelagert, damit es unter dem 500-LOC-Hard-Cap bleibt:
+  - PIL-basiertes _slice_screenshot (zerteilt PNG in subimages)
+  - Vision-LLM-OCR (ollama qwen2.5vl:32b)
+  - PaddleOCR fallback
+  - Tesseract OCR (Hauptpfad)
+  - Anchor-basierter Parser parse_ocr_cookie_table
+  - parse_vision_response (JSON-Toleranz für Vision-Output)
+"""
+
+from __future__ import annotations
+
+import base64 as _b64
+import json
+import logging
+import os
+import re
+
+import httpx
+
+logger = logging.getLogger(__name__)
+
+
+VISION_MODEL = os.getenv("COOKIE_VISION_MODEL", "qwen2.5vl:32b")  # model name sent to Ollama; env override COOKIE_VISION_MODEL
+OLLAMA_URL = os.getenv("OLLAMA_URL", "http://host.docker.internal:11434")  # Ollama base URL; "/api/chat" is appended by the caller
+
+_PADDLE_OCR = None  # module-wide PaddleOCR instance, created on first use in ocr_screenshot_via_paddle
+
+
+# ── 1. Screenshot-Slicing für Vision-Models ────────────────────────
+
+def _slice_screenshot(png_bytes: bytes, slice_h: int = 1500,
+                      max_slices: int = 25) -> list[str]:
+    """Cut a tall screenshot into full-width strips of at most slice_h px (top
+    to bottom, at most max_slices) and return each as a base64-encoded PNG.
+    Slicing keeps text legible for vision models, which downscale tall images."""
+    if not png_bytes:
+        return []
+    try:
+        from PIL import Image
+        from io import BytesIO
+    except ImportError:  # Pillow not installed → no slices, no log entry
+        return []
+    img = Image.open(BytesIO(png_bytes)).convert("RGB")  # NOTE(review): not inside a try — a decode error propagates to the caller
+    w, h = img.size
+    n = min((h + slice_h - 1) // slice_h, max_slices)  # ceil(h / slice_h), capped; rows beyond the cap are dropped
+    out: list[str] = []
+    for i in range(n):
+        top = i * slice_h
+        bot = min((i + 1) * slice_h, h)  # the last strip may be shorter than slice_h
+        chunk = img.crop((0, top, w, bot))  # full image width, not a fixed 1280 px
+        buf = BytesIO()
+        chunk.save(buf, format="PNG", optimize=True)
+        out.append(_b64.b64encode(buf.getvalue()).decode("ascii"))
+    return out
+
+
+# ── 2. Vision-LLM-OCR ──────────────────────────────────────────────
+
+async def _call_vision_on_slice(b64_png: str,
+                                  timeout_s: float = 240.0) -> str:
+    """POST one base64 PNG slice to Ollama's /api/chat and return the model's
+    reply as raw text (pipe-separated rows, NOT JSON); "" on any failure."""
+    prompt = (  # German instruction: one pipe-separated row per cookie, nothing invented
+        "Du siehst einen Bildausschnitt einer Cookie-Richtlinien-Tabelle. "
+        "Liste ALLE Tabellen-Zeilen wortwoertlich auf, eine Zeile pro "
+        "Cookie. Jede Zeile soll enthalten: Cookie-Name, Kategorie, "
+        "Zweck, Speicherdauer, Art (Permanent/Session). "
+        "Format: '<Name> | <Kategorie> | <Zweck> | <Dauer> | <Art>'. "
+        "KEINE Cookies erfinden, nur was im Bild steht. Nur die Tabellen-"
+        "Zeilen, keine Erklaerungen."
+    )
+    payload = {
+        "model": VISION_MODEL,
+        "stream": False,  # the whole reply is read with a single r.json() below
+        "messages": [{
+            "role": "user", "content": prompt, "images": [b64_png],
+        }],
+        "options": {"temperature": 0.05, "num_predict": 4000},
+    }
+    try:
+        async with httpx.AsyncClient(timeout=timeout_s) as c:
+            r = await c.post(f"{OLLAMA_URL.rstrip('/')}/api/chat",
+                              json=payload)
+            r.raise_for_status()
+        return (r.json().get("message") or {}).get("content", "") or ""  # parsed downstream; the original note names a "parse_flat" regex not defined in this module — TODO confirm
+    except Exception as e:  # best effort: HTTP, timeout and JSON errors are logged at debug level only
+        logger.debug("vision slice failed: %s", e)
+        return ""
+
+
+async def ocr_screenshot_via_vision_slices(png_bytes: bytes,
+                                            max_slices: int = 20) -> str:
+    """Slice the screenshot, OCR the slices one by one with the vision model, and return the non-empty replies joined by newlines ("" if there are no slices)."""
+    slices = _slice_screenshot(png_bytes, slice_h=1500,
+                                max_slices=max_slices)
+    if not slices:
+        return ""
+    logger.info("Vision-slicing: %d slices → vision-OCR (model=%s)",
+                 len(slices), VISION_MODEL)
+    parts: list[str] = []
+    for i, s in enumerate(slices):  # sequential: each request is awaited before the next one starts
+        txt = await _call_vision_on_slice(s)
+        if txt:  # a failed slice yields "" and is simply left out
+            parts.append(txt)
+        logger.info("Vision-slice %d/%d: %d chars", i + 1, len(slices),
+                     len(txt))
+    full = "\n".join(parts)
+    logger.info("Vision-OCR slicing total: %d chars from %d slices",
+                 len(full), len(slices))
+    return full
+
+
+# ── 3. PaddleOCR (fallback) ────────────────────────────────────────
+
+def ocr_screenshot_via_paddle(png_bytes: bytes) -> str:
+    """Run PaddleOCR over a full-page screenshot in full-width strips of up to 3000 px
+    and return the recognised text lines joined by newlines; "" if setup fails."""
+    if not png_bytes:
+        return ""
+    try:
+        from PIL import Image
+        from io import BytesIO
+        from paddleocr import PaddleOCR
+    except ImportError as e:
+        logger.warning("PaddleOCR / PIL not available: %s", e)
+        return ""
+
+    try:
+        img = Image.open(BytesIO(png_bytes)).convert("RGB")
+    except Exception as e:
+        logger.warning("PIL open failed: %s", e)
+        return ""
+
+    w, h = img.size
+    slice_h = 3000
+    n_slices = (h + slice_h - 1) // slice_h  # ceil(h / slice_h); no upper cap on the slice count here
+    logger.info("PaddleOCR: %dx%d screenshot → %d slices of %d high",
+                 w, h, n_slices, slice_h)
+
+    global _PADDLE_OCR  # the instance is built once and reused by later calls
+    if _PADDLE_OCR is None:
+        try:
+            _PADDLE_OCR = PaddleOCR(use_angle_cls=False, lang="german",
+                                     show_log=False)
+        except Exception as e:
+            logger.warning("PaddleOCR init failed: %s", e)
+            return ""
+
+    parts: list[str] = []
+    import numpy as np  # NOTE(review): outside the ImportError guard above — a missing numpy raises here
+    for i in range(n_slices):
+        top = i * slice_h
+        bot = min((i + 1) * slice_h, h)
+        crop = img.crop((0, top, w, bot))
+        arr = np.array(crop)
+        try:
+            result = _PADDLE_OCR.ocr(arr, cls=False)
+        except Exception as e:
+            logger.warning("PaddleOCR slice %d failed: %s", i, e)
+            continue  # skip this strip, keep what the others produced
+        if not result:
+            continue
+        for page in result:
+            if not page:
+                continue
+            for line in page:
+                if not line:
+                    continue
+                try:
+                    if isinstance(line, list) and len(line) >= 2:  # presumably [box, (text, score)] — verify against the PaddleOCR result format
+                        txt = (line[1][0]
+                                if isinstance(line[1], (list, tuple))
+                                else str(line[1]))
+                    else:
+                        txt = str(line)  # unexpected shape: keep its string form rather than drop it
+                    if txt:
+                        parts.append(txt)
+                except Exception:  # a malformed entry is skipped without logging
+                    continue
+
+    full_text = "\n".join(parts)
+    logger.info("PaddleOCR: extracted %d lines / %d chars from %d slices",
+                 len(parts), len(full_text), n_slices)
+    return full_text
+
+
+# ── 4. Tesseract OCR (Hauptpfad) ───────────────────────────────────
+
+def ocr_screenshot_via_tesseract(png_bytes: bytes,
+                                  lang: str = "deu",
+                                  psm: int = 4) -> str:
+    """Run Tesseract on the whole screenshot (psm=4 = single column of text of variable
+    sizes, i.e. cookie tables) and return whitespace-flattened text; "" on any error."""
+    if not png_bytes:
+        return ""
+    try:
+        import pytesseract
+        from PIL import Image
+        from io import BytesIO
+        import re as _re  # same module as the file-level "re" import, under a local alias
+    except ImportError as e:
+        logger.warning("tesseract/PIL not available: %s", e)
+        return ""
+    try:
+        img = Image.open(BytesIO(png_bytes)).convert("RGB")
+        raw = pytesseract.image_to_string(img, lang=lang,
+                                            config=f"--psm {psm}")
+        norm = _re.sub(r"[ \t]+", " ", raw)  # collapse runs of spaces/tabs
+        norm = _re.sub(r"\n(?!\s*\n)", " ", norm)  # newline → space unless more whitespace and another newline follow
+        norm = _re.sub(r"\s{2,}", " ", norm)  # any remaining whitespace run of 2+ becomes one space, so the text ends up on one line
+        logger.info(
+            "Tesseract OCR: %d chars / %d words (image %dx%d)",
+            len(norm), len(norm.split()), img.size[0], img.size[1],
+        )
+        return norm
+    except Exception as e:
+        logger.warning("Tesseract OCR failed: %s (%s)",
+                        str(e) or "(no msg)", type(e).__name__)
+        return ""
+
+
+# ── 5. Anchor-basierter Parser ─────────────────────────────────────
+
+_CATEGORY_ANCHORS = (  # regex fragments for the category labels (German and English) used as row anchors
+    r"Funktionscookie", r"Trackingcookie",
+    r"Tracking Cookies?", r"Session Cookies?",
+    r"Funktional", r"Marketing", r"Analytics", r"Necessary",
+    r"Werbung", r"Personalisierung", r"Statistik",
+    r"Notwendig", r"Erforderlich",
+)
+_CATEGORY_PATTERN = ("(?:" + "|".join(_CATEGORY_ANCHORS)  # any one anchor ...
+                      + r")(?:\s*\([^)]*\))?")  # ... optionally followed by a parenthesised suffix
+_COOKIE_NAME_RE = (  # a letter plus 2-60 word/hyphen/dot chars, optionally followed by a <...> part
+    r"(?:[A-Za-z][\w\-.]{2,60}|[A-Za-z][\w\-.]{2,60}<[^>]+>)"
+)
+
+
+def parse_ocr_cookie_table(text: str) -> list[dict]:
+    """Extract cookie records (name/category/purpose/duration/type/vendor) from OCR text, keeping
+    the first hit per name. Cookie names are NOT corrected — `awsalb` stays `awsalb`."""
+    if not text or len(text) < 200:  # inputs shorter than 200 chars are rejected outright
+        return []
+    pattern = re.compile(
+        rf"(?P<name>{_COOKIE_NAME_RE})\s+"
+        rf"(?P<category>{_CATEGORY_PATTERN})"
+        rf"(?P<rest>[^A-Z]{{0,300}}?)"  # NOTE(review): lazy and followed only by an optional group, and IGNORECASE makes [^A-Z] exclude lowercase too — confirm "purpose" is ever non-empty
+        rf"(?:(?P<duration>\d+(?:[.,]\s*)?\s*"
+        rf"(?:Tage|Jahre?|Monate?|Minuten|Stunden|Sekunden)\.?)?\s*"
+        rf"(?P<type>Permanent/Protokoll|Session\s*Cookie|"
+        rf"Persistent\s*Cookie|Persistent\s*cookie))?",
+        re.IGNORECASE | re.DOTALL,
+    )
+    seen_names: set[str] = set()
+    out: list[dict] = []
+    for m in pattern.finditer(text):
+        name = (m.group("name") or "").strip()
+        if not name or len(name) < 3:
+            continue
+        nl = name.lower()
+        if nl in seen_names:  # case-insensitive de-duplication: the first match wins
+            continue
+        if nl in ("name", "art", "zweck", "dauer", "kategorie", "anbieter",  # presumably table-header and filler words that can precede a category label
+                  "cookie", "cookies", "name des cookies",
+                  "this", "dieser", "diese", "alle", "und", "von", "der",
+                  "die", "das", "ein", "eine", "session", "permanent",
+                  "category"):
+            continue
+        has_marker = any(c in name for c in "_-.<>")  # contains one of _ - . < >
+        is_caps = name.upper() == name and len(name) >= 3  # no lowercase letters at all
+        is_camel = (any(c.isupper() for c in name[1:])  # an uppercase letter after the first char ...
+                    and any(c.islower() for c in name))  # ... together with at least one lowercase letter
+        if not (has_marker or is_caps or is_camel):  # names with none of these traits are dropped
+            continue
+        seen_names.add(nl)
+        out.append({
+            "name": name[:80],
+            "category": (m.group("category") or "").strip()[:60],
+            "purpose": (m.group("rest") or "").strip()[:200],
+            "duration": (m.group("duration") or "").strip()[:60],
+            "type": (m.group("type") or "").strip()[:30],
+            "vendor": "",  # this parser never fills the vendor; the key is always the empty string
+        })
+    logger.info("parse_ocr_cookie_table: %d unique cookies extracted",
+                 len(out))
+    return out
+
+
+# ── 6. Vision-Response-Parser ──────────────────────────────────────
+
+VISION_PROMPT = (  # German prompt requesting a bare {"cookies": [...]} JSON object; its keys are the ones parse_vision_response reads
+    "Du analysierst einen Screenshot einer Cookie-Richtlinie. Auf der Seite "
+    "ist eine Tabelle mit Cookies aufgelistet. Spalten sind ueblicherweise: "
+    "Name des Cookies, Kategorie (z.B. 'Funktional', 'Marketing', "
+    "'Analytics'), Verwendungszweck, Speicherdauer, Art des Cookies "
+    "(z.B. 'Permanent', 'Session').\n\n"
+    "Extrahiere ALLE Cookies aus dem Bild. Wenn die Tabelle abgeschnitten "
+    "ist, extrahiere alles was sichtbar ist. KEINE Cookies erfinden, KEINE "
+    "Halluzinationen.\n\n"
+    "Antworte als reines JSON-Objekt im Format:\n"
+    '{"cookies": [\n'
+    '  {"name": "<Cookie-Name exakt>", "category": "<Kategorie>", '
+    '"purpose": "<Kurzfassung Zweck max 120 chars>", '
+    '"duration": "<Speicherdauer mit Einheit>", '
+    '"type": "<Permanent|Session|...>", '
+    '"vendor": "<Anbieter falls bekannt, sonst leer>"}\n'
+    "]}\n\n"
+    "Nur JSON, kein Erklaerungstext, keine Code-Fences."
+)
+
+
+def parse_vision_response(content: str) -> list[dict]:
+    """Parse the vision model's reply into cookie dicts, tolerating a code fence and prose around the JSON object; returns [] when no valid object is found."""
+    if not content:
+        return []
+    txt = content.strip()
+    if txt.startswith("```"):  # strip a Markdown code fence: drop the opening line ...
+        lines = txt.split("\n")
+        if lines and lines[-1].strip().startswith("```"):  # ... and the closing line when present
+            txt = "\n".join(lines[1:-1])
+        else:
+            txt = "\n".join(lines[1:])
+    a, b = txt.find("{"), txt.rfind("}")  # span from the first "{" to the last "}" — ignores text before/after
+    if not (0 <= a < b):
+        return []
+    try:
+        obj = json.loads(txt[a:b + 1])
+    except json.JSONDecodeError:  # invalid or truncated JSON is not repaired
+        return []
+    if not isinstance(obj, dict):
+        return []
+    arr = obj.get("cookies") or obj.get("Cookies") or []  # accepts either capitalisation of the key
+    if not isinstance(arr, list):
+        return []
+    out: list[dict] = []
+    for item in arr[:300]:  # sanity cap: at most the first 300 entries are considered
+        if not isinstance(item, dict):
+            continue
+        name = (item.get("name") or "").strip()  # NOTE(review): assumes string values — a truthy non-string (here or in the fields below) raises AttributeError on .strip()
+        if not name or len(name) < 2 or len(name) > 80:
+            continue
+        if re.fullmatch(r"[\s\-_.]+", name):  # a name made only of whitespace, "-", "_" or "." is treated as garbage
+            continue
+        out.append({
+            "name": name[:80],
+            "category": (item.get("category") or "").strip()[:60],
+            "purpose": (item.get("purpose") or "").strip()[:200],
+            "duration": (item.get("duration") or "").strip()[:60],
+            "type": (item.get("type") or "").strip()[:30],
+            "vendor": (item.get("vendor") or "").strip()[:80],
+        })
+    return out