feat: add Kombi-Vergleich mode for side-by-side Paddle vs RapidOCR comparison

Add /rapid-kombi backend endpoint using local RapidOCR + Tesseract merge, KombiCompareStep component for parallel execution and side-by-side overlay, and wordResultOverride prop on OverlayReconstruction for direct data injection. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-14 07:59:06 +01:00
parent c2c082d4b4
commit a994ddee83
6 changed files with 504 additions and 35 deletions
@@ -11,12 +11,13 @@ import { StepRowDetection } from '@/components/ocr-pipeline/StepRowDetection'
 import { StepWordRecognition } from '@/components/ocr-pipeline/StepWordRecognition'
 import { OverlayReconstruction } from '@/components/ocr-overlay/OverlayReconstruction'
 import { PaddleDirectStep } from '@/components/ocr-overlay/PaddleDirectStep'
-import { OVERLAY_PIPELINE_STEPS, PADDLE_DIRECT_STEPS, KOMBI_STEPS, DOCUMENT_CATEGORIES, dbStepToOverlayUi, type PipelineStep, type SessionListItem, type DocumentCategory } from './types'
+import { KombiCompareStep } from '@/components/ocr-overlay/KombiCompareStep'
+import { OVERLAY_PIPELINE_STEPS, PADDLE_DIRECT_STEPS, KOMBI_STEPS, KOMBI_COMPARE_STEPS, DOCUMENT_CATEGORIES, dbStepToOverlayUi, type PipelineStep, type SessionListItem, type DocumentCategory } from './types'

 const KLAUSUR_API = '/klausur-api'

 export default function OcrOverlayPage() {
-  const [mode, setMode] = useState<'pipeline' | 'paddle-direct' | 'kombi'>('pipeline')
+  const [mode, setMode] = useState<'pipeline' | 'paddle-direct' | 'kombi' | 'kombi-compare'>('pipeline')
  const [currentStep, setCurrentStep] = useState(0)
  const [sessionId, setSessionId] = useState<string | null>(null)
  const [sessionName, setSessionName] = useState<string>('')
@@ -63,14 +64,15 @@ export default function OcrOverlayPage() {
      setSessionName(data.name || data.filename || '')
      setActiveCategory(data.document_category || undefined)

-      // Check if this session was processed with paddle_direct or kombi
+      // Check if this session was processed with paddle_direct, kombi, or rapid_kombi
      const ocrEngine = data.word_result?.ocr_engine
      const isPaddleDirect = ocrEngine === 'paddle_direct'
      const isKombi = ocrEngine === 'kombi'
+      const isRapidKombi = ocrEngine === 'rapid_kombi'

-      if (isPaddleDirect || isKombi) {
-        const m = isKombi ? 'kombi' : 'paddle-direct'
-        const baseSteps = isKombi ? KOMBI_STEPS : PADDLE_DIRECT_STEPS
+      if (isPaddleDirect || isKombi || isRapidKombi) {
+        const m = isKombi ? 'kombi' : isPaddleDirect ? 'paddle-direct' : 'kombi-compare'
+        const baseSteps = isKombi ? KOMBI_STEPS : isRapidKombi ? KOMBI_COMPARE_STEPS : PADDLE_DIRECT_STEPS
        setMode(m)
        setSteps(
          baseSteps.map((s, i) => ({
@@ -105,7 +107,7 @@ export default function OcrOverlayPage() {
      if (sessionId === sid) {
        setSessionId(null)
        setCurrentStep(0)
-        const baseSteps = mode === 'kombi' ? KOMBI_STEPS : mode === 'paddle-direct' ? PADDLE_DIRECT_STEPS : OVERLAY_PIPELINE_STEPS
+        const baseSteps = mode === 'kombi' ? KOMBI_STEPS : mode === 'kombi-compare' ? KOMBI_COMPARE_STEPS : mode === 'paddle-direct' ? PADDLE_DIRECT_STEPS : OVERLAY_PIPELINE_STEPS
        setSteps(baseSteps.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
      }
    } catch (e) {
@@ -162,7 +164,7 @@ export default function OcrOverlayPage() {
  const handleNext = () => {
    if (currentStep >= steps.length - 1) {
      // Last step completed — return to session list
-      const baseSteps = mode === 'kombi' ? KOMBI_STEPS : mode === 'paddle-direct' ? PADDLE_DIRECT_STEPS : OVERLAY_PIPELINE_STEPS
+      const baseSteps = mode === 'kombi' ? KOMBI_STEPS : mode === 'kombi-compare' ? KOMBI_COMPARE_STEPS : mode === 'paddle-direct' ? PADDLE_DIRECT_STEPS : OVERLAY_PIPELINE_STEPS
      setSteps(baseSteps.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
      setCurrentStep(0)
      setSessionId(null)
@@ -191,7 +193,7 @@ export default function OcrOverlayPage() {
    setSessionId(null)
    setSessionName('')
    setCurrentStep(0)
-    const baseSteps = mode === 'kombi' ? KOMBI_STEPS : mode === 'paddle-direct' ? PADDLE_DIRECT_STEPS : OVERLAY_PIPELINE_STEPS
+    const baseSteps = mode === 'kombi' ? KOMBI_STEPS : mode === 'kombi-compare' ? KOMBI_COMPARE_STEPS : mode === 'paddle-direct' ? PADDLE_DIRECT_STEPS : OVERLAY_PIPELINE_STEPS
    setSteps(baseSteps.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
  }

@@ -230,7 +232,7 @@ export default function OcrOverlayPage() {
  }, [sessionId, goToStep])

  const renderStep = () => {
-    if (mode === 'paddle-direct' || mode === 'kombi') {
+    if (mode === 'paddle-direct' || mode === 'kombi' || mode === 'kombi-compare') {
      switch (currentStep) {
        case 0:
          return <StepOrientation sessionId={sessionId} onNext={handleOrientationComplete} />
@@ -241,6 +243,9 @@ export default function OcrOverlayPage() {
        case 3:
          return <StepCrop sessionId={sessionId} onNext={handleNext} />
        case 4:
+          if (mode === 'kombi-compare') {
+            return <KombiCompareStep sessionId={sessionId} onNext={handleNext} />
+          }
          return mode === 'kombi' ? (
            <PaddleDirectStep
              sessionId={sessionId}
@@ -514,6 +519,22 @@ export default function OcrOverlayPage() {
        >
          Kombi (5 Schritte)
        </button>
+        <button
+          onClick={() => {
+            if (mode === 'kombi-compare') return
+            setMode('kombi-compare')
+            setCurrentStep(0)
+            setSessionId(null)
+            setSteps(KOMBI_COMPARE_STEPS.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
+          }}
+          className={`px-3 py-1.5 text-xs font-medium rounded-md transition-colors ${
+            mode === 'kombi-compare'
+              ? 'bg-white dark:bg-gray-700 text-gray-700 dark:text-gray-200 shadow-sm'
+              : 'text-gray-500 dark:text-gray-400 hover:text-gray-700 dark:hover:text-gray-300'
+          }`}
+        >
+          Vergleich (5 Schritte)
+        </button>
      </div>

      <PipelineStepper
@@ -72,6 +72,18 @@ export const KOMBI_STEPS: PipelineStep[] = [
  { id: 'kombi', name: 'Paddle + Tesseract', icon: '🔀', status: 'pending' },
 ]

+/**
+ * 5-step pipeline for Kombi-Vergleich mode (Paddle-Kombi vs Rapid-Kombi side-by-side).
+ * Same preprocessing, then both kombi engines run in parallel and are shown side-by-side.
+ */
+export const KOMBI_COMPARE_STEPS: PipelineStep[] = [
+  { id: 'orientation', name: 'Orientierung', icon: '🔄', status: 'pending' },
+  { id: 'deskew', name: 'Begradigung', icon: '📐', status: 'pending' },
+  { id: 'dewarp', name: 'Entzerrung', icon: '🔧', status: 'pending' },
+  { id: 'crop', name: 'Zuschneiden', icon: '✂️', status: 'pending' },
+  { id: 'kombi-compare', name: 'Kombi-Vergleich', icon: '⚖️', status: 'pending' },
+]
+
 /** Map from DB step to overlay UI step index */
 export function dbStepToOverlayUi(dbStep: number): number {
  // DB: 1=start, 2=orient, 3=deskew, 4=dewarp, 5=crop, 6=columns, 7=rows, 8=words, 9=recon, 10=gt
@@ -0,0 +1,231 @@
+'use client'
+
+import { useState } from 'react'
+import { OverlayReconstruction } from './OverlayReconstruction'
+import type { GridCell } from '@/app/(admin)/ai/ocr-overlay/types'
+
+const KLAUSUR_API = '/klausur-api'
+
+type Phase = 'idle' | 'running' | 'compare'
+
+interface KombiResult {
+  cells: GridCell[]
+  image_width: number
+  image_height: number
+  duration_seconds: number
+  summary: {
+    total_cells: number
+    non_empty_cells: number
+    merged_words: number
+    [key: string]: unknown
+  }
+  [key: string]: unknown
+}
+
+interface KombiCompareStepProps {
+  sessionId: string | null
+  onNext: () => void
+}
+
+export function KombiCompareStep({ sessionId, onNext }: KombiCompareStepProps) {
+  const [phase, setPhase] = useState<Phase>('idle')
+  const [error, setError] = useState('')
+  const [paddleResult, setPaddleResult] = useState<KombiResult | null>(null)
+  const [rapidResult, setRapidResult] = useState<KombiResult | null>(null)
+  const [paddleStatus, setPaddleStatus] = useState<'pending' | 'running' | 'done' | 'error'>('pending')
+  const [rapidStatus, setRapidStatus] = useState<'pending' | 'running' | 'done' | 'error'>('pending')
+
+  const runBothEngines = async () => {
+    if (!sessionId) return
+    setPhase('running')
+    setError('')
+    setPaddleStatus('running')
+    setRapidStatus('running')
+    setPaddleResult(null)
+    setRapidResult(null)
+
+    const fetchEngine = async (
+      endpoint: string,
+      setResult: (r: KombiResult) => void,
+      setStatus: (s: 'pending' | 'running' | 'done' | 'error') => void,
+    ) => {
+      try {
+        const res = await fetch(
+          `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/${endpoint}`,
+          { method: 'POST' },
+        )
+        if (!res.ok) {
+          const body = await res.json().catch(() => ({}))
+          throw new Error(body.detail || `HTTP ${res.status}`)
+        }
+        const data = await res.json()
+        setResult(data)
+        setStatus('done')
+      } catch (e: unknown) {
+        setStatus('error')
+        throw e
+      }
+    }
+
+    try {
+      await Promise.all([
+        fetchEngine('paddle-kombi', setPaddleResult, setPaddleStatus),
+        fetchEngine('rapid-kombi', setRapidResult, setRapidStatus),
+      ])
+      setPhase('compare')
+    } catch (e: unknown) {
+      // At least one failed — still show compare if the other succeeded
+      setError(e instanceof Error ? e.message : String(e))
+      setPhase('compare')
+    }
+  }
+
+  if (phase === 'idle') {
+    return (
+      <div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-8 text-center">
+        <div className="text-4xl mb-3">⚖️</div>
+        <h3 className="text-lg font-semibold text-gray-800 dark:text-gray-200 mb-2">
+          Kombi-Vergleich
+        </h3>
+        <p className="text-sm text-gray-500 dark:text-gray-400 mb-6 max-w-lg mx-auto">
+          Beide Kombi-Modi (Paddle + Tesseract vs. RapidOCR + Tesseract) laufen parallel.
+          Die Ergebnisse werden nebeneinander angezeigt, damit die Qualitaet direkt verglichen werden kann.
+        </p>
+        <button
+          onClick={runBothEngines}
+          disabled={!sessionId}
+          className="px-5 py-2.5 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors disabled:opacity-50 disabled:cursor-not-allowed font-medium"
+        >
+          Beide Kombi-Modi starten
+        </button>
+      </div>
+    )
+  }
+
+  if (phase === 'running' && !paddleResult && !rapidResult) {
+    return (
+      <div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-8">
+        <div className="flex items-center justify-center gap-8">
+          <EngineStatusCard label="Paddle + Tesseract" status={paddleStatus} />
+          <EngineStatusCard label="RapidOCR + Tesseract" status={rapidStatus} />
+        </div>
+      </div>
+    )
+  }
+
+  // compare phase
+  return (
+    <div className="space-y-4">
+      {error && (
+        <div className="bg-red-50 dark:bg-red-900/20 border border-red-200 dark:border-red-800 rounded-lg p-3 text-sm text-red-700 dark:text-red-300">
+          {error}
+        </div>
+      )}
+
+      <div className="flex items-center justify-between">
+        <h3 className="text-sm font-medium text-gray-700 dark:text-gray-300">
+          Side-by-Side Vergleich
+        </h3>
+        <button
+          onClick={() => { setPhase('idle'); setPaddleResult(null); setRapidResult(null) }}
+          className="text-xs px-3 py-1.5 border border-gray-300 dark:border-gray-600 rounded-lg hover:bg-gray-50 dark:hover:bg-gray-700 transition-colors"
+        >
+          Neu starten
+        </button>
+      </div>
+
+      <div className="grid grid-cols-2 gap-4">
+        {/* Left: Paddle-Kombi */}
+        <div className="space-y-2">
+          <div className="flex items-center gap-2">
+            <span className="text-sm font-medium text-gray-700 dark:text-gray-300">
+              🔀 Paddle + Tesseract
+            </span>
+            {paddleStatus === 'error' && (
+              <span className="text-xs text-red-500">Fehler</span>
+            )}
+          </div>
+          {paddleResult ? (
+            <>
+              <OverlayReconstruction
+                sessionId={sessionId}
+                onNext={() => {}}
+                wordResultOverride={paddleResult}
+              />
+              <StatsBar result={paddleResult} engine="Paddle-Kombi" />
+            </>
+          ) : (
+            <div className="bg-gray-50 dark:bg-gray-900 rounded-lg p-12 text-center text-sm text-gray-400">
+              {paddleStatus === 'running' ? 'Laeuft...' : 'Fehlgeschlagen'}
+            </div>
+          )}
+        </div>
+
+        {/* Right: Rapid-Kombi */}
+        <div className="space-y-2">
+          <div className="flex items-center gap-2">
+            <span className="text-sm font-medium text-gray-700 dark:text-gray-300">
+              ⚡ RapidOCR + Tesseract
+            </span>
+            {rapidStatus === 'error' && (
+              <span className="text-xs text-red-500">Fehler</span>
+            )}
+          </div>
+          {rapidResult ? (
+            <>
+              <OverlayReconstruction
+                sessionId={sessionId}
+                onNext={() => {}}
+                wordResultOverride={rapidResult}
+              />
+              <StatsBar result={rapidResult} engine="Rapid-Kombi" />
+            </>
+          ) : (
+            <div className="bg-gray-50 dark:bg-gray-900 rounded-lg p-12 text-center text-sm text-gray-400">
+              {rapidStatus === 'running' ? 'Laeuft...' : 'Fehlgeschlagen'}
+            </div>
+          )}
+        </div>
+      </div>
+
+      <div className="flex justify-end">
+        <button
+          onClick={onNext}
+          className="px-4 py-2 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors text-sm font-medium"
+        >
+          Fertig
+        </button>
+      </div>
+    </div>
+  )
+}
+
+function EngineStatusCard({ label, status }: { label: string; status: string }) {
+  return (
+    <div className="flex items-center gap-3 bg-gray-50 dark:bg-gray-900 rounded-lg px-5 py-4">
+      {status === 'running' && (
+        <div className="w-5 h-5 border-2 border-teal-400 border-t-transparent rounded-full animate-spin" />
+      )}
+      {status === 'done' && <span className="text-green-500 text-lg">✓</span>}
+      {status === 'error' && <span className="text-red-500 text-lg">✗</span>}
+      {status === 'pending' && <span className="text-gray-400 text-lg">○</span>}
+      <span className="text-sm text-gray-700 dark:text-gray-300">{label}</span>
+    </div>
+  )
+}
+
+function StatsBar({ result, engine }: { result: KombiResult; engine: string }) {
+  const nonEmpty = result.summary?.non_empty_cells ?? 0
+  const totalCells = result.summary?.total_cells ?? 0
+  const merged = result.summary?.merged_words ?? 0
+  const duration = result.duration_seconds ?? 0
+
+  return (
+    <div className="flex items-center gap-3 text-[11px] text-gray-500 dark:text-gray-400 bg-gray-50 dark:bg-gray-900 rounded-lg px-3 py-2">
+      <span className="font-medium text-gray-600 dark:text-gray-300">{engine}</span>
+      <span>{merged} Woerter</span>
+      <span>{nonEmpty}/{totalCells} Zellen</span>
+      <span>{duration.toFixed(2)}s</span>
+    </div>
+  )
+}
@@ -10,6 +10,8 @@ const KLAUSUR_API = '/klausur-api'
 interface OverlayReconstructionProps {
  sessionId: string | null
  onNext: () => void
+  /** When set, use this data directly instead of fetching from the session API. */
+  wordResultOverride?: { cells: GridCell[]; image_width: number; image_height: number; [key: string]: unknown }
 }

 interface EditableCell {
@@ -24,7 +26,7 @@ interface EditableCell {

 type UndoAction = { cellId: string; oldText: string; newText: string }

-export function OverlayReconstruction({ sessionId, onNext }: OverlayReconstructionProps) {
+export function OverlayReconstruction({ sessionId, onNext, wordResultOverride }: OverlayReconstructionProps) {
  const [status, setStatus] = useState<'loading' | 'ready' | 'saving' | 'saved' | 'error'>('loading')
  const [error, setError] = useState('')
  const [cells, setCells] = useState<EditableCell[]>([])
@@ -78,10 +80,39 @@ export function OverlayReconstruction({ sessionId, onNext }: OverlayReconstructi

  // Load session data
  useEffect(() => {
+    if (wordResultOverride) {
+      applyWordResult(wordResultOverride)
+      return
+    }
    if (!sessionId) return
    loadSessionData()
  // eslint-disable-next-line react-hooks/exhaustive-deps
-  }, [sessionId])
+  }, [sessionId, wordResultOverride])
+
+  const applyWordResult = (wordResult: { cells: GridCell[]; image_width: number; image_height: number; [key: string]: unknown }) => {
+    const rawGridCells: GridCell[] = wordResult.cells || []
+    setGridCells(rawGridCells)
+
+    const editableCells: EditableCell[] = rawGridCells.map(c => ({
+      cellId: c.cell_id,
+      text: c.text,
+      originalText: c.text,
+      bboxPct: c.bbox_pct,
+      colType: c.col_type,
+      rowIndex: c.row_index,
+      colIndex: c.col_index,
+    }))
+    setCells(editableCells)
+    setEditedTexts(new Map())
+    setUndoStack([])
+    setRedoStack([])
+
+    if (wordResult.image_width && wordResult.image_height) {
+      setImageNaturalSize({ w: wordResult.image_width, h: wordResult.image_height })
+    }
+
+    setStatus('ready')
+  }

  const loadSessionData = async () => {
    if (!sessionId) return
@@ -98,33 +129,11 @@ export function OverlayReconstruction({ sessionId, onNext }: OverlayReconstructi
        return
      }

-      const rawGridCells: GridCell[] = wordResult.cells || []
-      setGridCells(rawGridCells)
-
-      const editableCells: EditableCell[] = rawGridCells.map(c => ({
-        cellId: c.cell_id,
-        text: c.text,
-        originalText: c.text,
-        bboxPct: c.bbox_pct,
-        colType: c.col_type,
-        rowIndex: c.row_index,
-        colIndex: c.col_index,
-      }))
-      setCells(editableCells)
-      setEditedTexts(new Map())
-      setUndoStack([])
-      setRedoStack([])
+      applyWordResult(wordResult as unknown as { cells: GridCell[]; image_width: number; image_height: number })

      // Load rows
      const rowResult: RowResult | undefined = data.row_result
      if (rowResult?.rows) setRows(rowResult.rows)
-
-      // Store image dimensions
-      if (wordResult.image_width && wordResult.image_height) {
-        setImageNaturalSize({ w: wordResult.image_width, h: wordResult.image_height })
-      }
-
-      setStatus('ready')
    } catch (e: unknown) {
      setError(e instanceof Error ? e.message : String(e))
      setStatus('error')
@@ -2976,6 +2976,141 @@ async def paddle_kombi(session_id: str):
    return {"session_id": session_id, **word_result}


+@router.post("/sessions/{session_id}/rapid-kombi")
+async def rapid_kombi(session_id: str):
+    """Run RapidOCR + Tesseract on the preprocessed image and merge results.
+
+    Same merge logic as paddle-kombi, but uses local RapidOCR (ONNX Runtime)
+    instead of remote PaddleOCR service.
+    """
+    img_png = await get_session_image(session_id, "cropped")
+    if not img_png:
+        img_png = await get_session_image(session_id, "dewarped")
+    if not img_png:
+        img_png = await get_session_image(session_id, "original")
+    if not img_png:
+        raise HTTPException(status_code=404, detail="No image found for this session")
+
+    img_arr = np.frombuffer(img_png, dtype=np.uint8)
+    img_bgr = cv2.imdecode(img_arr, cv2.IMREAD_COLOR)
+    if img_bgr is None:
+        raise HTTPException(status_code=400, detail="Failed to decode image")
+
+    img_h, img_w = img_bgr.shape[:2]
+
+    from cv_ocr_engines import ocr_region_rapid
+    from cv_vocab_types import PageRegion
+
+    t0 = time.time()
+
+    # --- RapidOCR (local, synchronous) ---
+    full_region = PageRegion(
+        type="full_page", x=0, y=0, width=img_w, height=img_h,
+    )
+    rapid_words = ocr_region_rapid(img_bgr, full_region)
+    if not rapid_words:
+        rapid_words = []
+
+    # --- Tesseract ---
+    from PIL import Image
+    import pytesseract
+
+    pil_img = Image.fromarray(cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB))
+    data = pytesseract.image_to_data(
+        pil_img, lang="eng+deu",
+        config="--psm 6 --oem 3",
+        output_type=pytesseract.Output.DICT,
+    )
+    tess_words = []
+    for i in range(len(data["text"])):
+        text = str(data["text"][i]).strip()
+        conf_raw = str(data["conf"][i])
+        conf = int(conf_raw) if conf_raw.lstrip("-").isdigit() else -1
+        if not text or conf < 20:
+            continue
+        tess_words.append({
+            "text": text,
+            "left": data["left"][i],
+            "top": data["top"][i],
+            "width": data["width"][i],
+            "height": data["height"][i],
+            "conf": conf,
+        })
+
+    # --- Split multi-word RapidOCR boxes into individual words ---
+    rapid_words_split = _split_paddle_multi_words(rapid_words)
+    logger.info(
+        "rapid_kombi: split %d rapid boxes → %d individual words",
+        len(rapid_words), len(rapid_words_split),
+    )
+
+    # --- Merge ---
+    if not rapid_words_split and not tess_words:
+        raise HTTPException(status_code=400, detail="Both OCR engines returned no words")
+
+    merged_words = _merge_paddle_tesseract(rapid_words_split, tess_words)
+
+    cells, columns_meta = build_grid_from_words(merged_words, img_w, img_h)
+    duration = time.time() - t0
+
+    for cell in cells:
+        cell["ocr_engine"] = "rapid_kombi"
+
+    n_rows = len(set(c["row_index"] for c in cells)) if cells else 0
+    n_cols = len(columns_meta)
+    col_types = {c.get("type") for c in columns_meta}
+    is_vocab = bool(col_types & {"column_en", "column_de"})
+
+    word_result = {
+        "cells": cells,
+        "grid_shape": {"rows": n_rows, "cols": n_cols, "total_cells": len(cells)},
+        "columns_used": columns_meta,
+        "layout": "vocab" if is_vocab else "generic",
+        "image_width": img_w,
+        "image_height": img_h,
+        "duration_seconds": round(duration, 2),
+        "ocr_engine": "rapid_kombi",
+        "grid_method": "rapid_kombi",
+        "raw_rapid_words": rapid_words,
+        "raw_rapid_words_split": rapid_words_split,
+        "raw_tesseract_words": tess_words,
+        "summary": {
+            "total_cells": len(cells),
+            "non_empty_cells": sum(1 for c in cells if c.get("text")),
+            "low_confidence": sum(1 for c in cells if 0 < c.get("confidence", 0) < 50),
+            "rapid_words": len(rapid_words),
+            "rapid_words_split": len(rapid_words_split),
+            "tesseract_words": len(tess_words),
+            "merged_words": len(merged_words),
+        },
+    }
+
+    await update_session_db(
+        session_id,
+        word_result=word_result,
+        cropped_png=img_png,
+        current_step=8,
+    )
+
+    logger.info(
+        "rapid_kombi session %s: %d cells (%d rows, %d cols) in %.2fs "
+        "[rapid=%d, tess=%d, merged=%d]",
+        session_id, len(cells), n_rows, n_cols, duration,
+        len(rapid_words), len(tess_words), len(merged_words),
+    )
+
+    await _append_pipeline_log(session_id, "rapid_kombi", {
+        "total_cells": len(cells),
+        "non_empty_cells": word_result["summary"]["non_empty_cells"],
+        "rapid_words": len(rapid_words),
+        "tesseract_words": len(tess_words),
+        "merged_words": len(merged_words),
+        "ocr_engine": "rapid_kombi",
+    }, duration_ms=int(duration * 1000))
+
+    return {"session_id": session_id, **word_result}
+
+
 class WordGroundTruthRequest(BaseModel):
    is_correct: bool
    corrected_entries: Optional[List[Dict[str, Any]]] = None
@@ -449,6 +449,67 @@ class TestSpatialOverlapDedup:
        assert len(merged) == 2


+class TestRapidOcrMergeCompatibility:
+    """Test that _merge_paddle_tesseract works with RapidOCR word format.
+
+    RapidOCR words include an extra 'region_type' key that PaddleOCR words
+    don't have. The merge logic must tolerate this extra field.
+    """
+
+    def _rapid_word(self, text, left, top, width=60, height=20, conf=80, region_type="full_page"):
+        """Create a word dict in RapidOCR format (has region_type)."""
+        return {
+            "text": text,
+            "left": left,
+            "top": top,
+            "width": width,
+            "height": height,
+            "conf": conf,
+            "region_type": region_type,
+        }
+
+    def test_rapid_words_merge_with_tesseract(self):
+        """RapidOCR words (with region_type) merge correctly with Tesseract words."""
+        rapid = [
+            self._rapid_word("apple", 50, 10, 70, 20, conf=90),
+            self._rapid_word("Apfel", 300, 10, 60, 20, conf=85),
+        ]
+        tess = [
+            _word("apple", 52, 11, 68, 19, conf=75),
+            _word("Apfel", 298, 12, 62, 18, conf=70),
+        ]
+        merged = _merge_paddle_tesseract(rapid, tess)
+        assert len(merged) == 2
+        texts = sorted(w["text"] for w in merged)
+        assert texts == ["Apfel", "apple"]
+
+    def test_rapid_words_split_then_merge(self):
+        """Split + merge works with RapidOCR multi-word boxes."""
+        rapid_raw = [
+            self._rapid_word("More than 200", 944, 287, 160, 29, conf=96),
+        ]
+        tess = [
+            _word("More", 948, 292, 60, 20, conf=90),
+            _word("than", 1017, 291, 49, 21, conf=96),
+            _word("200", 1076, 292, 43, 20, conf=93),
+        ]
+        rapid_split = _split_paddle_multi_words(rapid_raw)
+        assert len(rapid_split) == 3
+        merged = _merge_paddle_tesseract(rapid_split, tess)
+        texts = [w["text"] for w in merged]
+        assert texts.count("More") == 1
+        assert texts.count("than") == 1
+        assert texts.count("200") == 1
+
+    def test_region_type_preserved_in_unmatched(self):
+        """Unmatched RapidOCR words keep their region_type field."""
+        rapid = [self._rapid_word("unique", 500, 10, 80, 20, conf=90)]
+        tess = []  # No Tesseract words
+        merged = _merge_paddle_tesseract(rapid, tess)
+        assert len(merged) == 1
+        assert merged[0]["text"] == "unique"
+
+
 class TestSplitThenMerge:
    """Test the full pipeline: split multi-word Paddle boxes, then merge."""