feat(ocr-pipeline): generic cell-grid with optional vocab mapping

Extract build_cell_grid() from build_word_grid() as a layout-agnostic foundation. Step 5 now produces a generic cell grid (columns x rows) and auto-detects whether a vocab layout is present. The frontend dynamically switches between the vocab table (EN/DE/Example) and a generic cell table based on the layout type.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-01 17:22:56 +01:00
parent 3bcb7aa638
commit 27b895a848
4 changed files with 802 additions and 301 deletions
@@ -29,7 +29,7 @@ export interface SessionInfo {
  dewarp_result?: DewarpResult
  column_result?: ColumnResult
  row_result?: RowResult
-  word_result?: WordResult
+  word_result?: GridResult
 }

 export interface DeskewResult {
@@ -124,6 +124,49 @@ export interface WordBbox {
  h: number
 }

+/**
+ * A single cell of the recognized grid, addressed by row and column.
+ *
+ * NOTE(review): the field notes below are derived from how the word-recognition
+ * step UI consumes the payload; confirm against the backend that produces it.
+ */
+export interface GridCell {
+  cell_id: string          // "R03_C1" — used to identify the cell when editing; presumably encodes row and column
+  row_index: number  // Cells sharing a row_index are grouped into one table row
+  col_index: number  // Matched against ColumnMeta.index to place the cell in its column
+  col_type: string  // Presumably mirrors ColumnMeta.type — TODO confirm
+  text: string
+  confidence: number  // Rendered with a "%" suffix, so presumably on a 0-100 scale — TODO confirm
+  bbox_px: WordBbox  // Presumably the bounding box in image pixels — TODO confirm
+  bbox_pct: WordBbox  // Applied as CSS percentages (left/top/width/height) for the highlight overlay
+  ocr_engine?: string
+  status?: 'pending' | 'confirmed' | 'edited' | 'skipped'  // Treated as 'pending' when absent
+}
+
+/** Describes one grid column, as listed in GridResult.columns_used. */
+export interface ColumnMeta {
+  index: number  // Matched against GridCell.col_index when rendering the generic table
+  type: string  // Column type key; the UI maps it to a header label and a color class
+  x: number  // Presumably the column's horizontal position; units are not visible here — TODO confirm
+  width: number  // Presumably the column's width in the same units as x — TODO confirm
+}
+
+/**
+ * Result of the word-recognition step (stored as SessionInfo.word_result):
+ * a generic cell grid plus, for vocab layouts, the vocabulary entries.
+ */
+export interface GridResult {
+  cells: GridCell[]
+  grid_shape: { rows: number; cols: number; total_cells: number }
+  columns_used: ColumnMeta[]
+  layout: 'vocab' | 'generic'  // Selects which table and labeling UI the frontend shows
+  image_width: number
+  image_height: number
+  duration_seconds: number  // Displayed with an "s" suffix
+  ocr_engine?: string
+  vocab_entries?: WordEntry[]   // Only when layout='vocab'
+  entries?: WordEntry[]         // Backwards compat alias for vocab_entries
+  entry_count?: number
+  summary: {
+    total_cells: number
+    non_empty_cells: number
+    low_confidence: number  // A "low confidence" badge is shown when this is > 0
+    // Only when layout='vocab':
+    total_entries?: number
+    with_english?: number
+    with_german?: number
+  }
+}
+
 export interface WordEntry {
  row_index: number
  english: string
@@ -137,6 +180,7 @@ export interface WordEntry {
  status?: 'pending' | 'confirmed' | 'edited' | 'skipped'
 }

+/** @deprecated Use GridResult instead */
 export interface WordResult {
  entries: WordEntry[]
  entry_count: number
@@ -1,13 +1,13 @@
 'use client'

 import { useCallback, useEffect, useRef, useState } from 'react'
-import type { WordResult, WordEntry, WordGroundTruth } from '@/app/(admin)/ai/ocr-pipeline/types'
+import type { GridResult, GridCell, WordEntry, WordGroundTruth } from '@/app/(admin)/ai/ocr-pipeline/types'

 const KLAUSUR_API = '/klausur-api'

 /** Render text with \n as line breaks */
 function MultilineText({ text }: { text: string }) {
-  if (!text) return <span className="text-gray-300 dark:text-gray-600">—</span>
+  if (!text) return <span className="text-gray-300 dark:text-gray-600">&mdash;</span>
  const lines = text.split('\n')
  if (lines.length === 1) return <>{text}</>
  return <>{lines.map((line, i) => (
@@ -15,6 +15,31 @@ function MultilineText({ text }: { text: string }) {
  ))}</>
 }

+/**
+ * Column type → human-readable header.
+ *
+ * Known types are looked up in a fixed table. Any other type falls back to the
+ * raw type string with the first occurrence of "column_" removed.
+ *
+ * @param colType Column type key, e.g. "column_en".
+ * @returns The header label to display for that column.
+ */
+function colTypeLabel(colType: string): string {
+  const labels: Record<string, string> = {
+    column_en: 'English',
+    column_de: 'Deutsch',
+    column_example: 'Example',
+    column_text: 'Text',
+    column_marker: 'Marker',
+    page_ref: 'Seite',
+  }
+  return labels[colType] || colType.replace('column_', '')
+}
+
+/**
+ * Column type → color class.
+ *
+ * Returns a pair of text color classes (light and `dark:` variant) for the
+ * known column types. Types without an entry, including "page_ref", get the
+ * neutral gray default.
+ *
+ * @param colType Column type key, e.g. "column_en".
+ * @returns The class string to append to an element's className.
+ */
+function colTypeColor(colType: string): string {
+  const colors: Record<string, string> = {
+    column_en: 'text-blue-600 dark:text-blue-400',
+    column_de: 'text-green-600 dark:text-green-400',
+    column_example: 'text-orange-600 dark:text-orange-400',
+    column_text: 'text-purple-600 dark:text-purple-400',
+    column_marker: 'text-gray-500 dark:text-gray-400',
+  }
+  return colors[colType] || 'text-gray-600 dark:text-gray-400'
+}
+
 interface StepWordRecognitionProps {
  sessionId: string | null
  onNext: () => void
@@ -22,7 +47,7 @@ interface StepWordRecognitionProps {
 }

 export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRecognitionProps) {
-  const [wordResult, setWordResult] = useState<WordResult | null>(null)
+  const [gridResult, setGridResult] = useState<GridResult | null>(null)
  const [detecting, setDetecting] = useState(false)
  const [error, setError] = useState<string | null>(null)
  const [gtNotes, setGtNotes] = useState('')
@@ -31,6 +56,7 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
  // Step-through labeling state
  const [activeIndex, setActiveIndex] = useState(0)
  const [editedEntries, setEditedEntries] = useState<WordEntry[]>([])
+  const [editedCells, setEditedCells] = useState<GridCell[]>([])
  const [mode, setMode] = useState<'overview' | 'labeling'>('overview')
  const [ocrEngine, setOcrEngine] = useState<'auto' | 'tesseract' | 'rapid'>('auto')
  const [usedEngine, setUsedEngine] = useState<string>('')
@@ -38,6 +64,8 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec

  const enRef = useRef<HTMLInputElement>(null)

+  const isVocab = gridResult?.layout === 'vocab'
+
  useEffect(() => {
    if (!sessionId) return

@@ -47,9 +75,7 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
        if (res.ok) {
          const info = await res.json()
          if (info.word_result) {
-            setWordResult(info.word_result)
-            setUsedEngine(info.word_result.ocr_engine || '')
-            initEntries(info.word_result.entries)
+            applyGridResult(info.word_result)
            return
          }
        }
@@ -63,6 +89,17 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
  // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [sessionId])

+  /**
+   * Stores a freshly loaded or detected grid result and re-initialises all
+   * editing state derived from it.
+   *
+   * Entries and cells are always reset, also for generic layouts and for
+   * payloads without `cells`, so that the active index and the edit buffers
+   * never refer to a previous result.
+   */
+  const applyGridResult = (data: GridResult) => {
+    setGridResult(data)
+    setUsedEngine(data.ocr_engine || '')
+    // `entries` is the backwards-compat alias; fall back to `vocab_entries`.
+    const vocabEntries = data.layout === 'vocab' ? (data.entries ?? data.vocab_entries ?? []) : []
+    // initEntries also resets activeIndex to 0, which generic results need too:
+    // a re-run with fewer rows would otherwise leave the index out of range.
+    initEntries(vocabEntries)
+    setEditedCells((data.cells ?? []).map(c => ({ ...c, status: c.status || 'pending' })))
+  }
+
  const initEntries = (entries: WordEntry[]) => {
    setEditedEntries(entries.map(e => ({ ...e, status: e.status || 'pending' })))
    setActiveIndex(0)
@@ -82,21 +119,20 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
        throw new Error(err.detail || 'Worterkennung fehlgeschlagen')
      }
      const data = await res.json()
-      setWordResult(data)
-      setUsedEngine(data.ocr_engine || eng)
-      initEntries(data.entries)
+      applyGridResult(data)
    } catch (e) {
      setError(e instanceof Error ? e.message : 'Unbekannter Fehler')
    } finally {
      setDetecting(false)
    }
-  }, [sessionId, ocrEngine])
+  // eslint-disable-next-line react-hooks/exhaustive-deps
+  }, [sessionId, ocrEngine, pronunciation])

  const handleGroundTruth = useCallback(async (isCorrect: boolean) => {
    if (!sessionId) return
    const gt: WordGroundTruth = {
      is_correct: isCorrect,
-      corrected_entries: isCorrect ? undefined : editedEntries,
+      corrected_entries: isCorrect ? undefined : (isVocab ? editedEntries : undefined),
      notes: gtNotes || undefined,
    }
    try {
@@ -109,35 +145,68 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
    } catch (e) {
      console.error('Ground truth save failed:', e)
    }
-  }, [sessionId, gtNotes, editedEntries])
+  }, [sessionId, gtNotes, editedEntries, isVocab])

-  // Step-through: update entry field
+  // Vocab mode: update entry field
  const updateEntry = (index: number, field: 'english' | 'german' | 'example', value: string) => {
    setEditedEntries(prev => prev.map((e, i) =>
      i === index ? { ...e, [field]: value, status: 'edited' as const } : e
    ))
  }

+  /**
+   * Generic mode: update cell text.
+   *
+   * Replaces the text of the cell whose `cell_id` matches and marks it as
+   * 'edited'; all other cells are left unchanged.
+   */
+  const updateCell = (cellId: string, value: string) => {
+    setEditedCells(prev => prev.map(c =>
+      c.cell_id === cellId ? { ...c, text: value, status: 'edited' as const } : c
+    ))
+  }
+
  // Step-through: confirm current entry
   const confirmEntry = () => {
-    setEditedEntries(prev => prev.map((e, i) =>
-      i === activeIndex ? { ...e, status: e.status === 'edited' ? 'edited' : 'confirmed' } : e
-    ))
-    if (activeIndex < editedEntries.length - 1) {
+    if (isVocab) {
+      // Vocab: activeIndex addresses an entry; an already edited entry keeps 'edited'
+      setEditedEntries(prev => prev.map((e, i) =>
+        i === activeIndex ? { ...e, status: e.status === 'edited' ? 'edited' : 'confirmed' } : e
+      ))
+    } else {
+      // Generic: activeIndex is a row position; confirm all cells in this row,
+      // keeping 'edited' on cells that were already changed
+      const rowCells = getRowCells(activeIndex)
+      const cellIds = new Set(rowCells.map(c => c.cell_id))
+      setEditedCells(prev => prev.map(c =>
+        cellIds.has(c.cell_id) ? { ...c, status: c.status === 'edited' ? 'edited' : 'confirmed' } : c
+      ))
+    }
+    // Advance to the next entry/row unless the last one is already active
+    const maxIdx = isVocab ? editedEntries.length - 1 : getUniqueRowCount() - 1
+    if (activeIndex < maxIdx) {
       setActiveIndex(activeIndex + 1)
     }
   }

  // Step-through: skip current entry
  const skipEntry = () => {
-    setEditedEntries(prev => prev.map((e, i) =>
-      i === activeIndex ? { ...e, status: 'skipped' as const } : e
-    ))
-    if (activeIndex < editedEntries.length - 1) {
+    if (isVocab) {
+      setEditedEntries(prev => prev.map((e, i) =>
+        i === activeIndex ? { ...e, status: 'skipped' as const } : e
+      ))
+    }
+    const maxIdx = isVocab ? editedEntries.length - 1 : getUniqueRowCount() - 1
+    if (activeIndex < maxIdx) {
      setActiveIndex(activeIndex + 1)
    }
  }

+  /**
+   * Helper: number of distinct `row_index` values among the edited cells.
+   * Returns 0 when there are no cells.
+   */
+  const getUniqueRowCount = () => {
+    if (!editedCells.length) return 0
+    return new Set(editedCells.map(c => c.row_index)).size
+  }
+
+  /**
+   * Helper: get cells for a given row position.
+   *
+   * `rowPosition` indexes the distinct `row_index` values of the edited cells
+   * in ascending order, so gaps in `row_index` do not matter. Returns an empty
+   * array when `rowPosition` is out of range.
+   */
+  const getRowCells = (rowPosition: number) => {
+    const uniqueRows = [...new Set(editedCells.map(c => c.row_index))].sort((a, b) => a - b)
+    const rowIdx = uniqueRows[rowPosition]
+    return editedCells.filter(c => c.row_index === rowIdx)
+  }
+
  // Focus english input when active entry changes in labeling mode
  useEffect(() => {
    if (mode === 'labeling' && enRef.current) {
@@ -152,8 +221,6 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
      if (e.key === 'Enter' && !e.shiftKey) {
        e.preventDefault()
        confirmEntry()
-      } else if (e.key === 'Tab' && !e.shiftKey) {
-        // Let Tab move between fields naturally unless on last field
      } else if (e.key === 'ArrowDown' && e.ctrlKey) {
        e.preventDefault()
        skipEntry()
@@ -165,7 +232,7 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
    window.addEventListener('keydown', handler)
    return () => window.removeEventListener('keydown', handler)
  // eslint-disable-next-line react-hooks/exhaustive-deps
-  }, [mode, activeIndex, editedEntries])
+  }, [mode, activeIndex, editedEntries, editedCells])

  if (!sessionId) {
    return (
@@ -200,9 +267,24 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
    return map[status || 'pending'] || map.pending
  }

-  const summary = wordResult?.summary
-  const confirmedCount = editedEntries.filter(e => e.status === 'confirmed' || e.status === 'edited').length
-  const totalCount = editedEntries.length
+  const summary = gridResult?.summary
+  const columnsUsed = gridResult?.columns_used || []
+  const gridShape = gridResult?.grid_shape
+
+  // Counts for labeling progress. Both numbers use the same unit: vocab entries
+  // in vocab mode, grid rows in generic mode. A generic row counts as done once
+  // any of its cells is confirmed or edited, mirroring how a vocab entry counts
+  // as soon as one of its fields was edited. Counting cells here would let the
+  // confirmed count exceed the row-based total.
+  const confirmedCount = isVocab
+    ? editedEntries.filter(e => e.status === 'confirmed' || e.status === 'edited').length
+    : new Set(
+        editedCells
+          .filter(c => c.status === 'confirmed' || c.status === 'edited')
+          .map(c => c.row_index)
+      ).size
+  const totalCount = isVocab ? editedEntries.length : getUniqueRowCount()
+
+  // Group cells by row for generic table display
+  const cellsByRow: Map<number, GridCell[]> = new Map()
+  for (const cell of editedCells) {
+    const existing = cellsByRow.get(cell.row_index) || []
+    existing.push(cell)
+    cellsByRow.set(cell.row_index, existing)
+  }
+  const sortedRowIndices = [...cellsByRow.keys()].sort((a, b) => a - b)

  return (
    <div className="space-y-4">
@@ -214,9 +296,26 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
        </div>
      )}

-      {/* Mode toggle */}
-      {wordResult && (
+      {/* Layout badge + Mode toggle */}
+      {gridResult && (
        <div className="flex items-center gap-2">
+          {/* Layout badge */}
+          <span className={`px-2 py-0.5 rounded text-[10px] uppercase font-semibold ${
+            isVocab
+              ? 'bg-indigo-100 dark:bg-indigo-900/30 text-indigo-700 dark:text-indigo-300'
+              : 'bg-gray-100 dark:bg-gray-700 text-gray-600 dark:text-gray-400'
+          }`}>
+            {isVocab ? 'Vokabel-Layout' : 'Generisch'}
+          </span>
+
+          {gridShape && (
+            <span className="text-[10px] text-gray-400">
+              {gridShape.rows}×{gridShape.cols} = {gridShape.total_cells} Zellen
+            </span>
+          )}
+
+          <div className="flex-1" />
+
          <button
            onClick={() => setMode('overview')}
            className={`px-3 py-1.5 text-xs rounded-lg font-medium transition-colors ${
@@ -240,7 +339,7 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
        </div>
      )}

-      {/* Overview mode: side-by-side images + entry list */}
+      {/* Overview mode */}
      {mode === 'overview' && (
        <>
          {/* Images: overlay vs clean */}
@@ -250,7 +349,7 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
                Mit Grid-Overlay
              </div>
              <div className="border rounded-lg overflow-hidden dark:border-gray-700 bg-gray-50 dark:bg-gray-900">
-                {wordResult ? (
+                {gridResult ? (
                  // eslint-disable-next-line @next/next/no-img-element
                  <img
                    src={`${overlayUrl}?t=${Date.now()}`}
@@ -280,25 +379,43 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
          </div>

          {/* Result summary */}
-          {wordResult && summary && (
+          {gridResult && summary && (
            <div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-4 space-y-3">
              <div className="flex items-center justify-between">
                <h4 className="text-sm font-medium text-gray-700 dark:text-gray-300">
-                  Ergebnis: {summary.total_entries} Eintraege erkannt
+                  {isVocab
+                    ? `Ergebnis: ${summary.total_entries ?? 0} Vokabel-Eintraege erkannt`
+                    : `Ergebnis: ${summary.non_empty_cells}/${summary.total_cells} Zellen mit Text`
+                  }
                </h4>
                <span className="text-xs text-gray-400">
-                  {wordResult.duration_seconds}s
+                  {gridResult.duration_seconds}s
                </span>
              </div>

              {/* Summary badges */}
              <div className="flex gap-2 flex-wrap">
-                <span className="px-2 py-0.5 rounded text-xs font-medium bg-blue-100 dark:bg-blue-900/30 text-blue-700 dark:text-blue-300">
-                  EN: {summary.with_english}
-                </span>
-                <span className="px-2 py-0.5 rounded text-xs font-medium bg-green-100 dark:bg-green-900/30 text-green-700 dark:text-green-300">
-                  DE: {summary.with_german}
-                </span>
+                {isVocab ? (
+                  <>
+                    <span className="px-2 py-0.5 rounded text-xs font-medium bg-blue-100 dark:bg-blue-900/30 text-blue-700 dark:text-blue-300">
+                      EN: {summary.with_english ?? 0}
+                    </span>
+                    <span className="px-2 py-0.5 rounded text-xs font-medium bg-green-100 dark:bg-green-900/30 text-green-700 dark:text-green-300">
+                      DE: {summary.with_german ?? 0}
+                    </span>
+                  </>
+                ) : (
+                  <>
+                    <span className="px-2 py-0.5 rounded text-xs font-medium bg-blue-100 dark:bg-blue-900/30 text-blue-700 dark:text-blue-300">
+                      Zellen: {summary.non_empty_cells}/{summary.total_cells}
+                    </span>
+                    {columnsUsed.map((col, i) => (
+                      <span key={i} className={`px-2 py-0.5 rounded text-xs font-medium bg-gray-100 dark:bg-gray-700 ${colTypeColor(col.type)}`}>
+                        C{col.index}: {colTypeLabel(col.type)}
+                      </span>
+                    ))}
+                  </>
+                )}
                {summary.low_confidence > 0 && (
                  <span className="px-2 py-0.5 rounded text-xs font-medium bg-red-100 dark:bg-red-900/30 text-red-700 dark:text-red-300">
                    Unsicher: {summary.low_confidence}
@@ -306,57 +423,110 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
                )}
              </div>

-              {/* Entry table */}
+              {/* Entry/Cell table */}
              <div className="max-h-80 overflow-y-auto">
-                <table className="w-full text-xs">
-                  <thead className="sticky top-0 bg-white dark:bg-gray-800">
-                    <tr className="text-left text-gray-500 dark:text-gray-400 border-b dark:border-gray-700">
-                      <th className="py-1 pr-2 w-8">#</th>
-                      <th className="py-1 pr-2">English</th>
-                      <th className="py-1 pr-2">Deutsch</th>
-                      <th className="py-1 pr-2">Example</th>
-                      <th className="py-1 w-12 text-right">Conf</th>
-                    </tr>
-                  </thead>
-                  <tbody>
-                    {editedEntries.map((entry, idx) => (
-                      <tr
-                        key={idx}
-                        className={`border-b dark:border-gray-700/50 ${
-                          idx === activeIndex ? 'bg-teal-50 dark:bg-teal-900/20' : ''
-                        }`}
-                        onClick={() => { setActiveIndex(idx); setMode('labeling') }}
-                      >
-                        <td className="py-1 pr-2 text-gray-400">{idx + 1}</td>
-                        <td className="py-1 pr-2 font-mono text-gray-700 dark:text-gray-300 cursor-pointer">
-                          <MultilineText text={entry.english} />
-                        </td>
-                        <td className="py-1 pr-2 font-mono text-gray-700 dark:text-gray-300 cursor-pointer">
-                          <MultilineText text={entry.german} />
-                        </td>
-                        <td className="py-1 pr-2 font-mono text-gray-500 dark:text-gray-400 cursor-pointer max-w-[200px]">
-                          <MultilineText text={entry.example} />
-                        </td>
-                        <td className={`py-1 text-right font-mono ${confColor(entry.confidence)}`}>
-                          {entry.confidence}%
-                        </td>
+                {isVocab ? (
+                  /* Vocab table: EN/DE/Example columns */
+                  <table className="w-full text-xs">
+                    <thead className="sticky top-0 bg-white dark:bg-gray-800">
+                      <tr className="text-left text-gray-500 dark:text-gray-400 border-b dark:border-gray-700">
+                        <th className="py-1 pr-2 w-8">#</th>
+                        <th className="py-1 pr-2">English</th>
+                        <th className="py-1 pr-2">Deutsch</th>
+                        <th className="py-1 pr-2">Example</th>
+                        <th className="py-1 w-12 text-right">Conf</th>
                      </tr>
-                    ))}
-                  </tbody>
-                </table>
+                    </thead>
+                    <tbody>
+                      {editedEntries.map((entry, idx) => (
+                        <tr
+                          key={idx}
+                          className={`border-b dark:border-gray-700/50 ${
+                            idx === activeIndex ? 'bg-teal-50 dark:bg-teal-900/20' : ''
+                          }`}
+                          onClick={() => { setActiveIndex(idx); setMode('labeling') }}
+                        >
+                          <td className="py-1 pr-2 text-gray-400">{idx + 1}</td>
+                          <td className="py-1 pr-2 font-mono text-gray-700 dark:text-gray-300 cursor-pointer">
+                            <MultilineText text={entry.english} />
+                          </td>
+                          <td className="py-1 pr-2 font-mono text-gray-700 dark:text-gray-300 cursor-pointer">
+                            <MultilineText text={entry.german} />
+                          </td>
+                          <td className="py-1 pr-2 font-mono text-gray-500 dark:text-gray-400 cursor-pointer max-w-[200px]">
+                            <MultilineText text={entry.example} />
+                          </td>
+                          <td className={`py-1 text-right font-mono ${confColor(entry.confidence)}`}>
+                            {entry.confidence}%
+                          </td>
+                        </tr>
+                      ))}
+                    </tbody>
+                  </table>
+                ) : (
+                  /* Generic table: dynamic columns from columns_used */
+                  <table className="w-full text-xs">
+                    <thead className="sticky top-0 bg-white dark:bg-gray-800">
+                      <tr className="text-left text-gray-500 dark:text-gray-400 border-b dark:border-gray-700">
+                        <th className="py-1 pr-2 w-12">Zeile</th>
+                        {columnsUsed.map((col, i) => (
+                          <th key={i} className={`py-1 pr-2 ${colTypeColor(col.type)}`}>
+                            {colTypeLabel(col.type)}
+                          </th>
+                        ))}
+                        <th className="py-1 w-12 text-right">Conf</th>
+                      </tr>
+                    </thead>
+                    <tbody>
+                      {sortedRowIndices.map((rowIdx, posIdx) => {
+                        const rowCells = cellsByRow.get(rowIdx) || []
+                        const avgConf = rowCells.length
+                          ? Math.round(rowCells.reduce((s, c) => s + c.confidence, 0) / rowCells.length)
+                          : 0
+                        return (
+                          <tr
+                            key={rowIdx}
+                            className={`border-b dark:border-gray-700/50 ${
+                              posIdx === activeIndex ? 'bg-teal-50 dark:bg-teal-900/20' : ''
+                            }`}
+                            onClick={() => { setActiveIndex(posIdx); setMode('labeling') }}
+                          >
+                            <td className="py-1 pr-2 text-gray-400 font-mono text-[10px]">
+                              R{String(rowIdx).padStart(2, '0')}
+                            </td>
+                            {columnsUsed.map((col) => {
+                              const cell = rowCells.find(c => c.col_index === col.index)
+                              return (
+                                <td key={col.index} className="py-1 pr-2 font-mono text-gray-700 dark:text-gray-300 cursor-pointer">
+                                  <MultilineText text={cell?.text || ''} />
+                                </td>
+                              )
+                            })}
+                            <td className={`py-1 text-right font-mono ${confColor(avgConf)}`}>
+                              {avgConf}%
+                            </td>
+                          </tr>
+                        )
+                      })}
+                    </tbody>
+                  </table>
+                )}
              </div>
            </div>
          )}
        </>
      )}

-      {/* Labeling mode: image crop + editable fields */}
-      {mode === 'labeling' && editedEntries.length > 0 && (
+      {/* Labeling mode */}
+      {mode === 'labeling' && (isVocab ? editedEntries.length > 0 : editedCells.length > 0) && (
        <div className="grid grid-cols-3 gap-4">
          {/* Left 2/3: Image with highlighted active row */}
          <div className="col-span-2">
            <div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-1">
-              Eintrag {activeIndex + 1} von {editedEntries.length}
+              {isVocab
+                ? `Eintrag ${activeIndex + 1} von ${editedEntries.length}`
+                : `Zeile ${activeIndex + 1} von ${getUniqueRowCount()}`
+              }
            </div>
            <div className="border rounded-lg overflow-hidden dark:border-gray-700 bg-gray-50 dark:bg-gray-900 relative">
              {/* eslint-disable-next-line @next/next/no-img-element */}
@@ -365,8 +535,8 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
                alt="Wort-Overlay"
                className="w-full h-auto"
              />
-              {/* Highlight overlay for active entry bbox */}
-              {editedEntries[activeIndex]?.bbox && (
+              {/* Highlight overlay for active row/entry */}
+              {isVocab && editedEntries[activeIndex]?.bbox && (
                <div
                  className="absolute border-2 border-yellow-400 bg-yellow-400/10 pointer-events-none"
                  style={{
@@ -377,10 +547,25 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
                  }}
                />
              )}
+              {!isVocab && (() => {
+                const rowCells = getRowCells(activeIndex)
+                return rowCells.map(cell => (
+                  <div
+                    key={cell.cell_id}
+                    className="absolute border-2 border-yellow-400 bg-yellow-400/10 pointer-events-none"
+                    style={{
+                      left: `${cell.bbox_pct.x}%`,
+                      top: `${cell.bbox_pct.y}%`,
+                      width: `${cell.bbox_pct.w}%`,
+                      height: `${cell.bbox_pct.h}%`,
+                    }}
+                  />
+                ))
+              })()}
            </div>
          </div>

-          {/* Right 1/3: Editable entry fields */}
+          {/* Right 1/3: Editable fields */}
          <div className="space-y-3">
            {/* Navigation */}
            <div className="flex items-center justify-between">
@@ -391,10 +576,15 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
              >
                Zurueck
              </button>
-              <span className="text-xs text-gray-500">{activeIndex + 1} / {editedEntries.length}</span>
+              <span className="text-xs text-gray-500">
+                {activeIndex + 1} / {isVocab ? editedEntries.length : getUniqueRowCount()}
+              </span>
              <button
-                onClick={() => setActiveIndex(Math.min(editedEntries.length - 1, activeIndex + 1))}
-                disabled={activeIndex >= editedEntries.length - 1}
+                onClick={() => setActiveIndex(Math.min(
+                  (isVocab ? editedEntries.length : getUniqueRowCount()) - 1,
+                  activeIndex + 1
+                ))}
+                disabled={activeIndex >= (isVocab ? editedEntries.length : getUniqueRowCount()) - 1}
                className="px-2 py-1 text-xs border rounded hover:bg-gray-50 dark:hover:bg-gray-700 dark:border-gray-600 disabled:opacity-30"
              >
                Weiter
@@ -403,16 +593,31 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec

            {/* Status badge */}
            <div className="flex items-center gap-2">
-              <span className={`px-2 py-0.5 rounded text-[10px] uppercase font-semibold ${statusBadge(editedEntries[activeIndex]?.status)}`}>
-                {editedEntries[activeIndex]?.status || 'pending'}
-              </span>
-              <span className={`text-xs font-mono ${confColor(editedEntries[activeIndex]?.confidence || 0)}`}>
-                {editedEntries[activeIndex]?.confidence}% Konfidenz
-              </span>
+              {isVocab && (
+                <>
+                  <span className={`px-2 py-0.5 rounded text-[10px] uppercase font-semibold ${statusBadge(editedEntries[activeIndex]?.status)}`}>
+                    {editedEntries[activeIndex]?.status || 'pending'}
+                  </span>
+                  <span className={`text-xs font-mono ${confColor(editedEntries[activeIndex]?.confidence || 0)}`}>
+                    {editedEntries[activeIndex]?.confidence}% Konfidenz
+                  </span>
+                </>
+              )}
+              {!isVocab && (() => {
+                const rowCells = getRowCells(activeIndex)
+                const avgConf = rowCells.length
+                  ? Math.round(rowCells.reduce((s, c) => s + c.confidence, 0) / rowCells.length)
+                  : 0
+                return (
+                  <span className={`text-xs font-mono ${confColor(avgConf)}`}>
+                    {avgConf}% Konfidenz
+                  </span>
+                )
+              })()}
            </div>

-            {/* Cell crops */}
-            {editedEntries[activeIndex]?.bbox_en && (
+            {/* Cell crops (vocab mode) */}
+            {isVocab && editedEntries[activeIndex]?.bbox_en && (
              <div>
                <div className="text-[10px] font-medium text-blue-500 mb-0.5">EN-Zelle</div>
                <div className="border rounded dark:border-gray-700 overflow-hidden bg-white dark:bg-gray-900 h-10 relative">
@@ -423,7 +628,7 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
                </div>
              </div>
            )}
-            {editedEntries[activeIndex]?.bbox_de && (
+            {isVocab && editedEntries[activeIndex]?.bbox_de && (
              <div>
                <div className="text-[10px] font-medium text-green-500 mb-0.5">DE-Zelle</div>
                <div className="border rounded dark:border-gray-700 overflow-hidden bg-white dark:bg-gray-900 h-10 relative">
@@ -437,34 +642,70 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec

            {/* Editable fields */}
            <div className="space-y-2">
-              <div>
-                <label className="text-[10px] font-medium text-gray-500 dark:text-gray-400">English</label>
-                <textarea
-                  ref={enRef as any}
-                  rows={Math.max(1, (editedEntries[activeIndex]?.english || '').split('\n').length)}
-                  value={editedEntries[activeIndex]?.english || ''}
-                  onChange={(e) => updateEntry(activeIndex, 'english', e.target.value)}
-                  className="w-full px-2 py-1.5 text-sm border rounded dark:bg-gray-700 dark:border-gray-600 font-mono resize-none"
-                />
-              </div>
-              <div>
-                <label className="text-[10px] font-medium text-gray-500 dark:text-gray-400">Deutsch</label>
-                <textarea
-                  rows={Math.max(1, (editedEntries[activeIndex]?.german || '').split('\n').length)}
-                  value={editedEntries[activeIndex]?.german || ''}
-                  onChange={(e) => updateEntry(activeIndex, 'german', e.target.value)}
-                  className="w-full px-2 py-1.5 text-sm border rounded dark:bg-gray-700 dark:border-gray-600 font-mono resize-none"
-                />
-              </div>
-              <div>
-                <label className="text-[10px] font-medium text-gray-500 dark:text-gray-400">Example</label>
-                <textarea
-                  rows={Math.max(1, (editedEntries[activeIndex]?.example || '').split('\n').length)}
-                  value={editedEntries[activeIndex]?.example || ''}
-                  onChange={(e) => updateEntry(activeIndex, 'example', e.target.value)}
-                  className="w-full px-2 py-1.5 text-sm border rounded dark:bg-gray-700 dark:border-gray-600 font-mono resize-none"
-                />
-              </div>
+              {isVocab ? (
+                /* Vocab mode: EN/DE/Example fields */
+                <>
+                  <div>
+                    <label className="text-[10px] font-medium text-gray-500 dark:text-gray-400">English</label>
+                    <textarea
+                      ref={enRef as any}
+                      rows={Math.max(1, (editedEntries[activeIndex]?.english || '').split('\n').length)}
+                      value={editedEntries[activeIndex]?.english || ''}
+                      onChange={(e) => updateEntry(activeIndex, 'english', e.target.value)}
+                      className="w-full px-2 py-1.5 text-sm border rounded dark:bg-gray-700 dark:border-gray-600 font-mono resize-none"
+                    />
+                  </div>
+                  <div>
+                    <label className="text-[10px] font-medium text-gray-500 dark:text-gray-400">Deutsch</label>
+                    <textarea
+                      rows={Math.max(1, (editedEntries[activeIndex]?.german || '').split('\n').length)}
+                      value={editedEntries[activeIndex]?.german || ''}
+                      onChange={(e) => updateEntry(activeIndex, 'german', e.target.value)}
+                      className="w-full px-2 py-1.5 text-sm border rounded dark:bg-gray-700 dark:border-gray-600 font-mono resize-none"
+                    />
+                  </div>
+                  <div>
+                    <label className="text-[10px] font-medium text-gray-500 dark:text-gray-400">Example</label>
+                    <textarea
+                      rows={Math.max(1, (editedEntries[activeIndex]?.example || '').split('\n').length)}
+                      value={editedEntries[activeIndex]?.example || ''}
+                      onChange={(e) => updateEntry(activeIndex, 'example', e.target.value)}
+                      className="w-full px-2 py-1.5 text-sm border rounded dark:bg-gray-700 dark:border-gray-600 font-mono resize-none"
+                    />
+                  </div>
+                </>
+              ) : (
+                /* Generic mode: one field per column */
+                <>
+                  {(() => {
+                    const rowCells = getRowCells(activeIndex)
+                    return columnsUsed.map((col) => {
+                      const cell = rowCells.find(c => c.col_index === col.index)
+                      if (!cell) return null
+                      return (
+                        <div key={col.index}>
+                          <div className="flex items-center gap-1 mb-0.5">
+                            <label className={`text-[10px] font-medium ${colTypeColor(col.type)}`}>
+                              {colTypeLabel(col.type)}
+                            </label>
+                            <span className="text-[9px] text-gray-400">{cell.cell_id}</span>
+                          </div>
+                          {/* Cell crop */}
+                          <div className="border rounded dark:border-gray-700 overflow-hidden bg-white dark:bg-gray-900 h-10 relative mb-1">
+                            <CellCrop imageUrl={dewarpedUrl} bbox={cell.bbox_pct} />
+                          </div>
+                          <textarea
+                            rows={Math.max(1, (cell.text || '').split('\n').length)}
+                            value={cell.text || ''}
+                            onChange={(e) => updateCell(cell.cell_id, e.target.value)}
+                            className="w-full px-2 py-1.5 text-sm border rounded dark:bg-gray-700 dark:border-gray-600 font-mono resize-none"
+                          />
+                        </div>
+                      )
+                    })
+                  })()}
+                </>
+              )}
            </div>

            {/* Action buttons */}
@@ -486,38 +727,61 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
            {/* Shortcuts hint */}
            <div className="text-[10px] text-gray-400 space-y-0.5">
              <div>Enter = Bestaetigen & weiter</div>
-              <div>Ctrl+↓ = Ueberspringen</div>
-              <div>Ctrl+↑ = Zurueck</div>
+              <div>Ctrl+Down = Ueberspringen</div>
+              <div>Ctrl+Up = Zurueck</div>
            </div>

-            {/* Entry list (compact) */}
+            {/* Entry/Row list (compact) */}
            <div className="border-t dark:border-gray-700 pt-2 mt-2">
              <div className="text-[10px] font-medium text-gray-500 dark:text-gray-400 mb-1">
-                Alle Eintraege
+                {isVocab ? 'Alle Eintraege' : 'Alle Zeilen'}
              </div>
              <div className="max-h-48 overflow-y-auto space-y-0.5">
-                {editedEntries.map((entry, idx) => (
-                  <div
-                    key={idx}
-                    onClick={() => setActiveIndex(idx)}
-                    className={`flex items-center gap-1 px-2 py-1 rounded text-[10px] cursor-pointer transition-colors ${
-                      idx === activeIndex
-                        ? 'bg-teal-50 dark:bg-teal-900/30 border border-teal-200 dark:border-teal-700'
-                        : 'hover:bg-gray-50 dark:hover:bg-gray-700/50'
-                    }`}
-                  >
-                    <span className="w-4 text-right text-gray-400">{idx + 1}</span>
-                    <span className={`w-2 h-2 rounded-full ${
-                      entry.status === 'confirmed' ? 'bg-green-500' :
-                      entry.status === 'edited' ? 'bg-blue-500' :
-                      entry.status === 'skipped' ? 'bg-orange-400' :
-                      'bg-gray-300 dark:bg-gray-600'
-                    }`} />
-                    <span className="truncate text-gray-600 dark:text-gray-400 font-mono">
-                      {(entry.english || '—').replace(/\n/g, ' ')} → {(entry.german || '—').replace(/\n/g, ' ')}
-                    </span>
-                  </div>
-                ))}
+                {isVocab ? (
+                  editedEntries.map((entry, idx) => (
+                    <div
+                      key={idx}
+                      onClick={() => setActiveIndex(idx)}
+                      className={`flex items-center gap-1 px-2 py-1 rounded text-[10px] cursor-pointer transition-colors ${
+                        idx === activeIndex
+                          ? 'bg-teal-50 dark:bg-teal-900/30 border border-teal-200 dark:border-teal-700'
+                          : 'hover:bg-gray-50 dark:hover:bg-gray-700/50'
+                      }`}
+                    >
+                      <span className="w-4 text-right text-gray-400">{idx + 1}</span>
+                      <span className={`w-2 h-2 rounded-full ${
+                        entry.status === 'confirmed' ? 'bg-green-500' :
+                        entry.status === 'edited' ? 'bg-blue-500' :
+                        entry.status === 'skipped' ? 'bg-orange-400' :
+                        'bg-gray-300 dark:bg-gray-600'
+                      }`} />
+                      <span className="truncate text-gray-600 dark:text-gray-400 font-mono">
+                        {(entry.english || '\u2014').replace(/\n/g, ' ')} &rarr; {(entry.german || '\u2014').replace(/\n/g, ' ')}
+                      </span>
+                    </div>
+                  ))
+                ) : (
+                  sortedRowIndices.map((rowIdx, posIdx) => {
+                    const rowCells = cellsByRow.get(rowIdx) || []
+                    const firstText = rowCells.find(c => c.text)?.text || ''
+                    return (
+                      <div
+                        key={rowIdx}
+                        onClick={() => setActiveIndex(posIdx)}
+                        className={`flex items-center gap-1 px-2 py-1 rounded text-[10px] cursor-pointer transition-colors ${
+                          posIdx === activeIndex
+                            ? 'bg-teal-50 dark:bg-teal-900/30 border border-teal-200 dark:border-teal-700'
+                            : 'hover:bg-gray-50 dark:hover:bg-gray-700/50'
+                        }`}
+                      >
+                        <span className="w-6 text-right text-gray-400 font-mono">R{String(rowIdx).padStart(2, '0')}</span>
+                        <span className="truncate text-gray-600 dark:text-gray-400 font-mono">
+                          {firstText.replace(/\n/g, ' ').substring(0, 60) || '\u2014'}
+                        </span>
+                      </div>
+                    )
+                  })
+                )}
              </div>
            </div>
          </div>
@@ -525,7 +789,7 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
      )}

      {/* Controls */}
-      {wordResult && (
+      {gridResult && (
        <div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-4 space-y-3">
          <div className="flex items-center gap-3 flex-wrap">
            {/* OCR Engine selector */}
@@ -539,15 +803,17 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
              <option value="tesseract">Tesseract</option>
            </select>

-            {/* Pronunciation selector */}
-            <select
-              value={pronunciation}
-              onChange={(e) => setPronunciation(e.target.value as 'british' | 'american')}
-              className="px-2 py-1.5 text-xs border rounded-lg dark:bg-gray-700 dark:border-gray-600"
-            >
-              <option value="british">Britisch (RP)</option>
-              <option value="american">Amerikanisch</option>
-            </select>
+            {/* Pronunciation selector (only for vocab) */}
+            {isVocab && (
+              <select
+                value={pronunciation}
+                onChange={(e) => setPronunciation(e.target.value as 'british' | 'american')}
+                className="px-2 py-1.5 text-xs border rounded-lg dark:bg-gray-700 dark:border-gray-600"
+              >
+                <option value="british">Britisch (RP)</option>
+                <option value="american">Amerikanisch</option>
+              </select>
+            )}

            <button
              onClick={() => runAutoDetection()}
@@ -3009,7 +3009,7 @@ def _replace_phonetics_in_text(text: str, pronunciation: str = 'british') -> str
    return _PHONETIC_BRACKET_RE.sub(replacer, text)


-def build_word_grid(
+def build_cell_grid(
    ocr_img: np.ndarray,
    column_regions: List[PageRegion],
    row_geometries: List[RowGeometry],
@@ -3018,9 +3018,11 @@ def build_word_grid(
    lang: str = "eng+deu",
    ocr_engine: str = "auto",
    img_bgr: Optional[np.ndarray] = None,
-    pronunciation: str = "british",
-) -> List[Dict[str, Any]]:
-    """Build a word grid by intersecting columns and rows, then OCR each cell.
+) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
+    """Generic Cell-Grid: Columns × Rows → cells with OCR text.
+
+    This is the layout-agnostic foundation. Every column (except column_ignore)
+    is intersected with every content row to produce numbered cells.

    Args:
        ocr_img: Binarized full-page image (for Tesseract).
@@ -3029,11 +3031,12 @@ def build_word_grid(
        img_w: Image width in pixels.
        img_h: Image height in pixels.
        lang: Default Tesseract language.
-        ocr_engine: 'tesseract', 'rapid', or 'auto' (rapid if available, else tesseract).
+        ocr_engine: 'tesseract', 'rapid', or 'auto'.
        img_bgr: BGR color image (required for RapidOCR).

    Returns:
-        List of entry dicts with english/german/example text and bbox info (percent).
+        (cells, columns_meta) where cells is a list of cell dicts and
+        columns_meta describes the columns used.
    """
    # Resolve engine choice
    use_rapid = False
@@ -3046,24 +3049,34 @@ def build_word_grid(
            use_rapid = True

    engine_name = "rapid" if use_rapid else "tesseract"
-    logger.info(f"build_word_grid: using OCR engine '{engine_name}'")
+    logger.info(f"build_cell_grid: using OCR engine '{engine_name}'")

    # Filter to content rows only (skip header/footer)
    content_rows = [r for r in row_geometries if r.row_type == 'content']
    if not content_rows:
-        logger.warning("build_word_grid: no content rows found")
-        return []
+        logger.warning("build_cell_grid: no content rows found")
+        return [], []

-    # Map column types to roles
-    VOCAB_COLUMN_TYPES = {'column_en', 'column_de', 'column_example'}
-    relevant_cols = [c for c in column_regions if c.type in VOCAB_COLUMN_TYPES]
+    # Use all columns except column_ignore
+    relevant_cols = [c for c in column_regions if c.type != 'column_ignore']
    if not relevant_cols:
-        logger.warning("build_word_grid: no relevant vocabulary columns found")
-        return []
+        logger.warning("build_cell_grid: no usable columns found")
+        return [], []

    # Sort columns left-to-right
    relevant_cols.sort(key=lambda c: c.x)

+    # Build columns_meta
+    columns_meta = [
+        {
+            'index': col_idx,
+            'type': col.type,
+            'x': col.x,
+            'width': col.width,
+        }
+        for col_idx, col in enumerate(relevant_cols)
+    ]
+
    # Choose OCR language per column type (Tesseract only)
    lang_map = {
        'column_en': 'eng',
@@ -3071,47 +3084,40 @@ def build_word_grid(
        'column_example': 'eng+deu',
    }

-    entries: List[Dict[str, Any]] = []
+    cells: List[Dict[str, Any]] = []

    for row_idx, row in enumerate(content_rows):
-        entry: Dict[str, Any] = {
-            'row_index': row_idx,
-            'english': '',
-            'german': '',
-            'example': '',
-            'confidence': 0.0,
-            'bbox': {
-                'x': round(row.x / img_w * 100, 2),
-                'y': round(row.y / img_h * 100, 2),
-                'w': round(row.width / img_w * 100, 2),
-                'h': round(row.height / img_h * 100, 2),
-            },
-            'bbox_en': None,
-            'bbox_de': None,
-            'bbox_ex': None,
-            'ocr_engine': engine_name,
-        }
-
-        confidences: List[float] = []
-
-        for col in relevant_cols:
+        for col_idx, col in enumerate(relevant_cols):
            # Compute cell region: column x/width, row y/height
-            # Add padding to avoid clipping edge words
            pad = 8  # pixels
-            cell_x = col.x - pad
-            cell_y = row.y - pad
+            cell_x = max(0, col.x - pad)
+            cell_y = max(0, row.y - pad)
            cell_w = col.width + 2 * pad
            cell_h = row.height + 2 * pad

            # Clamp to image bounds
-            cell_x = max(0, cell_x)
-            cell_y = max(0, cell_y)
            if cell_x + cell_w > img_w:
                cell_w = img_w - cell_x
            if cell_y + cell_h > img_h:
                cell_h = img_h - cell_y

            if cell_w <= 0 or cell_h <= 0:
+                cells.append({
+                    'cell_id': f"R{row_idx:02d}_C{col_idx}",
+                    'row_index': row_idx,
+                    'col_index': col_idx,
+                    'col_type': col.type,
+                    'text': '',
+                    'confidence': 0.0,
+                    'bbox_px': {'x': col.x, 'y': row.y, 'w': col.width, 'h': row.height},
+                    'bbox_pct': {
+                        'x': round(col.x / img_w * 100, 2),
+                        'y': round(row.y / img_h * 100, 2),
+                        'w': round(col.width / img_w * 100, 2),
+                        'h': round(row.height / img_h * 100, 2),
+                    },
+                    'ocr_engine': engine_name,
+                })
                continue

            cell_region = PageRegion(
@@ -3119,6 +3125,7 @@ def build_word_grid(
                x=cell_x, y=cell_y,
                width=cell_w, height=cell_h,
            )
+
            # OCR the cell
            if use_rapid:
                words = ocr_region_rapid(img_bgr, cell_region)
@@ -3126,8 +3133,7 @@ def build_word_grid(
                cell_lang = lang_map.get(col.type, lang)
                words = ocr_region(ocr_img, cell_region, lang=cell_lang, psm=6)

-            # Group into lines, then join in reading order (Fix A)
-            # Use half of average word height as Y-tolerance
+            # Group into lines, then join in reading order
            if words:
                avg_h = sum(w['height'] for w in words) / len(words)
                y_tol = max(10, int(avg_h * 0.5))
@@ -3135,36 +3141,162 @@ def build_word_grid(
                y_tol = 15
            text = _words_to_reading_order_text(words, y_tolerance_px=y_tol)

+            avg_conf = 0.0
            if words:
-                avg_conf = sum(w['conf'] for w in words) / len(words)
-                confidences.append(avg_conf)
+                avg_conf = round(sum(w['conf'] for w in words) / len(words), 1)

-            # Bbox in percent
-            cell_bbox = {
-                'x': round(cell_x / img_w * 100, 2),
-                'y': round(cell_y / img_h * 100, 2),
-                'w': round(cell_w / img_w * 100, 2),
-                'h': round(cell_h / img_h * 100, 2),
+            cells.append({
+                'cell_id': f"R{row_idx:02d}_C{col_idx}",
+                'row_index': row_idx,
+                'col_index': col_idx,
+                'col_type': col.type,
+                'text': text,
+                'confidence': avg_conf,
+                'bbox_px': {'x': cell_x, 'y': cell_y, 'w': cell_w, 'h': cell_h},
+                'bbox_pct': {
+                    'x': round(cell_x / img_w * 100, 2),
+                    'y': round(cell_y / img_h * 100, 2),
+                    'w': round(cell_w / img_w * 100, 2),
+                    'h': round(cell_h / img_h * 100, 2),
+                },
+                'ocr_engine': engine_name,
+            })
+
+    logger.info(f"build_cell_grid: {len(cells)} cells from "
+                f"{len(content_rows)} rows × {len(relevant_cols)} columns, "
+                f"engine={engine_name}")
+
+    return cells, columns_meta
+
+
+def _cells_to_vocab_entries(
+    cells: List[Dict[str, Any]],
+    columns_meta: List[Dict[str, Any]],
+) -> List[Dict[str, Any]]:
+    """Map generic cells to vocab entries with english/german/example fields.
+
+    Groups cells by row_index, maps col_type → field name, and produces
+    one entry per row (only rows with at least one non-empty field).
+    """
+    # NOTE: columns_meta is not read in this function; the mapping is driven by each cell's col_type.
+    col_type_to_field = {
+        'column_en': 'english',
+        'column_de': 'german',
+        'column_example': 'example',
+    }
+    bbox_key_map = {
+        'column_en': 'bbox_en',
+        'column_de': 'bbox_de',
+        'column_example': 'bbox_ex',
+    }
+
+    # Group cells by row_index
+    rows: Dict[int, List[Dict]] = {}
+    for cell in cells:
+        ri = cell['row_index']
+        rows.setdefault(ri, []).append(cell)
+
+    entries: List[Dict[str, Any]] = []
+    for row_idx in sorted(rows.keys()):
+        row_cells = rows[row_idx]
+        entry: Dict[str, Any] = {
+            'row_index': row_idx,
+            'english': '',
+            'german': '',
+            'example': '',
+            'confidence': 0.0,
+            'bbox': None,
+            'bbox_en': None,
+            'bbox_de': None,
+            'bbox_ex': None,
+            'ocr_engine': row_cells[0].get('ocr_engine', '') if row_cells else '',
+        }
+
+        confidences = []
+        for cell in row_cells:
+            col_type = cell['col_type']
+            field = col_type_to_field.get(col_type)
+            if field:
+                entry[field] = cell['text']
+            bbox_field = bbox_key_map.get(col_type)
+            if bbox_field:
+                entry[bbox_field] = cell['bbox_pct']
+            if cell['confidence'] > 0:
+                confidences.append(cell['confidence'])
+
+        # Compute row-level bbox as union of all cell bboxes
+        all_bboxes = [c['bbox_pct'] for c in row_cells if c.get('bbox_pct')]
+        if all_bboxes:
+            min_x = min(b['x'] for b in all_bboxes)
+            min_y = min(b['y'] for b in all_bboxes)
+            max_x2 = max(b['x'] + b['w'] for b in all_bboxes)
+            max_y2 = max(b['y'] + b['h'] for b in all_bboxes)
+            entry['bbox'] = {
+                'x': round(min_x, 2),
+                'y': round(min_y, 2),
+                'w': round(max_x2 - min_x, 2),
+                'h': round(max_y2 - min_y, 2),
            }

-            if col.type == 'column_en':
-                entry['english'] = text
-                entry['bbox_en'] = cell_bbox
-            elif col.type == 'column_de':
-                entry['german'] = text
-                entry['bbox_de'] = cell_bbox
-            elif col.type == 'column_example':
-                entry['example'] = text
-                entry['bbox_ex'] = cell_bbox
-
        entry['confidence'] = round(
            sum(confidences) / len(confidences), 1
        ) if confidences else 0.0

-        # Only include if at least one field has text
+        # Only include if at least one vocab field has text
        if entry['english'] or entry['german'] or entry['example']:
            entries.append(entry)

+    return entries
+
+
+def build_word_grid(
+    ocr_img: np.ndarray,
+    column_regions: List[PageRegion],
+    row_geometries: List[RowGeometry],
+    img_w: int,
+    img_h: int,
+    lang: str = "eng+deu",
+    ocr_engine: str = "auto",
+    img_bgr: Optional[np.ndarray] = None,
+    pronunciation: str = "british",
+) -> List[Dict[str, Any]]:
+    """Vocab-specific: Cell-Grid + Vocab-Mapping + Post-Processing.
+
+    Wrapper around build_cell_grid() that adds vocabulary-specific logic:
+    - Maps cells to english/german/example entries
+    - Applies character confusion fixes, IPA lookup, comma splitting, etc.
+    - Falls back to returning raw cells if no vocab columns detected.
+
+    Args:
+        ocr_img: Binarized full-page image (for Tesseract).
+        column_regions: Classified columns from Step 3.
+        row_geometries: Rows from Step 4.
+        img_w, img_h: Image dimensions.
+        lang: Default Tesseract language.
+        ocr_engine: 'tesseract', 'rapid', or 'auto'.
+        img_bgr: BGR color image (required for RapidOCR).
+        pronunciation: 'british' or 'american' for IPA lookup.
+
+    Returns:
+        List of entry dicts with english/german/example text and bbox info (percent).
+    """
+    cells, columns_meta = build_cell_grid(
+        ocr_img, column_regions, row_geometries, img_w, img_h,
+        lang=lang, ocr_engine=ocr_engine, img_bgr=img_bgr,
+    )
+
+    if not cells:
+        return []
+
+    # Check if vocab layout is present
+    col_types = {c['type'] for c in columns_meta}
+    if not (col_types & {'column_en', 'column_de'}):
+        logger.info("build_word_grid: no vocab columns — returning raw cells")
+        return cells  # NOTE(review): raw cell dicts here, not the english/german/example entry dicts returned below
+
+    # Vocab mapping: cells → entries
+    entries = _cells_to_vocab_entries(cells, columns_meta)
+
    # --- Post-processing pipeline (deterministic, no LLM) ---
    n_raw = len(entries)

@@ -3177,13 +3309,13 @@ def build_word_grid(
    # 3. Split comma-separated word forms (break, broke, broken → 3 entries)
    entries = _split_comma_entries(entries)

-    # 5. Attach example sentences (rows without DE → examples for preceding entry)
+    # 4. Attach example sentences (rows without DE → examples for preceding entry)
    entries = _attach_example_sentences(entries)

+    engine_name = cells[0].get('ocr_engine', 'unknown') if cells else 'unknown'
    logger.info(f"build_word_grid: {len(entries)} entries from "
                f"{n_raw} raw → {len(entries)} after post-processing "
-                f"({len(content_rows)} content rows × {len(relevant_cols)} columns, "
-                f"engine={engine_name})")
+                f"(engine={engine_name})")

    return entries

@@ -31,8 +31,14 @@ from pydantic import BaseModel
 from cv_vocab_pipeline import (
    PageRegion,
    RowGeometry,
+    _cells_to_vocab_entries,
+    _fix_character_confusion,
+    _fix_phonetic_brackets,
+    _split_comma_entries,
+    _attach_example_sentences,
    analyze_layout,
    analyze_layout_by_words,
+    build_cell_grid,
    build_word_grid,
    classify_column_types,
    create_layout_image,
@@ -1075,35 +1081,60 @@ async def detect_words(session_id: str, engine: str = "auto", pronunciation: str
        for r in row_result["rows"]
    ]

-    # Build word grid — pass both binarized (for Tesseract) and BGR (for RapidOCR)
-    entries = build_word_grid(
+    # Build generic cell grid
+    cells, columns_meta = build_cell_grid(
        ocr_img, col_regions, row_geoms, img_w, img_h,
        ocr_engine=engine, img_bgr=dewarped_bgr,
-        pronunciation=pronunciation,
    )
    duration = time.time() - t0

-    # Build summary
-    summary = {
-        "total_entries": len(entries),
-        "with_english": sum(1 for e in entries if e.get("english")),
-        "with_german": sum(1 for e in entries if e.get("german")),
-        "low_confidence": sum(1 for e in entries if e.get("confidence", 0) < 50),
-    }
+    # Layout detection
+    col_types = {c['type'] for c in columns_meta}
+    is_vocab = bool(col_types & {'column_en', 'column_de'})
+
+    # Count content rows and columns for grid_shape
+    n_content_rows = len([r for r in row_geoms if r.row_type == 'content'])
+    n_cols = len(columns_meta)

    # Determine which engine was actually used
-    used_engine = entries[0].get("ocr_engine", "tesseract") if entries else engine
+    used_engine = cells[0].get("ocr_engine", "tesseract") if cells else engine

+    # Grid result (always generic)
    word_result = {
-        "entries": entries,
-        "entry_count": len(entries),
+        "cells": cells,
+        "grid_shape": {
+            "rows": n_content_rows,
+            "cols": n_cols,
+            "total_cells": len(cells),
+        },
+        "columns_used": columns_meta,
+        "layout": "vocab" if is_vocab else "generic",
        "image_width": img_w,
        "image_height": img_h,
        "duration_seconds": round(duration, 2),
-        "summary": summary,
        "ocr_engine": used_engine,
+        "summary": {
+            "total_cells": len(cells),
+            "non_empty_cells": sum(1 for c in cells if c.get("text")),
+            "low_confidence": sum(1 for c in cells if 0 < c.get("confidence", 0) < 50),
+        },
    }

+    # For vocab layout: add post-processed vocab_entries (backwards compat)
+    if is_vocab:
+        entries = _cells_to_vocab_entries(cells, columns_meta)
+        entries = _fix_character_confusion(entries)
+        entries = _fix_phonetic_brackets(entries, pronunciation=pronunciation)
+        entries = _split_comma_entries(entries)
+        entries = _attach_example_sentences(entries)
+        word_result["vocab_entries"] = entries
+        # Also keep "entries" key for backwards compatibility
+        word_result["entries"] = entries
+        word_result["entry_count"] = len(entries)
+        word_result["summary"]["total_entries"] = len(entries)
+        word_result["summary"]["with_english"] = sum(1 for e in entries if e.get("english"))
+        word_result["summary"]["with_german"] = sum(1 for e in entries if e.get("german"))
+
    # Persist to DB
    await update_session_db(
        session_id,
@@ -1114,7 +1145,8 @@ async def detect_words(session_id: str, engine: str = "auto", pronunciation: str
    cached["word_result"] = word_result

    logger.info(f"OCR Pipeline: words session {session_id}: "
-                f"{len(entries)} entries ({duration:.2f}s), summary: {summary}")
+                f"layout={word_result['layout']}, "
+                f"{len(cells)} cells ({duration:.2f}s), summary: {word_result['summary']}")

    return {
        "session_id": session_id,
@@ -1232,17 +1264,19 @@ async def _get_rows_overlay(session_id: str) -> Response:


 async def _get_words_overlay(session_id: str) -> Response:
-    """Generate dewarped image with word grid cells drawn on it."""
+    """Generate dewarped image with cell grid drawn on it."""
    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    word_result = session.get("word_result")
-    if not word_result or not word_result.get("entries"):
+    if not word_result:
        raise HTTPException(status_code=404, detail="No word data available")

-    column_result = session.get("column_result")
-    row_result = session.get("row_result")
+    # Support both new cell-based and legacy entry-based formats
+    cells = word_result.get("cells")
+    if not cells and not word_result.get("entries"):
+        raise HTTPException(status_code=404, detail="No word data available")

    # Load dewarped image
    dewarped_png = await get_session_image(session_id, "dewarped")
@@ -1256,80 +1290,105 @@ async def _get_words_overlay(session_id: str) -> Response:

    img_h, img_w = img.shape[:2]

-    # Color map for column types (BGR)
-    col_colors = {
-        "column_en": (255, 180, 0),      # Blue
-        "column_de": (0, 200, 0),         # Green
-        "column_example": (0, 140, 255),  # Orange
-    }
-
    overlay = img.copy()

-    # Build grid from column_result × row_result (the actual cells)
-    columns = []
-    if column_result and column_result.get("columns"):
-        columns = [c for c in column_result["columns"]
-                   if c.get("type", "").startswith("column_")]
+    if cells:
+        # New cell-based overlay: color by column index
+        col_palette = [
+            (255, 180, 0),      # Blue (BGR)
+            (0, 200, 0),        # Green
+            (0, 140, 255),      # Orange
+            (200, 100, 200),    # Purple
+            (200, 200, 0),      # Cyan
+            (100, 200, 200),    # Yellow-ish
+        ]

-    content_rows_data = []
-    if row_result and row_result.get("rows"):
-        content_rows_data = [r for r in row_result["rows"]
-                             if r.get("row_type") == "content"]
+        for cell in cells:
+            bbox = cell.get("bbox_px", {})
+            cx = bbox.get("x", 0)
+            cy = bbox.get("y", 0)
+            cw = bbox.get("w", 0)
+            ch = bbox.get("h", 0)
+            if cw <= 0 or ch <= 0:
+                continue

-    # Draw grid: column × row cells
-    for col in columns:
-        col_type = col.get("type", "")
-        color = col_colors.get(col_type, (200, 200, 200))
-        cx, cw = col["x"], col["width"]
+            col_idx = cell.get("col_index", 0)
+            color = col_palette[col_idx % len(col_palette)]

-        for row in content_rows_data:
-            ry, rh = row["y"], row["height"]
-            # Cell rectangle (exact grid intersection, no padding)
-            cv2.rectangle(img, (cx, ry), (cx + cw, ry + rh), color, 1)
+            # Cell rectangle border
+            cv2.rectangle(img, (cx, cy), (cx + cw, cy + ch), color, 1)
            # Semi-transparent fill
-            cv2.rectangle(overlay, (cx, ry), (cx + cw, ry + rh), color, -1)
+            cv2.rectangle(overlay, (cx, cy), (cx + cw, cy + ch), color, -1)

-    # Place OCR text labels inside grid cells
-    # Build lookup: row_index → entry for fast access
-    entries = word_result["entries"]
-    entry_by_row: Dict[int, Dict] = {}
-    for entry in entries:
-        entry_by_row[entry.get("row_index", -1)] = entry
+            # Cell-ID label (top-left corner)
+            cell_id = cell.get("cell_id", "")
+            cv2.putText(img, cell_id, (cx + 2, cy + 10),
+                        cv2.FONT_HERSHEY_SIMPLEX, 0.28, color, 1)

-    for row_idx, row in enumerate(content_rows_data):
-        entry = entry_by_row.get(row_idx)
-        if not entry:
-            continue
+            # Text label (bottom of cell)
+            text = cell.get("text", "")
+            if text:
+                conf = cell.get("confidence", 0)
+                if conf >= 70:
+                    text_color = (0, 180, 0)
+                elif conf >= 50:
+                    text_color = (0, 180, 220)
+                else:
+                    text_color = (0, 0, 220)

-        conf = entry.get("confidence", 0)
-        if conf >= 70:
-            text_color = (0, 180, 0)
-        elif conf >= 50:
-            text_color = (0, 180, 220)
-        else:
-            text_color = (0, 0, 220)
+                label = text.replace('\n', ' ')[:30]
+                cv2.putText(img, label, (cx + 3, cy + ch - 4),
+                            cv2.FONT_HERSHEY_SIMPLEX, 0.35, text_color, 1)
+    else:
+        # Legacy fallback: entry-based overlay (for old sessions)
+        column_result = session.get("column_result")
+        row_result = session.get("row_result")
+        col_colors = {
+            "column_en": (255, 180, 0),
+            "column_de": (0, 200, 0),
+            "column_example": (0, 140, 255),
+        }

-        ry, rh = row["y"], row["height"]
+        columns = []
+        if column_result and column_result.get("columns"):
+            columns = [c for c in column_result["columns"]
+                       if c.get("type", "").startswith("column_")]
+
+        content_rows_data = []
+        if row_result and row_result.get("rows"):
+            content_rows_data = [r for r in row_result["rows"]
+                                 if r.get("row_type") == "content"]

        for col in columns:
            col_type = col.get("type", "")
+            color = col_colors.get(col_type, (200, 200, 200))
            cx, cw = col["x"], col["width"]
+            for row in content_rows_data:
+                ry, rh = row["y"], row["height"]
+                cv2.rectangle(img, (cx, ry), (cx + cw, ry + rh), color, 1)
+                cv2.rectangle(overlay, (cx, ry), (cx + cw, ry + rh), color, -1)

-            # Pick the right text field for this column
-            if col_type == "column_en":
-                text = entry.get("english", "")
-            elif col_type == "column_de":
-                text = entry.get("german", "")
-            elif col_type == "column_example":
-                text = entry.get("example", "")
-            else:
-                text = ""
+        entries = word_result["entries"]
+        entry_by_row: Dict[int, Dict] = {}
+        for entry in entries:
+            entry_by_row[entry.get("row_index", -1)] = entry

-            if text:
-                label = text.replace('\n', ' ')[:30]
-                font_scale = 0.35
-                cv2.putText(img, label, (cx + 3, ry + rh - 4),
-                            cv2.FONT_HERSHEY_SIMPLEX, font_scale, text_color, 1)
+        for row_idx, row in enumerate(content_rows_data):
+            entry = entry_by_row.get(row_idx)
+            if not entry:
+                continue
+            conf = entry.get("confidence", 0)
+            text_color = (0, 180, 0) if conf >= 70 else (0, 180, 220) if conf >= 50 else (0, 0, 220)
+            ry, rh = row["y"], row["height"]
+            for col in columns:
+                col_type = col.get("type", "")
+                cx, cw = col["x"], col["width"]
+                field = {"column_en": "english", "column_de": "german", "column_example": "example"}.get(col_type, "")
+                text = entry.get(field, "") if field else ""
+                if text:
+                    label = text.replace('\n', ' ')[:30]
+                    cv2.putText(img, label, (cx + 3, ry + rh - 4),
+                                cv2.FONT_HERSHEY_SIMPLEX, 0.35, text_color, 1)

    # Blend overlay at 10% opacity
    cv2.addWeighted(overlay, 0.1, img, 0.9, 0, img)