From 293e7914d8e1e2b8765fd5ee53c89f35f73e1625 Mon Sep 17 00:00:00 2001
From: Benjamin Admin <benjaminadmin@MacBookPro.fritz.box>
Date: Thu, 5 Mar 2026 09:44:38 +0100
Subject: [PATCH] feat: improved OCR pipeline session manager with categories,
 thumbnails, pipeline logging

- Add document_category (10 types) and pipeline_log JSONB columns
- Session list: thumbnails, copyable IDs, category/doc_type badges
- Inline category dropdown, bulk delete, pipeline step logging
- New endpoints: thumbnail, delete-all, pipeline-log, categories
- Cleared all 22 old test sessions

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .../app/(admin)/ai/ocr-pipeline/page.tsx      | 270 +++++++++++++-----
 .../app/(admin)/ai/ocr-pipeline/types.ts      |  33 +++
 klausur-service/backend/ocr_pipeline_api.py   | 160 ++++++++++-
 .../backend/ocr_pipeline_session_store.py     |  25 +-
 4 files changed, 411 insertions(+), 77 deletions(-)
diff --git a/admin-lehrer/app/(admin)/ai/ocr-pipeline/page.tsx b/admin-lehrer/app/(admin)/ai/ocr-pipeline/page.tsx
index bbfba67..c94ad4f 100644
--- a/admin-lehrer/app/(admin)/ai/ocr-pipeline/page.tsx
+++ b/admin-lehrer/app/(admin)/ai/ocr-pipeline/page.tsx
@@ -11,7 +11,7 @@ import { StepWordRecognition } from '@/components/ocr-pipeline/StepWordRecogniti
 import { StepLlmReview } from '@/components/ocr-pipeline/StepLlmReview'
 import { StepReconstruction } from '@/components/ocr-pipeline/StepReconstruction'
 import { StepGroundTruth } from '@/components/ocr-pipeline/StepGroundTruth'
-import { PIPELINE_STEPS, type PipelineStep, type SessionListItem, type DocumentTypeResult } from './types'
+import { PIPELINE_STEPS, DOCUMENT_CATEGORIES, type PipelineStep, type SessionListItem, type DocumentTypeResult, type DocumentCategory } from './types'
 
 const KLAUSUR_API = '/klausur-api'
 
@@ -23,7 +23,9 @@ export default function OcrPipelinePage() {
   const [loadingSessions, setLoadingSessions] = useState(true)
   const [editingName, setEditingName] = useState<string | null>(null)
   const [editNameValue, setEditNameValue] = useState('')
+  const [editingCategory, setEditingCategory] = useState<string | null>(null)
   const [docTypeResult, setDocTypeResult] = useState<DocumentTypeResult | null>(null)
+  const [activeCategory, setActiveCategory] = useState<DocumentCategory | undefined>(undefined)
   const [steps, setSteps] = useState<PipelineStep[]>(
     PIPELINE_STEPS.map((s, i) => ({
       ...s,
@@ -59,6 +61,7 @@ export default function OcrPipelinePage() {
 
       setSessionId(sid)
       setSessionName(data.name || data.filename || '')
+      setActiveCategory(data.document_category || undefined)
 
       // Restore doc type result if available
       const savedDocType: DocumentTypeResult | null = data.doc_type_result || null
@@ -115,6 +118,50 @@ export default function OcrPipelinePage() {
     setEditingName(null)
   }, [sessionId])
 
+  /**
+   * Persist a new document category for session `sid` and, once the backend
+   * has accepted it, mirror the change into the session list (and into the
+   * active-session badge when `sid` is the currently open session).
+   * The inline category dropdown is closed whether or not the request worked.
+   */
+  const updateCategory = useCallback(async (sid: string, category: DocumentCategory) => {
+    try {
+      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`, {
+        method: 'PUT',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify({ document_category: category }),
+      })
+      // fetch() only rejects on network errors; a 4xx/5xx must not be shown as saved.
+      if (!res.ok) throw new Error(`HTTP ${res.status}`)
+      setSessions((prev) => prev.map((s) => (s.id === sid ? { ...s, document_category: category } : s)))
+      if (sessionId === sid) setActiveCategory(category)
+    } catch (e) {
+      console.error('Failed to update category:', e)
+    }
+    setEditingCategory(null)
+  }, [sessionId])
+
+  /**
+   * Ask for confirmation, delete every session on the backend and, only if
+   * the backend confirmed the deletion, reset the page to its initial state.
+   */
+  const deleteAllSessions = useCallback(async () => {
+    if (!confirm('Alle Sessions loeschen? Dies kann nicht rueckgaengig gemacht werden.')) return
+    try {
+      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions`, { method: 'DELETE' })
+      // Leave the UI untouched when the server did not confirm the deletion.
+      if (!res.ok) throw new Error(`HTTP ${res.status}`)
+      setSessions([])
+      setSessionId(null)
+      setCurrentStep(0)
+      setDocTypeResult(null)
+      setActiveCategory(undefined)
+      setSteps(PIPELINE_STEPS.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
+    } catch (e) {
+      console.error('Failed to delete all sessions:', e)
+    }
+  }, [])
+
   const handleStepClick = (index: number) => {
     if (index <= currentStep || steps[index].status === 'completed') {
       setCurrentStep(index)
@@ -307,14 +340,25 @@ export default function OcrPipelinePage() {
       <div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-4">
         <div className="flex items-center justify-between mb-3">
           <h3 className="text-sm font-medium text-gray-700 dark:text-gray-300">
-            Sessions
+            Sessions ({sessions.length})
           </h3>
-          <button
-            onClick={handleNewSession}
-            className="text-xs px-3 py-1.5 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors"
-          >
-            + Neue Session
-          </button>
+          <div className="flex gap-2">
+            {sessions.length > 0 && (
+              <button
+                onClick={deleteAllSessions}
+                className="text-xs px-3 py-1.5 text-red-600 hover:bg-red-50 dark:hover:bg-red-900/20 rounded-lg transition-colors"
+                title="Alle Sessions loeschen"
+              >
+                Alle loeschen
+              </button>
+            )}
+            <button
+              onClick={handleNewSession}
+              className="text-xs px-3 py-1.5 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors"
+            >
+              + Neue Session
+            </button>
+          </div>
         </div>
 
         {loadingSessions ? (
@@ -322,75 +366,165 @@ export default function OcrPipelinePage() {
         ) : sessions.length === 0 ? (
           <div className="text-sm text-gray-400 py-2">Noch keine Sessions vorhanden.</div>
         ) : (
-          <div className="space-y-1 max-h-48 overflow-y-auto">
-            {sessions.map((s) => (
-              <div
-                key={s.id}
-                className={`flex items-center gap-2 px-3 py-2 rounded-lg text-sm transition-colors cursor-pointer ${
-                  sessionId === s.id
-                    ? 'bg-teal-50 dark:bg-teal-900/30 border border-teal-200 dark:border-teal-700'
-                    : 'hover:bg-gray-50 dark:hover:bg-gray-700/50'
-                }`}
-              >
-                <div className="flex-1 min-w-0" onClick={() => openSession(s.id)}>
-                  {editingName === s.id ? (
-                    <input
-                      autoFocus
-                      value={editNameValue}
-                      onChange={(e) => setEditNameValue(e.target.value)}
-                      onBlur={() => renameSession(s.id, editNameValue)}
-                      onKeyDown={(e) => {
-                        if (e.key === 'Enter') renameSession(s.id, editNameValue)
-                        if (e.key === 'Escape') setEditingName(null)
-                      }}
-                      onClick={(e) => e.stopPropagation()}
-                      className="w-full px-1 py-0.5 text-sm border rounded dark:bg-gray-700 dark:border-gray-600"
+          <div className="space-y-1.5 max-h-[320px] overflow-y-auto">
+            {sessions.map((s) => {
+              const catInfo = DOCUMENT_CATEGORIES.find(c => c.value === s.document_category)
+              return (
+                <div
+                  key={s.id}
+                  className={`relative flex items-start gap-3 px-3 py-2.5 rounded-lg text-sm transition-colors cursor-pointer ${
+                    sessionId === s.id
+                      ? 'bg-teal-50 dark:bg-teal-900/30 border border-teal-200 dark:border-teal-700'
+                      : 'hover:bg-gray-50 dark:hover:bg-gray-700/50'
+                  }`}
+                >
+                  {/* Thumbnail */}
+                  <div
+                    className="flex-shrink-0 w-12 h-12 rounded-md overflow-hidden bg-gray-100 dark:bg-gray-700"
+                    onClick={() => openSession(s.id)}
+                  >
+                    {/* eslint-disable-next-line @next/next/no-img-element */}
+                    <img
+                      src={`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${s.id}/thumbnail?size=96`}
+                      alt=""
+                      className="w-full h-full object-cover"
+                      loading="lazy"
+                      onError={(e) => { (e.target as HTMLImageElement).style.display = 'none' }}
                     />
-                  ) : (
-                    <div className="truncate font-medium text-gray-700 dark:text-gray-300">
-                      {s.name || s.filename}
+                  </div>
+
+                  {/* Info */}
+                  <div className="flex-1 min-w-0" onClick={() => openSession(s.id)}>
+                    {editingName === s.id ? (
+                      <input
+                        autoFocus
+                        value={editNameValue}
+                        onChange={(e) => setEditNameValue(e.target.value)}
+                        onBlur={() => renameSession(s.id, editNameValue)}
+                        onKeyDown={(e) => {
+                          if (e.key === 'Enter') renameSession(s.id, editNameValue)
+                          if (e.key === 'Escape') setEditingName(null)
+                        }}
+                        onClick={(e) => e.stopPropagation()}
+                        className="w-full px-1 py-0.5 text-sm border rounded dark:bg-gray-700 dark:border-gray-600"
+                      />
+                    ) : (
+                      <div className="truncate font-medium text-gray-700 dark:text-gray-300">
+                        {s.name || s.filename}
+                      </div>
+                    )}
+                    {/* ID row */}
+                    <button
+                      onClick={(e) => {
+                        e.stopPropagation()
+                        navigator.clipboard.writeText(s.id)
+                        const btn = e.currentTarget
+                        btn.textContent = 'Kopiert!'
+                        setTimeout(() => { btn.textContent = `ID: ${s.id.slice(0, 8)}` }, 1500)
+                      }}
+                      className="text-[10px] font-mono text-gray-400 hover:text-teal-500 transition-colors"
+                      title={`Volle ID: ${s.id} — Klick zum Kopieren`}
+                    >
+                      ID: {s.id.slice(0, 8)}
+                    </button>
+                    <div className="text-xs text-gray-400 flex gap-2 mt-0.5">
+                      <span>{new Date(s.created_at).toLocaleDateString('de-DE', { day: '2-digit', month: '2-digit', year: '2-digit', hour: '2-digit', minute: '2-digit' })}</span>
+                      <span>Schritt {s.current_step}: {stepNames[s.current_step] || '?'}</span>
+                    </div>
+                  </div>
+
+                  {/* Badges */}
+                  <div className="flex flex-col gap-1 items-end flex-shrink-0" onClick={(e) => e.stopPropagation()}>
+                    {/* Category Badge */}
+                    <button
+                      onClick={() => setEditingCategory(editingCategory === s.id ? null : s.id)}
+                      className={`text-[10px] px-1.5 py-0.5 rounded-full border transition-colors ${
+                        catInfo
+                          ? 'bg-teal-50 dark:bg-teal-900/30 border-teal-200 dark:border-teal-700 text-teal-700 dark:text-teal-300'
+                          : 'bg-gray-50 dark:bg-gray-700 border-gray-200 dark:border-gray-600 text-gray-400 hover:text-gray-600 dark:hover:text-gray-300'
+                      }`}
+                      title="Kategorie setzen"
+                    >
+                      {catInfo ? `${catInfo.icon} ${catInfo.label}` : '+ Kategorie'}
+                    </button>
+                    {/* Doc Type Badge (read-only) */}
+                    {s.doc_type && (
+                      <span className="text-[10px] px-1.5 py-0.5 rounded-full bg-gray-100 dark:bg-gray-700 text-gray-500 dark:text-gray-400 border border-gray-200 dark:border-gray-600">
+                        {s.doc_type}
+                      </span>
+                    )}
+                  </div>
+
+                  {/* Action buttons */}
+                  <div className="flex flex-col gap-0.5 flex-shrink-0">
+                    <button
+                      onClick={(e) => {
+                        e.stopPropagation()
+                        setEditNameValue(s.name || s.filename)
+                        setEditingName(s.id)
+                      }}
+                      className="p-1 text-gray-400 hover:text-gray-600 dark:hover:text-gray-300"
+                      title="Umbenennen"
+                    >
+                      <svg className="w-3.5 h-3.5" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={2}>
+                        <path strokeLinecap="round" strokeLinejoin="round" d="M15.232 5.232l3.536 3.536m-2.036-5.036a2.5 2.5 0 113.536 3.536L6.5 21.036H3v-3.572L16.732 3.732z" />
+                      </svg>
+                    </button>
+                    <button
+                      onClick={(e) => {
+                        e.stopPropagation()
+                        if (confirm('Session loeschen?')) deleteSession(s.id)
+                      }}
+                      className="p-1 text-gray-400 hover:text-red-500"
+                      title="Loeschen"
+                    >
+                      <svg className="w-3.5 h-3.5" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={2}>
+                        <path strokeLinecap="round" strokeLinejoin="round" d="M19 7l-.867 12.142A2 2 0 0116.138 21H7.862a2 2 0 01-1.995-1.858L5 7m5 4v6m4-6v6m1-10V4a1 1 0 00-1-1h-4a1 1 0 00-1 1v3M4 7h16" />
+                      </svg>
+                    </button>
+                  </div>
+
+                  {/* Category dropdown (inline) */}
+                  {editingCategory === s.id && (
+                    <div
+                      className="absolute right-0 top-full mt-1 z-20 bg-white dark:bg-gray-800 border border-gray-200 dark:border-gray-700 rounded-lg shadow-lg p-2 grid grid-cols-2 gap-1 w-64"
+                      onClick={(e) => e.stopPropagation()}
+                    >
+                      {DOCUMENT_CATEGORIES.map((cat) => (
+                        <button
+                          key={cat.value}
+                          onClick={() => updateCategory(s.id, cat.value)}
+                          className={`text-xs px-2 py-1.5 rounded-md text-left transition-colors ${
+                            s.document_category === cat.value
+                              ? 'bg-teal-100 dark:bg-teal-900/40 text-teal-700 dark:text-teal-300'
+                              : 'hover:bg-gray-100 dark:hover:bg-gray-700 text-gray-600 dark:text-gray-400'
+                          }`}
+                        >
+                          {cat.icon} {cat.label}
+                        </button>
+                      ))}
                     </div>
                   )}
-                  <div className="text-xs text-gray-400 flex gap-2">
-                    <span>{new Date(s.created_at).toLocaleDateString('de-DE', { day: '2-digit', month: '2-digit', year: '2-digit', hour: '2-digit', minute: '2-digit' })}</span>
-                    <span>Schritt {s.current_step}: {stepNames[s.current_step] || '?'}</span>
-                  </div>
                 </div>
-                <button
-                  onClick={(e) => {
-                    e.stopPropagation()
-                    setEditNameValue(s.name || s.filename)
-                    setEditingName(s.id)
-                  }}
-                  className="p-1 text-gray-400 hover:text-gray-600 dark:hover:text-gray-300"
-                  title="Umbenennen"
-                >
-                  <svg className="w-3.5 h-3.5" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={2}>
-                    <path strokeLinecap="round" strokeLinejoin="round" d="M15.232 5.232l3.536 3.536m-2.036-5.036a2.5 2.5 0 113.536 3.536L6.5 21.036H3v-3.572L16.732 3.732z" />
-                  </svg>
-                </button>
-                <button
-                  onClick={(e) => {
-                    e.stopPropagation()
-                    if (confirm('Session loeschen?')) deleteSession(s.id)
-                  }}
-                  className="p-1 text-gray-400 hover:text-red-500"
-                  title="Loeschen"
-                >
-                  <svg className="w-3.5 h-3.5" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={2}>
-                    <path strokeLinecap="round" strokeLinejoin="round" d="M19 7l-.867 12.142A2 2 0 0116.138 21H7.862a2 2 0 01-1.995-1.858L5 7m5 4v6m4-6v6m1-10V4a1 1 0 00-1-1h-4a1 1 0 00-1 1v3M4 7h16" />
-                  </svg>
-                </button>
-              </div>
-            ))}
+              )
+            })}
           </div>
         )}
       </div>
 
-      {/* Active session name */}
+      {/* Active session info */}
       {sessionId && sessionName && (
-        <div className="text-sm text-gray-500 dark:text-gray-400">
-          Aktive Session: <span className="font-medium text-gray-700 dark:text-gray-300">{sessionName}</span>
+        <div className="flex items-center gap-3 text-sm text-gray-500 dark:text-gray-400">
+          <span>Aktive Session: <span className="font-medium text-gray-700 dark:text-gray-300">{sessionName}</span></span>
+          {activeCategory && (() => {
+            const cat = DOCUMENT_CATEGORIES.find(c => c.value === activeCategory)
+            return cat ? <span className="text-xs px-2 py-0.5 rounded-full bg-teal-50 dark:bg-teal-900/30 border border-teal-200 dark:border-teal-700 text-teal-700 dark:text-teal-300">{cat.icon} {cat.label}</span> : null
+          })()}
+          {docTypeResult && (
+            <span className="text-xs px-2 py-0.5 rounded-full bg-gray-100 dark:bg-gray-700 text-gray-500 dark:text-gray-400 border border-gray-200 dark:border-gray-600">
+              {docTypeResult.doc_type}
+            </span>
+          )}
         </div>
       )}
 
diff --git a/admin-lehrer/app/(admin)/ai/ocr-pipeline/types.ts b/admin-lehrer/app/(admin)/ai/ocr-pipeline/types.ts
index 4b1c86f..8734715 100644
--- a/admin-lehrer/app/(admin)/ai/ocr-pipeline/types.ts
+++ b/admin-lehrer/app/(admin)/ai/ocr-pipeline/types.ts
@@ -7,16 +7,62 @@ export interface PipelineStep {
   status: PipelineStepStatus
 }
 
+/**
+ * User-assigned category of a scanned document.
+ * Must stay in sync with `VALID_DOCUMENT_CATEGORIES` in the backend
+ * (klausur-service/backend/ocr_pipeline_api.py), which rejects any other value.
+ */
+export type DocumentCategory =
+  | 'vokabelseite' | 'buchseite' | 'arbeitsblatt' | 'klausurseite'
+  | 'mathearbeit' | 'statistik' | 'zeitung' | 'formular' | 'handschrift' | 'sonstiges'
+
+/** Label and emoji icon for every category, in the order the category dropdown lists them. */
+export const DOCUMENT_CATEGORIES: { value: DocumentCategory; label: string; icon: string }[] = [
+  { value: 'vokabelseite', label: 'Vokabelseite', icon: '📖' },
+  { value: 'buchseite', label: 'Buchseite', icon: '📚' },
+  { value: 'arbeitsblatt', label: 'Arbeitsblatt', icon: '📝' },
+  { value: 'klausurseite', label: 'Klausurseite', icon: '📄' },
+  { value: 'mathearbeit', label: 'Mathearbeit', icon: '🔢' },
+  { value: 'statistik', label: 'Statistik', icon: '📊' },
+  { value: 'zeitung', label: 'Zeitung', icon: '📰' },
+  { value: 'formular', label: 'Formular', icon: '📋' },
+  { value: 'handschrift', label: 'Handschrift', icon: '✍️' },
+  { value: 'sonstiges', label: 'Sonstiges', icon: '📎' },
+]
+
 export interface SessionListItem {
   id: string
   name: string
   filename: string
   status: string
   current_step: number
+  /** Category chosen by the user, if any. NOTE(review): the backend may send null rather than omit it — confirm. */
+  document_category?: DocumentCategory
+  /** The session's `doc_type` as returned by the session list; rendered as a read-only badge. */
+  doc_type?: string
   created_at: string
   updated_at?: string
 }
 
+/** One entry of a session's pipeline log, as appended by the backend's pipeline-log helper. */
+export interface PipelineLogEntry {
+  /** Step identifier chosen by the backend, e.g. 'deskew', 'dewarp' or 'columns'. */
+  step: string
+  /** ISO-8601 timestamp taken in UTC by the backend; the string carries no timezone suffix. */
+  completed_at: string
+  /** Success flag the backend recorded for the step. */
+  success: boolean
+  /** Step duration in milliseconds; left out when the backend was not given one. */
+  duration_ms?: number
+  /** Step-specific key/value metrics; the keys differ from step to step. */
+  metrics: Record<string, unknown>
+}
+
+/** Shape of a session's `pipeline_log`: the list of logged steps in append order. */
+export interface PipelineLog {
+  steps: PipelineLogEntry[]
+}
+
 export interface DocumentTypeResult {
   doc_type: 'vocab_table' | 'full_text' | 'generic_table'
   confidence: number
@@ -34,6 +65,8 @@ export interface SessionInfo {
   image_height: number
   original_image_url: string
   current_step?: number
+  document_category?: DocumentCategory
+  doc_type?: string
   deskew_result?: DeskewResult
   dewarp_result?: DewarpResult
   column_result?: ColumnResult
diff --git a/klausur-service/backend/ocr_pipeline_api.py b/klausur-service/backend/ocr_pipeline_api.py
index 3e14872..ab8bc20 100644
--- a/klausur-service/backend/ocr_pipeline_api.py
+++ b/klausur-service/backend/ocr_pipeline_api.py
@@ -66,6 +66,7 @@ from cv_vocab_pipeline import (
 )
 from ocr_pipeline_session_store import (
     create_session_db,
+    delete_all_sessions_db,
     delete_session_db,
     get_session_db,
     get_session_image,
@@ -151,8 +152,19 @@ class DewarpGroundTruthRequest(BaseModel):
     notes: Optional[str] = None
 
 
-class RenameSessionRequest(BaseModel):
-    name: str
+# Category slugs a session may carry; the PUT /sessions/{session_id} handler
+# answers 400 for any other value. Mirrored by the frontend's DocumentCategory type.
+VALID_DOCUMENT_CATEGORIES = {
+    'vokabelseite', 'buchseite', 'arbeitsblatt', 'klausurseite',
+    'mathearbeit', 'statistik', 'zeitung', 'formular', 'handschrift', 'sonstiges',
+}
+
+
+class UpdateSessionRequest(BaseModel):
+    # Body of PUT /sessions/{session_id}. Both fields are optional; a field
+    # left as None is not touched by the update.
+    name: Optional[str] = None
+    document_category: Optional[str] = None
 
 
 class ManualColumnsRequest(BaseModel):
@@ -281,6 +289,8 @@ async def get_session_info(session_id: str):
         "image_height": img_h,
         "original_image_url": f"/api/v1/ocr-pipeline/sessions/{session_id}/image/original",
         "current_step": session.get("current_step", 1),
+        "document_category": session.get("document_category"),
+        "doc_type": session.get("doc_type"),
     }
 
     if session.get("deskew_result"):
@@ -293,17 +303,40 @@ async def get_session_info(session_id: str):
         result["row_result"] = session["row_result"]
     if session.get("word_result"):
         result["word_result"] = session["word_result"]
+    if session.get("doc_type_result"):
+        result["doc_type_result"] = session["doc_type_result"]
 
     return result
 
 
 @router.put("/sessions/{session_id}")
-async def rename_session(session_id: str, req: RenameSessionRequest):
-    """Rename a session."""
-    updated = await update_session_db(session_id, name=req.name)
+async def update_session(session_id: str, req: UpdateSessionRequest):
+    """Update session name and/or document category.
+
+    Only fields that are not ``None`` in the request are written. Responds with
+    400 for an unknown category or an empty update, and with 404 when
+    ``update_session_db`` returns nothing for the session.
+    """
+    category = req.document_category
+    if category is not None and category not in VALID_DOCUMENT_CATEGORIES:
+        raise HTTPException(
+            status_code=400,
+            detail=f"Invalid category '{category}'. Valid: {sorted(VALID_DOCUMENT_CATEGORIES)}",
+        )
+
+    # Collect just the fields the client supplied, name first.
+    changes: Dict[str, Any] = {
+        field: value
+        for field, value in (("name", req.name), ("document_category", category))
+        if value is not None
+    }
+    if not changes:
+        raise HTTPException(status_code=400, detail="Nothing to update")
+
+    updated = await update_session_db(session_id, **changes)
     if not updated:
         raise HTTPException(status_code=404, detail=f"Session {session_id} not found")
-    return {"session_id": session_id, "name": req.name}
+    return {"session_id": session_id, **changes}
 
 
 @router.delete("/sessions/{session_id}")
@@ -316,6 +340,114 @@ async def delete_session(session_id: str):
     return {"session_id": session_id, "deleted": True}
 
 
+@router.delete("/sessions")
+async def delete_all_sessions():
+    """Delete ALL sessions (cleanup).
+
+    Returns ``{"deleted_count": n}`` with the row count reported by
+    ``delete_all_sessions_db``.
+    """
+    count = await delete_all_sessions_db()
+    # Clear _cache only after the rows are gone: clearing it before the await
+    # would let a request running in between re-add a session that is about to
+    # be deleted, and would empty the cache even when the delete raises.
+    _cache.clear()
+    return {"deleted_count": count}
+
+
+@router.get("/sessions/{session_id}/thumbnail")
+async def get_session_thumbnail(session_id: str, size: int = Query(default=80, ge=16, le=400)):
+    """Return a small PNG thumbnail of the original image.
+
+    The image is scaled, keeping its aspect ratio, so that its longer side is
+    about ``size`` pixels (images smaller than that are enlarged).
+
+    Raises:
+        HTTPException: 404 when no original image is stored for the session,
+            500 when the image cannot be decoded or the thumbnail encoded.
+    """
+    original_png = await get_session_image(session_id, "original")
+    if not original_png:
+        raise HTTPException(status_code=404, detail=f"Session {session_id} not found or no image")
+    arr = np.frombuffer(original_png, dtype=np.uint8)
+    img = cv2.imdecode(arr, cv2.IMREAD_COLOR)
+    if img is None:
+        raise HTTPException(status_code=500, detail="Failed to decode image")
+    h, w = img.shape[:2]
+    scale = size / max(h, w)
+    # int() truncates, so the short side of a very elongated image could become
+    # 0, which cv2.resize rejects; keep every dimension at least 1 pixel.
+    new_w, new_h = max(1, int(w * scale)), max(1, int(h * scale))
+    thumb = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_AREA)
+    ok, png_bytes = cv2.imencode(".png", thumb)
+    if not ok:
+        raise HTTPException(status_code=500, detail="Failed to encode thumbnail")
+    return Response(content=png_bytes.tobytes(), media_type="image/png",
+                    headers={"Cache-Control": "public, max-age=3600"})
+
+
+@router.get("/sessions/{session_id}/pipeline-log")
+async def get_pipeline_log(session_id: str):
+    """Get the pipeline execution log for a session.
+
+    A session without a stored log yields ``{"steps": []}``; an unknown session
+    yields a 404.
+    """
+    session = await get_session_db(session_id)
+    if not session:
+        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")
+    return {"session_id": session_id, "pipeline_log": session.get("pipeline_log") or {"steps": []}}
+
+
+@router.get("/categories")
+async def list_categories():
+    """List valid document categories in sorted order."""
+    return {"categories": sorted(VALID_DOCUMENT_CATEGORIES)}
+
+
+# ---------------------------------------------------------------------------
+# Pipeline Log Helper
+# ---------------------------------------------------------------------------
+
+async def _append_pipeline_log(
+    session_id: str,
+    step_name: str,
+    metrics: Dict[str, Any],
+    success: bool = True,
+    duration_ms: Optional[int] = None,
+):
+    """Append a step entry to the session's pipeline_log JSONB.
+
+    Does nothing when the session does not exist.
+
+    Args:
+        session_id: Session whose log is extended.
+        step_name: Stored as the entry's ``step`` (callers pass e.g. "deskew").
+        metrics: Step-specific values stored under ``metrics``.
+        success: Stored as the entry's ``success`` flag.
+        duration_ms: Stored as ``duration_ms`` when given, otherwise left out.
+
+    NOTE(review): the log is loaded, extended and written back in separate
+    awaits, so two concurrent calls for one session could lose an entry.
+    """
+    session = await get_session_db(session_id)
+    if not session:
+        return
+    log = session.get("pipeline_log") or {"steps": []}
+    if not isinstance(log, dict):
+        log = {"steps": []}
+    entry = {
+        "step": step_name,
+        "completed_at": datetime.utcnow().isoformat(),
+        "success": success,
+        "metrics": metrics,
+    }
+    if duration_ms is not None:
+        entry["duration_ms"] = duration_ms
+    log.setdefault("steps", []).append(entry)
+    await update_session_db(session_id, pipeline_log=log)
+
+
 # ---------------------------------------------------------------------------
 # Image Endpoints
 # ---------------------------------------------------------------------------
@@ -448,6 +544,12 @@ async def auto_deskew(session_id: str):
     logger.info(f"OCR Pipeline: deskew session {session_id}: "
                 f"hough={angle_hough:.2f} wa={angle_wa:.2f} -> {method_used} {angle_applied:.2f}")
 
+    await _append_pipeline_log(session_id, "deskew", {
+        "angle_applied": round(angle_applied, 3),
+        "confidence": round(confidence, 2),
+        "method": method_used,
+    }, duration_ms=int(duration * 1000))
+
     return {
         "session_id": session_id,
         **deskew_result,
@@ -680,6 +782,13 @@ async def auto_dewarp(
                 f"method={dewarp_info['method']} shear={dewarp_info['shear_degrees']:.3f} "
                 f"conf={dewarp_info['confidence']:.2f} ({duration:.2f}s)")
 
+    await _append_pipeline_log(session_id, "dewarp", {
+        "shear_degrees": dewarp_info["shear_degrees"],
+        "confidence": dewarp_info["confidence"],
+        "method": dewarp_info["method"],
+        "ensemble_methods": [d.get("method", "") for d in dewarp_info.get("detections", [])],
+    }, duration_ms=int(duration * 1000))
+
     return {
         "session_id": session_id,
         **dewarp_result,
@@ -808,6 +917,13 @@ async def detect_type(session_id: str):
     logger.info(f"OCR Pipeline: detect-type session {session_id}: "
                 f"{result.doc_type} (confidence={result.confidence}, {duration:.2f}s)")
 
+    await _append_pipeline_log(session_id, "detect_type", {
+        "doc_type": result.doc_type,
+        "pipeline": result.pipeline,
+        "confidence": result.confidence,
+        **{k: v for k, v in (result.features or {}).items() if isinstance(v, (int, float, str, bool))},
+    }, duration_ms=int(duration * 1000))
+
     return {"session_id": session_id, **result_dict}
 
 
@@ -896,6 +1012,13 @@ async def detect_columns(session_id: str):
     logger.info(f"OCR Pipeline: columns session {session_id}: "
                 f"{col_count} columns detected ({duration:.2f}s)")
 
+    img_w = dewarped_bgr.shape[1]
+    await _append_pipeline_log(session_id, "columns", {
+        "total_columns": len(columns),
+        "column_widths_pct": [round(c["width"] / img_w * 100, 1) for c in columns],
+        "column_types": [c["type"] for c in columns],
+    }, duration_ms=int(duration * 1000))
+
     return {
         "session_id": session_id,
         **column_result,
@@ -1112,6 +1235,15 @@ async def detect_rows(session_id: str):
     logger.info(f"OCR Pipeline: rows session {session_id}: "
                 f"{len(rows)} rows detected ({duration:.2f}s): {type_counts}")
 
+    content_rows = sum(1 for r in rows if r.row_type == "content")
+    avg_height = round(sum(r.height for r in rows) / len(rows)) if rows else 0
+    await _append_pipeline_log(session_id, "rows", {
+        "total_rows": len(rows),
+        "content_rows": content_rows,
+        "artifact_rows_removed": type_counts.get("header", 0) + type_counts.get("footer", 0),
+        "avg_row_height_px": avg_height,
+    }, duration_ms=int(duration * 1000))
+
     return {
         "session_id": session_id,
         **row_result,
@@ -1369,6 +1501,15 @@ async def detect_words(
                 f"layout={word_result['layout']}, "
                 f"{len(cells)} cells ({duration:.2f}s), summary: {word_result['summary']}")
 
+    await _append_pipeline_log(session_id, "words", {
+        "total_cells": len(cells),
+        "non_empty_cells": word_result["summary"]["non_empty_cells"],
+        "low_confidence_count": word_result["summary"]["low_confidence"],
+        "ocr_engine": used_engine,
+        "layout": word_result["layout"],
+        "entry_count": word_result.get("entry_count", 0),
+    }, duration_ms=int(duration * 1000))
+
     return {
         "session_id": session_id,
         **word_result,
@@ -1774,6 +1915,13 @@ async def run_llm_review(session_id: str, request: Request, stream: bool = False
     logger.info(f"LLM review session {session_id}: {len(result['changes'])} changes, "
                 f"{result['duration_ms']}ms, model={result['model_used']}")
 
+    await _append_pipeline_log(session_id, "correction", {
+        "engine": "llm",
+        "model": result["model_used"],
+        "total_entries": len(entries),
+        "corrections_proposed": len(result["changes"]),
+    }, duration_ms=result["duration_ms"])
+
     return {
         "session_id": session_id,
         "changes": result["changes"],
diff --git a/klausur-service/backend/ocr_pipeline_session_store.py b/klausur-service/backend/ocr_pipeline_session_store.py
index 254d662..12f3c2a 100644
--- a/klausur-service/backend/ocr_pipeline_session_store.py
+++ b/klausur-service/backend/ocr_pipeline_session_store.py
@@ -66,7 +66,9 @@ async def init_ocr_pipeline_tables():
             ADD COLUMN IF NOT EXISTS clean_png BYTEA,
             ADD COLUMN IF NOT EXISTS handwriting_removal_meta JSONB,
             ADD COLUMN IF NOT EXISTS doc_type VARCHAR(50),
-            ADD COLUMN IF NOT EXISTS doc_type_result JSONB
+            ADD COLUMN IF NOT EXISTS doc_type_result JSONB,
+            ADD COLUMN IF NOT EXISTS document_category VARCHAR(50),
+            ADD COLUMN IF NOT EXISTS pipeline_log JSONB
         """)
 
 
@@ -91,6 +93,7 @@ async def create_session_db(
                       deskew_result, dewarp_result, column_result, row_result,
                       word_result, ground_truth, auto_shear_degrees,
                       doc_type, doc_type_result,
+                      document_category, pipeline_log,
                       created_at, updated_at
         """, uuid.UUID(session_id), name, filename, original_png)
 
@@ -106,6 +109,7 @@ async def get_session_db(session_id: str) -> Optional[Dict[str, Any]]:
                    deskew_result, dewarp_result, column_result, row_result,
                    word_result, ground_truth, auto_shear_degrees,
                    doc_type, doc_type_result,
+                   document_category, pipeline_log,
                    created_at, updated_at
             FROM ocr_pipeline_sessions WHERE id = $1
         """, uuid.UUID(session_id))
@@ -151,9 +155,10 @@ async def update_session_db(session_id: str, **kwargs) -> Optional[Dict[str, Any
         'deskew_result', 'dewarp_result', 'column_result', 'row_result',
         'word_result', 'ground_truth', 'auto_shear_degrees',
         'doc_type', 'doc_type_result',
+        'document_category', 'pipeline_log',
     }
 
-    jsonb_fields = {'deskew_result', 'dewarp_result', 'column_result', 'row_result', 'word_result', 'ground_truth', 'handwriting_removal_meta', 'doc_type_result'}
+    jsonb_fields = {'deskew_result', 'dewarp_result', 'column_result', 'row_result', 'word_result', 'ground_truth', 'handwriting_removal_meta', 'doc_type_result', 'pipeline_log'}
 
     for key, value in kwargs.items():
         if key in allowed_fields:
@@ -180,6 +185,7 @@ async def update_session_db(session_id: str, **kwargs) -> Optional[Dict[str, Any
                       deskew_result, dewarp_result, column_result, row_result,
                       word_result, ground_truth, auto_shear_degrees,
                       doc_type, doc_type_result,
+                      document_category, pipeline_log,
                       created_at, updated_at
         """, *values)
 
@@ -194,6 +200,7 @@ async def list_sessions_db(limit: int = 50) -> List[Dict[str, Any]]:
     async with pool.acquire() as conn:
         rows = await conn.fetch("""
             SELECT id, name, filename, status, current_step,
+                   document_category, doc_type,
                    created_at, updated_at
             FROM ocr_pipeline_sessions
             ORDER BY created_at DESC
@@ -213,6 +220,24 @@ async def delete_session_db(session_id: str) -> bool:
         return result == "DELETE 1"
 
 
+async def delete_all_sessions_db() -> int:
+    """Remove every row from ``ocr_pipeline_sessions``.
+
+    Returns:
+        The row count parsed from the command status string (``"DELETE 5"``
+        gives 5), or 0 when that string does not end in an integer.
+    """
+    pool = await get_pool()
+    async with pool.acquire() as conn:
+        status = await conn.execute("DELETE FROM ocr_pipeline_sessions")
+    # The last whitespace-separated token of the status is the affected-row count.
+    try:
+        deleted = int(status.split()[-1])
+    except (ValueError, IndexError):
+        deleted = 0
+    return deleted
+
+
 # =============================================================================
 # HELPER
 # =============================================================================
@@ -235,7 +254,7 @@ def _row_to_dict(row: asyncpg.Record) -> Dict[str, Any]:
             result[key] = result[key].isoformat()
 
     # JSONB → parsed (asyncpg returns str for JSONB)
-    for key in ['deskew_result', 'dewarp_result', 'column_result', 'row_result', 'word_result', 'ground_truth', 'doc_type_result']:
+    for key in ['deskew_result', 'dewarp_result', 'column_result', 'row_result', 'word_result', 'ground_truth', 'doc_type_result', 'pipeline_log']:
         if key in result and result[key] is not None:
             if isinstance(result[key], str):
                 result[key] = json.loads(result[key])