feat: Orientierung + Zuschneiden als Schritte 1-2 in OCR-Pipeline

Zwei neue Wizard-Schritte vor Begradigung:
- Step 1: Orientierungserkennung (0/90/180/270° via Tesseract OSD)
- Step 2: Seitenrand-Erkennung und Zuschnitt (Scannerraender entfernen)

Backend:
- orientation_crop_api.py: POST /orientation, POST /crop, POST /crop/skip
- page_crop.py: detect_and_crop_page() mit Format-Erkennung (A4/A5/Letter)
- Session-Store: orientation_result, crop_result Felder
- Pipeline nutzt zugeschnittenes Bild fuer Deskew/Dewarp

Frontend:
- StepOrientation.tsx: Upload + Auto-Orientierung + Vorher/Nachher
- StepCrop.tsx: Auto-Crop + Format-Badge + Ueberspringen-Option
- Pipeline-Stepper: 10 Schritte (war 8)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-08 23:55:23 +01:00
parent 9a5a35bff1
commit 2763631711
12 changed files with 1247 additions and 259 deletions
--- a/admin-lehrer/app/(admin)/ai/ocr-pipeline/page.tsx
+++ b/admin-lehrer/app/(admin)/ai/ocr-pipeline/page.tsx
@@ -3,6 +3,8 @@
 import { useCallback, useEffect, useState } from 'react'
 import { PagePurpose } from '@/components/common/PagePurpose'
 import { PipelineStepper } from '@/components/ocr-pipeline/PipelineStepper'
 import { StepOrientation } from '@/components/ocr-pipeline/StepOrientation'
 import { StepCrop } from '@/components/ocr-pipeline/StepCrop'
 import { StepDeskew } from '@/components/ocr-pipeline/StepDeskew'
 import { StepDewarp } from '@/components/ocr-pipeline/StepDewarp'
 import { StepColumnDetection } from '@/components/ocr-pipeline/StepColumnDetection'
@@ -196,7 +198,7 @@ export default function OcrPipelinePage() {
    setCurrentStep(nextStep)
  }
-  const handleDeskewComplete = (sid: string) => {
+  const handleOrientationComplete = (sid: string) => {
    setSessionId(sid)
    // Reload session list to show the new session
    loadSessions()
@@ -270,14 +272,16 @@ export default function OcrPipelinePage() {
  }
  const stepNames: Record<number, string> = {
-    1: 'Begradigung',
+    1: 'Orientierung',
-    2: 'Entzerrung',
+    2: 'Zuschneiden',
-    3: 'Spalten',
+    3: 'Begradigung',
-    4: 'Zeilen',
+    4: 'Entzerrung',
-    5: 'Woerter',
+    5: 'Spalten',
-    6: 'Korrektur',
+    6: 'Zeilen',
-    7: 'Rekonstruktion',
+    7: 'Woerter',
-    8: 'Validierung',
+    8: 'Korrektur',
    9: 'Rekonstruktion',
    10: 'Validierung',
  }
  const reprocessFromStep = useCallback(async (uiStep: number) => {
@@ -306,20 +310,24 @@ export default function OcrPipelinePage() {
  const renderStep = () => {
    switch (currentStep) {
      case 0:
-        return <StepDeskew sessionId={sessionId} onNext={handleDeskewComplete} />
+        return <StepOrientation sessionId={sessionId} onNext={handleOrientationComplete} />
      case 1:
-        return <StepDewarp sessionId={sessionId} onNext={handleDewarpNext} />
+        return <StepCrop sessionId={sessionId} onNext={handleNext} />
      case 2:
-        return <StepColumnDetection sessionId={sessionId} onNext={handleNext} />
+        return <StepDeskew sessionId={sessionId} onNext={handleNext} />
      case 3:
-        return <StepRowDetection sessionId={sessionId} onNext={handleNext} />
+        return <StepDewarp sessionId={sessionId} onNext={handleDewarpNext} />
      case 4:
-        return <StepWordRecognition sessionId={sessionId} onNext={handleNext} goToStep={goToStep} />
+        return <StepColumnDetection sessionId={sessionId} onNext={handleNext} />
      case 5:
-        return <StepLlmReview sessionId={sessionId} onNext={handleNext} />
+        return <StepRowDetection sessionId={sessionId} onNext={handleNext} />
      case 6:
-        return <StepReconstruction sessionId={sessionId} onNext={handleNext} />
+        return <StepWordRecognition sessionId={sessionId} onNext={handleNext} goToStep={goToStep} />
      case 7:
        return <StepLlmReview sessionId={sessionId} onNext={handleNext} />
      case 8:
        return <StepReconstruction sessionId={sessionId} onNext={handleNext} />
      case 9:
        return <StepGroundTruth sessionId={sessionId} onNext={handleNext} />
      default:
        return null
--- a/admin-lehrer/app/(admin)/ai/ocr-pipeline/types.ts
+++ b/admin-lehrer/app/(admin)/ai/ocr-pipeline/types.ts
@@ -57,6 +57,26 @@ export interface DocumentTypeResult {
  duration_seconds?: number
 }
 /**
  * Result of the orientation step (pipeline step 1), as stored on the session
  * under `orientation_result` and returned by the `/orientation` endpoint.
  */
 export interface OrientationResult {
  /** Detected page rotation in degrees (the commit notes list 0/90/180/270 as the detected values). */
  orientation_degrees: number
  /** Whether a rotation was applied; the UI shows "<degrees>° korrigiert" when true and a "0°" badge otherwise. */
  corrected: boolean
  /** Processing time; rendered by the UI with an "s" suffix. */
  duration_seconds: number
 }
 /**
  * Result of the crop step (pipeline step 2), as stored on the session under
  * `crop_result` and returned by the `/crop` and `/crop/skip` endpoints.
  */
 export interface CropResult {
  /** Whether the image was actually cropped; the UI shows "Zugeschnitten" when true, "Kein Zuschnitt noetig" otherwise. */
  crop_applied: boolean
  /** Crop rectangle — presumably in pixels of the uncropped image; TODO confirm against the backend. */
  crop_rect?: { x: number; y: number; width: number; height: number }
  /** Crop rectangle — presumably the same rectangle expressed as percentages; TODO confirm against the backend. */
  crop_rect_pct?: { x: number; y: number; width: number; height: number }
  /** Image dimensions before cropping; rendered as "<width>x<height>". */
  original_size: { width: number; height: number }
  /** Image dimensions after cropping; rendered as "<width>x<height>". */
  cropped_size: { width: number; height: number }
  /** Detected paper format label (the commit notes mention A4/A5/Letter). */
  detected_format?: string
  /** Confidence of the format detection; the UI multiplies it by 100 and shows it as a percentage. */
  format_confidence?: number
  /** NOTE(review): not read by the UI in this change — presumably width/height of the detected page; confirm. */
  aspect_ratio?: number
  /** Removed border per side as a fraction of the image; the UI formats each value as a percentage (O/U/L/R). */
  border_fractions?: { top: number; bottom: number; left: number; right: number }
  /** NOTE(review): presumably set when the user skipped cropping via `/crop/skip`; confirm against the backend. */
  skipped?: boolean
  /** Processing time; rendered by the UI with an "s" suffix when present. */
  duration_seconds?: number
 }
 export interface SessionInfo {
  session_id: string
  filename: string
@@ -67,6 +87,8 @@ export interface SessionInfo {
  current_step?: number
  document_category?: DocumentCategory
  doc_type?: string
  orientation_result?: OrientationResult
  crop_result?: CropResult
  deskew_result?: DeskewResult
  dewarp_result?: DewarpResult
  column_result?: ColumnResult
@@ -85,7 +107,6 @@ export interface DeskewResult {
  angle_applied: number
  method_used: 'hough' | 'word_alignment' | 'manual' | 'iterative' | 'two_pass' | 'three_pass' | 'manual_combined'
  confidence: number
  orientation_degrees?: number
  duration_seconds: number
  deskewed_image_url: string
  binarized_image_url: string
@@ -288,6 +309,8 @@ export const IMAGE_STYLES: { value: ImageStyle; label: string }[] = [
 ]
 export const PIPELINE_STEPS: PipelineStep[] = [
  { id: 'orientation', name: 'Orientierung', icon: '🔄', status: 'pending' },
  { id: 'crop', name: 'Zuschneiden', icon: '✂️', status: 'pending' },
  { id: 'deskew', name: 'Begradigung', icon: '📐', status: 'pending' },
  { id: 'dewarp', name: 'Entzerrung', icon: '🔧', status: 'pending' },
  { id: 'columns', name: 'Spalten', icon: '📊', status: 'pending' },
--- a/admin-lehrer/components/ocr-pipeline/DeskewControls.tsx
+++ b/admin-lehrer/components/ocr-pipeline/DeskewControls.tsx
@@ -59,11 +59,6 @@ export function DeskewControls({
      {/* Results */}
      {deskewResult && (
        <div className="bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 p-4">
          {deskewResult.orientation_degrees ? (
            <div className="flex items-center gap-2 mb-2 px-2 py-1 bg-amber-50 dark:bg-amber-900/20 text-amber-700 dark:text-amber-400 rounded text-xs">
              Seite wurde um {deskewResult.orientation_degrees}° gedreht (Orientierungskorrektur)
            </div>
          ) : null}
          <div className="flex flex-wrap items-center gap-3 text-sm">
            <div>
              <span className="text-gray-500">Winkel:</span>{' '}
--- a/admin-lehrer/components/ocr-pipeline/StepCrop.tsx
+++ b/admin-lehrer/components/ocr-pipeline/StepCrop.tsx
@@ -0,0 +1,185 @@
 'use client'
 import { useEffect, useState } from 'react'
 import type { CropResult } from '@/app/(admin)/ai/ocr-pipeline/types'
 import { ImageCompareView } from './ImageCompareView'
 const KLAUSUR_API = '/klausur-api'
 /** Props of the crop wizard step. */
 interface StepCropProps {
  /** Session whose oriented image is cropped; with `null` the step only renders a placeholder. */
  sessionId: string | null
  /** Invoked when the user continues ("Weiter") or skips ("Ueberspringen") the step. */
  onNext: () => void
 }
 /**
  * Wizard step 2 ("Zuschneiden"): shows the oriented page next to its cropped version.
  *
  * For every session id the step first reads the session and reuses a stored
  * `crop_result`; only when none exists does it POST to the `/crop` endpoint.
  * The run is keyed by session id, so switching to another session on the same
  * component instance discards the previous result and crops the new session.
  *
  * If the automatic crop fails, the error is shown together with a retry button
  * and the skip button, so the user is never left without a way forward.
  *
  * @param sessionId - Session to crop; `null` renders a placeholder and triggers nothing.
  * @param onNext - Called when the user continues or skips the step.
  */
 export function StepCrop({ sessionId, onNext }: StepCropProps) {
  const [cropResult, setCropResult] = useState<CropResult | null>(null)
  const [cropping, setCropping] = useState(false)
  const [error, setError] = useState<string | null>(null)
  // Session id the auto-crop was last started for; `null` means "not started"
  // and is also used to request a retry for the current session.
  const [startedFor, setStartedFor] = useState<string | null>(null)

  // Auto-trigger crop once per session id
  useEffect(() => {
    if (!sessionId || startedFor === sessionId) return
    setStartedFor(sessionId)
    // A result from a previously shown session must not be displayed for this one.
    setCropResult(null)

    const runCrop = async () => {
      setCropping(true)
      setError(null)
      try {
        // Reuse an existing crop result instead of cropping again
        const sessionRes = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}`)
        if (sessionRes.ok) {
          const sessionData = await sessionRes.json()
          if (sessionData.crop_result) {
            setCropResult(sessionData.crop_result)
            return
          }
        }

        const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/crop`, {
          method: 'POST',
        })
        if (!res.ok) {
          // Prefer the backend's error detail when it is a plain string;
          // otherwise fall back to the generic message.
          const body = await res.json().catch(() => null)
          const detail = typeof body?.detail === 'string' ? body.detail : ''
          throw new Error(detail || 'Zuschnitt fehlgeschlagen')
        }
        setCropResult(await res.json())
      } catch (e) {
        setError(e instanceof Error ? e.message : 'Unbekannter Fehler')
      } finally {
        setCropping(false)
      }
    }

    runCrop()
  }, [sessionId, startedFor])

  // Clearing the marker makes the effect above run again for the current session.
  const handleRetry = () => {
    setError(null)
    setStartedFor(null)
  }

  // Skipping is best-effort: the wizard advances even if the skip request fails.
  const handleSkip = async () => {
    if (!sessionId) return
    try {
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/crop/skip`, {
        method: 'POST',
      })
      if (res.ok) {
        const data = await res.json()
        setCropResult(data)
      }
    } catch (e) {
      console.error('Skip crop failed:', e)
    }
    onNext()
  }

  if (!sessionId) {
    return <div className="text-sm text-gray-400">Keine Session ausgewaehlt.</div>
  }

  const orientedUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/oriented`
  const croppedUrl = cropResult
    ? `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/cropped`
    : null
  // Offer actions once there is a result, or once a run has failed and finished.
  const showActions = cropResult !== null || (error !== null && !cropping)

  return (
    <div className="space-y-4">
      {/* Loading indicator */}
      {cropping && (
        <div className="flex items-center gap-2 text-teal-600 dark:text-teal-400 text-sm">
          <div className="animate-spin w-4 h-4 border-2 border-teal-500 border-t-transparent rounded-full" />
          Scannerraender werden erkannt...
        </div>
      )}
      {/* Image comparison */}
      <ImageCompareView
        originalUrl={orientedUrl}
        deskewedUrl={croppedUrl}
        showGrid={false}
        showBinarized={false}
        binarizedUrl={null}
        leftLabel="Orientiert"
        rightLabel="Zugeschnitten"
      />
      {/* Crop result info */}
      {cropResult && (
        <div className="bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 p-4">
          <div className="flex flex-wrap items-center gap-3 text-sm">
            {cropResult.crop_applied ? (
              <>
                <span className="inline-flex items-center gap-1.5 px-3 py-1 rounded-full bg-amber-50 dark:bg-amber-900/20 text-amber-700 dark:text-amber-400 text-xs font-medium">
                  ✂️ Zugeschnitten
                </span>
                {cropResult.detected_format && (
                  <>
                    <div className="h-4 w-px bg-gray-300 dark:bg-gray-600" />
                    <span className="text-gray-600 dark:text-gray-400">
                      Format: <span className="font-medium">{cropResult.detected_format}</span>
                      {cropResult.format_confidence != null && (
                        <span className="text-gray-400 ml-1">
                          ({Math.round(cropResult.format_confidence * 100)}%)
                        </span>
                      )}
                    </span>
                  </>
                )}
                <div className="h-4 w-px bg-gray-300 dark:bg-gray-600" />
                <span className="text-gray-400 text-xs">
                  {cropResult.original_size.width}x{cropResult.original_size.height} → {cropResult.cropped_size.width}x{cropResult.cropped_size.height}
                </span>
                {cropResult.border_fractions && (
                  <>
                    <div className="h-4 w-px bg-gray-300 dark:bg-gray-600" />
                    <span className="text-gray-400 text-xs">
                      Raender: O={pct(cropResult.border_fractions.top)} U={pct(cropResult.border_fractions.bottom)} L={pct(cropResult.border_fractions.left)} R={pct(cropResult.border_fractions.right)}
                    </span>
                  </>
                )}
              </>
            ) : (
              <span className="inline-flex items-center gap-1.5 px-3 py-1 rounded-full bg-green-50 dark:bg-green-900/20 text-green-700 dark:text-green-400 text-xs font-medium">
                ✓ Kein Zuschnitt noetig
              </span>
            )}
            {cropResult.duration_seconds != null && (
              <span className="text-gray-400 text-xs ml-auto">
                {cropResult.duration_seconds}s
              </span>
            )}
          </div>
        </div>
      )}
      {/* Action buttons */}
      {showActions && (
        <div className="flex justify-between">
          <button
            onClick={handleSkip}
            className="px-4 py-2 text-sm text-gray-500 hover:text-gray-700 dark:text-gray-400 dark:hover:text-gray-200 transition-colors"
          >
            Ueberspringen
          </button>
          {cropResult ? (
            <button
              onClick={onNext}
              className="px-6 py-2 bg-teal-600 text-white rounded-lg hover:bg-teal-700 font-medium transition-colors"
            >
              Weiter &rarr;
            </button>
          ) : (
            <button
              onClick={handleRetry}
              className="px-6 py-2 bg-teal-600 text-white rounded-lg hover:bg-teal-700 font-medium transition-colors"
            >
              Erneut versuchen
            </button>
          )}
        </div>
      )}
      {error && (
        <div className="p-3 bg-red-50 dark:bg-red-900/20 text-red-600 dark:text-red-400 rounded-lg text-sm">
          {error}
        </div>
      )}
    </div>
  )
 }
 /** Formats a fraction (e.g. 0.05) as a percentage string with one decimal place ("5.0%"). */
 function pct(v: number): string {
  const percent = v * 100
  return percent.toFixed(1) + '%'
 }
--- a/admin-lehrer/components/ocr-pipeline/StepDeskew.tsx
+++ b/admin-lehrer/components/ocr-pipeline/StepDeskew.tsx
@@ -8,29 +8,27 @@ import { ImageCompareView } from './ImageCompareView'
 const KLAUSUR_API = '/klausur-api'
 interface StepDeskewProps {
-  sessionId?: string | null
+  sessionId: string | null
-  onNext: (sessionId: string) => void
+  onNext: () => void
 }
-export function StepDeskew({ sessionId: existingSessionId, onNext }: StepDeskewProps) {
+export function StepDeskew({ sessionId, onNext }: StepDeskewProps) {
  const [session, setSession] = useState<SessionInfo | null>(null)
  const [deskewResult, setDeskewResult] = useState<DeskewResult | null>(null)
  const [uploading, setUploading] = useState(false)
  const [deskewing, setDeskewing] = useState(false)
  const [applying, setApplying] = useState(false)
  const [showBinarized, setShowBinarized] = useState(false)
  const [showGrid, setShowGrid] = useState(true)
  const [error, setError] = useState<string | null>(null)
-  const [dragOver, setDragOver] = useState(false)
+  const [hasAutoRun, setHasAutoRun] = useState(false)
  const [sessionName, setSessionName] = useState('')
-  // Reload session data when navigating back from a later step
+  // Load session and auto-trigger deskew
  useEffect(() => {
-    if (!existingSessionId || session) return
+    if (!sessionId || session) return
-    const loadSession = async () => {
+    const loadAndDeskew = async () => {
      try {
-        const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${existingSessionId}`)
+        const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}`)
        if (!res.ok) return
        const data = await res.json()
@@ -39,57 +37,27 @@ export function StepDeskew({ sessionId: existingSessionId, onNext }: StepDeskewP
          filename: data.filename,
          image_width: data.image_width,
          image_height: data.image_height,
-          original_image_url: `${KLAUSUR_API}${data.original_image_url}`,
+          // Use cropped image as "before" view
          original_image_url: `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/cropped`,
        }
        setSession(sessionInfo)
-        // Reconstruct deskew result from session data
+        // If deskew result already exists, use it
        if (data.deskew_result) {
          const dr: DeskewResult = {
            ...data.deskew_result,
-            deskewed_image_url: `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${existingSessionId}/image/deskewed`,
+            deskewed_image_url: `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/deskewed`,
-            binarized_image_url: `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${existingSessionId}/image/binarized`,
+            binarized_image_url: `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/binarized`,
          }
          setDeskewResult(dr)
-        }
+          return
      } catch (e) {
        console.error('Failed to reload session:', e)
      }
        }
-    loadSession()
+        // Auto-trigger deskew if not already done
-  }, [existingSessionId, session])
+        if (!hasAutoRun) {
-
+          setHasAutoRun(true)
  const handleUpload = useCallback(async (file: File) => {
    setUploading(true)
    setError(null)
    setDeskewResult(null)
    try {
      const formData = new FormData()
      formData.append('file', file)
      if (sessionName.trim()) {
        formData.append('name', sessionName.trim())
      }
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions`, {
        method: 'POST',
        body: formData,
      })
      if (!res.ok) {
        const err = await res.json().catch(() => ({ detail: res.statusText }))
        throw new Error(err.detail || 'Upload fehlgeschlagen')
      }
      const data: SessionInfo = await res.json()
      // Prepend API prefix to relative URLs
      data.original_image_url = `${KLAUSUR_API}${data.original_image_url}`
      setSession(data)
      // Auto-trigger deskew
          setDeskewing(true)
-      const deskewRes = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${data.session_id}/deskew`, {
+          const deskewRes = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/deskew`, {
            method: 'POST',
          })
@@ -101,21 +69,24 @@ export function StepDeskew({ sessionId: existingSessionId, onNext }: StepDeskewP
          deskewData.deskewed_image_url = `${KLAUSUR_API}${deskewData.deskewed_image_url}`
          deskewData.binarized_image_url = `${KLAUSUR_API}${deskewData.binarized_image_url}`
          setDeskewResult(deskewData)
        }
      } catch (e) {
-      setError(e instanceof Error ? e.message : 'Unbekannter Fehler')
+        setError(e instanceof Error ? e.message : 'Fehler beim Laden')
      } finally {
      setUploading(false)
        setDeskewing(false)
      }
-  }, [])
+    }
    loadAndDeskew()
  }, [sessionId, session, hasAutoRun])
  const handleManualDeskew = useCallback(async (angle: number) => {
-    if (!session) return
+    if (!sessionId) return
    setApplying(true)
    setError(null)
    try {
-      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${session.session_id}/deskew/manual`, {
+      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/deskew/manual`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ angle }),
@@ -130,7 +101,6 @@ export function StepDeskew({ sessionId: existingSessionId, onNext }: StepDeskewP
              ...prev,
              angle_applied: data.angle_applied,
              method_used: data.method_used,
              // Force reload by appending timestamp
              deskewed_image_url: `${KLAUSUR_API}${data.deskewed_image_url}?t=${Date.now()}`,
            }
          : null,
@@ -140,12 +110,12 @@ export function StepDeskew({ sessionId: existingSessionId, onNext }: StepDeskewP
    } finally {
      setApplying(false)
    }
-  }, [session])
+  }, [sessionId])
  const handleGroundTruth = useCallback(async (gt: DeskewGroundTruth) => {
-    if (!session) return
+    if (!sessionId) return
    try {
-      await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${session.session_id}/ground-truth/deskew`, {
+      await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/ground-truth/deskew`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(gt),
@@ -153,89 +123,21 @@ export function StepDeskew({ sessionId: existingSessionId, onNext }: StepDeskewP
    } catch (e) {
      console.error('Ground truth save failed:', e)
    }
-  }, [session])
+  }, [sessionId])
-  const handleDrop = useCallback((e: React.DragEvent) => {
+  if (!sessionId) {
-    e.preventDefault()
+    return <div className="text-sm text-gray-400">Keine Session ausgewaehlt.</div>
    setDragOver(false)
    const file = e.dataTransfer.files[0]
    if (file) handleUpload(file)
  }, [handleUpload])
  const handleFileInput = useCallback((e: React.ChangeEvent<HTMLInputElement>) => {
    const file = e.target.files?.[0]
    if (file) handleUpload(file)
  }, [handleUpload])
  // Upload area (no session yet)
  if (!session) {
    return (
      <div className="space-y-4">
        {/* Session name input */}
        <div>
          <label className="block text-sm font-medium text-gray-600 dark:text-gray-400 mb-1">
            Session-Name (optional)
          </label>
          <input
            type="text"
            value={sessionName}
            onChange={(e) => setSessionName(e.target.value)}
            placeholder="z.B. Unit 3 Seite 42"
            className="w-full max-w-sm px-3 py-2 text-sm border rounded-lg dark:bg-gray-800 dark:border-gray-600 dark:text-gray-200 focus:outline-none focus:ring-2 focus:ring-teal-500"
          />
        </div>
        <div
          onDragOver={(e) => { e.preventDefault(); setDragOver(true) }}
          onDragLeave={() => setDragOver(false)}
          onDrop={handleDrop}
          className={`border-2 border-dashed rounded-xl p-12 text-center transition-colors ${
            dragOver
              ? 'border-teal-400 bg-teal-50 dark:bg-teal-900/20'
              : 'border-gray-300 dark:border-gray-600 hover:border-teal-400'
          }`}
        >
          {uploading ? (
            <div className="text-gray-500">
              <div className="animate-spin inline-block w-8 h-8 border-2 border-teal-500 border-t-transparent rounded-full mb-3" />
              <p>Wird hochgeladen...</p>
            </div>
          ) : (
            <>
              <div className="text-4xl mb-3">📄</div>
              <p className="text-gray-600 dark:text-gray-400 mb-2">
                PDF oder Bild hierher ziehen
              </p>
              <p className="text-sm text-gray-400 mb-4">oder</p>
              <label className="inline-block px-4 py-2 bg-teal-600 text-white rounded-lg cursor-pointer hover:bg-teal-700 transition-colors">
                Datei auswaehlen
                <input
                  type="file"
                  accept=".pdf,.png,.jpg,.jpeg,.tiff,.tif"
                  onChange={handleFileInput}
                  className="hidden"
                />
              </label>
            </>
          )}
        </div>
        {error && (
          <div className="p-3 bg-red-50 dark:bg-red-900/20 text-red-600 dark:text-red-400 rounded-lg text-sm">
            {error}
          </div>
        )}
      </div>
    )
  }
  // Session active: show comparison + controls
  return (
    <div className="space-y-4">
      {/* Filename */}
      {session && (
        <div className="text-sm text-gray-500 dark:text-gray-400">
          Datei: <span className="font-medium text-gray-700 dark:text-gray-300">{session.filename}</span>
          {' '}({session.image_width} x {session.image_height} px)
        </div>
      )}
      {/* Loading indicator */}
      {deskewing && (
@@ -246,13 +148,17 @@ export function StepDeskew({ sessionId: existingSessionId, onNext }: StepDeskewP
      )}
      {/* Image comparison */}
      {session && (
        <ImageCompareView
          originalUrl={session.original_image_url}
          deskewedUrl={deskewResult?.deskewed_image_url ?? null}
          showGrid={showGrid}
          showBinarized={showBinarized}
          binarizedUrl={deskewResult?.binarized_image_url ?? null}
          leftLabel="Zugeschnitten"
          rightLabel="Begradigt"
        />
      )}
      {/* Controls */}
      <DeskewControls
@@ -263,7 +169,7 @@ export function StepDeskew({ sessionId: existingSessionId, onNext }: StepDeskewP
        onToggleGrid={() => setShowGrid((v) => !v)}
        onManualDeskew={handleManualDeskew}
        onGroundTruth={handleGroundTruth}
-        onNext={() => session && onNext(session.session_id)}
+        onNext={onNext}
        isApplying={applying}
      />
--- a/admin-lehrer/components/ocr-pipeline/StepOrientation.tsx
+++ b/admin-lehrer/components/ocr-pipeline/StepOrientation.tsx
@@ -0,0 +1,247 @@
 'use client'
 import { useCallback, useEffect, useState } from 'react'
 import type { OrientationResult, SessionInfo } from '@/app/(admin)/ai/ocr-pipeline/types'
 import { ImageCompareView } from './ImageCompareView'
 const KLAUSUR_API = '/klausur-api'
 /** Props of the orientation wizard step (upload + orientation detection). */
 interface StepOrientationProps {
  /** Existing session to reload when navigating back; omitted or `null` shows the upload area. */
  sessionId?: string | null
  /** Invoked with the session id when the user continues to the next step. */
  onNext: (sessionId: string) => void
 }
 /**
  * POSTs to the session's `/orientation` endpoint and returns the parsed result.
  * Throws when the response status is not OK.
  */
 async function requestOrientation(sid: string): Promise<OrientationResult> {
  const orientRes = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}/orientation`, {
    method: 'POST',
  })
  if (!orientRes.ok) {
    throw new Error('Orientierungserkennung fehlgeschlagen')
  }
  const orientData = await orientRes.json()
  return {
    orientation_degrees: orientData.orientation_degrees,
    corrected: orientData.corrected,
    duration_seconds: orientData.duration_seconds,
  }
 }

 /**
  * Wizard step 1 ("Orientierung"): uploads a scan, creates the session and runs
  * orientation detection, then shows original vs. oriented image.
  *
  * Without a loaded session the upload area is shown. After a successful upload
  * the detection is triggered automatically. When navigating back with an
  * existing session id, the session and any stored `orientation_result` are
  * reloaded instead.
  *
  * If a session is loaded but no orientation result is available (detection
  * failed, or the reloaded session has none), a button allows running the
  * detection for that session, so the step never ends in a state without actions.
  *
  * @param sessionId - Existing session to reload; omitted or `null` starts with the upload area.
  * @param onNext - Called with the session id when the user continues.
  */
 export function StepOrientation({ sessionId: existingSessionId, onNext }: StepOrientationProps) {
  const [session, setSession] = useState<SessionInfo | null>(null)
  const [orientationResult, setOrientationResult] = useState<OrientationResult | null>(null)
  const [uploading, setUploading] = useState(false)
  const [detecting, setDetecting] = useState(false)
  const [error, setError] = useState<string | null>(null)
  const [dragOver, setDragOver] = useState(false)
  const [sessionName, setSessionName] = useState('')

  // Reload session data when navigating back
  useEffect(() => {
    if (!existingSessionId || session) return
    const loadSession = async () => {
      try {
        const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${existingSessionId}`)
        if (!res.ok) return
        const data = await res.json()
        const sessionInfo: SessionInfo = {
          session_id: data.session_id,
          filename: data.filename,
          image_width: data.image_width,
          image_height: data.image_height,
          original_image_url: `${KLAUSUR_API}${data.original_image_url}`,
        }
        // Publish the stored result before the session so the session view
        // never renders in a "no result yet" state for an already processed session.
        if (data.orientation_result) {
          setOrientationResult(data.orientation_result)
        }
        setSession(sessionInfo)
      } catch (e) {
        console.error('Failed to reload session:', e)
      }
    }
    loadSession()
  }, [existingSessionId, session])

  const handleUpload = useCallback(async (file: File) => {
    setUploading(true)
    setError(null)
    setOrientationResult(null)
    try {
      const formData = new FormData()
      formData.append('file', file)
      if (sessionName.trim()) {
        formData.append('name', sessionName.trim())
      }
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions`, {
        method: 'POST',
        body: formData,
      })
      if (!res.ok) {
        const err = await res.json().catch(() => ({ detail: res.statusText }))
        throw new Error(err.detail || 'Upload fehlgeschlagen')
      }
      const data: SessionInfo = await res.json()
      // Prepend API prefix to the relative image URL
      data.original_image_url = `${KLAUSUR_API}${data.original_image_url}`
      setSession(data)
      // Auto-trigger orientation detection
      setDetecting(true)
      setOrientationResult(await requestOrientation(data.session_id))
    } catch (e) {
      setError(e instanceof Error ? e.message : 'Unbekannter Fehler')
    } finally {
      setUploading(false)
      setDetecting(false)
    }
  }, [sessionName])

  // Runs (or re-runs) the detection for the session that is already loaded.
  const handleDetect = useCallback(async () => {
    if (!session) return
    setDetecting(true)
    setError(null)
    try {
      setOrientationResult(await requestOrientation(session.session_id))
    } catch (e) {
      setError(e instanceof Error ? e.message : 'Unbekannter Fehler')
    } finally {
      setDetecting(false)
    }
  }, [session])

  const handleDrop = useCallback((e: React.DragEvent) => {
    e.preventDefault()
    setDragOver(false)
    const file = e.dataTransfer.files[0]
    if (file) handleUpload(file)
  }, [handleUpload])

  const handleFileInput = useCallback((e: React.ChangeEvent<HTMLInputElement>) => {
    const file = e.target.files?.[0]
    if (file) handleUpload(file)
  }, [handleUpload])

  // Upload area (no session yet)
  if (!session) {
    return (
      <div className="space-y-4">
        {/* Session name input */}
        <div>
          <label className="block text-sm font-medium text-gray-600 dark:text-gray-400 mb-1">
            Session-Name (optional)
          </label>
          <input
            type="text"
            value={sessionName}
            onChange={(e) => setSessionName(e.target.value)}
            placeholder="z.B. Unit 3 Seite 42"
            className="w-full max-w-sm px-3 py-2 text-sm border rounded-lg dark:bg-gray-800 dark:border-gray-600 dark:text-gray-200 focus:outline-none focus:ring-2 focus:ring-teal-500"
          />
        </div>
        <div
          onDragOver={(e) => { e.preventDefault(); setDragOver(true) }}
          onDragLeave={() => setDragOver(false)}
          onDrop={handleDrop}
          className={`border-2 border-dashed rounded-xl p-12 text-center transition-colors ${
            dragOver
              ? 'border-teal-400 bg-teal-50 dark:bg-teal-900/20'
              : 'border-gray-300 dark:border-gray-600 hover:border-teal-400'
          }`}
        >
          {uploading ? (
            <div className="text-gray-500">
              <div className="animate-spin inline-block w-8 h-8 border-2 border-teal-500 border-t-transparent rounded-full mb-3" />
              <p>Wird hochgeladen...</p>
            </div>
          ) : (
            <>
              <div className="text-4xl mb-3">📄</div>
              <p className="text-gray-600 dark:text-gray-400 mb-2">
                PDF oder Bild hierher ziehen
              </p>
              <p className="text-sm text-gray-400 mb-4">oder</p>
              <label className="inline-block px-4 py-2 bg-teal-600 text-white rounded-lg cursor-pointer hover:bg-teal-700 transition-colors">
                Datei auswaehlen
                <input
                  type="file"
                  accept=".pdf,.png,.jpg,.jpeg,.tiff,.tif"
                  onChange={handleFileInput}
                  className="hidden"
                />
              </label>
            </>
          )}
        </div>
        {error && (
          <div className="p-3 bg-red-50 dark:bg-red-900/20 text-red-600 dark:text-red-400 rounded-lg text-sm">
            {error}
          </div>
        )}
      </div>
    )
  }

  // Session active: show orientation result
  const orientedUrl = orientationResult
    ? `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${session.session_id}/image/oriented`
    : null

  return (
    <div className="space-y-4">
      {/* Filename */}
      <div className="text-sm text-gray-500 dark:text-gray-400">
        Datei: <span className="font-medium text-gray-700 dark:text-gray-300">{session.filename}</span>
        {' '}({session.image_width} x {session.image_height} px)
      </div>
      {/* Loading indicator */}
      {detecting && (
        <div className="flex items-center gap-2 text-teal-600 dark:text-teal-400 text-sm">
          <div className="animate-spin w-4 h-4 border-2 border-teal-500 border-t-transparent rounded-full" />
          Orientierung wird erkannt...
        </div>
      )}
      {/* Image comparison */}
      <ImageCompareView
        originalUrl={session.original_image_url}
        deskewedUrl={orientedUrl}
        showGrid={false}
        showBinarized={false}
        binarizedUrl={null}
        leftLabel="Original"
        rightLabel="Orientiert"
      />
      {/* Orientation result badge */}
      {orientationResult && (
        <div className="bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 p-4">
          <div className="flex items-center gap-3 text-sm">
            {orientationResult.corrected ? (
              <span className="inline-flex items-center gap-1.5 px-3 py-1 rounded-full bg-amber-50 dark:bg-amber-900/20 text-amber-700 dark:text-amber-400 text-xs font-medium">
                🔄 {orientationResult.orientation_degrees}° korrigiert
              </span>
            ) : (
              <span className="inline-flex items-center gap-1.5 px-3 py-1 rounded-full bg-green-50 dark:bg-green-900/20 text-green-700 dark:text-green-400 text-xs font-medium">
                ✓ 0° (keine Drehung noetig)
              </span>
            )}
            <span className="text-gray-400 text-xs">
              {orientationResult.duration_seconds}s
            </span>
          </div>
        </div>
      )}
      {/* Next button, or a (re-)detect button while no result is available */}
      {orientationResult ? (
        <div className="flex justify-end">
          <button
            onClick={() => onNext(session.session_id)}
            className="px-6 py-2 bg-teal-600 text-white rounded-lg hover:bg-teal-700 font-medium transition-colors"
          >
            Weiter &rarr;
          </button>
        </div>
      ) : !detecting ? (
        <div className="flex justify-end">
          <button
            onClick={handleDetect}
            className="px-6 py-2 bg-teal-600 text-white rounded-lg hover:bg-teal-700 font-medium transition-colors"
          >
            {error ? 'Erneut versuchen' : 'Orientierung erkennen'}
          </button>
        </div>
      ) : null}
      {error && (
        <div className="p-3 bg-red-50 dark:bg-red-900/20 text-red-600 dark:text-red-400 rounded-lg text-sm">
          {error}
        </div>
      )}
    </div>
  )
 }
--- a/docs-src/services/klausur-service/OCR-Pipeline.md
+++ b/docs-src/services/klausur-service/OCR-Pipeline.md
@@ -1,12 +1,12 @@
 # OCR Pipeline - Schrittweise Seitenrekonstruktion
-**Version:** 3.0.0
+**Version:** 4.0.0
-**Status:** Produktiv (Schritte 1–8 implementiert)
+**Status:** Produktiv (Schritte 1–10 implementiert)
 **URL:** https://macmini:3002/ai/ocr-pipeline
 ## Uebersicht
-Die OCR Pipeline zerlegt den OCR-Prozess in **8 einzelne Schritte**, um eingescannte Seiten
+Die OCR Pipeline zerlegt den OCR-Prozess in **10 einzelne Schritte**, um eingescannte Seiten
 aus mehrspaltig gedruckten Schulbuechern Wort fuer Wort zu rekonstruieren.
 Jeder Schritt kann individuell geprueft, korrigiert und mit Ground-Truth-Daten versehen werden.
@@ -16,14 +16,16 @@ Jeder Schritt kann individuell geprueft, korrigiert und mit Ground-Truth-Daten v
 | Schritt | Name | Beschreibung | Status |
 |---------|------|--------------|--------|
-| 1 | Begradigung (Deskew) | Scan begradigen (Hough Lines + Word Alignment) | Implementiert |
+| 1 | Orientierung | 90/180/270° Drehungen von Scannern korrigieren | Implementiert |
-| 2 | Entzerrung (Dewarp) | Buchwoelbung entzerren (Vertikalkanten-Analyse) | Implementiert |
+| 2 | Zuschneiden (Crop) | Scannerraender entfernen, Papierformat (A4) erkennen | Implementiert |
-| 3 | Spaltenerkennung | Unsichtbare Spalten finden (Projektionsprofile + Wortvalidierung) | Implementiert |
+| 3 | Begradigung (Deskew) | Scan begradigen (Hough Lines + Word Alignment) | Implementiert |
-| 4 | Zeilenerkennung | Horizontale Zeilen + Kopf-/Fusszeilen-Klassifikation + Luecken-Heilung | Implementiert |
+| 4 | Entzerrung (Dewarp) | Buchwoelbung entzerren (Vertikalkanten-Analyse) | Implementiert |
-| 5 | Worterkennung | Hybrid-Grid: Breite Spalten full-page, schmale cell-crop | Implementiert |
+| 5 | Spaltenerkennung | Unsichtbare Spalten finden (Projektionsprofile + Wortvalidierung) | Implementiert |
-| 6 | Korrektur | Zeichenverwirrung + regel-basierte Rechtschreibkorrektur (SSE-Stream) | Implementiert |
+| 6 | Zeilenerkennung | Horizontale Zeilen + Kopf-/Fusszeilen-Klassifikation + Luecken-Heilung | Implementiert |
-| 7 | Rekonstruktion | Interaktive Zellenbearbeitung auf Bildhintergrund (Fabric.js) | Implementiert |
+| 7 | Worterkennung | Hybrid-Grid: Breite Spalten full-page, schmale cell-crop | Implementiert |
-| 8 | Validierung | Ground-Truth-Vergleich und Qualitaetspruefung | Implementiert |
+| 8 | Korrektur | Zeichenverwirrung + regel-basierte Rechtschreibkorrektur (SSE-Stream) | Implementiert |
 | 9 | Rekonstruktion | Interaktive Zellenbearbeitung auf Bildhintergrund (Fabric.js) | Implementiert |
 | 10 | Validierung | Ground-Truth-Vergleich und Qualitaetspruefung | Implementiert |
 ---
@@ -206,6 +208,7 @@ Alle Endpoints unter `/api/v1/ocr-pipeline/`.
 |---------|------|--------------|
 | `POST` | `/sessions/{id}/dewarp` | Automatische Entzerrung |
 | `POST` | `/sessions/{id}/dewarp/manual` | Manueller Scherungswinkel |
 | `POST` | `/sessions/{id}/adjust-combined` | Kombinierte Rotation + Shear Feinabstimmung |
 | `POST` | `/sessions/{id}/ground-truth/dewarp` | Ground Truth speichern |
 ### Schritt 3: Spalten
@@ -274,16 +277,48 @@ Die Dewarp-Erkennung misst die **vertikale Spaltenkippung** (dx/dy) statt Textze
 | Ensemble Min-Confidence | 0.35 | Mindest-Konfidenz fuer Korrektur |
 | Quality-Gate Skip | < 0.5° | Kleine Korrekturen ueberspringen Quality-Gate |
 ### Feinabstimmung (Combined Adjust)
 Der Endpoint `POST /sessions/{id}/adjust-combined` erlaubt die kombinierte Feinabstimmung von
 Rotation und Shear in einem Schritt. Im Frontend stehen **7 Schieberegler** zur Verfuegung:
 **Rotation (3 Paesse):**
 | Slider | Bereich | Beschreibung |
 |--------|---------|--------------|
 | P1 Iterative | ±5° | Erster Deskew-Pass (Hough Lines) |
 | P2 Word-Alignment | ±3° | Zweiter Pass (Wort-Ausrichtung) |
 | P3 Textline | ±3° | Dritter Pass (Textzeilen-Regression) |
 Die Summe aller drei ergibt den finalen Rotationswinkel.
 **Shear (4 Methoden, Radio-Auswahl):**
 | Slider | Bereich | Beschreibung |
 |--------|---------|--------------|
 | A: Textline Drift | ±5° | Textzeilen-Drift |
 | B: Projection Profile | ±5° | 2-Pass Projektionsprofil |
 | C: Vertical Edges | ±5° | Vertikalkanten-Analyse |
 | D: Ensemble | ±5° | Gewichteter Ensemble-Wert |
 Nur der per Radio-Button ausgewaehlte Shear-Wert wird verwendet.
 ```
 POST /sessions/{id}/adjust-combined
 Body: {"rotation_degrees": 1.23, "shear_degrees": -0.45}
 Response: {"method_used": "manual_combined", "shear_degrees": -0.45, "dewarped_image_url": "..."}
 ```
 ---
 ## Schritt 3: Spaltenerkennung (Detail)
 ### Algorithmus: `detect_column_geometry()`
-Zweistufige Erkennung: vertikale Projektionsprofile finden Luecken, Wort-Bounding-Boxes validieren.
+Mehrstufige Erkennung: Seite segmentieren, vertikale Projektionsprofile finden Luecken, Wort-Bounding-Boxes validieren.
 ```
-Bild → Binarisierung → Vertikalprofil → Lueckenerkennung → Wort-Validierung → ColumnGeometry
+Bild → Binarisierung → Seiten-Segmentierung → Vertikalprofil → Lueckenerkennung → Wort-Validierung → ColumnGeometry
 ```
 **Wichtige Implementierungsdetails:**
@@ -293,6 +328,54 @@ Bild → Binarisierung → Vertikalprofil → Lueckenerkennung → Wort-Validier
 - **Phantom-Spalten-Filter (Step 9):** Spalten mit Breite < 3 % der Content-Breite UND < 3 Woerter werden als Artefakte entfernt; die angrenzenden Spalten schliessen die Luecke.
 - **Spaltenzuweisung:** Woerter werden anhand des groessten horizontalen Ueberlappungsbereichs einer Spalte zugeordnet.
 ### Seiten-Segmentierung an Sub-Headern
 Farbige Zwischenueberschriften (z.B. „Unit 4: Bonnie Scotland" mit blauem Hintergrund)
 erzeugen nach Binarisierung Tinte ueber die gesamte Seitenbreite. Diese Baender fuellen
 Spaltenluecken im vertikalen Projektionsprofil auf und fuehren zu fragmentierten Spalten
 (z.B. 11 statt 5).
 **Loesung: Horizontale Gap-Segmentierung (Step 2b)**
 1. **Horizontales Projektionsprofil** berechnen: Zeilensummen ueber den Content-Bereich
 2. **Leere Zeilen** erkennen: Zeilen mit < 2% Tinten-Dichte (`H_GAP_THRESH = 0.02`)
 3. **Gaps sammeln**: Zusammenhaengende leere Zeilen zu Gaps buendeln (Mindestlaenge: `max(5, h/200)`)
 4. **Grosse Gaps identifizieren**: Gaps > 1.8× Median-Gap-Hoehe = Sub-Header-Trennungen
 5. **Segmente bilden**: Seite an grossen Gaps aufteilen
 6. **Groesstes Segment waehlen**: Das hoechste Segment wird fuer die vertikale Projektion verwendet
 ```
 ┌─────────────────────────────────┐
 │  Header / Titel                 │  ─── grosser Gap ───
 ├─────────────────────────────────┤
 │  EN  │  DE  │ Example │ Page   │  ← Segment 1 (groesster)
 │  ... │  ... │ ...     │ ...    │
 ├─────────────────────────────────┤
 │  Unit 4: Bonnie Scotland        │  ─── grosser Gap ───
 ├─────────────────────────────────┤
 │  EN  │  DE  │ Example │ Page   │  ← Segment 2
 │  ... │  ... │ ...     │ ...    │
 └─────────────────────────────────┘
 ```
 **Segment-gefilterte Wort-Validierung:**
 Die Wort-Validierung (Step 5) nutzt nur Tesseract-Woerter **innerhalb des gewaehlten Segments**.
 Woerter aus Sub-Header-Bereichen (die die volle Breite einnehmen) werden so ausgeschlossen
 und koennen die Spaltenluecken-Validierung nicht verfaelschen.
 ### Word-Coverage Gap Detection (Fallback)
 Wenn die pixel-basierte Projektion keine ausreichenden Spaltenluecken findet
 (z.B. bei Seiten mit Illustrationen, die Spaltenluecken teilweise verdecken),
 greift ein Fallback auf Basis der Tesseract-Wort-Bounding-Boxes:
 1. X-Achse in 2px-Bins aufteilen
 2. Pro Bin zaehlen, wie viele Segment-Woerter ihn ueberdecken
 3. Zusammenhaengende Bins mit 0 Woertern = Gap-Kandidaten
 4. Nur Gaps im inneren 90%-Bereich beruecksichtigen (Raender ignorieren)
 5. Gaps mit Mindestbreite (`max(8px, content_w * 0.5%)`) werden als Spaltenluecken akzeptiert
 ### Sub-Spalten-Erkennung: `_detect_sub_columns()`
 Erkennt versteckte Sub-Spalten innerhalb breiter Spalten (z.B. Seitenzahl-Spalte links neben EN-Vokabeln).
@@ -658,7 +741,7 @@ CREATE TABLE ocr_pipeline_sessions (
 | Schraeg gedruckte Seiten | Deskew erkennt Text-Rotation, nicht Seiten-Rotation | Manueller Winkel |
 | Sehr kleine Schrift (< 8pt) | Tesseract PSM 7 braucht min. Zeichengroesse | Vorher zoomen |
 | Handgeschriebene Eintraege | Tesseract/RapidOCR sind fuer Druckschrift optimiert | TrOCR-Engine |
-| Mehr als 4 Spalten | Projektionsprofil kann verschmelzen | Manuelle Spalten |
+| Mehr als 5 Spalten | Projektionsprofil kann verschmelzen (Segmentierung hilft) | Manuelle Spalten |
 | Farbige Marker (rot/blau) | HSV-Erkennung erzeugt False Positives | Manuell im Rekonstruktions-Editor |
 | 15%-Schwelle nicht breit validiert | Nur an einem Arbeitsblatt-Typ getestet | Diverse Schulbuchseiten testen |
@@ -699,6 +782,8 @@ ssh macmini "/usr/local/bin/docker compose -f /Users/benjaminadmin/Projekte/brea
 | Datum | Version | Aenderung |
 |-------|---------|----------|
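 | 2026-03-08 | 4.0.0 | Neue Schritte 1–2: Orientierung (0/90/180/270° via Tesseract OSD) + Zuschneiden (Scannerraender entfernen, Papierformat erkennen); Pipeline auf 10 Schritte erweitert |
 | 2026-03-05 | 3.1.0 | Spalten: Seiten-Segmentierung an Sub-Headern, Word-Coverage Fallback, Segment-gefilterte Validierung |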
 | 2026-03-05 | 3.0.1 | Dewarp: Feinabstimmung mit 7 Schiebereglern (3 Rotation + 4 Shear), Combined-Adjust-Endpoint |
 | 2026-03-05 | 3.0.0 | Doku-Update: Dokumenttyp-Erkennung, Hybrid-Grid, Sub-Column-Detection, Pipeline-Pfade |
 | 2026-03-04 | 2.2.0 | Dewarp: Vertikalkanten-Drift statt Textzeilen-Neigung, Schwellenwerte gesenkt |
 | 2026-03-04 | 2.1.0 | Sub-Column-Detection, expand_narrow_columns, Fabric.js Editor, PDF/DOCX-Export |
--- a/klausur-service/backend/main.py
+++ b/klausur-service/backend/main.py
@@ -42,7 +42,8 @@ try:
 except ImportError:
    trocr_router = None
 from vocab_worksheet_api import router as vocab_router, set_db_pool as set_vocab_db_pool, _init_vocab_table, _load_all_sessions, DATABASE_URL as VOCAB_DATABASE_URL
-from ocr_pipeline_api import router as ocr_pipeline_router
+from ocr_pipeline_api import router as ocr_pipeline_router, _cache as ocr_pipeline_cache
 from orientation_crop_api import router as orientation_crop_router, set_cache_ref as set_orientation_crop_cache
 from ocr_pipeline_session_store import init_ocr_pipeline_tables
 try:
    from handwriting_htr_api import router as htr_router
@@ -177,6 +178,8 @@ if trocr_router:
    app.include_router(trocr_router)      # TrOCR Handwriting OCR
 app.include_router(vocab_router)      # Vocabulary Worksheet Generator
 app.include_router(ocr_pipeline_router)  # OCR Pipeline (step-by-step)
 set_orientation_crop_cache(ocr_pipeline_cache)
 app.include_router(orientation_crop_router)  # OCR Pipeline: Orientation + Crop
 if htr_router:
    app.include_router(htr_router)            # Handwriting HTR (Klausur)
 if dsfa_rag_router:
--- a/klausur-service/backend/ocr_pipeline_api.py
+++ b/klausur-service/backend/ocr_pipeline_api.py
@@ -1,15 +1,17 @@
 """
 OCR Pipeline API - Schrittweise Seitenrekonstruktion.
-Zerlegt den OCR-Prozess in 8 einzelne Schritte:
+Zerlegt den OCR-Prozess in 10 einzelne Schritte:
-1. Deskewing - Scan begradigen
+1. Orientierung - 90/180/270° Drehungen korrigieren (orientation_crop_api.py)
-2. Dewarping - Buchwoelbung entzerren
+2. Zuschneiden - Scannerraender entfernen (orientation_crop_api.py)
-3. Spaltenerkennung - Unsichtbare Spalten finden
+3. Deskewing - Scan begradigen
-4. Zeilenerkennung - Horizontale Zeilen + Kopf-/Fusszeilen
+4. Dewarping - Buchwoelbung entzerren
-5. Worterkennung - OCR mit Bounding Boxes
+5. Spaltenerkennung - Unsichtbare Spalten finden
-6. LLM-Korrektur - OCR-Fehler per LLM korrigieren
+6. Zeilenerkennung - Horizontale Zeilen + Kopf-/Fusszeilen
-7. Seitenrekonstruktion - Seite nachbauen
+7. Worterkennung - OCR mit Bounding Boxes
-8. Ground Truth Validierung - Gesamtpruefung
+8. LLM-Korrektur - OCR-Fehler per LLM korrigieren
 9. Seitenrekonstruktion - Seite nachbauen
 10. Ground Truth Validierung - Gesamtpruefung
 Lizenz: Apache 2.0
 DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
@@ -54,7 +56,6 @@ from cv_vocab_pipeline import (
    deskew_image_by_word_alignment,
    deskew_image_iterative,
    deskew_two_pass,
    detect_and_fix_orientation,
    detect_column_geometry,
    detect_document_type,
    detect_row_geometry,
@@ -103,6 +104,8 @@ async def _load_session_to_cache(session_id: str) -> Dict[str, Any]:
        "id": session_id,
        **session,
        "original_bgr": None,
        "oriented_bgr": None,
        "cropped_bgr": None,
        "deskewed_bgr": None,
        "dewarped_bgr": None,
    }
@@ -110,6 +113,8 @@ async def _load_session_to_cache(session_id: str) -> Dict[str, Any]:
    # Decode images from DB into BGR numpy arrays
    for img_type, bgr_key in [
        ("original", "original_bgr"),
        ("oriented", "oriented_bgr"),
        ("cropped", "cropped_bgr"),
        ("deskewed", "deskewed_bgr"),
        ("dewarped", "dewarped_bgr"),
    ]:
@@ -252,8 +257,12 @@ async def create_session(
        "filename": filename,
        "name": session_name,
        "original_bgr": img_bgr,
        "oriented_bgr": None,
        "cropped_bgr": None,
        "deskewed_bgr": None,
        "dewarped_bgr": None,
        "orientation_result": None,
        "crop_result": None,
        "deskew_result": None,
        "dewarp_result": None,
        "ground_truth": {},
@@ -301,6 +310,10 @@ async def get_session_info(session_id: str):
        "doc_type": session.get("doc_type"),
    }
    if session.get("orientation_result"):
        result["orientation_result"] = session["orientation_result"]
    if session.get("crop_result"):
        result["crop_result"] = session["crop_result"]
    if session.get("deskew_result"):
        result["deskew_result"] = session["deskew_result"]
    if session.get("dewarp_result"):
@@ -427,7 +440,7 @@ async def _append_pipeline_log(
@router.get("/sessions/{session_id}/image/{image_type}")
 async def get_image(session_id: str, image_type: str):
    """Serve session images: original, oriented, cropped, deskewed, dewarped, binarized, columns-overlay, rows-overlay, words-overlay, or clean."""
-    valid_types = {"original", "deskewed", "dewarped", "binarized", "columns-overlay", "rows-overlay", "words-overlay", "clean"}
+    valid_types = {"original", "oriented", "cropped", "deskewed", "dewarped", "binarized", "columns-overlay", "rows-overlay", "words-overlay", "clean"}
    if image_type not in valid_types:
        raise HTTPException(status_code=400, detail=f"Unknown image type: {image_type}")
@@ -470,22 +483,13 @@ async def auto_deskew(session_id: str):
        await _load_session_to_cache(session_id)
    cached = _get_cached(session_id)
-    img_bgr = cached.get("original_bgr")
+    # Use cropped image as input (from step 2), fall back to oriented, then original
    # Select the first available image explicitly: the cached values are numpy arrays,
    # and chaining arrays with `or` raises ValueError (ambiguous truth value).
    img_bgr = next((cached[k] for k in ("cropped_bgr", "oriented_bgr", "original_bgr") if cached.get(k) is not None), None)
    if img_bgr is None:
-        raise HTTPException(status_code=400, detail="Original image not available")
+        raise HTTPException(status_code=400, detail="No image available for deskewing")
    t0 = time.time()
    # Orientation detection (fix 90/180/270° rotations from scanners)
    img_bgr, orientation_deg = detect_and_fix_orientation(img_bgr)
    if orientation_deg:
        # Update original in cache + DB so all subsequent steps use corrected image
        cached["original_bgr"] = img_bgr
        success_ori, ori_buf = cv2.imencode(".png", img_bgr)
        if success_ori:
            await update_session_db(session_id, original_png=ori_buf.tobytes())
        logger.info(f"OCR Pipeline: orientation corrected {orientation_deg}° for session {session_id}")
    # Two-pass deskew: iterative (±5°) + word-alignment residual check
    deskewed_bgr, angle_applied, two_pass_debug = deskew_two_pass(img_bgr.copy())
@@ -534,7 +538,6 @@ async def auto_deskew(session_id: str):
        "angle_residual": round(angle_residual, 3),
        "angle_textline": round(angle_textline, 3),
        "angle_applied": round(angle_applied, 3),
        "orientation_degrees": orientation_deg,
        "method_used": method_used,
        "confidence": round(confidence, 2),
        "duration_seconds": round(duration, 2),
@@ -550,7 +553,7 @@ async def auto_deskew(session_id: str):
    db_update = {
        "deskewed_png": deskewed_png,
        "deskew_result": deskew_result,
-        "current_step": 2,
+        "current_step": 4,
    }
    if binarized_png:
        db_update["binarized_png"] = binarized_png
@@ -563,7 +566,6 @@ async def auto_deskew(session_id: str):
                f"-> {method_used} total={angle_applied:.2f}")
    await _append_pipeline_log(session_id, "deskew", {
        "orientation": orientation_deg,
        "angle_applied": round(angle_applied, 3),
        "angle_iterative": round(angle_iterative, 3),
        "angle_residual": round(angle_residual, 3),
@@ -582,14 +584,14 @@ async def auto_deskew(session_id: str):
@router.post("/sessions/{session_id}/deskew/manual")
 async def manual_deskew(session_id: str, req: ManualDeskewRequest):
-    """Apply a manual rotation angle to the original image."""
+    """Apply a manual rotation angle to the cropped image."""
    if session_id not in _cache:
        await _load_session_to_cache(session_id)
    cached = _get_cached(session_id)
-    img_bgr = cached.get("original_bgr")
+    img_bgr = cached.get("cropped_bgr") or cached.get("oriented_bgr") or cached.get("original_bgr")
    if img_bgr is None:
-        raise HTTPException(status_code=400, detail="Original image not available")
+        raise HTTPException(status_code=400, detail="No image available for deskewing")
    angle = max(-5.0, min(5.0, req.angle))
@@ -797,7 +799,7 @@ async def auto_dewarp(
        dewarped_png=dewarped_png,
        dewarp_result=dewarp_result,
        auto_shear_degrees=dewarp_info.get("shear_degrees", 0.0),
-        current_step=3,
+        current_step=5,
    )
    logger.info(f"OCR Pipeline: dewarp session {session_id}: "
@@ -1109,7 +1111,7 @@ async def detect_columns(session_id: str):
        column_result=column_result,
        row_result=None,
        word_result=None,
-        current_step=3,
+        current_step=5,
    )
    # Update cache
@@ -1335,7 +1337,7 @@ async def detect_rows(session_id: str):
        session_id,
        row_result=row_result,
        word_result=None,
-        current_step=4,
+        current_step=6,
    )
    cached["row_result"] = row_result
@@ -1601,7 +1603,7 @@ async def detect_words(
    await update_session_db(
        session_id,
        word_result=word_result,
-        current_step=5,
+        current_step=7,
    )
    cached["word_result"] = word_result
@@ -1745,7 +1747,7 @@ async def _word_batch_stream_generator(
        word_result["summary"]["with_german"] = sum(1 for e in entries if e.get("german"))
        vocab_entries = entries
-    await update_session_db(session_id, word_result=word_result, current_step=5)
+    await update_session_db(session_id, word_result=word_result, current_step=7)
    cached["word_result"] = word_result
    logger.info(f"OCR Pipeline SSE batch: words session {session_id}: "
@@ -1892,7 +1894,7 @@ async def _word_stream_generator(
    await update_session_db(
        session_id,
        word_result=word_result,
-        current_step=5,
+        current_step=7,
    )
    cached["word_result"] = word_result
@@ -2016,7 +2018,7 @@ async def run_llm_review(session_id: str, request: Request, stream: bool = False
        "duration_ms": result["duration_ms"],
        "entries_corrected": result["entries_corrected"],
    }
-    await update_session_db(session_id, word_result=word_result, current_step=6)
+    await update_session_db(session_id, word_result=word_result, current_step=8)
    if session_id in _cache:
        _cache[session_id]["word_result"] = word_result
@@ -2065,7 +2067,7 @@ async def _llm_review_stream_generator(
                    "duration_ms": event["duration_ms"],
                    "entries_corrected": event["entries_corrected"],
                }
-                await update_session_db(session_id, word_result=word_result, current_step=6)
+                await update_session_db(session_id, word_result=word_result, current_step=8)
                if session_id in _cache:
                    _cache[session_id]["word_result"] = word_result
@@ -2153,7 +2155,7 @@ async def save_reconstruction(session_id: str, request: Request):
    cell_updates = body.get("cells", [])
    if not cell_updates:
-        await update_session_db(session_id, current_step=7)
+        await update_session_db(session_id, current_step=9)
        return {"session_id": session_id, "updated": 0}
    # Build update map: cell_id -> new text
@@ -2189,7 +2191,7 @@ async def save_reconstruction(session_id: str, request: Request):
        if "entries" in word_result:
            word_result["entries"] = entries
-    await update_session_db(session_id, word_result=word_result, current_step=7)
+    await update_session_db(session_id, word_result=word_result, current_step=9)
    if session_id in _cache:
        _cache[session_id]["word_result"] = word_result
@@ -2572,7 +2574,7 @@ async def save_validation(session_id: str, req: ValidationRequest):
    """Save final validation results for step 10.
    Stores notes, score, and preserves any detected/generated image regions.
-    Sets current_step = 8 to mark pipeline as complete.
+    Sets current_step = 10 to mark pipeline as complete.
    """
    session = await get_session_db(session_id)
    if not session:
@@ -2585,7 +2587,7 @@ async def save_validation(session_id: str, req: ValidationRequest):
    validation["score"] = req.score
    ground_truth["validation"] = validation
-    await update_session_db(session_id, ground_truth=ground_truth, current_step=8)
+    await update_session_db(session_id, ground_truth=ground_truth, current_step=10)
    if session_id in _cache:
        _cache[session_id]["ground_truth"] = ground_truth
@@ -2619,12 +2621,14 @@ async def reprocess_session(session_id: str, request: Request):
    Body: {"from_step": 5}  (1-indexed step number)
    Clears downstream results:
-    - from_step <= 1: deskew_result, dewarp_result, column_result, row_result, word_result
+    - from_step <= 1: orientation_result, crop_result, deskew_result, dewarp_result, column_result, row_result, word_result
-    - from_step <= 2: dewarp_result, column_result, row_result, word_result
+    - from_step <= 2: crop_result, deskew_result, dewarp_result, column_result, row_result, word_result
-    - from_step <= 3: column_result, row_result, word_result
+    - from_step <= 3: deskew_result, dewarp_result, column_result, row_result, word_result
-    - from_step <= 4: row_result, word_result
+    - from_step <= 4: dewarp_result, column_result, row_result, word_result
-    - from_step <= 5: word_result (cells, vocab_entries)
+    - from_step <= 5: column_result, row_result, word_result
-    - from_step <= 6: word_result.llm_review only
+    - from_step <= 6: row_result, word_result
    - from_step <= 7: word_result (cells, vocab_entries)
    - from_step <= 8: word_result.llm_review only
    """
    session = await get_session_db(session_id)
    if not session:
@@ -2632,15 +2636,15 @@ async def reprocess_session(session_id: str, request: Request):
    body = await request.json()
    from_step = body.get("from_step", 1)
-    if not isinstance(from_step, int) or from_step < 1 or from_step > 7:
+    if not isinstance(from_step, int) or from_step < 1 or from_step > 9:
-        raise HTTPException(status_code=400, detail="from_step must be between 1 and 7")
+        raise HTTPException(status_code=400, detail="from_step must be between 1 and 9")
    update_kwargs: Dict[str, Any] = {"current_step": from_step}
    # Clear downstream data based on from_step
-    if from_step <= 5:
+    if from_step <= 7:
        update_kwargs["word_result"] = None
-    elif from_step == 6:
+    elif from_step == 8:
        # Only clear LLM review from word_result
        word_result = session.get("word_result")
        if word_result:
@@ -2648,14 +2652,18 @@ async def reprocess_session(session_id: str, request: Request):
            word_result.pop("llm_corrections", None)
            update_kwargs["word_result"] = word_result
-    if from_step <= 4:
+    if from_step <= 6:
        update_kwargs["row_result"] = None
-    if from_step <= 3:
+    if from_step <= 5:
        update_kwargs["column_result"] = None
-    if from_step <= 2:
+    if from_step <= 4:
        update_kwargs["dewarp_result"] = None
-    if from_step <= 1:
+    if from_step <= 3:
        update_kwargs["deskew_result"] = None
    if from_step <= 2:
        update_kwargs["crop_result"] = None
    if from_step <= 1:
        update_kwargs["orientation_result"] = None
    await update_session_db(session_id, **update_kwargs)
@@ -3074,7 +3082,7 @@ async def run_auto(session_id: str, req: RunAutoRequest, request: Request):
                    deskewed_png=deskewed_png,
                    deskew_result=deskew_result,
                    auto_rotation_degrees=float(angle_applied),
-                    current_step=2,
+                    current_step=4,
                )
                session = await get_session_db(session_id)
@@ -3137,7 +3145,7 @@ async def run_auto(session_id: str, req: RunAutoRequest, request: Request):
                    dewarped_png=dewarped_png,
                    dewarp_result=dewarp_result,
                    auto_shear_degrees=dewarp_info.get("shear_degrees", 0.0),
-                    current_step=3,
+                    current_step=5,
                )
                session = await get_session_db(session_id)
@@ -3196,7 +3204,7 @@ async def run_auto(session_id: str, req: RunAutoRequest, request: Request):
                cached["column_result"] = column_result
                await update_session_db(session_id, column_result=column_result,
-                                        row_result=None, word_result=None, current_step=4)
+                                        row_result=None, word_result=None, current_step=6)
                session = await get_session_db(session_id)
                steps_run.append("columns")
@@ -3273,7 +3281,7 @@ async def run_auto(session_id: str, req: RunAutoRequest, request: Request):
                }
                cached["row_result"] = row_result
-                await update_session_db(session_id, row_result=row_result, current_step=5)
+                await update_session_db(session_id, row_result=row_result, current_step=7)
                session = await get_session_db(session_id)
                steps_run.append("rows")
@@ -3381,7 +3389,7 @@ async def run_auto(session_id: str, req: RunAutoRequest, request: Request):
                    word_result_data["entry_count"] = len(entries)
                    word_result_data["summary"]["total_entries"] = len(entries)
-                await update_session_db(session_id, word_result=word_result_data, current_step=6)
+                await update_session_db(session_id, word_result=word_result_data, current_step=8)
                cached["word_result"] = word_result_data
                session = await get_session_db(session_id)
@@ -3426,7 +3434,7 @@ async def run_auto(session_id: str, req: RunAutoRequest, request: Request):
                    word_result_updated["llm_reviewed"] = True
                    word_result_updated["llm_model"] = OLLAMA_REVIEW_MODEL
-                    await update_session_db(session_id, word_result=word_result_updated, current_step=7)
+                    await update_session_db(session_id, word_result=word_result_updated, current_step=9)
                    cached["word_result"] = word_result_updated
                    steps_run.append("llm_review")
--- a/klausur-service/backend/ocr_pipeline_session_store.py
+++ b/klausur-service/backend/ocr_pipeline_session_store.py
@@ -68,7 +68,11 @@ async def init_ocr_pipeline_tables():
            ADD COLUMN IF NOT EXISTS doc_type VARCHAR(50),
            ADD COLUMN IF NOT EXISTS doc_type_result JSONB,
            ADD COLUMN IF NOT EXISTS document_category VARCHAR(50),
-            ADD COLUMN IF NOT EXISTS pipeline_log JSONB
+            ADD COLUMN IF NOT EXISTS pipeline_log JSONB,
            ADD COLUMN IF NOT EXISTS oriented_png BYTEA,
            ADD COLUMN IF NOT EXISTS cropped_png BYTEA,
            ADD COLUMN IF NOT EXISTS orientation_result JSONB,
            ADD COLUMN IF NOT EXISTS crop_result JSONB
        """)
@@ -90,6 +94,7 @@ async def create_session_db(
                id, name, filename, original_png, status, current_step
            ) VALUES ($1, $2, $3, $4, 'active', 1)
            RETURNING id, name, filename, status, current_step,
                      orientation_result, crop_result,
                      deskew_result, dewarp_result, column_result, row_result,
                      word_result, ground_truth, auto_shear_degrees,
                      doc_type, doc_type_result,
@@ -106,6 +111,7 @@ async def get_session_db(session_id: str) -> Optional[Dict[str, Any]]:
    async with pool.acquire() as conn:
        row = await conn.fetchrow("""
            SELECT id, name, filename, status, current_step,
                   orientation_result, crop_result,
                   deskew_result, dewarp_result, column_result, row_result,
                   word_result, ground_truth, auto_shear_degrees,
                   doc_type, doc_type_result,
@@ -123,6 +129,8 @@ async def get_session_image(session_id: str, image_type: str) -> Optional[bytes]
    """Load a single image (BYTEA) from the session."""
    column_map = {
        "original": "original_png",
        "oriented": "oriented_png",
        "cropped": "cropped_png",
        "deskewed": "deskewed_png",
        "binarized": "binarized_png",
        "dewarped": "dewarped_png",
@@ -150,15 +158,17 @@ async def update_session_db(session_id: str, **kwargs) -> Optional[Dict[str, Any
    allowed_fields = {
        'name', 'filename', 'status', 'current_step',
-        'original_png', 'deskewed_png', 'binarized_png', 'dewarped_png',
+        'original_png', 'oriented_png', 'cropped_png',
        'deskewed_png', 'binarized_png', 'dewarped_png',
        'clean_png', 'handwriting_removal_meta',
        'orientation_result', 'crop_result',
        'deskew_result', 'dewarp_result', 'column_result', 'row_result',
        'word_result', 'ground_truth', 'auto_shear_degrees',
        'doc_type', 'doc_type_result',
        'document_category', 'pipeline_log',
    }
-    jsonb_fields = {'deskew_result', 'dewarp_result', 'column_result', 'row_result', 'word_result', 'ground_truth', 'handwriting_removal_meta', 'doc_type_result', 'pipeline_log'}
+    jsonb_fields = {'orientation_result', 'crop_result', 'deskew_result', 'dewarp_result', 'column_result', 'row_result', 'word_result', 'ground_truth', 'handwriting_removal_meta', 'doc_type_result', 'pipeline_log'}
    for key, value in kwargs.items():
        if key in allowed_fields:
@@ -182,6 +192,7 @@ async def update_session_db(session_id: str, **kwargs) -> Optional[Dict[str, Any
            SET {', '.join(fields)}
            WHERE id = ${param_idx}
            RETURNING id, name, filename, status, current_step,
                      orientation_result, crop_result,
                      deskew_result, dewarp_result, column_result, row_result,
                      word_result, ground_truth, auto_shear_degrees,
                      doc_type, doc_type_result,
@@ -254,7 +265,7 @@ def _row_to_dict(row: asyncpg.Record) -> Dict[str, Any]:
            result[key] = result[key].isoformat()
    # JSONB → parsed (asyncpg returns str for JSONB)
-    for key in ['deskew_result', 'dewarp_result', 'column_result', 'row_result', 'word_result', 'ground_truth', 'doc_type_result', 'pipeline_log']:
+    for key in ['orientation_result', 'crop_result', 'deskew_result', 'dewarp_result', 'column_result', 'row_result', 'word_result', 'ground_truth', 'doc_type_result', 'pipeline_log']:
        if key in result and result[key] is not None:
            if isinstance(result[key], str):
                result[key] = json.loads(result[key])
--- a/klausur-service/backend/orientation_crop_api.py
+++ b/klausur-service/backend/orientation_crop_api.py
@@ -0,0 +1,330 @@
 """
 Orientation & Crop API - Steps 1-2 of the OCR Pipeline.
 Step 1: Orientation detection (fix 90/180/270 degree rotations)
 Step 2: Page cropping (remove scanner borders, detect paper format)
 These endpoints were extracted from the main pipeline to keep files manageable.
 """
 import logging
 import time
 from typing import Any, Dict, Optional
 import cv2
 import numpy as np
 from fastapi import APIRouter, HTTPException
 from pydantic import BaseModel
 from cv_vocab_pipeline import detect_and_fix_orientation
 from page_crop import detect_and_crop_page
 from ocr_pipeline_session_store import (
    get_session_db,
    get_session_image,
    update_session_db,
 )
 # Module logger, named after this module.
 logger = logging.getLogger(__name__)
 # Every route declared on this router is served under the /api/v1/ocr-pipeline prefix.
 router = APIRouter(prefix="/api/v1/ocr-pipeline", tags=["ocr-pipeline"])
 # Reference to the shared cache from ocr_pipeline_api (set in main.py).
 # Maps session id -> cache entry. Starts as an empty dict and is rebound
 # to the shared dict by set_cache_ref().
 _cache: Dict[str, Dict[str, Any]] = {}
 def set_cache_ref(cache: Dict[str, Dict[str, Any]]):
     """Point this module at the cache dict shared with ocr_pipeline_api.

     Args:
         cache: Dict keyed by session id. After this call the module-level
             ``_cache`` name refers to this very object (no copy is made).
     """
     # Rebind the module-level name so all endpoints in this module use the same dict.
     globals()["_cache"] = cache
 async def _ensure_cached(session_id: str) -> Dict[str, Any]:
     """Return the cache entry for a session, building it from the DB on a miss.

     On a cache miss the session row is loaded, each stored stage image
     (original, oriented, cropped, deskewed, dewarped) is fetched and decoded
     to a BGR array, and the assembled entry is stored in the shared cache.
     Stages without stored image data keep the value ``None``.

     Args:
         session_id: Id of the pipeline session.

     Returns:
         The cache entry dict for the session.

     Raises:
         HTTPException: 404 when the session cannot be loaded from the DB.
     """
     if session_id in _cache:
         return _cache[session_id]

     db_session = await get_session_db(session_id)
     if not db_session:
         raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

     stages = ("original", "oriented", "cropped", "deskewed", "dewarped")

     # Same layering as before: id first, then the DB fields, then empty image slots.
     entry: Dict[str, Any] = {"id": session_id}
     entry.update(db_session)
     entry.update({f"{stage}_bgr": None for stage in stages})

     for stage in stages:
         png_bytes = await get_session_image(session_id, stage)
         if not png_bytes:
             continue
         encoded = np.frombuffer(png_bytes, dtype=np.uint8)
         entry[f"{stage}_bgr"] = cv2.imdecode(encoded, cv2.IMREAD_COLOR)

     _cache[session_id] = entry
     return entry
 async def _append_pipeline_log(session_id: str, step: str, metrics: dict, duration_ms: int):
     """Append a step entry to the session's pipeline log and persist it.

     Does nothing when the session cannot be loaded from the DB.

     Args:
         session_id: Id of the pipeline session.
         step: Name of the completed step (e.g. "orientation", "crop").
         metrics: Step-specific metrics stored verbatim in the log entry.
         duration_ms: Step duration in milliseconds.
     """
     from datetime import datetime, timezone

     session = await get_session_db(session_id)
     if not session:
         return

     pipeline_log = session.get("pipeline_log") or {}
     # A stored log without a "steps" key must not break logging.
     steps = pipeline_log.setdefault("steps", [])
     steps.append({
         "step": step,
         # datetime.utcnow() is deprecated; dropping tzinfo keeps the exact
         # naive-UTC ISO string that utcnow().isoformat() produced.
         "completed_at": datetime.now(timezone.utc).replace(tzinfo=None).isoformat(),
         "success": True,
         "duration_ms": duration_ms,
         "metrics": metrics,
     })
     await update_session_db(session_id, pipeline_log=pipeline_log)
 # ---------------------------------------------------------------------------
 # Step 1: Orientation
 # ---------------------------------------------------------------------------
@router.post("/sessions/{session_id}/orientation")
async def detect_orientation(session_id: str):
    """Step 1: detect and fix 90/180/270 degree scanner rotations.

    Runs orientation correction on a copy of the session's original image,
    stores the corrected image in the cache and in the DB (as oriented_png),
    advances the session to step 2 and appends an entry to the pipeline log.

    Raises:
        HTTPException: 400 when the session has no original image.
    """
    entry = await _ensure_cached(session_id)

    source_bgr = entry.get("original_bgr")
    if source_bgr is None:
        raise HTTPException(status_code=400, detail="Original image not available")

    # Time only the detection/correction call itself.
    started = time.time()
    oriented_bgr, orientation_deg = detect_and_fix_orientation(source_bgr.copy())
    elapsed = time.time() - started

    was_corrected = orientation_deg != 0
    orientation_result = {
        "orientation_degrees": orientation_deg,
        "corrected": was_corrected,
        "duration_seconds": round(elapsed, 2),
    }

    # PNG-encode for persistence; a failed encode is stored as empty bytes.
    encoded_ok, buffer = cv2.imencode(".png", oriented_bgr)
    oriented_png = b""
    if encoded_ok:
        oriented_png = buffer.tobytes()

    entry["oriented_bgr"] = oriented_bgr
    entry["orientation_result"] = orientation_result

    await update_session_db(
        session_id,
        oriented_png=oriented_png,
        orientation_result=orientation_result,
        current_step=2,
    )

    change_label = "corrected" if orientation_deg else "no change"
    logger.info(
        "OCR Pipeline: orientation session %s: %d° (%s) in %.2fs",
        session_id, orientation_deg, change_label, elapsed,
    )

    log_metrics = {
        "orientation_degrees": orientation_deg,
        "corrected": was_corrected,
    }
    await _append_pipeline_log(
        session_id, "orientation", log_metrics, duration_ms=int(elapsed * 1000)
    )

    height, width = oriented_bgr.shape[:2]
    response = {"session_id": session_id}
    response.update(orientation_result)
    response["image_width"] = width
    response["image_height"] = height
    response["oriented_image_url"] = f"/api/v1/ocr-pipeline/sessions/{session_id}/image/oriented"
    return response
 # ---------------------------------------------------------------------------
 # Step 2: Crop
 # ---------------------------------------------------------------------------
@router.post("/sessions/{session_id}/crop")
async def auto_crop(session_id: str):
    """Step 2: auto-detect and crop scanner borders.

    Uses the oriented image when one exists, otherwise the original image.
    Detects the page boundary, crops, stores the result in the cache and in
    the DB (as cropped_png), advances the session to step 3 and appends an
    entry to the pipeline log.

    Raises:
        HTTPException: 400 when neither an oriented nor an original image
            is available.
    """
    cached = await _ensure_cached(session_id)

    # Use oriented image if available, else original.
    # Explicit None checks are required: `a or b` would call bool() on a
    # NumPy array, which raises ValueError for multi-element arrays.
    img_bgr = cached.get("oriented_bgr")
    if img_bgr is None:
        img_bgr = cached.get("original_bgr")
    if img_bgr is None:
        raise HTTPException(status_code=400, detail="No image available for cropping")

    t0 = time.time()
    cropped_bgr, crop_info = detect_and_crop_page(img_bgr)
    duration = time.time() - t0
    crop_info["duration_seconds"] = round(duration, 2)

    # Encode cropped image; a failed encode is stored as empty bytes.
    success, png_buf = cv2.imencode(".png", cropped_bgr)
    cropped_png = png_buf.tobytes() if success else b""

    # Update cache
    cached["cropped_bgr"] = cropped_bgr
    cached["crop_result"] = crop_info

    # Persist to DB
    await update_session_db(
        session_id,
        cropped_png=cropped_png,
        crop_result=crop_info,
        current_step=3,
    )

    logger.info(
        "OCR Pipeline: crop session %s: applied=%s format=%s in %.2fs",
        session_id, crop_info["crop_applied"],
        crop_info.get("detected_format", "?"),
        duration,
    )

    await _append_pipeline_log(session_id, "crop", {
        "crop_applied": crop_info["crop_applied"],
        "detected_format": crop_info.get("detected_format"),
        "format_confidence": crop_info.get("format_confidence"),
    }, duration_ms=int(duration * 1000))

    h, w = cropped_bgr.shape[:2]
    return {
        "session_id": session_id,
        **crop_info,
        "image_width": w,
        "image_height": h,
        "cropped_image_url": f"/api/v1/ocr-pipeline/sessions/{session_id}/image/cropped",
    }
 # Request body for POST /sessions/{session_id}/crop/manual.
 # All four values are percentages of the source image's width/height;
 # (x, y) is the top-left corner of the crop rectangle. manual_crop converts
 # them to pixels and clamps the rectangle to the image bounds.
 class ManualCropRequest(BaseModel):
    x: float       # left edge, percentage 0-100 of image width
    y: float       # top edge, percentage 0-100 of image height
    width: float   # rectangle width, percentage 0-100 of image width
    height: float  # rectangle height, percentage 0-100 of image height
@router.post("/sessions/{session_id}/crop/manual")
async def manual_crop(session_id: str, req: ManualCropRequest):
    """Manually crop using percentage coordinates.

    Uses the oriented image when one exists, otherwise the original image.
    The requested rectangle is converted to pixels, clamped to the image
    bounds (minimum size 1x1), cropped, stored in the cache and in the DB
    (as cropped_png), and the session is advanced to step 3.

    Args:
        session_id: Id of the pipeline session.
        req: Crop rectangle as percentages of the source image size.

    Raises:
        HTTPException: 400 when no source image is available or when a
            coordinate is NaN/infinite.
    """
    import math

    cached = await _ensure_cached(session_id)

    # Explicit None checks are required: `a or b` would call bool() on a
    # NumPy array, which raises ValueError for multi-element arrays.
    img_bgr = cached.get("oriented_bgr")
    if img_bgr is None:
        img_bgr = cached.get("original_bgr")
    if img_bgr is None:
        raise HTTPException(status_code=400, detail="No image available for cropping")

    # int() cannot convert NaN/inf; reject them as a client error instead of
    # letting the conversion below fail with an unhandled exception.
    if not all(math.isfinite(v) for v in (req.x, req.y, req.width, req.height)):
        raise HTTPException(status_code=400, detail="Crop coordinates must be finite numbers")

    h, w = img_bgr.shape[:2]

    # Convert percentages to pixels
    px_x = int(w * req.x / 100.0)
    px_y = int(h * req.y / 100.0)
    px_w = int(w * req.width / 100.0)
    px_h = int(h * req.height / 100.0)

    # Clamp: origin inside the image, size at least 1px and within bounds
    px_x = max(0, min(px_x, w - 1))
    px_y = max(0, min(px_y, h - 1))
    px_w = max(1, min(px_w, w - px_x))
    px_h = max(1, min(px_h, h - px_y))

    cropped_bgr = img_bgr[px_y:px_y + px_h, px_x:px_x + px_w].copy()

    # A failed encode is stored as empty bytes.
    success, png_buf = cv2.imencode(".png", cropped_bgr)
    cropped_png = png_buf.tobytes() if success else b""

    crop_result = {
        "crop_applied": True,
        "crop_rect": {"x": px_x, "y": px_y, "width": px_w, "height": px_h},
        "crop_rect_pct": {"x": round(req.x, 2), "y": round(req.y, 2),
                          "width": round(req.width, 2), "height": round(req.height, 2)},
        "original_size": {"width": w, "height": h},
        "cropped_size": {"width": px_w, "height": px_h},
        "method": "manual",
    }

    cached["cropped_bgr"] = cropped_bgr
    cached["crop_result"] = crop_result

    await update_session_db(
        session_id,
        cropped_png=cropped_png,
        crop_result=crop_result,
        current_step=3,
    )

    ch, cw = cropped_bgr.shape[:2]
    return {
        "session_id": session_id,
        **crop_result,
        "image_width": cw,
        "image_height": ch,
        "cropped_image_url": f"/api/v1/ocr-pipeline/sessions/{session_id}/image/cropped",
    }
@router.post("/sessions/{session_id}/crop/skip")
async def skip_crop(session_id: str):
    """Skip cropping — use oriented (or original) image as-is.

    The chosen image is stored unchanged as the cropped image (identity
    crop) in the cache and in the DB, and the session is advanced to step 3.

    Raises:
        HTTPException: 400 when neither an oriented nor an original image
            is available.
    """
    cached = await _ensure_cached(session_id)

    # Explicit None checks are required: `a or b` would call bool() on a
    # NumPy array, which raises ValueError for multi-element arrays.
    img_bgr = cached.get("oriented_bgr")
    if img_bgr is None:
        img_bgr = cached.get("original_bgr")
    if img_bgr is None:
        raise HTTPException(status_code=400, detail="No image available")

    h, w = img_bgr.shape[:2]

    # Store the oriented image as cropped (identity crop);
    # a failed encode is stored as empty bytes.
    success, png_buf = cv2.imencode(".png", img_bgr)
    cropped_png = png_buf.tobytes() if success else b""

    crop_result = {
        "crop_applied": False,
        "skipped": True,
        "original_size": {"width": w, "height": h},
        "cropped_size": {"width": w, "height": h},
    }

    # Note: the cache entry shares the array object with the source image.
    cached["cropped_bgr"] = img_bgr
    cached["crop_result"] = crop_result

    await update_session_db(
        session_id,
        cropped_png=cropped_png,
        crop_result=crop_result,
        current_step=3,
    )

    return {
        "session_id": session_id,
        **crop_result,
        "image_width": w,
        "image_height": h,
        "cropped_image_url": f"/api/v1/ocr-pipeline/sessions/{session_id}/image/cropped",
    }
--- a/klausur-service/backend/page_crop.py
+++ b/klausur-service/backend/page_crop.py
@@ -0,0 +1,187 @@
 """
 Page Crop - Automatic scanner border removal and page format detection.
 Detects the paper boundary in a scanned image and crops away scanner borders.
 Also identifies the paper format (A4, Letter, etc.) from the aspect ratio.
 License: Apache 2.0
 """
 import logging
 from typing import Dict, Any, Tuple
 import cv2
 import numpy as np
 logger = logging.getLogger(__name__)
 # Known paper format aspect ratios (height / width, portrait orientation),
 # computed from the nominal sheet sizes (mm for A-series, inches for US sizes).
 # NOTE: A3, A4 and A5 differ only in the third decimal place, so a match
 # based on aspect ratio alone cannot reliably tell these three apart.
 PAPER_FORMATS = {
    "A4": 297.0 / 210.0,       # 1.4143
    "A5": 210.0 / 148.0,       # 1.4189
    "Letter": 11.0 / 8.5,      # 1.2941
    "Legal": 14.0 / 8.5,       # 1.6471
    "A3": 420.0 / 297.0,       # 1.4141
 }
 def detect_and_crop_page(
     img_bgr: np.ndarray,
     min_border_fraction: float = 0.01,
 ) -> Tuple[np.ndarray, Dict[str, Any]]:
     """Detect page boundary and crop scanner borders.

     Algorithm:
     1. Grayscale + GaussianBlur to smooth out text
     2. Otsu threshold (page=bright, scanner border=dark)
     3. Morphological close to fill gaps
     4. Find largest external contour = page
     5. If the contour covers >95% of the image area -> no crop
     6. Bounding rect of the contour -> border fraction per side
     7. If every border is below min_border_fraction -> no crop
     8. Grow the rect by a 0.5% safety margin, clamped to the image
     9. If the crop rect is smaller than 50% of the image area -> no crop
     10. Crop and match the aspect ratio to known paper formats

     Args:
         img_bgr: Input BGR image
         min_border_fraction: Minimum border fraction to trigger crop (default 1%)

     Returns:
         Tuple of (image, result_dict). When no crop is applied the input
         array itself is returned (not a copy) and result_dict["crop_applied"]
         is False; otherwise a cropped copy is returned. result_dict also
         carries crop_rect / crop_rect_pct, original_size, cropped_size,
         detected_format, format_confidence, aspect_ratio and border_fractions.
     """
     h, w = img_bgr.shape[:2]
     total_area = h * w

     # Defaults describe the "no crop" outcome; fields are overwritten below.
     result: Dict[str, Any] = {
         "crop_applied": False,
         "crop_rect": None,
         "crop_rect_pct": None,
         "original_size": {"width": w, "height": h},
         "cropped_size": {"width": w, "height": h},
         "detected_format": None,
         "format_confidence": 0.0,
         "aspect_ratio": round(max(h, w) / max(min(h, w), 1), 4),
         "border_fractions": {"top": 0.0, "bottom": 0.0, "left": 0.0, "right": 0.0},
     }

     # 1. Grayscale + blur
     gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
     blurred = cv2.GaussianBlur(gray, (21, 21), 0)

     # 2. Otsu threshold
     _, binary = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

     # 3. Morphological close to fill text gaps
     kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (50, 50))
     closed = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)

     # 4. Find contours
     contours, _ = cv2.findContours(closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
     if not contours:
         logger.info("No contours found - returning original image")
         return img_bgr, result

     # Get the largest contour
     largest = max(contours, key=cv2.contourArea)
     contour_area = cv2.contourArea(largest)

     # 5. If contour covers >95% of image, no crop needed
     if contour_area > 0.95 * total_area:
         # No logging args are passed, so the message is emitted verbatim;
         # a doubled "%%" would show up literally in the log.
         logger.info("Page covers >95% of image - no crop needed")
         result["detected_format"], result["format_confidence"] = _detect_format(w, h)
         return img_bgr, result

     # 6. Get bounding rect
     rx, ry, rw, rh = cv2.boundingRect(largest)

     # Calculate border fractions (share of the image outside the rect, per side)
     border_top = ry / h
     border_bottom = (h - (ry + rh)) / h
     border_left = rx / w
     border_right = (w - (rx + rw)) / w
     result["border_fractions"] = {
         "top": round(border_top, 4),
         "bottom": round(border_bottom, 4),
         "left": round(border_left, 4),
         "right": round(border_right, 4),
     }

     # 7. Check if borders are significant enough to crop
     if all(f < min_border_fraction for f in [border_top, border_bottom, border_left, border_right]):
         logger.info("All borders < %.1f%% - no crop needed", min_border_fraction * 100)
         result["detected_format"], result["format_confidence"] = _detect_format(w, h)
         return img_bgr, result

     # 8. Add safety margin (0.5% of image dimensions)
     margin_x = int(w * 0.005)
     margin_y = int(h * 0.005)
     crop_x = max(0, rx - margin_x)
     crop_y = max(0, ry - margin_y)
     crop_x2 = min(w, rx + rw + margin_x)
     crop_y2 = min(h, ry + rh + margin_y)
     crop_w = crop_x2 - crop_x
     crop_h = crop_y2 - crop_y

     # Sanity check: cropped area should be at least 50% of original
     if crop_w * crop_h < 0.5 * total_area:
         logger.warning("Cropped area too small (%.0f%%) - skipping crop",
                        100.0 * crop_w * crop_h / total_area)
         result["detected_format"], result["format_confidence"] = _detect_format(w, h)
         return img_bgr, result

     # 9. Crop
     cropped = img_bgr[crop_y:crop_y2, crop_x:crop_x2].copy()

     # 10. Detect format from cropped dimensions
     detected_format, format_confidence = _detect_format(crop_w, crop_h)

     result["crop_applied"] = True
     result["crop_rect"] = {"x": crop_x, "y": crop_y, "width": crop_w, "height": crop_h}
     result["crop_rect_pct"] = {
         "x": round(100.0 * crop_x / w, 2),
         "y": round(100.0 * crop_y / h, 2),
         "width": round(100.0 * crop_w / w, 2),
         "height": round(100.0 * crop_h / h, 2),
     }
     result["cropped_size"] = {"width": crop_w, "height": crop_h}
     result["detected_format"] = detected_format
     result["format_confidence"] = format_confidence
     result["aspect_ratio"] = round(max(crop_w, crop_h) / max(min(crop_w, crop_h), 1), 4)

     logger.info("Page cropped: %dx%d -> %dx%d, format=%s (%.0f%%), borders: T=%.1f%% B=%.1f%% L=%.1f%% R=%.1f%%",
                 w, h, crop_w, crop_h, detected_format, format_confidence * 100,
                 border_top * 100, border_bottom * 100, border_left * 100, border_right * 100)

     return cropped, result
 def _detect_format(width: int, height: int) -> Tuple[str, float]:
    """Detect paper format from dimensions by comparing aspect ratios.
    Returns:
        (format_name, confidence) where confidence is 0.0-1.0
    """
    if width <= 0 or height <= 0:
        return "unknown", 0.0
    # Use portrait aspect ratio (taller / shorter)
    aspect = max(width, height) / min(width, height)
    best_format = "unknown"
    best_diff = float("inf")
    for fmt, expected_ratio in PAPER_FORMATS.items():
        diff = abs(aspect - expected_ratio)
        if diff < best_diff:
            best_diff = diff
            best_format = fmt
    # Confidence: 1.0 if exact match, decreasing with deviation
    # Threshold: if diff > 0.1, confidence drops below 0.5
    confidence = max(0.0, 1.0 - best_diff * 5.0)
    if confidence < 0.3:
        return "unknown", 0.0
    return best_format, round(confidence, 3)