feat: OCR Pipeline mit 6-Schritt-Wizard fuer Seitenrekonstruktion
All checks were successful
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 38s
CI / test-go-edu-search (push) Successful in 29s
CI / test-python-klausur (push) Successful in 1m46s
CI / test-python-agent-core (push) Successful in 17s
CI / test-nodejs-website (push) Successful in 22s
All checks were successful
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 38s
CI / test-go-edu-search (push) Successful in 29s
CI / test-python-klausur (push) Successful in 1m46s
CI / test-python-agent-core (push) Successful in 17s
CI / test-nodejs-website (push) Successful in 22s
Neue Route /ai/ocr-pipeline mit schrittweiser Begradigung (Deskew), Raster-Overlay und Ground Truth. Schritte 2-6 als Platzhalter. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
83
admin-lehrer/app/(admin)/ai/ocr-pipeline/page.tsx
Normal file
83
admin-lehrer/app/(admin)/ai/ocr-pipeline/page.tsx
Normal file
@@ -0,0 +1,83 @@
|
||||
'use client'
|
||||
|
||||
import { useState } from 'react'
|
||||
import { PagePurpose } from '@/components/common/PagePurpose'
|
||||
import { PipelineStepper } from '@/components/ocr-pipeline/PipelineStepper'
|
||||
import { StepDeskew } from '@/components/ocr-pipeline/StepDeskew'
|
||||
import { StepColumnDetection } from '@/components/ocr-pipeline/StepColumnDetection'
|
||||
import { StepWordRecognition } from '@/components/ocr-pipeline/StepWordRecognition'
|
||||
import { StepCoordinates } from '@/components/ocr-pipeline/StepCoordinates'
|
||||
import { StepReconstruction } from '@/components/ocr-pipeline/StepReconstruction'
|
||||
import { StepGroundTruth } from '@/components/ocr-pipeline/StepGroundTruth'
|
||||
import { PIPELINE_STEPS, type PipelineStep } from './types'
|
||||
|
||||
/**
 * Page component for the OCR pipeline wizard.
 *
 * Owns the index of the step currently shown and the status of every step,
 * and renders the page description, the step bar and the view of the
 * current step.
 */
export default function OcrPipelinePage() {
  const [currentStep, setCurrentStep] = useState(0)
  // The first step starts out active, every other step pending.
  const [steps, setSteps] = useState<PipelineStep[]>(
    PIPELINE_STEPS.map((step, position) => ({
      ...step,
      status: position === 0 ? 'active' : 'pending',
    })),
  )

  /** Jumps to a step if it is the current one, an earlier one, or already completed. */
  const handleStepClick = (index: number) => {
    const reachable = index <= currentStep || steps[index].status === 'completed'
    if (!reachable) return
    setCurrentStep(index)
  }

  /** Marks the current step completed, activates its successor and moves there. */
  const handleNext = () => {
    const lastIndex = steps.length - 1
    if (currentStep >= lastIndex) return

    const finished = currentStep
    const upcoming = currentStep + 1
    setSteps((previous) =>
      previous.map((step, position): PipelineStep => {
        if (position === finished) return { ...step, status: 'completed' }
        if (position === upcoming) return { ...step, status: 'active' }
        return step
      }),
    )
    setCurrentStep((index) => index + 1)
  }

  /** Returns the view for the current step, or null for an unknown index. */
  const renderStep = () => {
    // Indexed by step position; only the selected entry is ever invoked.
    const views = [
      () => <StepDeskew onNext={handleNext} />,
      () => <StepColumnDetection />,
      () => <StepWordRecognition />,
      () => <StepCoordinates />,
      () => <StepReconstruction />,
      () => <StepGroundTruth />,
    ]
    const view = views[currentStep]
    return view ? view() : null
  }

  const relatedPages = [
    { name: 'OCR Vergleich', href: '/ai/ocr-compare', description: 'Methoden-Vergleich' },
    { name: 'OCR-Labeling', href: '/ai/ocr-labeling', description: 'Trainingsdaten' },
  ]
  const architecture = {
    services: ['klausur-service (FastAPI)', 'OpenCV', 'Tesseract'],
    databases: ['In-Memory Sessions'],
  }

  return (
    <div className="space-y-6">
      <PagePurpose
        title="OCR Pipeline"
        purpose="Schrittweise Seitenrekonstruktion: Scan begradigen, Spalten erkennen, Woerter lokalisieren und die Seite Wort fuer Wort nachbauen. Ziel: 10 Vokabelseiten fehlerfrei rekonstruieren."
        audience={['Entwickler', 'Data Scientists']}
        architecture={architecture}
        relatedPages={relatedPages}
        defaultCollapsed
      />

      <PipelineStepper steps={steps} currentStep={currentStep} onStepClick={handleStepClick} />

      <div className="min-h-[400px]">{renderStep()}</div>
    </div>
  )
}
|
||||
43
admin-lehrer/app/(admin)/ai/ocr-pipeline/types.ts
Normal file
43
admin-lehrer/app/(admin)/ai/ocr-pipeline/types.ts
Normal file
@@ -0,0 +1,43 @@
|
||||
/** Lifecycle state of a single wizard step. */
export type PipelineStepStatus =
  | 'pending'
  | 'active'
  | 'completed'
  | 'failed'
|
||||
|
||||
/** One entry of the wizard's step bar. */
export interface PipelineStep {
  /** Stable identifier; the stepper uses it as the React key. */
  id: string
  /** Label shown next to the icon. */
  name: string
  /** Emoji shown for the step while it is neither completed nor failed. */
  icon: string
  /** Current lifecycle state of the step. */
  status: PipelineStepStatus
}
|
||||
|
||||
/** Response of the session-creation (upload) endpoint. */
export interface SessionInfo {
  /** Identifier used in the URLs of all follow-up requests for this upload. */
  session_id: string
  /** Name of the uploaded file. */
  filename: string
  /** Image width; the UI labels it as pixels. */
  image_width: number
  /** Image height; the UI labels it as pixels. */
  image_height: number
  /** URL of the unprocessed image. */
  original_image_url: string
}
|
||||
|
||||
/** Result of a deskew run for one session. */
export interface DeskewResult {
  /** Session this result belongs to. */
  session_id: string
  /** Angle proposed by the Hough method; the UI displays it in degrees. */
  angle_hough: number
  /** Angle proposed by the word-alignment method; the UI displays it in degrees. */
  angle_word_alignment: number
  /** Angle that was actually applied; the UI displays it in degrees. */
  angle_applied: number
  /** Which source produced `angle_applied`. */
  method_used: 'hough' | 'word_alignment' | 'manual'
  /** Confidence score; the UI renders `confidence * 100` as a percentage (presumably 0..1 - confirm with the API). */
  confidence: number
  /** Processing time in seconds. */
  duration_seconds: number
  /** URL of the deskewed image. */
  deskewed_image_url: string
  /** URL of the binarized image. */
  binarized_image_url: string
}
|
||||
|
||||
/** Reviewer feedback on a deskew result, sent to the ground-truth endpoint. */
export interface DeskewGroundTruth {
  /** Whether the reviewer judged the page to be straight. */
  is_correct: boolean
  /** Angle the reviewer considers right; omitted when none was chosen. */
  corrected_angle?: number
  /** Free-text remarks; omitted when empty. */
  notes?: string
}
|
||||
|
||||
/** Builds a wizard step that has not been started yet. */
const pendingStep = (id: string, name: string, icon: string): PipelineStep => ({
  id,
  name,
  icon,
  status: 'pending',
})

/** The six wizard steps in display order, all initially `pending`. */
export const PIPELINE_STEPS: PipelineStep[] = [
  pendingStep('deskew', 'Begradigung', '📐'),
  pendingStep('columns', 'Spalten', '📊'),
  pendingStep('words', 'Woerter', '🔤'),
  pendingStep('coordinates', 'Koordinaten', '📍'),
  pendingStep('reconstruction', 'Rekonstruktion', '🏗️'),
  pendingStep('ground-truth', 'Validierung', '✅'),
]
|
||||
208
admin-lehrer/components/ocr-pipeline/DeskewControls.tsx
Normal file
208
admin-lehrer/components/ocr-pipeline/DeskewControls.tsx
Normal file
@@ -0,0 +1,208 @@
|
||||
'use client'
|
||||
|
||||
import { useState } from 'react'
|
||||
import type { DeskewResult, DeskewGroundTruth } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
|
||||
/** Props of {@link DeskewControls}. */
interface DeskewControlsProps {
  /** Latest deskew result; while null the component renders no controls. */
  deskewResult: DeskewResult | null
  /** Whether the binarized view is switched on. */
  showBinarized: boolean
  /** Flips the binarized view. */
  onToggleBinarized: () => void
  /** Whether the grid overlay is switched on. */
  showGrid: boolean
  /** Flips the grid overlay. */
  onToggleGrid: () => void
  /** Called with the slider value when the user applies a manual angle. */
  onManualDeskew: (angle: number) => void
  /** Called with the reviewer's verdict on the deskew result. */
  onGroundTruth: (gt: DeskewGroundTruth) => void
  /** Called when the user accepts the result and moves on. */
  onNext: () => void
  /** True while a manual angle is being applied; disables the apply button. */
  isApplying: boolean
}
|
||||
|
||||
/**
 * Display labels for deskew methods, keyed by the method identifier.
 * Callers fall back to the raw identifier when a key is missing.
 */
const METHOD_LABELS: Record<string, string> = {
  hough: 'Hough-Linien',
  word_alignment: 'Wortausrichtung',
  manual: 'Manuell',
}
|
||||
|
||||
/**
 * Control panel below the image comparison of the deskew step.
 *
 * Shows the measured angles, the view toggles, a manual-angle slider, the
 * ground-truth feedback form and the "next" button. Nothing but the empty
 * wrapper is rendered until a deskew result is available.
 */
export function DeskewControls({
  deskewResult,
  showBinarized,
  onToggleBinarized,
  showGrid,
  onToggleGrid,
  onManualDeskew,
  onGroundTruth,
  onNext,
  isApplying,
}: DeskewControlsProps) {
  const [manualAngle, setManualAngle] = useState(0)
  const [gtFeedback, setGtFeedback] = useState<'correct' | 'incorrect' | null>(null)
  const [gtNotes, setGtNotes] = useState('')
  const [gtSaved, setGtSaved] = useState(false)

  /** "Ja" reports immediately; "Nein" only opens the notes form. */
  const handleGroundTruth = (isCorrect: boolean) => {
    if (!isCorrect) {
      setGtFeedback('incorrect')
      return
    }
    setGtFeedback('correct')
    onGroundTruth({ is_correct: true })
    setGtSaved(true)
  }

  /** Reports a negative verdict with the slider angle (unless 0) and the notes (unless empty). */
  const handleGroundTruthIncorrect = () => {
    const feedback: DeskewGroundTruth = {
      is_correct: false,
      corrected_angle: manualAngle === 0 ? undefined : manualAngle,
      notes: gtNotes || undefined,
    }
    onGroundTruth(feedback)
    setGtSaved(true)
  }

  if (!deskewResult) {
    return <div className="space-y-4" />
  }

  const cardClass = 'bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 p-4'
  const headingClass = 'text-sm font-medium text-gray-700 dark:text-gray-300 mb-2'
  const dividerClass = 'h-4 w-px bg-gray-300 dark:bg-gray-600'

  // Pill-shaped toggle: teal when switched on, neutral otherwise.
  const toggleClass = (active: boolean) => {
    const tone = active
      ? 'bg-teal-100 border-teal-300 text-teal-700 dark:bg-teal-900/40 dark:border-teal-600 dark:text-teal-300'
      : 'border-gray-300 text-gray-500 dark:border-gray-600 dark:text-gray-400'
    return `text-xs px-3 py-1 rounded-full border transition-colors ${tone}`
  }

  // Verdict button: highlighted once chosen, otherwise neutral with a coloured hover.
  const verdictClass = (selected: boolean, selectedTone: string, idleTone: string) =>
    `px-4 py-1.5 rounded-md text-sm font-medium transition-colors ${selected ? selectedTone : idleTone}`

  const resultCard = (
    <div className={cardClass}>
      <div className="flex flex-wrap items-center gap-3 text-sm">
        <div>
          <span className="text-gray-500">Winkel:</span>{' '}
          <span className="font-mono font-medium">{deskewResult.angle_applied}°</span>
        </div>
        <div className={dividerClass} />
        <div>
          <span className="text-gray-500">Methode:</span>{' '}
          <span className="inline-flex items-center px-2 py-0.5 rounded-full text-xs font-medium bg-teal-100 text-teal-700 dark:bg-teal-900/40 dark:text-teal-300">
            {METHOD_LABELS[deskewResult.method_used] || deskewResult.method_used}
          </span>
        </div>
        <div className={dividerClass} />
        <div>
          <span className="text-gray-500">Konfidenz:</span>{' '}
          <span className="font-mono">{Math.round(deskewResult.confidence * 100)}%</span>
        </div>
        <div className={dividerClass} />
        <div className="text-gray-400 text-xs">
          Hough: {deskewResult.angle_hough}° | WA: {deskewResult.angle_word_alignment}°
        </div>
      </div>

      <div className="flex gap-3 mt-3">
        <button onClick={onToggleBinarized} className={toggleClass(showBinarized)}>
          Binarisiert anzeigen
        </button>
        <button onClick={onToggleGrid} className={toggleClass(showGrid)}>
          Raster anzeigen
        </button>
      </div>
    </div>
  )

  const manualCard = (
    <div className={cardClass}>
      <div className={headingClass}>Manuelle Korrektur</div>
      <div className="flex items-center gap-3">
        <span className="text-xs text-gray-400 w-8 text-right">-5°</span>
        <input
          type="range"
          min={-5}
          max={5}
          step={0.1}
          value={manualAngle}
          onChange={(event) => setManualAngle(parseFloat(event.target.value))}
          className="flex-1 h-2 bg-gray-200 rounded-lg appearance-none cursor-pointer dark:bg-gray-700 accent-teal-500"
        />
        <span className="text-xs text-gray-400 w-8">+5°</span>
        <span className="font-mono text-sm w-14 text-right">{manualAngle.toFixed(1)}°</span>
        <button
          onClick={() => onManualDeskew(manualAngle)}
          disabled={isApplying}
          className="px-3 py-1.5 text-sm bg-teal-600 text-white rounded-md hover:bg-teal-700 disabled:opacity-50 transition-colors"
        >
          {isApplying ? '...' : 'Anwenden'}
        </button>
      </div>
    </div>
  )

  const feedbackForm = (
    <div className="space-y-3">
      <div className="flex gap-2">
        <button
          onClick={() => handleGroundTruth(true)}
          className={verdictClass(
            gtFeedback === 'correct',
            'bg-green-100 text-green-700 ring-2 ring-green-400',
            'bg-gray-100 text-gray-600 hover:bg-green-50 dark:bg-gray-700 dark:text-gray-300',
          )}
        >
          Ja
        </button>
        <button
          onClick={() => handleGroundTruth(false)}
          className={verdictClass(
            gtFeedback === 'incorrect',
            'bg-red-100 text-red-700 ring-2 ring-red-400',
            'bg-gray-100 text-gray-600 hover:bg-red-50 dark:bg-gray-700 dark:text-gray-300',
          )}
        >
          Nein
        </button>
      </div>
      {gtFeedback === 'incorrect' && (
        <div className="space-y-2">
          <textarea
            value={gtNotes}
            onChange={(event) => setGtNotes(event.target.value)}
            placeholder="Notizen zur Korrektur..."
            className="w-full text-sm border border-gray-300 dark:border-gray-600 rounded-md p-2 bg-white dark:bg-gray-900 text-gray-800 dark:text-gray-200"
            rows={2}
          />
          <button
            onClick={handleGroundTruthIncorrect}
            className="text-sm px-3 py-1 bg-red-600 text-white rounded-md hover:bg-red-700 transition-colors"
          >
            Feedback speichern
          </button>
        </div>
      )}
    </div>
  )

  const savedNotice = (
    <div className="text-sm text-green-600 dark:text-green-400">
      Feedback gespeichert
    </div>
  )

  return (
    <div className="space-y-4">
      {/* Results */}
      {resultCard}

      {/* Manual angle */}
      {manualCard}

      {/* Ground Truth */}
      <div className={cardClass}>
        <div className={headingClass}>
          Korrekt ausgerichtet?
        </div>
        {gtSaved ? savedNotice : feedbackForm}
      </div>

      {/* Next button */}
      <div className="flex justify-end">
        <button
          onClick={onNext}
          className="px-6 py-2 bg-teal-600 text-white rounded-lg hover:bg-teal-700 font-medium transition-colors"
        >
          Uebernehmen & Weiter →
        </button>
      </div>
    </div>
  )
}
|
||||
134
admin-lehrer/components/ocr-pipeline/ImageCompareView.tsx
Normal file
134
admin-lehrer/components/ocr-pipeline/ImageCompareView.tsx
Normal file
@@ -0,0 +1,134 @@
|
||||
'use client'
|
||||
|
||||
import { useState } from 'react'
|
||||
|
||||
/** Page width in millimetres that the grid overlay is laid out for. */
const A4_WIDTH_MM = 210
/** Page height in millimetres that the grid overlay is laid out for. */
const A4_HEIGHT_MM = 297
|
||||
|
||||
/** Props of {@link ImageCompareView}. */
interface ImageCompareViewProps {
  /** URL of the unprocessed image for the left pane; null while none exists. */
  originalUrl: string | null
  /** URL of the deskewed image for the right pane; null while none exists. */
  deskewedUrl: string | null
  /** Whether the grid is drawn over the right pane. */
  showGrid: boolean
  /** Whether the right pane should show the binarized image when one is available. */
  showBinarized: boolean
  /** URL of the binarized image; null while none exists. */
  binarizedUrl: string | null
}
|
||||
|
||||
/**
 * SVG grid stretched over its positioned parent.
 *
 * Draws one line every 10 mm of the configured page size, with heavier
 * lines and a numeric label every 50 mm. Coordinates are percentages of
 * the page extent inside a 100x100 view box that is scaled without
 * preserving its aspect ratio.
 */
function MmGridOverlay() {
  const strokeColor = (major: boolean) =>
    major ? 'rgba(59, 130, 246, 0.4)' : 'rgba(59, 130, 246, 0.15)'
  const strokeWeight = (major: boolean) => (major ? 0.12 : 0.05)
  // 0, 10, 20, ... up to and including the last multiple of 10 within the extent.
  const ticks = (extentMm: number) =>
    Array.from({ length: Math.floor(extentMm / 10) + 1 }, (_, step) => step * 10)

  // Vertical lines, each major one (except at 0) followed by its label.
  const vertical = ticks(A4_WIDTH_MM).flatMap((mm): React.ReactNode[] => {
    const x = (mm / A4_WIDTH_MM) * 100
    const major = mm % 50 === 0
    const parts: React.ReactNode[] = [
      <line
        key={`v-${mm}`}
        x1={x} y1={0} x2={x} y2={100}
        stroke={strokeColor(major)}
        strokeWidth={strokeWeight(major)}
      />,
    ]
    if (major && mm > 0) {
      parts.push(
        <text key={`vl-${mm}`} x={x} y={1.2} fill="rgba(59,130,246,0.6)" fontSize="1.2" textAnchor="middle">
          {mm}
        </text>
      )
    }
    return parts
  })

  // Horizontal lines, labelled at the left edge.
  const horizontal = ticks(A4_HEIGHT_MM).flatMap((mm): React.ReactNode[] => {
    const y = (mm / A4_HEIGHT_MM) * 100
    const major = mm % 50 === 0
    const parts: React.ReactNode[] = [
      <line
        key={`h-${mm}`}
        x1={0} y1={y} x2={100} y2={y}
        stroke={strokeColor(major)}
        strokeWidth={strokeWeight(major)}
      />,
    ]
    if (major && mm > 0) {
      parts.push(
        <text key={`hl-${mm}`} x={0.5} y={y + 0.6} fill="rgba(59,130,246,0.6)" fontSize="1.2">
          {mm}
        </text>
      )
    }
    return parts
  })

  const lines: React.ReactNode[] = [...vertical, ...horizontal]

  return (
    <svg
      viewBox="0 0 100 100"
      preserveAspectRatio="none"
      className="absolute inset-0 w-full h-full pointer-events-none"
      style={{ zIndex: 10 }}
    >
      <g style={{ pointerEvents: 'none' }}>{lines}</g>
    </svg>
  )
}
|
||||
|
||||
/**
 * Side-by-side view of the original scan (left) and the processed image
 * (right), with an optional grid overlay on the right.
 *
 * The right pane shows the binarized image when `showBinarized` is set and
 * a binarized URL exists, otherwise the deskewed image.
 *
 * Load failures are remembered per URL rather than as a sticky flag: when a
 * pane's URL changes (for example after a manual deskew, or when toggling
 * the binarized view) the new image is attempted again instead of leaving
 * the error message up for the rest of the component's lifetime.
 */
export function ImageCompareView({
  originalUrl,
  deskewedUrl,
  showGrid,
  showBinarized,
  binarizedUrl,
}: ImageCompareViewProps) {
  // URL whose load failed most recently, per pane; null when nothing has failed.
  const [leftFailedUrl, setLeftFailedUrl] = useState<string | null>(null)
  const [rightFailedUrl, setRightFailedUrl] = useState<string | null>(null)

  const showingBinarized = showBinarized && Boolean(binarizedUrl)
  const rightUrl = showingBinarized ? binarizedUrl : deskewedUrl

  // A pane is in error only while it still points at the URL that failed.
  const leftError = Boolean(originalUrl) && leftFailedUrl === originalUrl
  const rightError = Boolean(rightUrl) && rightFailedUrl === rightUrl

  return (
    <div className="grid grid-cols-1 lg:grid-cols-2 gap-4">
      {/* Left: Original */}
      <div className="space-y-2">
        <h3 className="text-sm font-medium text-gray-500 dark:text-gray-400">Original (unbearbeitet)</h3>
        <div className="relative bg-gray-100 dark:bg-gray-900 rounded-lg overflow-hidden border border-gray-200 dark:border-gray-700"
             style={{ aspectRatio: '210/297' }}>
          {originalUrl && !leftError ? (
            <img
              src={originalUrl}
              alt="Original Scan"
              className="w-full h-full object-contain"
              onError={() => setLeftFailedUrl(originalUrl)}
            />
          ) : (
            <div className="flex items-center justify-center h-full text-gray-400">
              {leftError ? 'Fehler beim Laden' : 'Noch kein Bild'}
            </div>
          )}
        </div>
      </div>

      {/* Right: Deskewed with Grid */}
      <div className="space-y-2">
        <h3 className="text-sm font-medium text-gray-500 dark:text-gray-400">
          {/* Label the image that is actually displayed, not merely the toggle state. */}
          {showingBinarized ? 'Binarisiert' : 'Begradigt'} {showGrid && '+ Raster (mm)'}
        </h3>
        <div className="relative bg-gray-100 dark:bg-gray-900 rounded-lg overflow-hidden border border-gray-200 dark:border-gray-700"
             style={{ aspectRatio: '210/297' }}>
          {rightUrl && !rightError ? (
            <>
              <img
                src={rightUrl}
                alt="Begradigtes Bild"
                className="w-full h-full object-contain"
                onError={() => setRightFailedUrl(rightUrl)}
              />
              {showGrid && <MmGridOverlay />}
            </>
          ) : (
            <div className="flex items-center justify-center h-full text-gray-400">
              {rightError ? 'Fehler beim Laden' : 'Begradigung laeuft...'}
            </div>
          )}
        </div>
      </div>
    </div>
  )
}
|
||||
53
admin-lehrer/components/ocr-pipeline/PipelineStepper.tsx
Normal file
53
admin-lehrer/components/ocr-pipeline/PipelineStepper.tsx
Normal file
@@ -0,0 +1,53 @@
|
||||
'use client'
|
||||
|
||||
import { PipelineStep } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
|
||||
/** Props of {@link PipelineStepper}. */
interface PipelineStepperProps {
  /** All wizard steps in display order. */
  steps: PipelineStep[]
  /** Index of the step currently shown. */
  currentStep: number
  /** Called with the index of a step the user clicked and is allowed to open. */
  onStepClick: (index: number) => void
}
|
||||
|
||||
/**
 * Horizontal step bar of the wizard.
 *
 * A step can be clicked when it is the current one, lies before it, or is
 * completed. The current step is highlighted; completed and failed steps
 * get their own colours and replace the icon with a check mark or cross.
 */
export function PipelineStepper({ steps, currentStep, onStepClick }: PipelineStepperProps) {
  const renderStep = (step: PipelineStep, index: number) => {
    const completed = step.status === 'completed'
    const failed = step.status === 'failed'
    const clickable = completed || index <= currentStep

    // Precedence: current step, then completed, then failed, then neutral.
    let tone = 'text-gray-400 dark:text-gray-500'
    if (index === currentStep) {
      tone = 'bg-teal-100 text-teal-700 dark:bg-teal-900/40 dark:text-teal-300 ring-2 ring-teal-400'
    } else if (completed) {
      tone = 'bg-green-100 text-green-700 dark:bg-green-900/40 dark:text-green-300'
    } else if (failed) {
      tone = 'bg-red-100 text-red-700 dark:bg-red-900/40 dark:text-red-300'
    }

    let glyph = step.icon
    if (completed) {
      glyph = '✓'
    } else if (failed) {
      glyph = '✗'
    }

    const cursor = clickable ? 'cursor-pointer hover:opacity-80' : 'cursor-default'
    const connectorTone = index <= currentStep ? 'bg-teal-400' : 'bg-gray-300 dark:bg-gray-600'

    return (
      <div key={step.id} className="flex items-center">
        {/* Connector to the previous step; the first step has none. */}
        {index === 0 ? null : <div className={`h-0.5 w-8 mx-1 ${connectorTone}`} />}
        <button
          onClick={() => {
            if (clickable) onStepClick(index)
          }}
          disabled={!clickable}
          className={`flex items-center gap-1.5 px-3 py-1.5 rounded-full text-sm font-medium transition-all ${tone} ${cursor}`}
        >
          <span className="text-base">{glyph}</span>
          <span className="hidden sm:inline">{step.name}</span>
          <span className="sm:hidden">{index + 1}</span>
        </button>
      </div>
    )
  }

  return (
    <div className="flex items-center justify-between px-4 py-3 bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700">
      {steps.map(renderStep)}
    </div>
  )
}
|
||||
19
admin-lehrer/components/ocr-pipeline/StepColumnDetection.tsx
Normal file
19
admin-lehrer/components/ocr-pipeline/StepColumnDetection.tsx
Normal file
@@ -0,0 +1,19 @@
|
||||
'use client'
|
||||
|
||||
/** Placeholder view for wizard step 2 (column detection); purely static. */
export function StepColumnDetection() {
  const heading = 'Schritt 2: Spaltenerkennung'
  const summary = 'Erkennung unsichtbarer Spaltentrennungen in der Vokabelseite.'
  const notice = 'Dieser Schritt wird in einer zukuenftigen Version implementiert.'

  return (
    <div className="flex flex-col items-center justify-center py-16 text-center">
      <div className="text-5xl mb-4">📊</div>
      <h3 className="text-lg font-medium text-gray-700 dark:text-gray-300 mb-2">{heading}</h3>
      <p className="text-gray-500 dark:text-gray-400 max-w-md">
        {summary} {notice}
      </p>
      <div className="mt-6 px-4 py-2 bg-amber-100 dark:bg-amber-900/30 text-amber-700 dark:text-amber-400 rounded-full text-sm font-medium">
        Kommt bald
      </div>
    </div>
  )
}
|
||||
19
admin-lehrer/components/ocr-pipeline/StepCoordinates.tsx
Normal file
19
admin-lehrer/components/ocr-pipeline/StepCoordinates.tsx
Normal file
@@ -0,0 +1,19 @@
|
||||
'use client'
|
||||
|
||||
/** Placeholder view for wizard step 4 (coordinate assignment); purely static. */
export function StepCoordinates() {
  const heading = 'Schritt 4: Koordinatenzuweisung'
  const summary = 'Exakte Positionszuweisung fuer jedes Wort auf der Seite.'
  const notice = 'Dieser Schritt wird in einer zukuenftigen Version implementiert.'

  return (
    <div className="flex flex-col items-center justify-center py-16 text-center">
      <div className="text-5xl mb-4">📍</div>
      <h3 className="text-lg font-medium text-gray-700 dark:text-gray-300 mb-2">{heading}</h3>
      <p className="text-gray-500 dark:text-gray-400 max-w-md">
        {summary} {notice}
      </p>
      <div className="mt-6 px-4 py-2 bg-amber-100 dark:bg-amber-900/30 text-amber-700 dark:text-amber-400 rounded-full text-sm font-medium">
        Kommt bald
      </div>
    </div>
  )
}
|
||||
222
admin-lehrer/components/ocr-pipeline/StepDeskew.tsx
Normal file
222
admin-lehrer/components/ocr-pipeline/StepDeskew.tsx
Normal file
@@ -0,0 +1,222 @@
|
||||
'use client'
|
||||
|
||||
import { useCallback, useState } from 'react'
|
||||
import type { DeskewGroundTruth, DeskewResult, SessionInfo } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
import { DeskewControls } from './DeskewControls'
|
||||
import { ImageCompareView } from './ImageCompareView'
|
||||
|
||||
/** Path prefix prepended to every request and image URL of the klausur API. */
const KLAUSUR_API = '/klausur-api'
|
||||
|
||||
/** Props of {@link StepDeskew}. */
interface StepDeskewProps {
  /** Called when the user accepts the deskew result and wants the next step. */
  onNext: () => void
}
|
||||
|
||||
/**
 * Builds the message for a failed API response.
 *
 * Prefers a non-empty string `detail` from a JSON body; when the body is
 * not JSON the HTTP status text is used instead; otherwise `fallback`.
 * A `detail` that is not a string is ignored rather than stringified.
 */
async function readErrorMessage(res: Response, fallback: string): Promise<string> {
  const body: unknown = await res.json().catch(() => ({ detail: res.statusText }))
  if (typeof body === 'object' && body !== null && 'detail' in body) {
    const { detail } = body as { detail: unknown }
    if (typeof detail === 'string' && detail) return detail
  }
  return fallback
}

/**
 * Wizard step 1: upload a scan, run the automatic deskew and let the user
 * review, correct and rate the result.
 *
 * Until a session exists the component shows an upload area (drag and drop
 * or file picker). After a successful upload it creates the session,
 * triggers the deskew request and shows the image comparison with the
 * controls below it.
 *
 * @param onNext Called when the user accepts the result.
 */
export function StepDeskew({ onNext }: StepDeskewProps) {
  const [session, setSession] = useState<SessionInfo | null>(null)
  const [deskewResult, setDeskewResult] = useState<DeskewResult | null>(null)
  const [uploading, setUploading] = useState(false)
  const [deskewing, setDeskewing] = useState(false)
  const [applying, setApplying] = useState(false)
  const [showBinarized, setShowBinarized] = useState(false)
  const [showGrid, setShowGrid] = useState(true)
  const [error, setError] = useState<string | null>(null)
  const [dragOver, setDragOver] = useState(false)

  /** Creates a session from the file and immediately runs the automatic deskew. */
  const handleUpload = useCallback(async (file: File) => {
    setUploading(true)
    setError(null)
    setDeskewResult(null)

    try {
      const formData = new FormData()
      formData.append('file', file)

      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions`, {
        method: 'POST',
        body: formData,
      })

      if (!res.ok) {
        throw new Error(await readErrorMessage(res, 'Upload fehlgeschlagen'))
      }

      const data: SessionInfo = await res.json()
      // Prepend API prefix to relative URLs
      data.original_image_url = `${KLAUSUR_API}${data.original_image_url}`
      setSession(data)

      // Auto-trigger deskew
      setDeskewing(true)
      const deskewRes = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${data.session_id}/deskew`, {
        method: 'POST',
      })

      if (!deskewRes.ok) {
        throw new Error(await readErrorMessage(deskewRes, 'Begradigung fehlgeschlagen'))
      }

      const deskewData: DeskewResult = await deskewRes.json()
      deskewData.deskewed_image_url = `${KLAUSUR_API}${deskewData.deskewed_image_url}`
      deskewData.binarized_image_url = `${KLAUSUR_API}${deskewData.binarized_image_url}`
      setDeskewResult(deskewData)
    } catch (e) {
      setError(e instanceof Error ? e.message : 'Unbekannter Fehler')
    } finally {
      setUploading(false)
      setDeskewing(false)
    }
  }, [])

  /** Applies a user-chosen angle and swaps in the re-rendered deskewed image. */
  const handleManualDeskew = useCallback(async (angle: number) => {
    if (!session) return
    setApplying(true)
    setError(null)

    try {
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${session.session_id}/deskew/manual`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ angle }),
      })

      if (!res.ok) {
        throw new Error(await readErrorMessage(res, 'Manuelle Begradigung fehlgeschlagen'))
      }

      const data: Pick<DeskewResult, 'angle_applied' | 'method_used' | 'deskewed_image_url'> = await res.json()
      setDeskewResult((prev) =>
        prev
          ? {
              ...prev,
              angle_applied: data.angle_applied,
              method_used: data.method_used,
              // Force reload by appending timestamp
              deskewed_image_url: `${KLAUSUR_API}${data.deskewed_image_url}?t=${Date.now()}`,
            }
          : null,
      )
    } catch (e) {
      setError(e instanceof Error ? e.message : 'Fehler')
    } finally {
      setApplying(false)
    }
  }, [session])

  /**
   * Sends the reviewer's verdict to the ground-truth endpoint.
   * A rejected request (network error or non-2xx status) is logged and
   * shown in the error box instead of being silently dropped.
   */
  const handleGroundTruth = useCallback(async (gt: DeskewGroundTruth) => {
    if (!session) return
    const fallback = 'Feedback konnte nicht gespeichert werden'
    setError(null)
    try {
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${session.session_id}/ground-truth/deskew`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(gt),
      })
      // fetch only rejects on network failure; HTTP errors must be checked explicitly.
      if (!res.ok) {
        throw new Error(await readErrorMessage(res, fallback))
      }
    } catch (e) {
      console.error('Ground truth save failed:', e)
      setError(e instanceof Error ? e.message : fallback)
    }
  }, [session])

  const handleDrop = useCallback((e: React.DragEvent) => {
    e.preventDefault()
    setDragOver(false)
    // The file picker is hidden during an upload; the drop zone must not start a second one either.
    if (uploading) return
    const file = e.dataTransfer.files[0]
    if (file) void handleUpload(file)
  }, [handleUpload, uploading])

  const handleFileInput = useCallback((e: React.ChangeEvent<HTMLInputElement>) => {
    const file = e.target.files?.[0]
    // Clear the input so choosing the same file again after a failed upload fires onChange.
    e.target.value = ''
    if (file) void handleUpload(file)
  }, [handleUpload])

  const errorBox = error && (
    <div className="p-3 bg-red-50 dark:bg-red-900/20 text-red-600 dark:text-red-400 rounded-lg text-sm">
      {error}
    </div>
  )

  // Upload area (no session yet)
  if (!session) {
    return (
      <div className="space-y-4">
        <div
          onDragOver={(e) => { e.preventDefault(); setDragOver(true) }}
          onDragLeave={() => setDragOver(false)}
          onDrop={handleDrop}
          className={`border-2 border-dashed rounded-xl p-12 text-center transition-colors ${
            dragOver
              ? 'border-teal-400 bg-teal-50 dark:bg-teal-900/20'
              : 'border-gray-300 dark:border-gray-600 hover:border-teal-400'
          }`}
        >
          {uploading ? (
            <div className="text-gray-500">
              <div className="animate-spin inline-block w-8 h-8 border-2 border-teal-500 border-t-transparent rounded-full mb-3" />
              <p>Wird hochgeladen...</p>
            </div>
          ) : (
            <>
              <div className="text-4xl mb-3">📄</div>
              <p className="text-gray-600 dark:text-gray-400 mb-2">
                PDF oder Bild hierher ziehen
              </p>
              <p className="text-sm text-gray-400 mb-4">oder</p>
              <label className="inline-block px-4 py-2 bg-teal-600 text-white rounded-lg cursor-pointer hover:bg-teal-700 transition-colors">
                Datei auswaehlen
                <input
                  type="file"
                  accept=".pdf,.png,.jpg,.jpeg,.tiff,.tif"
                  onChange={handleFileInput}
                  className="hidden"
                />
              </label>
            </>
          )}
        </div>
        {errorBox}
      </div>
    )
  }

  // Session active: show comparison + controls
  return (
    <div className="space-y-4">
      {/* Filename */}
      <div className="text-sm text-gray-500 dark:text-gray-400">
        Datei: <span className="font-medium text-gray-700 dark:text-gray-300">{session.filename}</span>
        {' '}({session.image_width} x {session.image_height} px)
      </div>

      {/* Loading indicator */}
      {deskewing && (
        <div className="flex items-center gap-2 text-teal-600 dark:text-teal-400 text-sm">
          <div className="animate-spin w-4 h-4 border-2 border-teal-500 border-t-transparent rounded-full" />
          Begradigung laeuft (beide Methoden)...
        </div>
      )}

      {/* Image comparison */}
      <ImageCompareView
        originalUrl={session.original_image_url}
        deskewedUrl={deskewResult?.deskewed_image_url ?? null}
        showGrid={showGrid}
        showBinarized={showBinarized}
        binarizedUrl={deskewResult?.binarized_image_url ?? null}
      />

      {/* Controls */}
      <DeskewControls
        deskewResult={deskewResult}
        showBinarized={showBinarized}
        onToggleBinarized={() => setShowBinarized((v) => !v)}
        showGrid={showGrid}
        onToggleGrid={() => setShowGrid((v) => !v)}
        onManualDeskew={handleManualDeskew}
        onGroundTruth={handleGroundTruth}
        onNext={onNext}
        isApplying={applying}
      />

      {errorBox}
    </div>
  )
}
|
||||
19
admin-lehrer/components/ocr-pipeline/StepGroundTruth.tsx
Normal file
19
admin-lehrer/components/ocr-pipeline/StepGroundTruth.tsx
Normal file
@@ -0,0 +1,19 @@
|
||||
'use client'
|
||||
|
||||
/** Placeholder view for wizard step 6 (ground-truth validation); purely static. */
export function StepGroundTruth() {
  const heading = 'Schritt 6: Ground Truth Validierung'
  const summary = 'Gesamtpruefung der rekonstruierten Seite gegen das Original.'
  const notice = 'Dieser Schritt wird in einer zukuenftigen Version implementiert.'

  return (
    <div className="flex flex-col items-center justify-center py-16 text-center">
      <div className="text-5xl mb-4">✅</div>
      <h3 className="text-lg font-medium text-gray-700 dark:text-gray-300 mb-2">{heading}</h3>
      <p className="text-gray-500 dark:text-gray-400 max-w-md">
        {summary} {notice}
      </p>
      <div className="mt-6 px-4 py-2 bg-amber-100 dark:bg-amber-900/30 text-amber-700 dark:text-amber-400 rounded-full text-sm font-medium">
        Kommt bald
      </div>
    </div>
  )
}
|
||||
19
admin-lehrer/components/ocr-pipeline/StepReconstruction.tsx
Normal file
19
admin-lehrer/components/ocr-pipeline/StepReconstruction.tsx
Normal file
@@ -0,0 +1,19 @@
|
||||
'use client'
|
||||
|
||||
/** Placeholder view for wizard step 5 (page reconstruction); purely static. */
export function StepReconstruction() {
  const heading = 'Schritt 5: Seitenrekonstruktion'
  const summary = 'Nachbau der Originalseite aus erkannten Woertern und Positionen.'
  const notice = 'Dieser Schritt wird in einer zukuenftigen Version implementiert.'

  return (
    <div className="flex flex-col items-center justify-center py-16 text-center">
      <div className="text-5xl mb-4">🏗️</div>
      <h3 className="text-lg font-medium text-gray-700 dark:text-gray-300 mb-2">{heading}</h3>
      <p className="text-gray-500 dark:text-gray-400 max-w-md">
        {summary} {notice}
      </p>
      <div className="mt-6 px-4 py-2 bg-amber-100 dark:bg-amber-900/30 text-amber-700 dark:text-amber-400 rounded-full text-sm font-medium">
        Kommt bald
      </div>
    </div>
  )
}
|
||||
19
admin-lehrer/components/ocr-pipeline/StepWordRecognition.tsx
Normal file
19
admin-lehrer/components/ocr-pipeline/StepWordRecognition.tsx
Normal file
@@ -0,0 +1,19 @@
|
||||
'use client'
|
||||
|
||||
/**
 * Placeholder panel for pipeline step 3 (word recognition).
 *
 * Renders static content only: an icon, the step title, a short description
 * and a "coming soon" badge. It takes no props and holds no state.
 */
export function StepWordRecognition() {
  // Copy is kept in named constants so the markup below stays purely structural.
  const icon = '🔤'
  const title = 'Schritt 3: Worterkennung'
  const description =
    'OCR mit Bounding Boxes fuer jedes erkannte Wort. ' +
    'Dieser Schritt wird in einer zukuenftigen Version implementiert.'
  const badge = 'Kommt bald'

  return (
    <div className="flex flex-col items-center justify-center py-16 text-center">
      <div className="text-5xl mb-4">{icon}</div>
      <h3 className="text-lg font-medium text-gray-700 dark:text-gray-300 mb-2">{title}</h3>
      <p className="text-gray-500 dark:text-gray-400 max-w-md">{description}</p>
      <div className="mt-6 px-4 py-2 bg-amber-100 dark:bg-amber-900/30 text-amber-700 dark:text-amber-400 rounded-full text-sm font-medium">
        {badge}
      </div>
    </div>
  )
}
|
||||
@@ -127,6 +127,15 @@ export const navigation: NavCategory[] = [
|
||||
audience: ['Entwickler', 'Data Scientists', 'Lehrer'],
|
||||
subgroup: 'KI-Werkzeuge',
|
||||
},
|
||||
{
|
||||
id: 'ocr-pipeline',
|
||||
name: 'OCR Pipeline',
|
||||
href: '/ai/ocr-pipeline',
|
||||
description: 'Schrittweise Seitenrekonstruktion',
|
||||
purpose: 'Schrittweise Seitenrekonstruktion: Scan begradigen, Spalten erkennen, Woerter lokalisieren und die Seite Wort fuer Wort nachbauen. 6-Schritt-Pipeline mit Ground Truth Validierung.',
|
||||
audience: ['Entwickler', 'Data Scientists'],
|
||||
subgroup: 'KI-Werkzeuge',
|
||||
},
|
||||
{
|
||||
id: 'test-quality',
|
||||
name: 'Test Quality (BQAS)',
|
||||
|
||||
@@ -42,6 +42,7 @@ try:
|
||||
except ImportError:
|
||||
trocr_router = None
|
||||
from vocab_worksheet_api import router as vocab_router, set_db_pool as set_vocab_db_pool, _init_vocab_table, _load_all_sessions, DATABASE_URL as VOCAB_DATABASE_URL
|
||||
from ocr_pipeline_api import router as ocr_pipeline_router
|
||||
try:
|
||||
from dsfa_rag_api import router as dsfa_rag_router, set_db_pool as set_dsfa_db_pool
|
||||
from dsfa_corpus_ingestion import DSFAQdrantService, DATABASE_URL as DSFA_DATABASE_URL
|
||||
@@ -150,6 +151,7 @@ app.include_router(mail_router) # Unified Inbox Mail
|
||||
if trocr_router:
|
||||
app.include_router(trocr_router) # TrOCR Handwriting OCR
|
||||
app.include_router(vocab_router) # Vocabulary Worksheet Generator
|
||||
app.include_router(ocr_pipeline_router) # OCR Pipeline (step-by-step)
|
||||
if dsfa_rag_router:
|
||||
app.include_router(dsfa_rag_router) # DSFA RAG Corpus Search
|
||||
|
||||
|
||||
301
klausur-service/backend/ocr_pipeline_api.py
Normal file
301
klausur-service/backend/ocr_pipeline_api.py
Normal file
@@ -0,0 +1,301 @@
|
||||
"""
|
||||
OCR Pipeline API - Schrittweise Seitenrekonstruktion.
|
||||
|
||||
Zerlegt den OCR-Prozess in 6 einzelne Schritte:
|
||||
1. Deskewing - Scan begradigen
|
||||
2. Spaltenerkennung - Unsichtbare Spalten finden
|
||||
3. Worterkennung - OCR mit Bounding Boxes
|
||||
4. Koordinatenzuweisung - Exakte Positionen
|
||||
5. Seitenrekonstruktion - Seite nachbauen
|
||||
6. Ground Truth Validierung - Gesamtpruefung
|
||||
|
||||
Lizenz: Apache 2.0
|
||||
DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
|
||||
"""
|
||||
|
||||
import io
|
||||
import logging
|
||||
import time
|
||||
import uuid
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
from fastapi import APIRouter, File, HTTPException, UploadFile
|
||||
from fastapi.responses import Response
|
||||
from pydantic import BaseModel
|
||||
|
||||
from cv_vocab_pipeline import (
|
||||
create_ocr_image,
|
||||
deskew_image,
|
||||
deskew_image_by_word_alignment,
|
||||
render_image_high_res,
|
||||
render_pdf_high_res,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(prefix="/api/v1/ocr-pipeline", tags=["ocr-pipeline"])
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# In-memory session store (24h TTL)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_sessions: Dict[str, Dict[str, Any]] = {}
|
||||
SESSION_TTL_HOURS = 24
|
||||
|
||||
|
||||
def _cleanup_expired():
    """Evict every stored session whose ``created_at`` lies before the TTL cutoff.

    A session without a ``created_at`` entry is compared using the current
    time and is therefore never evicted by this pass.
    """
    threshold = datetime.utcnow() - timedelta(hours=SESSION_TTL_HOURS)

    # Collect ids first: the dict must not change size while it is iterated.
    stale_ids = []
    for session_id, data in _sessions.items():
        created_at = data.get("created_at", datetime.utcnow())
        if created_at < threshold:
            stale_ids.append(session_id)

    for session_id in stale_ids:
        del _sessions[session_id]
        logger.info(f"OCR Pipeline: expired session {session_id}")
|
||||
|
||||
|
||||
def _get_session(session_id: str) -> Dict[str, Any]:
    """Return the stored session for ``session_id``.

    Raises:
        HTTPException: 404 when nothing (or an empty entry) is stored under
            the given id.
    """
    found = _sessions.get(session_id)
    if found:
        return found
    raise HTTPException(status_code=404, detail=f"Session {session_id} not found")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Pydantic Models
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class ManualDeskewRequest(BaseModel):
    # Request body for the manual deskew endpoint.
    # `angle` is the rotation requested by the user; manual_deskew clamps it to
    # [-5.0, 5.0] before handing it to cv2.getRotationMatrix2D (degrees).
    angle: float
|
||||
|
||||
|
||||
class DeskewGroundTruthRequest(BaseModel):
    # Request body for reviewer feedback on the deskew step. All three fields
    # are stored unchanged under session["ground_truth"]["deskew"].
    # Reviewer's verdict on the applied deskew.
    is_correct: bool
    # Optional angle supplied by the reviewer; not validated or clamped here.
    corrected_angle: Optional[float] = None
    # Optional free-text remark.
    notes: Optional[str] = None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Endpoints
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@router.post("/sessions")
async def create_session(file: UploadFile = File(...)):
    """Upload a PDF or image file and create a pipeline session.

    The upload is rendered to a BGR image (first page only for PDFs), encoded
    once as PNG for later serving, and stored in the in-memory session store
    together with empty slots for the deskew results.

    Args:
        file: Uploaded PDF or image. A file counts as PDF when its content
            type is ``application/pdf`` or its name ends in ``.pdf``.

    Returns:
        Dict with the new ``session_id``, the ``filename``, the rendered image
        dimensions and the URL under which the original image is served.

    Raises:
        HTTPException: 400 when the upload is empty or cannot be rendered to
            an image; 500 when the rendered image cannot be PNG-encoded.
    """
    _cleanup_expired()

    file_data = await file.read()
    # Reject empty uploads up front instead of relying on the renderers to fail.
    if not file_data:
        raise HTTPException(status_code=400, detail="Could not process file: upload is empty")

    filename = file.filename or "upload"
    content_type = file.content_type or ""

    session_id = str(uuid.uuid4())
    is_pdf = content_type == "application/pdf" or filename.lower().endswith(".pdf")

    try:
        if is_pdf:
            img_bgr = render_pdf_high_res(file_data, page_number=0, zoom=3.0)
        else:
            img_bgr = render_image_high_res(file_data)
    except Exception as e:
        # Keep the original exception chained for server-side tracebacks.
        raise HTTPException(status_code=400, detail=f"Could not process file: {e}") from e

    # Defensive: the renderers are defined elsewhere; should one hand back no
    # image, fail with a clear 400 rather than crashing inside cv2.imencode.
    if img_bgr is None or getattr(img_bgr, "size", 0) == 0:
        raise HTTPException(status_code=400, detail="Could not process file: no image data decoded")

    # Encode original as PNG bytes for serving
    success, png_buf = cv2.imencode(".png", img_bgr)
    if not success:
        raise HTTPException(status_code=500, detail="Failed to encode image")

    _sessions[session_id] = {
        "id": session_id,
        "filename": filename,
        "created_at": datetime.utcnow(),
        "original_bgr": img_bgr,
        "original_png": png_buf.tobytes(),
        "deskewed_bgr": None,
        "deskewed_png": None,
        "binarized_png": None,
        "deskew_result": None,
        "ground_truth": {},
        "current_step": 1,
    }

    logger.info(f"OCR Pipeline: created session {session_id} from {filename} "
                f"({img_bgr.shape[1]}x{img_bgr.shape[0]})")

    return {
        "session_id": session_id,
        "filename": filename,
        "image_width": img_bgr.shape[1],
        "image_height": img_bgr.shape[0],
        "original_image_url": f"/api/v1/ocr-pipeline/sessions/{session_id}/image/original",
    }
|
||||
|
||||
|
||||
@router.post("/sessions/{session_id}/deskew")
async def auto_deskew(session_id: str):
    """Run both deskew methods on the session's original image and keep one result.

    Hough-line deskew works on a copy of the BGR array; word-alignment deskew
    works on PNG bytes. The chosen image, its PNG encoding, a binarized PNG
    and a result summary are written back into the session.

    Args:
        session_id: Id of an existing pipeline session.

    Returns:
        Dict with ``session_id``, both detected angles, the applied angle and
        method, a heuristic confidence, the processing duration and the URLs
        of the deskewed and binarized images.

    Raises:
        HTTPException: 404 when the session does not exist.
    """
    session = _get_session(session_id)
    img_bgr = session["original_bgr"]

    t0 = time.time()

    # Method 1: Hough Lines
    try:
        deskewed_hough, angle_hough = deskew_image(img_bgr.copy())
    except Exception as e:
        logger.warning(f"Hough deskew failed: {e}")
        deskewed_hough, angle_hough = img_bgr, 0.0

    # Method 2: Word Alignment (needs image bytes).
    # The session already holds the PNG encoding of the original image, so
    # reuse it; only re-encode when that entry is missing.
    orig_bytes = session.get("original_png")
    if not orig_bytes:
        success_enc, png_orig = cv2.imencode(".png", img_bgr)
        orig_bytes = png_orig.tobytes() if success_enc else b""

    wa_ok = True
    try:
        deskewed_wa_bytes, angle_wa = deskew_image_by_word_alignment(orig_bytes)
    except Exception as e:
        logger.warning(f"Word alignment deskew failed: {e}")
        deskewed_wa_bytes, angle_wa = orig_bytes, 0.0
        wa_ok = False

    duration = time.time() - t0

    # Pick method with larger detected angle (more correction needed = more skew found)
    # If both are ~0, prefer word alignment as it's more robust.
    # A word-alignment run that raised is never selected, so method_used
    # cannot name a method that did not actually produce a result.
    deskewed_bgr = None
    prefer_wa = abs(angle_wa) >= abs(angle_hough) or abs(angle_hough) < 0.1
    if wa_ok and prefer_wa and deskewed_wa_bytes:
        # Decode word alignment result to BGR; None means the bytes were unusable.
        wa_array = np.frombuffer(deskewed_wa_bytes, dtype=np.uint8)
        deskewed_bgr = cv2.imdecode(wa_array, cv2.IMREAD_COLOR)

    if deskewed_bgr is not None:
        method_used = "word_alignment"
        angle_applied = angle_wa
    else:
        method_used = "hough"
        angle_applied = angle_hough
        deskewed_bgr = deskewed_hough

    # Encode deskewed as PNG; fall back to the original PNG if encoding fails.
    success, deskewed_png_buf = cv2.imencode(".png", deskewed_bgr)
    deskewed_png = deskewed_png_buf.tobytes() if success else session["original_png"]

    # Create binarized version (best effort: the endpoint still succeeds without it)
    try:
        binarized = create_ocr_image(deskewed_bgr)
        success_bin, bin_buf = cv2.imencode(".png", binarized)
        binarized_png = bin_buf.tobytes() if success_bin else None
    except Exception as e:
        logger.warning(f"Binarization failed: {e}")
        binarized_png = None

    # Confidence: higher angle = lower confidence that we got it right.
    # Heuristic only; floors at 0.5 once the applied angle reaches 2.5 degrees.
    confidence = max(0.5, 1.0 - abs(angle_applied) / 5.0)

    deskew_result = {
        "angle_hough": round(angle_hough, 3),
        "angle_word_alignment": round(angle_wa, 3),
        "angle_applied": round(angle_applied, 3),
        "method_used": method_used,
        "confidence": round(confidence, 2),
        "duration_seconds": round(duration, 2),
    }

    session["deskewed_bgr"] = deskewed_bgr
    session["deskewed_png"] = deskewed_png
    session["binarized_png"] = binarized_png
    session["deskew_result"] = deskew_result

    logger.info(f"OCR Pipeline: deskew session {session_id}: "
                f"hough={angle_hough:.2f}° wa={angle_wa:.2f}° → {method_used} {angle_applied:.2f}°")

    return {
        "session_id": session_id,
        **deskew_result,
        "deskewed_image_url": f"/api/v1/ocr-pipeline/sessions/{session_id}/image/deskewed",
        "binarized_image_url": f"/api/v1/ocr-pipeline/sessions/{session_id}/image/binarized",
    }
|
||||
|
||||
|
||||
@router.post("/sessions/{session_id}/deskew/manual")
async def manual_deskew(session_id: str, req: ManualDeskewRequest):
    """Apply a manual rotation angle to the original image.

    The rotation always starts from the session's original image, not from a
    previous deskew result. The rotated image, its PNG encoding and a freshly
    binarized PNG replace the session's earlier deskew outputs.

    Args:
        session_id: Id of an existing pipeline session.
        req: Requested rotation; ``req.angle`` is clamped to [-5.0, 5.0].

    Returns:
        Dict with ``session_id``, the applied (clamped) angle, the method
        ``"manual"`` and the URLs of the deskewed and binarized images.

    Raises:
        HTTPException: 404 when the session does not exist.
    """
    session = _get_session(session_id)
    img_bgr = session["original_bgr"]
    angle = max(-5.0, min(5.0, req.angle))

    # Rotate about the image centre, keeping the canvas size; border pixels
    # are replicated to fill the corners uncovered by the rotation.
    h, w = img_bgr.shape[:2]
    center = (w // 2, h // 2)
    M = cv2.getRotationMatrix2D(center, angle, 1.0)
    rotated = cv2.warpAffine(img_bgr, M, (w, h),
                             flags=cv2.INTER_LINEAR,
                             borderMode=cv2.BORDER_REPLICATE)

    # Fall back to the original PNG if the rotated image cannot be encoded.
    success, png_buf = cv2.imencode(".png", rotated)
    deskewed_png = png_buf.tobytes() if success else session["original_png"]

    # Binarize (best effort); log failures the same way auto_deskew does
    # instead of swallowing them silently.
    try:
        binarized = create_ocr_image(rotated)
        success_bin, bin_buf = cv2.imencode(".png", binarized)
        binarized_png = bin_buf.tobytes() if success_bin else None
    except Exception as e:
        logger.warning(f"Binarization failed: {e}")
        binarized_png = None

    session["deskewed_bgr"] = rotated
    session["deskewed_png"] = deskewed_png
    session["binarized_png"] = binarized_png
    # Keep fields from an earlier automatic run (e.g. the detected angles) and
    # overwrite only what the manual rotation changes.
    session["deskew_result"] = {
        **(session.get("deskew_result") or {}),
        "angle_applied": round(angle, 3),
        "method_used": "manual",
    }

    logger.info(f"OCR Pipeline: manual deskew session {session_id}: {angle:.2f}°")

    return {
        "session_id": session_id,
        "angle_applied": round(angle, 3),
        "method_used": "manual",
        "deskewed_image_url": f"/api/v1/ocr-pipeline/sessions/{session_id}/image/deskewed",
        # The binarized image is regenerated above, so expose its URL just
        # like the automatic deskew response does.
        "binarized_image_url": f"/api/v1/ocr-pipeline/sessions/{session_id}/image/binarized",
    }
|
||||
|
||||
|
||||
@router.get("/sessions/{session_id}/image/{image_type}")
async def get_image(session_id: str, image_type: str):
    """Return one of the session's stored PNG images.

    Args:
        session_id: Id of an existing pipeline session.
        image_type: One of ``original``, ``deskewed`` or ``binarized``.

    Raises:
        HTTPException: 404 when the session does not exist or the requested
            image has not been produced yet; 400 for an unknown image type.
    """
    session = _get_session(session_id)

    # Maps the URL segment to the session key that holds the PNG bytes.
    png_keys = {
        "original": "original_png",
        "deskewed": "deskewed_png",
        "binarized": "binarized_png",
    }
    key = png_keys.get(image_type)
    if key is None:
        raise HTTPException(status_code=400, detail=f"Unknown image type: {image_type}")

    data = session.get(key)
    if data:
        return Response(content=data, media_type="image/png")

    raise HTTPException(status_code=404, detail=f"Image '{image_type}' not available yet")
|
||||
|
||||
|
||||
@router.post("/sessions/{session_id}/ground-truth/deskew")
async def save_deskew_ground_truth(session_id: str, req: DeskewGroundTruthRequest):
    """Store reviewer feedback for the deskew step in the session.

    The feedback is saved together with a UTC timestamp and the deskew result
    the session holds at this moment, replacing any earlier deskew feedback.

    Raises:
        HTTPException: 404 when the session does not exist.
    """
    session = _get_session(session_id)

    saved_at = datetime.utcnow().isoformat()
    feedback = dict(
        is_correct=req.is_correct,
        corrected_angle=req.corrected_angle,
        notes=req.notes,
        saved_at=saved_at,
        deskew_result=session.get("deskew_result"),
    )
    session["ground_truth"]["deskew"] = feedback

    logger.info(f"OCR Pipeline: ground truth deskew session {session_id}: "
                f"correct={req.is_correct}, corrected_angle={req.corrected_angle}")

    return {"session_id": session_id, "ground_truth": feedback}
|
||||
Reference in New Issue
Block a user