feat: add Kombi-Modus (PaddleOCR + Tesseract) for OCR Overlay
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 35s
CI / test-go-edu-search (push) Successful in 33s
CI / test-python-klausur (push) Failing after 2m20s
CI / test-python-agent-core (push) Successful in 22s
CI / test-nodejs-website (push) Successful in 41s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 35s
CI / test-go-edu-search (push) Successful in 33s
CI / test-python-klausur (push) Failing after 2m20s
CI / test-python-agent-core (push) Successful in 22s
CI / test-nodejs-website (push) Successful in 41s
Runs both OCR engines on the preprocessed image and merges their results: word boxes are matched by IoU, and the coordinates of each matched pair are averaged, weighted by the engines' confidence values. Unmatched Tesseract words (bullets, symbols) are added for better coverage.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -11,12 +11,12 @@ import { StepRowDetection } from '@/components/ocr-pipeline/StepRowDetection'
|
|||||||
import { StepWordRecognition } from '@/components/ocr-pipeline/StepWordRecognition'
|
import { StepWordRecognition } from '@/components/ocr-pipeline/StepWordRecognition'
|
||||||
import { OverlayReconstruction } from '@/components/ocr-overlay/OverlayReconstruction'
|
import { OverlayReconstruction } from '@/components/ocr-overlay/OverlayReconstruction'
|
||||||
import { PaddleDirectStep } from '@/components/ocr-overlay/PaddleDirectStep'
|
import { PaddleDirectStep } from '@/components/ocr-overlay/PaddleDirectStep'
|
||||||
import { OVERLAY_PIPELINE_STEPS, PADDLE_DIRECT_STEPS, DOCUMENT_CATEGORIES, dbStepToOverlayUi, type PipelineStep, type SessionListItem, type DocumentCategory } from './types'
|
import { OVERLAY_PIPELINE_STEPS, PADDLE_DIRECT_STEPS, KOMBI_STEPS, DOCUMENT_CATEGORIES, dbStepToOverlayUi, type PipelineStep, type SessionListItem, type DocumentCategory } from './types'
|
||||||
|
|
||||||
const KLAUSUR_API = '/klausur-api'
|
const KLAUSUR_API = '/klausur-api'
|
||||||
|
|
||||||
export default function OcrOverlayPage() {
|
export default function OcrOverlayPage() {
|
||||||
const [mode, setMode] = useState<'pipeline' | 'paddle-direct'>('pipeline')
|
const [mode, setMode] = useState<'pipeline' | 'paddle-direct' | 'kombi'>('pipeline')
|
||||||
const [currentStep, setCurrentStep] = useState(0)
|
const [currentStep, setCurrentStep] = useState(0)
|
||||||
const [sessionId, setSessionId] = useState<string | null>(null)
|
const [sessionId, setSessionId] = useState<string | null>(null)
|
||||||
const [sessionName, setSessionName] = useState<string>('')
|
const [sessionName, setSessionName] = useState<string>('')
|
||||||
@@ -63,13 +63,17 @@ export default function OcrOverlayPage() {
|
|||||||
setSessionName(data.name || data.filename || '')
|
setSessionName(data.name || data.filename || '')
|
||||||
setActiveCategory(data.document_category || undefined)
|
setActiveCategory(data.document_category || undefined)
|
||||||
|
|
||||||
// Check if this session was processed with paddle_direct
|
// Check if this session was processed with paddle_direct or kombi
|
||||||
const isPaddleDirect = data.word_result?.ocr_engine === 'paddle_direct'
|
const ocrEngine = data.word_result?.ocr_engine
|
||||||
|
const isPaddleDirect = ocrEngine === 'paddle_direct'
|
||||||
|
const isKombi = ocrEngine === 'kombi'
|
||||||
|
|
||||||
if (isPaddleDirect) {
|
if (isPaddleDirect || isKombi) {
|
||||||
setMode('paddle-direct')
|
const m = isKombi ? 'kombi' : 'paddle-direct'
|
||||||
|
const baseSteps = isKombi ? KOMBI_STEPS : PADDLE_DIRECT_STEPS
|
||||||
|
setMode(m)
|
||||||
setSteps(
|
setSteps(
|
||||||
PADDLE_DIRECT_STEPS.map((s, i) => ({
|
baseSteps.map((s, i) => ({
|
||||||
...s,
|
...s,
|
||||||
status: i < 4 ? 'completed' : i === 4 ? 'active' : 'pending',
|
status: i < 4 ? 'completed' : i === 4 ? 'active' : 'pending',
|
||||||
})),
|
})),
|
||||||
@@ -101,7 +105,7 @@ export default function OcrOverlayPage() {
|
|||||||
if (sessionId === sid) {
|
if (sessionId === sid) {
|
||||||
setSessionId(null)
|
setSessionId(null)
|
||||||
setCurrentStep(0)
|
setCurrentStep(0)
|
||||||
const baseSteps = mode === 'paddle-direct' ? PADDLE_DIRECT_STEPS : OVERLAY_PIPELINE_STEPS
|
const baseSteps = mode === 'kombi' ? KOMBI_STEPS : mode === 'paddle-direct' ? PADDLE_DIRECT_STEPS : OVERLAY_PIPELINE_STEPS
|
||||||
setSteps(baseSteps.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
|
setSteps(baseSteps.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
|
||||||
}
|
}
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
@@ -158,7 +162,7 @@ export default function OcrOverlayPage() {
|
|||||||
const handleNext = () => {
|
const handleNext = () => {
|
||||||
if (currentStep >= steps.length - 1) {
|
if (currentStep >= steps.length - 1) {
|
||||||
// Last step completed — return to session list
|
// Last step completed — return to session list
|
||||||
const baseSteps = mode === 'paddle-direct' ? PADDLE_DIRECT_STEPS : OVERLAY_PIPELINE_STEPS
|
const baseSteps = mode === 'kombi' ? KOMBI_STEPS : mode === 'paddle-direct' ? PADDLE_DIRECT_STEPS : OVERLAY_PIPELINE_STEPS
|
||||||
setSteps(baseSteps.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
|
setSteps(baseSteps.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
|
||||||
setCurrentStep(0)
|
setCurrentStep(0)
|
||||||
setSessionId(null)
|
setSessionId(null)
|
||||||
@@ -187,7 +191,7 @@ export default function OcrOverlayPage() {
|
|||||||
setSessionId(null)
|
setSessionId(null)
|
||||||
setSessionName('')
|
setSessionName('')
|
||||||
setCurrentStep(0)
|
setCurrentStep(0)
|
||||||
const baseSteps = mode === 'paddle-direct' ? PADDLE_DIRECT_STEPS : OVERLAY_PIPELINE_STEPS
|
const baseSteps = mode === 'kombi' ? KOMBI_STEPS : mode === 'paddle-direct' ? PADDLE_DIRECT_STEPS : OVERLAY_PIPELINE_STEPS
|
||||||
setSteps(baseSteps.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
|
setSteps(baseSteps.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -226,7 +230,7 @@ export default function OcrOverlayPage() {
|
|||||||
}, [sessionId, goToStep])
|
}, [sessionId, goToStep])
|
||||||
|
|
||||||
const renderStep = () => {
|
const renderStep = () => {
|
||||||
if (mode === 'paddle-direct') {
|
if (mode === 'paddle-direct' || mode === 'kombi') {
|
||||||
switch (currentStep) {
|
switch (currentStep) {
|
||||||
case 0:
|
case 0:
|
||||||
return <StepOrientation sessionId={sessionId} onNext={handleOrientationComplete} />
|
return <StepOrientation sessionId={sessionId} onNext={handleOrientationComplete} />
|
||||||
@@ -237,7 +241,21 @@ export default function OcrOverlayPage() {
|
|||||||
case 3:
|
case 3:
|
||||||
return <StepCrop sessionId={sessionId} onNext={handleNext} />
|
return <StepCrop sessionId={sessionId} onNext={handleNext} />
|
||||||
case 4:
|
case 4:
|
||||||
return <PaddleDirectStep sessionId={sessionId} onNext={handleNext} />
|
return mode === 'kombi' ? (
|
||||||
|
<PaddleDirectStep
|
||||||
|
sessionId={sessionId}
|
||||||
|
onNext={handleNext}
|
||||||
|
endpoint="paddle-kombi"
|
||||||
|
title="Kombi-Modus"
|
||||||
|
description="PaddleOCR und Tesseract laufen parallel. Koordinaten werden gewichtet gemittelt fuer optimale Positionierung."
|
||||||
|
icon="🔀"
|
||||||
|
buttonLabel="Paddle + Tesseract starten"
|
||||||
|
runningLabel="Paddle + Tesseract laufen..."
|
||||||
|
engineKey="kombi"
|
||||||
|
/>
|
||||||
|
) : (
|
||||||
|
<PaddleDirectStep sessionId={sessionId} onNext={handleNext} />
|
||||||
|
)
|
||||||
default:
|
default:
|
||||||
return null
|
return null
|
||||||
}
|
}
|
||||||
@@ -480,13 +498,29 @@ export default function OcrOverlayPage() {
|
|||||||
>
|
>
|
||||||
Paddle Direct (5 Schritte)
|
Paddle Direct (5 Schritte)
|
||||||
</button>
|
</button>
|
||||||
|
<button
|
||||||
|
onClick={() => {
|
||||||
|
if (mode === 'kombi') return
|
||||||
|
setMode('kombi')
|
||||||
|
setCurrentStep(0)
|
||||||
|
setSessionId(null)
|
||||||
|
setSteps(KOMBI_STEPS.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
|
||||||
|
}}
|
||||||
|
className={`px-3 py-1.5 text-xs font-medium rounded-md transition-colors ${
|
||||||
|
mode === 'kombi'
|
||||||
|
? 'bg-white dark:bg-gray-700 text-gray-700 dark:text-gray-200 shadow-sm'
|
||||||
|
: 'text-gray-500 dark:text-gray-400 hover:text-gray-700 dark:hover:text-gray-300'
|
||||||
|
}`}
|
||||||
|
>
|
||||||
|
Kombi (5 Schritte)
|
||||||
|
</button>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<PipelineStepper
|
<PipelineStepper
|
||||||
steps={steps}
|
steps={steps}
|
||||||
currentStep={currentStep}
|
currentStep={currentStep}
|
||||||
onStepClick={handleStepClick}
|
onStepClick={handleStepClick}
|
||||||
onReprocess={mode === 'pipeline' && sessionId ? reprocessFromStep : undefined}
|
onReprocess={mode === 'pipeline' && sessionId != null ? reprocessFromStep : undefined}
|
||||||
/>
|
/>
|
||||||
|
|
||||||
<div className="min-h-[400px]">{renderStep()}</div>
|
<div className="min-h-[400px]">{renderStep()}</div>
|
||||||
|
|||||||
@@ -60,6 +60,18 @@ export const PADDLE_DIRECT_STEPS: PipelineStep[] = [
|
|||||||
{ id: 'paddle-direct', name: 'PaddleOCR + Overlay', icon: '⚡', status: 'pending' },
|
{ id: 'paddle-direct', name: 'PaddleOCR + Overlay', icon: '⚡', status: 'pending' },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
/**
 * Step list for Kombi mode (PaddleOCR + Tesseract) — five steps in total.
 *
 * The first four entries are the orientation, deskew, dewarp and crop steps;
 * the fifth is the combined "Paddle + Tesseract" step. Every step starts out
 * with status 'pending'.
 */
export const KOMBI_STEPS: PipelineStep[] = (
  [
    ['orientation', 'Orientierung', '🔄'],
    ['deskew', 'Begradigung', '📐'],
    ['dewarp', 'Entzerrung', '🔧'],
    ['crop', 'Zuschneiden', '✂️'],
    ['kombi', 'Paddle + Tesseract', '🔀'],
  ] as const
).map(([id, name, icon]): PipelineStep => ({ id, name, icon, status: 'pending' }))
|
||||||
|
|
||||||
/** Map from DB step to overlay UI step index */
|
/** Map from DB step to overlay UI step index */
|
||||||
export function dbStepToOverlayUi(dbStep: number): number {
|
export function dbStepToOverlayUi(dbStep: number): number {
|
||||||
// DB: 1=start, 2=orient, 3=deskew, 4=dewarp, 5=crop, 6=columns, 7=rows, 8=words, 9=recon, 10=gt
|
// DB: 1=start, 2=orient, 3=deskew, 4=dewarp, 5=crop, 6=columns, 7=rows, 8=words, 9=recon, 10=gt
|
||||||
|
|||||||
@@ -10,14 +10,38 @@ type Phase = 'idle' | 'running' | 'overlay'
|
|||||||
interface PaddleDirectStepProps {
|
interface PaddleDirectStepProps {
|
||||||
sessionId: string | null
|
sessionId: string | null
|
||||||
onNext: () => void
|
onNext: () => void
|
||||||
|
/** Backend endpoint suffix, default: 'paddle-direct' */
|
||||||
|
endpoint?: string
|
||||||
|
/** Title shown in idle state */
|
||||||
|
title?: string
|
||||||
|
/** Description shown in idle state */
|
||||||
|
description?: string
|
||||||
|
/** Icon shown in idle state */
|
||||||
|
icon?: string
|
||||||
|
/** Button label */
|
||||||
|
buttonLabel?: string
|
||||||
|
/** Running label */
|
||||||
|
runningLabel?: string
|
||||||
|
/** OCR engine key to check for auto-detect */
|
||||||
|
engineKey?: string
|
||||||
}
|
}
|
||||||
|
|
||||||
export function PaddleDirectStep({ sessionId, onNext }: PaddleDirectStepProps) {
|
export function PaddleDirectStep({
|
||||||
|
sessionId,
|
||||||
|
onNext,
|
||||||
|
endpoint = 'paddle-direct',
|
||||||
|
title = 'Paddle Direct',
|
||||||
|
description = 'PaddleOCR erkennt alle Woerter direkt auf dem Originalbild — ohne Begradigung, Entzerrung oder Zuschnitt.',
|
||||||
|
icon = '⚡',
|
||||||
|
buttonLabel = 'PaddleOCR starten',
|
||||||
|
runningLabel = 'PaddleOCR laeuft...',
|
||||||
|
engineKey = 'paddle_direct',
|
||||||
|
}: PaddleDirectStepProps) {
|
||||||
const [phase, setPhase] = useState<Phase>('idle')
|
const [phase, setPhase] = useState<Phase>('idle')
|
||||||
const [error, setError] = useState<string | null>(null)
|
const [error, setError] = useState<string | null>(null)
|
||||||
const [stats, setStats] = useState<{ cells: number; rows: number; duration: number } | null>(null)
|
const [stats, setStats] = useState<{ cells: number; rows: number; duration: number } | null>(null)
|
||||||
|
|
||||||
// Auto-detect: if session already has paddle_direct word_result → show overlay
|
// Auto-detect: if session already has matching word_result → show overlay
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
if (!sessionId) return
|
if (!sessionId) return
|
||||||
let cancelled = false
|
let cancelled = false
|
||||||
@@ -26,7 +50,7 @@ export function PaddleDirectStep({ sessionId, onNext }: PaddleDirectStepProps) {
|
|||||||
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}`)
|
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}`)
|
||||||
if (!res.ok || cancelled) return
|
if (!res.ok || cancelled) return
|
||||||
const data = await res.json()
|
const data = await res.json()
|
||||||
if (data.word_result?.ocr_engine === 'paddle_direct') {
|
if (data.word_result?.ocr_engine === engineKey) {
|
||||||
setPhase('overlay')
|
setPhase('overlay')
|
||||||
}
|
}
|
||||||
} catch {
|
} catch {
|
||||||
@@ -34,14 +58,14 @@ export function PaddleDirectStep({ sessionId, onNext }: PaddleDirectStepProps) {
|
|||||||
}
|
}
|
||||||
})()
|
})()
|
||||||
return () => { cancelled = true }
|
return () => { cancelled = true }
|
||||||
}, [sessionId])
|
}, [sessionId, engineKey])
|
||||||
|
|
||||||
const runPaddleDirect = useCallback(async () => {
|
const runOcr = useCallback(async () => {
|
||||||
if (!sessionId) return
|
if (!sessionId) return
|
||||||
setPhase('running')
|
setPhase('running')
|
||||||
setError(null)
|
setError(null)
|
||||||
try {
|
try {
|
||||||
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/paddle-direct`, {
|
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/${endpoint}`, {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
})
|
})
|
||||||
if (!res.ok) {
|
if (!res.ok) {
|
||||||
@@ -59,7 +83,7 @@ export function PaddleDirectStep({ sessionId, onNext }: PaddleDirectStepProps) {
|
|||||||
setError(e instanceof Error ? e.message : 'Unbekannter Fehler')
|
setError(e instanceof Error ? e.message : 'Unbekannter Fehler')
|
||||||
setPhase('idle')
|
setPhase('idle')
|
||||||
}
|
}
|
||||||
}, [sessionId])
|
}, [sessionId, endpoint])
|
||||||
|
|
||||||
if (!sessionId) {
|
if (!sessionId) {
|
||||||
return (
|
return (
|
||||||
@@ -91,7 +115,7 @@ export function PaddleDirectStep({ sessionId, onNext }: PaddleDirectStepProps) {
|
|||||||
<div className="w-10 h-10 border-4 border-teal-200 dark:border-teal-800 border-t-teal-600 dark:border-t-teal-400 rounded-full animate-spin" />
|
<div className="w-10 h-10 border-4 border-teal-200 dark:border-teal-800 border-t-teal-600 dark:border-t-teal-400 rounded-full animate-spin" />
|
||||||
<div className="text-center space-y-1">
|
<div className="text-center space-y-1">
|
||||||
<p className="text-sm font-medium text-gray-700 dark:text-gray-300">
|
<p className="text-sm font-medium text-gray-700 dark:text-gray-300">
|
||||||
PaddleOCR laeuft...
|
{runningLabel}
|
||||||
</p>
|
</p>
|
||||||
<p className="text-xs text-gray-400">
|
<p className="text-xs text-gray-400">
|
||||||
Bild wird analysiert (ca. 5-30s)
|
Bild wird analysiert (ca. 5-30s)
|
||||||
@@ -101,12 +125,12 @@ export function PaddleDirectStep({ sessionId, onNext }: PaddleDirectStepProps) {
|
|||||||
) : (
|
) : (
|
||||||
<>
|
<>
|
||||||
<div className="text-center space-y-2">
|
<div className="text-center space-y-2">
|
||||||
<div className="text-4xl">⚡</div>
|
<div className="text-4xl">{icon}</div>
|
||||||
<h3 className="text-lg font-medium text-gray-700 dark:text-gray-300">
|
<h3 className="text-lg font-medium text-gray-700 dark:text-gray-300">
|
||||||
Paddle Direct
|
{title}
|
||||||
</h3>
|
</h3>
|
||||||
<p className="text-sm text-gray-500 dark:text-gray-400 max-w-md">
|
<p className="text-sm text-gray-500 dark:text-gray-400 max-w-md">
|
||||||
PaddleOCR erkennt alle Woerter direkt auf dem Originalbild — ohne Begradigung, Entzerrung oder Zuschnitt.
|
{description}
|
||||||
</p>
|
</p>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
@@ -117,10 +141,10 @@ export function PaddleDirectStep({ sessionId, onNext }: PaddleDirectStepProps) {
|
|||||||
)}
|
)}
|
||||||
|
|
||||||
<button
|
<button
|
||||||
onClick={runPaddleDirect}
|
onClick={runOcr}
|
||||||
className="px-6 py-2.5 bg-teal-600 text-white text-sm font-medium rounded-lg hover:bg-teal-700 transition-colors"
|
className="px-6 py-2.5 bg-teal-600 text-white text-sm font-medium rounded-lg hover:bg-teal-700 transition-colors"
|
||||||
>
|
>
|
||||||
PaddleOCR starten
|
{buttonLabel}
|
||||||
</button>
|
</button>
|
||||||
</>
|
</>
|
||||||
)}
|
)}
|
||||||
|
|||||||
@@ -2599,6 +2599,189 @@ async def paddle_direct(session_id: str):
|
|||||||
return {"session_id": session_id, **word_result}
|
return {"session_id": session_id, **word_result}
|
||||||
|
|
||||||
|
|
||||||
|
def _box_iou(a: dict, b: dict) -> float:
|
||||||
|
"""Compute IoU between two word boxes (each has left, top, width, height)."""
|
||||||
|
ax1, ay1 = a["left"], a["top"]
|
||||||
|
ax2, ay2 = ax1 + a["width"], ay1 + a["height"]
|
||||||
|
bx1, by1 = b["left"], b["top"]
|
||||||
|
bx2, by2 = bx1 + b["width"], by1 + b["height"]
|
||||||
|
|
||||||
|
ix1, iy1 = max(ax1, bx1), max(ay1, by1)
|
||||||
|
ix2, iy2 = min(ax2, bx2), min(ay2, by2)
|
||||||
|
inter = max(0, ix2 - ix1) * max(0, iy2 - iy1)
|
||||||
|
if inter == 0:
|
||||||
|
return 0.0
|
||||||
|
area_a = (ax2 - ax1) * (ay2 - ay1)
|
||||||
|
area_b = (bx2 - bx1) * (by2 - by1)
|
||||||
|
return inter / (area_a + area_b - inter) if (area_a + area_b - inter) > 0 else 0.0
|
||||||
|
|
||||||
|
|
||||||
|
def _merge_paddle_tesseract(paddle_words: list, tess_words: list) -> list:
|
||||||
|
"""Merge word boxes from PaddleOCR and Tesseract.
|
||||||
|
|
||||||
|
Matching: IoU > 0.3 between bounding boxes.
|
||||||
|
Merging: Weighted average of coordinates by confidence.
|
||||||
|
"""
|
||||||
|
merged = []
|
||||||
|
used_tess: set = set()
|
||||||
|
|
||||||
|
for pw in paddle_words:
|
||||||
|
best_iou, best_ti = 0.0, -1
|
||||||
|
for ti, tw in enumerate(tess_words):
|
||||||
|
if ti in used_tess:
|
||||||
|
continue
|
||||||
|
iou = _box_iou(pw, tw)
|
||||||
|
if iou > best_iou:
|
||||||
|
best_iou, best_ti = iou, ti
|
||||||
|
|
||||||
|
if best_iou > 0.3 and best_ti >= 0:
|
||||||
|
tw = tess_words[best_ti]
|
||||||
|
used_tess.add(best_ti)
|
||||||
|
pc = pw.get("conf", 80)
|
||||||
|
tc = tw.get("conf", 50)
|
||||||
|
total = pc + tc
|
||||||
|
if total == 0:
|
||||||
|
total = 1
|
||||||
|
merged.append({
|
||||||
|
"text": pw["text"], # Paddle text usually better
|
||||||
|
"left": round((pw["left"] * pc + tw["left"] * tc) / total),
|
||||||
|
"top": round((pw["top"] * pc + tw["top"] * tc) / total),
|
||||||
|
"width": round((pw["width"] * pc + tw["width"] * tc) / total),
|
||||||
|
"height": round((pw["height"] * pc + tw["height"] * tc) / total),
|
||||||
|
"conf": max(pc, tc),
|
||||||
|
})
|
||||||
|
else:
|
||||||
|
merged.append(pw)
|
||||||
|
|
||||||
|
# Add unmatched Tesseract words (bullet points, symbols, etc.)
|
||||||
|
for ti, tw in enumerate(tess_words):
|
||||||
|
if ti not in used_tess and tw.get("conf", 0) >= 40:
|
||||||
|
merged.append(tw)
|
||||||
|
|
||||||
|
return merged
|
||||||
|
|
||||||
|
|
||||||
|
def _kombi_parse_conf(raw) -> int:
    """Convert a Tesseract confidence value to an int, or -1 if unusable.

    Accepts ints, floats and numeric strings (including fractional ones such
    as ``"96.4"``); anything that cannot be interpreted as a finite number
    yields -1.
    """
    try:
        return int(float(raw))
    except (TypeError, ValueError, OverflowError):
        return -1


def _kombi_tesseract_words(img_bgr) -> list:
    """Run Tesseract on a BGR image and return its word boxes.

    This is a plain synchronous function so that the caller can run it in a
    worker thread. Entries with empty text or a confidence below 20 are
    dropped.

    Returns:
        A list of dicts with ``text``, ``left``, ``top``, ``width``,
        ``height`` and ``conf``.
    """
    from PIL import Image
    import pytesseract

    pil_img = Image.fromarray(cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB))
    data = pytesseract.image_to_data(
        pil_img, lang="eng+deu",
        config="--psm 6 --oem 3",
        output_type=pytesseract.Output.DICT,
    )

    words = []
    for i in range(len(data["text"])):
        text = str(data["text"][i]).strip()
        conf = _kombi_parse_conf(data["conf"][i])
        if not text or conf < 20:
            continue
        words.append({
            "text": text,
            "left": data["left"][i],
            "top": data["top"][i],
            "width": data["width"][i],
            "height": data["height"][i],
            "conf": conf,
        })
    return words


@router.post("/sessions/{session_id}/paddle-kombi")
async def paddle_kombi(session_id: str):
    """Run PaddleOCR + Tesseract on the preprocessed image and merge results.

    Both engines run on the same image: the session's "cropped" image if
    present, otherwise "dewarped", otherwise "original". Word boxes are
    matched by IoU and coordinates are averaged weighted by confidence.
    Unmatched Tesseract words (bullets, symbols) are added. The merged words
    are turned into a cell grid, stored on the session as ``word_result``
    and appended to the pipeline log.

    Args:
        session_id: Identifier of the OCR pipeline session.

    Returns:
        A dict with ``session_id`` plus all fields of the stored
        ``word_result``.

    Raises:
        HTTPException: 404 if the session has no image, 400 if the image
            cannot be decoded or if both engines return no words.
    """
    import asyncio

    # Prefer the most processed image that exists for this session.
    img_png = await get_session_image(session_id, "cropped")
    if not img_png:
        img_png = await get_session_image(session_id, "dewarped")
    if not img_png:
        img_png = await get_session_image(session_id, "original")
    if not img_png:
        raise HTTPException(status_code=404, detail="No image found for this session")

    img_arr = np.frombuffer(img_png, dtype=np.uint8)
    img_bgr = cv2.imdecode(img_arr, cv2.IMREAD_COLOR)
    if img_bgr is None:
        raise HTTPException(status_code=400, detail="Failed to decode image")

    img_h, img_w = img_bgr.shape[:2]

    from cv_ocr_engines import ocr_region_paddle

    t0 = time.time()

    # --- PaddleOCR ---
    paddle_words = await ocr_region_paddle(img_bgr, region=None)
    if not paddle_words:
        paddle_words = []

    # --- Tesseract ---
    # pytesseract is a blocking call; running it in the default executor
    # keeps the event loop free to serve other requests in the meantime.
    # NOTE(review): the two engines are independent, so they could also be
    # overlapped (e.g. asyncio.gather) — confirm ocr_region_paddle tolerates
    # that before changing the order of execution.
    loop = asyncio.get_running_loop()
    tess_words = await loop.run_in_executor(None, _kombi_tesseract_words, img_bgr)

    # --- Merge ---
    if not paddle_words and not tess_words:
        raise HTTPException(status_code=400, detail="Both OCR engines returned no words")

    merged_words = _merge_paddle_tesseract(paddle_words, tess_words)

    cells, columns_meta = build_grid_from_words(merged_words, img_w, img_h)
    duration = time.time() - t0

    for cell in cells:
        cell["ocr_engine"] = "kombi"

    n_rows = len(set(c["row_index"] for c in cells)) if cells else 0
    n_cols = len(columns_meta)
    col_types = {c.get("type") for c in columns_meta}
    # The layout is reported as "vocab" when an EN or DE column type is present.
    is_vocab = bool(col_types & {"column_en", "column_de"})

    word_result = {
        "cells": cells,
        "grid_shape": {"rows": n_rows, "cols": n_cols, "total_cells": len(cells)},
        "columns_used": columns_meta,
        "layout": "vocab" if is_vocab else "generic",
        "image_width": img_w,
        "image_height": img_h,
        "duration_seconds": round(duration, 2),
        "ocr_engine": "kombi",
        "grid_method": "kombi",
        "summary": {
            "total_cells": len(cells),
            "non_empty_cells": sum(1 for c in cells if c.get("text")),
            "low_confidence": sum(1 for c in cells if 0 < c.get("confidence", 0) < 50),
            "paddle_words": len(paddle_words),
            "tesseract_words": len(tess_words),
            "merged_words": len(merged_words),
        },
    }

    # The image that was actually OCR'd is written back as cropped_png, even
    # when it came from the dewarped/original fallback.
    await update_session_db(
        session_id,
        word_result=word_result,
        cropped_png=img_png,
        current_step=8,
    )

    logger.info(
        "paddle_kombi session %s: %d cells (%d rows, %d cols) in %.2fs "
        "[paddle=%d, tess=%d, merged=%d]",
        session_id, len(cells), n_rows, n_cols, duration,
        len(paddle_words), len(tess_words), len(merged_words),
    )

    await _append_pipeline_log(session_id, "paddle_kombi", {
        "total_cells": len(cells),
        "non_empty_cells": word_result["summary"]["non_empty_cells"],
        "paddle_words": len(paddle_words),
        "tesseract_words": len(tess_words),
        "merged_words": len(merged_words),
        "ocr_engine": "kombi",
    }, duration_ms=int(duration * 1000))

    return {"session_id": session_id, **word_result}
|
||||||
|
|
||||||
|
|
||||||
class WordGroundTruthRequest(BaseModel):
|
class WordGroundTruthRequest(BaseModel):
|
||||||
is_correct: bool
|
is_correct: bool
|
||||||
corrected_entries: Optional[List[Dict[str, Any]]] = None
|
corrected_entries: Optional[List[Dict[str, Any]]] = None
|
||||||
|
|||||||
Reference in New Issue
Block a user