feat: Paddle Direct — 1-click OCR without deskew/dewarp/crop
Some checks failed
CI / go-lint (push) Has been cancelled
CI / python-lint (push) Has been cancelled
CI / nodejs-lint (push) Has been cancelled
CI / test-go-school (push) Has been cancelled
CI / test-go-edu-search (push) Has been cancelled
CI / test-python-klausur (push) Has been cancelled
CI / test-python-agent-core (push) Has been cancelled
CI / test-nodejs-website (push) Has been cancelled
Some checks failed
CI / go-lint (push) Has been cancelled
CI / python-lint (push) Has been cancelled
CI / nodejs-lint (push) Has been cancelled
CI / test-go-school (push) Has been cancelled
CI / test-go-edu-search (push) Has been cancelled
CI / test-python-klausur (push) Has been cancelled
CI / test-python-agent-core (push) Has been cancelled
CI / test-nodejs-website (push) Has been cancelled
New 2-step mode (Upload → PaddleOCR+Overlay) alongside the existing 7-step pipeline. The backend endpoint runs PaddleOCR on the original image and clusters words into rows/cells directly. The frontend adds a mode toggle and a PaddleDirectStep component.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -10,11 +10,13 @@ import { StepCrop } from '@/components/ocr-pipeline/StepCrop'
|
|||||||
import { StepRowDetection } from '@/components/ocr-pipeline/StepRowDetection'
|
import { StepRowDetection } from '@/components/ocr-pipeline/StepRowDetection'
|
||||||
import { StepWordRecognition } from '@/components/ocr-pipeline/StepWordRecognition'
|
import { StepWordRecognition } from '@/components/ocr-pipeline/StepWordRecognition'
|
||||||
import { OverlayReconstruction } from '@/components/ocr-overlay/OverlayReconstruction'
|
import { OverlayReconstruction } from '@/components/ocr-overlay/OverlayReconstruction'
|
||||||
import { OVERLAY_PIPELINE_STEPS, DOCUMENT_CATEGORIES, dbStepToOverlayUi, type PipelineStep, type SessionListItem, type DocumentCategory } from './types'
|
import { PaddleDirectStep } from '@/components/ocr-overlay/PaddleDirectStep'
|
||||||
|
import { OVERLAY_PIPELINE_STEPS, PADDLE_DIRECT_STEPS, DOCUMENT_CATEGORIES, dbStepToOverlayUi, type PipelineStep, type SessionListItem, type DocumentCategory } from './types'
|
||||||
|
|
||||||
const KLAUSUR_API = '/klausur-api'
|
const KLAUSUR_API = '/klausur-api'
|
||||||
|
|
||||||
export default function OcrOverlayPage() {
|
export default function OcrOverlayPage() {
|
||||||
|
const [mode, setMode] = useState<'pipeline' | 'paddle-direct'>('pipeline')
|
||||||
const [currentStep, setCurrentStep] = useState(0)
|
const [currentStep, setCurrentStep] = useState(0)
|
||||||
const [sessionId, setSessionId] = useState<string | null>(null)
|
const [sessionId, setSessionId] = useState<string | null>(null)
|
||||||
const [sessionName, setSessionName] = useState<string>('')
|
const [sessionName, setSessionName] = useState<string>('')
|
||||||
@@ -61,17 +63,32 @@ export default function OcrOverlayPage() {
|
|||||||
setSessionName(data.name || data.filename || '')
|
setSessionName(data.name || data.filename || '')
|
||||||
setActiveCategory(data.document_category || undefined)
|
setActiveCategory(data.document_category || undefined)
|
||||||
|
|
||||||
// Map DB step to overlay UI step
|
// Check if this session was processed with paddle_direct
|
||||||
const dbStep = data.current_step || 1
|
const isPaddleDirect = data.word_result?.ocr_engine === 'paddle_direct'
|
||||||
const uiStep = dbStepToOverlayUi(dbStep)
|
|
||||||
|
|
||||||
setSteps(
|
if (isPaddleDirect) {
|
||||||
OVERLAY_PIPELINE_STEPS.map((s, i) => ({
|
setMode('paddle-direct')
|
||||||
...s,
|
setSteps(
|
||||||
status: i < uiStep ? 'completed' : i === uiStep ? 'active' : 'pending',
|
PADDLE_DIRECT_STEPS.map((s, i) => ({
|
||||||
})),
|
...s,
|
||||||
)
|
status: i < 1 ? 'completed' : i === 1 ? 'active' : 'pending',
|
||||||
setCurrentStep(uiStep)
|
})),
|
||||||
|
)
|
||||||
|
setCurrentStep(1)
|
||||||
|
} else {
|
||||||
|
setMode('pipeline')
|
||||||
|
// Map DB step to overlay UI step
|
||||||
|
const dbStep = data.current_step || 1
|
||||||
|
const uiStep = dbStepToOverlayUi(dbStep)
|
||||||
|
|
||||||
|
setSteps(
|
||||||
|
OVERLAY_PIPELINE_STEPS.map((s, i) => ({
|
||||||
|
...s,
|
||||||
|
status: i < uiStep ? 'completed' : i === uiStep ? 'active' : 'pending',
|
||||||
|
})),
|
||||||
|
)
|
||||||
|
setCurrentStep(uiStep)
|
||||||
|
}
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
console.error('Failed to open session:', e)
|
console.error('Failed to open session:', e)
|
||||||
}
|
}
|
||||||
@@ -84,12 +101,13 @@ export default function OcrOverlayPage() {
|
|||||||
if (sessionId === sid) {
|
if (sessionId === sid) {
|
||||||
setSessionId(null)
|
setSessionId(null)
|
||||||
setCurrentStep(0)
|
setCurrentStep(0)
|
||||||
setSteps(OVERLAY_PIPELINE_STEPS.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
|
const baseSteps = mode === 'paddle-direct' ? PADDLE_DIRECT_STEPS : OVERLAY_PIPELINE_STEPS
|
||||||
|
setSteps(baseSteps.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
|
||||||
}
|
}
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
console.error('Failed to delete session:', e)
|
console.error('Failed to delete session:', e)
|
||||||
}
|
}
|
||||||
}, [sessionId])
|
}, [sessionId, mode])
|
||||||
|
|
||||||
const renameSession = useCallback(async (sid: string, newName: string) => {
|
const renameSession = useCallback(async (sid: string, newName: string) => {
|
||||||
try {
|
try {
|
||||||
@@ -140,7 +158,8 @@ export default function OcrOverlayPage() {
|
|||||||
const handleNext = () => {
|
const handleNext = () => {
|
||||||
if (currentStep >= steps.length - 1) {
|
if (currentStep >= steps.length - 1) {
|
||||||
// Last step completed — return to session list
|
// Last step completed — return to session list
|
||||||
setSteps(OVERLAY_PIPELINE_STEPS.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
|
const baseSteps = mode === 'paddle-direct' ? PADDLE_DIRECT_STEPS : OVERLAY_PIPELINE_STEPS
|
||||||
|
setSteps(baseSteps.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
|
||||||
setCurrentStep(0)
|
setCurrentStep(0)
|
||||||
setSessionId(null)
|
setSessionId(null)
|
||||||
loadSessions()
|
loadSessions()
|
||||||
@@ -168,7 +187,8 @@ export default function OcrOverlayPage() {
|
|||||||
setSessionId(null)
|
setSessionId(null)
|
||||||
setSessionName('')
|
setSessionName('')
|
||||||
setCurrentStep(0)
|
setCurrentStep(0)
|
||||||
setSteps(OVERLAY_PIPELINE_STEPS.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
|
const baseSteps = mode === 'paddle-direct' ? PADDLE_DIRECT_STEPS : OVERLAY_PIPELINE_STEPS
|
||||||
|
setSteps(baseSteps.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
|
||||||
}
|
}
|
||||||
|
|
||||||
const stepNames: Record<number, string> = {
|
const stepNames: Record<number, string> = {
|
||||||
@@ -206,6 +226,16 @@ export default function OcrOverlayPage() {
|
|||||||
}, [sessionId, goToStep])
|
}, [sessionId, goToStep])
|
||||||
|
|
||||||
const renderStep = () => {
|
const renderStep = () => {
|
||||||
|
if (mode === 'paddle-direct') {
|
||||||
|
switch (currentStep) {
|
||||||
|
case 0:
|
||||||
|
return <StepOrientation sessionId={sessionId} onNext={handleOrientationComplete} />
|
||||||
|
case 1:
|
||||||
|
return <PaddleDirectStep sessionId={sessionId} onNext={handleNext} />
|
||||||
|
default:
|
||||||
|
return null
|
||||||
|
}
|
||||||
|
}
|
||||||
switch (currentStep) {
|
switch (currentStep) {
|
||||||
case 0:
|
case 0:
|
||||||
return <StepOrientation sessionId={sessionId} onNext={handleOrientationComplete} />
|
return <StepOrientation sessionId={sessionId} onNext={handleOrientationComplete} />
|
||||||
@@ -410,11 +440,47 @@ export default function OcrOverlayPage() {
|
|||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
|
|
||||||
|
{/* Mode Toggle */}
|
||||||
|
<div className="flex items-center gap-1 bg-gray-100 dark:bg-gray-800 rounded-lg p-1 w-fit">
|
||||||
|
<button
|
||||||
|
onClick={() => {
|
||||||
|
if (mode === 'pipeline') return
|
||||||
|
setMode('pipeline')
|
||||||
|
setCurrentStep(0)
|
||||||
|
setSessionId(null)
|
||||||
|
setSteps(OVERLAY_PIPELINE_STEPS.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
|
||||||
|
}}
|
||||||
|
className={`px-3 py-1.5 text-xs font-medium rounded-md transition-colors ${
|
||||||
|
mode === 'pipeline'
|
||||||
|
? 'bg-white dark:bg-gray-700 text-gray-700 dark:text-gray-200 shadow-sm'
|
||||||
|
: 'text-gray-500 dark:text-gray-400 hover:text-gray-700 dark:hover:text-gray-300'
|
||||||
|
}`}
|
||||||
|
>
|
||||||
|
Pipeline (7 Schritte)
|
||||||
|
</button>
|
||||||
|
<button
|
||||||
|
onClick={() => {
|
||||||
|
if (mode === 'paddle-direct') return
|
||||||
|
setMode('paddle-direct')
|
||||||
|
setCurrentStep(0)
|
||||||
|
setSessionId(null)
|
||||||
|
setSteps(PADDLE_DIRECT_STEPS.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
|
||||||
|
}}
|
||||||
|
className={`px-3 py-1.5 text-xs font-medium rounded-md transition-colors ${
|
||||||
|
mode === 'paddle-direct'
|
||||||
|
? 'bg-white dark:bg-gray-700 text-gray-700 dark:text-gray-200 shadow-sm'
|
||||||
|
: 'text-gray-500 dark:text-gray-400 hover:text-gray-700 dark:hover:text-gray-300'
|
||||||
|
}`}
|
||||||
|
>
|
||||||
|
Paddle Direct (2 Schritte)
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
|
||||||
<PipelineStepper
|
<PipelineStepper
|
||||||
steps={steps}
|
steps={steps}
|
||||||
currentStep={currentStep}
|
currentStep={currentStep}
|
||||||
onStepClick={handleStepClick}
|
onStepClick={handleStepClick}
|
||||||
onReprocess={sessionId ? reprocessFromStep : undefined}
|
onReprocess={mode === 'pipeline' && sessionId ? reprocessFromStep : undefined}
|
||||||
/>
|
/>
|
||||||
|
|
||||||
<div className="min-h-[400px]">{renderStep()}</div>
|
<div className="min-h-[400px]">{renderStep()}</div>
|
||||||
|
|||||||
@@ -48,6 +48,15 @@ export const OVERLAY_UI_TO_DB: Record<number, number> = {
|
|||||||
6: 9, // reconstruction
|
6: 9, // reconstruction
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Step list for the Paddle Direct mode.
 *
 * Only two entries: the upload step followed by the combined
 * PaddleOCR + overlay step. The deskew, dewarp, crop and row steps of the
 * regular overlay pipeline have no counterpart here.
 * Every step starts out as `pending`; callers set the live status themselves.
 */
export const PADDLE_DIRECT_STEPS: PipelineStep[] = [
  {
    id: 'orientation',
    name: 'Upload',
    icon: '📤',
    status: 'pending',
  },
  {
    id: 'paddle-direct',
    name: 'PaddleOCR + Overlay',
    icon: '⚡',
    status: 'pending',
  },
]
|
||||||
|
|
||||||
/** Map from DB step to overlay UI step index */
|
/** Map from DB step to overlay UI step index */
|
||||||
export function dbStepToOverlayUi(dbStep: number): number {
|
export function dbStepToOverlayUi(dbStep: number): number {
|
||||||
// DB: 1=start, 2=orient, 3=deskew, 4=dewarp, 5=crop, 6=columns, 7=rows, 8=words, 9=recon, 10=gt
|
// DB: 1=start, 2=orient, 3=deskew, 4=dewarp, 5=crop, 6=columns, 7=rows, 8=words, 9=recon, 10=gt
|
||||||
|
|||||||
129
admin-lehrer/components/ocr-overlay/PaddleDirectStep.tsx
Normal file
129
admin-lehrer/components/ocr-overlay/PaddleDirectStep.tsx
Normal file
@@ -0,0 +1,129 @@
|
|||||||
|
'use client'
|
||||||
|
|
||||||
|
import { useCallback, useEffect, useState } from 'react'
|
||||||
|
import { OverlayReconstruction } from './OverlayReconstruction'
|
||||||
|
|
||||||
|
const KLAUSUR_API = '/klausur-api'
|
||||||
|
|
||||||
|
/** UI phase of the Paddle Direct step. */
type Phase =
  | 'idle' // nothing started yet, start button is shown
  | 'running' // OCR request in flight, spinner is shown
  | 'overlay' // a result exists, overlay reconstruction is shown
|
||||||
|
|
||||||
|
/** Props accepted by the Paddle Direct step component. */
interface PaddleDirectStepProps {
  /** Session to run OCR on; `null` while no image has been uploaded. */
  sessionId: string | null
  /** Callback invoked to advance past this step. */
  onNext: () => void
}
|
||||||
|
|
||||||
|
/**
 * Second (and last) step of the Paddle Direct mode.
 *
 * Renders one of three views depending on the current phase:
 * - `idle`: explanation text, optional error box and a start button,
 * - `running`: spinner while the `paddle-direct` POST request is in flight,
 * - `overlay`: optional result stats plus `OverlayReconstruction`.
 *
 * When the session already carries a word result produced by the
 * `paddle_direct` engine, the overlay is shown directly without re-running OCR.
 *
 * @param sessionId - Session to process; `null` renders an upload hint instead.
 * @param onNext - Forwarded unchanged to `OverlayReconstruction`.
 */
export function PaddleDirectStep({ sessionId, onNext }: PaddleDirectStepProps) {
  const [phase, setPhase] = useState<Phase>('idle')
  const [error, setError] = useState<string | null>(null)
  const [stats, setStats] = useState<{ cells: number; rows: number; duration: number } | null>(null)

  // Auto-detect: if session already has paddle_direct word_result → show overlay
  useEffect(() => {
    // The component can stay mounted while the parent swaps the session, so
    // drop everything that belonged to the previous session first; otherwise
    // its stats / error / phase would be shown for the new one.
    setPhase('idle')
    setStats(null)
    setError(null)

    if (!sessionId) return
    let cancelled = false
    ;(async () => {
      try {
        const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}`)
        if (!res.ok || cancelled) return
        const data = await res.json()
        // Re-check after the second await: the session may have changed (or the
        // component unmounted) while the body was being parsed.
        if (cancelled) return
        if (data.word_result?.ocr_engine === 'paddle_direct') {
          setPhase('overlay')
        }
      } catch (e: unknown) {
        // Best effort only: on failure the user simply sees the start button.
        if (!cancelled) {
          console.warn('Failed to check session for an existing paddle_direct result:', e)
        }
      }
    })()
    return () => {
      cancelled = true
    }
  }, [sessionId])

  // NOTE(review): a response arriving after the session was switched still
  // updates state here; guarding that needs a ref to the current session id.
  const runPaddleDirect = useCallback(async () => {
    if (!sessionId) return
    setPhase('running')
    setError(null)
    try {
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/paddle-direct`, {
        method: 'POST',
      })
      if (!res.ok) {
        const data = await res.json().catch(() => ({}))
        // `detail` is only usable as a message when it is a non-empty string;
        // anything else would stringify to "[object Object]".
        const detail = typeof data.detail === 'string' && data.detail ? data.detail : `HTTP ${res.status}`
        throw new Error(detail)
      }
      const data = await res.json()
      setStats({
        cells: data.summary?.total_cells ?? 0,
        rows: data.grid_shape?.rows ?? 0,
        duration: data.duration_seconds ?? 0,
      })
      setPhase('overlay')
    } catch (e: unknown) {
      setError(e instanceof Error ? e.message : 'Unbekannter Fehler')
      setPhase('idle')
    }
  }, [sessionId])

  if (!sessionId) {
    return (
      <div className="text-sm text-gray-400 py-8 text-center">
        Bitte zuerst ein Bild hochladen.
      </div>
    )
  }

  if (phase === 'overlay') {
    return (
      <div className="space-y-3">
        {/* Stats exist only when OCR ran in this component instance, not for auto-detected results. */}
        {stats && (
          <div className="flex items-center gap-4 text-xs text-gray-500 dark:text-gray-400">
            <span>{stats.cells} Woerter erkannt</span>
            <span>{stats.rows} Zeilen</span>
            <span>{stats.duration.toFixed(1)}s</span>
          </div>
        )}
        <OverlayReconstruction sessionId={sessionId} onNext={onNext} />
      </div>
    )
  }

  return (
    <div className="flex flex-col items-center justify-center py-16 space-y-6">
      {phase === 'running' ? (
        <>
          <div className="w-10 h-10 border-4 border-teal-200 dark:border-teal-800 border-t-teal-600 dark:border-t-teal-400 rounded-full animate-spin" />
          <div className="text-center space-y-1">
            <p className="text-sm font-medium text-gray-700 dark:text-gray-300">
              PaddleOCR laeuft...
            </p>
            <p className="text-xs text-gray-400">
              Bild wird analysiert (ca. 5-30s)
            </p>
          </div>
        </>
      ) : (
        <>
          <div className="text-center space-y-2">
            <div className="text-4xl">⚡</div>
            <h3 className="text-lg font-medium text-gray-700 dark:text-gray-300">
              Paddle Direct
            </h3>
            <p className="text-sm text-gray-500 dark:text-gray-400 max-w-md">
              PaddleOCR erkennt alle Woerter direkt auf dem Originalbild — ohne Begradigung, Entzerrung oder Zuschnitt.
            </p>
          </div>

          {error && (
            <div className="text-sm text-red-500 bg-red-50 dark:bg-red-900/20 px-4 py-2 rounded-lg">
              {error}
            </div>
          )}

          <button
            type="button"
            onClick={runPaddleDirect}
            className="px-6 py-2.5 bg-teal-600 text-white text-sm font-medium rounded-lg hover:bg-teal-700 transition-colors"
          >
            PaddleOCR starten
          </button>
        </>
      )}
    </div>
  )
}
|
||||||
@@ -2509,6 +2509,189 @@ async def _word_stream_generator(
|
|||||||
yield f"data: {json.dumps(complete_event)}\n\n"
|
yield f"data: {json.dumps(complete_event)}\n\n"
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/sessions/{session_id}/paddle-direct")
async def paddle_direct(session_id: str):
    """OCR the session's original image with PaddleOCR and persist a word grid.

    No deskew, dewarp, crop or row detection happens here: the recognised
    words are clustered straight into grid cells. The original image bytes
    are additionally written to ``cropped_png`` so that OverlayReconstruction
    has a background image to show.

    Raises:
        HTTPException 404: the session has no original image.
        HTTPException 400: the image cannot be decoded, or OCR found no words.

    Returns:
        The stored word result with ``session_id`` added in front.
    """
    source_png = await get_session_image(session_id, "original")
    if not source_png:
        raise HTTPException(status_code=404, detail="No original image found for this session")

    decoded = cv2.imdecode(np.frombuffer(source_png, dtype=np.uint8), cv2.IMREAD_COLOR)
    if decoded is None:
        raise HTTPException(status_code=400, detail="Failed to decode original image")

    height_px, width_px = decoded.shape[:2]

    from cv_ocr_engines import ocr_region_paddle

    # The measured duration covers OCR plus grid construction.
    started_at = time.time()
    words = await ocr_region_paddle(decoded, region=None)
    if not words:
        raise HTTPException(status_code=400, detail="PaddleOCR returned no words")

    cells, columns_meta = _paddle_words_to_grid_cells(words, width_px, height_px)
    duration = time.time() - started_at

    cell_count = len(cells)
    row_count = len({cell["row_index"] for cell in cells}) if cells else 0
    col_count = len(columns_meta)

    filled_count = sum(1 for cell in cells if cell.get("text"))
    # Confidence 0 is excluded from the low-confidence bucket.
    shaky_count = sum(1 for cell in cells if 0 < cell.get("confidence", 0) < 50)

    word_result = {
        "cells": cells,
        "grid_shape": {
            "rows": row_count,
            "cols": col_count,
            "total_cells": cell_count,
        },
        "columns_used": columns_meta,
        "layout": "generic",
        "image_width": width_px,
        "image_height": height_px,
        "duration_seconds": round(duration, 2),
        "ocr_engine": "paddle_direct",
        "grid_method": "paddle_direct",
        "summary": {
            "total_cells": cell_count,
            "non_empty_cells": filled_count,
            "low_confidence": shaky_count,
        },
    }

    # Persist the untouched original as cropped_png for the overlay view.
    await update_session_db(
        session_id,
        word_result=word_result,
        cropped_png=source_png,
        current_step=8,
    )

    logger.info(
        "paddle_direct session %s: %d cells (%d rows, %d cols) in %.2fs",
        session_id, cell_count, row_count, col_count, duration,
    )

    log_payload = {
        "total_cells": cell_count,
        "non_empty_cells": word_result["summary"]["non_empty_cells"],
        "ocr_engine": "paddle_direct",
    }
    await _append_pipeline_log(
        session_id, "paddle_direct", log_payload, duration_ms=int(duration * 1000)
    )

    return {"session_id": session_id, **word_result}
|
||||||
|
|
||||||
|
|
||||||
|
def _paddle_words_to_grid_cells(
|
||||||
|
word_dicts: List[Dict[str, Any]],
|
||||||
|
img_w: int,
|
||||||
|
img_h: int,
|
||||||
|
) -> tuple:
|
||||||
|
"""Convert PaddleOCR word dicts into GridCell dicts + columns_meta.
|
||||||
|
|
||||||
|
1. Sort words by (top, left).
|
||||||
|
2. Cluster into rows by Y-proximity (threshold = 50% of median word height).
|
||||||
|
3. Within each row, sort left→right and assign col_index.
|
||||||
|
4. Each word → 1 GridCell with word_boxes and bbox_pct.
|
||||||
|
|
||||||
|
Returns (cells, columns_meta) in the same format as build_grid_from_words.
|
||||||
|
"""
|
||||||
|
if not word_dicts:
|
||||||
|
return [], []
|
||||||
|
|
||||||
|
# Sort by top then left
|
||||||
|
sorted_words = sorted(word_dicts, key=lambda w: (w["top"], w["left"]))
|
||||||
|
|
||||||
|
# Compute median word height for row clustering threshold
|
||||||
|
heights = [w["height"] for w in sorted_words if w.get("height", 0) > 0]
|
||||||
|
median_h = sorted(heights)[len(heights) // 2] if heights else 30
|
||||||
|
row_threshold = max(median_h * 0.5, 8)
|
||||||
|
|
||||||
|
# Cluster into rows
|
||||||
|
rows: List[List[Dict]] = []
|
||||||
|
current_row: List[Dict] = []
|
||||||
|
current_y = -9999.0
|
||||||
|
|
||||||
|
for w in sorted_words:
|
||||||
|
center_y = w["top"] + w["height"] / 2
|
||||||
|
if current_row and abs(center_y - current_y) > row_threshold:
|
||||||
|
rows.append(current_row)
|
||||||
|
current_row = []
|
||||||
|
current_row.append(w)
|
||||||
|
# Running average Y center for the row
|
||||||
|
current_y = sum(ww["top"] + ww["height"] / 2 for ww in current_row) / len(current_row)
|
||||||
|
|
||||||
|
if current_row:
|
||||||
|
rows.append(current_row)
|
||||||
|
|
||||||
|
# Sort each row left→right and build cells
|
||||||
|
cells: List[Dict[str, Any]] = []
|
||||||
|
max_col = 0
|
||||||
|
|
||||||
|
for row_idx, row_words in enumerate(rows):
|
||||||
|
row_words.sort(key=lambda w: w["left"])
|
||||||
|
for col_idx, w in enumerate(row_words):
|
||||||
|
left = w["left"]
|
||||||
|
top = w["top"]
|
||||||
|
width = w["width"]
|
||||||
|
height = w["height"]
|
||||||
|
conf = w.get("confidence", 0)
|
||||||
|
if isinstance(conf, float) and conf <= 1.0:
|
||||||
|
conf = conf * 100 # normalize to 0-100
|
||||||
|
|
||||||
|
cell = {
|
||||||
|
"cell_id": f"PD_R{row_idx:02d}_W{col_idx:02d}",
|
||||||
|
"x": left,
|
||||||
|
"y": top,
|
||||||
|
"width": width,
|
||||||
|
"height": height,
|
||||||
|
"text": w.get("text", ""),
|
||||||
|
"confidence": round(conf, 1),
|
||||||
|
"column_index": col_idx,
|
||||||
|
"row_index": row_idx,
|
||||||
|
"zone_index": 0,
|
||||||
|
"ocr_engine": "paddle_direct",
|
||||||
|
"word_boxes": [{
|
||||||
|
"text": w.get("text", ""),
|
||||||
|
"left": left,
|
||||||
|
"top": top,
|
||||||
|
"width": width,
|
||||||
|
"height": height,
|
||||||
|
"confidence": round(conf, 1),
|
||||||
|
}],
|
||||||
|
"bbox_pct": {
|
||||||
|
"x": round(left / img_w * 100, 3),
|
||||||
|
"y": round(top / img_h * 100, 3),
|
||||||
|
"w": round(width / img_w * 100, 3),
|
||||||
|
"h": round(height / img_h * 100, 3),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
cells.append(cell)
|
||||||
|
if col_idx > max_col:
|
||||||
|
max_col = col_idx
|
||||||
|
|
||||||
|
# Build columns_meta — one pseudo-column per column index
|
||||||
|
columns_meta = []
|
||||||
|
for ci in range(max_col + 1):
|
||||||
|
col_cells = [c for c in cells if c["column_index"] == ci]
|
||||||
|
if col_cells:
|
||||||
|
min_x = min(c["x"] for c in col_cells)
|
||||||
|
max_right = max(c["x"] + c["width"] for c in col_cells)
|
||||||
|
columns_meta.append({
|
||||||
|
"type": "column_text",
|
||||||
|
"x": min_x,
|
||||||
|
"y": 0,
|
||||||
|
"width": max_right - min_x,
|
||||||
|
"height": img_h,
|
||||||
|
"classification_confidence": 1.0,
|
||||||
|
"classification_method": "paddle_direct",
|
||||||
|
})
|
||||||
|
|
||||||
|
return cells, columns_meta
|
||||||
|
|
||||||
|
|
||||||
class WordGroundTruthRequest(BaseModel):
|
class WordGroundTruthRequest(BaseModel):
|
||||||
is_correct: bool
|
is_correct: bool
|
||||||
corrected_entries: Optional[List[Dict[str, Any]]] = None
|
corrected_entries: Optional[List[Dict[str, Any]]] = None
|
||||||
|
|||||||
Reference in New Issue
Block a user