feat: improved OCR pipeline session manager with categories, thumbnails, pipeline logging
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 39s
CI / test-go-edu-search (push) Successful in 27s
CI / test-python-klausur (push) Failing after 1m48s
CI / test-python-agent-core (push) Successful in 17s
CI / test-nodejs-website (push) Successful in 20s

- Add document_category (10 types) and pipeline_log JSONB columns
- Session list: thumbnails, copyable IDs, category/doc_type badges
- Inline category dropdown, bulk delete, pipeline step logging
- New endpoints: thumbnail, delete-all, pipeline-log, categories
- Cleared all 22 old test sessions

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-03-05 09:44:38 +01:00
parent a58dfca1d8
commit 293e7914d8
4 changed files with 411 additions and 77 deletions

View File

@@ -11,7 +11,7 @@ import { StepWordRecognition } from '@/components/ocr-pipeline/StepWordRecogniti
import { StepLlmReview } from '@/components/ocr-pipeline/StepLlmReview' import { StepLlmReview } from '@/components/ocr-pipeline/StepLlmReview'
import { StepReconstruction } from '@/components/ocr-pipeline/StepReconstruction' import { StepReconstruction } from '@/components/ocr-pipeline/StepReconstruction'
import { StepGroundTruth } from '@/components/ocr-pipeline/StepGroundTruth' import { StepGroundTruth } from '@/components/ocr-pipeline/StepGroundTruth'
import { PIPELINE_STEPS, type PipelineStep, type SessionListItem, type DocumentTypeResult } from './types' import { PIPELINE_STEPS, DOCUMENT_CATEGORIES, type PipelineStep, type SessionListItem, type DocumentTypeResult, type DocumentCategory } from './types'
const KLAUSUR_API = '/klausur-api' const KLAUSUR_API = '/klausur-api'
@@ -23,7 +23,9 @@ export default function OcrPipelinePage() {
const [loadingSessions, setLoadingSessions] = useState(true) const [loadingSessions, setLoadingSessions] = useState(true)
const [editingName, setEditingName] = useState<string | null>(null) const [editingName, setEditingName] = useState<string | null>(null)
const [editNameValue, setEditNameValue] = useState('') const [editNameValue, setEditNameValue] = useState('')
const [editingCategory, setEditingCategory] = useState<string | null>(null)
const [docTypeResult, setDocTypeResult] = useState<DocumentTypeResult | null>(null) const [docTypeResult, setDocTypeResult] = useState<DocumentTypeResult | null>(null)
const [activeCategory, setActiveCategory] = useState<DocumentCategory | undefined>(undefined)
const [steps, setSteps] = useState<PipelineStep[]>( const [steps, setSteps] = useState<PipelineStep[]>(
PIPELINE_STEPS.map((s, i) => ({ PIPELINE_STEPS.map((s, i) => ({
...s, ...s,
@@ -59,6 +61,7 @@ export default function OcrPipelinePage() {
setSessionId(sid) setSessionId(sid)
setSessionName(data.name || data.filename || '') setSessionName(data.name || data.filename || '')
setActiveCategory(data.document_category || undefined)
// Restore doc type result if available // Restore doc type result if available
const savedDocType: DocumentTypeResult | null = data.doc_type_result || null const savedDocType: DocumentTypeResult | null = data.doc_type_result || null
@@ -115,6 +118,36 @@ export default function OcrPipelinePage() {
setEditingName(null) setEditingName(null)
}, [sessionId]) }, [sessionId])
/**
 * Persist a new document category for a session and mirror it in local state.
 *
 * The session list entry (and the active-session badge, when `sid` is the
 * currently open session) is only updated after the backend has accepted the
 * change. The inline category dropdown is closed in every case.
 *
 * @param sid - ID of the session whose category is changed.
 * @param category - The category to assign.
 */
const updateCategory = useCallback(async (sid: string, category: DocumentCategory) => {
  try {
    const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`, {
      method: 'PUT',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ document_category: category }),
    })
    // fetch() only rejects on network errors; an HTTP error status (e.g. 400 for an
    // invalid category, 404 for an unknown session) must be detected explicitly,
    // otherwise the UI would show a category that was never stored.
    if (!res.ok) {
      throw new Error(`HTTP ${res.status}`)
    }
    setSessions((prev) => prev.map((s) => (s.id === sid ? { ...s, document_category: category } : s)))
    if (sessionId === sid) setActiveCategory(category)
  } catch (e) {
    console.error('Failed to update category:', e)
  }
  setEditingCategory(null)
}, [sessionId])
/**
 * Delete every session after an explicit user confirmation.
 *
 * Local state (session list, active session, step progress, doc type and
 * category) is reset only when the backend reports success, so a failed
 * request does not leave the UI claiming that everything was removed.
 */
const deleteAllSessions = useCallback(async () => {
  if (!confirm('Alle Sessions loeschen? Dies kann nicht rueckgaengig gemacht werden.')) return
  try {
    const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions`, { method: 'DELETE' })
    // fetch() resolves for HTTP error statuses too; treat them as failures.
    if (!res.ok) {
      throw new Error(`HTTP ${res.status}`)
    }
    setSessions([])
    setSessionId(null)
    setCurrentStep(0)
    setDocTypeResult(null)
    setActiveCategory(undefined)
    // Back to the initial pipeline state: first step active, all others pending.
    setSteps(PIPELINE_STEPS.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
  } catch (e) {
    console.error('Failed to delete all sessions:', e)
  }
}, [])
const handleStepClick = (index: number) => { const handleStepClick = (index: number) => {
if (index <= currentStep || steps[index].status === 'completed') { if (index <= currentStep || steps[index].status === 'completed') {
setCurrentStep(index) setCurrentStep(index)
@@ -307,14 +340,25 @@ export default function OcrPipelinePage() {
<div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-4"> <div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-4">
<div className="flex items-center justify-between mb-3"> <div className="flex items-center justify-between mb-3">
<h3 className="text-sm font-medium text-gray-700 dark:text-gray-300"> <h3 className="text-sm font-medium text-gray-700 dark:text-gray-300">
Sessions Sessions ({sessions.length})
</h3> </h3>
<button <div className="flex gap-2">
onClick={handleNewSession} {sessions.length > 0 && (
className="text-xs px-3 py-1.5 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors" <button
> onClick={deleteAllSessions}
+ Neue Session className="text-xs px-3 py-1.5 text-red-600 hover:bg-red-50 dark:hover:bg-red-900/20 rounded-lg transition-colors"
</button> title="Alle Sessions loeschen"
>
Alle loeschen
</button>
)}
<button
onClick={handleNewSession}
className="text-xs px-3 py-1.5 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors"
>
+ Neue Session
</button>
</div>
</div> </div>
{loadingSessions ? ( {loadingSessions ? (
@@ -322,75 +366,165 @@ export default function OcrPipelinePage() {
) : sessions.length === 0 ? ( ) : sessions.length === 0 ? (
<div className="text-sm text-gray-400 py-2">Noch keine Sessions vorhanden.</div> <div className="text-sm text-gray-400 py-2">Noch keine Sessions vorhanden.</div>
) : ( ) : (
<div className="space-y-1 max-h-48 overflow-y-auto"> <div className="space-y-1.5 max-h-[320px] overflow-y-auto">
{sessions.map((s) => ( {sessions.map((s) => {
<div const catInfo = DOCUMENT_CATEGORIES.find(c => c.value === s.document_category)
key={s.id} return (
className={`flex items-center gap-2 px-3 py-2 rounded-lg text-sm transition-colors cursor-pointer ${ <div
sessionId === s.id key={s.id}
? 'bg-teal-50 dark:bg-teal-900/30 border border-teal-200 dark:border-teal-700' className={`relative flex items-start gap-3 px-3 py-2.5 rounded-lg text-sm transition-colors cursor-pointer ${
: 'hover:bg-gray-50 dark:hover:bg-gray-700/50' sessionId === s.id
}`} ? 'bg-teal-50 dark:bg-teal-900/30 border border-teal-200 dark:border-teal-700'
> : 'hover:bg-gray-50 dark:hover:bg-gray-700/50'
<div className="flex-1 min-w-0" onClick={() => openSession(s.id)}> }`}
{editingName === s.id ? ( >
<input {/* Thumbnail */}
autoFocus <div
value={editNameValue} className="flex-shrink-0 w-12 h-12 rounded-md overflow-hidden bg-gray-100 dark:bg-gray-700"
onChange={(e) => setEditNameValue(e.target.value)} onClick={() => openSession(s.id)}
onBlur={() => renameSession(s.id, editNameValue)} >
onKeyDown={(e) => { {/* eslint-disable-next-line @next/next/no-img-element */}
if (e.key === 'Enter') renameSession(s.id, editNameValue) <img
if (e.key === 'Escape') setEditingName(null) src={`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${s.id}/thumbnail?size=96`}
}} alt=""
onClick={(e) => e.stopPropagation()} className="w-full h-full object-cover"
className="w-full px-1 py-0.5 text-sm border rounded dark:bg-gray-700 dark:border-gray-600" loading="lazy"
onError={(e) => { (e.target as HTMLImageElement).style.display = 'none' }}
/> />
) : ( </div>
<div className="truncate font-medium text-gray-700 dark:text-gray-300">
{s.name || s.filename} {/* Info */}
<div className="flex-1 min-w-0" onClick={() => openSession(s.id)}>
{editingName === s.id ? (
<input
autoFocus
value={editNameValue}
onChange={(e) => setEditNameValue(e.target.value)}
onBlur={() => renameSession(s.id, editNameValue)}
onKeyDown={(e) => {
if (e.key === 'Enter') renameSession(s.id, editNameValue)
if (e.key === 'Escape') setEditingName(null)
}}
onClick={(e) => e.stopPropagation()}
className="w-full px-1 py-0.5 text-sm border rounded dark:bg-gray-700 dark:border-gray-600"
/>
) : (
<div className="truncate font-medium text-gray-700 dark:text-gray-300">
{s.name || s.filename}
</div>
)}
{/* ID row */}
<button
onClick={(e) => {
e.stopPropagation()
navigator.clipboard.writeText(s.id)
const btn = e.currentTarget
btn.textContent = 'Kopiert!'
setTimeout(() => { btn.textContent = `ID: ${s.id.slice(0, 8)}` }, 1500)
}}
className="text-[10px] font-mono text-gray-400 hover:text-teal-500 transition-colors"
title={`Volle ID: ${s.id} — Klick zum Kopieren`}
>
ID: {s.id.slice(0, 8)}
</button>
<div className="text-xs text-gray-400 flex gap-2 mt-0.5">
<span>{new Date(s.created_at).toLocaleDateString('de-DE', { day: '2-digit', month: '2-digit', year: '2-digit', hour: '2-digit', minute: '2-digit' })}</span>
<span>Schritt {s.current_step}: {stepNames[s.current_step] || '?'}</span>
</div>
</div>
{/* Badges */}
<div className="flex flex-col gap-1 items-end flex-shrink-0" onClick={(e) => e.stopPropagation()}>
{/* Category Badge */}
<button
onClick={() => setEditingCategory(editingCategory === s.id ? null : s.id)}
className={`text-[10px] px-1.5 py-0.5 rounded-full border transition-colors ${
catInfo
? 'bg-teal-50 dark:bg-teal-900/30 border-teal-200 dark:border-teal-700 text-teal-700 dark:text-teal-300'
: 'bg-gray-50 dark:bg-gray-700 border-gray-200 dark:border-gray-600 text-gray-400 hover:text-gray-600 dark:hover:text-gray-300'
}`}
title="Kategorie setzen"
>
{catInfo ? `${catInfo.icon} ${catInfo.label}` : '+ Kategorie'}
</button>
{/* Doc Type Badge (read-only) */}
{s.doc_type && (
<span className="text-[10px] px-1.5 py-0.5 rounded-full bg-gray-100 dark:bg-gray-700 text-gray-500 dark:text-gray-400 border border-gray-200 dark:border-gray-600">
{s.doc_type}
</span>
)}
</div>
{/* Action buttons */}
<div className="flex flex-col gap-0.5 flex-shrink-0">
<button
onClick={(e) => {
e.stopPropagation()
setEditNameValue(s.name || s.filename)
setEditingName(s.id)
}}
className="p-1 text-gray-400 hover:text-gray-600 dark:hover:text-gray-300"
title="Umbenennen"
>
<svg className="w-3.5 h-3.5" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={2}>
<path strokeLinecap="round" strokeLinejoin="round" d="M15.232 5.232l3.536 3.536m-2.036-5.036a2.5 2.5 0 113.536 3.536L6.5 21.036H3v-3.572L16.732 3.732z" />
</svg>
</button>
<button
onClick={(e) => {
e.stopPropagation()
if (confirm('Session loeschen?')) deleteSession(s.id)
}}
className="p-1 text-gray-400 hover:text-red-500"
title="Loeschen"
>
<svg className="w-3.5 h-3.5" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={2}>
<path strokeLinecap="round" strokeLinejoin="round" d="M19 7l-.867 12.142A2 2 0 0116.138 21H7.862a2 2 0 01-1.995-1.858L5 7m5 4v6m4-6v6m1-10V4a1 1 0 00-1-1h-4a1 1 0 00-1 1v3M4 7h16" />
</svg>
</button>
</div>
{/* Category dropdown (inline) */}
{editingCategory === s.id && (
<div
className="absolute right-0 top-full mt-1 z-20 bg-white dark:bg-gray-800 border border-gray-200 dark:border-gray-700 rounded-lg shadow-lg p-2 grid grid-cols-2 gap-1 w-64"
onClick={(e) => e.stopPropagation()}
>
{DOCUMENT_CATEGORIES.map((cat) => (
<button
key={cat.value}
onClick={() => updateCategory(s.id, cat.value)}
className={`text-xs px-2 py-1.5 rounded-md text-left transition-colors ${
s.document_category === cat.value
? 'bg-teal-100 dark:bg-teal-900/40 text-teal-700 dark:text-teal-300'
: 'hover:bg-gray-100 dark:hover:bg-gray-700 text-gray-600 dark:text-gray-400'
}`}
>
{cat.icon} {cat.label}
</button>
))}
</div> </div>
)} )}
<div className="text-xs text-gray-400 flex gap-2">
<span>{new Date(s.created_at).toLocaleDateString('de-DE', { day: '2-digit', month: '2-digit', year: '2-digit', hour: '2-digit', minute: '2-digit' })}</span>
<span>Schritt {s.current_step}: {stepNames[s.current_step] || '?'}</span>
</div>
</div> </div>
<button )
onClick={(e) => { })}
e.stopPropagation()
setEditNameValue(s.name || s.filename)
setEditingName(s.id)
}}
className="p-1 text-gray-400 hover:text-gray-600 dark:hover:text-gray-300"
title="Umbenennen"
>
<svg className="w-3.5 h-3.5" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={2}>
<path strokeLinecap="round" strokeLinejoin="round" d="M15.232 5.232l3.536 3.536m-2.036-5.036a2.5 2.5 0 113.536 3.536L6.5 21.036H3v-3.572L16.732 3.732z" />
</svg>
</button>
<button
onClick={(e) => {
e.stopPropagation()
if (confirm('Session loeschen?')) deleteSession(s.id)
}}
className="p-1 text-gray-400 hover:text-red-500"
title="Loeschen"
>
<svg className="w-3.5 h-3.5" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={2}>
<path strokeLinecap="round" strokeLinejoin="round" d="M19 7l-.867 12.142A2 2 0 0116.138 21H7.862a2 2 0 01-1.995-1.858L5 7m5 4v6m4-6v6m1-10V4a1 1 0 00-1-1h-4a1 1 0 00-1 1v3M4 7h16" />
</svg>
</button>
</div>
))}
</div> </div>
)} )}
</div> </div>
{/* Active session name */} {/* Active session info */}
{sessionId && sessionName && ( {sessionId && sessionName && (
<div className="text-sm text-gray-500 dark:text-gray-400"> <div className="flex items-center gap-3 text-sm text-gray-500 dark:text-gray-400">
Aktive Session: <span className="font-medium text-gray-700 dark:text-gray-300">{sessionName}</span> <span>Aktive Session: <span className="font-medium text-gray-700 dark:text-gray-300">{sessionName}</span></span>
{activeCategory && (() => {
const cat = DOCUMENT_CATEGORIES.find(c => c.value === activeCategory)
return cat ? <span className="text-xs px-2 py-0.5 rounded-full bg-teal-50 dark:bg-teal-900/30 border border-teal-200 dark:border-teal-700 text-teal-700 dark:text-teal-300">{cat.icon} {cat.label}</span> : null
})()}
{docTypeResult && (
<span className="text-xs px-2 py-0.5 rounded-full bg-gray-100 dark:bg-gray-700 text-gray-500 dark:text-gray-400 border border-gray-200 dark:border-gray-600">
{docTypeResult.doc_type}
</span>
)}
</div> </div>
)} )}

View File

@@ -7,16 +7,47 @@ export interface PipelineStep {
status: PipelineStepStatus status: PipelineStepStatus
} }
/** Category a user can assign to a session's document (ten possible values). */
export type DocumentCategory =
  | 'vokabelseite'
  | 'buchseite'
  | 'arbeitsblatt'
  | 'klausurseite'
  | 'mathearbeit'
  | 'statistik'
  | 'zeitung'
  | 'formular'
  | 'handschrift'
  | 'sonstiges'

/**
 * Display metadata for every {@link DocumentCategory}: the stored value,
 * a human-readable label and an emoji icon, in the order they are offered
 * to the user.
 */
export const DOCUMENT_CATEGORIES: Array<{
  value: DocumentCategory
  label: string
  icon: string
}> = [
  { value: 'vokabelseite', label: 'Vokabelseite', icon: '📖' },
  { value: 'buchseite', label: 'Buchseite', icon: '📚' },
  { value: 'arbeitsblatt', label: 'Arbeitsblatt', icon: '📝' },
  { value: 'klausurseite', label: 'Klausurseite', icon: '📄' },
  { value: 'mathearbeit', label: 'Mathearbeit', icon: '🔢' },
  { value: 'statistik', label: 'Statistik', icon: '📊' },
  { value: 'zeitung', label: 'Zeitung', icon: '📰' },
  { value: 'formular', label: 'Formular', icon: '📋' },
  { value: 'handschrift', label: 'Handschrift', icon: '✍️' },
  { value: 'sonstiges', label: 'Sonstiges', icon: '📎' },
]
export interface SessionListItem { export interface SessionListItem {
id: string id: string
name: string name: string
filename: string filename: string
status: string status: string
current_step: number current_step: number
document_category?: DocumentCategory
doc_type?: string
created_at: string created_at: string
updated_at?: string updated_at?: string
} }
/**
 * One recorded pipeline step execution, as stored in a session's
 * `pipeline_log`.
 */
export interface PipelineLogEntry {
  /** Name of the pipeline step, e.g. `deskew`, `dewarp`, `columns`, `rows`, `words`. */
  step: string
  /** ISO-8601 timestamp of when the entry was written (the backend emits UTC without an offset suffix). */
  completed_at: string
  /** Whether the step finished successfully. */
  success: boolean
  /** Step duration in milliseconds; absent when the backend did not supply one. */
  duration_ms?: number
  /** Step-specific key/value metrics; the shape differs per step, hence `unknown` values. */
  metrics: Record<string, unknown>
}
/** The pipeline execution log of a session: a list of step entries. */
export interface PipelineLog {
  /** Recorded step entries (the backend appends new entries at the end). */
  steps: PipelineLogEntry[]
}
export interface DocumentTypeResult { export interface DocumentTypeResult {
doc_type: 'vocab_table' | 'full_text' | 'generic_table' doc_type: 'vocab_table' | 'full_text' | 'generic_table'
confidence: number confidence: number
@@ -34,6 +65,8 @@ export interface SessionInfo {
image_height: number image_height: number
original_image_url: string original_image_url: string
current_step?: number current_step?: number
document_category?: DocumentCategory
doc_type?: string
deskew_result?: DeskewResult deskew_result?: DeskewResult
dewarp_result?: DewarpResult dewarp_result?: DewarpResult
column_result?: ColumnResult column_result?: ColumnResult

View File

@@ -66,6 +66,7 @@ from cv_vocab_pipeline import (
) )
from ocr_pipeline_session_store import ( from ocr_pipeline_session_store import (
create_session_db, create_session_db,
delete_all_sessions_db,
delete_session_db, delete_session_db,
get_session_db, get_session_db,
get_session_image, get_session_image,
@@ -151,8 +152,15 @@ class DewarpGroundTruthRequest(BaseModel):
notes: Optional[str] = None notes: Optional[str] = None
class RenameSessionRequest(BaseModel): VALID_DOCUMENT_CATEGORIES = {
name: str 'vokabelseite', 'buchseite', 'arbeitsblatt', 'klausurseite',
'mathearbeit', 'statistik', 'zeitung', 'formular', 'handschrift', 'sonstiges',
}
class UpdateSessionRequest(BaseModel):
    """Request body for updating a session's name and/or document category.

    Both fields are optional; a field left as ``None`` is not updated.
    """

    # New display name for the session; None means "leave unchanged".
    name: Optional[str] = None
    # New document category; None means "leave unchanged". The update handler
    # rejects values that are not in VALID_DOCUMENT_CATEGORIES.
    document_category: Optional[str] = None
class ManualColumnsRequest(BaseModel): class ManualColumnsRequest(BaseModel):
@@ -281,6 +289,8 @@ async def get_session_info(session_id: str):
"image_height": img_h, "image_height": img_h,
"original_image_url": f"/api/v1/ocr-pipeline/sessions/{session_id}/image/original", "original_image_url": f"/api/v1/ocr-pipeline/sessions/{session_id}/image/original",
"current_step": session.get("current_step", 1), "current_step": session.get("current_step", 1),
"document_category": session.get("document_category"),
"doc_type": session.get("doc_type"),
} }
if session.get("deskew_result"): if session.get("deskew_result"):
@@ -293,17 +303,31 @@ async def get_session_info(session_id: str):
result["row_result"] = session["row_result"] result["row_result"] = session["row_result"]
if session.get("word_result"): if session.get("word_result"):
result["word_result"] = session["word_result"] result["word_result"] = session["word_result"]
if session.get("doc_type_result"):
result["doc_type_result"] = session["doc_type_result"]
return result return result
@router.put("/sessions/{session_id}") @router.put("/sessions/{session_id}")
async def rename_session(session_id: str, req: RenameSessionRequest): async def update_session(session_id: str, req: UpdateSessionRequest):
"""Rename a session.""" """Update session name and/or document category."""
updated = await update_session_db(session_id, name=req.name) kwargs: Dict[str, Any] = {}
if req.name is not None:
kwargs["name"] = req.name
if req.document_category is not None:
if req.document_category not in VALID_DOCUMENT_CATEGORIES:
raise HTTPException(
status_code=400,
detail=f"Invalid category '{req.document_category}'. Valid: {sorted(VALID_DOCUMENT_CATEGORIES)}",
)
kwargs["document_category"] = req.document_category
if not kwargs:
raise HTTPException(status_code=400, detail="Nothing to update")
updated = await update_session_db(session_id, **kwargs)
if not updated: if not updated:
raise HTTPException(status_code=404, detail=f"Session {session_id} not found") raise HTTPException(status_code=404, detail=f"Session {session_id} not found")
return {"session_id": session_id, "name": req.name} return {"session_id": session_id, **kwargs}
@router.delete("/sessions/{session_id}") @router.delete("/sessions/{session_id}")
@@ -316,6 +340,78 @@ async def delete_session(session_id: str):
return {"session_id": session_id, "deleted": True} return {"session_id": session_id, "deleted": True}
@router.delete("/sessions")
async def delete_all_sessions():
    """Delete ALL sessions (cleanup).

    Returns:
        ``{"deleted_count": <int>}`` with the number of rows removed from the
        database.
    """
    try:
        count = await delete_all_sessions_db()
    finally:
        # Clear the cache AFTER the awaited delete: other requests may run while
        # the delete is in flight and re-populate the cache with sessions that
        # are about to disappear. Clearing in ``finally`` keeps the previous
        # guarantee that the cache is emptied even if the delete raises.
        _cache.clear()
    return {"deleted_count": count}
@router.get("/sessions/{session_id}/thumbnail")
async def get_session_thumbnail(session_id: str, size: int = Query(default=80, ge=16, le=400)):
    """Return a small PNG thumbnail of the session's original image.

    The image is scaled, preserving its aspect ratio, so that its longer side
    is ``size`` pixels.

    Args:
        session_id: ID of the session whose original image is used.
        size: Target length of the longer side in pixels (16-400, default 80).

    Raises:
        HTTPException: 404 if no original image is available for the session,
            500 if the stored image cannot be decoded or the thumbnail cannot
            be encoded.
    """
    original_png = await get_session_image(session_id, "original")
    if not original_png:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found or no image")

    arr = np.frombuffer(original_png, dtype=np.uint8)
    img = cv2.imdecode(arr, cv2.IMREAD_COLOR)
    if img is None:
        raise HTTPException(status_code=500, detail="Failed to decode image")

    h, w = img.shape[:2]
    scale = size / max(h, w)
    # int() truncates, so the short side of a very elongated image could become
    # 0 pixels, which cv2.resize rejects. Clamp both sides to at least 1 pixel.
    new_w = max(1, int(w * scale))
    new_h = max(1, int(h * scale))
    thumb = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_AREA)

    ok, png_bytes = cv2.imencode(".png", thumb)
    if not ok:
        raise HTTPException(status_code=500, detail="Failed to encode thumbnail")

    return Response(content=png_bytes.tobytes(), media_type="image/png",
                    headers={"Cache-Control": "public, max-age=3600"})
@router.get("/sessions/{session_id}/pipeline-log")
async def get_pipeline_log(session_id: str):
    """Return the stored pipeline execution log of a session.

    A session without a log yields an empty log (``{"steps": []}``).

    Raises:
        HTTPException: 404 if the session does not exist.
    """
    record = await get_session_db(session_id)
    if record:
        stored_log = record.get("pipeline_log")
        # Fall back to an empty log when nothing has been recorded yet.
        effective_log = stored_log if stored_log else {"steps": []}
        return {"session_id": session_id, "pipeline_log": effective_log}
    raise HTTPException(status_code=404, detail=f"Session {session_id} not found")
@router.get("/categories")
async def list_categories():
    """Return all valid document categories in alphabetical order."""
    ordered = list(VALID_DOCUMENT_CATEGORIES)
    ordered.sort()
    return {"categories": ordered}
# ---------------------------------------------------------------------------
# Pipeline Log Helper
# ---------------------------------------------------------------------------
async def _append_pipeline_log(
    session_id: str,
    step_name: str,
    metrics: Dict[str, Any],
    success: bool = True,
    duration_ms: Optional[int] = None,
):
    """Append a step entry to the session's pipeline_log JSONB.

    This is best-effort bookkeeping: the pipeline endpoints call it right
    before returning their result, so a failure here (database error,
    non-serializable metric value, ...) is logged as a warning instead of
    turning an otherwise finished step into an error response.

    Args:
        session_id: Session whose log is extended. Unknown sessions are ignored.
        step_name: Name of the pipeline step, stored as ``step``.
        metrics: Step-specific values stored under ``metrics``.
        success: Whether the step succeeded.
        duration_ms: Optional step duration; the ``duration_ms`` key is only
            written when a value is given.
    """
    try:
        session = await get_session_db(session_id)
        if not session:
            return

        log = session.get("pipeline_log") or {"steps": []}
        if not isinstance(log, dict):
            log = {"steps": []}

        # A stored log whose "steps" is missing or not a list (e.g. null) would
        # break .append(); start a fresh list in that case.
        steps = log.get("steps")
        if not isinstance(steps, list):
            steps = []
            log["steps"] = steps

        entry = {
            "step": step_name,
            # Naive UTC timestamp: the ISO string carries no timezone suffix.
            "completed_at": datetime.utcnow().isoformat(),
            "success": success,
            "metrics": metrics,
        }
        if duration_ms is not None:
            entry["duration_ms"] = duration_ms
        steps.append(entry)

        await update_session_db(session_id, pipeline_log=log)
    except Exception as exc:
        logger.warning(f"OCR Pipeline: failed to append pipeline log "
                       f"(session {session_id}, step {step_name}): {exc}")
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Image Endpoints # Image Endpoints
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
@@ -448,6 +544,12 @@ async def auto_deskew(session_id: str):
logger.info(f"OCR Pipeline: deskew session {session_id}: " logger.info(f"OCR Pipeline: deskew session {session_id}: "
f"hough={angle_hough:.2f} wa={angle_wa:.2f} -> {method_used} {angle_applied:.2f}") f"hough={angle_hough:.2f} wa={angle_wa:.2f} -> {method_used} {angle_applied:.2f}")
await _append_pipeline_log(session_id, "deskew", {
"angle_applied": round(angle_applied, 3),
"confidence": round(confidence, 2),
"method": method_used,
}, duration_ms=int(duration * 1000))
return { return {
"session_id": session_id, "session_id": session_id,
**deskew_result, **deskew_result,
@@ -680,6 +782,13 @@ async def auto_dewarp(
f"method={dewarp_info['method']} shear={dewarp_info['shear_degrees']:.3f} " f"method={dewarp_info['method']} shear={dewarp_info['shear_degrees']:.3f} "
f"conf={dewarp_info['confidence']:.2f} ({duration:.2f}s)") f"conf={dewarp_info['confidence']:.2f} ({duration:.2f}s)")
await _append_pipeline_log(session_id, "dewarp", {
"shear_degrees": dewarp_info["shear_degrees"],
"confidence": dewarp_info["confidence"],
"method": dewarp_info["method"],
"ensemble_methods": [d.get("method", "") for d in dewarp_info.get("detections", [])],
}, duration_ms=int(duration * 1000))
return { return {
"session_id": session_id, "session_id": session_id,
**dewarp_result, **dewarp_result,
@@ -808,6 +917,13 @@ async def detect_type(session_id: str):
logger.info(f"OCR Pipeline: detect-type session {session_id}: " logger.info(f"OCR Pipeline: detect-type session {session_id}: "
f"{result.doc_type} (confidence={result.confidence}, {duration:.2f}s)") f"{result.doc_type} (confidence={result.confidence}, {duration:.2f}s)")
await _append_pipeline_log(session_id, "detect_type", {
"doc_type": result.doc_type,
"pipeline": result.pipeline,
"confidence": result.confidence,
**{k: v for k, v in (result.features or {}).items() if isinstance(v, (int, float, str, bool))},
}, duration_ms=int(duration * 1000))
return {"session_id": session_id, **result_dict} return {"session_id": session_id, **result_dict}
@@ -896,6 +1012,13 @@ async def detect_columns(session_id: str):
logger.info(f"OCR Pipeline: columns session {session_id}: " logger.info(f"OCR Pipeline: columns session {session_id}: "
f"{col_count} columns detected ({duration:.2f}s)") f"{col_count} columns detected ({duration:.2f}s)")
img_w = dewarped_bgr.shape[1]
await _append_pipeline_log(session_id, "columns", {
"total_columns": len(columns),
"column_widths_pct": [round(c["width"] / img_w * 100, 1) for c in columns],
"column_types": [c["type"] for c in columns],
}, duration_ms=int(duration * 1000))
return { return {
"session_id": session_id, "session_id": session_id,
**column_result, **column_result,
@@ -1112,6 +1235,15 @@ async def detect_rows(session_id: str):
logger.info(f"OCR Pipeline: rows session {session_id}: " logger.info(f"OCR Pipeline: rows session {session_id}: "
f"{len(rows)} rows detected ({duration:.2f}s): {type_counts}") f"{len(rows)} rows detected ({duration:.2f}s): {type_counts}")
content_rows = sum(1 for r in rows if r.row_type == "content")
avg_height = round(sum(r.height for r in rows) / len(rows)) if rows else 0
await _append_pipeline_log(session_id, "rows", {
"total_rows": len(rows),
"content_rows": content_rows,
"artifact_rows_removed": type_counts.get("header", 0) + type_counts.get("footer", 0),
"avg_row_height_px": avg_height,
}, duration_ms=int(duration * 1000))
return { return {
"session_id": session_id, "session_id": session_id,
**row_result, **row_result,
@@ -1369,6 +1501,15 @@ async def detect_words(
f"layout={word_result['layout']}, " f"layout={word_result['layout']}, "
f"{len(cells)} cells ({duration:.2f}s), summary: {word_result['summary']}") f"{len(cells)} cells ({duration:.2f}s), summary: {word_result['summary']}")
await _append_pipeline_log(session_id, "words", {
"total_cells": len(cells),
"non_empty_cells": word_result["summary"]["non_empty_cells"],
"low_confidence_count": word_result["summary"]["low_confidence"],
"ocr_engine": used_engine,
"layout": word_result["layout"],
"entry_count": word_result.get("entry_count", 0),
}, duration_ms=int(duration * 1000))
return { return {
"session_id": session_id, "session_id": session_id,
**word_result, **word_result,
@@ -1774,6 +1915,13 @@ async def run_llm_review(session_id: str, request: Request, stream: bool = False
logger.info(f"LLM review session {session_id}: {len(result['changes'])} changes, " logger.info(f"LLM review session {session_id}: {len(result['changes'])} changes, "
f"{result['duration_ms']}ms, model={result['model_used']}") f"{result['duration_ms']}ms, model={result['model_used']}")
await _append_pipeline_log(session_id, "correction", {
"engine": "llm",
"model": result["model_used"],
"total_entries": len(entries),
"corrections_proposed": len(result["changes"]),
}, duration_ms=result["duration_ms"])
return { return {
"session_id": session_id, "session_id": session_id,
"changes": result["changes"], "changes": result["changes"],

View File

@@ -66,7 +66,9 @@ async def init_ocr_pipeline_tables():
ADD COLUMN IF NOT EXISTS clean_png BYTEA, ADD COLUMN IF NOT EXISTS clean_png BYTEA,
ADD COLUMN IF NOT EXISTS handwriting_removal_meta JSONB, ADD COLUMN IF NOT EXISTS handwriting_removal_meta JSONB,
ADD COLUMN IF NOT EXISTS doc_type VARCHAR(50), ADD COLUMN IF NOT EXISTS doc_type VARCHAR(50),
ADD COLUMN IF NOT EXISTS doc_type_result JSONB ADD COLUMN IF NOT EXISTS doc_type_result JSONB,
ADD COLUMN IF NOT EXISTS document_category VARCHAR(50),
ADD COLUMN IF NOT EXISTS pipeline_log JSONB
""") """)
@@ -91,6 +93,7 @@ async def create_session_db(
deskew_result, dewarp_result, column_result, row_result, deskew_result, dewarp_result, column_result, row_result,
word_result, ground_truth, auto_shear_degrees, word_result, ground_truth, auto_shear_degrees,
doc_type, doc_type_result, doc_type, doc_type_result,
document_category, pipeline_log,
created_at, updated_at created_at, updated_at
""", uuid.UUID(session_id), name, filename, original_png) """, uuid.UUID(session_id), name, filename, original_png)
@@ -106,6 +109,7 @@ async def get_session_db(session_id: str) -> Optional[Dict[str, Any]]:
deskew_result, dewarp_result, column_result, row_result, deskew_result, dewarp_result, column_result, row_result,
word_result, ground_truth, auto_shear_degrees, word_result, ground_truth, auto_shear_degrees,
doc_type, doc_type_result, doc_type, doc_type_result,
document_category, pipeline_log,
created_at, updated_at created_at, updated_at
FROM ocr_pipeline_sessions WHERE id = $1 FROM ocr_pipeline_sessions WHERE id = $1
""", uuid.UUID(session_id)) """, uuid.UUID(session_id))
@@ -151,9 +155,10 @@ async def update_session_db(session_id: str, **kwargs) -> Optional[Dict[str, Any
'deskew_result', 'dewarp_result', 'column_result', 'row_result', 'deskew_result', 'dewarp_result', 'column_result', 'row_result',
'word_result', 'ground_truth', 'auto_shear_degrees', 'word_result', 'ground_truth', 'auto_shear_degrees',
'doc_type', 'doc_type_result', 'doc_type', 'doc_type_result',
'document_category', 'pipeline_log',
} }
jsonb_fields = {'deskew_result', 'dewarp_result', 'column_result', 'row_result', 'word_result', 'ground_truth', 'handwriting_removal_meta', 'doc_type_result'} jsonb_fields = {'deskew_result', 'dewarp_result', 'column_result', 'row_result', 'word_result', 'ground_truth', 'handwriting_removal_meta', 'doc_type_result', 'pipeline_log'}
for key, value in kwargs.items(): for key, value in kwargs.items():
if key in allowed_fields: if key in allowed_fields:
@@ -180,6 +185,7 @@ async def update_session_db(session_id: str, **kwargs) -> Optional[Dict[str, Any
deskew_result, dewarp_result, column_result, row_result, deskew_result, dewarp_result, column_result, row_result,
word_result, ground_truth, auto_shear_degrees, word_result, ground_truth, auto_shear_degrees,
doc_type, doc_type_result, doc_type, doc_type_result,
document_category, pipeline_log,
created_at, updated_at created_at, updated_at
""", *values) """, *values)
@@ -194,6 +200,7 @@ async def list_sessions_db(limit: int = 50) -> List[Dict[str, Any]]:
async with pool.acquire() as conn: async with pool.acquire() as conn:
rows = await conn.fetch(""" rows = await conn.fetch("""
SELECT id, name, filename, status, current_step, SELECT id, name, filename, status, current_step,
document_category, doc_type,
created_at, updated_at created_at, updated_at
FROM ocr_pipeline_sessions FROM ocr_pipeline_sessions
ORDER BY created_at DESC ORDER BY created_at DESC
@@ -213,6 +220,18 @@ async def delete_session_db(session_id: str) -> bool:
return result == "DELETE 1" return result == "DELETE 1"
async def delete_all_sessions_db() -> int:
    """Remove every row from ``ocr_pipeline_sessions``.

    Returns:
        The number of deleted rows, parsed from the command status string
        returned by the database driver (e.g. ``"DELETE 5"``); 0 if that
        string cannot be parsed.
    """
    pool = await get_pool()
    async with pool.acquire() as conn:
        status = await conn.execute("DELETE FROM ocr_pipeline_sessions")
    # The row count is the last whitespace-separated token of the status.
    tokens = status.split()
    try:
        return int(tokens[-1])
    except (ValueError, IndexError):
        return 0
# ============================================================================= # =============================================================================
# HELPER # HELPER
# ============================================================================= # =============================================================================
@@ -235,7 +254,7 @@ def _row_to_dict(row: asyncpg.Record) -> Dict[str, Any]:
result[key] = result[key].isoformat() result[key] = result[key].isoformat()
# JSONB → parsed (asyncpg returns str for JSONB) # JSONB → parsed (asyncpg returns str for JSONB)
for key in ['deskew_result', 'dewarp_result', 'column_result', 'row_result', 'word_result', 'ground_truth', 'doc_type_result']: for key in ['deskew_result', 'dewarp_result', 'column_result', 'row_result', 'word_result', 'ground_truth', 'doc_type_result', 'pipeline_log']:
if key in result and result[key] is not None: if key in result and result[key] is not None:
if isinstance(result[key], str): if isinstance(result[key], str):
result[key] = json.loads(result[key]) result[key] = json.loads(result[key])