Add OCR Kombi Pipeline: modular 11-step architecture with multi-page support
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 29s
CI / test-go-edu-search (push) Successful in 28s
CI / test-python-klausur (push) Failing after 2m24s
CI / test-python-agent-core (push) Successful in 22s
CI / test-nodejs-website (push) Successful in 20s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 29s
CI / test-go-edu-search (push) Successful in 28s
CI / test-python-klausur (push) Failing after 2m24s
CI / test-python-agent-core (push) Successful in 22s
CI / test-nodejs-website (push) Successful in 20s
Phase 1 of the clean architecture refactor: Replaces the 751-line ocr-overlay monolith with a modular pipeline. Each step gets its own component file. Frontend: /ai/ocr-kombi route with 11 steps (Upload, Orientation, PageSplit, Deskew, Dewarp, ContentCrop, OCR, Structure, GridBuild, GridReview, GroundTruth). Session list supports document grouping for multi-page uploads. Backend: New ocr_kombi/ module with multi-page PDF upload (splits PDF into N sessions with shared document_group_id). DB migration adds document_group_id and page_number columns. Old /ai/ocr-overlay remains fully functional for A/B testing. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
174
admin-lehrer/app/(admin)/ai/ocr-kombi/page.tsx
Normal file
174
admin-lehrer/app/(admin)/ai/ocr-kombi/page.tsx
Normal file
@@ -0,0 +1,174 @@
|
|||||||
|
'use client'
|
||||||
|
|
||||||
|
import { Suspense } from 'react'
|
||||||
|
import { PagePurpose } from '@/components/common/PagePurpose'
|
||||||
|
import { BoxSessionTabs } from '@/components/ocr-pipeline/BoxSessionTabs'
|
||||||
|
import { KombiStepper } from '@/components/ocr-kombi/KombiStepper'
|
||||||
|
import { SessionList } from '@/components/ocr-kombi/SessionList'
|
||||||
|
import { SessionHeader } from '@/components/ocr-kombi/SessionHeader'
|
||||||
|
import { StepUpload } from '@/components/ocr-kombi/StepUpload'
|
||||||
|
import { StepOrientation } from '@/components/ocr-kombi/StepOrientation'
|
||||||
|
import { StepPageSplit } from '@/components/ocr-kombi/StepPageSplit'
|
||||||
|
import { StepDeskew } from '@/components/ocr-kombi/StepDeskew'
|
||||||
|
import { StepDewarp } from '@/components/ocr-kombi/StepDewarp'
|
||||||
|
import { StepContentCrop } from '@/components/ocr-kombi/StepContentCrop'
|
||||||
|
import { StepOcr } from '@/components/ocr-kombi/StepOcr'
|
||||||
|
import { StepStructure } from '@/components/ocr-kombi/StepStructure'
|
||||||
|
import { StepGridBuild } from '@/components/ocr-kombi/StepGridBuild'
|
||||||
|
import { StepGridReview } from '@/components/ocr-kombi/StepGridReview'
|
||||||
|
import { StepGroundTruth } from '@/components/ocr-kombi/StepGroundTruth'
|
||||||
|
import { useKombiPipeline } from './useKombiPipeline'
|
||||||
|
|
||||||
|
/**
 * Client-side body of the OCR Kombi page.
 *
 * All state and actions come from `useKombiPipeline`. The component only wires
 * them into the layout, top to bottom: page description, session list,
 * active-session header, stepper, optional sub-session tabs, and the component
 * belonging to the current step.
 */
function OcrKombiContent() {
  const {
    currentStep,
    sessionId,
    sessionName,
    loadingSessions,
    activeCategory,
    isGroundTruth,
    subSessions,
    parentSessionId,
    steps,
    gridSaveRef,
    groupedSessions,
    loadSessions,
    openSession,
    handleStepClick,
    handleNext,
    handleNewSession,
    deleteSession,
    renameSession,
    updateCategory,
    handleOrientationComplete,
    handleSessionChange,
    setSessionId,
    setSubSessions,
    setParentSessionId,
    setIsGroundTruth,
  } = useKombiPipeline()

  // One renderer per UI step; the array position is the step index
  // (0 = upload … 10 = ground truth).
  const stepRenderers = [
    // 0: upload
    () => (
      <StepUpload
        onUploaded={(sid) => {
          setSessionId(sid)
          loadSessions()
          openSession(sid)
          handleNext()
        }}
      />
    ),
    // 1: orientation
    () => (
      <StepOrientation
        sessionId={sessionId}
        onNext={handleOrientationComplete}
        onSessionList={() => {
          loadSessions()
          handleNewSession()
        }}
      />
    ),
    // 2: page split — remembers the current session as parent of the new sub-sessions
    () => (
      <StepPageSplit
        sessionId={sessionId}
        onNext={handleNext}
        onSubSessionsCreated={(subs) => {
          setSubSessions(subs)
          if (sessionId) setParentSessionId(sessionId)
        }}
      />
    ),
    // 3: deskew
    () => <StepDeskew sessionId={sessionId} onNext={handleNext} />,
    // 4: dewarp
    () => <StepDewarp sessionId={sessionId} onNext={handleNext} />,
    // 5: content crop
    () => <StepContentCrop sessionId={sessionId} onNext={handleNext} />,
    // 6: OCR
    () => <StepOcr sessionId={sessionId} onNext={handleNext} />,
    // 7: structure detection
    () => <StepStructure sessionId={sessionId} onNext={handleNext} />,
    // 8: grid build
    () => <StepGridBuild sessionId={sessionId} onNext={handleNext} />,
    // 9: grid review
    () => <StepGridReview sessionId={sessionId} onNext={handleNext} saveRef={gridSaveRef} />,
    // 10: ground truth
    () => (
      <StepGroundTruth
        sessionId={sessionId}
        isGroundTruth={isGroundTruth}
        onMarked={() => setIsGroundTruth(true)}
        gridSaveRef={gridSaveRef}
      />
    ),
  ]

  // Any index without a renderer (out of range) renders nothing.
  const renderStep = () => {
    const render = stepRenderers[currentStep]
    return render ? render() : null
  }

  return (
    <div className="space-y-6">
      <PagePurpose
        title="OCR Kombi Pipeline"
        purpose="Modulare 11-Schritt-Pipeline: Upload, Vorverarbeitung, Dual-Engine-OCR (PP-OCRv5 + Tesseract), Strukturerkennung, Grid-Aufbau und Review. Multi-Page-Dokument-Unterstuetzung."
        audience={['Entwickler']}
        architecture={{
          services: ['klausur-service (FastAPI)', 'OpenCV', 'Tesseract', 'PaddleOCR'],
          databases: ['PostgreSQL Sessions'],
        }}
        relatedPages={[
          { name: 'OCR Overlay (Legacy)', href: '/ai/ocr-overlay', description: 'Alter 3-Modi-Monolith' },
          { name: 'OCR Regression', href: '/ai/ocr-regression', description: 'Regressionstests' },
        ]}
        defaultCollapsed
      />

      <SessionList
        items={groupedSessions()}
        loading={loadingSessions}
        activeSessionId={sessionId}
        onOpenSession={(sid) => openSession(sid)}
        onNewSession={handleNewSession}
        onDeleteSession={deleteSession}
        onRenameSession={renameSession}
        onUpdateCategory={updateCategory}
      />

      {/* The header needs both an id and a loaded name */}
      {sessionId && sessionName && (
        <SessionHeader
          sessionName={sessionName}
          activeCategory={activeCategory}
          isGroundTruth={isGroundTruth}
          onUpdateCategory={(cat) => updateCategory(sessionId, cat)}
        />
      )}

      <KombiStepper
        steps={steps}
        currentStep={currentStep}
        onStepClick={handleStepClick}
      />

      {/* Tabs appear only while sub-sessions, their parent and an active session are all known */}
      {subSessions.length > 0 && parentSessionId && sessionId && (
        <BoxSessionTabs
          parentSessionId={parentSessionId}
          subSessions={subSessions}
          activeSessionId={sessionId}
          onSessionChange={handleSessionChange}
        />
      )}

      <div className="min-h-[400px]">{renderStep()}</div>
    </div>
  )
}
|
||||||
|
|
||||||
|
/**
 * Route entry for the OCR Kombi page.
 *
 * Renders the page content inside a Suspense boundary with a small loading
 * placeholder. The pipeline hook used by the content reads URL search params
 * via `useSearchParams`; presumably that is why the boundary is here.
 */
export default function OcrKombiPage() {
  const loadingFallback = <div className="p-4 text-sm text-gray-400">Lade...</div>

  return (
    <Suspense fallback={loadingFallback}>
      <OcrKombiContent />
    </Suspense>
  )
}
|
||||||
118
admin-lehrer/app/(admin)/ai/ocr-kombi/types.ts
Normal file
118
admin-lehrer/app/(admin)/ai/ocr-kombi/types.ts
Normal file
@@ -0,0 +1,118 @@
|
|||||||
|
import type { PipelineStep, PipelineStepStatus, DocumentCategory } from '../ocr-pipeline/types'
|
||||||
|
|
||||||
|
// Re-export shared types
|
||||||
|
export type { PipelineStep, PipelineStepStatus, DocumentCategory }
|
||||||
|
export { DOCUMENT_CATEGORIES } from '../ocr-pipeline/types'
|
||||||
|
|
||||||
|
// Re-export grid/structure types used by later steps
|
||||||
|
export type {
|
||||||
|
SessionListItem,
|
||||||
|
SessionInfo,
|
||||||
|
SubSession,
|
||||||
|
OrientationResult,
|
||||||
|
CropResult,
|
||||||
|
DeskewResult,
|
||||||
|
DewarpResult,
|
||||||
|
GridResult,
|
||||||
|
GridCell,
|
||||||
|
OcrWordBox,
|
||||||
|
WordBbox,
|
||||||
|
ColumnMeta,
|
||||||
|
StructureResult,
|
||||||
|
StructureBox,
|
||||||
|
StructureZone,
|
||||||
|
StructureGraphic,
|
||||||
|
ExcludeRegion,
|
||||||
|
} from '../ocr-pipeline/types'
|
||||||
|
|
||||||
|
/**
 * Step definitions of the Kombi V2 pipeline, in UI order.
 *
 * The array index is the UI step index used throughout the page (0 = upload,
 * 10 = ground truth). Every entry starts as 'pending'; consumers copy the
 * entries and overwrite `status` rather than mutating this template.
 * Each step has its own component file in components/ocr-kombi/.
 */
export const KOMBI_V2_STEPS: PipelineStep[] = [
  // Acquisition and page preparation
  { id: 'upload', name: 'Upload', icon: '📤', status: 'pending' },
  { id: 'orientation', name: 'Orientierung', icon: '🔄', status: 'pending' },
  { id: 'page-split', name: 'Seitentrennung', icon: '📖', status: 'pending' },
  { id: 'deskew', name: 'Begradigung', icon: '📐', status: 'pending' },
  { id: 'dewarp', name: 'Entzerrung', icon: '🔧', status: 'pending' },
  { id: 'content-crop', name: 'Zuschneiden', icon: '✂️', status: 'pending' },
  // Recognition and layout
  { id: 'ocr', name: 'OCR', icon: '🔀', status: 'pending' },
  { id: 'structure', name: 'Strukturerkennung', icon: '🔍', status: 'pending' },
  // Grid and sign-off
  { id: 'grid-build', name: 'Grid-Aufbau', icon: '🧱', status: 'pending' },
  { id: 'grid-review', name: 'Grid-Review', icon: '📊', status: 'pending' },
  { id: 'ground-truth', name: 'Ground Truth', icon: '✅', status: 'pending' },
]
|
||||||
|
|
||||||
|
/**
 * Lookup from Kombi V2 UI step index (key) to the DB step number (value).
 *
 * The mapping is not one-to-one: orientation and page-split share DB step 2,
 * and DB steps 6 and 7 have no UI step of their own here.
 */
export const KOMBI_V2_UI_TO_DB: Record<number, number> = {
  // upload
  0: 1,
  // orientation
  1: 2,
  // page-split (same DB step as orientation)
  2: 2,
  // deskew
  3: 3,
  // dewarp
  4: 4,
  // content-crop
  5: 5,
  // ocr (word_result)
  6: 8,
  // structure
  7: 9,
  // grid-build
  8: 10,
  // grid-review
  9: 11,
  // ground-truth
  10: 12,
}
|
||||||
|
|
||||||
|
/**
 * Translates a DB step number into the Kombi V2 UI step index.
 *
 * - anything up to 1 is the upload step (0)
 * - 2..5 and 9..11 map to one dedicated UI step each
 * - everything else up to 8 (i.e. 6, 7, 8) lands on OCR (6)
 * - everything beyond that lands on ground truth (10)
 *
 * UI step 2 (page-split) is never returned: it shares DB step 2 with
 * orientation, which takes precedence.
 *
 * @param dbStep step number as stored for the session
 * @returns UI step index in the range 0..10
 */
export function dbStepToKombiV2Ui(dbStep: number): number {
  if (dbStep <= 1) return 0 // upload

  switch (dbStep) {
    case 2:
      return 1 // orientation
    case 3:
      return 3 // deskew
    case 4:
      return 4 // dewarp
    case 5:
      return 5 // content-crop
    case 9:
      return 7 // structure
    case 10:
      return 8 // grid-build
    case 11:
      return 9 // grid-review
  }

  // Remaining values: up to 8 means OCR, everything else is ground truth.
  return dbStep <= 8 ? 6 : 10
}
|
||||||
|
|
||||||
|
/**
 * A multi-page upload: several sessions that belong to one document.
 */
export interface DocumentGroup {
  /** Identifier shared by all sessions of the group */
  group_id: string
  /** Display title of the document */
  title: string
  /** Number of pages; presumably equals `sessions.length` (not enforced here) */
  page_count: number
  /** The sessions that make up the document */
  sessions: DocumentGroupSession[]
}
|
||||||
|
|
||||||
|
/**
 * One session inside a {@link DocumentGroup}.
 */
export interface DocumentGroupSession {
  /** Session identifier */
  id: string
  /** Display name of the session */
  name: string
  /** Position of this page within the document (numbering base not visible here) */
  page_number: number
  /** Step the session is currently at; presumably a DB step number — TODO confirm */
  current_step: number
  /** Status string; its value set is not defined in this file */
  status: string
  /** Category assigned to the document, if any */
  document_category?: DocumentCategory
  /** Creation timestamp as a string; presumably ISO 8601 — TODO confirm */
  created_at: string
}
|
||||||
|
|
||||||
|
/**
 * Tag describing where an OCR word came from (engine transparency).
 * The exact merge semantics of each tag are decided outside this file.
 */
export type OcrEngineSource =
  | 'both'
  | 'paddle_only'
  | 'tesseract_only'
  | 'conflict_paddle'
  | 'conflict_tesseract'
|
||||||
|
|
||||||
|
/**
 * A single recognised word together with its box and engine provenance.
 */
export interface OcrTransparentWord {
  /** Recognised text */
  text: string
  /** Box origin and size; presumably image pixels — TODO confirm units */
  left: number
  top: number
  width: number
  height: number
  /** Presumably the recognition confidence; scale not visible here */
  conf: number
  /** Which engine(s) the word originates from */
  engine_source: OcrEngineSource
}
|
||||||
|
|
||||||
|
/**
 * Result of the dual-engine OCR step: both raw word lists, the merged list,
 * and counters describing how the merge went.
 */
export interface OcrTransparentResult {
  /** Words as reported under the Tesseract key */
  raw_tesseract: { words: OcrTransparentWord[] }
  /** Words as reported under the Paddle key */
  raw_paddle: { words: OcrTransparentWord[] }
  /** Word list after merging both engines */
  merged: { words: OcrTransparentWord[] }
  /** Merge counters; names mirror the {@link OcrEngineSource} tags */
  stats: {
    total_words: number
    both_agree: number
    paddle_only: number
    tesseract_only: number
    conflict_paddle_wins: number
    conflict_tesseract_wins: number
  }
}
|
||||||
361
admin-lehrer/app/(admin)/ai/ocr-kombi/useKombiPipeline.ts
Normal file
361
admin-lehrer/app/(admin)/ai/ocr-kombi/useKombiPipeline.ts
Normal file
@@ -0,0 +1,361 @@
|
|||||||
|
'use client'
|
||||||
|
|
||||||
|
import { useCallback, useEffect, useState, useRef } from 'react'
|
||||||
|
import { useSearchParams } from 'next/navigation'
|
||||||
|
import type { PipelineStep, DocumentCategory } from './types'
|
||||||
|
import { KOMBI_V2_STEPS, dbStepToKombiV2Ui } from './types'
|
||||||
|
import type { SubSession, SessionListItem } from '../ocr-pipeline/types'
|
||||||
|
|
||||||
|
export type { SessionListItem }
|
||||||
|
|
||||||
|
const KLAUSUR_API = '/klausur-api'
|
||||||
|
|
||||||
|
/**
 * Session-list entry for a multi-page document: all sessions that share one
 * `document_group_id`, bundled for display.
 */
export interface DocumentGroupView {
  /** The shared `document_group_id` */
  group_id: string
  /** Title shown for the whole document */
  title: string
  /** Member sessions of the group */
  sessions: SessionListItem[]
  /** Number of sessions in the group */
  page_count: number
}
|
||||||
|
|
||||||
|
/**
 * Builds a fresh step list for a new session: a copy of every entry of
 * KOMBI_V2_STEPS with the first step marked 'active' and all others 'pending'.
 */
function initSteps(): PipelineStep[] {
  return KOMBI_V2_STEPS.map((template, position) => {
    const isFirst = position === 0
    return { ...template, status: isFirst ? 'active' : 'pending' }
  })
}
|
||||||
|
|
||||||
|
/**
 * State container for the OCR Kombi page.
 *
 * Owns the session list, the active session (name, category, ground-truth
 * flag, sub-sessions) and the stepper state, and exposes the actions the page
 * wires into its components. All server access goes through the
 * `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions` endpoints.
 *
 * Failed requests (network errors and non-2xx responses) are logged to the
 * console and leave local state untouched; no action throws to its caller.
 *
 * @returns state values, the `groupedSessions()` selector, and action callbacks
 */
export function useKombiPipeline() {
  const [currentStep, setCurrentStep] = useState(0)
  const [sessionId, setSessionId] = useState<string | null>(null)
  const [sessionName, setSessionName] = useState('')
  const [sessions, setSessions] = useState<SessionListItem[]>([])
  const [loadingSessions, setLoadingSessions] = useState(true)
  const [activeCategory, setActiveCategory] = useState<DocumentCategory | undefined>(undefined)
  const [isGroundTruth, setIsGroundTruth] = useState(false)
  const [subSessions, setSubSessions] = useState<SubSession[]>([])
  const [parentSessionId, setParentSessionId] = useState<string | null>(null)
  // Lazy initializer: the step list is built once instead of on every render.
  const [steps, setSteps] = useState<PipelineStep[]>(initSteps)

  const searchParams = useSearchParams()
  // Ensures the ?session=… deep link is applied only once.
  const deepLinkHandled = useRef(false)
  // Holds the grid "save" function; shared by the grid-review and ground-truth steps.
  const gridSaveRef = useRef<(() => Promise<void>) | null>(null)

  // ---- Session loading ----

  /** Fetches all sessions and keeps only those without a parent (no sub-sessions). */
  const loadSessions = useCallback(async () => {
    setLoadingSessions(true)
    try {
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions`)
      if (!res.ok) {
        console.error('Failed to load sessions: HTTP', res.status)
        return
      }
      const data = await res.json()
      setSessions((data.sessions || []).filter((s: SessionListItem) => !s.parent_session_id))
    } catch (e) {
      console.error('Failed to load sessions:', e)
    } finally {
      setLoadingSessions(false)
    }
  }, [])

  useEffect(() => { loadSessions() }, [loadSessions])

  // ---- Group sessions by document_group_id ----

  /**
   * Returns the session list for display: sessions sharing a
   * `document_group_id` are folded into one DocumentGroupView (pages ordered
   * by `page_number`), all others stay as they are. Entries are ordered newest
   * first; a group counts with the creation time of its oldest page.
   */
  const groupedSessions = useCallback((): (SessionListItem | DocumentGroupView)[] => {
    const createdAt = (s: SessionListItem): number => new Date(s.created_at).getTime()

    const pagesByGroup = new Map<string, SessionListItem[]>()
    const ungrouped: SessionListItem[] = []
    for (const s of sessions) {
      if (!s.document_group_id) {
        ungrouped.push(s)
        continue
      }
      const pages = pagesByGroup.get(s.document_group_id)
      if (pages) pages.push(s)
      else pagesByGroup.set(s.document_group_id, [s])
    }

    // Each entry carries its sort key so the timestamp is computed once per
    // entry instead of once per comparison.
    const entries: { time: number; item: SessionListItem | DocumentGroupView }[] = []

    pagesByGroup.forEach((pages, groupId) => {
      pages.sort((a, b) => (a.page_number || 0) - (b.page_number || 0))
      const group: DocumentGroupView = {
        group_id: groupId,
        // Base title = name of the first page without its " — S. X" suffix
        title: pages[0]?.name?.replace(/ — S\. \d+$/, '') || 'Dokument',
        sessions: pages,
        page_count: pages.length,
      }
      entries.push({ time: Math.min(...pages.map(createdAt)), item: group })
    })
    for (const s of ungrouped) {
      entries.push({ time: createdAt(s), item: s })
    }

    // Most recent first
    entries.sort((a, b) => b.time - a.time)
    return entries.map(entry => entry.item)
  }, [sessions])

  // ---- Open session ----

  /**
   * Loads one session from the server and makes it the active one: name,
   * category, ground-truth flag, sub-session context and stepper state are
   * all derived from the response.
   *
   * @param sid session to open
   * @param keepSubSessions when true, the current sub-session tabs are kept
   *   even if the response carries neither sub-sessions nor a parent id
   */
  const openSession = useCallback(async (sid: string, keepSubSessions?: boolean) => {
    try {
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`)
      if (!res.ok) {
        console.error('Failed to open session: HTTP', res.status)
        return
      }
      const data = await res.json()

      setSessionId(sid)
      setSessionName(data.name || data.filename || '')
      setActiveCategory(data.document_category || undefined)
      setIsGroundTruth(!!data.ground_truth?.build_grid_reference)

      // Sub-session handling
      if (data.sub_sessions?.length > 0) {
        setSubSessions(data.sub_sessions)
        setParentSessionId(sid)
      } else if (data.parent_session_id) {
        setParentSessionId(data.parent_session_id)
      } else if (!keepSubSessions) {
        setSubSessions([])
        setParentSessionId(null)
      }

      // Determine the UI step. Stored results win over the step counter: the
      // most advanced result present decides where the user resumes.
      const dbStep = data.current_step || 1
      let uiStep: number
      if (data.grid_editor_result) {
        uiStep = 9 // grid-review
      } else if (data.structure_result) {
        uiStep = 8 // grid-build
      } else if (data.word_result) {
        uiStep = 7 // structure
      } else {
        // dbStepToKombiV2Ui yields 0 (upload) only for dbStep <= 1, so a
        // session that is past the upload never resumes on the upload step.
        uiStep = dbStepToKombiV2Ui(dbStep)
      }

      // Sub-sessions skip the steps that were already done on their parent.
      const skipIds: string[] = []
      const isSubSession = !!data.parent_session_id
      if (isSubSession && dbStep >= 5) {
        skipIds.push('upload', 'orientation', 'page-split', 'deskew', 'dewarp', 'content-crop')
        if (uiStep < 6) uiStep = 6
      } else if (isSubSession && dbStep >= 2) {
        skipIds.push('upload', 'orientation')
        if (uiStep < 2) uiStep = 2
      }

      setSteps(
        KOMBI_V2_STEPS.map((s, i) => ({
          ...s,
          status: skipIds.includes(s.id)
            ? 'skipped'
            : i < uiStep ? 'completed' : i === uiStep ? 'active' : 'pending',
        })),
      )
      setCurrentStep(uiStep)
    } catch (e) {
      console.error('Failed to open session:', e)
    }
  }, [])

  // ---- Deep link handling ----

  // ?session=<id> opens that session once; an optional ?step=<index> then
  // overrides the step derived from the server state (valid indices only).
  useEffect(() => {
    if (deepLinkHandled.current) return
    const urlSession = searchParams.get('session')
    const urlStep = searchParams.get('step')
    if (urlSession) {
      deepLinkHandled.current = true
      openSession(urlSession).then(() => {
        if (urlStep) {
          const stepIdx = parseInt(urlStep, 10)
          if (!isNaN(stepIdx) && stepIdx >= 0 && stepIdx < KOMBI_V2_STEPS.length) {
            setCurrentStep(stepIdx)
          }
        }
      })
    }
  }, [searchParams, openSession])

  // ---- Step navigation ----

  /**
   * Jumps to `step` and recomputes every status relative to it.
   * NOTE(review): this also overwrites 'skipped' markers set by openSession —
   * confirm that is intended before using it on sub-sessions.
   */
  const goToStep = useCallback((step: number) => {
    setCurrentStep(step)
    setSteps(prev =>
      prev.map((s, i) => ({
        ...s,
        status: i < step ? 'completed' : i === step ? 'active' : 'pending',
      })),
    )
  }, [])

  /** Stepper click: only the current/earlier steps or completed steps can be opened. */
  const handleStepClick = useCallback((index: number) => {
    if (index <= currentStep || steps[index].status === 'completed') {
      setCurrentStep(index)
    }
  }, [currentStep, steps])

  // ---- Session CRUD ----

  /** Resets the active-session state so the upload step starts a new session. */
  const handleNewSession = useCallback(() => {
    setSessionId(null)
    setSessionName('')
    setCurrentStep(0)
    setSubSessions([])
    setParentSessionId(null)
    setSteps(initSteps())
  }, [])

  /**
   * Advances to the next step. On the last step it returns to the session
   * list instead: the active session is cleared (including its name, so no
   * stale name shows up for the next upload) and the list is reloaded.
   */
  const handleNext = useCallback(() => {
    if (currentStep >= steps.length - 1) {
      handleNewSession()
      loadSessions()
      return
    }

    const nextStep = currentStep + 1
    setSteps(prev =>
      prev.map((s, i) => {
        if (i === currentStep) return { ...s, status: 'completed' }
        if (i === nextStep) return { ...s, status: 'active' }
        return s
      }),
    )
    setCurrentStep(nextStep)
  }, [currentStep, steps.length, handleNewSession, loadSessions])

  /**
   * Deletes a session on the server and, once that succeeded, from the local
   * list. If it was the active session the pipeline is reset.
   */
  const deleteSession = useCallback(async (sid: string) => {
    try {
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`, { method: 'DELETE' })
      // fetch resolves on HTTP errors too. A 404 is treated as "already gone"
      // and still removes the local entry.
      if (!res.ok && res.status !== 404) {
        console.error('Failed to delete session: HTTP', res.status)
        return
      }
      setSessions(prev => prev.filter(s => s.id !== sid))
      if (sessionId === sid) handleNewSession()
    } catch (e) {
      console.error('Failed to delete session:', e)
    }
  }, [sessionId, handleNewSession])

  /** Renames a session on the server; local state follows only after success. */
  const renameSession = useCallback(async (sid: string, newName: string) => {
    try {
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`, {
        method: 'PUT',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ name: newName }),
      })
      if (!res.ok) {
        console.error('Failed to rename session: HTTP', res.status)
        return
      }
      setSessions(prev => prev.map(s => s.id === sid ? { ...s, name: newName } : s))
      if (sessionId === sid) setSessionName(newName)
    } catch (e) {
      console.error('Failed to rename session:', e)
    }
  }, [sessionId])

  /** Stores a session's document category; local state follows only after success. */
  const updateCategory = useCallback(async (sid: string, category: DocumentCategory) => {
    try {
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`, {
        method: 'PUT',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ document_category: category }),
      })
      if (!res.ok) {
        console.error('Failed to update category: HTTP', res.status)
        return
      }
      setSessions(prev => prev.map(s => s.id === sid ? { ...s, document_category: category } : s))
      if (sessionId === sid) setActiveCategory(category)
    } catch (e) {
      console.error('Failed to update category:', e)
    }
  }, [sessionId])

  // ---- Orientation completion (checks for page-split sub-sessions) ----

  /**
   * Called when the orientation step finishes. If the server reports
   * sub-sessions for `sid`, they become the tab set and the first one is
   * opened; otherwise (or if the check fails) the pipeline moves on to the
   * next step.
   */
  const handleOrientationComplete = useCallback(async (sid: string) => {
    setSessionId(sid)
    loadSessions()

    try {
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`)
      if (res.ok) {
        const data = await res.json()
        if (data.sub_sessions?.length > 0) {
          // Keep only the fields SubSession declares
          const subs: SubSession[] = data.sub_sessions.map((s: SubSession) => ({
            id: s.id,
            name: s.name,
            box_index: s.box_index,
            current_step: s.current_step,
          }))
          setSubSessions(subs)
          setParentSessionId(sid)
          openSession(subs[0].id, true)
          return
        }
      }
    } catch (e) {
      console.error('Failed to check for sub-sessions:', e)
    }

    handleNext()
  }, [loadSessions, openSession, handleNext])

  /** Tab switch between sub-sessions: opens the target and keeps the tab set. */
  const handleSessionChange = useCallback((newSessionId: string) => {
    openSession(newSessionId, true)
  }, [openSession])

  return {
    // State
    currentStep,
    sessionId,
    sessionName,
    sessions,
    loadingSessions,
    activeCategory,
    isGroundTruth,
    subSessions,
    parentSessionId,
    steps,
    gridSaveRef,
    // Computed
    groupedSessions,
    // Actions
    loadSessions,
    openSession,
    goToStep,
    handleStepClick,
    handleNext,
    handleNewSession,
    deleteSession,
    renameSession,
    updateCategory,
    handleOrientationComplete,
    handleSessionChange,
    setSessionId,
    setSubSessions,
    setParentSessionId,
    setIsGroundTruth,
  }
}
|
||||||
@@ -33,6 +33,9 @@ export interface SessionListItem {
|
|||||||
current_step: number
|
current_step: number
|
||||||
document_category?: DocumentCategory
|
document_category?: DocumentCategory
|
||||||
doc_type?: string
|
doc_type?: string
|
||||||
|
parent_session_id?: string
|
||||||
|
document_group_id?: string
|
||||||
|
page_number?: number
|
||||||
created_at: string
|
created_at: string
|
||||||
updated_at?: string
|
updated_at?: string
|
||||||
}
|
}
|
||||||
@@ -108,6 +111,8 @@ export interface SessionInfo {
|
|||||||
sub_sessions?: SubSession[]
|
sub_sessions?: SubSession[]
|
||||||
parent_session_id?: string
|
parent_session_id?: string
|
||||||
box_index?: number
|
box_index?: number
|
||||||
|
document_group_id?: string
|
||||||
|
page_number?: number
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface DeskewResult {
|
export interface DeskewResult {
|
||||||
|
|||||||
59
admin-lehrer/components/ocr-kombi/KombiStepper.tsx
Normal file
59
admin-lehrer/components/ocr-kombi/KombiStepper.tsx
Normal file
@@ -0,0 +1,59 @@
|
|||||||
|
'use client'
|
||||||
|
|
||||||
|
import type { PipelineStep } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||||
|
|
||||||
|
/** Props of {@link KombiStepper}. */
interface KombiStepperProps {
  /** Steps to display, in order; each step's `status` drives its look */
  steps: PipelineStep[]
  /** Index of the step that is currently shown */
  currentStep: number
  /** Invoked with the step index when a clickable step is pressed */
  onStepClick: (index: number) => void
}
|
||||||
|
|
||||||
|
/**
 * Horizontal stepper for the Kombi pipeline.
 *
 * Each step is a pill button, preceded (except the first) by a short
 * connector line. A step is clickable when it is not skipped and is either
 * completed or at/before the current step. On narrow layouts the step number
 * replaces the step name.
 */
export function KombiStepper({ steps, currentStep, onStepClick }: KombiStepperProps) {
  return (
    <div className="flex items-center gap-0.5 px-3 py-2.5 bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 overflow-x-auto">
      {steps.map((step, index) => {
        const { status } = step
        const skipped = status === 'skipped'
        const completed = status === 'completed'
        const failed = status === 'failed'
        const active = index === currentStep
        const reached = index <= currentStep
        const clickable = !skipped && (reached || completed)

        // Connector line in front of the pill
        let connectorTone: string
        if (skipped) {
          connectorTone = 'bg-gray-200 dark:bg-gray-700 border-t border-dashed border-gray-400'
        } else if (reached) {
          connectorTone = 'bg-teal-400'
        } else {
          connectorTone = 'bg-gray-300 dark:bg-gray-600'
        }

        // Pill colours; precedence: skipped > active > completed > failed > default
        let pillTone: string
        if (skipped) {
          pillTone = 'bg-gray-100 text-gray-400 dark:bg-gray-800 dark:text-gray-600 line-through'
        } else if (active) {
          pillTone = 'bg-teal-100 text-teal-700 dark:bg-teal-900/40 dark:text-teal-300 ring-2 ring-teal-400'
        } else if (completed) {
          pillTone = 'bg-green-100 text-green-700 dark:bg-green-900/40 dark:text-green-300'
        } else if (failed) {
          pillTone = 'bg-red-100 text-red-700 dark:bg-red-900/40 dark:text-red-300'
        } else {
          pillTone = 'text-gray-400 dark:text-gray-500'
        }

        const cursorTone = clickable ? 'cursor-pointer hover:opacity-80' : 'cursor-default'

        // Leading glyph: dash for skipped, check mark, cross, or the step's own icon
        let glyph = step.icon
        if (skipped) {
          glyph = '-'
        } else if (completed) {
          glyph = '\u2713'
        } else if (failed) {
          glyph = '\u2717'
        }

        return (
          <div key={step.id} className="flex items-center flex-shrink-0">
            {index > 0 && (
              <div className={`h-0.5 w-4 mx-0.5 ${connectorTone}`} />
            )}
            <button
              onClick={() => {
                if (clickable) onStepClick(index)
              }}
              disabled={!clickable}
              className={`flex items-center gap-1 px-2 py-1 rounded-full text-xs font-medium transition-all whitespace-nowrap ${pillTone} ${cursorTone}`}
              title={step.name}
            >
              <span className="text-sm">{glyph}</span>
              <span className="hidden lg:inline">{step.name}</span>
              <span className="lg:hidden">{index + 1}</span>
            </button>
          </div>
        )
      })}
    </div>
  )
}
|
||||||
66
admin-lehrer/components/ocr-kombi/SessionHeader.tsx
Normal file
66
admin-lehrer/components/ocr-kombi/SessionHeader.tsx
Normal file
@@ -0,0 +1,66 @@
|
|||||||
|
'use client'
|
||||||
|
|
||||||
|
import { useState } from 'react'
|
||||||
|
import { DOCUMENT_CATEGORIES, type DocumentCategory } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||||
|
|
||||||
|
/** Props of the SessionHeader component. */
interface SessionHeaderProps {
  /** Name of the active session, shown after the "Aktive Session:" label */
  sessionName: string
  /** Category currently assigned to the session; undefined when none is set */
  activeCategory?: DocumentCategory
  /** When true, a "GT" badge is shown */
  isGroundTruth: boolean
  /** Invoked with the chosen category when the user picks one */
  onUpdateCategory: (category: DocumentCategory) => void
}
|
||||||
|
|
||||||
|
/**
 * Header strip for the active session.
 *
 * Shows the session name, a button that toggles a category picker, and a
 * "GT" badge when the session is flagged as ground truth. Choosing a category
 * reports it through `onUpdateCategory` and closes the picker.
 */
export function SessionHeader({
  sessionName,
  activeCategory,
  isGroundTruth,
  onUpdateCategory,
}: SessionHeaderProps) {
  const [pickerOpen, setPickerOpen] = useState(false)

  // Icon/label metadata of the active category, if it matches a known entry.
  const activeInfo = DOCUMENT_CATEGORIES.find(entry => entry.value === activeCategory)

  // Teal once a category is set; pulsing amber while one is still missing.
  const badgeTone = activeCategory
    ? 'bg-teal-50 dark:bg-teal-900/30 border-teal-200 dark:border-teal-700 text-teal-700 dark:text-teal-300 hover:bg-teal-100'
    : 'bg-amber-50 dark:bg-amber-900/20 border-amber-300 dark:border-amber-700 text-amber-700 dark:text-amber-300 hover:bg-amber-100 animate-pulse'
  const badgeText = activeInfo ? `${activeInfo.icon} ${activeInfo.label}` : 'Kategorie setzen'

  const pickCategory = (category: DocumentCategory) => {
    onUpdateCategory(category)
    setPickerOpen(false)
  }

  return (
    <div className="relative flex items-center gap-3 text-sm text-gray-500 dark:text-gray-400">
      <span>
        Aktive Session:{' '}
        <span className="font-medium text-gray-700 dark:text-gray-300">{sessionName}</span>
      </span>
      <button
        onClick={() => setPickerOpen(!pickerOpen)}
        className={`text-xs px-2.5 py-1 rounded-full border transition-colors ${badgeTone}`}
      >
        {badgeText}
      </button>
      {isGroundTruth && (
        <span className="text-xs px-2 py-0.5 rounded-full bg-amber-50 dark:bg-amber-900/20 border border-amber-300 dark:border-amber-700 text-amber-700 dark:text-amber-300">
          GT
        </span>
      )}
      {pickerOpen && (
        <div className="absolute left-0 top-full mt-1 z-20 bg-white dark:bg-gray-800 border border-gray-200 dark:border-gray-700 rounded-lg shadow-lg p-2 grid grid-cols-2 gap-1 w-64">
          {DOCUMENT_CATEGORIES.map(option => {
            const optionTone =
              activeCategory === option.value
                ? 'bg-teal-100 dark:bg-teal-900/40 text-teal-700 dark:text-teal-300'
                : 'hover:bg-gray-100 dark:hover:bg-gray-700 text-gray-600 dark:text-gray-400'
            return (
              <button
                key={option.value}
                onClick={() => pickCategory(option.value)}
                className={`text-xs px-2 py-1.5 rounded-md text-left transition-colors ${optionTone}`}
              >
                {option.icon} {option.label}
              </button>
            )
          })}
        </div>
      )}
    </div>
  )
}
|
||||||
361
admin-lehrer/components/ocr-kombi/SessionList.tsx
Normal file
361
admin-lehrer/components/ocr-kombi/SessionList.tsx
Normal file
@@ -0,0 +1,361 @@
|
|||||||
|
'use client'
|
||||||
|
|
||||||
|
import { useState } from 'react'
|
||||||
|
import { DOCUMENT_CATEGORIES, type DocumentCategory } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||||
|
import type { SessionListItem, DocumentGroupView } from '@/app/(admin)/ai/ocr-kombi/useKombiPipeline'
|
||||||
|
|
||||||
|
/** Path prefix prepended to the session thumbnail URLs built in this file. */
const KLAUSUR_API = '/klausur-api'
|
||||||
|
|
||||||
|
/** Props accepted by the SessionList component. */
interface SessionListProps {
  /** Entries to render: stand-alone sessions and multi-page document groups. */
  items: (SessionListItem | DocumentGroupView)[]
  /** While true, a loading hint is shown instead of the entries. */
  loading: boolean
  /** Id of the session to highlight, or null when none is active. */
  activeSessionId: string | null
  /** Invoked with a session id when a session (or a page of a group) is clicked. */
  onOpenSession: (sid: string) => void
  /** Invoked when the "+ Neue Session" button is clicked. */
  onNewSession: () => void
  /** Invoked with a session id once the user has confirmed the delete prompt. */
  onDeleteSession: (sid: string) => void
  /** Invoked with the session id and the edited name when an inline rename finishes. */
  onRenameSession: (sid: string, newName: string) => void
  /** Invoked with the session id and the category picked from the row's dropdown. */
  onUpdateCategory: (sid: string, category: DocumentCategory) => void
}
|
||||||
|
|
||||||
|
/** Type guard: an entry is treated as a document group when it carries a `group_id` property. */
function isGroup(item: SessionListItem | DocumentGroupView): item is DocumentGroupView {
  const hasGroupId = 'group_id' in item
  return hasGroupId
}
|
||||||
|
|
||||||
|
/**
 * Card listing all sessions and document groups.
 *
 * Owns the transient UI state shared by the rows: which session is being
 * renamed (and the draft name), which session has its category dropdown open,
 * and which document groups are expanded. Persisting changes is delegated to
 * the callbacks passed in via props.
 */
export function SessionList({
  items,
  loading,
  activeSessionId,
  onOpenSession,
  onNewSession,
  onDeleteSession,
  onRenameSession,
  onUpdateCategory,
}: SessionListProps) {
  const [editingName, setEditingName] = useState<string | null>(null)
  const [editNameValue, setEditNameValue] = useState('')
  const [editingCategory, setEditingCategory] = useState<string | null>(null)
  const [expandedGroups, setExpandedGroups] = useState<Set<string>>(new Set())

  // Flip the expanded flag of one group; a copy is made so React sees a new Set.
  const toggleGroup = (groupId: string) => {
    setExpandedGroups(current => {
      const updated = new Set(current)
      // Set.delete reports false when the id was absent, i.e. the group was collapsed.
      if (!updated.delete(groupId)) updated.add(groupId)
      return updated
    })
  }

  // Renders one entry as either a collapsible group or a single session row.
  const renderEntry = (item: SessionListItem | DocumentGroupView) => {
    if (isGroup(item)) {
      return (
        <GroupRow
          key={item.group_id}
          group={item}
          expanded={expandedGroups.has(item.group_id)}
          activeSessionId={activeSessionId}
          onToggle={() => toggleGroup(item.group_id)}
          onOpenSession={onOpenSession}
          onDeleteSession={onDeleteSession}
        />
      )
    }

    return (
      <SessionRow
        key={item.id}
        session={item}
        isActive={activeSessionId === item.id}
        editingName={editingName}
        editNameValue={editNameValue}
        editingCategory={editingCategory}
        onOpenSession={() => onOpenSession(item.id)}
        onStartRename={() => {
          setEditNameValue(item.name || item.filename)
          setEditingName(item.id)
        }}
        onFinishRename={(newName) => {
          onRenameSession(item.id, newName)
          setEditingName(null)
        }}
        onCancelRename={() => setEditingName(null)}
        onEditNameChange={setEditNameValue}
        onToggleCategory={() => setEditingCategory(editingCategory === item.id ? null : item.id)}
        onUpdateCategory={(cat) => {
          onUpdateCategory(item.id, cat)
          setEditingCategory(null)
        }}
        onDelete={() => {
          if (confirm('Session loeschen?')) onDeleteSession(item.id)
        }}
      />
    )
  }

  // Loading hint, empty hint, or the scrollable list of entries.
  const renderBody = () => {
    if (loading) {
      return <div className="text-sm text-gray-400 py-2">Lade Sessions...</div>
    }
    if (items.length === 0) {
      return <div className="text-sm text-gray-400 py-2">Noch keine Sessions vorhanden.</div>
    }
    return (
      <div className="space-y-1.5 max-h-[320px] overflow-y-auto">
        {items.map(renderEntry)}
      </div>
    )
  }

  return (
    <div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-4">
      <div className="flex items-center justify-between mb-3">
        <h3 className="text-sm font-medium text-gray-700 dark:text-gray-300">
          Sessions ({items.length})
        </h3>
        <button
          onClick={onNewSession}
          className="text-xs px-3 py-1.5 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors"
        >
          + Neue Session
        </button>
      </div>

      {renderBody()}
    </div>
  )
}
|
||||||
|
|
||||||
|
// ---- Group row (multi-page document) ----
|
||||||
|
|
||||||
|
/**
 * Collapsible row for a multi-page document.
 *
 * The header toggles expansion and is highlighted when one of the group's
 * sessions is the active one. When expanded, each page is listed with a
 * thumbnail, its page number, its current step and a delete button that asks
 * for confirmation first.
 */
function GroupRow({
  group,
  expanded,
  activeSessionId,
  onToggle,
  onOpenSession,
  onDeleteSession,
}: {
  group: DocumentGroupView
  expanded: boolean
  activeSessionId: string | null
  onToggle: () => void
  onOpenSession: (sid: string) => void
  onDeleteSession: (sid: string) => void
}) {
  const containsActive = group.sessions.some(page => page.id === activeSessionId)

  const headerTone = containsActive
    ? 'bg-teal-50 dark:bg-teal-900/30 border border-teal-200 dark:border-teal-700'
    : 'hover:bg-gray-50 dark:hover:bg-gray-700/50'
  const arrow = expanded ? '\u25BC' : '\u25B6'

  return (
    <div>
      <div
        onClick={onToggle}
        className={`flex items-center gap-3 px-3 py-2 rounded-lg text-sm cursor-pointer transition-colors ${headerTone}`}
      >
        <span className="text-base">{arrow}</span>
        <div className="flex-1 min-w-0">
          <div className="truncate font-medium text-gray-700 dark:text-gray-300">
            {group.title}
          </div>
          <div className="text-xs text-gray-400">
            {group.page_count} Seiten
          </div>
        </div>
        <span className="text-xs px-2 py-0.5 rounded-full bg-blue-50 dark:bg-blue-900/20 border border-blue-200 dark:border-blue-800 text-blue-600 dark:text-blue-400">
          Dokument
        </span>
      </div>

      {expanded && (
        <div className="ml-6 mt-1 space-y-1 border-l-2 border-gray-200 dark:border-gray-700 pl-3">
          {group.sessions.map(page => {
            const pageTone =
              activeSessionId === page.id
                ? 'bg-teal-50 dark:bg-teal-900/30 text-teal-700 dark:text-teal-300'
                : 'hover:bg-gray-50 dark:hover:bg-gray-700/50 text-gray-600 dark:text-gray-400'
            const thumbnailUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${page.id}/thumbnail?size=64`

            return (
              <div
                key={page.id}
                className={`flex items-center gap-2 px-2 py-1.5 rounded text-xs cursor-pointer transition-colors ${pageTone}`}
                onClick={() => onOpenSession(page.id)}
              >
                {/* Thumbnail; the image hides itself when it fails to load */}
                <div className="flex-shrink-0 w-8 h-8 rounded overflow-hidden bg-gray-100 dark:bg-gray-700">
                  {/* eslint-disable-next-line @next/next/no-img-element */}
                  <img
                    src={thumbnailUrl}
                    alt=""
                    className="w-full h-full object-cover"
                    loading="lazy"
                    onError={(e) => { (e.target as HTMLImageElement).style.display = 'none' }}
                  />
                </div>
                <span className="truncate flex-1">S. {page.page_number || '?'}</span>
                <span className="text-[10px] text-gray-400">Step {page.current_step}</span>
                <button
                  onClick={(e) => {
                    // Keep the click from also opening the page.
                    e.stopPropagation()
                    if (confirm('Seite loeschen?')) onDeleteSession(page.id)
                  }}
                  className="p-0.5 text-gray-400 hover:text-red-500"
                  title="Loeschen"
                >
                  <svg className="w-3 h-3" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={2}>
                    <path strokeLinecap="round" strokeLinejoin="round" d="M19 7l-.867 12.142A2 2 0 0116.138 21H7.862a2 2 0 01-1.995-1.858L5 7m5 4v6m4-6v6m1-10V4a1 1 0 00-1-1h-4a1 1 0 00-1 1v3M4 7h16" />
                  </svg>
                </button>
              </div>
            )
          })}
        </div>
      )}
    </div>
  )
}
|
||||||
|
|
||||||
|
// ---- Single session row ----
|
||||||
|
|
||||||
|
/**
 * Row for a single (non-grouped) session.
 *
 * Shows a thumbnail, the session name (or an inline rename input), a button
 * that copies the session id, the creation date, a category badge with a
 * dropdown, and rename/delete actions. Clicking the thumbnail or the info
 * column opens the session. Rename and category-dropdown state live in the
 * parent and arrive via props; only the "copied" feedback is local.
 */
function SessionRow({
  session,
  isActive,
  editingName,
  editNameValue,
  editingCategory,
  onOpenSession,
  onStartRename,
  onFinishRename,
  onCancelRename,
  onEditNameChange,
  onToggleCategory,
  onUpdateCategory,
  onDelete,
}: {
  session: SessionListItem
  isActive: boolean
  editingName: string | null
  editNameValue: string
  editingCategory: string | null
  onOpenSession: () => void
  onStartRename: () => void
  onFinishRename: (name: string) => void
  onCancelRename: () => void
  onEditNameChange: (val: string) => void
  onToggleCategory: () => void
  onUpdateCategory: (cat: DocumentCategory) => void
  onDelete: () => void
}) {
  // True for 1.5 s after the id was copied; drives the "Kopiert!" label.
  const [idCopied, setIdCopied] = useState(false)

  const catInfo = DOCUMENT_CATEGORIES.find(c => c.value === session.document_category)
  const isEditing = editingName === session.id

  // Copies the full session id. The label is driven by state rather than by
  // writing to the button's DOM, and it only changes once the write succeeded.
  const copySessionId = async () => {
    try {
      await navigator.clipboard.writeText(session.id)
      setIdCopied(true)
      setTimeout(() => setIdCopied(false), 1500)
    } catch {
      // Clipboard API unavailable or permission denied: keep the label
      // unchanged instead of claiming success.
    }
  }

  return (
    <div
      className={`relative flex items-start gap-3 px-3 py-2.5 rounded-lg text-sm transition-colors cursor-pointer ${
        isActive
          ? 'bg-teal-50 dark:bg-teal-900/30 border border-teal-200 dark:border-teal-700'
          : 'hover:bg-gray-50 dark:hover:bg-gray-700/50'
      }`}
    >
      {/* Thumbnail */}
      <div
        className="flex-shrink-0 w-12 h-12 rounded-md overflow-hidden bg-gray-100 dark:bg-gray-700"
        onClick={onOpenSession}
      >
        {/* eslint-disable-next-line @next/next/no-img-element */}
        <img
          src={`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${session.id}/thumbnail?size=96`}
          alt=""
          className="w-full h-full object-cover"
          loading="lazy"
          onError={(e) => { (e.target as HTMLImageElement).style.display = 'none' }}
        />
      </div>

      {/* Info */}
      <div className="flex-1 min-w-0" onClick={onOpenSession}>
        {isEditing ? (
          <input
            autoFocus
            value={editNameValue}
            onChange={(e) => onEditNameChange(e.target.value)}
            onBlur={() => onFinishRename(editNameValue)}
            onKeyDown={(e) => {
              if (e.key === 'Enter') onFinishRename(editNameValue)
              if (e.key === 'Escape') onCancelRename()
            }}
            onClick={(e) => e.stopPropagation()}
            className="w-full px-1 py-0.5 text-sm border rounded dark:bg-gray-700 dark:border-gray-600"
          />
        ) : (
          <div className="truncate font-medium text-gray-700 dark:text-gray-300">
            {session.name || session.filename}
          </div>
        )}
        <button
          onClick={(e) => {
            // Keep the click from also opening the session.
            e.stopPropagation()
            void copySessionId()
          }}
          className="text-[10px] font-mono text-gray-400 hover:text-teal-500 transition-colors"
          title={`Volle ID: ${session.id} — Klick zum Kopieren`}
        >
          {idCopied ? 'Kopiert!' : `ID: ${session.id.slice(0, 8)}`}
        </button>
        <div className="text-xs text-gray-400 mt-0.5">
          {new Date(session.created_at).toLocaleDateString('de-DE', {
            day: '2-digit', month: '2-digit', year: '2-digit',
            hour: '2-digit', minute: '2-digit',
          })}
        </div>
      </div>

      {/* Category badge */}
      <div className="flex flex-col gap-1 items-end flex-shrink-0" onClick={(e) => e.stopPropagation()}>
        <button
          onClick={onToggleCategory}
          className={`text-[10px] px-1.5 py-0.5 rounded-full border transition-colors ${
            catInfo
              ? 'bg-teal-50 dark:bg-teal-900/30 border-teal-200 dark:border-teal-700 text-teal-700 dark:text-teal-300'
              : 'bg-gray-50 dark:bg-gray-700 border-gray-200 dark:border-gray-600 text-gray-400 hover:text-gray-600'
          }`}
          title="Kategorie setzen"
        >
          {catInfo ? `${catInfo.icon} ${catInfo.label}` : '+ Kategorie'}
        </button>
      </div>

      {/* Actions */}
      <div className="flex flex-col gap-0.5 flex-shrink-0">
        <button
          onClick={(e) => { e.stopPropagation(); onStartRename() }}
          className="p-1 text-gray-400 hover:text-gray-600 dark:hover:text-gray-300"
          title="Umbenennen"
        >
          <svg className="w-3.5 h-3.5" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={2}>
            <path strokeLinecap="round" strokeLinejoin="round" d="M15.232 5.232l3.536 3.536m-2.036-5.036a2.5 2.5 0 113.536 3.536L6.5 21.036H3v-3.572L16.732 3.732z" />
          </svg>
        </button>
        <button
          onClick={(e) => { e.stopPropagation(); onDelete() }}
          className="p-1 text-gray-400 hover:text-red-500"
          title="Loeschen"
        >
          <svg className="w-3.5 h-3.5" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={2}>
            <path strokeLinecap="round" strokeLinejoin="round" d="M19 7l-.867 12.142A2 2 0 0116.138 21H7.862a2 2 0 01-1.995-1.858L5 7m5 4v6m4-6v6m1-10V4a1 1 0 00-1-1h-4a1 1 0 00-1 1v3M4 7h16" />
          </svg>
        </button>
      </div>

      {/* Category dropdown */}
      {editingCategory === session.id && (
        <div
          className="absolute right-0 top-full mt-1 z-20 bg-white dark:bg-gray-800 border border-gray-200 dark:border-gray-700 rounded-lg shadow-lg p-2 grid grid-cols-2 gap-1 w-64"
          onClick={(e) => e.stopPropagation()}
        >
          {DOCUMENT_CATEGORIES.map(cat => (
            <button
              key={cat.value}
              onClick={() => onUpdateCategory(cat.value)}
              className={`text-xs px-2 py-1.5 rounded-md text-left transition-colors ${
                session.document_category === cat.value
                  ? 'bg-teal-100 dark:bg-teal-900/40 text-teal-700 dark:text-teal-300'
                  : 'hover:bg-gray-100 dark:hover:bg-gray-700 text-gray-600 dark:text-gray-400'
              }`}
            >
              {cat.icon} {cat.label}
            </button>
          ))}
        </div>
      )}
    </div>
  )
}
|
||||||
13
admin-lehrer/components/ocr-kombi/StepContentCrop.tsx
Normal file
13
admin-lehrer/components/ocr-kombi/StepContentCrop.tsx
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
'use client'
|
||||||
|
|
||||||
|
import { StepCrop as BaseStepCrop } from '@/components/ocr-pipeline/StepCrop'
|
||||||
|
|
||||||
|
/** Props accepted by the StepContentCrop component. */
interface StepContentCropProps {
  /** Session to operate on; passed through to the shared crop step and used as its React key. */
  sessionId: string | null
  /** Passed through to the shared crop step as its `onNext` callback. */
  onNext: () => void
}
|
||||||
|
|
||||||
|
/**
 * Content-crop step of the Kombi pipeline.
 * Delegates all rendering to the shared StepCrop component.
 */
export function StepContentCrop(props: StepContentCropProps) {
  const { sessionId, onNext } = props

  // Keyed by session id so a session switch mounts a fresh StepCrop instance.
  return (
    <BaseStepCrop
      key={sessionId}
      sessionId={sessionId}
      onNext={onNext}
    />
  )
}
|
||||||
13
admin-lehrer/components/ocr-kombi/StepDeskew.tsx
Normal file
13
admin-lehrer/components/ocr-kombi/StepDeskew.tsx
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
'use client'
|
||||||
|
|
||||||
|
import { StepDeskew as BaseStepDeskew } from '@/components/ocr-pipeline/StepDeskew'
|
||||||
|
|
||||||
|
/** Props accepted by the StepDeskew wrapper. */
interface StepDeskewProps {
  /** Session to operate on; passed through to the shared deskew step and used as its React key. */
  sessionId: string | null
  /** Passed through to the shared deskew step as its `onNext` callback. */
  onNext: () => void
}
|
||||||
|
|
||||||
|
/**
 * Deskew step of the Kombi pipeline.
 * Delegates all rendering to the shared StepDeskew component.
 */
export function StepDeskew(props: StepDeskewProps) {
  const { sessionId, onNext } = props

  // Keyed by session id so a session switch mounts a fresh base-step instance.
  return (
    <BaseStepDeskew
      key={sessionId}
      sessionId={sessionId}
      onNext={onNext}
    />
  )
}
|
||||||
13
admin-lehrer/components/ocr-kombi/StepDewarp.tsx
Normal file
13
admin-lehrer/components/ocr-kombi/StepDewarp.tsx
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
'use client'
|
||||||
|
|
||||||
|
import { StepDewarp as BaseStepDewarp } from '@/components/ocr-pipeline/StepDewarp'
|
||||||
|
|
||||||
|
/** Props accepted by the StepDewarp wrapper. */
interface StepDewarpProps {
  /** Session to operate on; passed through to the shared dewarp step and used as its React key. */
  sessionId: string | null
  /** Passed through to the shared dewarp step as its `onNext` callback. */
  onNext: () => void
}
|
||||||
|
|
||||||
|
/**
 * Dewarp step of the Kombi pipeline.
 * Delegates all rendering to the shared StepDewarp component.
 */
export function StepDewarp(props: StepDewarpProps) {
  const { sessionId, onNext } = props

  // Keyed by session id so a session switch mounts a fresh base-step instance.
  return (
    <BaseStepDewarp
      key={sessionId}
      sessionId={sessionId}
      onNext={onNext}
    />
  )
}
|
||||||
109
admin-lehrer/components/ocr-kombi/StepGridBuild.tsx
Normal file
109
admin-lehrer/components/ocr-kombi/StepGridBuild.tsx
Normal file
@@ -0,0 +1,109 @@
|
|||||||
|
'use client'
|
||||||
|
|
||||||
|
import { useState, useEffect } from 'react'
|
||||||
|
|
||||||
|
/** Path prefix prepended to the grid-editor and build-grid request URLs built in this file. */
const KLAUSUR_API = '/klausur-api'
|
||||||
|
|
||||||
|
/** Props accepted by the StepGridBuild component. */
interface StepGridBuildProps {
  /** Session whose grid is checked or built; nothing is requested while it is null. */
  sessionId: string | null
  /** Invoked when the user clicks "Weiter zum Review" after a grid is available. */
  onNext: () => void
}
|
||||||
|
|
||||||
|
/**
 * Step 9: Grid Build.
 *
 * For the given session this first asks the grid-editor endpoint whether a
 * grid already exists. If it reports a `grid_shape`, that shape is shown;
 * otherwise the build-grid endpoint is triggered automatically. A failed
 * build shows the error together with a retry button.
 *
 * Per-session state is reset whenever `sessionId` changes, and responses that
 * arrive after the session changed (or after unmount) are ignored, so a late
 * reply cannot overwrite the state of the session shown now and the automatic
 * build is not started from an effect run that was already cleaned up.
 */
export function StepGridBuild({ sessionId, onNext }: StepGridBuildProps) {
  const [building, setBuilding] = useState(false)
  const [result, setResult] = useState<{ rows: number; cols: number; cells: number } | null>(null)
  const [error, setError] = useState('')

  /**
   * POSTs to build-grid and stores the resulting shape or the error message.
   * `isStale` lets the caller mark the outcome as obsolete; once it returns
   * true, no further state is written.
   */
  const buildGrid = async (isStale: () => boolean = () => false) => {
    if (!sessionId) return
    setBuilding(true)
    setError('')
    try {
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/build-grid`, {
        method: 'POST',
      })
      if (!res.ok) {
        const data = await res.json().catch(() => ({}))
        throw new Error(data.detail || `Grid-Build fehlgeschlagen (${res.status})`)
      }
      const data = await res.json()
      if (isStale()) return
      const shape = data.grid_shape || { rows: 0, cols: 0, total_cells: 0 }
      setResult({ rows: shape.rows, cols: shape.cols, cells: shape.total_cells })
    } catch (e) {
      if (!isStale()) setError(e instanceof Error ? e.message : String(e))
    } finally {
      if (!isStale()) setBuilding(false)
    }
  }

  useEffect(() => {
    // Flipped by the cleanup below; every await afterwards re-checks it.
    let cancelled = false
    const isStale = () => cancelled

    // Start from a clean slate so nothing from a previous session is displayed.
    setResult(null)
    setError('')
    setBuilding(false)

    const checkThenBuild = async () => {
      try {
        const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/grid-editor`)
        if (cancelled) return
        if (res.ok) {
          const data = await res.json()
          if (cancelled) return
          if (data.grid_shape) {
            setResult({ rows: data.grid_shape.rows, cols: data.grid_shape.cols, cells: data.grid_shape.total_cells })
            return
          }
        }
      } catch { /* no existing grid */ }

      if (cancelled) return
      // No existing grid was reported: trigger the build automatically.
      await buildGrid(isStale)
    }

    if (sessionId) void checkThenBuild()

    return () => { cancelled = true }
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [sessionId])

  return (
    <div className="space-y-4">
      {building && (
        <div className="flex items-center gap-3 p-6 bg-blue-50 dark:bg-blue-900/20 rounded-xl border border-blue-200 dark:border-blue-800">
          <div className="animate-spin w-5 h-5 border-2 border-blue-400 border-t-transparent rounded-full" />
          <span className="text-sm text-blue-600 dark:text-blue-400">Grid wird aufgebaut...</span>
        </div>
      )}

      {result && (
        <div className="space-y-3">
          <div className="p-4 bg-green-50 dark:bg-green-900/20 rounded-xl border border-green-200 dark:border-green-800">
            <div className="text-sm font-medium text-green-700 dark:text-green-300">
              Grid erstellt: {result.rows} Zeilen, {result.cols} Spalten, {result.cells} Zellen
            </div>
          </div>
          <button
            onClick={onNext}
            className="px-4 py-2 bg-teal-600 text-white text-sm rounded-lg hover:bg-teal-700"
          >
            Weiter zum Review
          </button>
        </div>
      )}

      {error && (
        <div className="space-y-3">
          <div className="text-sm text-red-500 bg-red-50 dark:bg-red-900/20 p-3 rounded-lg">
            {error}
          </div>
          <button
            onClick={() => { void buildGrid() }}
            className="px-4 py-2 bg-orange-600 text-white text-sm rounded-lg hover:bg-orange-700"
          >
            Erneut versuchen
          </button>
        </div>
      )}
    </div>
  )
}
|
||||||
15
admin-lehrer/components/ocr-kombi/StepGridReview.tsx
Normal file
15
admin-lehrer/components/ocr-kombi/StepGridReview.tsx
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
'use client'
|
||||||
|
|
||||||
|
import { StepGridReview as BaseStepGridReview } from '@/components/ocr-pipeline/StepGridReview'
|
||||||
|
import type { MutableRefObject } from 'react'
|
||||||
|
|
||||||
|
/** Props accepted by the StepGridReview wrapper. */
interface StepGridReviewProps {
  /** Session to review; passed through to the shared grid-review step. */
  sessionId: string | null
  /** Passed through to the shared grid-review step as its `onNext` callback. */
  onNext: () => void
  /**
   * Mutable ref passed through unchanged to the shared grid-review step.
   * NOTE(review): presumably that step stores its save function here; confirm there.
   */
  saveRef: MutableRefObject<(() => Promise<void>) | null>
}
|
||||||
|
|
||||||
|
/**
 * Grid-review step of the Kombi pipeline.
 * Delegates all rendering to the shared StepGridReview component.
 */
export function StepGridReview(props: StepGridReviewProps) {
  const { sessionId, onNext, saveRef } = props

  return (
    <BaseStepGridReview
      sessionId={sessionId}
      onNext={onNext}
      saveRef={saveRef}
    />
  )
}
|
||||||
74
admin-lehrer/components/ocr-kombi/StepGroundTruth.tsx
Normal file
74
admin-lehrer/components/ocr-kombi/StepGroundTruth.tsx
Normal file
@@ -0,0 +1,74 @@
|
|||||||
|
'use client'
|
||||||
|
|
||||||
|
import { useState } from 'react'
|
||||||
|
|
||||||
|
/** Path prefix prepended to the mark-ground-truth request URL built in this file. */
const KLAUSUR_API = '/klausur-api'
|
||||||
|
|
||||||
|
/** Props accepted by the StepGroundTruth component. */
interface StepGroundTruthProps {
  /** Session to mark; the mark action does nothing while it is null. */
  sessionId: string | null
  /** Whether the session is already marked; changes the hint text and the button label. */
  isGroundTruth: boolean
  /** Invoked after the mark-ground-truth request succeeded. */
  onMarked: () => void
  /** If `current` holds a function, it is awaited before the mark request is sent. */
  gridSaveRef: React.MutableRefObject<(() => Promise<void>) | null>
}
|
||||||
|
|
||||||
|
/**
 * Step 11: Ground Truth marking.
 *
 * Saves the current grid as reference data for regression tests. Before the
 * mark-ground-truth request is sent, the function held in `gridSaveRef` (if
 * any) is awaited so pending grid edits are stored first.
 *
 * The outcome is kept together with an explicit error flag. Every failure is
 * therefore rendered in the error colour, whatever its message text says.
 */
export function StepGroundTruth({ sessionId, isGroundTruth, onMarked, gridSaveRef }: StepGroundTruthProps) {
  const [saving, setSaving] = useState(false)
  const [status, setStatus] = useState<{ text: string; isError: boolean } | null>(null)

  const handleMark = async () => {
    if (!sessionId) return
    setSaving(true)
    setStatus(null)
    try {
      // Auto-save grid editor before marking
      if (gridSaveRef.current) {
        await gridSaveRef.current()
      }
      const res = await fetch(
        `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/mark-ground-truth?pipeline=kombi`,
        { method: 'POST' },
      )
      if (!res.ok) {
        const body = await res.text().catch(() => '')
        throw new Error(`Ground Truth fehlgeschlagen (${res.status}): ${body}`)
      }
      const data = await res.json()
      setStatus({ text: `Ground Truth gespeichert (${data.cells_saved} Zellen)`, isError: false })
      onMarked()
    } catch (e) {
      // Covers a failing grid save, network errors and non-OK responses alike.
      setStatus({ text: e instanceof Error ? e.message : String(e), isError: true })
    } finally {
      setSaving(false)
    }
  }

  return (
    <div className="space-y-4 p-6 bg-amber-50 dark:bg-amber-900/10 rounded-xl border border-amber-200 dark:border-amber-800">
      <h3 className="text-sm font-medium text-amber-700 dark:text-amber-300">
        Ground Truth
      </h3>
      <p className="text-sm text-amber-600 dark:text-amber-400">
        Markiert die aktuelle Grid-Ausgabe als Referenz fuer Regressionstests.
        {isGroundTruth && ' Diese Session ist bereits als Ground Truth markiert.'}
      </p>

      <button
        onClick={handleMark}
        disabled={saving}
        className="px-4 py-2 text-sm bg-amber-600 text-white rounded-lg hover:bg-amber-700 disabled:opacity-50"
      >
        {saving ? 'Speichere...' : isGroundTruth ? 'Ground Truth aktualisieren' : 'Als Ground Truth markieren'}
      </button>

      {status?.text && (
        <div className={`text-sm ${status.isError ? 'text-red-500' : 'text-amber-600 dark:text-amber-400'}`}>
          {status.text}
        </div>
      )}
    </div>
  )
}
|
||||||
30
admin-lehrer/components/ocr-kombi/StepOcr.tsx
Normal file
30
admin-lehrer/components/ocr-kombi/StepOcr.tsx
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
'use client'
|
||||||
|
|
||||||
|
import { PaddleDirectStep } from '@/components/ocr-overlay/PaddleDirectStep'
|
||||||
|
|
||||||
|
/** Props accepted by the StepOcr component. */
interface StepOcrProps {
  /** Session to run OCR on; passed through to PaddleDirectStep. */
  sessionId: string | null
  /** Passed through to PaddleDirectStep as its `onNext` callback. */
  onNext: () => void
}
|
||||||
|
|
||||||
|
/**
 * Step 7: OCR in Kombi mode (PaddleOCR + Tesseract).
 *
 * Phase 1 reuses the existing PaddleDirectStep, configured for the
 * "paddle-kombi" endpoint with Kombi-specific labels.
 * Phase 3 (later) will add transparent 3-phase progress + engine comparison.
 */
export function StepOcr(props: StepOcrProps) {
  const { sessionId, onNext } = props

  return (
    <PaddleDirectStep
      sessionId={sessionId}
      onNext={onNext}
      endpoint="paddle-kombi"
      engineKey="kombi"
      icon="🔀"
      title="Kombi-Modus"
      description="PP-OCRv5 und Tesseract laufen parallel. Koordinaten werden gewichtet gemittelt fuer optimale Positionierung."
      buttonLabel="PP-OCRv5 + Tesseract starten"
      runningLabel="PP-OCRv5 + Tesseract laufen..."
    />
  )
}
|
||||||
21
admin-lehrer/components/ocr-kombi/StepOrientation.tsx
Normal file
21
admin-lehrer/components/ocr-kombi/StepOrientation.tsx
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
'use client'
|
||||||
|
|
||||||
|
import { StepOrientation as BaseStepOrientation } from '@/components/ocr-pipeline/StepOrientation'
|
||||||
|
|
||||||
|
/** Props accepted by the StepOrientation wrapper. */
interface StepOrientationProps {
  /** Session to operate on; passed through to the shared orientation step and used as its React key. */
  sessionId: string | null
  /** Passed through to the shared orientation step; it receives a session id. */
  onNext: (sessionId: string) => void
  /** Passed through to the shared orientation step as its `onSessionList` callback. */
  onSessionList: () => void
}
|
||||||
|
|
||||||
|
/**
 * Orientation step of the Kombi pipeline.
 * Delegates all rendering to the shared StepOrientation component.
 */
export function StepOrientation(props: StepOrientationProps) {
  const { sessionId, onNext, onSessionList } = props

  // Keyed by session id so a session switch mounts a fresh base-step instance.
  return (
    <BaseStepOrientation
      key={sessionId}
      sessionId={sessionId}
      onNext={onNext}
      onSessionList={onSessionList}
    />
  )
}
|
||||||
123
admin-lehrer/components/ocr-kombi/StepPageSplit.tsx
Normal file
123
admin-lehrer/components/ocr-kombi/StepPageSplit.tsx
Normal file
@@ -0,0 +1,123 @@
|
|||||||
|
'use client'
|
||||||
|
|
||||||
|
import { useState, useEffect } from 'react'
|
||||||
|
import type { SubSession } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||||
|
|
||||||
|
/** Path prefix prepended to the session and page-split request URLs built in this file. */
const KLAUSUR_API = '/klausur-api'
|
||||||
|
|
||||||
|
/** Props accepted by the StepPageSplit component. */
interface StepPageSplitProps {
  /** Session to inspect; no request is made while it is null. */
  sessionId: string | null
  /** Invoked to advance to the next step. */
  onNext: () => void
  /** Invoked with the sub-sessions reported by the backend when there is at least one. */
  onSubSessionsCreated: (subs: SubSession[]) => void
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Step 3: Page split detection.
|
||||||
|
* Checks if the image is a double-page spread and offers to split it.
|
||||||
|
* If no split needed, auto-advances.
|
||||||
|
*/
|
||||||
|
/**
 * Pipeline step that checks whether the session was already split into
 * sub-sessions and otherwise auto-advances to the next step.
 *
 * The automatic check runs whenever `sessionId` changes. Responses that arrive
 * after the effect was cleaned up (session switched, component unmounted, or
 * the duplicate effect run of React StrictMode) are ignored, so `onNext` is
 * triggered at most once per check.
 *
 * @param sessionId - Session to inspect; nothing happens while it is falsy.
 * @param onNext - Called to advance the pipeline to the next step.
 * @param onSubSessionsCreated - Receives the `sub_sessions` array returned by the API.
 */
export function StepPageSplit({ sessionId, onNext, onSubSessionsCreated }: StepPageSplitProps) {
  const [checking, setChecking] = useState(false)
  const [splitResult, setSplitResult] = useState<{ is_double_page: boolean; pages?: number } | null>(null)
  const [splitting, setSplitting] = useState(false)
  const [error, setError] = useState('')

  /**
   * Loads the session and advances the pipeline.
   *
   * `isCancelled` is polled after every await; once it returns true no state
   * is touched and no callback is invoked any more.
   */
  const checkPageSplit = async (isCancelled: () => boolean = () => false) => {
    if (!sessionId) return
    setChecking(true)
    setError('')
    try {
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}`)
      if (isCancelled()) return
      if (!res.ok) throw new Error('Session nicht gefunden')
      const data = await res.json()
      if (isCancelled()) return

      // If sub-sessions already exist, this was already split
      if (data.sub_sessions?.length > 0) {
        onSubSessionsCreated(data.sub_sessions)
        onNext()
        return
      }

      // No detection happens here yet (page-split detection happens in the
      // orientation step), so the page is treated as a single page.
      setSplitResult({ is_double_page: false })
      // Auto-advance if single page
      onNext()
    } catch (e) {
      if (!isCancelled()) setError(e instanceof Error ? e.message : String(e))
    } finally {
      if (!isCancelled()) setChecking(false)
    }
  }

  useEffect(() => {
    if (!sessionId) return
    // Flag flipped by the cleanup so a late response cannot advance the
    // pipeline for a session that is no longer current.
    let cancelled = false
    void checkPageSplit(() => cancelled)
    return () => {
      cancelled = true
    }
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [sessionId])

  /** Requests the page split from the backend, reports sub-sessions and advances. */
  const handleSplit = async () => {
    if (!sessionId) return
    setSplitting(true)
    setError('')
    try {
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/page-split`, {
        method: 'POST',
      })
      if (!res.ok) {
        // The error body may not be JSON; fall back to a generic message.
        const data = await res.json().catch(() => ({}))
        throw new Error(data.detail || 'Split fehlgeschlagen')
      }
      const data = await res.json()
      if (data.sub_sessions?.length > 0) {
        onSubSessionsCreated(data.sub_sessions)
      }
      onNext()
    } catch (e) {
      setError(e instanceof Error ? e.message : String(e))
    } finally {
      setSplitting(false)
    }
  }

  if (checking) {
    return <div className="text-sm text-gray-500 py-8 text-center">Pruefe Seitenformat...</div>
  }

  if (splitResult?.is_double_page) {
    return (
      <div className="space-y-4 p-6 bg-blue-50 dark:bg-blue-900/20 rounded-xl border border-blue-200 dark:border-blue-800">
        <h3 className="text-sm font-medium text-blue-700 dark:text-blue-300">
          Doppelseite erkannt
        </h3>
        <p className="text-sm text-blue-600 dark:text-blue-400">
          Das Bild scheint eine Doppelseite zu sein. Soll es in zwei Einzelseiten aufgeteilt werden?
        </p>
        <div className="flex gap-2">
          <button
            type="button"
            onClick={handleSplit}
            disabled={splitting}
            className="px-4 py-2 bg-blue-600 text-white text-sm rounded-lg hover:bg-blue-700 disabled:opacity-50"
          >
            {splitting ? 'Wird aufgeteilt...' : 'Aufteilen'}
          </button>
          <button
            type="button"
            onClick={onNext}
            className="px-4 py-2 bg-gray-200 dark:bg-gray-700 text-sm rounded-lg hover:bg-gray-300"
          >
            Einzelseite beibehalten
          </button>
        </div>
        {error && <div className="text-sm text-red-500">{error}</div>}
      </div>
    )
  }

  if (error) {
    // The check failed: do not claim a single page was detected, offer a retry instead.
    return (
      <div className="text-sm text-gray-500 py-8 text-center">
        <div className="text-sm text-red-500">{error}</div>
        <button
          type="button"
          onClick={() => void checkPageSplit()}
          className="mt-3 px-4 py-2 bg-gray-200 dark:bg-gray-700 text-sm rounded-lg hover:bg-gray-300"
        >
          Erneut versuchen
        </button>
      </div>
    )
  }

  return (
    <div className="text-sm text-gray-500 py-8 text-center">
      Einzelseite erkannt — weiter zum naechsten Schritt.
    </div>
  )
}
|
||||||
13
admin-lehrer/components/ocr-kombi/StepStructure.tsx
Normal file
13
admin-lehrer/components/ocr-kombi/StepStructure.tsx
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
'use client'
|
||||||
|
|
||||||
|
import { StepStructureDetection } from '@/components/ocr-pipeline/StepStructureDetection'
|
||||||
|
|
||||||
|
/** Props accepted by {@link StepStructure}; both are handed on unchanged. */
interface StepStructureProps {
  /** Session passed through to the shared structure-detection step. */
  sessionId: string | null
  /** Callback passed through to the shared structure-detection step. */
  onNext: () => void
}

/**
 * Kombi-pipeline entry for the structure step.
 *
 * Contains no logic of its own: it renders the shared
 * `StepStructureDetection` component with the props it was given.
 */
export function StepStructure(props: StepStructureProps) {
  const { sessionId, onNext } = props

  return (
    <StepStructureDetection
      sessionId={sessionId}
      onNext={onNext}
    />
  )
}
|
||||||
147
admin-lehrer/components/ocr-kombi/StepUpload.tsx
Normal file
147
admin-lehrer/components/ocr-kombi/StepUpload.tsx
Normal file
@@ -0,0 +1,147 @@
|
|||||||
|
'use client'
|
||||||
|
|
||||||
|
import { useState, useCallback } from 'react'
|
||||||
|
import { DOCUMENT_CATEGORIES, type DocumentCategory } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||||
|
|
||||||
|
/** Base path under which all backend requests of this component are issued. */
const KLAUSUR_API = '/klausur-api'

/** Props of the upload step. */
interface StepUploadProps {
  /** Invoked with the id of the session created by a successful upload. */
  onUploaded: (sessionId: string) => void
}
|
||||||
|
|
||||||
|
/**
 * Upload step: lets the user pick a title and category, then upload an image
 * or PDF via drag & drop or the file picker.
 *
 * Flow of one upload:
 *  1. POST the file (and optional title) to create a session.
 *  2. Best-effort PUT of the selected category onto that session.
 *  3. Report the new session id through `onUploaded`.
 *
 * @param onUploaded - Called with the created session id after a successful upload.
 */
export function StepUpload({ onUploaded }: StepUploadProps) {
  const [dragging, setDragging] = useState(false)
  const [uploading, setUploading] = useState(false)
  const [title, setTitle] = useState('')
  const [category, setCategory] = useState<DocumentCategory>('vokabelseite')
  const [error, setError] = useState('')

  const handleUpload = useCallback(async (file: File) => {
    // A drop can arrive while a previous upload is still running; ignore it
    // instead of creating a second session in parallel.
    if (uploading) return

    setUploading(true)
    setError('')

    try {
      const formData = new FormData()
      formData.append('file', file)
      const trimmedTitle = title.trim()
      if (trimmedTitle) formData.append('name', trimmedTitle)

      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions`, {
        method: 'POST',
        body: formData,
      })

      if (!res.ok) {
        // The error body may not be JSON; fall back to the status code.
        const data = await res.json().catch(() => ({}))
        throw new Error(data.detail || `Upload fehlgeschlagen (${res.status})`)
      }

      const data = await res.json()
      const sid = data.session_id || data.id
      if (!sid) {
        // Without an id the follow-up request would target ".../sessions/undefined".
        throw new Error('Upload fehlgeschlagen (keine Session-ID erhalten)')
      }

      // Set category. The session already exists at this point, so a failure
      // here must not block the pipeline or hide the session from the caller.
      if (category) {
        try {
          const categoryRes = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`, {
            method: 'PUT',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ document_category: category }),
          })
          if (!categoryRes.ok) {
            console.warn(`StepUpload: setting document_category failed (${categoryRes.status})`)
          }
        } catch (categoryError) {
          console.warn('StepUpload: setting document_category failed', categoryError)
        }
      }

      onUploaded(sid)
    } catch (e) {
      setError(e instanceof Error ? e.message : String(e))
    } finally {
      setUploading(false)
    }
  }, [uploading, title, category, onUploaded])

  const handleDrop = useCallback((e: React.DragEvent) => {
    e.preventDefault()
    setDragging(false)
    // Only the first dropped file is uploaded.
    const file = e.dataTransfer.files[0]
    if (file) handleUpload(file)
  }, [handleUpload])

  const handleFileSelect = useCallback((e: React.ChangeEvent<HTMLInputElement>) => {
    const file = e.target.files?.[0]
    if (file) handleUpload(file)
  }, [handleUpload])

  return (
    <div className="space-y-4">
      {/* Title input */}
      <div>
        <label className="block text-sm font-medium text-gray-700 dark:text-gray-300 mb-1">
          Titel (optional)
        </label>
        <input
          type="text"
          value={title}
          onChange={(e) => setTitle(e.target.value)}
          placeholder="z.B. Vokabeln Unit 3"
          className="w-full px-3 py-2 border border-gray-300 dark:border-gray-600 rounded-lg bg-white dark:bg-gray-800 text-sm"
        />
      </div>

      {/* Category selector */}
      <div>
        <label className="block text-sm font-medium text-gray-700 dark:text-gray-300 mb-1">
          Kategorie
        </label>
        <div className="grid grid-cols-4 gap-1.5">
          {DOCUMENT_CATEGORIES.map(cat => (
            <button
              key={cat.value}
              type="button"
              onClick={() => setCategory(cat.value)}
              className={`text-xs px-2 py-1.5 rounded-md text-left transition-colors ${
                category === cat.value
                  ? 'bg-teal-100 dark:bg-teal-900/40 text-teal-700 dark:text-teal-300 ring-1 ring-teal-400'
                  : 'bg-gray-50 dark:bg-gray-700 text-gray-600 dark:text-gray-400 hover:bg-gray-100'
              }`}
            >
              {cat.icon} {cat.label}
            </button>
          ))}
        </div>
      </div>

      {/* Drop zone */}
      <div
        onDragOver={(e) => { e.preventDefault(); setDragging(true) }}
        onDragLeave={() => setDragging(false)}
        onDrop={handleDrop}
        className={`border-2 border-dashed rounded-xl p-12 text-center transition-colors ${
          dragging
            ? 'border-teal-400 bg-teal-50 dark:bg-teal-900/20'
            : 'border-gray-300 dark:border-gray-600 hover:border-gray-400'
        }`}
      >
        {uploading ? (
          <div className="text-sm text-gray-500">Wird hochgeladen...</div>
        ) : (
          <>
            <div className="text-4xl mb-3">📤</div>
            <div className="text-sm text-gray-600 dark:text-gray-400 mb-2">
              Bild oder PDF hierher ziehen
            </div>
            <label className="inline-block px-4 py-2 bg-teal-600 text-white text-sm rounded-lg cursor-pointer hover:bg-teal-700">
              Datei auswaehlen
              <input
                type="file"
                accept="image/*,.pdf"
                onChange={handleFileSelect}
                className="hidden"
              />
            </label>
          </>
        )}
      </div>

      {error && (
        <div className="text-sm text-red-500 bg-red-50 dark:bg-red-900/20 p-3 rounded-lg">
          {error}
        </div>
      )}
    </div>
  )
}
|
||||||
@@ -150,9 +150,18 @@ export const navigation: NavCategory[] = [
|
|||||||
audience: ['Entwickler', 'Data Scientists'],
|
audience: ['Entwickler', 'Data Scientists'],
|
||||||
subgroup: 'KI-Werkzeuge',
|
subgroup: 'KI-Werkzeuge',
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
id: 'ocr-kombi',
|
||||||
|
name: 'OCR Kombi',
|
||||||
|
href: '/ai/ocr-kombi',
|
||||||
|
description: 'Modulare 11-Schritt-Pipeline',
|
||||||
|
purpose: 'Modulare OCR-Pipeline mit Dual-Engine (PP-OCRv5 + Tesseract), Strukturerkennung, Grid-Aufbau und Review. Multi-Page-Dokument-Unterstuetzung.',
|
||||||
|
audience: ['Entwickler'],
|
||||||
|
subgroup: 'KI-Werkzeuge',
|
||||||
|
},
|
||||||
{
|
{
|
||||||
id: 'ocr-overlay',
|
id: 'ocr-overlay',
|
||||||
name: 'OCR Overlay',
|
name: 'OCR Overlay (Legacy)',
|
||||||
href: '/ai/ocr-overlay',
|
href: '/ai/ocr-overlay',
|
||||||
description: 'Ganzseitige Overlay-Rekonstruktion',
|
description: 'Ganzseitige Overlay-Rekonstruktion',
|
||||||
purpose: 'Arbeitsblatt ohne Spaltenerkennung direkt als Overlay rekonstruieren. Vereinfachte 7-Schritt-Pipeline.',
|
purpose: 'Arbeitsblatt ohne Spaltenerkennung direkt als Overlay rekonstruieren. Vereinfachte 7-Schritt-Pipeline.',
|
||||||
|
|||||||
@@ -46,6 +46,7 @@ from ocr_pipeline_api import router as ocr_pipeline_router, _cache as ocr_pipeli
|
|||||||
from grid_editor_api import router as grid_editor_router
|
from grid_editor_api import router as grid_editor_router
|
||||||
from orientation_crop_api import router as orientation_crop_router, set_cache_ref as set_orientation_crop_cache
|
from orientation_crop_api import router as orientation_crop_router, set_cache_ref as set_orientation_crop_cache
|
||||||
from ocr_pipeline_session_store import init_ocr_pipeline_tables
|
from ocr_pipeline_session_store import init_ocr_pipeline_tables
|
||||||
|
from ocr_kombi.router import router as ocr_kombi_router
|
||||||
try:
|
try:
|
||||||
from handwriting_htr_api import router as htr_router
|
from handwriting_htr_api import router as htr_router
|
||||||
except ImportError:
|
except ImportError:
|
||||||
@@ -186,6 +187,7 @@ if htr_router:
|
|||||||
app.include_router(htr_router) # Handwriting HTR (Klausur)
|
app.include_router(htr_router) # Handwriting HTR (Klausur)
|
||||||
if dsfa_rag_router:
|
if dsfa_rag_router:
|
||||||
app.include_router(dsfa_rag_router) # DSFA RAG Corpus Search
|
app.include_router(dsfa_rag_router) # DSFA RAG Corpus Search
|
||||||
|
app.include_router(ocr_kombi_router) # OCR Kombi Pipeline (modular)
|
||||||
|
|
||||||
|
|
||||||
# =============================================
|
# =============================================
|
||||||
|
|||||||
@@ -0,0 +1,12 @@
|
|||||||
|
-- Migration: Add document_group_id and page_number for multi-page document grouping.
-- A document_group_id groups multiple sessions that belong to the same scanned document.
-- page_number is the 1-based page index within the group.
-- Both statements use IF NOT EXISTS, so re-running the migration is harmless.

ALTER TABLE ocr_pipeline_sessions
    ADD COLUMN IF NOT EXISTS document_group_id UUID,
    ADD COLUMN IF NOT EXISTS page_number INT;

-- Index for efficient group lookups.
-- Partial index: rows without a document_group_id are left out of it.
CREATE INDEX IF NOT EXISTS idx_ocr_sessions_document_group
    ON ocr_pipeline_sessions (document_group_id)
    WHERE document_group_id IS NOT NULL;
|
||||||
1
klausur-service/backend/ocr_kombi/__init__.py
Normal file
1
klausur-service/backend/ocr_kombi/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
"""OCR Kombi Pipeline - modular step-based OCR processing."""
|
||||||
19
klausur-service/backend/ocr_kombi/router.py
Normal file
19
klausur-service/backend/ocr_kombi/router.py
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
"""
|
||||||
|
Composite router for the OCR Kombi pipeline.
|
||||||
|
|
||||||
|
Aggregates step-specific sub-routers into one router for main.py to include.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from fastapi import APIRouter
|
||||||
|
|
||||||
|
from .step_upload import router as upload_router
|
||||||
|
|
||||||
|
# Composite router: every included step router is served below this prefix.
router = APIRouter(prefix="/api/v1/ocr-kombi", tags=["ocr-kombi"])

# Include step-specific routes
router.include_router(upload_router)

# Future steps will be added here:
# from .step_orientation import router as orientation_router
# router.include_router(orientation_router)
# ...
|
||||||
132
klausur-service/backend/ocr_kombi/step_upload.py
Normal file
132
klausur-service/backend/ocr_kombi/step_upload.py
Normal file
@@ -0,0 +1,132 @@
|
|||||||
|
"""
|
||||||
|
Step 1: Upload — handles single images and multi-page PDFs.
|
||||||
|
|
||||||
|
Multi-page PDFs are split into individual PNG pages, each getting its own
|
||||||
|
session linked by a shared document_group_id.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import io
|
||||||
|
import uuid
|
||||||
|
import logging
|
||||||
|
import time
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from fastapi import APIRouter, UploadFile, File, Form, HTTPException
|
||||||
|
|
||||||
|
from ocr_pipeline_session_store import create_session_db, get_document_group_sessions
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
router = APIRouter()
|
||||||
|
|
||||||
|
|
||||||
|
def _pdf_to_pngs(pdf_bytes: bytes) -> list[bytes]:
    """Convert a PDF to a list of PNG byte buffers (one per page).

    Args:
        pdf_bytes: Raw content of the PDF file.

    Returns:
        One PNG-encoded buffer per page, in document order, rendered at 300 DPI.

    Raises:
        HTTPException: 500 if PyMuPDF is not installed, 400 if the bytes
            cannot be opened as a PDF.
    """
    try:
        import fitz  # PyMuPDF
    except ImportError:
        raise HTTPException(
            status_code=500,
            detail="PDF-Verarbeitung nicht verfuegbar (PyMuPDF fehlt)"
        )

    try:
        doc = fitz.open(stream=pdf_bytes, filetype="pdf")
    except Exception as exc:
        # Corrupt or non-PDF input is a client error, not a server fault.
        logger.warning("Could not open uploaded PDF: %s", exc)
        raise HTTPException(
            status_code=400,
            detail="PDF konnte nicht gelesen werden"
        ) from exc

    # Render at 300 DPI for OCR quality (PDF user space is 72 units per inch).
    # The matrix is identical for every page, so it is built once.
    mat = fitz.Matrix(300 / 72, 300 / 72)
    try:
        return [page.get_pixmap(matrix=mat).tobytes("png") for page in doc]
    finally:
        # Release the document even if rendering a page fails.
        doc.close()
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/upload")
async def upload_document(
    file: UploadFile = File(...),
    name: Optional[str] = Form(None),
    document_category: Optional[str] = Form(None),
):
    """Upload a single image or multi-page PDF.

    Single image: Creates 1 session with document_group_id + page_number=1.
    Multi-page PDF: Creates N sessions with shared document_group_id,
    page_number 1..N, and titles "Title — S. X".

    Args:
        file: The uploaded image or PDF.
        name: Optional title; defaults to the filename without its extension.
        document_category: Optional category stored on every created session.

    Returns:
        Dict with ``document_group_id``, ``page_count``, ``sessions``
        (``session_id``, ``name``, ``page_number`` per page) and
        ``duration_seconds``.

    Raises:
        HTTPException: 400 if the upload is empty or the PDF has no pages.
    """
    # Local import: the store module is already a dependency of this file;
    # update_session_db is only needed here to persist the category.
    from ocr_pipeline_session_store import update_session_db

    t0 = time.time()
    file_bytes = await file.read()
    if not file_bytes:
        raise HTTPException(status_code=400, detail="Datei ist leer")

    filename = file.filename or "upload"
    base_title = name or filename.rsplit(".", 1)[0]

    # Accept a PDF by extension, declared content type or magic bytes.
    is_pdf = (
        filename.lower().endswith(".pdf")
        or file.content_type == "application/pdf"
        or file_bytes[:4] == b"%PDF"
    )

    if is_pdf:
        page_images = _pdf_to_pngs(file_bytes)
        if not page_images:
            raise HTTPException(status_code=400, detail="PDF enthaelt keine Seiten")
    else:
        # Single image: stored as uploaded, without re-encoding.
        page_images = [file_bytes]

    group_id = str(uuid.uuid4())
    multi_page = len(page_images) > 1
    created_sessions = []

    for page_number, image_bytes in enumerate(page_images, start=1):
        # Only multi-page documents get a page suffix in the title.
        page_title = f"{base_title} — S. {page_number}" if multi_page else base_title
        session = await create_session_db(
            session_id=str(uuid.uuid4()),
            name=page_title,
            filename=filename,
            original_png=image_bytes,
            document_group_id=group_id,
            page_number=page_number,
        )
        if document_category:
            # create_session_db has no category parameter, so the value is
            # applied afterwards.
            # NOTE(review): the value is stored unvalidated — confirm whether
            # it should be checked against the known categories first.
            await update_session_db(session["id"], document_category=document_category)
        created_sessions.append({
            "session_id": session["id"],
            "name": session["name"],
            "page_number": page_number,
        })

    duration = round(time.time() - t0, 2)
    logger.info(
        "Upload complete: %d page(s), group=%s, %.2fs",
        len(created_sessions), group_id, duration,
    )

    return {
        "document_group_id": group_id,
        "page_count": len(created_sessions),
        "sessions": created_sessions,
        "duration_seconds": duration,
    }
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/documents/{group_id}")
async def get_document_group(group_id: str):
    """Get all sessions in a document group, sorted by page_number.

    Args:
        group_id: UUID of the document group, as a string.

    Returns:
        Dict with ``document_group_id``, ``page_count`` and ``sessions``.

    Raises:
        HTTPException: 400 if ``group_id`` is not a valid UUID, 404 if no
            session belongs to the group.
    """
    # The store converts the id with uuid.UUID(); reject malformed ids here so
    # they produce a client error instead of an unhandled ValueError.
    try:
        uuid.UUID(group_id)
    except ValueError:
        raise HTTPException(status_code=400, detail="Ungueltige Dokumentgruppen-ID")

    sessions = await get_document_group_sessions(group_id)
    if not sessions:
        raise HTTPException(status_code=404, detail="Dokumentgruppe nicht gefunden")
    return {
        "document_group_id": group_id,
        "page_count": len(sessions),
        "sessions": sessions,
    }
|
||||||
@@ -76,7 +76,16 @@ async def init_ocr_pipeline_tables():
|
|||||||
ADD COLUMN IF NOT EXISTS parent_session_id UUID REFERENCES ocr_pipeline_sessions(id) ON DELETE CASCADE,
|
ADD COLUMN IF NOT EXISTS parent_session_id UUID REFERENCES ocr_pipeline_sessions(id) ON DELETE CASCADE,
|
||||||
ADD COLUMN IF NOT EXISTS box_index INT,
|
ADD COLUMN IF NOT EXISTS box_index INT,
|
||||||
ADD COLUMN IF NOT EXISTS grid_editor_result JSONB,
|
ADD COLUMN IF NOT EXISTS grid_editor_result JSONB,
|
||||||
ADD COLUMN IF NOT EXISTS structure_result JSONB
|
ADD COLUMN IF NOT EXISTS structure_result JSONB,
|
||||||
|
ADD COLUMN IF NOT EXISTS document_group_id UUID,
|
||||||
|
ADD COLUMN IF NOT EXISTS page_number INT
|
||||||
|
""")
|
||||||
|
|
||||||
|
# Index for document group lookups
|
||||||
|
await conn.execute("""
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_ocr_sessions_document_group
|
||||||
|
ON ocr_pipeline_sessions (document_group_id)
|
||||||
|
WHERE document_group_id IS NOT NULL
|
||||||
""")
|
""")
|
||||||
|
|
||||||
|
|
||||||
@@ -91,21 +100,26 @@ async def create_session_db(
|
|||||||
original_png: bytes,
|
original_png: bytes,
|
||||||
parent_session_id: Optional[str] = None,
|
parent_session_id: Optional[str] = None,
|
||||||
box_index: Optional[int] = None,
|
box_index: Optional[int] = None,
|
||||||
|
document_group_id: Optional[str] = None,
|
||||||
|
page_number: Optional[int] = None,
|
||||||
) -> Dict[str, Any]:
|
) -> Dict[str, Any]:
|
||||||
"""Create a new OCR pipeline session.
|
"""Create a new OCR pipeline session.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
parent_session_id: If set, this is a sub-session for a box region.
|
parent_session_id: If set, this is a sub-session for a box region.
|
||||||
box_index: 0-based index of the box this sub-session represents.
|
box_index: 0-based index of the box this sub-session represents.
|
||||||
|
document_group_id: Groups multi-page uploads into one document.
|
||||||
|
page_number: 1-based page index within the document group.
|
||||||
"""
|
"""
|
||||||
pool = await get_pool()
|
pool = await get_pool()
|
||||||
parent_uuid = uuid.UUID(parent_session_id) if parent_session_id else None
|
parent_uuid = uuid.UUID(parent_session_id) if parent_session_id else None
|
||||||
|
group_uuid = uuid.UUID(document_group_id) if document_group_id else None
|
||||||
async with pool.acquire() as conn:
|
async with pool.acquire() as conn:
|
||||||
row = await conn.fetchrow("""
|
row = await conn.fetchrow("""
|
||||||
INSERT INTO ocr_pipeline_sessions (
|
INSERT INTO ocr_pipeline_sessions (
|
||||||
id, name, filename, original_png, status, current_step,
|
id, name, filename, original_png, status, current_step,
|
||||||
parent_session_id, box_index
|
parent_session_id, box_index, document_group_id, page_number
|
||||||
) VALUES ($1, $2, $3, $4, 'active', 1, $5, $6)
|
) VALUES ($1, $2, $3, $4, 'active', 1, $5, $6, $7, $8)
|
||||||
RETURNING id, name, filename, status, current_step,
|
RETURNING id, name, filename, status, current_step,
|
||||||
orientation_result, crop_result,
|
orientation_result, crop_result,
|
||||||
deskew_result, dewarp_result, column_result, row_result,
|
deskew_result, dewarp_result, column_result, row_result,
|
||||||
@@ -114,9 +128,10 @@ async def create_session_db(
|
|||||||
document_category, pipeline_log,
|
document_category, pipeline_log,
|
||||||
grid_editor_result, structure_result,
|
grid_editor_result, structure_result,
|
||||||
parent_session_id, box_index,
|
parent_session_id, box_index,
|
||||||
|
document_group_id, page_number,
|
||||||
created_at, updated_at
|
created_at, updated_at
|
||||||
""", uuid.UUID(session_id), name, filename, original_png,
|
""", uuid.UUID(session_id), name, filename, original_png,
|
||||||
parent_uuid, box_index)
|
parent_uuid, box_index, group_uuid, page_number)
|
||||||
|
|
||||||
return _row_to_dict(row)
|
return _row_to_dict(row)
|
||||||
|
|
||||||
@@ -134,6 +149,7 @@ async def get_session_db(session_id: str) -> Optional[Dict[str, Any]]:
|
|||||||
document_category, pipeline_log,
|
document_category, pipeline_log,
|
||||||
grid_editor_result, structure_result,
|
grid_editor_result, structure_result,
|
||||||
parent_session_id, box_index,
|
parent_session_id, box_index,
|
||||||
|
document_group_id, page_number,
|
||||||
created_at, updated_at
|
created_at, updated_at
|
||||||
FROM ocr_pipeline_sessions WHERE id = $1
|
FROM ocr_pipeline_sessions WHERE id = $1
|
||||||
""", uuid.UUID(session_id))
|
""", uuid.UUID(session_id))
|
||||||
@@ -186,6 +202,7 @@ async def update_session_db(session_id: str, **kwargs) -> Optional[Dict[str, Any
|
|||||||
'document_category', 'pipeline_log',
|
'document_category', 'pipeline_log',
|
||||||
'grid_editor_result', 'structure_result',
|
'grid_editor_result', 'structure_result',
|
||||||
'parent_session_id', 'box_index',
|
'parent_session_id', 'box_index',
|
||||||
|
'document_group_id', 'page_number',
|
||||||
}
|
}
|
||||||
|
|
||||||
jsonb_fields = {'orientation_result', 'crop_result', 'deskew_result', 'dewarp_result', 'column_result', 'row_result', 'word_result', 'ground_truth', 'handwriting_removal_meta', 'doc_type_result', 'pipeline_log', 'grid_editor_result', 'structure_result'}
|
jsonb_fields = {'orientation_result', 'crop_result', 'deskew_result', 'dewarp_result', 'column_result', 'row_result', 'word_result', 'ground_truth', 'handwriting_removal_meta', 'doc_type_result', 'pipeline_log', 'grid_editor_result', 'structure_result'}
|
||||||
@@ -217,8 +234,9 @@ async def update_session_db(session_id: str, **kwargs) -> Optional[Dict[str, Any
|
|||||||
word_result, ground_truth, auto_shear_degrees,
|
word_result, ground_truth, auto_shear_degrees,
|
||||||
doc_type, doc_type_result,
|
doc_type, doc_type_result,
|
||||||
document_category, pipeline_log,
|
document_category, pipeline_log,
|
||||||
grid_editor_result,
|
grid_editor_result, structure_result,
|
||||||
parent_session_id, box_index,
|
parent_session_id, box_index,
|
||||||
|
document_group_id, page_number,
|
||||||
created_at, updated_at
|
created_at, updated_at
|
||||||
""", *values)
|
""", *values)
|
||||||
|
|
||||||
@@ -243,6 +261,7 @@ async def list_sessions_db(
|
|||||||
SELECT id, name, filename, status, current_step,
|
SELECT id, name, filename, status, current_step,
|
||||||
document_category, doc_type,
|
document_category, doc_type,
|
||||||
parent_session_id, box_index,
|
parent_session_id, box_index,
|
||||||
|
document_group_id, page_number,
|
||||||
created_at, updated_at
|
created_at, updated_at
|
||||||
FROM ocr_pipeline_sessions
|
FROM ocr_pipeline_sessions
|
||||||
{where}
|
{where}
|
||||||
@@ -261,6 +280,7 @@ async def get_sub_sessions(parent_session_id: str) -> List[Dict[str, Any]]:
|
|||||||
SELECT id, name, filename, status, current_step,
|
SELECT id, name, filename, status, current_step,
|
||||||
document_category, doc_type,
|
document_category, doc_type,
|
||||||
parent_session_id, box_index,
|
parent_session_id, box_index,
|
||||||
|
document_group_id, page_number,
|
||||||
created_at, updated_at
|
created_at, updated_at
|
||||||
FROM ocr_pipeline_sessions
|
FROM ocr_pipeline_sessions
|
||||||
WHERE parent_session_id = $1
|
WHERE parent_session_id = $1
|
||||||
@@ -270,6 +290,24 @@ async def get_sub_sessions(parent_session_id: str) -> List[Dict[str, Any]]:
|
|||||||
return [_row_to_dict(row) for row in rows]
|
return [_row_to_dict(row) for row in rows]
|
||||||
|
|
||||||
|
|
||||||
|
async def get_document_group_sessions(document_group_id: str) -> List[Dict[str, Any]]:
    """Get all sessions in a document group, ordered by page_number.

    Only the summary columns listed in the query are selected; the result
    columns of the individual pipeline steps are not part of it.

    Args:
        document_group_id: UUID of the document group, as a string.

    Returns:
        One dict per session (converted via ``_row_to_dict``), ordered by
        ascending ``page_number``; empty if no session has this group id.

    Raises:
        ValueError: If ``document_group_id`` is not a valid UUID string.
    """
    pool = await get_pool()
    async with pool.acquire() as conn:
        rows = await conn.fetch("""
            SELECT id, name, filename, status, current_step,
                   document_category, doc_type,
                   parent_session_id, box_index,
                   document_group_id, page_number,
                   created_at, updated_at
            FROM ocr_pipeline_sessions
            WHERE document_group_id = $1
            ORDER BY page_number ASC
        """, uuid.UUID(document_group_id))

    return [_row_to_dict(row) for row in rows]
|
||||||
|
|
||||||
|
|
||||||
async def list_ground_truth_sessions_db() -> List[Dict[str, Any]]:
|
async def list_ground_truth_sessions_db() -> List[Dict[str, Any]]:
|
||||||
"""List sessions that have a build_grid_reference in ground_truth."""
|
"""List sessions that have a build_grid_reference in ground_truth."""
|
||||||
pool = await get_pool()
|
pool = await get_pool()
|
||||||
@@ -324,7 +362,7 @@ def _row_to_dict(row: asyncpg.Record) -> Dict[str, Any]:
|
|||||||
result = dict(row)
|
result = dict(row)
|
||||||
|
|
||||||
# UUID → string
|
# UUID → string
|
||||||
for key in ['id', 'session_id', 'parent_session_id']:
|
for key in ['id', 'session_id', 'parent_session_id', 'document_group_id']:
|
||||||
if key in result and result[key] is not None:
|
if key in result and result[key] is not None:
|
||||||
result[key] = str(result[key])
|
result[key] = str(result[key])
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user