Add OCR Kombi Pipeline: modular 11-step architecture with multi-page support
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 29s
CI / test-go-edu-search (push) Successful in 28s
CI / test-python-klausur (push) Failing after 2m24s
CI / test-python-agent-core (push) Successful in 22s
CI / test-nodejs-website (push) Successful in 20s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 29s
CI / test-go-edu-search (push) Successful in 28s
CI / test-python-klausur (push) Failing after 2m24s
CI / test-python-agent-core (push) Successful in 22s
CI / test-nodejs-website (push) Successful in 20s
Phase 1 of the clean architecture refactor: Replaces the 751-line ocr-overlay monolith with a modular pipeline. Each step gets its own component file. Frontend: /ai/ocr-kombi route with 11 steps (Upload, Orientation, PageSplit, Deskew, Dewarp, ContentCrop, OCR, Structure, GridBuild, GridReview, GroundTruth). Session list supports document grouping for multi-page uploads. Backend: New ocr_kombi/ module with multi-page PDF upload (splits PDF into N sessions with shared document_group_id). DB migration adds document_group_id and page_number columns. Old /ai/ocr-overlay remains fully functional for A/B testing. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
174
admin-lehrer/app/(admin)/ai/ocr-kombi/page.tsx
Normal file
174
admin-lehrer/app/(admin)/ai/ocr-kombi/page.tsx
Normal file
@@ -0,0 +1,174 @@
|
||||
'use client'
|
||||
|
||||
import { Suspense } from 'react'
|
||||
import { PagePurpose } from '@/components/common/PagePurpose'
|
||||
import { BoxSessionTabs } from '@/components/ocr-pipeline/BoxSessionTabs'
|
||||
import { KombiStepper } from '@/components/ocr-kombi/KombiStepper'
|
||||
import { SessionList } from '@/components/ocr-kombi/SessionList'
|
||||
import { SessionHeader } from '@/components/ocr-kombi/SessionHeader'
|
||||
import { StepUpload } from '@/components/ocr-kombi/StepUpload'
|
||||
import { StepOrientation } from '@/components/ocr-kombi/StepOrientation'
|
||||
import { StepPageSplit } from '@/components/ocr-kombi/StepPageSplit'
|
||||
import { StepDeskew } from '@/components/ocr-kombi/StepDeskew'
|
||||
import { StepDewarp } from '@/components/ocr-kombi/StepDewarp'
|
||||
import { StepContentCrop } from '@/components/ocr-kombi/StepContentCrop'
|
||||
import { StepOcr } from '@/components/ocr-kombi/StepOcr'
|
||||
import { StepStructure } from '@/components/ocr-kombi/StepStructure'
|
||||
import { StepGridBuild } from '@/components/ocr-kombi/StepGridBuild'
|
||||
import { StepGridReview } from '@/components/ocr-kombi/StepGridReview'
|
||||
import { StepGroundTruth } from '@/components/ocr-kombi/StepGroundTruth'
|
||||
import { useKombiPipeline } from './useKombiPipeline'
|
||||
|
||||
function OcrKombiContent() {
|
||||
const {
|
||||
currentStep,
|
||||
sessionId,
|
||||
sessionName,
|
||||
loadingSessions,
|
||||
activeCategory,
|
||||
isGroundTruth,
|
||||
subSessions,
|
||||
parentSessionId,
|
||||
steps,
|
||||
gridSaveRef,
|
||||
groupedSessions,
|
||||
loadSessions,
|
||||
openSession,
|
||||
handleStepClick,
|
||||
handleNext,
|
||||
handleNewSession,
|
||||
deleteSession,
|
||||
renameSession,
|
||||
updateCategory,
|
||||
handleOrientationComplete,
|
||||
handleSessionChange,
|
||||
setSessionId,
|
||||
setSubSessions,
|
||||
setParentSessionId,
|
||||
setIsGroundTruth,
|
||||
} = useKombiPipeline()
|
||||
|
||||
const renderStep = () => {
|
||||
switch (currentStep) {
|
||||
case 0:
|
||||
return (
|
||||
<StepUpload
|
||||
onUploaded={(sid) => {
|
||||
setSessionId(sid)
|
||||
loadSessions()
|
||||
openSession(sid)
|
||||
handleNext()
|
||||
}}
|
||||
/>
|
||||
)
|
||||
case 1:
|
||||
return (
|
||||
<StepOrientation
|
||||
sessionId={sessionId}
|
||||
onNext={handleOrientationComplete}
|
||||
onSessionList={() => { loadSessions(); handleNewSession() }}
|
||||
/>
|
||||
)
|
||||
case 2:
|
||||
return (
|
||||
<StepPageSplit
|
||||
sessionId={sessionId}
|
||||
onNext={handleNext}
|
||||
onSubSessionsCreated={(subs) => {
|
||||
setSubSessions(subs)
|
||||
if (sessionId) setParentSessionId(sessionId)
|
||||
}}
|
||||
/>
|
||||
)
|
||||
case 3:
|
||||
return <StepDeskew sessionId={sessionId} onNext={handleNext} />
|
||||
case 4:
|
||||
return <StepDewarp sessionId={sessionId} onNext={handleNext} />
|
||||
case 5:
|
||||
return <StepContentCrop sessionId={sessionId} onNext={handleNext} />
|
||||
case 6:
|
||||
return <StepOcr sessionId={sessionId} onNext={handleNext} />
|
||||
case 7:
|
||||
return <StepStructure sessionId={sessionId} onNext={handleNext} />
|
||||
case 8:
|
||||
return <StepGridBuild sessionId={sessionId} onNext={handleNext} />
|
||||
case 9:
|
||||
return <StepGridReview sessionId={sessionId} onNext={handleNext} saveRef={gridSaveRef} />
|
||||
case 10:
|
||||
return (
|
||||
<StepGroundTruth
|
||||
sessionId={sessionId}
|
||||
isGroundTruth={isGroundTruth}
|
||||
onMarked={() => setIsGroundTruth(true)}
|
||||
gridSaveRef={gridSaveRef}
|
||||
/>
|
||||
)
|
||||
default:
|
||||
return null
|
||||
}
|
||||
}
|
||||
|
||||
return (
|
||||
<div className="space-y-6">
|
||||
<PagePurpose
|
||||
title="OCR Kombi Pipeline"
|
||||
purpose="Modulare 11-Schritt-Pipeline: Upload, Vorverarbeitung, Dual-Engine-OCR (PP-OCRv5 + Tesseract), Strukturerkennung, Grid-Aufbau und Review. Multi-Page-Dokument-Unterstuetzung."
|
||||
audience={['Entwickler']}
|
||||
architecture={{
|
||||
services: ['klausur-service (FastAPI)', 'OpenCV', 'Tesseract', 'PaddleOCR'],
|
||||
databases: ['PostgreSQL Sessions'],
|
||||
}}
|
||||
relatedPages={[
|
||||
{ name: 'OCR Overlay (Legacy)', href: '/ai/ocr-overlay', description: 'Alter 3-Modi-Monolith' },
|
||||
{ name: 'OCR Regression', href: '/ai/ocr-regression', description: 'Regressionstests' },
|
||||
]}
|
||||
defaultCollapsed
|
||||
/>
|
||||
|
||||
<SessionList
|
||||
items={groupedSessions()}
|
||||
loading={loadingSessions}
|
||||
activeSessionId={sessionId}
|
||||
onOpenSession={(sid) => openSession(sid)}
|
||||
onNewSession={handleNewSession}
|
||||
onDeleteSession={deleteSession}
|
||||
onRenameSession={renameSession}
|
||||
onUpdateCategory={updateCategory}
|
||||
/>
|
||||
|
||||
{sessionId && sessionName && (
|
||||
<SessionHeader
|
||||
sessionName={sessionName}
|
||||
activeCategory={activeCategory}
|
||||
isGroundTruth={isGroundTruth}
|
||||
onUpdateCategory={(cat) => updateCategory(sessionId, cat)}
|
||||
/>
|
||||
)}
|
||||
|
||||
<KombiStepper
|
||||
steps={steps}
|
||||
currentStep={currentStep}
|
||||
onStepClick={handleStepClick}
|
||||
/>
|
||||
|
||||
{subSessions.length > 0 && parentSessionId && sessionId && (
|
||||
<BoxSessionTabs
|
||||
parentSessionId={parentSessionId}
|
||||
subSessions={subSessions}
|
||||
activeSessionId={sessionId}
|
||||
onSessionChange={handleSessionChange}
|
||||
/>
|
||||
)}
|
||||
|
||||
<div className="min-h-[400px]">{renderStep()}</div>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
export default function OcrKombiPage() {
|
||||
return (
|
||||
<Suspense fallback={<div className="p-4 text-sm text-gray-400">Lade...</div>}>
|
||||
<OcrKombiContent />
|
||||
</Suspense>
|
||||
)
|
||||
}
|
||||
118
admin-lehrer/app/(admin)/ai/ocr-kombi/types.ts
Normal file
118
admin-lehrer/app/(admin)/ai/ocr-kombi/types.ts
Normal file
@@ -0,0 +1,118 @@
|
||||
import type { PipelineStep, PipelineStepStatus, DocumentCategory } from '../ocr-pipeline/types'
|
||||
|
||||
// Re-export shared types
|
||||
export type { PipelineStep, PipelineStepStatus, DocumentCategory }
|
||||
export { DOCUMENT_CATEGORIES } from '../ocr-pipeline/types'
|
||||
|
||||
// Re-export grid/structure types used by later steps
|
||||
export type {
|
||||
SessionListItem,
|
||||
SessionInfo,
|
||||
SubSession,
|
||||
OrientationResult,
|
||||
CropResult,
|
||||
DeskewResult,
|
||||
DewarpResult,
|
||||
GridResult,
|
||||
GridCell,
|
||||
OcrWordBox,
|
||||
WordBbox,
|
||||
ColumnMeta,
|
||||
StructureResult,
|
||||
StructureBox,
|
||||
StructureZone,
|
||||
StructureGraphic,
|
||||
ExcludeRegion,
|
||||
} from '../ocr-pipeline/types'
|
||||
|
||||
/**
|
||||
* 11-step Kombi V2 pipeline.
|
||||
* Each step has its own component file in components/ocr-kombi/.
|
||||
*/
|
||||
export const KOMBI_V2_STEPS: PipelineStep[] = [
|
||||
{ id: 'upload', name: 'Upload', icon: '📤', status: 'pending' },
|
||||
{ id: 'orientation', name: 'Orientierung', icon: '🔄', status: 'pending' },
|
||||
{ id: 'page-split', name: 'Seitentrennung', icon: '📖', status: 'pending' },
|
||||
{ id: 'deskew', name: 'Begradigung', icon: '📐', status: 'pending' },
|
||||
{ id: 'dewarp', name: 'Entzerrung', icon: '🔧', status: 'pending' },
|
||||
{ id: 'content-crop', name: 'Zuschneiden', icon: '✂️', status: 'pending' },
|
||||
{ id: 'ocr', name: 'OCR', icon: '🔀', status: 'pending' },
|
||||
{ id: 'structure', name: 'Strukturerkennung', icon: '🔍', status: 'pending' },
|
||||
{ id: 'grid-build', name: 'Grid-Aufbau', icon: '🧱', status: 'pending' },
|
||||
{ id: 'grid-review', name: 'Grid-Review', icon: '📊', status: 'pending' },
|
||||
{ id: 'ground-truth', name: 'Ground Truth', icon: '✅', status: 'pending' },
|
||||
]
|
||||
|
||||
/** Map from Kombi V2 UI step index to DB step number */
|
||||
export const KOMBI_V2_UI_TO_DB: Record<number, number> = {
|
||||
0: 1, // upload
|
||||
1: 2, // orientation
|
||||
2: 2, // page-split (same DB step as orientation)
|
||||
3: 3, // deskew
|
||||
4: 4, // dewarp
|
||||
5: 5, // content-crop
|
||||
6: 8, // ocr (word_result)
|
||||
7: 9, // structure
|
||||
8: 10, // grid-build
|
||||
9: 11, // grid-review
|
||||
10: 12, // ground-truth
|
||||
}
|
||||
|
||||
/** Map from DB step to Kombi V2 UI step index */
|
||||
export function dbStepToKombiV2Ui(dbStep: number): number {
|
||||
if (dbStep <= 1) return 0 // upload
|
||||
if (dbStep === 2) return 1 // orientation
|
||||
if (dbStep === 3) return 3 // deskew
|
||||
if (dbStep === 4) return 4 // dewarp
|
||||
if (dbStep === 5) return 5 // content-crop
|
||||
if (dbStep <= 8) return 6 // ocr
|
||||
if (dbStep === 9) return 7 // structure
|
||||
if (dbStep === 10) return 8 // grid-build
|
||||
if (dbStep === 11) return 9 // grid-review
|
||||
return 10 // ground-truth
|
||||
}
|
||||
|
||||
/** Document group: groups multiple sessions from a multi-page upload */
|
||||
export interface DocumentGroup {
|
||||
group_id: string
|
||||
title: string
|
||||
page_count: number
|
||||
sessions: DocumentGroupSession[]
|
||||
}
|
||||
|
||||
export interface DocumentGroupSession {
|
||||
id: string
|
||||
name: string
|
||||
page_number: number
|
||||
current_step: number
|
||||
status: string
|
||||
document_category?: DocumentCategory
|
||||
created_at: string
|
||||
}
|
||||
|
||||
/** Engine source for OCR transparency */
|
||||
export type OcrEngineSource = 'both' | 'paddle_only' | 'tesseract_only' | 'conflict_paddle' | 'conflict_tesseract'
|
||||
|
||||
export interface OcrTransparentWord {
|
||||
text: string
|
||||
left: number
|
||||
top: number
|
||||
width: number
|
||||
height: number
|
||||
conf: number
|
||||
engine_source: OcrEngineSource
|
||||
}
|
||||
|
||||
export interface OcrTransparentResult {
|
||||
raw_tesseract: { words: OcrTransparentWord[] }
|
||||
raw_paddle: { words: OcrTransparentWord[] }
|
||||
merged: { words: OcrTransparentWord[] }
|
||||
stats: {
|
||||
total_words: number
|
||||
both_agree: number
|
||||
paddle_only: number
|
||||
tesseract_only: number
|
||||
conflict_paddle_wins: number
|
||||
conflict_tesseract_wins: number
|
||||
}
|
||||
}
|
||||
361
admin-lehrer/app/(admin)/ai/ocr-kombi/useKombiPipeline.ts
Normal file
361
admin-lehrer/app/(admin)/ai/ocr-kombi/useKombiPipeline.ts
Normal file
@@ -0,0 +1,361 @@
|
||||
'use client'
|
||||
|
||||
import { useCallback, useEffect, useState, useRef } from 'react'
|
||||
import { useSearchParams } from 'next/navigation'
|
||||
import type { PipelineStep, DocumentCategory } from './types'
|
||||
import { KOMBI_V2_STEPS, dbStepToKombiV2Ui } from './types'
|
||||
import type { SubSession, SessionListItem } from '../ocr-pipeline/types'
|
||||
|
||||
export type { SessionListItem }
|
||||
|
||||
const KLAUSUR_API = '/klausur-api'
|
||||
|
||||
/** Groups sessions by document_group_id for the session list */
|
||||
export interface DocumentGroupView {
|
||||
group_id: string
|
||||
title: string
|
||||
sessions: SessionListItem[]
|
||||
page_count: number
|
||||
}
|
||||
|
||||
function initSteps(): PipelineStep[] {
|
||||
return KOMBI_V2_STEPS.map((s, i) => ({
|
||||
...s,
|
||||
status: i === 0 ? 'active' : 'pending',
|
||||
}))
|
||||
}
|
||||
|
||||
export function useKombiPipeline() {
|
||||
const [currentStep, setCurrentStep] = useState(0)
|
||||
const [sessionId, setSessionId] = useState<string | null>(null)
|
||||
const [sessionName, setSessionName] = useState('')
|
||||
const [sessions, setSessions] = useState<SessionListItem[]>([])
|
||||
const [loadingSessions, setLoadingSessions] = useState(true)
|
||||
const [activeCategory, setActiveCategory] = useState<DocumentCategory | undefined>(undefined)
|
||||
const [isGroundTruth, setIsGroundTruth] = useState(false)
|
||||
const [subSessions, setSubSessions] = useState<SubSession[]>([])
|
||||
const [parentSessionId, setParentSessionId] = useState<string | null>(null)
|
||||
const [steps, setSteps] = useState<PipelineStep[]>(initSteps())
|
||||
|
||||
const searchParams = useSearchParams()
|
||||
const deepLinkHandled = useRef(false)
|
||||
const gridSaveRef = useRef<(() => Promise<void>) | null>(null)
|
||||
|
||||
// ---- Session loading ----
|
||||
|
||||
const loadSessions = useCallback(async () => {
|
||||
setLoadingSessions(true)
|
||||
try {
|
||||
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions`)
|
||||
if (res.ok) {
|
||||
const data = await res.json()
|
||||
setSessions((data.sessions || []).filter((s: SessionListItem) => !s.parent_session_id))
|
||||
}
|
||||
} catch (e) {
|
||||
console.error('Failed to load sessions:', e)
|
||||
} finally {
|
||||
setLoadingSessions(false)
|
||||
}
|
||||
}, [])
|
||||
|
||||
useEffect(() => { loadSessions() }, [loadSessions])
|
||||
|
||||
// ---- Group sessions by document_group_id ----
|
||||
|
||||
const groupedSessions = useCallback((): (SessionListItem | DocumentGroupView)[] => {
|
||||
const groups = new Map<string, SessionListItem[]>()
|
||||
const ungrouped: SessionListItem[] = []
|
||||
|
||||
for (const s of sessions) {
|
||||
if (s.document_group_id) {
|
||||
const existing = groups.get(s.document_group_id) || []
|
||||
existing.push(s)
|
||||
groups.set(s.document_group_id, existing)
|
||||
} else {
|
||||
ungrouped.push(s)
|
||||
}
|
||||
}
|
||||
|
||||
const result: (SessionListItem | DocumentGroupView)[] = []
|
||||
|
||||
// Sort groups by earliest created_at
|
||||
const sortedGroups = Array.from(groups.entries()).sort((a, b) => {
|
||||
const aTime = Math.min(...a[1].map(s => new Date(s.created_at).getTime()))
|
||||
const bTime = Math.min(...b[1].map(s => new Date(s.created_at).getTime()))
|
||||
return bTime - aTime
|
||||
})
|
||||
|
||||
for (const [groupId, groupSessions] of sortedGroups) {
|
||||
groupSessions.sort((a, b) => (a.page_number || 0) - (b.page_number || 0))
|
||||
// Extract base title (remove " — S. X" suffix)
|
||||
const baseName = groupSessions[0]?.name?.replace(/ — S\. \d+$/, '') || 'Dokument'
|
||||
result.push({
|
||||
group_id: groupId,
|
||||
title: baseName,
|
||||
sessions: groupSessions,
|
||||
page_count: groupSessions.length,
|
||||
})
|
||||
}
|
||||
|
||||
for (const s of ungrouped) {
|
||||
result.push(s)
|
||||
}
|
||||
|
||||
// Sort by creation time (most recent first)
|
||||
const getTime = (item: SessionListItem | DocumentGroupView): number => {
|
||||
if ('group_id' in item) {
|
||||
return Math.min(...item.sessions.map((s: SessionListItem) => new Date(s.created_at).getTime()))
|
||||
}
|
||||
return new Date(item.created_at).getTime()
|
||||
}
|
||||
result.sort((a, b) => getTime(b) - getTime(a))
|
||||
|
||||
return result
|
||||
}, [sessions])
|
||||
|
||||
// ---- Open session ----
|
||||
|
||||
const openSession = useCallback(async (sid: string, keepSubSessions?: boolean) => {
|
||||
try {
|
||||
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`)
|
||||
if (!res.ok) return
|
||||
const data = await res.json()
|
||||
|
||||
setSessionId(sid)
|
||||
setSessionName(data.name || data.filename || '')
|
||||
setActiveCategory(data.document_category || undefined)
|
||||
setIsGroundTruth(!!data.ground_truth?.build_grid_reference)
|
||||
|
||||
// Sub-session handling
|
||||
if (data.sub_sessions?.length > 0) {
|
||||
setSubSessions(data.sub_sessions)
|
||||
setParentSessionId(sid)
|
||||
} else if (data.parent_session_id) {
|
||||
setParentSessionId(data.parent_session_id)
|
||||
} else if (!keepSubSessions) {
|
||||
setSubSessions([])
|
||||
setParentSessionId(null)
|
||||
}
|
||||
|
||||
// Determine UI step from DB state
|
||||
const dbStep = data.current_step || 1
|
||||
const hasGrid = !!data.grid_editor_result
|
||||
const hasStructure = !!data.structure_result
|
||||
const hasWords = !!data.word_result
|
||||
|
||||
let uiStep: number
|
||||
if (hasGrid) {
|
||||
uiStep = 9 // grid-review
|
||||
} else if (hasStructure) {
|
||||
uiStep = 8 // grid-build
|
||||
} else if (hasWords) {
|
||||
uiStep = 7 // structure
|
||||
} else {
|
||||
uiStep = dbStepToKombiV2Ui(dbStep)
|
||||
}
|
||||
|
||||
// For sessions that already have an upload, skip the upload step
|
||||
if (uiStep === 0 && dbStep >= 2) {
|
||||
uiStep = 1
|
||||
}
|
||||
|
||||
const skipIds: string[] = []
|
||||
const isSubSession = !!data.parent_session_id
|
||||
if (isSubSession && dbStep >= 5) {
|
||||
skipIds.push('upload', 'orientation', 'page-split', 'deskew', 'dewarp', 'content-crop')
|
||||
if (uiStep < 6) uiStep = 6
|
||||
} else if (isSubSession && dbStep >= 2) {
|
||||
skipIds.push('upload', 'orientation')
|
||||
if (uiStep < 2) uiStep = 2
|
||||
}
|
||||
|
||||
setSteps(
|
||||
KOMBI_V2_STEPS.map((s, i) => ({
|
||||
...s,
|
||||
status: skipIds.includes(s.id)
|
||||
? 'skipped'
|
||||
: i < uiStep ? 'completed' : i === uiStep ? 'active' : 'pending',
|
||||
})),
|
||||
)
|
||||
setCurrentStep(uiStep)
|
||||
} catch (e) {
|
||||
console.error('Failed to open session:', e)
|
||||
}
|
||||
}, [])
|
||||
|
||||
// ---- Deep link handling ----
|
||||
|
||||
useEffect(() => {
|
||||
if (deepLinkHandled.current) return
|
||||
const urlSession = searchParams.get('session')
|
||||
const urlStep = searchParams.get('step')
|
||||
if (urlSession) {
|
||||
deepLinkHandled.current = true
|
||||
openSession(urlSession).then(() => {
|
||||
if (urlStep) {
|
||||
const stepIdx = parseInt(urlStep, 10)
|
||||
if (!isNaN(stepIdx) && stepIdx >= 0 && stepIdx < KOMBI_V2_STEPS.length) {
|
||||
setCurrentStep(stepIdx)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}, [searchParams, openSession])
|
||||
|
||||
// ---- Step navigation ----
|
||||
|
||||
const goToStep = useCallback((step: number) => {
|
||||
setCurrentStep(step)
|
||||
setSteps(prev =>
|
||||
prev.map((s, i) => ({
|
||||
...s,
|
||||
status: i < step ? 'completed' : i === step ? 'active' : 'pending',
|
||||
})),
|
||||
)
|
||||
}, [])
|
||||
|
||||
const handleStepClick = useCallback((index: number) => {
|
||||
if (index <= currentStep || steps[index].status === 'completed') {
|
||||
setCurrentStep(index)
|
||||
}
|
||||
}, [currentStep, steps])
|
||||
|
||||
const handleNext = useCallback(() => {
|
||||
if (currentStep >= steps.length - 1) {
|
||||
// Last step → return to session list
|
||||
setSteps(initSteps())
|
||||
setCurrentStep(0)
|
||||
setSessionId(null)
|
||||
setSubSessions([])
|
||||
setParentSessionId(null)
|
||||
loadSessions()
|
||||
return
|
||||
}
|
||||
|
||||
const nextStep = currentStep + 1
|
||||
setSteps(prev =>
|
||||
prev.map((s, i) => {
|
||||
if (i === currentStep) return { ...s, status: 'completed' }
|
||||
if (i === nextStep) return { ...s, status: 'active' }
|
||||
return s
|
||||
}),
|
||||
)
|
||||
setCurrentStep(nextStep)
|
||||
}, [currentStep, steps, loadSessions])
|
||||
|
||||
// ---- Session CRUD ----
|
||||
|
||||
const handleNewSession = useCallback(() => {
|
||||
setSessionId(null)
|
||||
setSessionName('')
|
||||
setCurrentStep(0)
|
||||
setSubSessions([])
|
||||
setParentSessionId(null)
|
||||
setSteps(initSteps())
|
||||
}, [])
|
||||
|
||||
const deleteSession = useCallback(async (sid: string) => {
|
||||
try {
|
||||
await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`, { method: 'DELETE' })
|
||||
setSessions(prev => prev.filter(s => s.id !== sid))
|
||||
if (sessionId === sid) handleNewSession()
|
||||
} catch (e) {
|
||||
console.error('Failed to delete session:', e)
|
||||
}
|
||||
}, [sessionId, handleNewSession])
|
||||
|
||||
const renameSession = useCallback(async (sid: string, newName: string) => {
|
||||
try {
|
||||
await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`, {
|
||||
method: 'PUT',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({ name: newName }),
|
||||
})
|
||||
setSessions(prev => prev.map(s => s.id === sid ? { ...s, name: newName } : s))
|
||||
if (sessionId === sid) setSessionName(newName)
|
||||
} catch (e) {
|
||||
console.error('Failed to rename session:', e)
|
||||
}
|
||||
}, [sessionId])
|
||||
|
||||
const updateCategory = useCallback(async (sid: string, category: DocumentCategory) => {
|
||||
try {
|
||||
await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`, {
|
||||
method: 'PUT',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({ document_category: category }),
|
||||
})
|
||||
setSessions(prev => prev.map(s => s.id === sid ? { ...s, document_category: category } : s))
|
||||
if (sessionId === sid) setActiveCategory(category)
|
||||
} catch (e) {
|
||||
console.error('Failed to update category:', e)
|
||||
}
|
||||
}, [sessionId])
|
||||
|
||||
// ---- Orientation completion (checks for page-split sub-sessions) ----
|
||||
|
||||
const handleOrientationComplete = useCallback(async (sid: string) => {
|
||||
setSessionId(sid)
|
||||
loadSessions()
|
||||
|
||||
try {
|
||||
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`)
|
||||
if (res.ok) {
|
||||
const data = await res.json()
|
||||
if (data.sub_sessions?.length > 0) {
|
||||
const subs: SubSession[] = data.sub_sessions.map((s: SubSession) => ({
|
||||
id: s.id,
|
||||
name: s.name,
|
||||
box_index: s.box_index,
|
||||
current_step: s.current_step,
|
||||
}))
|
||||
setSubSessions(subs)
|
||||
setParentSessionId(sid)
|
||||
openSession(subs[0].id, true)
|
||||
return
|
||||
}
|
||||
}
|
||||
} catch (e) {
|
||||
console.error('Failed to check for sub-sessions:', e)
|
||||
}
|
||||
|
||||
handleNext()
|
||||
}, [loadSessions, openSession, handleNext])
|
||||
|
||||
const handleSessionChange = useCallback((newSessionId: string) => {
|
||||
openSession(newSessionId, true)
|
||||
}, [openSession])
|
||||
|
||||
return {
|
||||
// State
|
||||
currentStep,
|
||||
sessionId,
|
||||
sessionName,
|
||||
sessions,
|
||||
loadingSessions,
|
||||
activeCategory,
|
||||
isGroundTruth,
|
||||
subSessions,
|
||||
parentSessionId,
|
||||
steps,
|
||||
gridSaveRef,
|
||||
// Computed
|
||||
groupedSessions,
|
||||
// Actions
|
||||
loadSessions,
|
||||
openSession,
|
||||
goToStep,
|
||||
handleStepClick,
|
||||
handleNext,
|
||||
handleNewSession,
|
||||
deleteSession,
|
||||
renameSession,
|
||||
updateCategory,
|
||||
handleOrientationComplete,
|
||||
handleSessionChange,
|
||||
setSessionId,
|
||||
setSubSessions,
|
||||
setParentSessionId,
|
||||
setIsGroundTruth,
|
||||
}
|
||||
}
|
||||
@@ -33,6 +33,9 @@ export interface SessionListItem {
|
||||
current_step: number
|
||||
document_category?: DocumentCategory
|
||||
doc_type?: string
|
||||
parent_session_id?: string
|
||||
document_group_id?: string
|
||||
page_number?: number
|
||||
created_at: string
|
||||
updated_at?: string
|
||||
}
|
||||
@@ -108,6 +111,8 @@ export interface SessionInfo {
|
||||
sub_sessions?: SubSession[]
|
||||
parent_session_id?: string
|
||||
box_index?: number
|
||||
document_group_id?: string
|
||||
page_number?: number
|
||||
}
|
||||
|
||||
export interface DeskewResult {
|
||||
|
||||
59
admin-lehrer/components/ocr-kombi/KombiStepper.tsx
Normal file
59
admin-lehrer/components/ocr-kombi/KombiStepper.tsx
Normal file
@@ -0,0 +1,59 @@
|
||||
'use client'
|
||||
|
||||
import type { PipelineStep } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
|
||||
interface KombiStepperProps {
|
||||
steps: PipelineStep[]
|
||||
currentStep: number
|
||||
onStepClick: (index: number) => void
|
||||
}
|
||||
|
||||
export function KombiStepper({ steps, currentStep, onStepClick }: KombiStepperProps) {
|
||||
return (
|
||||
<div className="flex items-center gap-0.5 px-3 py-2.5 bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 overflow-x-auto">
|
||||
{steps.map((step, index) => {
|
||||
const isActive = index === currentStep
|
||||
const isCompleted = step.status === 'completed'
|
||||
const isFailed = step.status === 'failed'
|
||||
const isSkipped = step.status === 'skipped'
|
||||
const isClickable = (index <= currentStep || isCompleted) && !isSkipped
|
||||
|
||||
return (
|
||||
<div key={step.id} className="flex items-center flex-shrink-0">
|
||||
{index > 0 && (
|
||||
<div
|
||||
className={`h-0.5 w-4 mx-0.5 ${
|
||||
isSkipped
|
||||
? 'bg-gray-200 dark:bg-gray-700 border-t border-dashed border-gray-400'
|
||||
: index <= currentStep ? 'bg-teal-400' : 'bg-gray-300 dark:bg-gray-600'
|
||||
}`}
|
||||
/>
|
||||
)}
|
||||
<button
|
||||
onClick={() => isClickable && onStepClick(index)}
|
||||
disabled={!isClickable}
|
||||
className={`flex items-center gap-1 px-2 py-1 rounded-full text-xs font-medium transition-all whitespace-nowrap ${
|
||||
isSkipped
|
||||
? 'bg-gray-100 text-gray-400 dark:bg-gray-800 dark:text-gray-600 line-through'
|
||||
: isActive
|
||||
? 'bg-teal-100 text-teal-700 dark:bg-teal-900/40 dark:text-teal-300 ring-2 ring-teal-400'
|
||||
: isCompleted
|
||||
? 'bg-green-100 text-green-700 dark:bg-green-900/40 dark:text-green-300'
|
||||
: isFailed
|
||||
? 'bg-red-100 text-red-700 dark:bg-red-900/40 dark:text-red-300'
|
||||
: 'text-gray-400 dark:text-gray-500'
|
||||
} ${isClickable ? 'cursor-pointer hover:opacity-80' : 'cursor-default'}`}
|
||||
title={step.name}
|
||||
>
|
||||
<span className="text-sm">
|
||||
{isSkipped ? '-' : isCompleted ? '\u2713' : isFailed ? '\u2717' : step.icon}
|
||||
</span>
|
||||
<span className="hidden lg:inline">{step.name}</span>
|
||||
<span className="lg:hidden">{index + 1}</span>
|
||||
</button>
|
||||
</div>
|
||||
)
|
||||
})}
|
||||
</div>
|
||||
)
|
||||
}
|
||||
66
admin-lehrer/components/ocr-kombi/SessionHeader.tsx
Normal file
66
admin-lehrer/components/ocr-kombi/SessionHeader.tsx
Normal file
@@ -0,0 +1,66 @@
|
||||
'use client'
|
||||
|
||||
import { useState } from 'react'
|
||||
import { DOCUMENT_CATEGORIES, type DocumentCategory } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
|
||||
interface SessionHeaderProps {
|
||||
sessionName: string
|
||||
activeCategory?: DocumentCategory
|
||||
isGroundTruth: boolean
|
||||
onUpdateCategory: (category: DocumentCategory) => void
|
||||
}
|
||||
|
||||
export function SessionHeader({
|
||||
sessionName,
|
||||
activeCategory,
|
||||
isGroundTruth,
|
||||
onUpdateCategory,
|
||||
}: SessionHeaderProps) {
|
||||
const [showCategoryPicker, setShowCategoryPicker] = useState(false)
|
||||
|
||||
const catInfo = DOCUMENT_CATEGORIES.find(c => c.value === activeCategory)
|
||||
|
||||
return (
|
||||
<div className="relative flex items-center gap-3 text-sm text-gray-500 dark:text-gray-400">
|
||||
<span>
|
||||
Aktive Session:{' '}
|
||||
<span className="font-medium text-gray-700 dark:text-gray-300">{sessionName}</span>
|
||||
</span>
|
||||
<button
|
||||
onClick={() => setShowCategoryPicker(!showCategoryPicker)}
|
||||
className={`text-xs px-2.5 py-1 rounded-full border transition-colors ${
|
||||
activeCategory
|
||||
? 'bg-teal-50 dark:bg-teal-900/30 border-teal-200 dark:border-teal-700 text-teal-700 dark:text-teal-300 hover:bg-teal-100'
|
||||
: 'bg-amber-50 dark:bg-amber-900/20 border-amber-300 dark:border-amber-700 text-amber-700 dark:text-amber-300 hover:bg-amber-100 animate-pulse'
|
||||
}`}
|
||||
>
|
||||
{catInfo ? `${catInfo.icon} ${catInfo.label}` : 'Kategorie setzen'}
|
||||
</button>
|
||||
{isGroundTruth && (
|
||||
<span className="text-xs px-2 py-0.5 rounded-full bg-amber-50 dark:bg-amber-900/20 border border-amber-300 dark:border-amber-700 text-amber-700 dark:text-amber-300">
|
||||
GT
|
||||
</span>
|
||||
)}
|
||||
{showCategoryPicker && (
|
||||
<div className="absolute left-0 top-full mt-1 z-20 bg-white dark:bg-gray-800 border border-gray-200 dark:border-gray-700 rounded-lg shadow-lg p-2 grid grid-cols-2 gap-1 w-64">
|
||||
{DOCUMENT_CATEGORIES.map(cat => (
|
||||
<button
|
||||
key={cat.value}
|
||||
onClick={() => {
|
||||
onUpdateCategory(cat.value)
|
||||
setShowCategoryPicker(false)
|
||||
}}
|
||||
className={`text-xs px-2 py-1.5 rounded-md text-left transition-colors ${
|
||||
activeCategory === cat.value
|
||||
? 'bg-teal-100 dark:bg-teal-900/40 text-teal-700 dark:text-teal-300'
|
||||
: 'hover:bg-gray-100 dark:hover:bg-gray-700 text-gray-600 dark:text-gray-400'
|
||||
}`}
|
||||
>
|
||||
{cat.icon} {cat.label}
|
||||
</button>
|
||||
))}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)
|
||||
}
|
||||
361
admin-lehrer/components/ocr-kombi/SessionList.tsx
Normal file
361
admin-lehrer/components/ocr-kombi/SessionList.tsx
Normal file
@@ -0,0 +1,361 @@
|
||||
'use client'
|
||||
|
||||
import { useState } from 'react'
|
||||
import { DOCUMENT_CATEGORIES, type DocumentCategory } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
import type { SessionListItem, DocumentGroupView } from '@/app/(admin)/ai/ocr-kombi/useKombiPipeline'
|
||||
|
||||
const KLAUSUR_API = '/klausur-api'
|
||||
|
||||
interface SessionListProps {
|
||||
items: (SessionListItem | DocumentGroupView)[]
|
||||
loading: boolean
|
||||
activeSessionId: string | null
|
||||
onOpenSession: (sid: string) => void
|
||||
onNewSession: () => void
|
||||
onDeleteSession: (sid: string) => void
|
||||
onRenameSession: (sid: string, newName: string) => void
|
||||
onUpdateCategory: (sid: string, category: DocumentCategory) => void
|
||||
}
|
||||
|
||||
function isGroup(item: SessionListItem | DocumentGroupView): item is DocumentGroupView {
|
||||
return 'group_id' in item
|
||||
}
|
||||
|
||||
export function SessionList({
|
||||
items,
|
||||
loading,
|
||||
activeSessionId,
|
||||
onOpenSession,
|
||||
onNewSession,
|
||||
onDeleteSession,
|
||||
onRenameSession,
|
||||
onUpdateCategory,
|
||||
}: SessionListProps) {
|
||||
const [editingName, setEditingName] = useState<string | null>(null)
|
||||
const [editNameValue, setEditNameValue] = useState('')
|
||||
const [editingCategory, setEditingCategory] = useState<string | null>(null)
|
||||
const [expandedGroups, setExpandedGroups] = useState<Set<string>>(new Set())
|
||||
|
||||
const toggleGroup = (groupId: string) => {
|
||||
setExpandedGroups(prev => {
|
||||
const next = new Set(prev)
|
||||
if (next.has(groupId)) next.delete(groupId)
|
||||
else next.add(groupId)
|
||||
return next
|
||||
})
|
||||
}
|
||||
|
||||
return (
|
||||
<div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-4">
|
||||
<div className="flex items-center justify-between mb-3">
|
||||
<h3 className="text-sm font-medium text-gray-700 dark:text-gray-300">
|
||||
Sessions ({items.length})
|
||||
</h3>
|
||||
<button
|
||||
onClick={onNewSession}
|
||||
className="text-xs px-3 py-1.5 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors"
|
||||
>
|
||||
+ Neue Session
|
||||
</button>
|
||||
</div>
|
||||
|
||||
{loading ? (
|
||||
<div className="text-sm text-gray-400 py-2">Lade Sessions...</div>
|
||||
) : items.length === 0 ? (
|
||||
<div className="text-sm text-gray-400 py-2">Noch keine Sessions vorhanden.</div>
|
||||
) : (
|
||||
<div className="space-y-1.5 max-h-[320px] overflow-y-auto">
|
||||
{items.map(item =>
|
||||
isGroup(item) ? (
|
||||
<GroupRow
|
||||
key={item.group_id}
|
||||
group={item}
|
||||
expanded={expandedGroups.has(item.group_id)}
|
||||
activeSessionId={activeSessionId}
|
||||
onToggle={() => toggleGroup(item.group_id)}
|
||||
onOpenSession={onOpenSession}
|
||||
onDeleteSession={onDeleteSession}
|
||||
/>
|
||||
) : (
|
||||
<SessionRow
|
||||
key={item.id}
|
||||
session={item}
|
||||
isActive={activeSessionId === item.id}
|
||||
editingName={editingName}
|
||||
editNameValue={editNameValue}
|
||||
editingCategory={editingCategory}
|
||||
onOpenSession={() => onOpenSession(item.id)}
|
||||
onStartRename={() => {
|
||||
setEditNameValue(item.name || item.filename)
|
||||
setEditingName(item.id)
|
||||
}}
|
||||
onFinishRename={(newName) => {
|
||||
onRenameSession(item.id, newName)
|
||||
setEditingName(null)
|
||||
}}
|
||||
onCancelRename={() => setEditingName(null)}
|
||||
onEditNameChange={setEditNameValue}
|
||||
onToggleCategory={() => setEditingCategory(editingCategory === item.id ? null : item.id)}
|
||||
onUpdateCategory={(cat) => {
|
||||
onUpdateCategory(item.id, cat)
|
||||
setEditingCategory(null)
|
||||
}}
|
||||
onDelete={() => {
|
||||
if (confirm('Session loeschen?')) onDeleteSession(item.id)
|
||||
}}
|
||||
/>
|
||||
)
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
// ---- Group row (multi-page document) ----
|
||||
|
||||
function GroupRow({
|
||||
group,
|
||||
expanded,
|
||||
activeSessionId,
|
||||
onToggle,
|
||||
onOpenSession,
|
||||
onDeleteSession,
|
||||
}: {
|
||||
group: DocumentGroupView
|
||||
expanded: boolean
|
||||
activeSessionId: string | null
|
||||
onToggle: () => void
|
||||
onOpenSession: (sid: string) => void
|
||||
onDeleteSession: (sid: string) => void
|
||||
}) {
|
||||
const isActive = group.sessions.some(s => s.id === activeSessionId)
|
||||
|
||||
return (
|
||||
<div>
|
||||
<div
|
||||
onClick={onToggle}
|
||||
className={`flex items-center gap-3 px-3 py-2 rounded-lg text-sm cursor-pointer transition-colors ${
|
||||
isActive
|
||||
? 'bg-teal-50 dark:bg-teal-900/30 border border-teal-200 dark:border-teal-700'
|
||||
: 'hover:bg-gray-50 dark:hover:bg-gray-700/50'
|
||||
}`}
|
||||
>
|
||||
<span className="text-base">{expanded ? '\u25BC' : '\u25B6'}</span>
|
||||
<div className="flex-1 min-w-0">
|
||||
<div className="truncate font-medium text-gray-700 dark:text-gray-300">
|
||||
{group.title}
|
||||
</div>
|
||||
<div className="text-xs text-gray-400">
|
||||
{group.page_count} Seiten
|
||||
</div>
|
||||
</div>
|
||||
<span className="text-xs px-2 py-0.5 rounded-full bg-blue-50 dark:bg-blue-900/20 border border-blue-200 dark:border-blue-800 text-blue-600 dark:text-blue-400">
|
||||
Dokument
|
||||
</span>
|
||||
</div>
|
||||
|
||||
{expanded && (
|
||||
<div className="ml-6 mt-1 space-y-1 border-l-2 border-gray-200 dark:border-gray-700 pl-3">
|
||||
{group.sessions.map(s => (
|
||||
<div
|
||||
key={s.id}
|
||||
className={`flex items-center gap-2 px-2 py-1.5 rounded text-xs cursor-pointer transition-colors ${
|
||||
activeSessionId === s.id
|
||||
? 'bg-teal-50 dark:bg-teal-900/30 text-teal-700 dark:text-teal-300'
|
||||
: 'hover:bg-gray-50 dark:hover:bg-gray-700/50 text-gray-600 dark:text-gray-400'
|
||||
}`}
|
||||
onClick={() => onOpenSession(s.id)}
|
||||
>
|
||||
{/* Thumbnail */}
|
||||
<div className="flex-shrink-0 w-8 h-8 rounded overflow-hidden bg-gray-100 dark:bg-gray-700">
|
||||
{/* eslint-disable-next-line @next/next/no-img-element */}
|
||||
<img
|
||||
src={`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${s.id}/thumbnail?size=64`}
|
||||
alt=""
|
||||
className="w-full h-full object-cover"
|
||||
loading="lazy"
|
||||
onError={(e) => { (e.target as HTMLImageElement).style.display = 'none' }}
|
||||
/>
|
||||
</div>
|
||||
<span className="truncate flex-1">S. {s.page_number || '?'}</span>
|
||||
<span className="text-[10px] text-gray-400">Step {s.current_step}</span>
|
||||
<button
|
||||
onClick={(e) => {
|
||||
e.stopPropagation()
|
||||
if (confirm('Seite loeschen?')) onDeleteSession(s.id)
|
||||
}}
|
||||
className="p-0.5 text-gray-400 hover:text-red-500"
|
||||
title="Loeschen"
|
||||
>
|
||||
<svg className="w-3 h-3" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={2}>
|
||||
<path strokeLinecap="round" strokeLinejoin="round" d="M19 7l-.867 12.142A2 2 0 0116.138 21H7.862a2 2 0 01-1.995-1.858L5 7m5 4v6m4-6v6m1-10V4a1 1 0 00-1-1h-4a1 1 0 00-1 1v3M4 7h16" />
|
||||
</svg>
|
||||
</button>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
// ---- Single session row ----
|
||||
|
||||
function SessionRow({
|
||||
session,
|
||||
isActive,
|
||||
editingName,
|
||||
editNameValue,
|
||||
editingCategory,
|
||||
onOpenSession,
|
||||
onStartRename,
|
||||
onFinishRename,
|
||||
onCancelRename,
|
||||
onEditNameChange,
|
||||
onToggleCategory,
|
||||
onUpdateCategory,
|
||||
onDelete,
|
||||
}: {
|
||||
session: SessionListItem
|
||||
isActive: boolean
|
||||
editingName: string | null
|
||||
editNameValue: string
|
||||
editingCategory: string | null
|
||||
onOpenSession: () => void
|
||||
onStartRename: () => void
|
||||
onFinishRename: (name: string) => void
|
||||
onCancelRename: () => void
|
||||
onEditNameChange: (val: string) => void
|
||||
onToggleCategory: () => void
|
||||
onUpdateCategory: (cat: DocumentCategory) => void
|
||||
onDelete: () => void
|
||||
}) {
|
||||
const catInfo = DOCUMENT_CATEGORIES.find(c => c.value === session.document_category)
|
||||
const isEditing = editingName === session.id
|
||||
|
||||
return (
|
||||
<div
|
||||
className={`relative flex items-start gap-3 px-3 py-2.5 rounded-lg text-sm transition-colors cursor-pointer ${
|
||||
isActive
|
||||
? 'bg-teal-50 dark:bg-teal-900/30 border border-teal-200 dark:border-teal-700'
|
||||
: 'hover:bg-gray-50 dark:hover:bg-gray-700/50'
|
||||
}`}
|
||||
>
|
||||
{/* Thumbnail */}
|
||||
<div
|
||||
className="flex-shrink-0 w-12 h-12 rounded-md overflow-hidden bg-gray-100 dark:bg-gray-700"
|
||||
onClick={onOpenSession}
|
||||
>
|
||||
{/* eslint-disable-next-line @next/next/no-img-element */}
|
||||
<img
|
||||
src={`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${session.id}/thumbnail?size=96`}
|
||||
alt=""
|
||||
className="w-full h-full object-cover"
|
||||
loading="lazy"
|
||||
onError={(e) => { (e.target as HTMLImageElement).style.display = 'none' }}
|
||||
/>
|
||||
</div>
|
||||
|
||||
{/* Info */}
|
||||
<div className="flex-1 min-w-0" onClick={onOpenSession}>
|
||||
{isEditing ? (
|
||||
<input
|
||||
autoFocus
|
||||
value={editNameValue}
|
||||
onChange={(e) => onEditNameChange(e.target.value)}
|
||||
onBlur={() => onFinishRename(editNameValue)}
|
||||
onKeyDown={(e) => {
|
||||
if (e.key === 'Enter') onFinishRename(editNameValue)
|
||||
if (e.key === 'Escape') onCancelRename()
|
||||
}}
|
||||
onClick={(e) => e.stopPropagation()}
|
||||
className="w-full px-1 py-0.5 text-sm border rounded dark:bg-gray-700 dark:border-gray-600"
|
||||
/>
|
||||
) : (
|
||||
<div className="truncate font-medium text-gray-700 dark:text-gray-300">
|
||||
{session.name || session.filename}
|
||||
</div>
|
||||
)}
|
||||
<button
|
||||
onClick={(e) => {
|
||||
e.stopPropagation()
|
||||
navigator.clipboard.writeText(session.id)
|
||||
const btn = e.currentTarget
|
||||
btn.textContent = 'Kopiert!'
|
||||
setTimeout(() => { btn.textContent = `ID: ${session.id.slice(0, 8)}` }, 1500)
|
||||
}}
|
||||
className="text-[10px] font-mono text-gray-400 hover:text-teal-500 transition-colors"
|
||||
title={`Volle ID: ${session.id} — Klick zum Kopieren`}
|
||||
>
|
||||
ID: {session.id.slice(0, 8)}
|
||||
</button>
|
||||
<div className="text-xs text-gray-400 mt-0.5">
|
||||
{new Date(session.created_at).toLocaleDateString('de-DE', {
|
||||
day: '2-digit', month: '2-digit', year: '2-digit',
|
||||
hour: '2-digit', minute: '2-digit',
|
||||
})}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Category badge */}
|
||||
<div className="flex flex-col gap-1 items-end flex-shrink-0" onClick={(e) => e.stopPropagation()}>
|
||||
<button
|
||||
onClick={onToggleCategory}
|
||||
className={`text-[10px] px-1.5 py-0.5 rounded-full border transition-colors ${
|
||||
catInfo
|
||||
? 'bg-teal-50 dark:bg-teal-900/30 border-teal-200 dark:border-teal-700 text-teal-700 dark:text-teal-300'
|
||||
: 'bg-gray-50 dark:bg-gray-700 border-gray-200 dark:border-gray-600 text-gray-400 hover:text-gray-600'
|
||||
}`}
|
||||
title="Kategorie setzen"
|
||||
>
|
||||
{catInfo ? `${catInfo.icon} ${catInfo.label}` : '+ Kategorie'}
|
||||
</button>
|
||||
</div>
|
||||
|
||||
{/* Actions */}
|
||||
<div className="flex flex-col gap-0.5 flex-shrink-0">
|
||||
<button
|
||||
onClick={(e) => { e.stopPropagation(); onStartRename() }}
|
||||
className="p-1 text-gray-400 hover:text-gray-600 dark:hover:text-gray-300"
|
||||
title="Umbenennen"
|
||||
>
|
||||
<svg className="w-3.5 h-3.5" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={2}>
|
||||
<path strokeLinecap="round" strokeLinejoin="round" d="M15.232 5.232l3.536 3.536m-2.036-5.036a2.5 2.5 0 113.536 3.536L6.5 21.036H3v-3.572L16.732 3.732z" />
|
||||
</svg>
|
||||
</button>
|
||||
<button
|
||||
onClick={(e) => { e.stopPropagation(); onDelete() }}
|
||||
className="p-1 text-gray-400 hover:text-red-500"
|
||||
title="Loeschen"
|
||||
>
|
||||
<svg className="w-3.5 h-3.5" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={2}>
|
||||
<path strokeLinecap="round" strokeLinejoin="round" d="M19 7l-.867 12.142A2 2 0 0116.138 21H7.862a2 2 0 01-1.995-1.858L5 7m5 4v6m4-6v6m1-10V4a1 1 0 00-1-1h-4a1 1 0 00-1 1v3M4 7h16" />
|
||||
</svg>
|
||||
</button>
|
||||
</div>
|
||||
|
||||
{/* Category dropdown */}
|
||||
{editingCategory === session.id && (
|
||||
<div
|
||||
className="absolute right-0 top-full mt-1 z-20 bg-white dark:bg-gray-800 border border-gray-200 dark:border-gray-700 rounded-lg shadow-lg p-2 grid grid-cols-2 gap-1 w-64"
|
||||
onClick={(e) => e.stopPropagation()}
|
||||
>
|
||||
{DOCUMENT_CATEGORIES.map(cat => (
|
||||
<button
|
||||
key={cat.value}
|
||||
onClick={() => onUpdateCategory(cat.value)}
|
||||
className={`text-xs px-2 py-1.5 rounded-md text-left transition-colors ${
|
||||
session.document_category === cat.value
|
||||
? 'bg-teal-100 dark:bg-teal-900/40 text-teal-700 dark:text-teal-300'
|
||||
: 'hover:bg-gray-100 dark:hover:bg-gray-700 text-gray-600 dark:text-gray-400'
|
||||
}`}
|
||||
>
|
||||
{cat.icon} {cat.label}
|
||||
</button>
|
||||
))}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)
|
||||
}
|
||||
13
admin-lehrer/components/ocr-kombi/StepContentCrop.tsx
Normal file
13
admin-lehrer/components/ocr-kombi/StepContentCrop.tsx
Normal file
@@ -0,0 +1,13 @@
|
||||
'use client'
|
||||
|
||||
import { StepCrop as BaseStepCrop } from '@/components/ocr-pipeline/StepCrop'
|
||||
|
||||
interface StepContentCropProps {
|
||||
sessionId: string | null
|
||||
onNext: () => void
|
||||
}
|
||||
|
||||
/** Thin wrapper around the shared StepCrop component */
|
||||
export function StepContentCrop({ sessionId, onNext }: StepContentCropProps) {
|
||||
return <BaseStepCrop key={sessionId} sessionId={sessionId} onNext={onNext} />
|
||||
}
|
||||
13
admin-lehrer/components/ocr-kombi/StepDeskew.tsx
Normal file
13
admin-lehrer/components/ocr-kombi/StepDeskew.tsx
Normal file
@@ -0,0 +1,13 @@
|
||||
'use client'
|
||||
|
||||
import { StepDeskew as BaseStepDeskew } from '@/components/ocr-pipeline/StepDeskew'
|
||||
|
||||
interface StepDeskewProps {
|
||||
sessionId: string | null
|
||||
onNext: () => void
|
||||
}
|
||||
|
||||
/** Thin wrapper around the shared StepDeskew component */
|
||||
export function StepDeskew({ sessionId, onNext }: StepDeskewProps) {
|
||||
return <BaseStepDeskew key={sessionId} sessionId={sessionId} onNext={onNext} />
|
||||
}
|
||||
13
admin-lehrer/components/ocr-kombi/StepDewarp.tsx
Normal file
13
admin-lehrer/components/ocr-kombi/StepDewarp.tsx
Normal file
@@ -0,0 +1,13 @@
|
||||
'use client'
|
||||
|
||||
import { StepDewarp as BaseStepDewarp } from '@/components/ocr-pipeline/StepDewarp'
|
||||
|
||||
interface StepDewarpProps {
|
||||
sessionId: string | null
|
||||
onNext: () => void
|
||||
}
|
||||
|
||||
/** Thin wrapper around the shared StepDewarp component */
|
||||
export function StepDewarp({ sessionId, onNext }: StepDewarpProps) {
|
||||
return <BaseStepDewarp key={sessionId} sessionId={sessionId} onNext={onNext} />
|
||||
}
|
||||
109
admin-lehrer/components/ocr-kombi/StepGridBuild.tsx
Normal file
109
admin-lehrer/components/ocr-kombi/StepGridBuild.tsx
Normal file
@@ -0,0 +1,109 @@
|
||||
'use client'
|
||||
|
||||
import { useState, useEffect } from 'react'
|
||||
|
||||
const KLAUSUR_API = '/klausur-api'
|
||||
|
||||
interface StepGridBuildProps {
|
||||
sessionId: string | null
|
||||
onNext: () => void
|
||||
}
|
||||
|
||||
/**
|
||||
* Step 9: Grid Build.
|
||||
* Triggers the build-grid endpoint and shows progress.
|
||||
*/
|
||||
export function StepGridBuild({ sessionId, onNext }: StepGridBuildProps) {
|
||||
const [building, setBuilding] = useState(false)
|
||||
const [result, setResult] = useState<{ rows: number; cols: number; cells: number } | null>(null)
|
||||
const [error, setError] = useState('')
|
||||
const [autoTriggered, setAutoTriggered] = useState(false)
|
||||
|
||||
useEffect(() => {
|
||||
if (!sessionId || autoTriggered) return
|
||||
// Check if grid already exists
|
||||
checkExistingGrid()
|
||||
// eslint-disable-next-line react-hooks/exhaustive-deps
|
||||
}, [sessionId])
|
||||
|
||||
const checkExistingGrid = async () => {
|
||||
if (!sessionId) return
|
||||
try {
|
||||
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/grid-editor`)
|
||||
if (res.ok) {
|
||||
const data = await res.json()
|
||||
if (data.grid_shape) {
|
||||
setResult({ rows: data.grid_shape.rows, cols: data.grid_shape.cols, cells: data.grid_shape.total_cells })
|
||||
return
|
||||
}
|
||||
}
|
||||
} catch { /* no existing grid */ }
|
||||
|
||||
// Auto-trigger build
|
||||
setAutoTriggered(true)
|
||||
buildGrid()
|
||||
}
|
||||
|
||||
const buildGrid = async () => {
|
||||
if (!sessionId) return
|
||||
setBuilding(true)
|
||||
setError('')
|
||||
try {
|
||||
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/build-grid`, {
|
||||
method: 'POST',
|
||||
})
|
||||
if (!res.ok) {
|
||||
const data = await res.json().catch(() => ({}))
|
||||
throw new Error(data.detail || `Grid-Build fehlgeschlagen (${res.status})`)
|
||||
}
|
||||
const data = await res.json()
|
||||
const shape = data.grid_shape || { rows: 0, cols: 0, total_cells: 0 }
|
||||
setResult({ rows: shape.rows, cols: shape.cols, cells: shape.total_cells })
|
||||
} catch (e) {
|
||||
setError(e instanceof Error ? e.message : String(e))
|
||||
} finally {
|
||||
setBuilding(false)
|
||||
}
|
||||
}
|
||||
|
||||
return (
|
||||
<div className="space-y-4">
|
||||
{building && (
|
||||
<div className="flex items-center gap-3 p-6 bg-blue-50 dark:bg-blue-900/20 rounded-xl border border-blue-200 dark:border-blue-800">
|
||||
<div className="animate-spin w-5 h-5 border-2 border-blue-400 border-t-transparent rounded-full" />
|
||||
<span className="text-sm text-blue-600 dark:text-blue-400">Grid wird aufgebaut...</span>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{result && (
|
||||
<div className="space-y-3">
|
||||
<div className="p-4 bg-green-50 dark:bg-green-900/20 rounded-xl border border-green-200 dark:border-green-800">
|
||||
<div className="text-sm font-medium text-green-700 dark:text-green-300">
|
||||
Grid erstellt: {result.rows} Zeilen, {result.cols} Spalten, {result.cells} Zellen
|
||||
</div>
|
||||
</div>
|
||||
<button
|
||||
onClick={onNext}
|
||||
className="px-4 py-2 bg-teal-600 text-white text-sm rounded-lg hover:bg-teal-700"
|
||||
>
|
||||
Weiter zum Review
|
||||
</button>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{error && (
|
||||
<div className="space-y-3">
|
||||
<div className="text-sm text-red-500 bg-red-50 dark:bg-red-900/20 p-3 rounded-lg">
|
||||
{error}
|
||||
</div>
|
||||
<button
|
||||
onClick={buildGrid}
|
||||
className="px-4 py-2 bg-orange-600 text-white text-sm rounded-lg hover:bg-orange-700"
|
||||
>
|
||||
Erneut versuchen
|
||||
</button>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)
|
||||
}
|
||||
15
admin-lehrer/components/ocr-kombi/StepGridReview.tsx
Normal file
15
admin-lehrer/components/ocr-kombi/StepGridReview.tsx
Normal file
@@ -0,0 +1,15 @@
|
||||
'use client'
|
||||
|
||||
import { StepGridReview as BaseStepGridReview } from '@/components/ocr-pipeline/StepGridReview'
|
||||
import type { MutableRefObject } from 'react'
|
||||
|
||||
interface StepGridReviewProps {
|
||||
sessionId: string | null
|
||||
onNext: () => void
|
||||
saveRef: MutableRefObject<(() => Promise<void>) | null>
|
||||
}
|
||||
|
||||
/** Thin wrapper around the shared StepGridReview component */
|
||||
export function StepGridReview({ sessionId, onNext, saveRef }: StepGridReviewProps) {
|
||||
return <BaseStepGridReview sessionId={sessionId} onNext={onNext} saveRef={saveRef} />
|
||||
}
|
||||
74
admin-lehrer/components/ocr-kombi/StepGroundTruth.tsx
Normal file
74
admin-lehrer/components/ocr-kombi/StepGroundTruth.tsx
Normal file
@@ -0,0 +1,74 @@
|
||||
'use client'
|
||||
|
||||
import { useState } from 'react'
|
||||
|
||||
const KLAUSUR_API = '/klausur-api'
|
||||
|
||||
interface StepGroundTruthProps {
|
||||
sessionId: string | null
|
||||
isGroundTruth: boolean
|
||||
onMarked: () => void
|
||||
gridSaveRef: React.MutableRefObject<(() => Promise<void>) | null>
|
||||
}
|
||||
|
||||
/**
|
||||
* Step 11: Ground Truth marking.
|
||||
* Saves the current grid as reference data for regression tests.
|
||||
*/
|
||||
export function StepGroundTruth({ sessionId, isGroundTruth, onMarked, gridSaveRef }: StepGroundTruthProps) {
|
||||
const [saving, setSaving] = useState(false)
|
||||
const [message, setMessage] = useState('')
|
||||
|
||||
const handleMark = async () => {
|
||||
if (!sessionId) return
|
||||
setSaving(true)
|
||||
setMessage('')
|
||||
try {
|
||||
// Auto-save grid editor before marking
|
||||
if (gridSaveRef.current) {
|
||||
await gridSaveRef.current()
|
||||
}
|
||||
const res = await fetch(
|
||||
`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/mark-ground-truth?pipeline=kombi`,
|
||||
{ method: 'POST' },
|
||||
)
|
||||
if (!res.ok) {
|
||||
const body = await res.text().catch(() => '')
|
||||
throw new Error(`Ground Truth fehlgeschlagen (${res.status}): ${body}`)
|
||||
}
|
||||
const data = await res.json()
|
||||
setMessage(`Ground Truth gespeichert (${data.cells_saved} Zellen)`)
|
||||
onMarked()
|
||||
} catch (e) {
|
||||
setMessage(e instanceof Error ? e.message : String(e))
|
||||
} finally {
|
||||
setSaving(false)
|
||||
}
|
||||
}
|
||||
|
||||
return (
|
||||
<div className="space-y-4 p-6 bg-amber-50 dark:bg-amber-900/10 rounded-xl border border-amber-200 dark:border-amber-800">
|
||||
<h3 className="text-sm font-medium text-amber-700 dark:text-amber-300">
|
||||
Ground Truth
|
||||
</h3>
|
||||
<p className="text-sm text-amber-600 dark:text-amber-400">
|
||||
Markiert die aktuelle Grid-Ausgabe als Referenz fuer Regressionstests.
|
||||
{isGroundTruth && ' Diese Session ist bereits als Ground Truth markiert.'}
|
||||
</p>
|
||||
|
||||
<button
|
||||
onClick={handleMark}
|
||||
disabled={saving}
|
||||
className="px-4 py-2 text-sm bg-amber-600 text-white rounded-lg hover:bg-amber-700 disabled:opacity-50"
|
||||
>
|
||||
{saving ? 'Speichere...' : isGroundTruth ? 'Ground Truth aktualisieren' : 'Als Ground Truth markieren'}
|
||||
</button>
|
||||
|
||||
{message && (
|
||||
<div className={`text-sm ${message.includes('fehlgeschlagen') ? 'text-red-500' : 'text-amber-600 dark:text-amber-400'}`}>
|
||||
{message}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)
|
||||
}
|
||||
30
admin-lehrer/components/ocr-kombi/StepOcr.tsx
Normal file
30
admin-lehrer/components/ocr-kombi/StepOcr.tsx
Normal file
@@ -0,0 +1,30 @@
|
||||
'use client'
|
||||
|
||||
import { PaddleDirectStep } from '@/components/ocr-overlay/PaddleDirectStep'
|
||||
|
||||
interface StepOcrProps {
|
||||
sessionId: string | null
|
||||
onNext: () => void
|
||||
}
|
||||
|
||||
/**
|
||||
* Step 7: OCR (Kombi mode = PaddleOCR + Tesseract).
|
||||
*
|
||||
* Phase 1: Uses the existing PaddleDirectStep with kombi endpoint.
|
||||
* Phase 3 (later) will add transparent 3-phase progress + engine comparison.
|
||||
*/
|
||||
export function StepOcr({ sessionId, onNext }: StepOcrProps) {
|
||||
return (
|
||||
<PaddleDirectStep
|
||||
sessionId={sessionId}
|
||||
onNext={onNext}
|
||||
endpoint="paddle-kombi"
|
||||
title="Kombi-Modus"
|
||||
description="PP-OCRv5 und Tesseract laufen parallel. Koordinaten werden gewichtet gemittelt fuer optimale Positionierung."
|
||||
icon="🔀"
|
||||
buttonLabel="PP-OCRv5 + Tesseract starten"
|
||||
runningLabel="PP-OCRv5 + Tesseract laufen..."
|
||||
engineKey="kombi"
|
||||
/>
|
||||
)
|
||||
}
|
||||
21
admin-lehrer/components/ocr-kombi/StepOrientation.tsx
Normal file
21
admin-lehrer/components/ocr-kombi/StepOrientation.tsx
Normal file
@@ -0,0 +1,21 @@
|
||||
'use client'
|
||||
|
||||
import { StepOrientation as BaseStepOrientation } from '@/components/ocr-pipeline/StepOrientation'
|
||||
|
||||
interface StepOrientationProps {
|
||||
sessionId: string | null
|
||||
onNext: (sessionId: string) => void
|
||||
onSessionList: () => void
|
||||
}
|
||||
|
||||
/** Thin wrapper around the shared StepOrientation component */
|
||||
export function StepOrientation({ sessionId, onNext, onSessionList }: StepOrientationProps) {
|
||||
return (
|
||||
<BaseStepOrientation
|
||||
key={sessionId}
|
||||
sessionId={sessionId}
|
||||
onNext={onNext}
|
||||
onSessionList={onSessionList}
|
||||
/>
|
||||
)
|
||||
}
|
||||
123
admin-lehrer/components/ocr-kombi/StepPageSplit.tsx
Normal file
123
admin-lehrer/components/ocr-kombi/StepPageSplit.tsx
Normal file
@@ -0,0 +1,123 @@
|
||||
'use client'
|
||||
|
||||
import { useState, useEffect } from 'react'
|
||||
import type { SubSession } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
|
||||
const KLAUSUR_API = '/klausur-api'
|
||||
|
||||
interface StepPageSplitProps {
|
||||
sessionId: string | null
|
||||
onNext: () => void
|
||||
onSubSessionsCreated: (subs: SubSession[]) => void
|
||||
}
|
||||
|
||||
/**
|
||||
* Step 3: Page split detection.
|
||||
* Checks if the image is a double-page spread and offers to split it.
|
||||
* If no split needed, auto-advances.
|
||||
*/
|
||||
export function StepPageSplit({ sessionId, onNext, onSubSessionsCreated }: StepPageSplitProps) {
|
||||
const [checking, setChecking] = useState(false)
|
||||
const [splitResult, setSplitResult] = useState<{ is_double_page: boolean; pages?: number } | null>(null)
|
||||
const [splitting, setSplitting] = useState(false)
|
||||
const [error, setError] = useState('')
|
||||
|
||||
useEffect(() => {
|
||||
if (!sessionId) return
|
||||
// Auto-check for page split
|
||||
checkPageSplit()
|
||||
// eslint-disable-next-line react-hooks/exhaustive-deps
|
||||
}, [sessionId])
|
||||
|
||||
const checkPageSplit = async () => {
|
||||
if (!sessionId) return
|
||||
setChecking(true)
|
||||
setError('')
|
||||
try {
|
||||
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}`)
|
||||
if (!res.ok) throw new Error('Session nicht gefunden')
|
||||
const data = await res.json()
|
||||
|
||||
// If sub-sessions already exist, this was already split
|
||||
if (data.sub_sessions?.length > 0) {
|
||||
onSubSessionsCreated(data.sub_sessions)
|
||||
onNext()
|
||||
return
|
||||
}
|
||||
|
||||
// Check aspect ratio to guess if double-page
|
||||
// For now, just auto-advance (page-split detection happens in orientation step)
|
||||
setSplitResult({ is_double_page: false })
|
||||
// Auto-advance if single page
|
||||
onNext()
|
||||
} catch (e) {
|
||||
setError(e instanceof Error ? e.message : String(e))
|
||||
} finally {
|
||||
setChecking(false)
|
||||
}
|
||||
}
|
||||
|
||||
const handleSplit = async () => {
|
||||
if (!sessionId) return
|
||||
setSplitting(true)
|
||||
setError('')
|
||||
try {
|
||||
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/page-split`, {
|
||||
method: 'POST',
|
||||
})
|
||||
if (!res.ok) {
|
||||
const data = await res.json().catch(() => ({}))
|
||||
throw new Error(data.detail || 'Split fehlgeschlagen')
|
||||
}
|
||||
const data = await res.json()
|
||||
if (data.sub_sessions?.length > 0) {
|
||||
onSubSessionsCreated(data.sub_sessions)
|
||||
}
|
||||
onNext()
|
||||
} catch (e) {
|
||||
setError(e instanceof Error ? e.message : String(e))
|
||||
} finally {
|
||||
setSplitting(false)
|
||||
}
|
||||
}
|
||||
|
||||
if (checking) {
|
||||
return <div className="text-sm text-gray-500 py-8 text-center">Pruefe Seitenformat...</div>
|
||||
}
|
||||
|
||||
if (splitResult?.is_double_page) {
|
||||
return (
|
||||
<div className="space-y-4 p-6 bg-blue-50 dark:bg-blue-900/20 rounded-xl border border-blue-200 dark:border-blue-800">
|
||||
<h3 className="text-sm font-medium text-blue-700 dark:text-blue-300">
|
||||
Doppelseite erkannt
|
||||
</h3>
|
||||
<p className="text-sm text-blue-600 dark:text-blue-400">
|
||||
Das Bild scheint eine Doppelseite zu sein. Soll es in zwei Einzelseiten aufgeteilt werden?
|
||||
</p>
|
||||
<div className="flex gap-2">
|
||||
<button
|
||||
onClick={handleSplit}
|
||||
disabled={splitting}
|
||||
className="px-4 py-2 bg-blue-600 text-white text-sm rounded-lg hover:bg-blue-700 disabled:opacity-50"
|
||||
>
|
||||
{splitting ? 'Wird aufgeteilt...' : 'Aufteilen'}
|
||||
</button>
|
||||
<button
|
||||
onClick={onNext}
|
||||
className="px-4 py-2 bg-gray-200 dark:bg-gray-700 text-sm rounded-lg hover:bg-gray-300"
|
||||
>
|
||||
Einzelseite beibehalten
|
||||
</button>
|
||||
</div>
|
||||
{error && <div className="text-sm text-red-500">{error}</div>}
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
return (
|
||||
<div className="text-sm text-gray-500 py-8 text-center">
|
||||
Einzelseite erkannt — weiter zum naechsten Schritt.
|
||||
{error && <div className="text-sm text-red-500 mt-2">{error}</div>}
|
||||
</div>
|
||||
)
|
||||
}
|
||||
13
admin-lehrer/components/ocr-kombi/StepStructure.tsx
Normal file
13
admin-lehrer/components/ocr-kombi/StepStructure.tsx
Normal file
@@ -0,0 +1,13 @@
|
||||
'use client'
|
||||
|
||||
import { StepStructureDetection } from '@/components/ocr-pipeline/StepStructureDetection'
|
||||
|
||||
interface StepStructureProps {
|
||||
sessionId: string | null
|
||||
onNext: () => void
|
||||
}
|
||||
|
||||
/** Thin wrapper around the shared StepStructureDetection component */
|
||||
export function StepStructure({ sessionId, onNext }: StepStructureProps) {
|
||||
return <StepStructureDetection sessionId={sessionId} onNext={onNext} />
|
||||
}
|
||||
147
admin-lehrer/components/ocr-kombi/StepUpload.tsx
Normal file
147
admin-lehrer/components/ocr-kombi/StepUpload.tsx
Normal file
@@ -0,0 +1,147 @@
|
||||
'use client'
|
||||
|
||||
import { useState, useCallback } from 'react'
|
||||
import { DOCUMENT_CATEGORIES, type DocumentCategory } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
|
||||
const KLAUSUR_API = '/klausur-api'
|
||||
|
||||
interface StepUploadProps {
|
||||
onUploaded: (sessionId: string) => void
|
||||
}
|
||||
|
||||
export function StepUpload({ onUploaded }: StepUploadProps) {
|
||||
const [dragging, setDragging] = useState(false)
|
||||
const [uploading, setUploading] = useState(false)
|
||||
const [title, setTitle] = useState('')
|
||||
const [category, setCategory] = useState<DocumentCategory>('vokabelseite')
|
||||
const [error, setError] = useState('')
|
||||
|
||||
const handleUpload = useCallback(async (file: File) => {
|
||||
setUploading(true)
|
||||
setError('')
|
||||
|
||||
try {
|
||||
const formData = new FormData()
|
||||
formData.append('file', file)
|
||||
if (title.trim()) formData.append('name', title.trim())
|
||||
|
||||
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions`, {
|
||||
method: 'POST',
|
||||
body: formData,
|
||||
})
|
||||
|
||||
if (!res.ok) {
|
||||
const data = await res.json().catch(() => ({}))
|
||||
throw new Error(data.detail || `Upload fehlgeschlagen (${res.status})`)
|
||||
}
|
||||
|
||||
const data = await res.json()
|
||||
const sid = data.session_id || data.id
|
||||
|
||||
// Set category
|
||||
if (category) {
|
||||
await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`, {
|
||||
method: 'PUT',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({ document_category: category }),
|
||||
})
|
||||
}
|
||||
|
||||
onUploaded(sid)
|
||||
} catch (e) {
|
||||
setError(e instanceof Error ? e.message : String(e))
|
||||
} finally {
|
||||
setUploading(false)
|
||||
}
|
||||
}, [title, category, onUploaded])
|
||||
|
||||
const handleDrop = useCallback((e: React.DragEvent) => {
|
||||
e.preventDefault()
|
||||
setDragging(false)
|
||||
const file = e.dataTransfer.files[0]
|
||||
if (file) handleUpload(file)
|
||||
}, [handleUpload])
|
||||
|
||||
const handleFileSelect = useCallback((e: React.ChangeEvent<HTMLInputElement>) => {
|
||||
const file = e.target.files?.[0]
|
||||
if (file) handleUpload(file)
|
||||
}, [handleUpload])
|
||||
|
||||
return (
|
||||
<div className="space-y-4">
|
||||
{/* Title input */}
|
||||
<div>
|
||||
<label className="block text-sm font-medium text-gray-700 dark:text-gray-300 mb-1">
|
||||
Titel (optional)
|
||||
</label>
|
||||
<input
|
||||
type="text"
|
||||
value={title}
|
||||
onChange={(e) => setTitle(e.target.value)}
|
||||
placeholder="z.B. Vokabeln Unit 3"
|
||||
className="w-full px-3 py-2 border border-gray-300 dark:border-gray-600 rounded-lg bg-white dark:bg-gray-800 text-sm"
|
||||
/>
|
||||
</div>
|
||||
|
||||
{/* Category selector */}
|
||||
<div>
|
||||
<label className="block text-sm font-medium text-gray-700 dark:text-gray-300 mb-1">
|
||||
Kategorie
|
||||
</label>
|
||||
<div className="grid grid-cols-4 gap-1.5">
|
||||
{DOCUMENT_CATEGORIES.map(cat => (
|
||||
<button
|
||||
key={cat.value}
|
||||
onClick={() => setCategory(cat.value)}
|
||||
className={`text-xs px-2 py-1.5 rounded-md text-left transition-colors ${
|
||||
category === cat.value
|
||||
? 'bg-teal-100 dark:bg-teal-900/40 text-teal-700 dark:text-teal-300 ring-1 ring-teal-400'
|
||||
: 'bg-gray-50 dark:bg-gray-700 text-gray-600 dark:text-gray-400 hover:bg-gray-100'
|
||||
}`}
|
||||
>
|
||||
{cat.icon} {cat.label}
|
||||
</button>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Drop zone */}
|
||||
<div
|
||||
onDragOver={(e) => { e.preventDefault(); setDragging(true) }}
|
||||
onDragLeave={() => setDragging(false)}
|
||||
onDrop={handleDrop}
|
||||
className={`border-2 border-dashed rounded-xl p-12 text-center transition-colors ${
|
||||
dragging
|
||||
? 'border-teal-400 bg-teal-50 dark:bg-teal-900/20'
|
||||
: 'border-gray-300 dark:border-gray-600 hover:border-gray-400'
|
||||
}`}
|
||||
>
|
||||
{uploading ? (
|
||||
<div className="text-sm text-gray-500">Wird hochgeladen...</div>
|
||||
) : (
|
||||
<>
|
||||
<div className="text-4xl mb-3">📤</div>
|
||||
<div className="text-sm text-gray-600 dark:text-gray-400 mb-2">
|
||||
Bild oder PDF hierher ziehen
|
||||
</div>
|
||||
<label className="inline-block px-4 py-2 bg-teal-600 text-white text-sm rounded-lg cursor-pointer hover:bg-teal-700">
|
||||
Datei auswaehlen
|
||||
<input
|
||||
type="file"
|
||||
accept="image/*,.pdf"
|
||||
onChange={handleFileSelect}
|
||||
className="hidden"
|
||||
/>
|
||||
</label>
|
||||
</>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{error && (
|
||||
<div className="text-sm text-red-500 bg-red-50 dark:bg-red-900/20 p-3 rounded-lg">
|
||||
{error}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)
|
||||
}
|
||||
@@ -150,9 +150,18 @@ export const navigation: NavCategory[] = [
|
||||
audience: ['Entwickler', 'Data Scientists'],
|
||||
subgroup: 'KI-Werkzeuge',
|
||||
},
|
||||
{
|
||||
id: 'ocr-kombi',
|
||||
name: 'OCR Kombi',
|
||||
href: '/ai/ocr-kombi',
|
||||
description: 'Modulare 11-Schritt-Pipeline',
|
||||
purpose: 'Modulare OCR-Pipeline mit Dual-Engine (PP-OCRv5 + Tesseract), Strukturerkennung, Grid-Aufbau und Review. Multi-Page-Dokument-Unterstuetzung.',
|
||||
audience: ['Entwickler'],
|
||||
subgroup: 'KI-Werkzeuge',
|
||||
},
|
||||
{
|
||||
id: 'ocr-overlay',
|
||||
name: 'OCR Overlay',
|
||||
name: 'OCR Overlay (Legacy)',
|
||||
href: '/ai/ocr-overlay',
|
||||
description: 'Ganzseitige Overlay-Rekonstruktion',
|
||||
purpose: 'Arbeitsblatt ohne Spaltenerkennung direkt als Overlay rekonstruieren. Vereinfachte 7-Schritt-Pipeline.',
|
||||
|
||||
@@ -46,6 +46,7 @@ from ocr_pipeline_api import router as ocr_pipeline_router, _cache as ocr_pipeli
|
||||
from grid_editor_api import router as grid_editor_router
|
||||
from orientation_crop_api import router as orientation_crop_router, set_cache_ref as set_orientation_crop_cache
|
||||
from ocr_pipeline_session_store import init_ocr_pipeline_tables
|
||||
from ocr_kombi.router import router as ocr_kombi_router
|
||||
try:
|
||||
from handwriting_htr_api import router as htr_router
|
||||
except ImportError:
|
||||
@@ -186,6 +187,7 @@ if htr_router:
|
||||
app.include_router(htr_router) # Handwriting HTR (Klausur)
|
||||
if dsfa_rag_router:
|
||||
app.include_router(dsfa_rag_router) # DSFA RAG Corpus Search
|
||||
app.include_router(ocr_kombi_router) # OCR Kombi Pipeline (modular)
|
||||
|
||||
|
||||
# =============================================
|
||||
|
||||
@@ -0,0 +1,12 @@
|
||||
-- Migration: Add document_group_id and page_number for multi-page document grouping.
|
||||
-- A document_group_id groups multiple sessions that belong to the same scanned document.
|
||||
-- page_number is the 1-based page index within the group.
|
||||
|
||||
ALTER TABLE ocr_pipeline_sessions
|
||||
ADD COLUMN IF NOT EXISTS document_group_id UUID,
|
||||
ADD COLUMN IF NOT EXISTS page_number INT;
|
||||
|
||||
-- Index for efficient group lookups
|
||||
CREATE INDEX IF NOT EXISTS idx_ocr_sessions_document_group
|
||||
ON ocr_pipeline_sessions (document_group_id)
|
||||
WHERE document_group_id IS NOT NULL;
|
||||
1
klausur-service/backend/ocr_kombi/__init__.py
Normal file
1
klausur-service/backend/ocr_kombi/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
"""OCR Kombi Pipeline - modular step-based OCR processing."""
|
||||
19
klausur-service/backend/ocr_kombi/router.py
Normal file
19
klausur-service/backend/ocr_kombi/router.py
Normal file
@@ -0,0 +1,19 @@
|
||||
"""
|
||||
Composite router for the OCR Kombi pipeline.
|
||||
|
||||
Aggregates step-specific sub-routers into one router for main.py to include.
|
||||
"""
|
||||
|
||||
from fastapi import APIRouter
|
||||
|
||||
from .step_upload import router as upload_router
|
||||
|
||||
router = APIRouter(prefix="/api/v1/ocr-kombi", tags=["ocr-kombi"])
|
||||
|
||||
# Include step-specific routes
|
||||
router.include_router(upload_router)
|
||||
|
||||
# Future steps will be added here:
|
||||
# from .step_orientation import router as orientation_router
|
||||
# router.include_router(orientation_router)
|
||||
# ...
|
||||
132
klausur-service/backend/ocr_kombi/step_upload.py
Normal file
132
klausur-service/backend/ocr_kombi/step_upload.py
Normal file
@@ -0,0 +1,132 @@
|
||||
"""
|
||||
Step 1: Upload — handles single images and multi-page PDFs.
|
||||
|
||||
Multi-page PDFs are split into individual PNG pages, each getting its own
|
||||
session linked by a shared document_group_id.
|
||||
"""
|
||||
|
||||
import io
|
||||
import uuid
|
||||
import logging
|
||||
import time
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import APIRouter, UploadFile, File, Form, HTTPException
|
||||
|
||||
from ocr_pipeline_session_store import create_session_db, get_document_group_sessions
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
def _pdf_to_pngs(pdf_bytes: bytes) -> list[bytes]:
|
||||
"""Convert a PDF to a list of PNG byte buffers (one per page)."""
|
||||
try:
|
||||
import fitz # PyMuPDF
|
||||
except ImportError:
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail="PDF-Verarbeitung nicht verfuegbar (PyMuPDF fehlt)"
|
||||
)
|
||||
|
||||
doc = fitz.open(stream=pdf_bytes, filetype="pdf")
|
||||
pages: list[bytes] = []
|
||||
for page in doc:
|
||||
# Render at 300 DPI for OCR quality
|
||||
mat = fitz.Matrix(300 / 72, 300 / 72)
|
||||
pix = page.get_pixmap(matrix=mat)
|
||||
pages.append(pix.tobytes("png"))
|
||||
doc.close()
|
||||
return pages
|
||||
|
||||
|
||||
@router.post("/upload")
|
||||
async def upload_document(
|
||||
file: UploadFile = File(...),
|
||||
name: Optional[str] = Form(None),
|
||||
document_category: Optional[str] = Form(None),
|
||||
):
|
||||
"""Upload a single image or multi-page PDF.
|
||||
|
||||
Single image: Creates 1 session with document_group_id + page_number=1.
|
||||
Multi-page PDF: Creates N sessions with shared document_group_id,
|
||||
page_number 1..N, and titles "Title — S. X".
|
||||
"""
|
||||
t0 = time.time()
|
||||
file_bytes = await file.read()
|
||||
filename = file.filename or "upload"
|
||||
base_title = name or filename.rsplit(".", 1)[0]
|
||||
|
||||
is_pdf = (
|
||||
filename.lower().endswith(".pdf")
|
||||
or file.content_type == "application/pdf"
|
||||
or file_bytes[:4] == b"%PDF"
|
||||
)
|
||||
|
||||
group_id = str(uuid.uuid4())
|
||||
created_sessions = []
|
||||
|
||||
if is_pdf:
|
||||
pages = _pdf_to_pngs(file_bytes)
|
||||
if not pages:
|
||||
raise HTTPException(status_code=400, detail="PDF enthaelt keine Seiten")
|
||||
|
||||
for i, png_bytes in enumerate(pages, start=1):
|
||||
session_id = str(uuid.uuid4())
|
||||
page_title = f"{base_title} — S. {i}" if len(pages) > 1 else base_title
|
||||
session = await create_session_db(
|
||||
session_id=session_id,
|
||||
name=page_title,
|
||||
filename=filename,
|
||||
original_png=png_bytes,
|
||||
document_group_id=group_id,
|
||||
page_number=i,
|
||||
)
|
||||
created_sessions.append({
|
||||
"session_id": session["id"],
|
||||
"name": session["name"],
|
||||
"page_number": i,
|
||||
})
|
||||
else:
|
||||
# Single image
|
||||
session_id = str(uuid.uuid4())
|
||||
session = await create_session_db(
|
||||
session_id=session_id,
|
||||
name=base_title,
|
||||
filename=filename,
|
||||
original_png=file_bytes,
|
||||
document_group_id=group_id,
|
||||
page_number=1,
|
||||
)
|
||||
created_sessions.append({
|
||||
"session_id": session["id"],
|
||||
"name": session["name"],
|
||||
"page_number": 1,
|
||||
})
|
||||
|
||||
duration = round(time.time() - t0, 2)
|
||||
logger.info(
|
||||
"Upload complete: %d page(s), group=%s, %.2fs",
|
||||
len(created_sessions), group_id, duration,
|
||||
)
|
||||
|
||||
return {
|
||||
"document_group_id": group_id,
|
||||
"page_count": len(created_sessions),
|
||||
"sessions": created_sessions,
|
||||
"duration_seconds": duration,
|
||||
}
|
||||
|
||||
|
||||
@router.get("/documents/{group_id}")
|
||||
async def get_document_group(group_id: str):
|
||||
"""Get all sessions in a document group, sorted by page_number."""
|
||||
sessions = await get_document_group_sessions(group_id)
|
||||
if not sessions:
|
||||
raise HTTPException(status_code=404, detail="Dokumentgruppe nicht gefunden")
|
||||
return {
|
||||
"document_group_id": group_id,
|
||||
"page_count": len(sessions),
|
||||
"sessions": sessions,
|
||||
}
|
||||
@@ -76,7 +76,16 @@ async def init_ocr_pipeline_tables():
|
||||
ADD COLUMN IF NOT EXISTS parent_session_id UUID REFERENCES ocr_pipeline_sessions(id) ON DELETE CASCADE,
|
||||
ADD COLUMN IF NOT EXISTS box_index INT,
|
||||
ADD COLUMN IF NOT EXISTS grid_editor_result JSONB,
|
||||
ADD COLUMN IF NOT EXISTS structure_result JSONB
|
||||
ADD COLUMN IF NOT EXISTS structure_result JSONB,
|
||||
ADD COLUMN IF NOT EXISTS document_group_id UUID,
|
||||
ADD COLUMN IF NOT EXISTS page_number INT
|
||||
""")
|
||||
|
||||
# Index for document group lookups
|
||||
await conn.execute("""
|
||||
CREATE INDEX IF NOT EXISTS idx_ocr_sessions_document_group
|
||||
ON ocr_pipeline_sessions (document_group_id)
|
||||
WHERE document_group_id IS NOT NULL
|
||||
""")
|
||||
|
||||
|
||||
@@ -91,21 +100,26 @@ async def create_session_db(
|
||||
original_png: bytes,
|
||||
parent_session_id: Optional[str] = None,
|
||||
box_index: Optional[int] = None,
|
||||
document_group_id: Optional[str] = None,
|
||||
page_number: Optional[int] = None,
|
||||
) -> Dict[str, Any]:
|
||||
"""Create a new OCR pipeline session.
|
||||
|
||||
Args:
|
||||
parent_session_id: If set, this is a sub-session for a box region.
|
||||
box_index: 0-based index of the box this sub-session represents.
|
||||
document_group_id: Groups multi-page uploads into one document.
|
||||
page_number: 1-based page index within the document group.
|
||||
"""
|
||||
pool = await get_pool()
|
||||
parent_uuid = uuid.UUID(parent_session_id) if parent_session_id else None
|
||||
group_uuid = uuid.UUID(document_group_id) if document_group_id else None
|
||||
async with pool.acquire() as conn:
|
||||
row = await conn.fetchrow("""
|
||||
INSERT INTO ocr_pipeline_sessions (
|
||||
id, name, filename, original_png, status, current_step,
|
||||
parent_session_id, box_index
|
||||
) VALUES ($1, $2, $3, $4, 'active', 1, $5, $6)
|
||||
parent_session_id, box_index, document_group_id, page_number
|
||||
) VALUES ($1, $2, $3, $4, 'active', 1, $5, $6, $7, $8)
|
||||
RETURNING id, name, filename, status, current_step,
|
||||
orientation_result, crop_result,
|
||||
deskew_result, dewarp_result, column_result, row_result,
|
||||
@@ -114,9 +128,10 @@ async def create_session_db(
|
||||
document_category, pipeline_log,
|
||||
grid_editor_result, structure_result,
|
||||
parent_session_id, box_index,
|
||||
document_group_id, page_number,
|
||||
created_at, updated_at
|
||||
""", uuid.UUID(session_id), name, filename, original_png,
|
||||
parent_uuid, box_index)
|
||||
parent_uuid, box_index, group_uuid, page_number)
|
||||
|
||||
return _row_to_dict(row)
|
||||
|
||||
@@ -134,6 +149,7 @@ async def get_session_db(session_id: str) -> Optional[Dict[str, Any]]:
|
||||
document_category, pipeline_log,
|
||||
grid_editor_result, structure_result,
|
||||
parent_session_id, box_index,
|
||||
document_group_id, page_number,
|
||||
created_at, updated_at
|
||||
FROM ocr_pipeline_sessions WHERE id = $1
|
||||
""", uuid.UUID(session_id))
|
||||
@@ -186,6 +202,7 @@ async def update_session_db(session_id: str, **kwargs) -> Optional[Dict[str, Any
|
||||
'document_category', 'pipeline_log',
|
||||
'grid_editor_result', 'structure_result',
|
||||
'parent_session_id', 'box_index',
|
||||
'document_group_id', 'page_number',
|
||||
}
|
||||
|
||||
jsonb_fields = {'orientation_result', 'crop_result', 'deskew_result', 'dewarp_result', 'column_result', 'row_result', 'word_result', 'ground_truth', 'handwriting_removal_meta', 'doc_type_result', 'pipeline_log', 'grid_editor_result', 'structure_result'}
|
||||
@@ -217,8 +234,9 @@ async def update_session_db(session_id: str, **kwargs) -> Optional[Dict[str, Any
|
||||
word_result, ground_truth, auto_shear_degrees,
|
||||
doc_type, doc_type_result,
|
||||
document_category, pipeline_log,
|
||||
grid_editor_result,
|
||||
grid_editor_result, structure_result,
|
||||
parent_session_id, box_index,
|
||||
document_group_id, page_number,
|
||||
created_at, updated_at
|
||||
""", *values)
|
||||
|
||||
@@ -243,6 +261,7 @@ async def list_sessions_db(
|
||||
SELECT id, name, filename, status, current_step,
|
||||
document_category, doc_type,
|
||||
parent_session_id, box_index,
|
||||
document_group_id, page_number,
|
||||
created_at, updated_at
|
||||
FROM ocr_pipeline_sessions
|
||||
{where}
|
||||
@@ -261,6 +280,7 @@ async def get_sub_sessions(parent_session_id: str) -> List[Dict[str, Any]]:
|
||||
SELECT id, name, filename, status, current_step,
|
||||
document_category, doc_type,
|
||||
parent_session_id, box_index,
|
||||
document_group_id, page_number,
|
||||
created_at, updated_at
|
||||
FROM ocr_pipeline_sessions
|
||||
WHERE parent_session_id = $1
|
||||
@@ -270,6 +290,24 @@ async def get_sub_sessions(parent_session_id: str) -> List[Dict[str, Any]]:
|
||||
return [_row_to_dict(row) for row in rows]
|
||||
|
||||
|
||||
async def get_document_group_sessions(document_group_id: str) -> List[Dict[str, Any]]:
|
||||
"""Get all sessions in a document group, ordered by page_number."""
|
||||
pool = await get_pool()
|
||||
async with pool.acquire() as conn:
|
||||
rows = await conn.fetch("""
|
||||
SELECT id, name, filename, status, current_step,
|
||||
document_category, doc_type,
|
||||
parent_session_id, box_index,
|
||||
document_group_id, page_number,
|
||||
created_at, updated_at
|
||||
FROM ocr_pipeline_sessions
|
||||
WHERE document_group_id = $1
|
||||
ORDER BY page_number ASC
|
||||
""", uuid.UUID(document_group_id))
|
||||
|
||||
return [_row_to_dict(row) for row in rows]
|
||||
|
||||
|
||||
async def list_ground_truth_sessions_db() -> List[Dict[str, Any]]:
|
||||
"""List sessions that have a build_grid_reference in ground_truth."""
|
||||
pool = await get_pool()
|
||||
@@ -324,7 +362,7 @@ def _row_to_dict(row: asyncpg.Record) -> Dict[str, Any]:
|
||||
result = dict(row)
|
||||
|
||||
# UUID → string
|
||||
for key in ['id', 'session_id', 'parent_session_id']:
|
||||
for key in ['id', 'session_id', 'parent_session_id', 'document_group_id']:
|
||||
if key in result and result[key] is not None:
|
||||
result[key] = str(result[key])
|
||||
|
||||
|
||||
Reference in New Issue
Block a user