Files
breakpilot-lehrer/admin-lehrer/components/ocr-kombi/StepPageSplit.tsx
Benjamin Admin d26a9f60ab
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 29s
CI / test-go-edu-search (push) Successful in 28s
CI / test-python-klausur (push) Failing after 2m24s
CI / test-python-agent-core (push) Successful in 22s
CI / test-nodejs-website (push) Successful in 20s
Add OCR Kombi Pipeline: modular 11-step architecture with multi-page support
Phase 1 of the clean architecture refactor: Replaces the 751-line ocr-overlay
monolith with a modular pipeline. Each step gets its own component file.

Frontend: /ai/ocr-kombi route with 11 steps (Upload, Orientation, PageSplit,
Deskew, Dewarp, ContentCrop, OCR, Structure, GridBuild, GridReview, GroundTruth).
Session list supports document grouping for multi-page uploads.

Backend: New ocr_kombi/ module with multi-page PDF upload (splits PDF into N
sessions with shared document_group_id). DB migration adds document_group_id
and page_number columns.

Old /ai/ocr-overlay remains fully functional for A/B testing.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-26 15:55:28 +01:00

124 lines
3.9 KiB
TypeScript

'use client'
import { useState, useEffect } from 'react'
import type { SubSession } from '@/app/(admin)/ai/ocr-pipeline/types'
// Path prefix prepended to every fetch URL built in this file (the ocr-pipeline session endpoints).
const KLAUSUR_API = '/klausur-api'
/** Props accepted by the StepPageSplit component. */
interface StepPageSplitProps {
/** ID of the session to inspect; while it is null the component issues no request. */
sessionId: string | null
/** Invoked to move on to the following step (after the automatic check, after a successful split, or when the user keeps the single page). */
onNext: () => void
/** Invoked with the `sub_sessions` array of a backend response whenever that array is non-empty. */
onSubSessionsCreated: (subs: SubSession[]) => void
}
/**
 * Step 3: Page split.
 *
 * Whenever `sessionId` becomes available or changes, the session is fetched
 * from the backend:
 * - if it already carries sub-sessions, they are handed to
 *   `onSubSessionsCreated` and the step advances via `onNext`;
 * - otherwise the page is recorded as a single page and the step advances
 *   via `onNext` as well.
 *
 * NOTE: this component does not detect double pages itself; the check below
 * always stores `is_double_page: false`, so the split prompt (and
 * `handleSplit`) is currently not reachable through the automatic check.
 *
 * The automatic check is cancelled when `sessionId` changes or the component
 * unmounts, so a stale or duplicated run (e.g. the double effect invocation
 * of React Strict Mode in development) can never call `onNext` a second time.
 *
 * @param props.sessionId Session to inspect; nothing happens while it is null.
 * @param props.onNext Advances the surrounding pipeline to the next step.
 * @param props.onSubSessionsCreated Receives the sub-sessions returned by the backend.
 */
export function StepPageSplit({ sessionId, onNext, onSubSessionsCreated }: StepPageSplitProps) {
  const [checking, setChecking] = useState(false)
  const [splitResult, setSplitResult] = useState<{ is_double_page: boolean; pages?: number } | null>(null)
  const [splitting, setSplitting] = useState(false)
  const [error, setError] = useState('')

  useEffect(() => {
    if (!sessionId) return
    // One controller per effect run: aborting it in the cleanup cancels the
    // in-flight request and marks every later continuation of this run as stale.
    const controller = new AbortController()
    void checkPageSplit(controller.signal)
    return () => controller.abort()
    // Callbacks are deliberately left out of the deps so that a new callback
    // identity from the parent does not re-trigger the check.
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [sessionId])

  /**
   * Loads the session and auto-advances. All side effects after an `await`
   * are skipped once `signal` has been aborted.
   */
  const checkPageSplit = async (signal: AbortSignal) => {
    if (!sessionId) return
    setChecking(true)
    setError('')
    try {
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}`, { signal })
      if (!res.ok) throw new Error('Session nicht gefunden')
      const data = await res.json()
      if (signal.aborted) return

      // If sub-sessions already exist, this was already split
      if (data.sub_sessions?.length > 0) {
        onSubSessionsCreated(data.sub_sessions)
        onNext()
        return
      }

      // For now, just auto-advance (page-split detection happens in orientation step)
      setSplitResult({ is_double_page: false })
      // Auto-advance if single page
      onNext()
    } catch (e) {
      // An aborted request rejects too; that is a cancellation, not an error.
      if (signal.aborted) return
      setError(e instanceof Error ? e.message : String(e))
    } finally {
      // A newer run may already have set `checking` to true; do not clear it
      // on behalf of a cancelled run.
      if (!signal.aborted) setChecking(false)
    }
  }

  /**
   * Asks the backend to split the session into sub-sessions, reports them
   * (if any) and advances. Failures are shown via `error`.
   */
  const handleSplit = async () => {
    if (!sessionId) return
    setSplitting(true)
    setError('')
    try {
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/page-split`, {
        method: 'POST',
      })
      if (!res.ok) {
        // The error body is optional; fall back to a generic message.
        const data = await res.json().catch(() => ({}))
        throw new Error(data.detail || 'Split fehlgeschlagen')
      }
      const data = await res.json()
      if (data.sub_sessions?.length > 0) {
        onSubSessionsCreated(data.sub_sessions)
      }
      onNext()
    } catch (e) {
      setError(e instanceof Error ? e.message : String(e))
    } finally {
      setSplitting(false)
    }
  }

  if (checking) {
    return <div className="text-sm text-gray-500 py-8 text-center">Pruefe Seitenformat...</div>
  }

  if (splitResult?.is_double_page) {
    return (
      <div className="space-y-4 p-6 bg-blue-50 dark:bg-blue-900/20 rounded-xl border border-blue-200 dark:border-blue-800">
        <h3 className="text-sm font-medium text-blue-700 dark:text-blue-300">
          Doppelseite erkannt
        </h3>
        <p className="text-sm text-blue-600 dark:text-blue-400">
          Das Bild scheint eine Doppelseite zu sein. Soll es in zwei Einzelseiten aufgeteilt werden?
        </p>
        <div className="flex gap-2">
          <button
            onClick={handleSplit}
            disabled={splitting}
            className="px-4 py-2 bg-blue-600 text-white text-sm rounded-lg hover:bg-blue-700 disabled:opacity-50"
          >
            {splitting ? 'Wird aufgeteilt...' : 'Aufteilen'}
          </button>
          {/* Disabled while a split is running so onNext cannot fire twice. */}
          <button
            onClick={onNext}
            disabled={splitting}
            className="px-4 py-2 bg-gray-200 dark:bg-gray-700 text-sm rounded-lg hover:bg-gray-300 disabled:opacity-50"
          >
            Einzelseite beibehalten
          </button>
        </div>
        {error && <div className="text-sm text-red-500">{error}</div>}
      </div>
    )
  }

  return (
    <div className="text-sm text-gray-500 py-8 text-center">
      Einzelseite erkannt weiter zum naechsten Schritt.
      {error && <div className="text-sm text-red-500 mt-2">{error}</div>}
    </div>
  )
}