Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 29s
CI / test-go-edu-search (push) Successful in 28s
CI / test-python-klausur (push) Failing after 2m24s
CI / test-python-agent-core (push) Successful in 22s
CI / test-nodejs-website (push) Successful in 20s
Phase 1 of the clean architecture refactor: Replaces the 751-line ocr-overlay monolith with a modular pipeline. Each step gets its own component file. Frontend: /ai/ocr-kombi route with 11 steps (Upload, Orientation, PageSplit, Deskew, Dewarp, ContentCrop, OCR, Structure, GridBuild, GridReview, GroundTruth). Session list supports document grouping for multi-page uploads. Backend: New ocr_kombi/ module with multi-page PDF upload (splits PDF into N sessions with shared document_group_id). DB migration adds document_group_id and page_number columns. Old /ai/ocr-overlay remains fully functional for A/B testing. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
124 lines
3.9 KiB
TypeScript
124 lines
3.9 KiB
TypeScript
'use client'
|
|
|
|
import { useState, useEffect } from 'react'
|
|
import type { SubSession } from '@/app/(admin)/ai/ocr-pipeline/types'
|
|
|
|
// Path prefix prepended to every OCR pipeline request URL built in this file.
// NOTE(review): presumably a same-origin proxy/rewrite to the backend service — confirm in the app's routing config.
const KLAUSUR_API = '/klausur-api'
|
|
|
|
/** Props accepted by the page-split step component. */
interface StepPageSplitProps {
  /** ID of the session to inspect; while it is null the step issues no requests. */
  sessionId: string | null

  /** Invoked to advance to the next step. */
  onNext: () => void

  /** Receives the sub-sessions reported by the backend when the response contains at least one. */
  onSubSessionsCreated: (subs: SubSession[]) => void
}
|
|
|
|
/**
 * Step 3: Page split detection.
 *
 * Whenever `sessionId` is set or changes, the session is fetched. If it already
 * has sub-sessions, they are handed to `onSubSessionsCreated` and the step
 * advances. Otherwise the image is treated as a single page and the step
 * auto-advances; no double-page detection is performed here yet, so the split
 * prompt is only rendered once `splitResult.is_double_page` can become true.
 *
 * @param sessionId - Session to inspect; nothing is fetched while it is null.
 * @param onNext - Invoked to advance to the next step.
 * @param onSubSessionsCreated - Receives the sub-sessions returned by the backend.
 */
export function StepPageSplit({ sessionId, onNext, onSubSessionsCreated }: StepPageSplitProps) {
  const [checking, setChecking] = useState(false)
  const [splitResult, setSplitResult] = useState<{ is_double_page: boolean; pages?: number } | null>(null)
  const [splitting, setSplitting] = useState(false)
  const [error, setError] = useState('')

  useEffect(() => {
    if (!sessionId) return

    // Flipped by the cleanup below. A response that arrives after the session
    // changed or the component unmounted (including React StrictMode's
    // development-time double invocation of effects) must not advance the
    // wizard or touch state, otherwise onNext() can fire twice.
    let cancelled = false

    const checkPageSplit = async () => {
      setChecking(true)
      setError('')
      // Drop any result that belongs to a previously inspected session.
      setSplitResult(null)
      try {
        const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}`)
        if (!res.ok) throw new Error('Session nicht gefunden')
        const data = await res.json()
        if (cancelled) return

        // If sub-sessions already exist, this was already split
        if (data.sub_sessions?.length > 0) {
          onSubSessionsCreated(data.sub_sessions)
          onNext()
          return
        }

        // For now, just auto-advance (page-split detection happens in orientation step)
        setSplitResult({ is_double_page: false })
        onNext()
      } catch (e) {
        if (!cancelled) setError(e instanceof Error ? e.message : String(e))
      } finally {
        if (!cancelled) setChecking(false)
      }
    }

    void checkPageSplit()

    return () => {
      cancelled = true
    }
    // Callbacks are intentionally excluded: re-running on every new callback
    // identity from the parent would re-trigger the check and the auto-advance.
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [sessionId])

  /** Asks the backend to split the session into sub-sessions, then advances. */
  const handleSplit = async () => {
    if (!sessionId) return
    setSplitting(true)
    setError('')
    try {
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/page-split`, {
        method: 'POST',
      })
      if (!res.ok) {
        // The error body may not be JSON; fall back to the generic message.
        const data = await res.json().catch(() => ({}))
        throw new Error(data.detail || 'Split fehlgeschlagen')
      }
      const data = await res.json()
      if (data.sub_sessions?.length > 0) {
        onSubSessionsCreated(data.sub_sessions)
      }
      onNext()
    } catch (e) {
      setError(e instanceof Error ? e.message : String(e))
    } finally {
      setSplitting(false)
    }
  }

  if (checking) {
    return <div className="text-sm text-gray-500 py-8 text-center">Pruefe Seitenformat...</div>
  }

  if (splitResult?.is_double_page) {
    return (
      <div className="space-y-4 p-6 bg-blue-50 dark:bg-blue-900/20 rounded-xl border border-blue-200 dark:border-blue-800">
        <h3 className="text-sm font-medium text-blue-700 dark:text-blue-300">
          Doppelseite erkannt
        </h3>
        <p className="text-sm text-blue-600 dark:text-blue-400">
          Das Bild scheint eine Doppelseite zu sein. Soll es in zwei Einzelseiten aufgeteilt werden?
        </p>
        <div className="flex gap-2">
          <button
            onClick={handleSplit}
            disabled={splitting}
            className="px-4 py-2 bg-blue-600 text-white text-sm rounded-lg hover:bg-blue-700 disabled:opacity-50"
          >
            {splitting ? 'Wird aufgeteilt...' : 'Aufteilen'}
          </button>
          {/* Disabled while a split is running: handleSplit also calls onNext on success. */}
          <button
            onClick={onNext}
            disabled={splitting}
            className="px-4 py-2 bg-gray-200 dark:bg-gray-700 text-sm rounded-lg hover:bg-gray-300 disabled:opacity-50"
          >
            Einzelseite beibehalten
          </button>
        </div>
        {error && <div className="text-sm text-red-500">{error}</div>}
      </div>
    )
  }

  // Only claim "single page detected" when the check did not fail.
  return (
    <div className="text-sm text-gray-500 py-8 text-center">
      {error ? (
        <div className="text-sm text-red-500">{error}</div>
      ) : (
        'Einzelseite erkannt — weiter zum naechsten Schritt.'
      )}
    </div>
  )
}
|