Compare commits
18 Commits
d552fd8b6b
...
2297f66edb
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2297f66edb | ||
|
|
db8327f039 | ||
|
|
587b066a40 | ||
|
|
03fa186fec | ||
|
|
1040729874 | ||
|
|
4f37afa222 | ||
|
|
bb879a03a8 | ||
|
|
f535d3c967 | ||
|
|
7a3570fe46 | ||
|
|
1393a994f9 | ||
|
|
cf27a95308 | ||
|
|
aa06ae0f61 | ||
|
|
09b820efbe | ||
|
|
ff2bb79a91 | ||
|
|
fb496c5e34 | ||
|
|
9df745574b | ||
|
|
44e8c573af | ||
|
|
589d2f811a |
@@ -1,18 +1,27 @@
|
||||
'use client'
|
||||
|
||||
import { useState } from 'react'
|
||||
import { useCallback, useEffect, useState } from 'react'
|
||||
import { PagePurpose } from '@/components/common/PagePurpose'
|
||||
import { PipelineStepper } from '@/components/ocr-pipeline/PipelineStepper'
|
||||
import { StepDeskew } from '@/components/ocr-pipeline/StepDeskew'
|
||||
import { StepDewarp } from '@/components/ocr-pipeline/StepDewarp'
|
||||
import { StepColumnDetection } from '@/components/ocr-pipeline/StepColumnDetection'
|
||||
import { StepWordRecognition } from '@/components/ocr-pipeline/StepWordRecognition'
|
||||
import { StepCoordinates } from '@/components/ocr-pipeline/StepCoordinates'
|
||||
import { StepReconstruction } from '@/components/ocr-pipeline/StepReconstruction'
|
||||
import { StepGroundTruth } from '@/components/ocr-pipeline/StepGroundTruth'
|
||||
import { PIPELINE_STEPS, type PipelineStep } from './types'
|
||||
import { PIPELINE_STEPS, type PipelineStep, type SessionListItem } from './types'
|
||||
|
||||
const KLAUSUR_API = '/klausur-api'
|
||||
|
||||
export default function OcrPipelinePage() {
|
||||
const [currentStep, setCurrentStep] = useState(0)
|
||||
const [sessionId, setSessionId] = useState<string | null>(null)
|
||||
const [sessionName, setSessionName] = useState<string>('')
|
||||
const [sessions, setSessions] = useState<SessionListItem[]>([])
|
||||
const [loadingSessions, setLoadingSessions] = useState(true)
|
||||
const [editingName, setEditingName] = useState<string | null>(null)
|
||||
const [editNameValue, setEditNameValue] = useState('')
|
||||
const [steps, setSteps] = useState<PipelineStep[]>(
|
||||
PIPELINE_STEPS.map((s, i) => ({
|
||||
...s,
|
||||
@@ -20,6 +29,82 @@ export default function OcrPipelinePage() {
|
||||
})),
|
||||
)
|
||||
|
||||
// Load session list on mount
|
||||
useEffect(() => {
|
||||
loadSessions()
|
||||
}, [])
|
||||
|
||||
const loadSessions = async () => {
|
||||
setLoadingSessions(true)
|
||||
try {
|
||||
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions`)
|
||||
if (res.ok) {
|
||||
const data = await res.json()
|
||||
setSessions(data.sessions || [])
|
||||
}
|
||||
} catch (e) {
|
||||
console.error('Failed to load sessions:', e)
|
||||
} finally {
|
||||
setLoadingSessions(false)
|
||||
}
|
||||
}
|
||||
|
||||
const openSession = useCallback(async (sid: string) => {
|
||||
try {
|
||||
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`)
|
||||
if (!res.ok) return
|
||||
const data = await res.json()
|
||||
|
||||
setSessionId(sid)
|
||||
setSessionName(data.name || data.filename || '')
|
||||
|
||||
// Determine which step to jump to based on current_step
|
||||
const dbStep = data.current_step || 1
|
||||
// Steps: 1=deskew, 2=dewarp, 3=columns, ...
|
||||
// UI steps are 0-indexed: 0=deskew, 1=dewarp, 2=columns, ...
|
||||
const uiStep = Math.max(0, dbStep - 1)
|
||||
|
||||
setSteps(
|
||||
PIPELINE_STEPS.map((s, i) => ({
|
||||
...s,
|
||||
status: i < uiStep ? 'completed' : i === uiStep ? 'active' : 'pending',
|
||||
})),
|
||||
)
|
||||
setCurrentStep(uiStep)
|
||||
} catch (e) {
|
||||
console.error('Failed to open session:', e)
|
||||
}
|
||||
}, [])
|
||||
|
||||
const deleteSession = useCallback(async (sid: string) => {
|
||||
try {
|
||||
await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`, { method: 'DELETE' })
|
||||
setSessions((prev) => prev.filter((s) => s.id !== sid))
|
||||
if (sessionId === sid) {
|
||||
setSessionId(null)
|
||||
setCurrentStep(0)
|
||||
setSteps(PIPELINE_STEPS.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
|
||||
}
|
||||
} catch (e) {
|
||||
console.error('Failed to delete session:', e)
|
||||
}
|
||||
}, [sessionId])
|
||||
|
||||
const renameSession = useCallback(async (sid: string, newName: string) => {
|
||||
try {
|
||||
await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`, {
|
||||
method: 'PUT',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({ name: newName }),
|
||||
})
|
||||
setSessions((prev) => prev.map((s) => (s.id === sid ? { ...s, name: newName } : s)))
|
||||
if (sessionId === sid) setSessionName(newName)
|
||||
} catch (e) {
|
||||
console.error('Failed to rename session:', e)
|
||||
}
|
||||
setEditingName(null)
|
||||
}, [sessionId])
|
||||
|
||||
const handleStepClick = (index: number) => {
|
||||
if (index <= currentStep || steps[index].status === 'completed') {
|
||||
setCurrentStep(index)
|
||||
@@ -39,19 +124,45 @@ export default function OcrPipelinePage() {
|
||||
}
|
||||
}
|
||||
|
||||
const handleDeskewComplete = (sid: string) => {
|
||||
setSessionId(sid)
|
||||
// Reload session list to show the new session
|
||||
loadSessions()
|
||||
handleNext()
|
||||
}
|
||||
|
||||
const handleNewSession = () => {
|
||||
setSessionId(null)
|
||||
setSessionName('')
|
||||
setCurrentStep(0)
|
||||
setSteps(PIPELINE_STEPS.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
|
||||
}
|
||||
|
||||
const stepNames: Record<number, string> = {
|
||||
1: 'Begradigung',
|
||||
2: 'Entzerrung',
|
||||
3: 'Spalten',
|
||||
4: 'Woerter',
|
||||
5: 'Koordinaten',
|
||||
6: 'Rekonstruktion',
|
||||
7: 'Validierung',
|
||||
}
|
||||
|
||||
const renderStep = () => {
|
||||
switch (currentStep) {
|
||||
case 0:
|
||||
return <StepDeskew onNext={handleNext} />
|
||||
return <StepDeskew sessionId={sessionId} onNext={handleDeskewComplete} />
|
||||
case 1:
|
||||
return <StepColumnDetection />
|
||||
return <StepDewarp sessionId={sessionId} onNext={handleNext} />
|
||||
case 2:
|
||||
return <StepWordRecognition />
|
||||
return <StepColumnDetection sessionId={sessionId} onNext={handleNext} />
|
||||
case 3:
|
||||
return <StepCoordinates />
|
||||
return <StepWordRecognition />
|
||||
case 4:
|
||||
return <StepReconstruction />
|
||||
return <StepCoordinates />
|
||||
case 5:
|
||||
return <StepReconstruction />
|
||||
case 6:
|
||||
return <StepGroundTruth />
|
||||
default:
|
||||
return null
|
||||
@@ -66,7 +177,7 @@ export default function OcrPipelinePage() {
|
||||
audience={['Entwickler', 'Data Scientists']}
|
||||
architecture={{
|
||||
services: ['klausur-service (FastAPI)', 'OpenCV', 'Tesseract'],
|
||||
databases: ['In-Memory Sessions'],
|
||||
databases: ['PostgreSQL Sessions'],
|
||||
}}
|
||||
relatedPages={[
|
||||
{ name: 'OCR Vergleich', href: '/ai/ocr-compare', description: 'Methoden-Vergleich' },
|
||||
@@ -75,6 +186,97 @@ export default function OcrPipelinePage() {
|
||||
defaultCollapsed
|
||||
/>
|
||||
|
||||
{/* Session List */}
|
||||
<div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-4">
|
||||
<div className="flex items-center justify-between mb-3">
|
||||
<h3 className="text-sm font-medium text-gray-700 dark:text-gray-300">
|
||||
Sessions
|
||||
</h3>
|
||||
<button
|
||||
onClick={handleNewSession}
|
||||
className="text-xs px-3 py-1.5 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors"
|
||||
>
|
||||
+ Neue Session
|
||||
</button>
|
||||
</div>
|
||||
|
||||
{loadingSessions ? (
|
||||
<div className="text-sm text-gray-400 py-2">Lade Sessions...</div>
|
||||
) : sessions.length === 0 ? (
|
||||
<div className="text-sm text-gray-400 py-2">Noch keine Sessions vorhanden.</div>
|
||||
) : (
|
||||
<div className="space-y-1 max-h-48 overflow-y-auto">
|
||||
{sessions.map((s) => (
|
||||
<div
|
||||
key={s.id}
|
||||
className={`flex items-center gap-2 px-3 py-2 rounded-lg text-sm transition-colors cursor-pointer ${
|
||||
sessionId === s.id
|
||||
? 'bg-teal-50 dark:bg-teal-900/30 border border-teal-200 dark:border-teal-700'
|
||||
: 'hover:bg-gray-50 dark:hover:bg-gray-700/50'
|
||||
}`}
|
||||
>
|
||||
<div className="flex-1 min-w-0" onClick={() => openSession(s.id)}>
|
||||
{editingName === s.id ? (
|
||||
<input
|
||||
autoFocus
|
||||
value={editNameValue}
|
||||
onChange={(e) => setEditNameValue(e.target.value)}
|
||||
onBlur={() => renameSession(s.id, editNameValue)}
|
||||
onKeyDown={(e) => {
|
||||
if (e.key === 'Enter') renameSession(s.id, editNameValue)
|
||||
if (e.key === 'Escape') setEditingName(null)
|
||||
}}
|
||||
onClick={(e) => e.stopPropagation()}
|
||||
className="w-full px-1 py-0.5 text-sm border rounded dark:bg-gray-700 dark:border-gray-600"
|
||||
/>
|
||||
) : (
|
||||
<div className="truncate font-medium text-gray-700 dark:text-gray-300">
|
||||
{s.name || s.filename}
|
||||
</div>
|
||||
)}
|
||||
<div className="text-xs text-gray-400 flex gap-2">
|
||||
<span>{new Date(s.created_at).toLocaleDateString('de-DE', { day: '2-digit', month: '2-digit', year: '2-digit', hour: '2-digit', minute: '2-digit' })}</span>
|
||||
<span>Schritt {s.current_step}: {stepNames[s.current_step] || '?'}</span>
|
||||
</div>
|
||||
</div>
|
||||
<button
|
||||
onClick={(e) => {
|
||||
e.stopPropagation()
|
||||
setEditNameValue(s.name || s.filename)
|
||||
setEditingName(s.id)
|
||||
}}
|
||||
className="p-1 text-gray-400 hover:text-gray-600 dark:hover:text-gray-300"
|
||||
title="Umbenennen"
|
||||
>
|
||||
<svg className="w-3.5 h-3.5" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={2}>
|
||||
<path strokeLinecap="round" strokeLinejoin="round" d="M15.232 5.232l3.536 3.536m-2.036-5.036a2.5 2.5 0 113.536 3.536L6.5 21.036H3v-3.572L16.732 3.732z" />
|
||||
</svg>
|
||||
</button>
|
||||
<button
|
||||
onClick={(e) => {
|
||||
e.stopPropagation()
|
||||
if (confirm('Session loeschen?')) deleteSession(s.id)
|
||||
}}
|
||||
className="p-1 text-gray-400 hover:text-red-500"
|
||||
title="Loeschen"
|
||||
>
|
||||
<svg className="w-3.5 h-3.5" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={2}>
|
||||
<path strokeLinecap="round" strokeLinejoin="round" d="M19 7l-.867 12.142A2 2 0 0116.138 21H7.862a2 2 0 01-1.995-1.858L5 7m5 4v6m4-6v6m1-10V4a1 1 0 00-1-1h-4a1 1 0 00-1 1v3M4 7h16" />
|
||||
</svg>
|
||||
</button>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* Active session name */}
|
||||
{sessionId && sessionName && (
|
||||
<div className="text-sm text-gray-500 dark:text-gray-400">
|
||||
Aktive Session: <span className="font-medium text-gray-700 dark:text-gray-300">{sessionName}</span>
|
||||
</div>
|
||||
)}
|
||||
|
||||
<PipelineStepper steps={steps} currentStep={currentStep} onStepClick={handleStepClick} />
|
||||
|
||||
<div className="min-h-[400px]">{renderStep()}</div>
|
||||
|
||||
@@ -7,12 +7,27 @@ export interface PipelineStep {
|
||||
status: PipelineStepStatus
|
||||
}
|
||||
|
||||
export interface SessionListItem {
|
||||
id: string
|
||||
name: string
|
||||
filename: string
|
||||
status: string
|
||||
current_step: number
|
||||
created_at: string
|
||||
updated_at?: string
|
||||
}
|
||||
|
||||
export interface SessionInfo {
|
||||
session_id: string
|
||||
filename: string
|
||||
name?: string
|
||||
image_width: number
|
||||
image_height: number
|
||||
original_image_url: string
|
||||
current_step?: number
|
||||
deskew_result?: DeskewResult
|
||||
dewarp_result?: DewarpResult
|
||||
column_result?: ColumnResult
|
||||
}
|
||||
|
||||
export interface DeskewResult {
|
||||
@@ -33,8 +48,52 @@ export interface DeskewGroundTruth {
|
||||
notes?: string
|
||||
}
|
||||
|
||||
export interface DewarpResult {
|
||||
session_id: string
|
||||
method_used: 'vertical_edge' | 'manual' | 'none'
|
||||
shear_degrees: number
|
||||
confidence: number
|
||||
duration_seconds: number
|
||||
dewarped_image_url: string
|
||||
}
|
||||
|
||||
export interface DewarpGroundTruth {
|
||||
is_correct: boolean
|
||||
corrected_shear?: number
|
||||
notes?: string
|
||||
}
|
||||
|
||||
export interface PageRegion {
|
||||
type: 'column_en' | 'column_de' | 'column_example' | 'page_ref'
|
||||
| 'column_marker' | 'column_text' | 'column_ignore' | 'header' | 'footer'
|
||||
x: number
|
||||
y: number
|
||||
width: number
|
||||
height: number
|
||||
classification_confidence?: number
|
||||
classification_method?: string
|
||||
}
|
||||
|
||||
export interface ColumnResult {
|
||||
columns: PageRegion[]
|
||||
duration_seconds: number
|
||||
}
|
||||
|
||||
export interface ColumnGroundTruth {
|
||||
is_correct: boolean
|
||||
corrected_columns?: PageRegion[]
|
||||
notes?: string
|
||||
}
|
||||
|
||||
export interface ManualColumnDivider {
|
||||
xPercent: number // Position in % of image width (0-100)
|
||||
}
|
||||
|
||||
export type ColumnTypeKey = PageRegion['type']
|
||||
|
||||
export const PIPELINE_STEPS: PipelineStep[] = [
|
||||
{ id: 'deskew', name: 'Begradigung', icon: '📐', status: 'pending' },
|
||||
{ id: 'dewarp', name: 'Entzerrung', icon: '🔧', status: 'pending' },
|
||||
{ id: 'columns', name: 'Spalten', icon: '📊', status: 'pending' },
|
||||
{ id: 'words', name: 'Woerter', icon: '🔤', status: 'pending' },
|
||||
{ id: 'coordinates', name: 'Koordinaten', icon: '📍', status: 'pending' },
|
||||
|
||||
@@ -1011,6 +1011,53 @@ const REGULATIONS = [
|
||||
keyTopics: ['Bussgeldberechnung', 'Schweregrad', 'Milderungsgruende', 'Bussgeldrahmen'],
|
||||
effectiveDate: '2022'
|
||||
},
|
||||
// =====================================================================
|
||||
// Neu ingestierte EU-Richtlinien (Februar 2026)
|
||||
// =====================================================================
|
||||
{
|
||||
code: 'E_COMMERCE_RL',
|
||||
name: 'E-Commerce-Richtlinie',
|
||||
fullName: 'Richtlinie 2000/31/EG ueber den elektronischen Geschaeftsverkehr',
|
||||
type: 'eu_directive',
|
||||
expected: 30,
|
||||
description: 'EU-Richtlinie ueber den elektronischen Geschaeftsverkehr (E-Commerce). Regelt Herkunftslandprinzip, Informationspflichten, Haftungsprivilegien fuer Vermittler (Mere Conduit, Caching, Hosting).',
|
||||
relevantFor: ['Online-Dienste', 'E-Commerce', 'Hosting-Anbieter', 'Plattformen'],
|
||||
keyTopics: ['Herkunftslandprinzip', 'Haftungsprivileg', 'Informationspflichten', 'Spam-Verbot', 'Vermittlerhaftung'],
|
||||
effectiveDate: '17. Juli 2000'
|
||||
},
|
||||
{
|
||||
code: 'VERBRAUCHERRECHTE_RL',
|
||||
name: 'Verbraucherrechte-Richtlinie',
|
||||
fullName: 'Richtlinie 2011/83/EU ueber die Rechte der Verbraucher',
|
||||
type: 'eu_directive',
|
||||
expected: 25,
|
||||
description: 'EU-weite Harmonisierung der Verbraucherrechte bei Fernabsatz und aussergeschaeftlichen Vertraegen. 14-Tage-Widerrufsrecht, Informationspflichten, digitale Inhalte.',
|
||||
relevantFor: ['Online-Shops', 'E-Commerce', 'Fernabsatz', 'Dienstleister'],
|
||||
keyTopics: ['Widerrufsrecht 14 Tage', 'Informationspflichten', 'Fernabsatzvertraege', 'Digitale Inhalte'],
|
||||
effectiveDate: '13. Juni 2014'
|
||||
},
|
||||
{
|
||||
code: 'DIGITALE_INHALTE_RL',
|
||||
name: 'Digitale-Inhalte-Richtlinie',
|
||||
fullName: 'Richtlinie (EU) 2019/770 ueber digitale Inhalte und Dienstleistungen',
|
||||
type: 'eu_directive',
|
||||
expected: 20,
|
||||
description: 'Gewaehrleistungsrecht fuer digitale Inhalte und Dienstleistungen. Regelt Maengelhaftung, Updates, Vertragsmaessigkeit und Kuendigungsrechte bei digitalen Produkten.',
|
||||
relevantFor: ['SaaS-Anbieter', 'App-Entwickler', 'Cloud-Dienste', 'Streaming-Anbieter', 'Software-Hersteller'],
|
||||
keyTopics: ['Digitale Gewaehrleistung', 'Update-Pflicht', 'Vertragsmaessigkeit', 'Kuendigungsrecht', 'Datenportabilitaet'],
|
||||
effectiveDate: '1. Januar 2022'
|
||||
},
|
||||
{
|
||||
code: 'DMA',
|
||||
name: 'Digital Markets Act',
|
||||
fullName: 'Verordnung (EU) 2022/1925 - Digital Markets Act',
|
||||
type: 'eu_regulation',
|
||||
expected: 50,
|
||||
description: 'Reguliert digitale Gatekeeper-Plattformen. Stellt Verhaltensregeln fuer grosse Plattformen auf (Apple, Google, Meta, Amazon, Microsoft). Verbietet Selbstbevorzugung und erzwingt Interoperabilitaet.',
|
||||
relevantFor: ['Grosse Plattformen', 'App-Stores', 'Suchmaschinen', 'Social Media', 'Messenger-Dienste'],
|
||||
keyTopics: ['Gatekeeper-Pflichten', 'Interoperabilitaet', 'Selbstbevorzugung', 'App-Store-Regeln', 'Datenportabilitaet'],
|
||||
effectiveDate: '2. Mai 2023'
|
||||
},
|
||||
]
|
||||
|
||||
// License info for each regulation
|
||||
@@ -1099,8 +1146,31 @@ const REGULATION_LICENSES: Record<string, { license: string; licenseNote: string
|
||||
LU_DPA_LAW: { license: 'PUBLIC_DOMAIN', licenseNote: 'Amtliches Werk Luxemburg — frei verwendbar' },
|
||||
DK_DATABESKYTTELSESLOVEN: { license: 'PUBLIC_DOMAIN', licenseNote: 'Amtliches Werk Daenemark — frei verwendbar' },
|
||||
EDPB_GUIDELINES_1_2022: { license: 'EDPB-LICENSE', licenseNote: 'EDPB Document License' },
|
||||
// Neue EU-Richtlinien (Februar 2026 ingestiert)
|
||||
E_COMMERCE_RL: { license: 'PUBLIC_DOMAIN', licenseNote: 'EU-Richtlinie — amtliches Werk' },
|
||||
VERBRAUCHERRECHTE_RL: { license: 'PUBLIC_DOMAIN', licenseNote: 'EU-Richtlinie — amtliches Werk' },
|
||||
DIGITALE_INHALTE_RL: { license: 'PUBLIC_DOMAIN', licenseNote: 'EU-Richtlinie — amtliches Werk' },
|
||||
DMA: { license: 'PUBLIC_DOMAIN', licenseNote: 'EU-Verordnung — amtliches Werk' },
|
||||
}
|
||||
|
||||
// Regulations that are currently ingested in RAG (Qdrant collections)
|
||||
// Updated: 2026-02-27
|
||||
const REGULATIONS_IN_RAG = new Set([
|
||||
// EU Verordnungen/Richtlinien (bp_compliance_ce)
|
||||
'GDPR', 'EPRIVACY', 'SCC', 'SCC_FULL_TEXT', 'AIACT', 'CRA', 'NIS2', 'DGA', 'DSA', 'PLD',
|
||||
'E_COMMERCE_RL', 'VERBRAUCHERRECHTE_RL', 'DIGITALE_INHALTE_RL', 'DMA',
|
||||
// DE Gesetze (bp_compliance_gesetze)
|
||||
'TDDDG', 'BDSG_FULL', 'DE_DDG', 'DE_BGB_AGB', 'DE_EGBGB', 'DE_HGB_RET', 'DE_AO_RET',
|
||||
// BSI Standards (bp_compliance_gesetze)
|
||||
'BSI-TR-03161-1', 'BSI-TR-03161-2', 'BSI-TR-03161-3',
|
||||
// Nationale Datenschutzgesetze (bp_compliance_gesetze)
|
||||
'AT_DSG', 'CH_DSG', 'ES_LOPDGDD', 'IT_CODICE_PRIVACY', 'NL_UAVG', 'FR_CNIL_GUIDE',
|
||||
'IE_DPA_2018', 'UK_DPA_2018', 'UK_GDPR', 'NO_PERSONOPPLYSNINGSLOVEN', 'SE_DATASKYDDSLAG',
|
||||
'PL_UODO', 'CZ_ZOU', 'HU_INFOTV',
|
||||
// EDPB Guidelines (bp_compliance_datenschutz)
|
||||
'EDPB_GUIDELINES_5_2020',
|
||||
])
|
||||
|
||||
// License display labels
|
||||
const LICENSE_LABELS: Record<string, string> = {
|
||||
PUBLIC_DOMAIN: 'Public Domain',
|
||||
@@ -1804,7 +1874,7 @@ export default function RAGPage() {
|
||||
{/* Page Purpose */}
|
||||
<PagePurpose
|
||||
title="Daten & RAG"
|
||||
purpose="Verwalten und durchsuchen Sie 4 RAG-Collections: Legal Corpus (24 Regulierungen), DSFA Corpus (70+ Quellen inkl. internationaler Datenschutzgesetze), NiBiS EH (Bildungsinhalte) und Legal Templates (Dokumentvorlagen). Teil der KI-Daten-Pipeline fuer Compliance und Klausur-Korrektur."
|
||||
purpose={`Verwalten und durchsuchen Sie 7 RAG-Collections mit ${REGULATIONS.length} Regulierungen (${REGULATIONS_IN_RAG.size} im RAG). Legal Corpus, DSFA Corpus (70+ Quellen), NiBiS EH (Bildungsinhalte) und Legal Templates. Teil der KI-Daten-Pipeline fuer Compliance und Klausur-Korrektur.`}
|
||||
audience={['DSB', 'Compliance Officer', 'Entwickler']}
|
||||
gdprArticles={['§5 UrhG (Amtliche Werke)', 'Art. 5 DSGVO (Rechenschaftspflicht)']}
|
||||
architecture={{
|
||||
@@ -1836,12 +1906,12 @@ export default function RAGPage() {
|
||||
</div>
|
||||
<div className="bg-white rounded-xl p-4 border border-slate-200">
|
||||
<p className="text-xs font-medium text-emerald-600 uppercase mb-1">NiBiS EH</p>
|
||||
<p className="text-2xl font-bold text-slate-900">28.662</p>
|
||||
<p className="text-2xl font-bold text-slate-900">7.996</p>
|
||||
<p className="text-xs text-slate-500">Chunks · Bildungs-Erwartungshorizonte</p>
|
||||
</div>
|
||||
<div className="bg-white rounded-xl p-4 border border-slate-200">
|
||||
<p className="text-xs font-medium text-orange-600 uppercase mb-1">Legal Templates</p>
|
||||
<p className="text-2xl font-bold text-slate-900">824</p>
|
||||
<p className="text-2xl font-bold text-slate-900">7.689</p>
|
||||
<p className="text-xs text-slate-500">Chunks · Dokumentvorlagen</p>
|
||||
</div>
|
||||
</div>
|
||||
@@ -1889,12 +1959,12 @@ export default function RAGPage() {
|
||||
</button>
|
||||
<div className="p-4 rounded-lg border border-emerald-200 bg-emerald-50 text-left">
|
||||
<p className="text-xs font-medium text-emerald-600 uppercase">NiBiS EH</p>
|
||||
<p className="text-2xl font-bold text-slate-900 mt-1">28.662</p>
|
||||
<p className="text-2xl font-bold text-slate-900 mt-1">7.996</p>
|
||||
<p className="text-xs text-slate-500 mt-1">Chunks · Bildungs-Erwartungshorizonte</p>
|
||||
</div>
|
||||
<div className="p-4 rounded-lg border border-orange-200 bg-orange-50 text-left">
|
||||
<p className="text-xs font-medium text-orange-600 uppercase">Legal Templates</p>
|
||||
<p className="text-2xl font-bold text-slate-900 mt-1">824</p>
|
||||
<p className="text-2xl font-bold text-slate-900 mt-1">7.689</p>
|
||||
<p className="text-xs text-slate-500 mt-1">Chunks · Dokumentvorlagen (VVT, TOM, DSFA)</p>
|
||||
</div>
|
||||
</div>
|
||||
@@ -1995,7 +2065,13 @@ export default function RAGPage() {
|
||||
{regulationCategory === 'regulations' && (
|
||||
<div className="bg-white rounded-xl border border-slate-200 overflow-hidden">
|
||||
<div className="px-4 py-3 border-b bg-slate-50 flex items-center justify-between">
|
||||
<h3 className="font-semibold text-slate-900">Alle {REGULATIONS.length} Regulierungen</h3>
|
||||
<h3 className="font-semibold text-slate-900">
|
||||
Alle {REGULATIONS.length} Regulierungen
|
||||
<span className="ml-2 text-sm font-normal text-slate-500">
|
||||
({REGULATIONS.filter(r => REGULATIONS_IN_RAG.has(r.code)).length} im RAG,{' '}
|
||||
{REGULATIONS.filter(r => !REGULATIONS_IN_RAG.has(r.code)).length} ausstehend)
|
||||
</span>
|
||||
</h3>
|
||||
<button
|
||||
onClick={fetchStatus}
|
||||
className="text-sm text-teal-600 hover:text-teal-700"
|
||||
@@ -2007,6 +2083,7 @@ export default function RAGPage() {
|
||||
<table className="w-full">
|
||||
<thead className="bg-slate-50 border-b">
|
||||
<tr>
|
||||
<th className="px-4 py-3 text-center text-xs font-medium text-slate-500 uppercase w-12">RAG</th>
|
||||
<th className="px-4 py-3 text-left text-xs font-medium text-slate-500 uppercase">Code</th>
|
||||
<th className="px-4 py-3 text-left text-xs font-medium text-slate-500 uppercase">Typ</th>
|
||||
<th className="px-4 py-3 text-left text-xs font-medium text-slate-500 uppercase">Name</th>
|
||||
@@ -2036,6 +2113,13 @@ export default function RAGPage() {
|
||||
onClick={() => setExpandedRegulation(isExpanded ? null : reg.code)}
|
||||
className="hover:bg-slate-50 cursor-pointer transition-colors"
|
||||
>
|
||||
<td className="px-4 py-3 text-center">
|
||||
{REGULATIONS_IN_RAG.has(reg.code) ? (
|
||||
<span className="inline-flex items-center justify-center w-6 h-6 bg-green-100 text-green-600 rounded-full text-xs font-bold" title="Im RAG vorhanden">✓</span>
|
||||
) : (
|
||||
<span className="inline-flex items-center justify-center w-6 h-6 bg-red-50 text-red-400 rounded-full text-xs font-bold" title="Nicht im RAG">✗</span>
|
||||
)}
|
||||
</td>
|
||||
<td className="px-4 py-3 font-mono font-medium text-teal-600">
|
||||
<span className="inline-flex items-center gap-2">
|
||||
<span className={`transform transition-transform ${isExpanded ? 'rotate-90' : ''}`}>▶</span>
|
||||
@@ -2054,7 +2138,7 @@ export default function RAGPage() {
|
||||
</tr>
|
||||
{isExpanded && (
|
||||
<tr key={`${reg.code}-detail`} className="bg-slate-50">
|
||||
<td colSpan={6} className="px-4 py-4">
|
||||
<td colSpan={7} className="px-4 py-4">
|
||||
<div className="bg-white rounded-lg border border-slate-200 p-4 space-y-3">
|
||||
<div>
|
||||
<h4 className="font-semibold text-slate-900 mb-1">{reg.fullName}</h4>
|
||||
@@ -2232,7 +2316,7 @@ export default function RAGPage() {
|
||||
<div className="grid grid-cols-3 gap-4 mb-4">
|
||||
<div className="bg-emerald-50 rounded-lg p-4 border border-emerald-200">
|
||||
<p className="text-sm text-emerald-600 font-medium">Chunks</p>
|
||||
<p className="text-2xl font-bold text-slate-900">28.662</p>
|
||||
<p className="text-2xl font-bold text-slate-900">7.996</p>
|
||||
</div>
|
||||
<div className="bg-emerald-50 rounded-lg p-4 border border-emerald-200">
|
||||
<p className="text-sm text-emerald-600 font-medium">Vector Size</p>
|
||||
@@ -2264,7 +2348,7 @@ export default function RAGPage() {
|
||||
<div className="grid grid-cols-3 gap-4 mb-4">
|
||||
<div className="bg-orange-50 rounded-lg p-4 border border-orange-200">
|
||||
<p className="text-sm text-orange-600 font-medium">Chunks</p>
|
||||
<p className="text-2xl font-bold text-slate-900">824</p>
|
||||
<p className="text-2xl font-bold text-slate-900">7.689</p>
|
||||
</div>
|
||||
<div className="bg-orange-50 rounded-lg p-4 border border-orange-200">
|
||||
<p className="text-sm text-orange-600 font-medium">Vector Size</p>
|
||||
@@ -2332,20 +2416,28 @@ export default function RAGPage() {
|
||||
</div>
|
||||
</div>
|
||||
<div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-3">
|
||||
{regs.map((reg) => (
|
||||
{regs.map((reg) => {
|
||||
const isInRag = REGULATIONS_IN_RAG.has(reg.code)
|
||||
return (
|
||||
<div
|
||||
key={reg.code}
|
||||
className="bg-white p-3 rounded-lg border border-slate-200"
|
||||
className={`bg-white p-3 rounded-lg border ${isInRag ? 'border-green-200' : 'border-slate-200'}`}
|
||||
>
|
||||
<div className="flex items-center gap-2 mb-1">
|
||||
<span className={`px-2 py-0.5 text-xs rounded ${TYPE_COLORS[reg.type]}`}>
|
||||
{reg.code}
|
||||
</span>
|
||||
{isInRag ? (
|
||||
<span className="px-1.5 py-0.5 text-[10px] font-bold bg-green-100 text-green-600 rounded">RAG</span>
|
||||
) : (
|
||||
<span className="px-1.5 py-0.5 text-[10px] font-bold bg-red-50 text-red-400 rounded">✗</span>
|
||||
)}
|
||||
</div>
|
||||
<div className="font-medium text-sm text-slate-900">{reg.name}</div>
|
||||
<div className="text-xs text-slate-500 mt-1 line-clamp-2">{reg.description}</div>
|
||||
</div>
|
||||
))}
|
||||
)
|
||||
})}
|
||||
</div>
|
||||
</>
|
||||
)
|
||||
@@ -2372,17 +2464,22 @@ export default function RAGPage() {
|
||||
<div className="flex flex-wrap gap-2">
|
||||
{group.regulations.map((code) => {
|
||||
const reg = REGULATIONS.find(r => r.code === code)
|
||||
const isInRag = REGULATIONS_IN_RAG.has(code)
|
||||
return (
|
||||
<span
|
||||
key={code}
|
||||
className="px-3 py-1.5 bg-slate-100 rounded-full text-sm font-medium text-slate-700 hover:bg-slate-200 cursor-pointer"
|
||||
className={`px-3 py-1.5 rounded-full text-sm font-medium cursor-pointer ${
|
||||
isInRag
|
||||
? 'bg-green-100 text-green-700 hover:bg-green-200'
|
||||
: 'bg-slate-100 text-slate-700 hover:bg-slate-200'
|
||||
}`}
|
||||
onClick={() => {
|
||||
setActiveTab('regulations')
|
||||
setExpandedRegulation(code)
|
||||
}}
|
||||
title={reg?.fullName || code}
|
||||
title={`${reg?.fullName || code}${isInRag ? ' (im RAG)' : ' (nicht im RAG)'}`}
|
||||
>
|
||||
{code}
|
||||
{isInRag ? '✓ ' : '✗ '}{code}
|
||||
</span>
|
||||
)
|
||||
})}
|
||||
@@ -2443,8 +2540,15 @@ export default function RAGPage() {
|
||||
<tbody className="divide-y">
|
||||
{REGULATIONS.map((reg) => (
|
||||
<tr key={reg.code} className="hover:bg-slate-50">
|
||||
<td className="px-2 py-2 font-medium text-teal-600 sticky left-0 bg-white">
|
||||
{reg.code}
|
||||
<td className="px-2 py-2 font-medium sticky left-0 bg-white">
|
||||
<span className="flex items-center gap-1">
|
||||
{REGULATIONS_IN_RAG.has(reg.code) ? (
|
||||
<span className="text-green-500 text-[10px]">●</span>
|
||||
) : (
|
||||
<span className="text-red-300 text-[10px]">○</span>
|
||||
)}
|
||||
<span className="text-teal-600">{reg.code}</span>
|
||||
</span>
|
||||
</td>
|
||||
{INDUSTRIES.filter(i => i.id !== 'all').map((industry) => {
|
||||
const applies = INDUSTRY_REGULATION_MAP[industry.id]?.includes(reg.code)
|
||||
@@ -2531,27 +2635,33 @@ export default function RAGPage() {
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Integrated Regulations */}
|
||||
{/* RAG Coverage Overview */}
|
||||
<div className="bg-white rounded-xl border border-slate-200 p-6">
|
||||
<div className="flex items-center gap-3 mb-4">
|
||||
<span className="text-2xl">✅</span>
|
||||
<div>
|
||||
<h3 className="font-semibold text-slate-900">Neu integrierte Regulierungen</h3>
|
||||
<p className="text-sm text-slate-500">Jetzt im RAG-System verfuegbar (Stand: Januar 2025)</p>
|
||||
<h3 className="font-semibold text-slate-900">RAG-Abdeckung ({REGULATIONS_IN_RAG.size} von {REGULATIONS.length} Regulierungen)</h3>
|
||||
<p className="text-sm text-slate-500">Stand: Februar 2026 — Alle im RAG-System verfuegbaren Regulierungen</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div className="grid grid-cols-2 md:grid-cols-5 gap-3">
|
||||
{INTEGRATED_REGULATIONS.map((reg) => (
|
||||
<div key={reg.code} className="rounded-lg border border-green-200 bg-green-50 p-3 text-center">
|
||||
<span className="px-2 py-1 text-sm font-bold bg-green-100 text-green-700 rounded">
|
||||
{reg.code}
|
||||
</span>
|
||||
<p className="text-xs text-slate-600 mt-2">{reg.name}</p>
|
||||
<p className="text-xs text-green-600 mt-1">Im RAG</p>
|
||||
</div>
|
||||
<div className="flex flex-wrap gap-2">
|
||||
{REGULATIONS.filter(r => REGULATIONS_IN_RAG.has(r.code)).map((reg) => (
|
||||
<span key={reg.code} className="px-2.5 py-1 text-xs font-medium bg-green-100 text-green-700 rounded-full border border-green-200">
|
||||
✓ {reg.code}
|
||||
</span>
|
||||
))}
|
||||
</div>
|
||||
<div className="mt-4 pt-4 border-t border-slate-100">
|
||||
<p className="text-xs font-medium text-slate-500 mb-2">Noch nicht im RAG:</p>
|
||||
<div className="flex flex-wrap gap-2">
|
||||
{REGULATIONS.filter(r => !REGULATIONS_IN_RAG.has(r.code)).map((reg) => (
|
||||
<span key={reg.code} className="px-2.5 py-1 text-xs font-medium bg-red-50 text-red-400 rounded-full border border-red-100">
|
||||
✗ {reg.code}
|
||||
</span>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Potential Future Regulations */}
|
||||
@@ -2899,7 +3009,7 @@ export default function RAGPage() {
|
||||
<span className="flex items-center gap-2 text-teal-600">
|
||||
<svg className="animate-spin h-4 w-4" fill="none" viewBox="0 0 24 24">
|
||||
<circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
|
||||
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z" />
|
||||
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z" />
|
||||
</svg>
|
||||
Ingestion laeuft...
|
||||
</span>
|
||||
@@ -2969,7 +3079,7 @@ export default function RAGPage() {
|
||||
{pipelineStarting ? (
|
||||
<svg className="animate-spin h-4 w-4" fill="none" viewBox="0 0 24 24">
|
||||
<circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
|
||||
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z" />
|
||||
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z" />
|
||||
</svg>
|
||||
) : (
|
||||
<svg className="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||
@@ -2988,7 +3098,7 @@ export default function RAGPage() {
|
||||
{pipelineLoading ? (
|
||||
<svg className="animate-spin h-4 w-4" fill="none" viewBox="0 0 24 24">
|
||||
<circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
|
||||
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z" />
|
||||
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.7.689 3 7.938l3-2.647z" />
|
||||
</svg>
|
||||
) : (
|
||||
<svg className="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||
@@ -3021,7 +3131,7 @@ export default function RAGPage() {
|
||||
<>
|
||||
<svg className="animate-spin h-5 w-5" fill="none" viewBox="0 0 24 24">
|
||||
<circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
|
||||
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z" />
|
||||
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.7.689 3 7.938l3-2.647z" />
|
||||
</svg>
|
||||
Startet...
|
||||
</>
|
||||
@@ -3058,7 +3168,7 @@ export default function RAGPage() {
|
||||
{pipelineState.status === 'running' && (
|
||||
<svg className="w-6 h-6 text-blue-600 animate-spin" fill="none" viewBox="0 0 24 24">
|
||||
<circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
|
||||
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z" />
|
||||
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.7.689 3 7.938l3-2.647z" />
|
||||
</svg>
|
||||
)}
|
||||
{pipelineState.status === 'failed' && (
|
||||
|
||||
320
admin-lehrer/components/ocr-pipeline/ColumnControls.tsx
Normal file
320
admin-lehrer/components/ocr-pipeline/ColumnControls.tsx
Normal file
@@ -0,0 +1,320 @@
|
||||
'use client'
|
||||
|
||||
import { useState, useMemo } from 'react'
|
||||
import type { ColumnResult, ColumnGroundTruth, PageRegion } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
|
||||
/** Props accepted by the `ColumnControls` panel. */
interface ColumnControlsProps {
  /** Column detection result to display; the panel renders nothing while this is null. */
  columnResult: ColumnResult | null
  /** Click handler of the "Erneut erkennen" button. */
  onRerun: () => void
  /** Click handler of the "Manuell markieren" button. */
  onManualMode: () => void
  /** Click handler of the ground-truth enter/edit button. */
  onGtMode: () => void
  /** Receives the Ja/Nein verdict as `{ is_correct }`. */
  onGroundTruth: (gt: ColumnGroundTruth) => void
  /** Click handler of the "Weiter" button. */
  onNext: () => void
  /** Disables the "Erneut erkennen" button while true. */
  isDetecting: boolean
  /** Previously saved ground-truth columns; when present, the auto-vs-GT diff table is computed. */
  savedGtColumns: PageRegion[] | null
}
|
||||
|
||||
/** Badge class names per region type; callers fall back to '' for unknown types. */
const TYPE_COLORS: Record<string, string> = {
  column_en: 'bg-blue-100 text-blue-700 dark:bg-blue-900/30 dark:text-blue-400',
  column_de: 'bg-green-100 text-green-700 dark:bg-green-900/30 dark:text-green-400',
  column_example: 'bg-orange-100 text-orange-700 dark:bg-orange-900/30 dark:text-orange-400',
  column_text: 'bg-cyan-100 text-cyan-700 dark:bg-cyan-900/30 dark:text-cyan-400',
  page_ref: 'bg-purple-100 text-purple-700 dark:bg-purple-900/30 dark:text-purple-400',
  column_marker: 'bg-red-100 text-red-700 dark:bg-red-900/30 dark:text-red-400',
  column_ignore: 'bg-gray-100 text-gray-500 dark:bg-gray-700/30 dark:text-gray-500',
  // Header and footer intentionally share the same neutral badge.
  header: 'bg-gray-100 text-gray-600 dark:bg-gray-700/50 dark:text-gray-400',
  footer: 'bg-gray-100 text-gray-600 dark:bg-gray-700/50 dark:text-gray-400',
}

/** Short display label per region type; callers fall back to the raw type string. */
const TYPE_LABELS: Record<string, string> = {
  column_en: 'EN',
  column_de: 'DE',
  column_example: 'Beispiel',
  column_text: 'Text',
  page_ref: 'Seite',
  column_marker: 'Marker',
  column_ignore: 'Ignorieren',
  header: 'Header',
  footer: 'Footer',
}

/** Display label per classification method; callers fall back to the raw method string. */
const METHOD_LABELS: Record<string, string> = {
  content: 'Inhalt',
  position_enhanced: 'Position',
  position_fallback: 'Fallback',
}
|
||||
|
||||
/** One line of the auto-vs-ground-truth comparison table. */
interface DiffRow {
  /** 1-based running row number. */
  index: number
  /** Auto-detected column, or null when only the ground-truth side exists. */
  autoCol: PageRegion | null
  /** Ground-truth column, or null when only the auto side exists. */
  gtCol: PageRegion | null
  /** Ground-truth x minus auto x; null for unmatched rows. */
  diffX: number | null
  /** Ground-truth width minus auto width; null for unmatched rows. */
  diffW: number | null
  /** True when a matched pair disagrees on the region type. */
  typeMismatch: boolean
}
|
||||
|
||||
/**
 * Pair auto-detected columns with ground-truth (GT) columns and report how they differ.
 *
 * Matching is greedy and follows `autoCols` order: each auto column claims the
 * still-unclaimed GT column with the highest 1-D IoU (intersection over union of the
 * horizontal extents `[x, x + width]`). A pair is accepted only when that IoU is
 * strictly greater than `minIoU`.
 *
 * Rows come back in three groups: matched pairs, then auto columns without a GT
 * partner, then GT columns without an auto partner. They are numbered consecutively
 * starting at 1.
 *
 * @param autoCols Columns produced by automatic detection.
 * @param gtCols Ground-truth columns.
 * @param minIoU Exclusive lower bound on the IoU required for a match (default 0.3).
 * @returns One row per matched pair plus one row per unmatched column on either side.
 */
function computeDiff(autoCols: PageRegion[], gtCols: PageRegion[], minIoU = 0.3): DiffRow[] {
  const rows: DiffRow[] = []
  const claimedGt = new Set<number>()
  const unmatchedAuto: PageRegion[] = []

  // Pass 1: greedy auto -> GT matching by best X-axis overlap.
  for (const auto of autoCols) {
    let bestIdx = -1
    let bestIoU = 0

    for (let gi = 0; gi < gtCols.length; gi++) {
      if (claimedGt.has(gi)) continue
      const gt = gtCols[gi]
      const overlap = Math.max(
        0,
        Math.min(auto.x + auto.width, gt.x + gt.width) - Math.max(auto.x, gt.x),
      )
      const union = auto.width + gt.width - overlap
      // Two zero-width regions have no meaningful IoU; treat them as non-overlapping.
      const iou = union > 0 ? overlap / union : 0
      if (iou > bestIoU) {
        bestIoU = iou
        bestIdx = gi
      }
    }

    if (bestIdx >= 0 && bestIoU > minIoU) {
      claimedGt.add(bestIdx)
      const gt = gtCols[bestIdx]
      rows.push({
        index: rows.length + 1,
        autoCol: auto,
        gtCol: gt,
        diffX: gt.x - auto.x,
        diffW: gt.width - auto.width,
        typeMismatch: auto.type !== gt.type,
      })
    } else {
      unmatchedAuto.push(auto)
    }
  }

  // Pass 2: auto columns that found no partner.
  for (const auto of unmatchedAuto) {
    rows.push({
      index: rows.length + 1,
      autoCol: auto,
      gtCol: null,
      diffX: null,
      diffW: null,
      typeMismatch: false,
    })
  }

  // Pass 3: GT columns nobody claimed.
  gtCols.forEach((gt, gi) => {
    if (claimedGt.has(gi)) return
    rows.push({
      index: rows.length + 1,
      autoCol: null,
      gtCol: gt,
      diffX: null,
      diffW: null,
      typeMismatch: false,
    })
  })

  return rows
}
|
||||
|
||||
/**
 * Summary and action panel for the column-detection step.
 *
 * Shows how many columns were detected, lists every region with its type badge,
 * optionally renders an auto-vs-ground-truth diff table (when `savedGtColumns`
 * is available), and offers the Ja/Nein verdict plus the "Weiter" button.
 * Renders nothing while `columnResult` is null.
 */
export function ColumnControls({
  columnResult,
  onRerun,
  onManualMode,
  onGtMode,
  onGroundTruth,
  onNext,
  isDetecting,
  savedGtColumns,
}: ColumnControlsProps) {
  const [gtSaved, setGtSaved] = useState(false)

  // Hooks must run before the early return below, hence the null check inside the memo.
  const diffRows = useMemo(() => {
    if (!columnResult || !savedGtColumns) return null
    const isColumnLike = (region: PageRegion) =>
      region.type.startsWith('column') || region.type === 'page_ref'
    return computeDiff(
      columnResult.columns.filter(isColumnLike),
      savedGtColumns.filter(isColumnLike),
    )
  }, [columnResult, savedGtColumns])

  if (!columnResult) return null

  // Split regions into column-like ones and the rest (header/footer), preserving order.
  const columns: PageRegion[] = []
  const headerFooter: PageRegion[] = []
  for (const region of columnResult.columns) {
    if (region.type.startsWith('column') || region.type === 'page_ref') {
      columns.push(region)
    } else {
      headerFooter.push(region)
    }
  }

  // Report the verdict to the parent and switch the buttons to the "saved" note.
  const handleGt = (isCorrect: boolean) => {
    onGroundTruth({ is_correct: isCorrect })
    setGtSaved(true)
  }

  // Red for missing partner or type mismatch, amber for offsets above 20, else no tint.
  const rowTone = (row: DiffRow): string => {
    if (!row.autoCol || !row.gtCol || row.typeMismatch) {
      return 'bg-red-50 dark:bg-red-900/10'
    }
    const shifted =
      (row.diffX !== null && Math.abs(row.diffX) > 20) ||
      (row.diffW !== null && Math.abs(row.diffW) > 20)
    return shifted ? 'bg-amber-50 dark:bg-amber-900/10' : ''
  }

  // Cell content for one side of the diff table: type badge plus "x, width", or "fehlt".
  const renderRegionCell = (region: PageRegion | null) =>
    region ? (
      <span>
        <span className={`inline-block px-1 rounded ${TYPE_COLORS[region.type] || ''}`}>
          {TYPE_LABELS[region.type] || region.type}
        </span>
        {' '}{region.x}, {region.width}
      </span>
    ) : (
      <span className="text-red-400">fehlt</span>
    )

  // Signed delta, highlighted when its magnitude exceeds 20; an em dash when unavailable.
  const renderDelta = (delta: number | null) => {
    if (delta === null) return '—'
    const tone = Math.abs(delta) > 20 ? 'text-amber-600 dark:text-amber-400' : 'text-gray-500'
    return (
      <span className={tone}>
        {delta > 0 ? '+' : ''}{delta}
      </span>
    )
  }

  return (
    <div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-4 space-y-4">
      {/* Summary */}
      <div className="flex items-center gap-3 flex-wrap">
        <div className="text-sm text-gray-600 dark:text-gray-400">
          <span className="font-medium text-gray-800 dark:text-gray-200">{columns.length} Spalten</span> erkannt
          {columnResult.duration_seconds > 0 && (
            <span className="ml-2 text-xs">({columnResult.duration_seconds}s)</span>
          )}
        </div>
        <button
          onClick={onRerun}
          disabled={isDetecting}
          className="text-xs px-2 py-1 bg-gray-100 dark:bg-gray-700 rounded hover:bg-gray-200 dark:hover:bg-gray-600 transition-colors disabled:opacity-50"
        >
          Erneut erkennen
        </button>
        <button
          onClick={onManualMode}
          className="text-xs px-2 py-1 bg-teal-100 text-teal-700 dark:bg-teal-900/30 dark:text-teal-400 rounded hover:bg-teal-200 dark:hover:bg-teal-900/50 transition-colors"
        >
          Manuell markieren
        </button>
        <button
          onClick={onGtMode}
          className="text-xs px-2 py-1 bg-amber-100 text-amber-700 dark:bg-amber-900/30 dark:text-amber-400 rounded hover:bg-amber-200 dark:hover:bg-amber-900/50 transition-colors"
        >
          {savedGtColumns ? 'Ground Truth bearbeiten' : 'Ground Truth eintragen'}
        </button>
      </div>

      {/* Column list */}
      <div className="space-y-2">
        {columns.map((col: PageRegion, i: number) => (
          <div key={i} className="flex items-center gap-3 text-sm">
            <span className={`px-2 py-0.5 rounded text-xs font-medium ${TYPE_COLORS[col.type] || ''}`}>
              {TYPE_LABELS[col.type] || col.type}
            </span>
            {col.classification_confidence != null && col.classification_confidence < 1.0 && (
              <span className="text-xs font-medium text-gray-600 dark:text-gray-300">
                {Math.round(col.classification_confidence * 100)}%
              </span>
            )}
            {col.classification_method && (
              <span className="text-xs text-gray-400 dark:text-gray-500">
                ({METHOD_LABELS[col.classification_method] || col.classification_method})
              </span>
            )}
            <span className="text-gray-500 dark:text-gray-400 text-xs font-mono">
              x={col.x} y={col.y} {col.width}x{col.height}px
            </span>
          </div>
        ))}
        {headerFooter.map((r: PageRegion, i: number) => (
          <div key={`hf-${i}`} className="flex items-center gap-3 text-sm">
            <span className={`px-2 py-0.5 rounded text-xs font-medium ${TYPE_COLORS[r.type] || ''}`}>
              {TYPE_LABELS[r.type] || r.type}
            </span>
            <span className="text-gray-500 dark:text-gray-400 text-xs font-mono">
              x={r.x} y={r.y} {r.width}x{r.height}px
            </span>
          </div>
        ))}
      </div>

      {/* Diff table (Auto vs GT) */}
      {diffRows && diffRows.length > 0 && (
        <div className="border-t border-gray-100 dark:border-gray-700 pt-3">
          <div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-2">
            Vergleich: Auto vs Ground Truth
          </div>
          <div className="overflow-x-auto">
            <table className="w-full text-xs">
              <thead>
                <tr className="text-gray-500 dark:text-gray-400 border-b border-gray-100 dark:border-gray-700">
                  <th className="text-left py-1 pr-2">#</th>
                  <th className="text-left py-1 pr-2">Auto (Typ, x, w)</th>
                  <th className="text-left py-1 pr-2">GT (Typ, x, w)</th>
                  <th className="text-right py-1 pr-2">Diff X</th>
                  <th className="text-right py-1">Diff W</th>
                </tr>
              </thead>
              <tbody>
                {diffRows.map((row) => (
                  <tr key={row.index} className={rowTone(row)}>
                    <td className="py-1 pr-2 font-mono text-gray-400">{row.index}</td>
                    <td className="py-1 pr-2 font-mono">
                      {renderRegionCell(row.autoCol)}
                    </td>
                    <td className="py-1 pr-2 font-mono">
                      {renderRegionCell(row.gtCol)}
                    </td>
                    <td className="py-1 pr-2 text-right font-mono">
                      {renderDelta(row.diffX)}
                    </td>
                    <td className="py-1 text-right font-mono">
                      {renderDelta(row.diffW)}
                    </td>
                  </tr>
                ))}
              </tbody>
            </table>
          </div>
        </div>
      )}

      {/* Ground Truth + Navigation */}
      <div className="flex items-center justify-between pt-2 border-t border-gray-100 dark:border-gray-700">
        <div className="flex items-center gap-2">
          <span className="text-sm text-gray-500 dark:text-gray-400">Spalten korrekt?</span>
          {gtSaved ? (
            <span className="text-xs text-green-600 dark:text-green-400">Gespeichert</span>
          ) : (
            <>
              <button
                onClick={() => handleGt(true)}
                className="text-xs px-3 py-1 bg-green-100 text-green-700 dark:bg-green-900/30 dark:text-green-400 rounded hover:bg-green-200 dark:hover:bg-green-900/50 transition-colors"
              >
                Ja
              </button>
              <button
                onClick={() => handleGt(false)}
                className="text-xs px-3 py-1 bg-red-100 text-red-700 dark:bg-red-900/30 dark:text-red-400 rounded hover:bg-red-200 dark:hover:bg-red-900/50 transition-colors"
              >
                Nein
              </button>
            </>
          )}
        </div>

        <button
          onClick={onNext}
          className="px-4 py-2 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors text-sm font-medium"
        >
          Weiter
        </button>
      </div>
    </div>
  )
}
|
||||
@@ -140,8 +140,9 @@ export function DeskewControls({
|
||||
{deskewResult && (
|
||||
<div className="bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 p-4">
|
||||
<div className="text-sm font-medium text-gray-700 dark:text-gray-300 mb-2">
|
||||
Korrekt ausgerichtet?
|
||||
Rotation korrekt?
|
||||
</div>
|
||||
<p className="text-xs text-gray-400 mb-2">Nur die Drehung bewerten — Woelbung/Verzerrung wird im naechsten Schritt korrigiert.</p>
|
||||
{!gtSaved ? (
|
||||
<div className="space-y-3">
|
||||
<div className="flex gap-2">
|
||||
|
||||
201
admin-lehrer/components/ocr-pipeline/DewarpControls.tsx
Normal file
201
admin-lehrer/components/ocr-pipeline/DewarpControls.tsx
Normal file
@@ -0,0 +1,201 @@
|
||||
'use client'
|
||||
|
||||
import { useEffect, useState } from 'react'
|
||||
import type { DewarpResult, DewarpGroundTruth } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
|
||||
/** Props accepted by the `DewarpControls` panel. */
interface DewarpControlsProps {
  /** Dewarp result to display; all sections stay hidden while this is null. */
  dewarpResult: DewarpResult | null
  /** Whether the grid toggle is rendered in its active style. */
  showGrid: boolean
  /** Click handler of the "Raster anzeigen" toggle. */
  onToggleGrid: () => void
  /** Called with the slider value (degrees) when "Anwenden" is clicked. */
  onManualDewarp: (shearDegrees: number) => void
  /** Receives the ground-truth verdict, optionally with corrected shear and notes. */
  onGroundTruth: (gt: DewarpGroundTruth) => void
  /** Click handler of the final "Uebernehmen & Weiter" button. */
  onNext: () => void
  /** Disables the "Anwenden" button and swaps its label to "..." while true. */
  isApplying: boolean
}
|
||||
|
||||
/** Display label per dewarp method; callers fall back to the raw method string. */
const METHOD_LABELS: Record<string, string> = {
  vertical_edge: 'Vertikale Kanten',
  manual: 'Manuell',
  none: 'Keine Korrektur',
}
|
||||
|
||||
/**
 * Control panel for the dewarp (shear correction) step.
 *
 * Displays the detected shear, method and confidence, lets the user toggle the grid,
 * apply a manual shear angle via a slider, record ground-truth feedback, and continue
 * to the next step. While `dewarpResult` is null only the empty wrapper is rendered.
 */
export function DewarpControls({
  dewarpResult,
  showGrid,
  onToggleGrid,
  onManualDewarp,
  onGroundTruth,
  onNext,
  isApplying,
}: DewarpControlsProps) {
  const [manualShear, setManualShear] = useState(0)
  const [gtFeedback, setGtFeedback] = useState<'correct' | 'incorrect' | null>(null)
  const [gtNotes, setGtNotes] = useState('')
  const [gtSaved, setGtSaved] = useState(false)

  // Seed the slider with the auto-detected shear whenever that value changes.
  const detectedShear = dewarpResult?.shear_degrees
  useEffect(() => {
    if (detectedShear !== undefined) {
      setManualShear(detectedShear)
    }
  }, [detectedShear])

  // "Ja" is saved immediately; "Nein" only opens the notes form.
  const handleGroundTruth = (isCorrect: boolean) => {
    if (!isCorrect) {
      setGtFeedback('incorrect')
      return
    }
    setGtFeedback('correct')
    onGroundTruth({ is_correct: true })
    setGtSaved(true)
  }

  // Submit the negative verdict; a slider value of 0 and empty notes are sent as undefined.
  const handleGroundTruthIncorrect = () => {
    onGroundTruth({
      is_correct: false,
      corrected_shear: manualShear !== 0 ? manualShear : undefined,
      notes: gtNotes || undefined,
    })
    setGtSaved(true)
  }

  if (!dewarpResult) {
    return <div className="space-y-4" />
  }

  const gridToggleTone = showGrid
    ? 'bg-teal-100 border-teal-300 text-teal-700 dark:bg-teal-900/40 dark:border-teal-600 dark:text-teal-300'
    : 'border-gray-300 text-gray-500 dark:border-gray-600 dark:text-gray-400'

  // Shared styling of the Ja/Nein buttons; the active choice gets a coloured ring.
  const verdictButtonClass = (active: boolean, activeTone: string, hoverTone: string) => {
    const tone = active
      ? activeTone
      : `bg-gray-100 text-gray-600 ${hoverTone} dark:bg-gray-700 dark:text-gray-300`
    return `px-4 py-1.5 rounded-md text-sm font-medium transition-colors ${tone}`
  }

  return (
    <div className="space-y-4">
      {/* Results */}
      <div className="bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 p-4">
        <div className="flex flex-wrap items-center gap-3 text-sm">
          <div>
            <span className="text-gray-500">Scherung:</span>{' '}
            <span className="font-mono font-medium">{dewarpResult.shear_degrees}°</span>
          </div>
          <div className="h-4 w-px bg-gray-300 dark:bg-gray-600" />
          <div>
            <span className="text-gray-500">Methode:</span>{' '}
            <span className="inline-flex items-center px-2 py-0.5 rounded-full text-xs font-medium bg-teal-100 text-teal-700 dark:bg-teal-900/40 dark:text-teal-300">
              {METHOD_LABELS[dewarpResult.method_used] || dewarpResult.method_used}
            </span>
          </div>
          <div className="h-4 w-px bg-gray-300 dark:bg-gray-600" />
          <div>
            <span className="text-gray-500">Konfidenz:</span>{' '}
            <span className="font-mono">{Math.round(dewarpResult.confidence * 100)}%</span>
          </div>
        </div>

        {/* Toggle */}
        <div className="flex gap-3 mt-3">
          <button
            onClick={onToggleGrid}
            className={`text-xs px-3 py-1 rounded-full border transition-colors ${gridToggleTone}`}
          >
            Raster anzeigen
          </button>
        </div>
      </div>

      {/* Manual shear angle slider (slider units are hundredths of a degree) */}
      <div className="bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 p-4">
        <div className="text-sm font-medium text-gray-700 dark:text-gray-300 mb-2">Scherwinkel (manuell)</div>
        <div className="flex items-center gap-3">
          <span className="text-xs text-gray-400 w-10 text-right">-2.0°</span>
          <input
            type="range"
            min={-200}
            max={200}
            step={5}
            value={Math.round(manualShear * 100)}
            onChange={(e) => setManualShear(parseInt(e.target.value) / 100)}
            className="flex-1 h-2 bg-gray-200 rounded-lg appearance-none cursor-pointer dark:bg-gray-700 accent-teal-500"
          />
          <span className="text-xs text-gray-400 w-10">+2.0°</span>
          <span className="font-mono text-sm w-16 text-right">{manualShear.toFixed(2)}°</span>
          <button
            onClick={() => onManualDewarp(manualShear)}
            disabled={isApplying}
            className="px-3 py-1.5 text-sm bg-teal-600 text-white rounded-md hover:bg-teal-700 disabled:opacity-50 transition-colors"
          >
            {isApplying ? '...' : 'Anwenden'}
          </button>
        </div>
        <p className="text-xs text-gray-400 mt-1">
          Scherung der vertikalen Achse in Grad. Positiv = Spalten nach rechts kippen, negativ = nach links.
        </p>
      </div>

      {/* Ground Truth */}
      <div className="bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 p-4">
        <div className="text-sm font-medium text-gray-700 dark:text-gray-300 mb-2">
          Spalten vertikal ausgerichtet?
        </div>
        <p className="text-xs text-gray-400 mb-2">Pruefen ob die Spaltenraender jetzt senkrecht zum Raster stehen.</p>
        {gtSaved ? (
          <div className="text-sm text-green-600 dark:text-green-400">
            Feedback gespeichert
          </div>
        ) : (
          <div className="space-y-3">
            <div className="flex gap-2">
              <button
                onClick={() => handleGroundTruth(true)}
                className={verdictButtonClass(
                  gtFeedback === 'correct',
                  'bg-green-100 text-green-700 ring-2 ring-green-400',
                  'hover:bg-green-50',
                )}
              >
                Ja
              </button>
              <button
                onClick={() => handleGroundTruth(false)}
                className={verdictButtonClass(
                  gtFeedback === 'incorrect',
                  'bg-red-100 text-red-700 ring-2 ring-red-400',
                  'hover:bg-red-50',
                )}
              >
                Nein
              </button>
            </div>
            {gtFeedback === 'incorrect' && (
              <div className="space-y-2">
                <textarea
                  value={gtNotes}
                  onChange={(e) => setGtNotes(e.target.value)}
                  placeholder="Notizen zur Korrektur..."
                  className="w-full text-sm border border-gray-300 dark:border-gray-600 rounded-md p-2 bg-white dark:bg-gray-900 text-gray-800 dark:text-gray-200"
                  rows={2}
                />
                <button
                  onClick={handleGroundTruthIncorrect}
                  className="text-sm px-3 py-1 bg-red-600 text-white rounded-md hover:bg-red-700 transition-colors"
                >
                  Feedback speichern
                </button>
              </div>
            )}
          </div>
        )}
      </div>

      {/* Next button */}
      <div className="flex justify-end">
        <button
          onClick={onNext}
          className="px-6 py-2 bg-teal-600 text-white rounded-lg hover:bg-teal-700 font-medium transition-colors"
        >
          Uebernehmen & Weiter →
        </button>
      </div>
    </div>
  )
}
|
||||
@@ -9,8 +9,11 @@ interface ImageCompareViewProps {
|
||||
originalUrl: string | null
|
||||
deskewedUrl: string | null
|
||||
showGrid: boolean
|
||||
showGridLeft?: boolean
|
||||
showBinarized: boolean
|
||||
binarizedUrl: string | null
|
||||
leftLabel?: string
|
||||
rightLabel?: string
|
||||
}
|
||||
|
||||
function MmGridOverlay() {
|
||||
@@ -75,8 +78,11 @@ export function ImageCompareView({
|
||||
originalUrl,
|
||||
deskewedUrl,
|
||||
showGrid,
|
||||
showGridLeft,
|
||||
showBinarized,
|
||||
binarizedUrl,
|
||||
leftLabel,
|
||||
rightLabel,
|
||||
}: ImageCompareViewProps) {
|
||||
const [leftError, setLeftError] = useState(false)
|
||||
const [rightError, setRightError] = useState(false)
|
||||
@@ -87,16 +93,19 @@ export function ImageCompareView({
|
||||
<div className="grid grid-cols-1 lg:grid-cols-2 gap-4">
|
||||
{/* Left: Original */}
|
||||
<div className="space-y-2">
|
||||
<h3 className="text-sm font-medium text-gray-500 dark:text-gray-400">Original (unbearbeitet)</h3>
|
||||
<h3 className="text-sm font-medium text-gray-500 dark:text-gray-400">{leftLabel || 'Original (unbearbeitet)'}</h3>
|
||||
<div className="relative bg-gray-100 dark:bg-gray-900 rounded-lg overflow-hidden border border-gray-200 dark:border-gray-700"
|
||||
style={{ aspectRatio: '210/297' }}>
|
||||
{originalUrl && !leftError ? (
|
||||
<img
|
||||
src={originalUrl}
|
||||
alt="Original Scan"
|
||||
className="w-full h-full object-contain"
|
||||
onError={() => setLeftError(true)}
|
||||
/>
|
||||
<>
|
||||
<img
|
||||
src={originalUrl}
|
||||
alt="Original Scan"
|
||||
className="w-full h-full object-contain"
|
||||
onError={() => setLeftError(true)}
|
||||
/>
|
||||
{showGridLeft && <MmGridOverlay />}
|
||||
</>
|
||||
) : (
|
||||
<div className="flex items-center justify-center h-full text-gray-400">
|
||||
{leftError ? 'Fehler beim Laden' : 'Noch kein Bild'}
|
||||
@@ -108,7 +117,7 @@ export function ImageCompareView({
|
||||
{/* Right: Deskewed with Grid */}
|
||||
<div className="space-y-2">
|
||||
<h3 className="text-sm font-medium text-gray-500 dark:text-gray-400">
|
||||
{showBinarized ? 'Binarisiert' : 'Begradigt'} {showGrid && '+ Raster (mm)'}
|
||||
{rightLabel || `${showBinarized ? 'Binarisiert' : 'Begradigt'}${showGrid ? ' + Raster (mm)' : ''}`}
|
||||
</h3>
|
||||
<div className="relative bg-gray-100 dark:bg-gray-900 rounded-lg overflow-hidden border border-gray-200 dark:border-gray-700"
|
||||
style={{ aspectRatio: '210/297' }}>
|
||||
|
||||
359
admin-lehrer/components/ocr-pipeline/ManualColumnEditor.tsx
Normal file
359
admin-lehrer/components/ocr-pipeline/ManualColumnEditor.tsx
Normal file
@@ -0,0 +1,359 @@
|
||||
'use client'
|
||||
|
||||
import { useCallback, useEffect, useRef, useState } from 'react'
|
||||
import type { ColumnTypeKey, PageRegion } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
|
||||
/** Column types with their short display labels, in declaration order. */
const COLUMN_TYPES: { value: ColumnTypeKey; label: string }[] = [
  { value: 'column_en', label: 'EN' },
  { value: 'column_de', label: 'DE' },
  { value: 'column_example', label: 'Beispiel' },
  { value: 'column_text', label: 'Text' },
  { value: 'page_ref', label: 'Seite' },
  { value: 'column_marker', label: 'Marker' },
  { value: 'column_ignore', label: 'Ignorieren' },
]

/** Translucent fill colour per column type for the overlay drawn on top of the image. */
const TYPE_OVERLAY_COLORS: Record<string, string> = {
  column_en: 'rgba(59, 130, 246, 0.12)',
  column_de: 'rgba(34, 197, 94, 0.12)',
  column_example: 'rgba(249, 115, 22, 0.12)',
  column_text: 'rgba(6, 182, 212, 0.12)',
  page_ref: 'rgba(168, 85, 247, 0.12)',
  column_marker: 'rgba(239, 68, 68, 0.12)',
  column_ignore: 'rgba(128, 128, 128, 0.06)',
}

/** Badge class names per column type. */
const TYPE_BADGE_COLORS: Record<string, string> = {
  column_en: 'bg-blue-100 text-blue-700 dark:bg-blue-900/30 dark:text-blue-400',
  column_de: 'bg-green-100 text-green-700 dark:bg-green-900/30 dark:text-green-400',
  column_example: 'bg-orange-100 text-orange-700 dark:bg-orange-900/30 dark:text-orange-400',
  column_text: 'bg-cyan-100 text-cyan-700 dark:bg-cyan-900/30 dark:text-cyan-400',
  page_ref: 'bg-purple-100 text-purple-700 dark:bg-purple-900/30 dark:text-purple-400',
  column_marker: 'bg-red-100 text-red-700 dark:bg-red-900/30 dark:text-red-400',
  column_ignore: 'bg-gray-100 text-gray-500 dark:bg-gray-700/30 dark:text-gray-500',
}

/**
 * Type assigned to a newly created column, looked up by its position index;
 * positions beyond this list fall back to 'column_text' at the use site.
 */
const DEFAULT_TYPE_SEQUENCE: ColumnTypeKey[] = [
  'page_ref',
  'column_en',
  'column_de',
  'column_example',
  'column_text',
]

/** Minimum gap, in percent of the image width, between two dividers and to either edge. */
const MIN_DIVIDER_DISTANCE_PERCENT = 2
|
||||
|
||||
/** Props accepted by the `ManualColumnEditor`. */
interface ManualColumnEditorProps {
  /** Source of the image on which dividers are placed. */
  imageUrl: string
  /** Image width used to convert divider percentages into x / width values. */
  imageWidth: number
  /** Image height; every generated column region spans this full height. */
  imageHeight: number
  /** Receives the column regions built from the current dividers and types. */
  onApply: (columns: PageRegion[]) => void
  /** Click handler of the "Abbrechen" button. */
  onCancel: () => void
  /** NOTE(review): presumably true while the parent is saving; usage is not visible in this excerpt. */
  applying: boolean
  /** Editing mode; defaults to 'manual'. */
  mode?: 'manual' | 'ground-truth'
  /** 'two-column' (default) places image and controls side by side; 'stacked' stacks them. */
  layout?: 'two-column' | 'stacked'
  /** Initial divider positions in percent of the image width; defaults to none. */
  initialDividers?: number[]
  /** Initial type per column; missing entries are filled from the default sequence. */
  initialColumnTypes?: ColumnTypeKey[]
}
|
||||
|
||||
export function ManualColumnEditor({
|
||||
imageUrl,
|
||||
imageWidth,
|
||||
imageHeight,
|
||||
onApply,
|
||||
onCancel,
|
||||
applying,
|
||||
mode = 'manual',
|
||||
layout = 'two-column',
|
||||
initialDividers,
|
||||
initialColumnTypes,
|
||||
}: ManualColumnEditorProps) {
|
||||
const containerRef = useRef<HTMLDivElement>(null)
|
||||
const [dividers, setDividers] = useState<number[]>(initialDividers ?? [])
|
||||
const [columnTypes, setColumnTypes] = useState<ColumnTypeKey[]>(initialColumnTypes ?? [])
|
||||
const [dragging, setDragging] = useState<number | null>(null)
|
||||
const [imageLoaded, setImageLoaded] = useState(false)
|
||||
|
||||
const isGT = mode === 'ground-truth'
|
||||
|
||||
// Sync columnTypes length when dividers change
|
||||
useEffect(() => {
|
||||
const numColumns = dividers.length + 1
|
||||
setColumnTypes(prev => {
|
||||
if (prev.length === numColumns) return prev
|
||||
const next = [...prev]
|
||||
while (next.length < numColumns) {
|
||||
const idx = next.length
|
||||
next.push(DEFAULT_TYPE_SEQUENCE[idx] || 'column_text')
|
||||
}
|
||||
while (next.length > numColumns) {
|
||||
next.pop()
|
||||
}
|
||||
return next
|
||||
})
|
||||
}, [dividers.length])
|
||||
|
||||
const getXPercent = useCallback((clientX: number): number => {
|
||||
if (!containerRef.current) return 0
|
||||
const rect = containerRef.current.getBoundingClientRect()
|
||||
const pct = ((clientX - rect.left) / rect.width) * 100
|
||||
return Math.max(0, Math.min(100, pct))
|
||||
}, [])
|
||||
|
||||
const canPlaceDivider = useCallback((xPct: number, excludeIndex?: number): boolean => {
|
||||
for (let i = 0; i < dividers.length; i++) {
|
||||
if (i === excludeIndex) continue
|
||||
if (Math.abs(dividers[i] - xPct) < MIN_DIVIDER_DISTANCE_PERCENT) return false
|
||||
}
|
||||
return xPct > MIN_DIVIDER_DISTANCE_PERCENT && xPct < (100 - MIN_DIVIDER_DISTANCE_PERCENT)
|
||||
}, [dividers])
|
||||
|
||||
// Click on image to add a divider
|
||||
const handleImageClick = useCallback((e: React.MouseEvent) => {
|
||||
if (dragging !== null) return
|
||||
// Don't add if clicking on a divider handle
|
||||
if ((e.target as HTMLElement).dataset.divider) return
|
||||
|
||||
const xPct = getXPercent(e.clientX)
|
||||
if (!canPlaceDivider(xPct)) return
|
||||
|
||||
setDividers(prev => [...prev, xPct].sort((a, b) => a - b))
|
||||
}, [dragging, getXPercent, canPlaceDivider])
|
||||
|
||||
// Drag handlers
|
||||
const handleDividerMouseDown = useCallback((e: React.MouseEvent, index: number) => {
|
||||
e.stopPropagation()
|
||||
e.preventDefault()
|
||||
setDragging(index)
|
||||
}, [])
|
||||
|
||||
useEffect(() => {
|
||||
if (dragging === null) return
|
||||
|
||||
const handleMouseMove = (e: MouseEvent) => {
|
||||
const xPct = getXPercent(e.clientX)
|
||||
if (canPlaceDivider(xPct, dragging)) {
|
||||
setDividers(prev => {
|
||||
const next = [...prev]
|
||||
next[dragging] = xPct
|
||||
return next.sort((a, b) => a - b)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
const handleMouseUp = () => {
|
||||
setDragging(null)
|
||||
}
|
||||
|
||||
window.addEventListener('mousemove', handleMouseMove)
|
||||
window.addEventListener('mouseup', handleMouseUp)
|
||||
return () => {
|
||||
window.removeEventListener('mousemove', handleMouseMove)
|
||||
window.removeEventListener('mouseup', handleMouseUp)
|
||||
}
|
||||
}, [dragging, getXPercent, canPlaceDivider])
|
||||
|
||||
const removeDivider = useCallback((index: number) => {
|
||||
setDividers(prev => prev.filter((_, i) => i !== index))
|
||||
}, [])
|
||||
|
||||
const updateColumnType = useCallback((colIndex: number, type: ColumnTypeKey) => {
|
||||
setColumnTypes(prev => {
|
||||
const next = [...prev]
|
||||
next[colIndex] = type
|
||||
return next
|
||||
})
|
||||
}, [])
|
||||
|
||||
const handleApply = useCallback(() => {
|
||||
// Build PageRegion array from dividers
|
||||
const sorted = [...dividers].sort((a, b) => a - b)
|
||||
const columns: PageRegion[] = []
|
||||
|
||||
for (let i = 0; i <= sorted.length; i++) {
|
||||
const leftPct = i === 0 ? 0 : sorted[i - 1]
|
||||
const rightPct = i === sorted.length ? 100 : sorted[i]
|
||||
const x = Math.round((leftPct / 100) * imageWidth)
|
||||
const w = Math.round(((rightPct - leftPct) / 100) * imageWidth)
|
||||
|
||||
columns.push({
|
||||
type: columnTypes[i] || 'column_text',
|
||||
x,
|
||||
y: 0,
|
||||
width: w,
|
||||
height: imageHeight,
|
||||
classification_confidence: 1.0,
|
||||
classification_method: 'manual',
|
||||
})
|
||||
}
|
||||
|
||||
onApply(columns)
|
||||
}, [dividers, columnTypes, imageWidth, imageHeight, onApply])
|
||||
|
||||
// Compute column regions for overlay
|
||||
const sorted = [...dividers].sort((a, b) => a - b)
|
||||
const columnRegions = Array.from({ length: sorted.length + 1 }, (_, i) => ({
|
||||
leftPct: i === 0 ? 0 : sorted[i - 1],
|
||||
rightPct: i === sorted.length ? 100 : sorted[i],
|
||||
type: columnTypes[i] || 'column_text',
|
||||
}))
|
||||
|
||||
return (
|
||||
<div className="space-y-4">
|
||||
{/* Layout: image + controls */}
|
||||
<div className={layout === 'stacked' ? 'space-y-4' : 'grid grid-cols-2 gap-4'}>
|
||||
{/* Left: Interactive image */}
|
||||
<div>
|
||||
<div className="flex items-center justify-between mb-1">
|
||||
<div className="text-xs font-medium text-gray-500 dark:text-gray-400">
|
||||
Klicken um Trennlinien zu setzen
|
||||
</div>
|
||||
<button
|
||||
onClick={onCancel}
|
||||
className="text-xs px-2 py-0.5 text-gray-500 hover:text-gray-700 dark:text-gray-400 dark:hover:text-gray-200"
|
||||
>
|
||||
Abbrechen
|
||||
</button>
|
||||
</div>
|
||||
<div
|
||||
ref={containerRef}
|
||||
className="relative border rounded-lg overflow-hidden dark:border-gray-700 bg-gray-50 dark:bg-gray-900 cursor-crosshair select-none"
|
||||
onClick={handleImageClick}
|
||||
>
|
||||
{/* eslint-disable-next-line @next/next/no-img-element */}
|
||||
<img
|
||||
src={imageUrl}
|
||||
alt="Entzerrtes Bild"
|
||||
className="w-full h-auto block"
|
||||
draggable={false}
|
||||
onLoad={() => setImageLoaded(true)}
|
||||
/>
|
||||
|
||||
{imageLoaded && (
|
||||
<>
|
||||
{/* Column overlays */}
|
||||
{columnRegions.map((region, i) => (
|
||||
<div
|
||||
key={`col-${i}`}
|
||||
className="absolute top-0 bottom-0 pointer-events-none"
|
||||
style={{
|
||||
left: `${region.leftPct}%`,
|
||||
width: `${region.rightPct - region.leftPct}%`,
|
||||
backgroundColor: TYPE_OVERLAY_COLORS[region.type] || 'rgba(128,128,128,0.08)',
|
||||
}}
|
||||
>
|
||||
<span className="absolute top-1 left-1/2 -translate-x-1/2 text-[10px] font-medium text-gray-600 dark:text-gray-300 bg-white/80 dark:bg-gray-800/80 px-1 rounded">
|
||||
{i + 1}
|
||||
</span>
|
||||
</div>
|
||||
))}
|
||||
|
||||
{/* Divider lines */}
|
||||
{sorted.map((xPct, i) => (
|
||||
<div
|
||||
key={`div-${i}`}
|
||||
data-divider="true"
|
||||
className="absolute top-0 bottom-0 group"
|
||||
style={{
|
||||
left: `${xPct}%`,
|
||||
transform: 'translateX(-50%)',
|
||||
width: '12px',
|
||||
cursor: 'col-resize',
|
||||
zIndex: 10,
|
||||
}}
|
||||
onMouseDown={(e) => handleDividerMouseDown(e, i)}
|
||||
>
|
||||
{/* Visible line */}
|
||||
<div
|
||||
data-divider="true"
|
||||
className="absolute top-0 bottom-0 left-1/2 -translate-x-1/2 w-0.5 border-l-2 border-dashed border-red-500"
|
||||
/>
|
||||
{/* Delete button */}
|
||||
<button
|
||||
data-divider="true"
|
||||
onClick={(e) => {
|
||||
e.stopPropagation()
|
||||
removeDivider(i)
|
||||
}}
|
||||
className="absolute top-2 left-1/2 -translate-x-1/2 w-4 h-4 bg-red-500 text-white rounded-full text-[10px] leading-none flex items-center justify-center opacity-0 group-hover:opacity-100 transition-opacity z-20"
|
||||
title="Linie entfernen"
|
||||
>
|
||||
x
|
||||
</button>
|
||||
</div>
|
||||
))}
|
||||
</>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Right: Column type assignment + actions */}
|
||||
<div className="space-y-4">
|
||||
<div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-1">
|
||||
Spaltentypen
|
||||
</div>
|
||||
|
||||
{dividers.length === 0 ? (
|
||||
<div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-6 text-center">
|
||||
<div className="text-3xl mb-2">👆</div>
|
||||
<p className="text-sm text-gray-500 dark:text-gray-400">
|
||||
Klicken Sie auf das Bild links, um vertikale Trennlinien zwischen den Spalten zu setzen.
|
||||
</p>
|
||||
<p className="text-xs text-gray-400 dark:text-gray-500 mt-2">
|
||||
Linien koennen per Drag verschoben und per Hover geloescht werden.
|
||||
</p>
|
||||
</div>
|
||||
) : (
|
||||
<div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-4 space-y-3">
|
||||
<div className="text-sm text-gray-600 dark:text-gray-400">
|
||||
<span className="font-medium text-gray-800 dark:text-gray-200">
|
||||
{dividers.length} Linien = {dividers.length + 1} Spalten
|
||||
</span>
|
||||
</div>
|
||||
<div className="grid gap-2">
|
||||
{columnRegions.map((region, i) => (
|
||||
<div key={i} className="flex items-center gap-3">
|
||||
<span className={`w-16 text-center px-2 py-0.5 rounded text-xs font-medium ${TYPE_BADGE_COLORS[region.type] || 'bg-gray-100 text-gray-600'}`}>
|
||||
Spalte {i + 1}
|
||||
</span>
|
||||
<select
|
||||
value={columnTypes[i] || 'column_text'}
|
||||
onChange={(e) => updateColumnType(i, e.target.value as ColumnTypeKey)}
|
||||
className="text-sm border border-gray-200 dark:border-gray-600 rounded px-2 py-1 bg-white dark:bg-gray-700 text-gray-800 dark:text-gray-200"
|
||||
>
|
||||
{COLUMN_TYPES.map(t => (
|
||||
<option key={t.value} value={t.value}>{t.label}</option>
|
||||
))}
|
||||
</select>
|
||||
<span className="text-xs text-gray-400 font-mono">
|
||||
{Math.round(region.rightPct - region.leftPct)}%
|
||||
</span>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Action buttons */}
|
||||
<div className="flex flex-col gap-2">
|
||||
<button
|
||||
onClick={handleApply}
|
||||
disabled={dividers.length === 0 || applying}
|
||||
className="w-full px-4 py-2 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors text-sm font-medium disabled:opacity-50 disabled:cursor-not-allowed"
|
||||
>
|
||||
{applying
|
||||
? 'Wird gespeichert...'
|
||||
: isGT
|
||||
? `${dividers.length + 1} Spalten als Ground Truth speichern`
|
||||
: `${dividers.length + 1} Spalten uebernehmen`}
|
||||
</button>
|
||||
<button
|
||||
onClick={() => setDividers([])}
|
||||
disabled={dividers.length === 0}
|
||||
className="text-xs px-3 py-2 text-gray-500 hover:text-gray-700 dark:text-gray-400 dark:hover:text-gray-200 disabled:opacity-50"
|
||||
>
|
||||
Alle Linien entfernen
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
@@ -1,19 +1,341 @@
|
||||
'use client'
|
||||
|
||||
export function StepColumnDetection() {
|
||||
return (
|
||||
<div className="flex flex-col items-center justify-center py-16 text-center">
|
||||
<div className="text-5xl mb-4">📊</div>
|
||||
<h3 className="text-lg font-medium text-gray-700 dark:text-gray-300 mb-2">
|
||||
Schritt 2: Spaltenerkennung
|
||||
</h3>
|
||||
<p className="text-gray-500 dark:text-gray-400 max-w-md">
|
||||
Erkennung unsichtbarer Spaltentrennungen in der Vokabelseite.
|
||||
Dieser Schritt wird in einer zukuenftigen Version implementiert.
|
||||
</p>
|
||||
<div className="mt-6 px-4 py-2 bg-amber-100 dark:bg-amber-900/30 text-amber-700 dark:text-amber-400 rounded-full text-sm font-medium">
|
||||
Kommt bald
|
||||
import { useCallback, useEffect, useState } from 'react'
|
||||
import type { ColumnResult, ColumnGroundTruth, PageRegion } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
import { ColumnControls } from './ColumnControls'
|
||||
import { ManualColumnEditor } from './ManualColumnEditor'
|
||||
import type { ColumnTypeKey } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
|
||||
const KLAUSUR_API = '/klausur-api'
|
||||
|
||||
type ViewMode = 'normal' | 'ground-truth' | 'manual'
|
||||
|
||||
interface StepColumnDetectionProps {
|
||||
sessionId: string | null
|
||||
onNext: () => void
|
||||
}
|
||||
|
||||
/**
 * Convert PageRegion[] to divider percentages + column types for ManualColumnEditor.
 *
 * Columns are ordered left-to-right by their `x` coordinate. Every column
 * except the leftmost contributes one divider, positioned at that column's
 * left edge and expressed as a percentage of `imageWidth`.
 *
 * @param columns    Column regions in image pixel coordinates.
 * @param imageWidth Width the `x` coordinates are relative to.
 * @returns Divider positions (0-100, ascending) and the column types in
 *          left-to-right order. Both arrays are empty when `columns` is empty
 *          or `imageWidth` is falsy.
 */
function columnsToEditorState(
  columns: PageRegion[],
  imageWidth: number
): { dividers: number[]; columnTypes: ColumnTypeKey[] } {
  if (!columns.length || !imageWidth) return { dividers: [], columnTypes: [] }

  // Copy before sorting so the caller's array is not reordered.
  const sorted = [...columns].sort((a, b) => a.x - b.x)
  const columnTypes: ColumnTypeKey[] = sorted.map(c => c.type)

  // Clamp to the visible range: when the caller only has a fallback width,
  // stored pixel coordinates can exceed it, which would otherwise place
  // dividers off-canvas and produce negative column widths when re-applied.
  const dividers = sorted
    .slice(1)
    .map(c => Math.min(100, Math.max(0, (c.x / imageWidth) * 100)))

  return { dividers, columnTypes }
}
|
||||
|
||||
/**
 * Pipeline step for column detection.
 *
 * On mount (per session) it loads the session info; a cached `column_result`
 * is shown directly, otherwise auto-detection is triggered. The user can then
 * re-run detection, replace the result with manually placed columns, or record
 * a ground-truth column layout.
 *
 * @param sessionId Active OCR pipeline session, or null when no session exists yet.
 * @param onNext    Invoked by the controls to advance to the next pipeline step.
 */
export function StepColumnDetection({ sessionId, onNext }: StepColumnDetectionProps) {
  const [columnResult, setColumnResult] = useState<ColumnResult | null>(null)
  const [detecting, setDetecting] = useState(false)
  const [error, setError] = useState<string | null>(null)
  const [viewMode, setViewMode] = useState<ViewMode>('normal')
  const [applying, setApplying] = useState(false)
  const [imageDimensions, setImageDimensions] = useState<{ width: number; height: number } | null>(null)
  const [savedGtColumns, setSavedGtColumns] = useState<PageRegion[] | null>(null)
  // Cache-busting token for the overlay image. It changes only when a new
  // column result is stored, so unrelated re-renders do not re-download it.
  const [overlayVersion, setOverlayVersion] = useState(0)

  /** Store a new column result and invalidate the cached overlay image. */
  const publishColumnResult = useCallback((result: ColumnResult) => {
    // Bump the version first so the image never renders the new result
    // with the previous cache-busting token.
    setOverlayVersion(Date.now())
    setColumnResult(result)
  }, [])

  /** Run server-side auto-detection and store its result. */
  const runAutoDetection = useCallback(async () => {
    if (!sessionId) return
    setDetecting(true)
    setError(null)
    try {
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/columns`, {
        method: 'POST',
      })
      if (!res.ok) {
        const err = await res.json().catch(() => ({ detail: res.statusText }))
        throw new Error(err.detail || 'Spaltenerkennung fehlgeschlagen')
      }
      const data: ColumnResult = await res.json()
      publishColumnResult(data)
    } catch (e) {
      setError(e instanceof Error ? e.message : 'Unbekannter Fehler')
    } finally {
      setDetecting(false)
    }
  }, [sessionId, publishColumnResult])

  // Fetch session info (image dimensions) + check for cached column result
  useEffect(() => {
    if (!sessionId || imageDimensions) return

    // Set by the cleanup so a response that arrives after unmount or after a
    // session change is dropped instead of overwriting newer state.
    let cancelled = false

    const fetchSessionInfo = async () => {
      try {
        const infoRes = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}`)
        if (cancelled) return
        if (infoRes.ok) {
          const info = await infoRes.json()
          if (cancelled) return
          if (info.image_width && info.image_height) {
            setImageDimensions({ width: info.image_width, height: info.image_height })
          }
          if (info.column_result) {
            publishColumnResult(info.column_result)
            return
          }
        }
      } catch (e) {
        console.error('Failed to fetch session info:', e)
      }
      if (cancelled) return

      // No cached result - run auto-detection
      runAutoDetection()
    }

    fetchSessionInfo()
    return () => {
      cancelled = true
    }
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [sessionId])

  // Load saved GT if exists
  useEffect(() => {
    if (!sessionId) return
    let cancelled = false
    const fetchGt = async () => {
      try {
        const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/ground-truth/columns`)
        if (res.ok) {
          const data = await res.json()
          const corrected = data.columns_gt?.corrected_columns
          if (corrected && !cancelled) setSavedGtColumns(corrected)
        }
      } catch {
        // No saved GT - that's fine
      }
    }
    fetchGt()
    return () => {
      cancelled = true
    }
  }, [sessionId])

  const handleRerun = useCallback(() => {
    runAutoDetection()
  }, [runAutoDetection])

  /** Best-effort save of the ground-truth verdict; failures are only logged. */
  const handleGroundTruth = useCallback(async (gt: ColumnGroundTruth) => {
    if (!sessionId) return
    try {
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/ground-truth/columns`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(gt),
      })
      // fetch() only rejects on network errors; HTTP errors must be checked explicitly.
      if (!res.ok) {
        console.error('Ground truth save failed:', res.status, res.statusText)
      }
    } catch (e) {
      console.error('Ground truth save failed:', e)
    }
  }, [sessionId])

  /** Replace the session's column result with manually placed columns. */
  const handleManualApply = useCallback(async (columns: PageRegion[]) => {
    if (!sessionId) return
    setApplying(true)
    setError(null)
    try {
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/columns/manual`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ columns }),
      })
      if (!res.ok) {
        const err = await res.json().catch(() => ({ detail: res.statusText }))
        throw new Error(err.detail || 'Manuelle Spalten konnten nicht gespeichert werden')
      }
      const data = await res.json()
      publishColumnResult({
        columns: data.columns,
        duration_seconds: data.duration_seconds ?? 0,
      })
      setViewMode('normal')
    } catch (e) {
      setError(e instanceof Error ? e.message : 'Fehler beim Speichern')
    } finally {
      setApplying(false)
    }
  }, [sessionId, publishColumnResult])

  /** Save columns drawn in the ground-truth editor as the corrected layout. */
  const handleGtApply = useCallback(async (columns: PageRegion[]) => {
    if (!sessionId) return
    setApplying(true)
    setError(null)
    try {
      const gt: ColumnGroundTruth = {
        is_correct: false,
        corrected_columns: columns,
      }
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/ground-truth/columns`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(gt),
      })
      // Only report the ground truth as saved when the server accepted it.
      if (!res.ok) {
        const err = await res.json().catch(() => ({ detail: res.statusText }))
        throw new Error(err.detail || 'Ground Truth konnte nicht gespeichert werden')
      }
      setSavedGtColumns(columns)
      setViewMode('normal')
    } catch (e) {
      setError(e instanceof Error ? e.message : 'Fehler beim Speichern')
    } finally {
      setApplying(false)
    }
  }, [sessionId])

  if (!sessionId) {
    return (
      <div className="flex flex-col items-center justify-center py-16 text-center">
        <div className="text-5xl mb-4">📊</div>
        <h3 className="text-lg font-medium text-gray-700 dark:text-gray-300 mb-2">
          Schritt 3: Spaltenerkennung
        </h3>
        <p className="text-gray-500 dark:text-gray-400 max-w-md">
          Bitte zuerst Schritt 1 und 2 abschliessen.
        </p>
      </div>
    )
  }

  const dewarpedUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/dewarped`
  const overlayUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/columns-overlay`
  const overlaySrc = `${overlayUrl}?t=${overlayVersion}`

  // Pre-compute editor state from saved GT for GT mode
  const gtInitial = savedGtColumns
    ? columnsToEditorState(savedGtColumns, imageDimensions?.width ?? 1000)
    : undefined

  return (
    <div className="space-y-4">
      {/* Loading indicator */}
      {detecting && (
        <div className="flex items-center gap-2 text-teal-600 dark:text-teal-400 text-sm">
          <div className="animate-spin w-4 h-4 border-2 border-teal-500 border-t-transparent rounded-full" />
          Spaltenerkennung laeuft...
        </div>
      )}

      {viewMode === 'manual' ? (
        /* Manual column editor - overwrites column_result */
        <ManualColumnEditor
          imageUrl={dewarpedUrl}
          imageWidth={imageDimensions?.width ?? 1000}
          imageHeight={imageDimensions?.height ?? 1400}
          onApply={handleManualApply}
          onCancel={() => setViewMode('normal')}
          applying={applying}
          mode="manual"
        />
      ) : viewMode === 'ground-truth' ? (
        /* GT mode: auto result (left, readonly) + GT editor (right) */
        <div className="grid grid-cols-2 gap-4">
          {/* Left: Auto result (readonly overlay) */}
          <div>
            <div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-1">
              Auto-Ergebnis (readonly)
            </div>
            <div className="border rounded-lg overflow-hidden dark:border-gray-700 bg-gray-50 dark:bg-gray-900">
              {columnResult ? (
                // eslint-disable-next-line @next/next/no-img-element
                <img
                  src={overlaySrc}
                  alt="Auto Spalten-Overlay"
                  className="w-full h-auto"
                />
              ) : (
                <div className="aspect-[3/4] flex items-center justify-center text-gray-400 text-sm">
                  Keine Auto-Daten
                </div>
              )}
            </div>
            {/* Auto column list */}
            {columnResult && (
              <div className="mt-2 space-y-1">
                <div className="text-xs font-medium text-gray-500 dark:text-gray-400">
                  Auto: {columnResult.columns.length} Spalten
                </div>
                {columnResult.columns
                  .filter(c => c.type.startsWith('column') || c.type === 'page_ref')
                  .map((col, i) => (
                    <div key={i} className="text-xs text-gray-500 dark:text-gray-400 font-mono">
                      {i + 1}. {col.type} x={col.x} w={col.width}
                    </div>
                  ))}
              </div>
            )}
          </div>

          {/* Right: GT editor */}
          <div>
            <div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-1">
              Ground Truth Editor
            </div>
            <ManualColumnEditor
              imageUrl={dewarpedUrl}
              imageWidth={imageDimensions?.width ?? 1000}
              imageHeight={imageDimensions?.height ?? 1400}
              onApply={handleGtApply}
              onCancel={() => setViewMode('normal')}
              applying={applying}
              mode="ground-truth"
              layout="stacked"
              initialDividers={gtInitial?.dividers}
              initialColumnTypes={gtInitial?.columnTypes}
            />
          </div>
        </div>
      ) : (
        /* Normal mode: overlay (left) vs clean (right) */
        <div className="grid grid-cols-2 gap-4">
          <div>
            <div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-1">
              Mit Spalten-Overlay
            </div>
            <div className="border rounded-lg overflow-hidden dark:border-gray-700 bg-gray-50 dark:bg-gray-900">
              {columnResult ? (
                // eslint-disable-next-line @next/next/no-img-element
                <img
                  src={overlaySrc}
                  alt="Spalten-Overlay"
                  className="w-full h-auto"
                />
              ) : (
                <div className="aspect-[3/4] flex items-center justify-center text-gray-400 text-sm">
                  {detecting ? 'Erkenne Spalten...' : 'Keine Daten'}
                </div>
              )}
            </div>
          </div>
          <div>
            <div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-1">
              Entzerrtes Bild
            </div>
            <div className="border rounded-lg overflow-hidden dark:border-gray-700 bg-gray-50 dark:bg-gray-900">
              {/* eslint-disable-next-line @next/next/no-img-element */}
              <img
                src={dewarpedUrl}
                alt="Entzerrt"
                className="w-full h-auto"
              />
            </div>
          </div>
        </div>
      )}

      {/* Controls */}
      {viewMode === 'normal' && (
        <ColumnControls
          columnResult={columnResult}
          onRerun={handleRerun}
          onManualMode={() => setViewMode('manual')}
          onGtMode={() => setViewMode('ground-truth')}
          onGroundTruth={handleGroundTruth}
          onNext={onNext}
          isDetecting={detecting}
          savedGtColumns={savedGtColumns}
        />
      )}

      {error && (
        <div className="p-3 bg-red-50 dark:bg-red-900/20 text-red-600 dark:text-red-400 rounded-lg text-sm">
          {error}
        </div>
      )}
    </div>
  )
}
|
||||
|
||||
@@ -5,7 +5,7 @@ export function StepCoordinates() {
|
||||
<div className="flex flex-col items-center justify-center py-16 text-center">
|
||||
<div className="text-5xl mb-4">📍</div>
|
||||
<h3 className="text-lg font-medium text-gray-700 dark:text-gray-300 mb-2">
|
||||
Schritt 4: Koordinatenzuweisung
|
||||
Schritt 5: Koordinatenzuweisung
|
||||
</h3>
|
||||
<p className="text-gray-500 dark:text-gray-400 max-w-md">
|
||||
Exakte Positionszuweisung fuer jedes Wort auf der Seite.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
'use client'
|
||||
|
||||
import { useCallback, useState } from 'react'
|
||||
import { useCallback, useEffect, useState } from 'react'
|
||||
import type { DeskewGroundTruth, DeskewResult, SessionInfo } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
import { DeskewControls } from './DeskewControls'
|
||||
import { ImageCompareView } from './ImageCompareView'
|
||||
@@ -8,10 +8,11 @@ import { ImageCompareView } from './ImageCompareView'
|
||||
const KLAUSUR_API = '/klausur-api'
|
||||
|
||||
interface StepDeskewProps {
|
||||
onNext: () => void
|
||||
sessionId?: string | null
|
||||
onNext: (sessionId: string) => void
|
||||
}
|
||||
|
||||
export function StepDeskew({ onNext }: StepDeskewProps) {
|
||||
export function StepDeskew({ sessionId: existingSessionId, onNext }: StepDeskewProps) {
|
||||
const [session, setSession] = useState<SessionInfo | null>(null)
|
||||
const [deskewResult, setDeskewResult] = useState<DeskewResult | null>(null)
|
||||
const [uploading, setUploading] = useState(false)
|
||||
@@ -21,6 +22,43 @@ export function StepDeskew({ onNext }: StepDeskewProps) {
|
||||
const [showGrid, setShowGrid] = useState(true)
|
||||
const [error, setError] = useState<string | null>(null)
|
||||
const [dragOver, setDragOver] = useState(false)
|
||||
const [sessionName, setSessionName] = useState('')
|
||||
|
||||
// Reload session data when navigating back from a later step
|
||||
useEffect(() => {
|
||||
if (!existingSessionId || session) return
|
||||
|
||||
const loadSession = async () => {
|
||||
try {
|
||||
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${existingSessionId}`)
|
||||
if (!res.ok) return
|
||||
const data = await res.json()
|
||||
|
||||
const sessionInfo: SessionInfo = {
|
||||
session_id: data.session_id,
|
||||
filename: data.filename,
|
||||
image_width: data.image_width,
|
||||
image_height: data.image_height,
|
||||
original_image_url: `${KLAUSUR_API}${data.original_image_url}`,
|
||||
}
|
||||
setSession(sessionInfo)
|
||||
|
||||
// Reconstruct deskew result from session data
|
||||
if (data.deskew_result) {
|
||||
const dr: DeskewResult = {
|
||||
...data.deskew_result,
|
||||
deskewed_image_url: `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${existingSessionId}/image/deskewed`,
|
||||
binarized_image_url: `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${existingSessionId}/image/binarized`,
|
||||
}
|
||||
setDeskewResult(dr)
|
||||
}
|
||||
} catch (e) {
|
||||
console.error('Failed to reload session:', e)
|
||||
}
|
||||
}
|
||||
|
||||
loadSession()
|
||||
}, [existingSessionId, session])
|
||||
|
||||
const handleUpload = useCallback(async (file: File) => {
|
||||
setUploading(true)
|
||||
@@ -30,6 +68,9 @@ export function StepDeskew({ onNext }: StepDeskewProps) {
|
||||
try {
|
||||
const formData = new FormData()
|
||||
formData.append('file', file)
|
||||
if (sessionName.trim()) {
|
||||
formData.append('name', sessionName.trim())
|
||||
}
|
||||
|
||||
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions`, {
|
||||
method: 'POST',
|
||||
@@ -130,6 +171,20 @@ export function StepDeskew({ onNext }: StepDeskewProps) {
|
||||
if (!session) {
|
||||
return (
|
||||
<div className="space-y-4">
|
||||
{/* Session name input */}
|
||||
<div>
|
||||
<label className="block text-sm font-medium text-gray-600 dark:text-gray-400 mb-1">
|
||||
Session-Name (optional)
|
||||
</label>
|
||||
<input
|
||||
type="text"
|
||||
value={sessionName}
|
||||
onChange={(e) => setSessionName(e.target.value)}
|
||||
placeholder="z.B. Unit 3 Seite 42"
|
||||
className="w-full max-w-sm px-3 py-2 text-sm border rounded-lg dark:bg-gray-800 dark:border-gray-600 dark:text-gray-200 focus:outline-none focus:ring-2 focus:ring-teal-500"
|
||||
/>
|
||||
</div>
|
||||
|
||||
<div
|
||||
onDragOver={(e) => { e.preventDefault(); setDragOver(true) }}
|
||||
onDragLeave={() => setDragOver(false)}
|
||||
@@ -208,7 +263,7 @@ export function StepDeskew({ onNext }: StepDeskewProps) {
|
||||
onToggleGrid={() => setShowGrid((v) => !v)}
|
||||
onManualDeskew={handleManualDeskew}
|
||||
onGroundTruth={handleGroundTruth}
|
||||
onNext={onNext}
|
||||
onNext={() => session && onNext(session.session_id)}
|
||||
isApplying={applying}
|
||||
/>
|
||||
|
||||
|
||||
151
admin-lehrer/components/ocr-pipeline/StepDewarp.tsx
Normal file
151
admin-lehrer/components/ocr-pipeline/StepDewarp.tsx
Normal file
@@ -0,0 +1,151 @@
|
||||
'use client'
|
||||
|
||||
import { useCallback, useEffect, useState } from 'react'
|
||||
import type { DewarpResult, DewarpGroundTruth } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
import { DewarpControls } from './DewarpControls'
|
||||
import { ImageCompareView } from './ImageCompareView'
|
||||
|
||||
const KLAUSUR_API = '/klausur-api'
|
||||
|
||||
interface StepDewarpProps {
|
||||
sessionId: string | null
|
||||
onNext: () => void
|
||||
}
|
||||
|
||||
/**
 * Pipeline step for dewarping.
 *
 * Triggers the server-side dewarp automatically once a session id is
 * available, shows the deskewed image next to the dewarped one, and lets the
 * user apply a manual shear correction or record a ground-truth verdict.
 *
 * @param sessionId Active OCR pipeline session, or null when no session exists yet.
 * @param onNext    Invoked by the controls to advance to the next pipeline step.
 */
export function StepDewarp({ sessionId, onNext }: StepDewarpProps) {
  const [dewarpResult, setDewarpResult] = useState<DewarpResult | null>(null)
  const [dewarping, setDewarping] = useState(false)
  const [applying, setApplying] = useState(false)
  const [showGrid, setShowGrid] = useState(true)
  const [error, setError] = useState<string | null>(null)

  // Auto-trigger dewarp when component mounts with a sessionId
  useEffect(() => {
    if (!sessionId || dewarpResult) return

    // Set by the cleanup so a response that arrives after unmount or after a
    // session change cannot overwrite newer state.
    let cancelled = false

    const runDewarp = async () => {
      setDewarping(true)
      setError(null)
      try {
        const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/dewarp`, {
          method: 'POST',
        })
        if (!res.ok) {
          const err = await res.json().catch(() => ({ detail: res.statusText }))
          throw new Error(err.detail || 'Entzerrung fehlgeschlagen')
        }
        const data: DewarpResult = await res.json()
        if (cancelled) return
        // The server returns a path relative to the API root; prefix it
        // without mutating the parsed response object.
        setDewarpResult({
          ...data,
          dewarped_image_url: `${KLAUSUR_API}${data.dewarped_image_url}`,
        })
      } catch (e) {
        if (!cancelled) setError(e instanceof Error ? e.message : 'Unbekannter Fehler')
      } finally {
        // A cancelled run must not clear the spinner of the run that replaced it.
        if (!cancelled) setDewarping(false)
      }
    }

    runDewarp()
    return () => {
      cancelled = true
    }
  }, [sessionId, dewarpResult])

  /** Apply a user-chosen shear angle and refresh the dewarped image. */
  const handleManualDewarp = useCallback(async (shearDegrees: number) => {
    if (!sessionId) return
    setApplying(true)
    setError(null)

    try {
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/dewarp/manual`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ shear_degrees: shearDegrees }),
      })
      if (!res.ok) {
        // Surface the server's explanation when it provides one, as the automatic dewarp does.
        const err = await res.json().catch(() => ({ detail: res.statusText }))
        throw new Error(err.detail || 'Manuelle Entzerrung fehlgeschlagen')
      }

      const data = await res.json()
      setDewarpResult((prev) =>
        prev
          ? {
              ...prev,
              method_used: data.method_used,
              shear_degrees: data.shear_degrees,
              // Timestamp forces the browser to reload the regenerated image.
              dewarped_image_url: `${KLAUSUR_API}${data.dewarped_image_url}?t=${Date.now()}`,
            }
          : null,
      )
    } catch (e) {
      setError(e instanceof Error ? e.message : 'Fehler')
    } finally {
      setApplying(false)
    }
  }, [sessionId])

  /** Best-effort save of the ground-truth verdict; failures are only logged. */
  const handleGroundTruth = useCallback(async (gt: DewarpGroundTruth) => {
    if (!sessionId) return
    try {
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/ground-truth/dewarp`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(gt),
      })
      // fetch() only rejects on network errors; HTTP errors must be checked explicitly.
      if (!res.ok) {
        console.error('Ground truth save failed:', res.status, res.statusText)
      }
    } catch (e) {
      console.error('Ground truth save failed:', e)
    }
  }, [sessionId])

  if (!sessionId) {
    return (
      <div className="flex flex-col items-center justify-center py-16 text-center">
        <div className="text-5xl mb-4">🔧</div>
        <h3 className="text-lg font-medium text-gray-700 dark:text-gray-300 mb-2">
          Schritt 2: Entzerrung (Dewarp)
        </h3>
        <p className="text-gray-500 dark:text-gray-400 max-w-md">
          Bitte zuerst Schritt 1 (Begradigung) abschliessen.
        </p>
      </div>
    )
  }

  const deskewedUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/deskewed`
  const dewarpedUrl = dewarpResult?.dewarped_image_url ?? null

  return (
    <div className="space-y-4">
      {/* Loading indicator */}
      {dewarping && (
        <div className="flex items-center gap-2 text-teal-600 dark:text-teal-400 text-sm">
          <div className="animate-spin w-4 h-4 border-2 border-teal-500 border-t-transparent rounded-full" />
          Entzerrung laeuft (beide Methoden)...
        </div>
      )}

      {/* Image comparison: deskewed (left) vs dewarped (right) */}
      <ImageCompareView
        originalUrl={deskewedUrl}
        deskewedUrl={dewarpedUrl}
        showGrid={showGrid}
        showGridLeft={showGrid}
        showBinarized={false}
        binarizedUrl={null}
        leftLabel={`Begradigt (nach Deskew)${showGrid ? ' + Raster' : ''}`}
        rightLabel={`Entzerrt${showGrid ? ' + Raster (mm)' : ''}`}
      />

      {/* Controls */}
      <DewarpControls
        dewarpResult={dewarpResult}
        showGrid={showGrid}
        onToggleGrid={() => setShowGrid((v) => !v)}
        onManualDewarp={handleManualDewarp}
        onGroundTruth={handleGroundTruth}
        onNext={onNext}
        isApplying={applying}
      />

      {error && (
        <div className="p-3 bg-red-50 dark:bg-red-900/20 text-red-600 dark:text-red-400 rounded-lg text-sm">
          {error}
        </div>
      )}
    </div>
  )
}
|
||||
@@ -5,7 +5,7 @@ export function StepGroundTruth() {
|
||||
<div className="flex flex-col items-center justify-center py-16 text-center">
|
||||
<div className="text-5xl mb-4">✅</div>
|
||||
<h3 className="text-lg font-medium text-gray-700 dark:text-gray-300 mb-2">
|
||||
Schritt 6: Ground Truth Validierung
|
||||
Schritt 7: Ground Truth Validierung
|
||||
</h3>
|
||||
<p className="text-gray-500 dark:text-gray-400 max-w-md">
|
||||
Gesamtpruefung der rekonstruierten Seite gegen das Original.
|
||||
|
||||
@@ -5,7 +5,7 @@ export function StepReconstruction() {
|
||||
<div className="flex flex-col items-center justify-center py-16 text-center">
|
||||
<div className="text-5xl mb-4">🏗️</div>
|
||||
<h3 className="text-lg font-medium text-gray-700 dark:text-gray-300 mb-2">
|
||||
Schritt 5: Seitenrekonstruktion
|
||||
Schritt 6: Seitenrekonstruktion
|
||||
</h3>
|
||||
<p className="text-gray-500 dark:text-gray-400 max-w-md">
|
||||
Nachbau der Originalseite aus erkannten Woertern und Positionen.
|
||||
|
||||
@@ -5,7 +5,7 @@ export function StepWordRecognition() {
|
||||
<div className="flex flex-col items-center justify-center py-16 text-center">
|
||||
<div className="text-5xl mb-4">🔤</div>
|
||||
<h3 className="text-lg font-medium text-gray-700 dark:text-gray-300 mb-2">
|
||||
Schritt 3: Worterkennung
|
||||
Schritt 4: Worterkennung
|
||||
</h3>
|
||||
<p className="text-gray-500 dark:text-gray-400 max-w-md">
|
||||
OCR mit Bounding Boxes fuer jedes erkannte Wort.
|
||||
|
||||
@@ -8,24 +8,15 @@ RUN npm install
|
||||
COPY frontend/ ./
|
||||
RUN npm run build
|
||||
|
||||
# Production stage
|
||||
FROM python:3.11-slim
|
||||
# Production stage — uses pre-built base with Tesseract + Python deps.
|
||||
# Base image contains: python:3.11-slim + tesseract-ocr + all pip packages.
|
||||
# Rebuild base only when requirements.txt or system deps change:
|
||||
# docker build -f klausur-service/Dockerfile.base -t klausur-base:latest klausur-service/
|
||||
FROM klausur-base:latest
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Install system dependencies (incl. Tesseract OCR for bounding-box extraction)
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
curl \
|
||||
tesseract-ocr \
|
||||
tesseract-ocr-deu \
|
||||
tesseract-ocr-eng \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Install Python dependencies
|
||||
COPY backend/requirements.txt ./
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
# Copy backend code
|
||||
# Copy backend code (this is the only layer that changes on code edits)
|
||||
COPY backend/ ./
|
||||
|
||||
# Copy built frontend to the expected path
|
||||
|
||||
24
klausur-service/Dockerfile.base
Normal file
24
klausur-service/Dockerfile.base
Normal file
@@ -0,0 +1,24 @@
|
||||
# Base image with system dependencies + Python packages.
# These change rarely — build once, reuse on every --no-cache.
#
# Rebuild manually when requirements.txt or system deps change:
#   docker build -f klausur-service/Dockerfile.base -t klausur-base:latest klausur-service/
#
FROM python:3.11-slim

WORKDIR /app

# System dependencies (Tesseract OCR, curl for healthcheck).
# The apt lists are removed in the same RUN so they never land in a layer.
RUN apt-get update && apt-get install -y --no-install-recommends \
    curl \
    tesseract-ocr \
    tesseract-ocr-deu \
    tesseract-ocr-eng \
    && rm -rf /var/lib/apt/lists/*

# Python dependencies.
# --no-cache-dir already keeps pip from writing its cache, so no separate
# cleanup layer is needed afterwards (a later RUN could not shrink earlier
# layers anyway).
COPY backend/requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt
|
||||
File diff suppressed because it is too large
Load Diff
@@ -43,6 +43,7 @@ except ImportError:
|
||||
trocr_router = None
|
||||
from vocab_worksheet_api import router as vocab_router, set_db_pool as set_vocab_db_pool, _init_vocab_table, _load_all_sessions, DATABASE_URL as VOCAB_DATABASE_URL
|
||||
from ocr_pipeline_api import router as ocr_pipeline_router
|
||||
from ocr_pipeline_session_store import init_ocr_pipeline_tables
|
||||
try:
|
||||
from dsfa_rag_api import router as dsfa_rag_router, set_db_pool as set_dsfa_db_pool
|
||||
from dsfa_corpus_ingestion import DSFAQdrantService, DATABASE_URL as DSFA_DATABASE_URL
|
||||
@@ -76,6 +77,13 @@ async def lifespan(app: FastAPI):
|
||||
except Exception as e:
|
||||
print(f"Warning: Vocab sessions database initialization failed: {e}")
|
||||
|
||||
# Initialize OCR Pipeline session tables
|
||||
try:
|
||||
await init_ocr_pipeline_tables()
|
||||
print("OCR Pipeline session tables initialized")
|
||||
except Exception as e:
|
||||
print(f"Warning: OCR Pipeline tables initialization failed: {e}")
|
||||
|
||||
# Initialize database pool for DSFA RAG
|
||||
dsfa_db_pool = None
|
||||
if DSFA_DATABASE_URL and set_dsfa_db_pool:
|
||||
|
||||
@@ -0,0 +1,28 @@
|
||||
-- OCR Pipeline Sessions - Persistent session storage
-- Applied automatically by ocr_pipeline_session_store.init_ocr_pipeline_tables()

-- One row per OCR pipeline session: metadata, the intermediate images of each
-- processing step, and the per-step result documents.
CREATE TABLE IF NOT EXISTS ocr_pipeline_sessions (
    id UUID PRIMARY KEY,
    name VARCHAR(255) NOT NULL,
    filename VARCHAR(255),
    status VARCHAR(50) DEFAULT 'active',
    current_step INT DEFAULT 1,
    -- Intermediate images stored as binary blobs
    -- (presumably PNG-encoded, going by the column names).
    original_png BYTEA,
    deskewed_png BYTEA,
    binarized_png BYTEA,
    dewarped_png BYTEA,
    -- Per-step result documents.
    deskew_result JSONB,
    dewarp_result JSONB,
    column_result JSONB,
    -- Reviewer feedback; defaults to an empty JSON object.
    ground_truth JSONB DEFAULT '{}',
    auto_shear_degrees FLOAT,
    created_at TIMESTAMP DEFAULT NOW(),
    updated_at TIMESTAMP DEFAULT NOW()
);

-- Index for listing sessions
CREATE INDEX IF NOT EXISTS idx_ocr_pipeline_sessions_created
    ON ocr_pipeline_sessions (created_at DESC);

-- Index on the status column
CREATE INDEX IF NOT EXISTS idx_ocr_pipeline_sessions_status
    ON ocr_pipeline_sessions (status);
|
||||
@@ -1,66 +1,104 @@
|
||||
"""
|
||||
OCR Pipeline API - Schrittweise Seitenrekonstruktion.
|
||||
|
||||
Zerlegt den OCR-Prozess in 6 einzelne Schritte:
|
||||
Zerlegt den OCR-Prozess in 7 einzelne Schritte:
|
||||
1. Deskewing - Scan begradigen
|
||||
2. Spaltenerkennung - Unsichtbare Spalten finden
|
||||
3. Worterkennung - OCR mit Bounding Boxes
|
||||
4. Koordinatenzuweisung - Exakte Positionen
|
||||
5. Seitenrekonstruktion - Seite nachbauen
|
||||
6. Ground Truth Validierung - Gesamtpruefung
|
||||
2. Dewarping - Buchwoelbung entzerren
|
||||
3. Spaltenerkennung - Unsichtbare Spalten finden
|
||||
4. Worterkennung - OCR mit Bounding Boxes
|
||||
5. Koordinatenzuweisung - Exakte Positionen
|
||||
6. Seitenrekonstruktion - Seite nachbauen
|
||||
7. Ground Truth Validierung - Gesamtpruefung
|
||||
|
||||
Lizenz: Apache 2.0
|
||||
DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
|
||||
"""
|
||||
|
||||
import io
|
||||
import logging
|
||||
import time
|
||||
import uuid
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Any, Dict, Optional
|
||||
from dataclasses import asdict
|
||||
from datetime import datetime
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
from fastapi import APIRouter, File, HTTPException, UploadFile
|
||||
from fastapi import APIRouter, File, Form, HTTPException, UploadFile
|
||||
from fastapi.responses import Response
|
||||
from pydantic import BaseModel
|
||||
|
||||
from cv_vocab_pipeline import (
|
||||
analyze_layout,
|
||||
analyze_layout_by_words,
|
||||
create_ocr_image,
|
||||
deskew_image,
|
||||
deskew_image_by_word_alignment,
|
||||
dewarp_image,
|
||||
dewarp_image_manual,
|
||||
render_image_high_res,
|
||||
render_pdf_high_res,
|
||||
)
|
||||
from ocr_pipeline_session_store import (
|
||||
create_session_db,
|
||||
delete_session_db,
|
||||
get_session_db,
|
||||
get_session_image,
|
||||
init_ocr_pipeline_tables,
|
||||
list_sessions_db,
|
||||
update_session_db,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(prefix="/api/v1/ocr-pipeline", tags=["ocr-pipeline"])
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# In-memory session store (24h TTL)
|
||||
# In-memory cache for active sessions (BGR numpy arrays for processing)
|
||||
# DB is source of truth, cache holds BGR arrays during active processing.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_sessions: Dict[str, Dict[str, Any]] = {}
|
||||
SESSION_TTL_HOURS = 24
|
||||
_cache: Dict[str, Dict[str, Any]] = {}
|
||||
|
||||
|
||||
def _cleanup_expired():
|
||||
"""Remove sessions older than TTL."""
|
||||
cutoff = datetime.utcnow() - timedelta(hours=SESSION_TTL_HOURS)
|
||||
expired = [sid for sid, s in _sessions.items() if s.get("created_at", datetime.utcnow()) < cutoff]
|
||||
for sid in expired:
|
||||
del _sessions[sid]
|
||||
logger.info(f"OCR Pipeline: expired session {sid}")
|
||||
|
||||
|
||||
def _get_session(session_id: str) -> Dict[str, Any]:
|
||||
"""Get session or raise 404."""
|
||||
session = _sessions.get(session_id)
|
||||
async def _load_session_to_cache(session_id: str) -> Dict[str, Any]:
|
||||
"""Load session from DB into cache, decoding PNGs to BGR arrays."""
|
||||
session = await get_session_db(session_id)
|
||||
if not session:
|
||||
raise HTTPException(status_code=404, detail=f"Session {session_id} not found")
|
||||
return session
|
||||
|
||||
if session_id in _cache:
|
||||
return _cache[session_id]
|
||||
|
||||
cache_entry: Dict[str, Any] = {
|
||||
"id": session_id,
|
||||
**session,
|
||||
"original_bgr": None,
|
||||
"deskewed_bgr": None,
|
||||
"dewarped_bgr": None,
|
||||
}
|
||||
|
||||
# Decode images from DB into BGR numpy arrays
|
||||
for img_type, bgr_key in [
|
||||
("original", "original_bgr"),
|
||||
("deskewed", "deskewed_bgr"),
|
||||
("dewarped", "dewarped_bgr"),
|
||||
]:
|
||||
png_data = await get_session_image(session_id, img_type)
|
||||
if png_data:
|
||||
arr = np.frombuffer(png_data, dtype=np.uint8)
|
||||
bgr = cv2.imdecode(arr, cv2.IMREAD_COLOR)
|
||||
cache_entry[bgr_key] = bgr
|
||||
|
||||
_cache[session_id] = cache_entry
|
||||
return cache_entry
|
||||
|
||||
|
||||
def _get_cached(session_id: str) -> Dict[str, Any]:
    """Return the in-memory cache entry for ``session_id``.

    Raises:
        HTTPException: 404 when nothing (or an empty entry) is cached
            under that id.
    """
    cached_entry = _cache.get(session_id)
    if cached_entry:
        return cached_entry
    raise HTTPException(
        status_code=404,
        detail=f"Session {session_id} not in cache — reload first",
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -77,15 +115,47 @@ class DeskewGroundTruthRequest(BaseModel):
|
||||
notes: Optional[str] = None
|
||||
|
||||
|
||||
class ManualDewarpRequest(BaseModel):
    """Request body of the manual dewarp endpoint."""

    # Requested shear correction; manual_dewarp clamps it to [-2.0, 2.0].
    shear_degrees: float
|
||||
|
||||
|
||||
class DewarpGroundTruthRequest(BaseModel):
    """Request body for saving ground-truth feedback on the dewarp step."""

    # Reviewer verdict on the dewarp result.
    is_correct: bool
    # Optional corrected shear value supplied by the reviewer.
    corrected_shear: Optional[float] = None
    # Optional free-text remarks.
    notes: Optional[str] = None
|
||||
|
||||
|
||||
class RenameSessionRequest(BaseModel):
    """Request body of the session rename endpoint."""

    # New display name to store for the session.
    name: str
|
||||
|
||||
|
||||
class ManualColumnsRequest(BaseModel):
    """Request body for overriding detected columns with manual definitions."""

    # Column definitions, stored verbatim as column_result["columns"].
    # NOTE(review): the overlay renderer reads "x", "y", "width", "height"
    # and "type" from each entry — confirm callers always send those keys.
    columns: List[Dict[str, Any]]
|
||||
|
||||
|
||||
class ColumnGroundTruthRequest(BaseModel):
    """Request body for saving ground-truth feedback on column detection."""

    # Reviewer verdict on the detected columns.
    is_correct: bool
    # Optional corrected column definitions supplied by the reviewer.
    corrected_columns: Optional[List[Dict[str, Any]]] = None
    # Optional free-text remarks.
    notes: Optional[str] = None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Endpoints
|
||||
# Session Management Endpoints
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@router.get("/sessions")
async def list_sessions():
    """Return every stored OCR pipeline session under the ``sessions`` key."""
    return {"sessions": await list_sessions_db()}
|
||||
|
||||
|
||||
@router.post("/sessions")
|
||||
async def create_session(file: UploadFile = File(...)):
|
||||
async def create_session(
|
||||
file: UploadFile = File(...),
|
||||
name: Optional[str] = Form(None),
|
||||
):
|
||||
"""Upload a PDF or image file and create a pipeline session."""
|
||||
_cleanup_expired()
|
||||
|
||||
file_data = await file.read()
|
||||
filename = file.filename or "upload"
|
||||
content_type = file.content_type or ""
|
||||
@@ -101,21 +171,32 @@ async def create_session(file: UploadFile = File(...)):
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=400, detail=f"Could not process file: {e}")
|
||||
|
||||
# Encode original as PNG bytes for serving
|
||||
# Encode original as PNG bytes
|
||||
success, png_buf = cv2.imencode(".png", img_bgr)
|
||||
if not success:
|
||||
raise HTTPException(status_code=500, detail="Failed to encode image")
|
||||
|
||||
_sessions[session_id] = {
|
||||
original_png = png_buf.tobytes()
|
||||
session_name = name or filename
|
||||
|
||||
# Persist to DB
|
||||
await create_session_db(
|
||||
session_id=session_id,
|
||||
name=session_name,
|
||||
filename=filename,
|
||||
original_png=original_png,
|
||||
)
|
||||
|
||||
# Cache BGR array for immediate processing
|
||||
_cache[session_id] = {
|
||||
"id": session_id,
|
||||
"filename": filename,
|
||||
"created_at": datetime.utcnow(),
|
||||
"name": session_name,
|
||||
"original_bgr": img_bgr,
|
||||
"original_png": png_buf.tobytes(),
|
||||
"deskewed_bgr": None,
|
||||
"deskewed_png": None,
|
||||
"binarized_png": None,
|
||||
"dewarped_bgr": None,
|
||||
"deskew_result": None,
|
||||
"dewarp_result": None,
|
||||
"ground_truth": {},
|
||||
"current_step": 1,
|
||||
}
|
||||
@@ -126,17 +207,115 @@ async def create_session(file: UploadFile = File(...)):
|
||||
return {
|
||||
"session_id": session_id,
|
||||
"filename": filename,
|
||||
"name": session_name,
|
||||
"image_width": img_bgr.shape[1],
|
||||
"image_height": img_bgr.shape[0],
|
||||
"original_image_url": f"/api/v1/ocr-pipeline/sessions/{session_id}/image/original",
|
||||
}
|
||||
|
||||
|
||||
@router.get("/sessions/{session_id}")
async def get_session_info(session_id: str):
    """Get session info including deskew/dewarp/column results for step navigation.

    Args:
        session_id: Identifier of the session to look up in the database.

    Returns:
        A dict with the session id, filename, name, the original image's
        width and height, the URL of the original image and the current
        step. ``deskew_result``, ``dewarp_result`` and ``column_result`` are
        included only when the stored value is truthy.

    Raises:
        HTTPException: 404 when the session does not exist.
    """
    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    # Image dimensions come from the stored original PNG. Both stay 0 when the
    # image is missing or cannot be decoded. The previous one-liner
    # `w, h = img.shape[1], img.shape[0] if img is not None else (0, 0)`
    # guarded only the second tuple element, so an undecodable image raised
    # AttributeError on `img.shape[1]`.
    img_w, img_h = 0, 0
    original_png = await get_session_image(session_id, "original")
    if original_png:
        arr = np.frombuffer(original_png, dtype=np.uint8)
        img = cv2.imdecode(arr, cv2.IMREAD_COLOR)
        if img is not None:
            img_h, img_w = img.shape[:2]

    result = {
        "session_id": session["id"],
        "filename": session.get("filename", ""),
        "name": session.get("name", ""),
        "image_width": img_w,
        "image_height": img_h,
        "original_image_url": f"/api/v1/ocr-pipeline/sessions/{session_id}/image/original",
        "current_step": session.get("current_step", 1),
    }

    # Step results are optional: only expose the ones that have been produced.
    for key in ("deskew_result", "dewarp_result", "column_result"):
        if session.get(key):
            result[key] = session[key]

    return result
|
||||
|
||||
|
||||
@router.put("/sessions/{session_id}")
async def rename_session(session_id: str, req: RenameSessionRequest):
    """Store a new name for the session.

    Raises:
        HTTPException: 404 when the database update reports no such session.
    """
    new_name = req.name
    if await update_session_db(session_id, name=new_name):
        return {"session_id": session_id, "name": new_name}
    raise HTTPException(status_code=404, detail=f"Session {session_id} not found")
|
||||
|
||||
|
||||
@router.delete("/sessions/{session_id}")
async def delete_session(session_id: str):
    """Remove a session from the in-memory cache and from the database.

    Raises:
        HTTPException: 404 when the database reports nothing was deleted.
    """
    # Drop the cache entry first; a missing entry is not an error.
    _cache.pop(session_id, None)

    if await delete_session_db(session_id):
        return {"session_id": session_id, "deleted": True}
    raise HTTPException(status_code=404, detail=f"Session {session_id} not found")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Image Endpoints
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@router.get("/sessions/{session_id}/image/{image_type}")
async def get_image(session_id: str, image_type: str):
    """Serve session images: original, deskewed, dewarped, binarized, or columns-overlay.

    Args:
        session_id: Identifier of the session whose image is requested.
        image_type: One of ``original``, ``deskewed``, ``dewarped``,
            ``binarized`` or ``columns-overlay``.

    Returns:
        A PNG ``Response``. ``columns-overlay`` is delegated to
        ``_get_columns_overlay``.

    Raises:
        HTTPException: 400 for an unknown image type, 404 when the image
            has not been produced yet.
    """
    valid_types = {"original", "deskewed", "dewarped", "binarized", "columns-overlay"}
    if image_type not in valid_types:
        raise HTTPException(status_code=400, detail=f"Unknown image type: {image_type}")

    if image_type == "columns-overlay":
        return await _get_columns_overlay(session_id)

    # Only the binarized image is kept in the cache as PNG bytes, so it is the
    # only type that can be served without a DB round trip. (The unused
    # png_key/bgr_key locals of the earlier version were dropped.)
    if image_type == "binarized":
        cached = _cache.get(session_id)
        if cached and cached.get("binarized_png"):
            return Response(content=cached["binarized_png"], media_type="image/png")

    # Everything else is loaded from the DB.
    data = await get_session_image(session_id, image_type)
    if not data:
        raise HTTPException(status_code=404, detail=f"Image '{image_type}' not available yet")

    return Response(content=data, media_type="image/png")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Deskew Endpoints
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@router.post("/sessions/{session_id}/deskew")
|
||||
async def auto_deskew(session_id: str):
|
||||
"""Run both deskew methods and pick the best one."""
|
||||
session = _get_session(session_id)
|
||||
img_bgr = session["original_bgr"]
|
||||
# Ensure session is in cache
|
||||
if session_id not in _cache:
|
||||
await _load_session_to_cache(session_id)
|
||||
cached = _get_cached(session_id)
|
||||
|
||||
img_bgr = cached.get("original_bgr")
|
||||
if img_bgr is None:
|
||||
raise HTTPException(status_code=400, detail="Original image not available")
|
||||
|
||||
t0 = time.time()
|
||||
|
||||
@@ -159,12 +338,10 @@ async def auto_deskew(session_id: str):
|
||||
|
||||
duration = time.time() - t0
|
||||
|
||||
# Pick method with larger detected angle (more correction needed = more skew found)
|
||||
# If both are ~0, prefer word alignment as it's more robust
|
||||
# Pick best method
|
||||
if abs(angle_wa) >= abs(angle_hough) or abs(angle_hough) < 0.1:
|
||||
method_used = "word_alignment"
|
||||
angle_applied = angle_wa
|
||||
# Decode word alignment result to BGR
|
||||
wa_array = np.frombuffer(deskewed_wa_bytes, dtype=np.uint8)
|
||||
deskewed_bgr = cv2.imdecode(wa_array, cv2.IMREAD_COLOR)
|
||||
if deskewed_bgr is None:
|
||||
@@ -176,20 +353,19 @@ async def auto_deskew(session_id: str):
|
||||
angle_applied = angle_hough
|
||||
deskewed_bgr = deskewed_hough
|
||||
|
||||
# Encode deskewed as PNG
|
||||
# Encode as PNG
|
||||
success, deskewed_png_buf = cv2.imencode(".png", deskewed_bgr)
|
||||
deskewed_png = deskewed_png_buf.tobytes() if success else session["original_png"]
|
||||
deskewed_png = deskewed_png_buf.tobytes() if success else b""
|
||||
|
||||
# Create binarized version
|
||||
binarized_png = None
|
||||
try:
|
||||
binarized = create_ocr_image(deskewed_bgr)
|
||||
success_bin, bin_buf = cv2.imencode(".png", binarized)
|
||||
binarized_png = bin_buf.tobytes() if success_bin else None
|
||||
except Exception as e:
|
||||
logger.warning(f"Binarization failed: {e}")
|
||||
binarized_png = None
|
||||
|
||||
# Confidence: higher angle = lower confidence that we got it right
|
||||
confidence = max(0.5, 1.0 - abs(angle_applied) / 5.0)
|
||||
|
||||
deskew_result = {
|
||||
@@ -201,13 +377,23 @@ async def auto_deskew(session_id: str):
|
||||
"duration_seconds": round(duration, 2),
|
||||
}
|
||||
|
||||
session["deskewed_bgr"] = deskewed_bgr
|
||||
session["deskewed_png"] = deskewed_png
|
||||
session["binarized_png"] = binarized_png
|
||||
session["deskew_result"] = deskew_result
|
||||
# Update cache
|
||||
cached["deskewed_bgr"] = deskewed_bgr
|
||||
cached["binarized_png"] = binarized_png
|
||||
cached["deskew_result"] = deskew_result
|
||||
|
||||
# Persist to DB
|
||||
db_update = {
|
||||
"deskewed_png": deskewed_png,
|
||||
"deskew_result": deskew_result,
|
||||
"current_step": 2,
|
||||
}
|
||||
if binarized_png:
|
||||
db_update["binarized_png"] = binarized_png
|
||||
await update_session_db(session_id, **db_update)
|
||||
|
||||
logger.info(f"OCR Pipeline: deskew session {session_id}: "
|
||||
f"hough={angle_hough:.2f}° wa={angle_wa:.2f}° → {method_used} {angle_applied:.2f}°")
|
||||
f"hough={angle_hough:.2f} wa={angle_wa:.2f} -> {method_used} {angle_applied:.2f}")
|
||||
|
||||
return {
|
||||
"session_id": session_id,
|
||||
@@ -220,8 +406,14 @@ async def auto_deskew(session_id: str):
|
||||
@router.post("/sessions/{session_id}/deskew/manual")
|
||||
async def manual_deskew(session_id: str, req: ManualDeskewRequest):
|
||||
"""Apply a manual rotation angle to the original image."""
|
||||
session = _get_session(session_id)
|
||||
img_bgr = session["original_bgr"]
|
||||
if session_id not in _cache:
|
||||
await _load_session_to_cache(session_id)
|
||||
cached = _get_cached(session_id)
|
||||
|
||||
img_bgr = cached.get("original_bgr")
|
||||
if img_bgr is None:
|
||||
raise HTTPException(status_code=400, detail="Original image not available")
|
||||
|
||||
angle = max(-5.0, min(5.0, req.angle))
|
||||
|
||||
h, w = img_bgr.shape[:2]
|
||||
@@ -232,26 +424,38 @@ async def manual_deskew(session_id: str, req: ManualDeskewRequest):
|
||||
borderMode=cv2.BORDER_REPLICATE)
|
||||
|
||||
success, png_buf = cv2.imencode(".png", rotated)
|
||||
deskewed_png = png_buf.tobytes() if success else session["original_png"]
|
||||
deskewed_png = png_buf.tobytes() if success else b""
|
||||
|
||||
# Binarize
|
||||
binarized_png = None
|
||||
try:
|
||||
binarized = create_ocr_image(rotated)
|
||||
success_bin, bin_buf = cv2.imencode(".png", binarized)
|
||||
binarized_png = bin_buf.tobytes() if success_bin else None
|
||||
except Exception:
|
||||
binarized_png = None
|
||||
pass
|
||||
|
||||
session["deskewed_bgr"] = rotated
|
||||
session["deskewed_png"] = deskewed_png
|
||||
session["binarized_png"] = binarized_png
|
||||
session["deskew_result"] = {
|
||||
**(session.get("deskew_result") or {}),
|
||||
deskew_result = {
|
||||
**(cached.get("deskew_result") or {}),
|
||||
"angle_applied": round(angle, 3),
|
||||
"method_used": "manual",
|
||||
}
|
||||
|
||||
logger.info(f"OCR Pipeline: manual deskew session {session_id}: {angle:.2f}°")
|
||||
# Update cache
|
||||
cached["deskewed_bgr"] = rotated
|
||||
cached["binarized_png"] = binarized_png
|
||||
cached["deskew_result"] = deskew_result
|
||||
|
||||
# Persist to DB
|
||||
db_update = {
|
||||
"deskewed_png": deskewed_png,
|
||||
"deskew_result": deskew_result,
|
||||
}
|
||||
if binarized_png:
|
||||
db_update["binarized_png"] = binarized_png
|
||||
await update_session_db(session_id, **db_update)
|
||||
|
||||
logger.info(f"OCR Pipeline: manual deskew session {session_id}: {angle:.2f}")
|
||||
|
||||
return {
|
||||
"session_id": session_id,
|
||||
@@ -261,31 +465,14 @@ async def manual_deskew(session_id: str, req: ManualDeskewRequest):
|
||||
}
|
||||
|
||||
|
||||
@router.get("/sessions/{session_id}/image/{image_type}")
|
||||
async def get_image(session_id: str, image_type: str):
|
||||
"""Serve session images: original, deskewed, or binarized."""
|
||||
session = _get_session(session_id)
|
||||
|
||||
if image_type == "original":
|
||||
data = session.get("original_png")
|
||||
elif image_type == "deskewed":
|
||||
data = session.get("deskewed_png")
|
||||
elif image_type == "binarized":
|
||||
data = session.get("binarized_png")
|
||||
else:
|
||||
raise HTTPException(status_code=400, detail=f"Unknown image type: {image_type}")
|
||||
|
||||
if not data:
|
||||
raise HTTPException(status_code=404, detail=f"Image '{image_type}' not available yet")
|
||||
|
||||
return Response(content=data, media_type="image/png")
|
||||
|
||||
|
||||
@router.post("/sessions/{session_id}/ground-truth/deskew")
|
||||
async def save_deskew_ground_truth(session_id: str, req: DeskewGroundTruthRequest):
|
||||
"""Save ground truth feedback for the deskew step."""
|
||||
session = _get_session(session_id)
|
||||
session = await get_session_db(session_id)
|
||||
if not session:
|
||||
raise HTTPException(status_code=404, detail=f"Session {session_id} not found")
|
||||
|
||||
ground_truth = session.get("ground_truth") or {}
|
||||
gt = {
|
||||
"is_correct": req.is_correct,
|
||||
"corrected_angle": req.corrected_angle,
|
||||
@@ -293,9 +480,330 @@ async def save_deskew_ground_truth(session_id: str, req: DeskewGroundTruthReques
|
||||
"saved_at": datetime.utcnow().isoformat(),
|
||||
"deskew_result": session.get("deskew_result"),
|
||||
}
|
||||
session["ground_truth"]["deskew"] = gt
|
||||
ground_truth["deskew"] = gt
|
||||
|
||||
await update_session_db(session_id, ground_truth=ground_truth)
|
||||
|
||||
# Update cache
|
||||
if session_id in _cache:
|
||||
_cache[session_id]["ground_truth"] = ground_truth
|
||||
|
||||
logger.info(f"OCR Pipeline: ground truth deskew session {session_id}: "
|
||||
f"correct={req.is_correct}, corrected_angle={req.corrected_angle}")
|
||||
|
||||
return {"session_id": session_id, "ground_truth": gt}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Dewarp Endpoints
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@router.post("/sessions/{session_id}/dewarp")
async def auto_dewarp(session_id: str):
    """Run automatic dewarping on the deskewed image of a session.

    The result is kept in the in-memory cache (BGR array + result dict) and
    persisted to the database (PNG bytes, result dict, detected shear,
    ``current_step=3``).

    Raises:
        HTTPException: 400 when no deskewed image is available yet; 404 is
            propagated from the cache helpers for unknown sessions.
    """
    if session_id not in _cache:
        await _load_session_to_cache(session_id)
    entry = _get_cached(session_id)

    source_bgr = entry.get("deskewed_bgr")
    if source_bgr is None:
        raise HTTPException(status_code=400, detail="Deskew must be completed before dewarp")

    started = time.time()
    dewarped_bgr, dewarp_info = dewarp_image(source_bgr)
    duration = time.time() - started

    # PNG-encode for persistence; an encoding failure yields empty bytes.
    encoded_ok, encoded = cv2.imencode(".png", dewarped_bgr)
    dewarped_png = b""
    if encoded_ok:
        dewarped_png = encoded.tobytes()

    dewarp_result = {
        "method_used": dewarp_info["method"],
        "shear_degrees": dewarp_info["shear_degrees"],
        "confidence": dewarp_info["confidence"],
        "duration_seconds": round(duration, 2),
    }

    # Cache first ...
    entry["dewarped_bgr"] = dewarped_bgr
    entry["dewarp_result"] = dewarp_result

    # ... then the database.
    await update_session_db(
        session_id,
        dewarped_png=dewarped_png,
        dewarp_result=dewarp_result,
        auto_shear_degrees=dewarp_info.get("shear_degrees", 0.0),
        current_step=3,
    )

    logger.info(
        f"OCR Pipeline: dewarp session {session_id}: "
        f"method={dewarp_info['method']} shear={dewarp_info['shear_degrees']:.3f} "
        f"conf={dewarp_info['confidence']:.2f} ({duration:.2f}s)"
    )

    response = {"session_id": session_id}
    response.update(dewarp_result)
    response["dewarped_image_url"] = f"/api/v1/ocr-pipeline/sessions/{session_id}/image/dewarped"
    return response
|
||||
|
||||
|
||||
@router.post("/sessions/{session_id}/dewarp/manual")
async def manual_dewarp(session_id: str, req: ManualDewarpRequest):
    """Dewarp the deskewed image using a caller-supplied shear angle.

    The requested angle is clamped to [-2.0, 2.0] degrees. An angle whose
    magnitude is below 0.001 leaves the deskewed image untouched.

    Raises:
        HTTPException: 400 when no deskewed image is available yet; 404 is
            propagated from the cache helpers for unknown sessions.
    """
    if session_id not in _cache:
        await _load_session_to_cache(session_id)
    entry = _get_cached(session_id)

    source_bgr = entry.get("deskewed_bgr")
    if source_bgr is None:
        raise HTTPException(status_code=400, detail="Deskew must be completed before dewarp")

    shear_deg = max(-2.0, min(2.0, req.shear_degrees))
    rounded_shear = round(shear_deg, 3)

    dewarped_bgr = (
        source_bgr
        if abs(shear_deg) < 0.001
        else dewarp_image_manual(source_bgr, shear_deg)
    )

    # PNG-encode for persistence; an encoding failure yields empty bytes.
    encoded_ok, encoded = cv2.imencode(".png", dewarped_bgr)
    dewarped_png = b""
    if encoded_ok:
        dewarped_png = encoded.tobytes()

    # Keep whatever an earlier run stored, overriding method and angle.
    dewarp_result = dict(entry.get("dewarp_result") or {})
    dewarp_result["method_used"] = "manual"
    dewarp_result["shear_degrees"] = rounded_shear

    entry["dewarped_bgr"] = dewarped_bgr
    entry["dewarp_result"] = dewarp_result

    await update_session_db(
        session_id,
        dewarped_png=dewarped_png,
        dewarp_result=dewarp_result,
    )

    logger.info(f"OCR Pipeline: manual dewarp session {session_id}: shear={shear_deg:.3f}")

    return {
        "session_id": session_id,
        "shear_degrees": rounded_shear,
        "method_used": "manual",
        "dewarped_image_url": f"/api/v1/ocr-pipeline/sessions/{session_id}/image/dewarped",
    }
|
||||
|
||||
|
||||
@router.post("/sessions/{session_id}/ground-truth/dewarp")
async def save_dewarp_ground_truth(session_id: str, req: DewarpGroundTruthRequest):
    """Store reviewer feedback for the dewarp step under ``ground_truth["dewarp"]``.

    The feedback entry also snapshots the session's current ``dewarp_result``.

    Raises:
        HTTPException: 404 when the session does not exist.
    """
    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    feedback = {
        "is_correct": req.is_correct,
        "corrected_shear": req.corrected_shear,
        "notes": req.notes,
        "saved_at": datetime.utcnow().isoformat(),
        "dewarp_result": session.get("dewarp_result"),
    }
    all_ground_truth = session.get("ground_truth") or {}
    all_ground_truth["dewarp"] = feedback

    await update_session_db(session_id, ground_truth=all_ground_truth)

    # Keep an already-cached session in sync with the database.
    if session_id in _cache:
        _cache[session_id]["ground_truth"] = all_ground_truth

    logger.info(
        f"OCR Pipeline: ground truth dewarp session {session_id}: "
        f"correct={req.is_correct}, corrected_shear={req.corrected_shear}"
    )

    return {"session_id": session_id, "ground_truth": feedback}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Column Detection Endpoints (Step 3)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@router.post("/sessions/{session_id}/columns")
async def detect_columns(session_id: str):
    """Detect layout columns on the dewarped image of a session.

    The detected regions are converted to dicts, stored in the database and
    the cache as ``column_result`` and returned to the caller.

    Raises:
        HTTPException: 400 when no dewarped image is available yet; 404 is
            propagated from the cache helpers for unknown sessions.
    """
    if session_id not in _cache:
        await _load_session_to_cache(session_id)
    entry = _get_cached(session_id)

    page_bgr = entry.get("dewarped_bgr")
    if page_bgr is None:
        raise HTTPException(status_code=400, detail="Dewarp must be completed before column detection")

    started = time.time()
    # Layout analysis gets both the OCR-prepared image and the colour image.
    # (The original author notes the word-based detection falls back to
    # projection profiles automatically.)
    ocr_img = create_ocr_image(page_bgr)
    regions = analyze_layout_by_words(ocr_img, page_bgr)
    duration = time.time() - started

    columns = [asdict(region) for region in regions]

    # Distinct, non-empty classification methods that appear in the result.
    methods = list({
        col.get("classification_method", "")
        for col in columns
        if col.get("classification_method")
    })

    column_result = {
        "columns": columns,
        "classification_methods": methods,
        "duration_seconds": round(duration, 2),
    }

    # Database first, then cache.
    # NOTE(review): auto_dewarp also writes current_step=3 — confirm whether
    # this step is meant to advance the session further.
    await update_session_db(
        session_id,
        column_result=column_result,
        current_step=3,
    )
    entry["column_result"] = column_result

    col_count = sum(1 for col in columns if col["type"].startswith("column"))
    logger.info(
        f"OCR Pipeline: columns session {session_id}: "
        f"{col_count} columns detected ({duration:.2f}s)"
    )

    response = {"session_id": session_id}
    response.update(column_result)
    return response
|
||||
|
||||
|
||||
@router.post("/sessions/{session_id}/columns/manual")
async def set_manual_columns(session_id: str, req: ManualColumnsRequest):
    """Override detected columns with manual definitions.

    Args:
        session_id: Identifier of the session to update.
        req: Manual column definitions; stored verbatim.

    Returns:
        A dict with the session id and the stored ``column_result`` fields.

    Raises:
        HTTPException: 404 when the database update reports no such session.
    """
    column_result = {
        "columns": req.columns,
        "duration_seconds": 0,
        "method": "manual",
    }

    # update_session_db returns a falsy value for an unknown session (see
    # rename_session); previously that was ignored and a success response was
    # returned for sessions that do not exist.
    updated = await update_session_db(session_id, column_result=column_result)
    if not updated:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    # Keep an already-cached session in sync with the database.
    if session_id in _cache:
        _cache[session_id]["column_result"] = column_result

    logger.info(f"OCR Pipeline: manual columns session {session_id}: "
                f"{len(req.columns)} columns set")

    return {"session_id": session_id, **column_result}
|
||||
|
||||
|
||||
@router.post("/sessions/{session_id}/ground-truth/columns")
async def save_column_ground_truth(session_id: str, req: ColumnGroundTruthRequest):
    """Store reviewer feedback for column detection under ``ground_truth["columns"]``.

    The feedback entry also snapshots the session's current ``column_result``.

    Raises:
        HTTPException: 404 when the session does not exist.
    """
    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    feedback = {
        "is_correct": req.is_correct,
        "corrected_columns": req.corrected_columns,
        "notes": req.notes,
        "saved_at": datetime.utcnow().isoformat(),
        "column_result": session.get("column_result"),
    }
    all_ground_truth = session.get("ground_truth") or {}
    all_ground_truth["columns"] = feedback

    await update_session_db(session_id, ground_truth=all_ground_truth)

    # Keep an already-cached session in sync with the database.
    if session_id in _cache:
        _cache[session_id]["ground_truth"] = all_ground_truth

    return {"session_id": session_id, "ground_truth": feedback}
|
||||
|
||||
|
||||
@router.get("/sessions/{session_id}/ground-truth/columns")
async def get_column_ground_truth(session_id: str):
    """Return the saved column ground truth together with the stored column result.

    Raises:
        HTTPException: 404 when the session does not exist or no column
            ground truth has been saved for it.
    """
    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    columns_gt = (session.get("ground_truth") or {}).get("columns")
    if columns_gt:
        return {
            "session_id": session_id,
            "columns_gt": columns_gt,
            "columns_auto": session.get("column_result"),
        }
    raise HTTPException(status_code=404, detail="No column ground truth saved")
|
||||
|
||||
|
||||
async def _get_columns_overlay(session_id: str) -> Response:
    """Render the dewarped page image with the detected columns drawn on top.

    Each column gets a filled rectangle (blended in at 20% opacity), a solid
    3px border and a text label built from its type and, when below 1.0, its
    classification confidence.

    Raises:
        HTTPException: 404 when the session, its column data or its dewarped
            image is missing; 500 when the image cannot be decoded or encoded.
    """
    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    column_result = session.get("column_result")
    columns = column_result.get("columns") if column_result else None
    if not columns:
        raise HTTPException(status_code=404, detail="No column data available")

    # Load dewarped image
    dewarped_png = await get_session_image(session_id, "dewarped")
    if not dewarped_png:
        raise HTTPException(status_code=404, detail="Dewarped image not available")

    img = cv2.imdecode(np.frombuffer(dewarped_png, dtype=np.uint8), cv2.IMREAD_COLOR)
    if img is None:
        raise HTTPException(status_code=500, detail="Failed to decode image")

    # Color map for region types (BGR)
    palette = {
        "column_en": (255, 180, 0),       # Blue
        "column_de": (0, 200, 0),         # Green
        "column_example": (0, 140, 255),  # Orange
        "column_text": (200, 200, 0),     # Cyan/Turquoise
        "page_ref": (200, 0, 200),        # Purple
        "column_marker": (0, 0, 220),     # Red
        "column_ignore": (180, 180, 180), # Light Gray
        "header": (128, 128, 128),        # Gray
        "footer": (128, 128, 128),        # Gray
    }
    fallback_color = (200, 200, 200)

    # Fills go onto a copy so they can be blended in afterwards; borders and
    # labels are drawn directly onto the image.
    overlay = img.copy()
    for col in columns:
        left, top = col["x"], col["y"]
        right = left + col["width"]
        bottom = top + col["height"]
        color = palette.get(col.get("type", ""), fallback_color)

        # Semi-transparent fill
        cv2.rectangle(overlay, (left, top), (right, bottom), color, -1)

        # Solid border
        cv2.rectangle(img, (left, top), (right, bottom), color, 3)

        # Label with confidence
        label = col.get("type", "unknown").replace("column_", "").upper()
        conf = col.get("classification_confidence")
        if conf is not None and conf < 1.0:
            label = f"{label} {int(conf * 100)}%"
        cv2.putText(img, label, (left + 10, top + 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2)

    # Blend overlay at 20% opacity (result is written back into img)
    cv2.addWeighted(overlay, 0.2, img, 0.8, 0, img)

    success, result_png = cv2.imencode(".png", img)
    if not success:
        raise HTTPException(status_code=500, detail="Failed to encode overlay image")

    return Response(content=result_png.tobytes(), media_type="image/png")
|
||||
|
||||
228
klausur-service/backend/ocr_pipeline_session_store.py
Normal file
228
klausur-service/backend/ocr_pipeline_session_store.py
Normal file
@@ -0,0 +1,228 @@
|
||||
"""
|
||||
OCR Pipeline Session Store - PostgreSQL persistence for OCR pipeline sessions.
|
||||
|
||||
Replaces in-memory storage with database persistence.
|
||||
See migrations/002_ocr_pipeline_sessions.sql for schema.
|
||||
"""
|
||||
|
||||
import os
|
||||
import uuid
|
||||
import logging
|
||||
import json
|
||||
from typing import Optional, List, Dict, Any
|
||||
|
||||
import asyncpg
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Database configuration (same as vocab_session_store)
|
||||
DATABASE_URL = os.getenv(
|
||||
"DATABASE_URL",
|
||||
"postgresql://breakpilot:breakpilot@postgres:5432/breakpilot_db"
|
||||
)
|
||||
|
||||
# Connection pool (initialized lazily)
|
||||
_pool: Optional[asyncpg.Pool] = None
|
||||
|
||||
|
||||
async def get_pool() -> asyncpg.Pool:
    """Return the module-wide asyncpg pool, creating it on first use."""
    global _pool
    if _pool is not None:
        return _pool

    # First call: build the pool from DATABASE_URL and remember it.
    _pool = await asyncpg.create_pool(DATABASE_URL, min_size=2, max_size=10)
    return _pool
|
||||
|
||||
|
||||
async def init_ocr_pipeline_tables():
    """Create the OCR pipeline tables from the migration file when missing.

    Does nothing besides logging when the ``ocr_pipeline_sessions`` table is
    already present, or when the migration file cannot be found next to this
    module.
    """
    pool = await get_pool()
    async with pool.acquire() as conn:
        already_present = await conn.fetchval("""
            SELECT EXISTS (
                SELECT FROM information_schema.tables
                WHERE table_name = 'ocr_pipeline_sessions'
            )
        """)
        if already_present:
            logger.debug("OCR pipeline tables already exist")
            return

        logger.info("Creating OCR pipeline tables...")
        migration_path = os.path.join(
            os.path.dirname(__file__),
            "migrations/002_ocr_pipeline_sessions.sql"
        )
        if not os.path.exists(migration_path):
            logger.warning(f"Migration file not found: {migration_path}")
            return

        with open(migration_path, "r") as migration_file:
            migration_sql = migration_file.read()
        await conn.execute(migration_sql)
        logger.info("OCR pipeline tables created successfully")
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# SESSION CRUD
|
||||
# =============================================================================
|
||||
|
||||
async def create_session_db(
    session_id: str,
    name: str,
    filename: str,
    original_png: bytes,
) -> Dict[str, Any]:
    """Insert a new OCR pipeline session and return its metadata.

    The row is created with status ``'active'`` and ``current_step`` 1. The
    returned dict holds the metadata columns only, not the image bytes.
    """
    insert_sql = """
        INSERT INTO ocr_pipeline_sessions (
            id, name, filename, original_png, status, current_step
        ) VALUES ($1, $2, $3, $4, 'active', 1)
        RETURNING id, name, filename, status, current_step,
                  deskew_result, dewarp_result, column_result,
                  ground_truth, auto_shear_degrees,
                  created_at, updated_at
    """
    pool = await get_pool()
    async with pool.acquire() as conn:
        inserted = await conn.fetchrow(
            insert_sql, uuid.UUID(session_id), name, filename, original_png
        )

    return _row_to_dict(inserted)
|
||||
|
||||
|
||||
async def get_session_db(session_id: str) -> Optional[Dict[str, Any]]:
    """Get session metadata (without images).

    Args:
        session_id: Session identifier as a UUID string.

    Returns:
        The session row converted by ``_row_to_dict``, or ``None`` when no row
        matches or when ``session_id`` is not a valid UUID.
    """
    # A malformed id can never match a row. Report "not found" instead of
    # letting uuid.UUID() raise, so callers that map None to HTTP 404 behave
    # correctly for arbitrary path input.
    try:
        session_uuid = uuid.UUID(session_id)
    except (ValueError, TypeError, AttributeError):
        return None

    pool = await get_pool()
    async with pool.acquire() as conn:
        row = await conn.fetchrow("""
            SELECT id, name, filename, status, current_step,
                   deskew_result, dewarp_result, column_result,
                   ground_truth, auto_shear_degrees,
                   created_at, updated_at
            FROM ocr_pipeline_sessions WHERE id = $1
        """, session_uuid)

    if row:
        return _row_to_dict(row)
    return None
|
||||
|
||||
|
||||
async def get_session_image(session_id: str, image_type: str) -> Optional[bytes]:
    """Load a single image (BYTEA) from the session.

    Args:
        session_id: Session identifier as a UUID string.
        image_type: One of ``"original"``, ``"deskewed"``, ``"binarized"``
            or ``"dewarped"``.

    Returns:
        The image bytes, or ``None`` when the image type is unknown, the
        session id is not a valid UUID, or nothing is stored.
    """
    column_map = {
        "original": "original_png",
        "deskewed": "deskewed_png",
        "binarized": "binarized_png",
        "dewarped": "dewarped_png",
    }
    column = column_map.get(image_type)
    if not column:
        return None

    # A malformed id cannot match any row; treat it like a missing image
    # rather than letting uuid.UUID() raise.
    try:
        session_uuid = uuid.UUID(session_id)
    except (ValueError, TypeError, AttributeError):
        return None

    pool = await get_pool()
    async with pool.acquire() as conn:
        # The column name comes from the fixed whitelist above, never from
        # caller input, so interpolating it into the SQL text is safe.
        return await conn.fetchval(
            f"SELECT {column} FROM ocr_pipeline_sessions WHERE id = $1",
            session_uuid
        )
|
||||
|
||||
|
||||
async def update_session_db(session_id: str, **kwargs) -> Optional[Dict[str, Any]]:
    """Update session fields dynamically.

    Only keyword arguments whose names are in the column whitelist are
    written; any other keyword is ignored. Values for JSONB columns that are
    neither ``None`` nor already a string are serialized with ``json.dumps``.
    ``updated_at`` is set to ``NOW()`` whenever at least one field is written.

    Args:
        session_id: Session identifier as a UUID string.
        **kwargs: Column name / new value pairs.

    Returns:
        The updated session metadata, the current session metadata when no
        whitelisted field was passed, or ``None`` when no row matches or
        ``session_id`` is not a valid UUID.
    """
    # A malformed id cannot match any row; report "not found" instead of
    # letting uuid.UUID() raise.
    try:
        session_uuid = uuid.UUID(session_id)
    except (ValueError, TypeError, AttributeError):
        return None

    allowed_fields = {
        'name', 'filename', 'status', 'current_step',
        'original_png', 'deskewed_png', 'binarized_png', 'dewarped_png',
        'deskew_result', 'dewarp_result', 'column_result',
        'ground_truth', 'auto_shear_degrees',
    }
    jsonb_fields = {'deskew_result', 'dewarp_result', 'column_result', 'ground_truth'}

    fields = []
    values = []
    for key, value in kwargs.items():
        if key not in allowed_fields:
            continue
        if key in jsonb_fields and value is not None and not isinstance(value, str):
            value = json.dumps(value)
        values.append(value)
        # Placeholders are 1-based and follow the order of `values`.
        fields.append(f"{key} = ${len(values)}")

    if not fields:
        return await get_session_db(session_id)

    # Always update updated_at
    fields.append("updated_at = NOW()")

    # The session id is bound as the last positional parameter.
    values.append(session_uuid)
    id_placeholder = len(values)

    pool = await get_pool()
    async with pool.acquire() as conn:
        # Column names come from the whitelist above; all values are bound.
        row = await conn.fetchrow(f"""
            UPDATE ocr_pipeline_sessions
            SET {', '.join(fields)}
            WHERE id = ${id_placeholder}
            RETURNING id, name, filename, status, current_step,
                      deskew_result, dewarp_result, column_result,
                      ground_truth, auto_shear_degrees,
                      created_at, updated_at
        """, *values)

    if row:
        return _row_to_dict(row)
    return None
|
||||
|
||||
|
||||
async def list_sessions_db(limit: int = 50) -> List[Dict[str, Any]]:
    """Return up to ``limit`` sessions, newest first (metadata only, no images)."""
    listing_sql = """
        SELECT id, name, filename, status, current_step,
               created_at, updated_at
        FROM ocr_pipeline_sessions
        ORDER BY created_at DESC
        LIMIT $1
    """
    pool = await get_pool()
    async with pool.acquire() as conn:
        records = await conn.fetch(listing_sql, limit)

    sessions = []
    for record in records:
        sessions.append(_row_to_dict(record))
    return sessions
|
||||
|
||||
|
||||
async def delete_session_db(session_id: str) -> bool:
    """Delete a session.

    Args:
        session_id: Session identifier as a UUID string.

    Returns:
        ``True`` when exactly one row was deleted, ``False`` when no row
        matched or ``session_id`` is not a valid UUID.
    """
    # A malformed id cannot match any row, so there is nothing to delete.
    try:
        session_uuid = uuid.UUID(session_id)
    except (ValueError, TypeError, AttributeError):
        return False

    pool = await get_pool()
    async with pool.acquire() as conn:
        # execute() returns the command status tag, e.g. "DELETE 1".
        result = await conn.execute("""
            DELETE FROM ocr_pipeline_sessions WHERE id = $1
        """, session_uuid)
        return result == "DELETE 1"
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# HELPER
|
||||
# =============================================================================
|
||||
|
||||
def _row_to_dict(row: asyncpg.Record) -> Dict[str, Any]:
|
||||
"""Convert asyncpg Record to JSON-serializable dict."""
|
||||
if row is None:
|
||||
return {}
|
||||
|
||||
result = dict(row)
|
||||
|
||||
# UUID → string
|
||||
for key in ['id', 'session_id']:
|
||||
if key in result and result[key] is not None:
|
||||
result[key] = str(result[key])
|
||||
|
||||
# datetime → ISO string
|
||||
for key in ['created_at', 'updated_at']:
|
||||
if key in result and result[key] is not None:
|
||||
result[key] = result[key].isoformat()
|
||||
|
||||
# JSONB → parsed (asyncpg returns str for JSONB)
|
||||
for key in ['deskew_result', 'dewarp_result', 'column_result', 'ground_truth']:
|
||||
if key in result and result[key] is not None:
|
||||
if isinstance(result[key], str):
|
||||
result[key] = json.loads(result[key])
|
||||
|
||||
return result
|
||||
Reference in New Issue
Block a user