Phase 1 — Python (klausur-service): 5 monoliths → 36 files - dsfa_corpus_ingestion.py (1,828 LOC → 5 files) - cv_ocr_engines.py (2,102 LOC → 7 files) - cv_layout.py (3,653 LOC → 10 files) - vocab_worksheet_api.py (2,783 LOC → 8 files) - grid_build_core.py (1,958 LOC → 6 files) Phase 2 — Go (edu-search-service, school-service): 8 monoliths → 19 files - staff_crawler.go (1,402 → 4), policy/store.go (1,168 → 3) - policy_handlers.go (700 → 2), repository.go (684 → 2) - search.go (592 → 2), ai_extraction_handlers.go (554 → 2) - seed_data.go (591 → 2), grade_service.go (646 → 2) Phase 3 — TypeScript (admin-lehrer): 45 monoliths → 220+ files - sdk/types.ts (2,108 → 16 domain files) - ai/rag/page.tsx (2,686 → 14 files) - 22 page.tsx files split into _components/ + _hooks/ - 11 component files split into sub-components - 10 SDK data catalogs added to loc-exceptions - Deleted dead backup index_original.ts (4,899 LOC) All original public APIs preserved via re-export facades. Zero new errors: Python imports verified, Go builds clean, TypeScript tsc --noEmit shows only pre-existing errors. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
202 lines
7.4 KiB
TypeScript
202 lines
7.4 KiB
TypeScript
import { useCallback, useEffect, useRef, useState } from 'react'
import type { GridResult, GridCell, StructureResult, StructureBox, StructureGraphic } from '@/app/(admin)/ai/ocr-kombi/types'
import type { EditableCell, EditorMode, PageRegion, RowItem, PageZone, ReconstructionStatus } from './StepReconstructionTypes'
import { KLAUSUR_API } from './StepReconstructionTypes'
|
|
|
|
/**
 * Everything `useReconstructionData` hands back to its caller: the loaded
 * session state plus the setters the caller is allowed to drive directly.
 */
interface ReconstructionData {
  // --- Load lifecycle ---
  /** Current load status of the session. */
  status: ReconstructionStatus
  setStatus: (next: ReconstructionStatus) => void
  /** Last error message; empty string when there is none. */
  error: string
  setError: (message: string) => void
  /** (Re-)fetches the session from the API. */
  loadSessionData: () => void

  // --- Cells ---
  /** Editable cells derived from the loaded grid cells. */
  cells: EditableCell[]
  setCells: (next: EditableCell[]) => void
  /** Grid cells of the session itself, as returned by the API. */
  gridCells: GridCell[]
  /** Session cells plus sub-session cells mapped into the parent image. */
  mergedGridCells: GridCell[]

  // --- Editor ---
  editorMode: EditorMode
  setEditorMode: (next: EditorMode) => void

  // --- Parent layout (columns / rows / zones) ---
  /** True when the session has sub-sessions and at least one box zone. */
  isParentWithBoxes: boolean
  parentColumns: PageRegion[]
  parentRows: RowItem[]
  parentZones: PageZone[]

  // --- Structure detection result ---
  structureBoxes: StructureBox[]
  structureGraphics: StructureGraphic[]

  // --- Image ---
  /** Image dimensions (w/h) or null while unknown. */
  imageNaturalSize: { w: number; h: number } | null
  setImageNaturalSize: (next: { w: number; h: number } | null) => void
  /** Image rotation in degrees. */
  imageRotation: 0 | 180
  setImageRotation: (update: (current: 0 | 180) => 0 | 180) => void
}
|
|
|
|
/**
 * Maps an API grid cell (snake_case fields) onto the editor's cell shape
 * (camelCase fields). `text` and `originalText` both start out as the
 * cell's text.
 */
function gridCellToEditable(c: GridCell): EditableCell {
  const { cell_id, text, bbox_pct, col_type, row_index, col_index } = c
  const editable: EditableCell = {
    cellId: cell_id,
    text,
    originalText: text,
    bboxPct: bbox_pct,
    colType: col_type,
    rowIndex: row_index,
    colIndex: col_index,
  }
  return editable
}
|
|
|
|
/**
 * Maps the cells of one sub-session into the parent image's coordinate space.
 *
 * A sub-session cell's `bbox_pct` is relative to its box; the result carries
 * `bbox_pct` relative to the whole parent image and a matching `bbox_px`.
 * Cells without `bbox_pct` are dropped. Cell ids are prefixed with
 * `sub_<subSessionId>_`.
 */
function projectSubCellsIntoParent(
  subCells: GridCell[],
  subSessionId: string,
  box: NonNullable<PageZone['box']>,
  imgW: number,
  imgH: number,
): GridCell[] {
  // Box rectangle expressed as percentages of the parent image.
  const boxXPct = (box.x / imgW) * 100
  const boxYPct = (box.y / imgH) * 100
  const boxWPct = (box.width / imgW) * 100
  const boxHPct = (box.height / imgH) * 100

  const projected: GridCell[] = []
  for (const subCell of subCells) {
    if (!subCell.bbox_pct) continue

    const x = boxXPct + (subCell.bbox_pct.x / 100) * boxWPct
    const y = boxYPct + (subCell.bbox_pct.y / 100) * boxHPct
    const w = (subCell.bbox_pct.w / 100) * boxWPct
    const h = (subCell.bbox_pct.h / 100) * boxHPct

    projected.push({
      ...subCell,
      cell_id: `sub_${subSessionId}_${subCell.cell_id}`,
      bbox_pct: { x, y, w, h },
      bbox_px: {
        x: Math.round(x / 100 * imgW),
        y: Math.round(y / 100 * imgH),
        w: Math.round(w / 100 * imgW),
        h: Math.round(h / 100 * imgH),
      },
    })
  }
  return projected
}

/**
 * Fetches one sub-session and returns its cells in parent coordinates.
 *
 * Best effort: a missing box, a non-OK response, a missing word result or any
 * thrown error yields an empty list so that one bad sub-session never fails
 * the whole page.
 */
async function loadSubSessionCells(
  sub: { id: string; box_index: number },
  boxZones: PageZone[],
  imgW: number,
  imgH: number,
): Promise<GridCell[]> {
  // Pick the box belonging to this sub-session instead of always the first one.
  // NOTE(review): assumes box_index is the 0-based position among the 'box'
  // zones — confirm against the backend. Falls back to the first box zone.
  const box = (boxZones[sub.box_index] ?? boxZones[0])?.box
  if (!box) return []

  try {
    const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sub.id}`)
    if (!res.ok) return []
    const data = await res.json()
    const subWordResult: GridResult | undefined = data.word_result
    if (!subWordResult?.cells) return []
    return projectSubCellsIntoParent(subWordResult.cells, sub.id, box, imgW, imgH)
  } catch (e: unknown) {
    // Deliberately non-fatal, but leave a trace for debugging.
    console.warn(`Skipping sub-session ${sub.id}:`, e)
    return []
  }
}

/**
 * Loads reconstruction session data from the API, including sub-session
 * merging for parent sessions with box zones.
 *
 * @param sessionId      Session to load; nothing is fetched while it is null.
 * @param onResetEditing Called after a session has been loaded successfully,
 *                       before the new cells are applied. The most recent
 *                       callback passed in is the one that gets invoked.
 * @returns The loaded state plus the setters listed in `ReconstructionData`.
 *
 * Responses that arrive after a newer load was started, after `sessionId`
 * changed, or after unmount are discarded.
 */
export function useReconstructionData(
  sessionId: string | null,
  onResetEditing: () => void,
): ReconstructionData {
  const [status, setStatus] = useState<ReconstructionStatus>('loading')
  const [error, setError] = useState('')
  const [cells, setCells] = useState<EditableCell[]>([])
  const [gridCells, setGridCells] = useState<GridCell[]>([])
  const [editorMode, setEditorMode] = useState<EditorMode>('simple')

  const [isParentWithBoxes, setIsParentWithBoxes] = useState(false)
  const [mergedGridCells, setMergedGridCells] = useState<GridCell[]>([])
  const [parentColumns, setParentColumns] = useState<PageRegion[]>([])
  const [parentRows, setParentRows] = useState<RowItem[]>([])
  const [parentZones, setParentZones] = useState<PageZone[]>([])
  const [imageNaturalSize, setImageNaturalSize] = useState<{ w: number; h: number } | null>(null)
  const [imageRotation, setImageRotation] = useState<0 | 180>(0)
  const [structureBoxes, setStructureBoxes] = useState<StructureBox[]>([])
  const [structureGraphics, setStructureGraphics] = useState<StructureGraphic[]>([])

  // Keep the latest callback in a ref so loadSessionData only depends on
  // sessionId (an unstable callback prop must not trigger reloads) while
  // still calling the current callback rather than a stale closure.
  const onResetEditingRef = useRef(onResetEditing)
  useEffect(() => {
    onResetEditingRef.current = onResetEditing
  }, [onResetEditing])

  // Monotonic id of the most recently started (or invalidated) load.
  const latestRequestRef = useRef(0)

  const loadSessionData = useCallback(async (): Promise<void> => {
    if (!sessionId) return

    const requestId = ++latestRequestRef.current
    const isStale = () => latestRequestRef.current !== requestId

    setStatus('loading')
    setError('')
    try {
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}`)
      if (!res.ok) throw new Error(`HTTP ${res.status}`)
      const data = await res.json()
      if (isStale()) return

      const wordResult: GridResult | undefined = data.word_result
      if (!wordResult) {
        setError('Keine Worterkennungsdaten gefunden. Bitte zuerst Schritt 5 abschliessen.')
        setStatus('error')
        return
      }

      const rawGridCells: GridCell[] = wordResult.cells ?? []
      const structureResult: StructureResult | undefined = data.structure_result
      const columnResult = data.column_result as { columns?: PageRegion[]; zones?: PageZone[] } | undefined
      const rowResult = data.row_result as { rows?: RowItem[] } | undefined
      const subSessions: { id: string; box_index: number }[] = data.sub_sessions ?? []
      const zones: PageZone[] = columnResult?.zones ?? []
      const boxZones = zones.filter(z => z.zone_type === 'box')

      // Parent with boxes = has sub-sessions AND at least one box zone.
      const hasBoxes = subSessions.length > 0 && boxZones.length > 0

      // Merge sub-session cells (fetched in parallel, order preserved).
      let merged: GridCell[] = rawGridCells
      if (hasBoxes) {
        // `|| 1` also covers 0 so the percentage maths never divides by zero.
        const imgW = wordResult.image_width || 1
        const imgH = wordResult.image_height || 1
        const perSubSession = await Promise.all(
          subSessions.map(sub => loadSubSessionCells(sub, boxZones, imgW, imgH)),
        )
        if (isStale()) return
        merged = rawGridCells.concat(...perSubSession)
      }

      // All awaits are done: apply the new session state in one go.
      onResetEditingRef.current()
      setGridCells(rawGridCells)
      setMergedGridCells(merged)
      setCells(merged.map(gridCellToEditable))

      // Always assign (falling back to empty) so nothing from a previously
      // loaded session lingers.
      setStructureBoxes(structureResult?.boxes ?? [])
      setStructureGraphics(structureResult?.graphics ?? [])
      setParentColumns(columnResult?.columns ?? [])
      setParentRows(rowResult?.rows ?? [])
      setParentZones(zones)

      setIsParentWithBoxes(hasBoxes)
      if (hasBoxes) {
        // NOTE(review): parents with boxes are forced to 180° rotation,
        // exactly as before — confirm this is intended.
        setImageRotation(() => 180)
        setEditorMode('overlay')
      }

      if (wordResult.image_width && wordResult.image_height) {
        setImageNaturalSize({ w: wordResult.image_width, h: wordResult.image_height })
      }

      setStatus('ready')
    } catch (e: unknown) {
      if (isStale()) return
      setError(e instanceof Error ? e.message : String(e))
      setStatus('error')
    }
  }, [sessionId])

  // Load on mount and whenever the session changes; on change/unmount,
  // invalidate whatever is still in flight.
  useEffect(() => {
    if (!sessionId) return
    void loadSessionData()
    return () => {
      latestRequestRef.current += 1
    }
  }, [sessionId, loadSessionData])

  return {
    status, setStatus, error, setError,
    cells, setCells, gridCells,
    editorMode, setEditorMode,
    isParentWithBoxes, mergedGridCells,
    parentColumns, parentRows, parentZones,
    imageNaturalSize, setImageNaturalSize,
    structureBoxes, structureGraphics,
    imageRotation, setImageRotation,
    loadSessionData,
  }
}
|