[split-required] Split 58 monoliths across Python, Go, TypeScript (Phases 1-3)
Phase 1 — Python (klausur-service): 5 monoliths → 36 files - dsfa_corpus_ingestion.py (1,828 LOC → 5 files) - cv_ocr_engines.py (2,102 LOC → 7 files) - cv_layout.py (3,653 LOC → 10 files) - vocab_worksheet_api.py (2,783 LOC → 8 files) - grid_build_core.py (1,958 LOC → 6 files) Phase 2 — Go (edu-search-service, school-service): 8 monoliths → 19 files - staff_crawler.go (1,402 → 4), policy/store.go (1,168 → 3) - policy_handlers.go (700 → 2), repository.go (684 → 2) - search.go (592 → 2), ai_extraction_handlers.go (554 → 2) - seed_data.go (591 → 2), grade_service.go (646 → 2) Phase 3 — TypeScript (admin-lehrer): 45 monoliths → 220+ files - sdk/types.ts (2,108 → 16 domain files) - ai/rag/page.tsx (2,686 → 14 files) - 22 page.tsx files split into _components/ + _hooks/ - 11 component files split into sub-components - 10 SDK data catalogs added to loc-exceptions - Deleted dead backup index_original.ts (4,899 LOC) All original public APIs preserved via re-export facades. Zero new errors: Python imports verified, Go builds clean, TypeScript tsc --noEmit shows only pre-existing errors. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
201
admin-lehrer/components/ocr-pipeline/useReconstructionData.ts
Normal file
201
admin-lehrer/components/ocr-pipeline/useReconstructionData.ts
Normal file
@@ -0,0 +1,201 @@
|
||||
import { useCallback, useEffect, useState } from 'react'
|
||||
import type { GridResult, GridCell, StructureResult, StructureBox, StructureGraphic } from '@/app/(admin)/ai/ocr-kombi/types'
|
||||
import type { EditableCell, EditorMode, PageRegion, RowItem, PageZone, ReconstructionStatus } from './StepReconstructionTypes'
|
||||
import { KLAUSUR_API } from './StepReconstructionTypes'
|
||||
|
||||
interface ReconstructionData {
|
||||
status: ReconstructionStatus
|
||||
setStatus: (s: ReconstructionStatus) => void
|
||||
error: string
|
||||
setError: (e: string) => void
|
||||
cells: EditableCell[]
|
||||
setCells: (cells: EditableCell[]) => void
|
||||
gridCells: GridCell[]
|
||||
editorMode: EditorMode
|
||||
setEditorMode: (mode: EditorMode) => void
|
||||
isParentWithBoxes: boolean
|
||||
mergedGridCells: GridCell[]
|
||||
parentColumns: PageRegion[]
|
||||
parentRows: RowItem[]
|
||||
parentZones: PageZone[]
|
||||
imageNaturalSize: { w: number; h: number } | null
|
||||
setImageNaturalSize: (size: { w: number; h: number } | null) => void
|
||||
structureBoxes: StructureBox[]
|
||||
structureGraphics: StructureGraphic[]
|
||||
imageRotation: 0 | 180
|
||||
setImageRotation: (fn: (r: 0 | 180) => 0 | 180) => void
|
||||
loadSessionData: () => void
|
||||
}
|
||||
|
||||
function gridCellToEditable(c: GridCell): EditableCell {
|
||||
return {
|
||||
cellId: c.cell_id,
|
||||
text: c.text,
|
||||
originalText: c.text,
|
||||
bboxPct: c.bbox_pct,
|
||||
colType: c.col_type,
|
||||
rowIndex: c.row_index,
|
||||
colIndex: c.col_index,
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads reconstruction session data from the API, including sub-session
|
||||
* merging for parent sessions with box zones.
|
||||
*/
|
||||
export function useReconstructionData(
|
||||
sessionId: string | null,
|
||||
onResetEditing: () => void,
|
||||
): ReconstructionData {
|
||||
const [status, setStatus] = useState<ReconstructionStatus>('loading')
|
||||
const [error, setError] = useState('')
|
||||
const [cells, setCells] = useState<EditableCell[]>([])
|
||||
const [gridCells, setGridCells] = useState<GridCell[]>([])
|
||||
const [editorMode, setEditorMode] = useState<EditorMode>('simple')
|
||||
|
||||
const [isParentWithBoxes, setIsParentWithBoxes] = useState(false)
|
||||
const [mergedGridCells, setMergedGridCells] = useState<GridCell[]>([])
|
||||
const [parentColumns, setParentColumns] = useState<PageRegion[]>([])
|
||||
const [parentRows, setParentRows] = useState<RowItem[]>([])
|
||||
const [parentZones, setParentZones] = useState<PageZone[]>([])
|
||||
const [imageNaturalSize, setImageNaturalSize] = useState<{ w: number; h: number } | null>(null)
|
||||
const [imageRotation, setImageRotation] = useState<0 | 180>(0)
|
||||
const [structureBoxes, setStructureBoxes] = useState<StructureBox[]>([])
|
||||
const [structureGraphics, setStructureGraphics] = useState<StructureGraphic[]>([])
|
||||
|
||||
const loadSessionData = useCallback(async () => {
|
||||
if (!sessionId) return
|
||||
setStatus('loading')
|
||||
try {
|
||||
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}`)
|
||||
if (!res.ok) throw new Error(`HTTP ${res.status}`)
|
||||
const data = await res.json()
|
||||
|
||||
const wordResult: GridResult | undefined = data.word_result
|
||||
if (!wordResult) {
|
||||
setError('Keine Worterkennungsdaten gefunden. Bitte zuerst Schritt 5 abschliessen.')
|
||||
setStatus('error')
|
||||
return
|
||||
}
|
||||
|
||||
const rawGridCells: GridCell[] = wordResult.cells || []
|
||||
setGridCells(rawGridCells)
|
||||
const allEditableCells = rawGridCells.map(gridCellToEditable)
|
||||
|
||||
setCells(allEditableCells)
|
||||
onResetEditing()
|
||||
|
||||
// Load structure result (boxes, graphics, colors)
|
||||
const structureResult: StructureResult | undefined = data.structure_result
|
||||
if (structureResult) {
|
||||
setStructureBoxes(structureResult.boxes || [])
|
||||
setStructureGraphics(structureResult.graphics || [])
|
||||
}
|
||||
|
||||
// Check for parent with boxes (sub-sessions + zones)
|
||||
const columnResult = data.column_result as { columns?: PageRegion[]; zones?: PageZone[] } | undefined
|
||||
const rowResult = data.row_result as { rows?: RowItem[] } | undefined
|
||||
const subSessions: { id: string; box_index: number }[] = data.sub_sessions || []
|
||||
const zones: PageZone[] = columnResult?.zones || []
|
||||
const hasBoxes = subSessions.length > 0 && zones.some(z => z.zone_type === 'box')
|
||||
|
||||
setIsParentWithBoxes(hasBoxes)
|
||||
if (hasBoxes) setImageRotation(() => 180)
|
||||
|
||||
if (columnResult?.columns) setParentColumns(columnResult.columns)
|
||||
if (rowResult?.rows) setParentRows(rowResult.rows)
|
||||
if (zones.length > 0) setParentZones(zones)
|
||||
|
||||
if (wordResult.image_width && wordResult.image_height) {
|
||||
setImageNaturalSize({ w: wordResult.image_width, h: wordResult.image_height })
|
||||
}
|
||||
|
||||
if (hasBoxes) {
|
||||
setEditorMode('overlay')
|
||||
await loadMergedCells(rawGridCells, subSessions, zones, wordResult)
|
||||
} else {
|
||||
setMergedGridCells(rawGridCells)
|
||||
}
|
||||
|
||||
setStatus('ready')
|
||||
} catch (e: unknown) {
|
||||
setError(e instanceof Error ? e.message : String(e))
|
||||
setStatus('error')
|
||||
}
|
||||
// eslint-disable-next-line react-hooks/exhaustive-deps
|
||||
}, [sessionId])
|
||||
|
||||
/** Load sub-session cells and merge into parent coordinate space */
|
||||
const loadMergedCells = async (
|
||||
rawGridCells: GridCell[],
|
||||
subSessions: { id: string; box_index: number }[],
|
||||
zones: PageZone[],
|
||||
wordResult: GridResult,
|
||||
) => {
|
||||
const imgW = wordResult.image_width || 1
|
||||
const imgH = wordResult.image_height || 1
|
||||
const allMergedCells: GridCell[] = [...rawGridCells]
|
||||
|
||||
for (const sub of subSessions) {
|
||||
try {
|
||||
const subRes = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sub.id}`)
|
||||
if (!subRes.ok) continue
|
||||
const subData = await subRes.json()
|
||||
const subWordResult: GridResult | undefined = subData.word_result
|
||||
if (!subWordResult?.cells) continue
|
||||
|
||||
const boxZone = zones.find(z => z.zone_type === 'box')
|
||||
if (!boxZone?.box) continue
|
||||
|
||||
const box = boxZone.box
|
||||
const boxXPct = (box.x / imgW) * 100
|
||||
const boxYPct = (box.y / imgH) * 100
|
||||
const boxWPct = (box.width / imgW) * 100
|
||||
const boxHPct = (box.height / imgH) * 100
|
||||
|
||||
for (const subCell of subWordResult.cells) {
|
||||
if (!subCell.bbox_pct) continue
|
||||
const parentCellX = boxXPct + (subCell.bbox_pct.x / 100) * boxWPct
|
||||
const parentCellY = boxYPct + (subCell.bbox_pct.y / 100) * boxHPct
|
||||
const parentCellW = (subCell.bbox_pct.w / 100) * boxWPct
|
||||
const parentCellH = (subCell.bbox_pct.h / 100) * boxHPct
|
||||
|
||||
allMergedCells.push({
|
||||
...subCell,
|
||||
cell_id: `sub_${sub.id}_${subCell.cell_id}`,
|
||||
bbox_pct: { x: parentCellX, y: parentCellY, w: parentCellW, h: parentCellH },
|
||||
bbox_px: {
|
||||
x: Math.round(parentCellX / 100 * imgW),
|
||||
y: Math.round(parentCellY / 100 * imgH),
|
||||
w: Math.round(parentCellW / 100 * imgW),
|
||||
h: Math.round(parentCellH / 100 * imgH),
|
||||
},
|
||||
})
|
||||
}
|
||||
} catch {
|
||||
// Skip failing sub-sessions
|
||||
}
|
||||
}
|
||||
|
||||
setMergedGridCells(allMergedCells)
|
||||
setCells(allMergedCells.map(gridCellToEditable))
|
||||
}
|
||||
|
||||
// Load session data on mount
|
||||
useEffect(() => {
|
||||
if (!sessionId) return
|
||||
loadSessionData()
|
||||
}, [sessionId, loadSessionData])
|
||||
|
||||
return {
|
||||
status, setStatus, error, setError,
|
||||
cells, setCells, gridCells,
|
||||
editorMode, setEditorMode,
|
||||
isParentWithBoxes, mergedGridCells,
|
||||
parentColumns, parentRows, parentZones,
|
||||
imageNaturalSize, setImageNaturalSize,
|
||||
structureBoxes, structureGraphics,
|
||||
imageRotation, setImageRotation,
|
||||
loadSessionData,
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user