[split-required] Split 58 monoliths across Python, Go, TypeScript (Phases 1-3)

Phase 1 — Python (klausur-service): 5 monoliths → 36 files
- dsfa_corpus_ingestion.py (1,828 LOC → 5 files)
- cv_ocr_engines.py (2,102 LOC → 7 files)
- cv_layout.py (3,653 LOC → 10 files)
- vocab_worksheet_api.py (2,783 LOC → 8 files)
- grid_build_core.py (1,958 LOC → 6 files)

Phase 2 — Go (edu-search-service, school-service): 8 monoliths → 19 files
- staff_crawler.go (1,402 → 4), policy/store.go (1,168 → 3)
- policy_handlers.go (700 → 2), repository.go (684 → 2)
- search.go (592 → 2), ai_extraction_handlers.go (554 → 2)
- seed_data.go (591 → 2), grade_service.go (646 → 2)

Phase 3 — TypeScript (admin-lehrer): 45 monoliths → 220+ files
- sdk/types.ts (2,108 → 16 domain files)
- ai/rag/page.tsx (2,686 → 14 files)
- 22 page.tsx files split into _components/ + _hooks/
- 11 component files split into sub-components
- 10 SDK data catalogs added to loc-exceptions
- Deleted dead backup index_original.ts (4,899 LOC)

All original public APIs preserved via re-export facades.
Zero new errors: Python imports verified, Go builds clean,
TypeScript tsc --noEmit shows only pre-existing errors.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-04-24 17:28:57 +02:00
parent 9ba420fa91
commit b681ddb131
251 changed files with 30016 additions and 25037 deletions

View File

@@ -1,66 +1,14 @@
'use client'
import { useCallback, useEffect, useMemo, useRef, useState } from 'react'
import { useCallback, useEffect, useRef, useState } from 'react'
import type { GridCell, GridResult, WordEntry, ColumnMeta } from '@/app/(admin)/ai/ocr-kombi/types'
import { usePixelWordPositions } from './usePixelWordPositions'
// URL path prefix prepended to the OCR-pipeline endpoints this component requests
// (for example the cropped session image URL built further below).
const KLAUSUR_API = '/klausur-api'
/** One correction to a single field of one vocabulary row, as delivered in the review stream's `changes` list. */
interface LlmChange {
/** Index of the affected row; used as the key when changes are grouped per row. */
row_index: number
/** Name of the entry field the correction applies to. */
field: 'english' | 'german' | 'example'
/** Value before the correction (rendered struck through in the UI). */
old: string
/** Suggested replacement value (rendered highlighted in the UI). */
new: string
}
/** Props accepted by the `StepLlmReview` component. */
interface StepLlmReviewProps {
/** Session to review; when null the component only renders a "select a session first" placeholder. */
sessionId: string | null
/** Callback wired to the continue / skip / reject-all buttons. */
onNext: () => void
}
/** Summary information about a review run, populated from a stream event. */
interface ReviewMeta {
// NOTE(review): presumably the number of entries in the session — confirm against the backend event.
total_entries: number
// NOTE(review): presumably the number of entries actually sent for review — confirm.
to_review: number
/** Count of skipped entries; shown in the done-state summary line. */
skipped: number
/** Model identifier copied from the stream event. */
model: string
/** Row indices that were skipped; when present they are turned into the set of skipped rows. */
skipped_indices?: number[]
}
/** Progress counter taken from stream events; the UI derives a percentage as current / total. */
interface StreamProgress {
/** Numerator of the progress percentage. */
current: number
/** Denominator of the progress percentage. */
total: number
}
/**
 * Short display label per entry field name.
 * Used for table column headers and for the field badge in the corrections list;
 * callers fall back to the raw field name when no label is registered.
 */
const FIELD_LABELS: Record<string, string> = {
english: 'EN',
german: 'DE',
example: 'Beispiel',
source_page: 'Seite',
marker: 'Marker',
text: 'Text',
}
/**
 * Maps a column type to the WordEntry field name that holds its text.
 * Column types without an entry here are skipped when the table is rendered.
 */
const COL_TYPE_TO_FIELD: Record<string, string> = {
column_en: 'english',
column_de: 'german',
column_example: 'example',
page_ref: 'source_page',
column_marker: 'marker',
column_text: 'text',
}
/**
 * Maps a column type to the text color classes (light and dark variant) of its table header.
 * The header falls back to a neutral gray when a column type has no entry here.
 */
const COL_TYPE_COLOR: Record<string, string> = {
column_en: 'text-blue-600 dark:text-blue-400',
column_de: 'text-green-600 dark:text-green-400',
column_example: 'text-orange-600 dark:text-orange-400',
page_ref: 'text-cyan-600 dark:text-cyan-400',
column_marker: 'text-gray-500 dark:text-gray-400',
column_text: 'text-gray-700 dark:text-gray-300',
}
/**
 * Review state of a single table row. It selects the row background and the status marker:
 * 'pending' shows no marker, 'active' a pulsing dot, 'reviewed' a check mark,
 * 'corrected' a "korr." badge and 'skipped' a "skip" badge.
 */
type RowStatus = 'pending' | 'active' | 'reviewed' | 'corrected' | 'skipped'
import type { LlmChange, StepLlmReviewProps, ReviewMeta, StreamProgress, RowStatus } from './llm-review-types'
import { COL_TYPE_TO_FIELD, KLAUSUR_API } from './llm-review-types'
import { LoadingScreen, ErrorScreen, AppliedScreen, NoSessionScreen } from './LlmReviewStatusScreens'
import { LlmReviewVocabTable } from './LlmReviewVocabTable'
import { LlmReviewOverlay } from './LlmReviewOverlay'
import { LlmReviewCorrections } from './LlmReviewCorrections'
export function StepLlmReview({ sessionId, onNext }: StepLlmReviewProps) {
// Core state
@@ -90,8 +38,6 @@ export function StepLlmReview({ sessionId, onNext }: StepLlmReviewProps) {
const [leftPaddingPct, setLeftPaddingPct] = useState(0)
const [globalBold, setGlobalBold] = useState(false)
const [cells, setCells] = useState<GridCell[]>([])
const reconRef = useRef<HTMLDivElement>(null)
const [reconWidth, setReconWidth] = useState(0)
// Pixel-analysed word positions via shared hook
const overlayImageUrl = sessionId
@@ -102,17 +48,6 @@ export function StepLlmReview({ sessionId, onNext }: StepLlmReviewProps) {
const tableRef = useRef<HTMLDivElement>(null)
const activeRowRef = useRef<HTMLTableRowElement>(null)
// Track reconstruction container width for font size calculation
useEffect(() => {
const el = reconRef.current
if (!el) return
const obs = new ResizeObserver(entries => {
for (const entry of entries) setReconWidth(entry.contentRect.width)
})
obs.observe(el)
return () => obs.disconnect()
}, [viewMode])
// Load session data on mount
useEffect(() => {
if (!sessionId) return
@@ -235,7 +170,6 @@ export function StepLlmReview({ sessionId, onNext }: StepLlmReviewProps) {
model: event.model,
skipped_indices: event.skipped_indices,
})
// Mark skipped rows
if (event.skipped_indices) {
allSkipped = new Set(event.skipped_indices)
setSkippedRows(allSkipped)
@@ -246,15 +180,12 @@ export function StepLlmReview({ sessionId, onNext }: StepLlmReviewProps) {
const batchChanges: LlmChange[] = event.changes || []
const batchRows: number[] = event.entries_reviewed || []
// Update active rows (currently being reviewed)
setActiveRowIndices(new Set(batchRows))
// Accumulate changes
allChanges = [...allChanges, ...batchChanges]
setChanges(allChanges)
setProgress(event.progress)
// Update corrected map
for (const c of batchChanges) {
const existing = cMap.get(c.row_index) || []
existing.push(c)
@@ -262,13 +193,11 @@ export function StepLlmReview({ sessionId, onNext }: StepLlmReviewProps) {
}
setCorrectedMap(new Map(cMap))
// Mark batch rows as reviewed
for (const r of batchRows) {
allReviewed.add(r)
}
setReviewedRows(new Set(allReviewed))
// Scroll to active row in table
setTimeout(() => {
activeRowRef.current?.scrollIntoView({ behavior: 'smooth', block: 'center' })
}, 50)
@@ -278,7 +207,6 @@ export function StepLlmReview({ sessionId, onNext }: StepLlmReviewProps) {
setActiveRowIndices(new Set())
setTotalDuration(event.duration_ms)
setAccepted(new Set(allChanges.map((_: LlmChange, i: number) => i)))
// Mark all non-skipped as reviewed
const allEntryIndices = vocabEntries.map((_: WordEntry, i: number) => i)
for (const i of allEntryIndices) {
if (!allSkipped.has(i)) allReviewed.add(i)
@@ -293,7 +221,6 @@ export function StepLlmReview({ sessionId, onNext }: StepLlmReviewProps) {
}
}
// If stream ended without complete event
if (allChanges.length === 0) {
setStatus('done')
}
@@ -354,90 +281,24 @@ export function StepLlmReview({ sessionId, onNext }: StepLlmReviewProps) {
? `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/cropped`
: ''
// Snap all cells in the same column to consistent x/w positions
// Uses the median x and width per col_index so columns align vertically
const colPositions = useMemo(() => {
const byCol = new Map<number, { xs: number[]; ws: number[] }>()
for (const cell of cells) {
if (!cell.bbox_pct) continue
const entry = byCol.get(cell.col_index) || { xs: [], ws: [] }
entry.xs.push(cell.bbox_pct.x)
entry.ws.push(cell.bbox_pct.w)
byCol.set(cell.col_index, entry)
}
const result = new Map<number, { x: number; w: number }>()
for (const [colIdx, { xs, ws }] of byCol) {
xs.sort((a, b) => a - b)
ws.sort((a, b) => a - b)
const medianX = xs[Math.floor(xs.length / 2)]
const medianW = ws[Math.floor(ws.length / 2)]
result.set(colIdx, { x: medianX, w: medianW })
}
return result
}, [cells])
if (!sessionId) {
return <div className="text-center py-12 text-gray-400">Bitte zuerst eine Session auswaehlen.</div>
}
// --- Loading session data ---
if (status === 'loading' || status === 'idle') {
return (
<div className="flex items-center gap-3 justify-center py-12">
<div className="animate-spin rounded-full h-5 w-5 border-b-2 border-teal-500" />
<span className="text-gray-500">Session-Daten werden geladen...</span>
</div>
)
}
// --- Error ---
// --- Early returns for non-main states ---
if (!sessionId) return <NoSessionScreen />
if (status === 'loading' || status === 'idle') return <LoadingScreen />
if (status === 'error') {
return (
<div className="flex flex-col items-center justify-center py-12 text-center">
<div className="text-5xl mb-4"></div>
<h3 className="text-lg font-medium text-red-600 dark:text-red-400 mb-2">Fehler bei OCR-Zeichenkorrektur</h3>
<p className="text-sm text-gray-500 dark:text-gray-400 max-w-lg mb-4">{error}</p>
<div className="flex gap-3">
<button onClick={() => { setError(''); loadSessionData() }}
className="px-5 py-2 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors text-sm">
Erneut versuchen
</button>
<button onClick={onNext}
className="px-5 py-2 bg-gray-200 dark:bg-gray-700 text-gray-700 dark:text-gray-300 rounded-lg hover:bg-gray-300 dark:hover:bg-gray-600 transition-colors text-sm">
Ueberspringen
</button>
</div>
</div>
)
return <ErrorScreen error={error} onRetry={() => { setError(''); loadSessionData() }} onSkip={onNext} />
}
// --- Applied ---
if (status === 'applied') {
return (
<div className="flex flex-col items-center justify-center py-12 text-center">
<div className="text-5xl mb-4"></div>
<h3 className="text-lg font-medium text-gray-700 dark:text-gray-300 mb-2">Korrekturen uebernommen</h3>
<p className="text-sm text-gray-500 dark:text-gray-400 mb-6">
{accepted.size} von {changes.length} Korrekturen wurden angewendet.
</p>
<button onClick={onNext}
className="px-6 py-2.5 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors font-medium">
Weiter
</button>
</div>
)
return <AppliedScreen acceptedCount={accepted.size} totalChanges={changes.length} onNext={onNext} />
}
// Active entry for highlighting on image
const activeEntry = vocabEntries.find((_: WordEntry, i: number) => activeRowIndices.has(i))
const pct = progress ? Math.round((progress.current / progress.total) * 100) : 0
/** Handle inline edit of a cell in the overlay */
const handleCellEdit = (cellId: string, rowIndex: number, newText: string | null) => {
if (newText === null) return
setCells(prev => prev.map(c => c.cell_id === cellId ? { ...c, text: newText } : c))
// Also update vocabEntries if this cell maps to a known field
const cell = cells.find(c => c.cell_id === cellId)
if (cell) {
const field = COL_TYPE_TO_FIELD[cell.col_type]
@@ -599,324 +460,40 @@ export function StepLlmReview({ sessionId, onNext }: StepLlmReviewProps) {
{/* Right: Table or Overlay */}
<div className={viewMode === 'table' ? 'col-span-2' : 'col-span-1'} ref={tableRef}>
{viewMode === 'table' ? (
<>
<div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-1">
{columnsUsed.length === 1 && columnsUsed[0]?.type === 'column_text' ? 'Tabelle' : 'Vokabeltabelle'} ({vocabEntries.length} Eintraege)
</div>
<div className="border border-gray-200 dark:border-gray-700 rounded-lg overflow-hidden">
<div className="max-h-[70vh] overflow-y-auto">
<table className="w-full text-sm">
<thead className="sticky top-0 z-10">
<tr className="bg-gray-50 dark:bg-gray-800 border-b border-gray-200 dark:border-gray-700">
<th className="px-2 py-2 text-left text-gray-500 dark:text-gray-400 font-medium w-10">#</th>
{columnsUsed.length > 0 ? (
columnsUsed.map((col, i) => {
const field = COL_TYPE_TO_FIELD[col.type]
if (!field) return null
return (
<th key={i} className={`px-2 py-2 text-left font-medium ${COL_TYPE_COLOR[col.type] || 'text-gray-500 dark:text-gray-400'}`}>
{FIELD_LABELS[field] || field}
</th>
)
})
) : (
<>
<th className="px-2 py-2 text-left text-gray-500 dark:text-gray-400 font-medium">EN</th>
<th className="px-2 py-2 text-left text-gray-500 dark:text-gray-400 font-medium">DE</th>
<th className="px-2 py-2 text-left text-gray-500 dark:text-gray-400 font-medium">Beispiel</th>
</>
)}
<th className="px-2 py-2 text-center text-gray-500 dark:text-gray-400 font-medium w-16">Status</th>
</tr>
</thead>
<tbody>
{vocabEntries.map((entry, idx) => {
const rowStatus = getRowStatus(idx)
const rowChanges = correctedMap.get(idx)
const rowBg = {
pending: '',
active: 'bg-yellow-50 dark:bg-yellow-900/20',
reviewed: '',
corrected: 'bg-teal-50/50 dark:bg-teal-900/10',
skipped: 'bg-gray-50 dark:bg-gray-800/50',
}[rowStatus]
return (
<tr
key={idx}
ref={rowStatus === 'active' ? activeRowRef : undefined}
className={`border-b border-gray-100 dark:border-gray-700/50 ${rowBg} ${
rowStatus === 'active' ? 'ring-1 ring-yellow-400 ring-inset' : ''
}`}
>
<td className="px-2 py-1.5 text-gray-400 font-mono text-xs">{idx}</td>
{columnsUsed.length > 0 ? (
columnsUsed.map((col, i) => {
const field = COL_TYPE_TO_FIELD[col.type]
if (!field) return null
const text = (entry as Record<string, unknown>)[field] as string || ''
return (
<td key={i} className="px-2 py-1.5 text-xs">
<CellContent text={text} field={field} rowChanges={rowChanges} />
</td>
)
})
) : (
<>
<td className="px-2 py-1.5">
<CellContent text={entry.english} field="english" rowChanges={rowChanges} />
</td>
<td className="px-2 py-1.5">
<CellContent text={entry.german} field="german" rowChanges={rowChanges} />
</td>
<td className="px-2 py-1.5 text-xs">
<CellContent text={entry.example} field="example" rowChanges={rowChanges} />
</td>
</>
)}
<td className="px-2 py-1.5 text-center">
<StatusIcon status={rowStatus} />
</td>
</tr>
)
})}
</tbody>
</table>
</div>
</div>
</>
<LlmReviewVocabTable
vocabEntries={vocabEntries}
columnsUsed={columnsUsed}
getRowStatus={getRowStatus}
correctedMap={correctedMap}
activeRowRef={activeRowRef}
/>
) : (
<>
<div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-1">
Text-Rekonstruktion ({cells.filter(c => c.text).length} Zellen)
</div>
<div className="border border-gray-200 dark:border-gray-700 rounded-lg overflow-hidden bg-white dark:bg-white">
<div
ref={reconRef}
className="relative"
style={{
aspectRatio: imageNaturalSize ? `${imageNaturalSize.w} / ${imageNaturalSize.h}` : '3 / 4',
}}
>
{cells.map(cell => {
if (!cell.bbox_pct || !cell.text) return null
const col = colPositions.get(cell.col_index)
const cellX = col?.x ?? cell.bbox_pct.x
const cellW = col?.w ?? cell.bbox_pct.w
const aspect = imageNaturalSize ? imageNaturalSize.h / imageNaturalSize.w : 4 / 3
const containerH = reconWidth * aspect
const cellHeightPx = containerH * (cell.bbox_pct.h / 100)
const wordPos = cellWordPositions.get(cell.cell_id)
// Pixel-analysed: render word-groups at detected positions
if (wordPos) {
return wordPos.map((wp, i) => {
// Auto font-size from pixel analysis, scaled by user slider
const autoFontPx = cellHeightPx * wp.fontRatio * fontScale
const fs = Math.max(6, autoFontPx)
return (
<span
key={`${cell.cell_id}_${i}`}
className="absolute leading-none pointer-events-none select-none"
style={{
left: `${wp.xPct}%`,
top: `${cell.bbox_pct.y}%`,
width: `${wp.wPct}%`,
height: `${cell.bbox_pct.h}%`,
fontSize: `${fs}px`,
fontWeight: globalBold ? 'bold' : (cell.is_bold ? 'bold' : 'normal'),
fontFamily: "'Liberation Sans', Arial, sans-serif",
display: 'flex',
alignItems: 'center',
whiteSpace: 'nowrap',
overflow: 'visible',
color: '#1a1a1a',
}}
>
{wp.text}
</span>
)
})
}
// Fallback: no pixel data — single span for entire cell
const fontSize = Math.max(6, cellHeightPx * fontScale)
return (
<span
key={cell.cell_id}
className="absolute leading-none pointer-events-none select-none"
style={{
left: `${cellX}%`,
top: `${cell.bbox_pct.y}%`,
width: `${cellW}%`,
height: `${cell.bbox_pct.h}%`,
fontSize: `${fontSize}px`,
fontWeight: globalBold ? 'bold' : (cell.is_bold ? 'bold' : 'normal'),
paddingLeft: `${leftPaddingPct}%`,
fontFamily: "'Liberation Sans', Arial, sans-serif",
display: 'flex',
alignItems: 'center',
whiteSpace: 'pre',
overflow: 'visible',
color: '#1a1a1a',
}}
>
{cell.text}
</span>
)
})}
</div>
</div>
</>
<LlmReviewOverlay
cells={cells}
imageNaturalSize={imageNaturalSize}
fontScale={fontScale}
leftPaddingPct={leftPaddingPct}
globalBold={globalBold}
cellWordPositions={cellWordPositions}
/>
)}
</div>
</div>
{/* Done state: summary + actions */}
{status === 'done' && (
<div className="space-y-4">
{/* Summary */}
<div className="bg-gray-50 dark:bg-gray-800/50 rounded-lg p-3 text-xs text-gray-500 dark:text-gray-400">
{changes.length === 0 ? (
<span>Keine Korrekturen noetig alle Eintraege sind korrekt.</span>
) : (
<span>
{changes.length} Korrektur{changes.length !== 1 ? 'en' : ''} gefunden ·{' '}
{accepted.size} ausgewaehlt ·{' '}
{meta?.skipped || 0} uebersprungen (Lautschrift) ·{' '}
{totalDuration}ms
</span>
)}
</div>
{/* Corrections detail list (if any) */}
{changes.length > 0 && (
<div className="border border-gray-200 dark:border-gray-700 rounded-lg overflow-hidden">
<div className="bg-gray-50 dark:bg-gray-800 px-3 py-2 border-b border-gray-200 dark:border-gray-700">
<span className="text-xs font-medium text-gray-600 dark:text-gray-400">
Korrekturvorschlaege ({accepted.size}/{changes.length} ausgewaehlt)
</span>
</div>
<table className="w-full text-sm">
<thead>
<tr className="bg-gray-50/50 dark:bg-gray-800/50 border-b border-gray-200 dark:border-gray-700">
<th className="w-10 px-3 py-1.5 text-center">
<input type="checkbox" checked={accepted.size === changes.length} onChange={toggleAll}
className="rounded border-gray-300 dark:border-gray-600" />
</th>
<th className="px-2 py-1.5 text-left text-gray-500 dark:text-gray-400 font-medium text-xs">Zeile</th>
<th className="px-2 py-1.5 text-left text-gray-500 dark:text-gray-400 font-medium text-xs">Feld</th>
<th className="px-2 py-1.5 text-left text-gray-500 dark:text-gray-400 font-medium text-xs">Vorher</th>
<th className="px-2 py-1.5 text-left text-gray-500 dark:text-gray-400 font-medium text-xs">Nachher</th>
</tr>
</thead>
<tbody>
{changes.map((change, idx) => (
<tr key={idx} className={`border-b border-gray-100 dark:border-gray-700/50 ${
accepted.has(idx) ? 'bg-teal-50/50 dark:bg-teal-900/10' : ''
}`}>
<td className="px-3 py-1.5 text-center">
<input type="checkbox" checked={accepted.has(idx)} onChange={() => toggleChange(idx)}
className="rounded border-gray-300 dark:border-gray-600" />
</td>
<td className="px-2 py-1.5 text-gray-500 dark:text-gray-400 font-mono text-xs">R{change.row_index}</td>
<td className="px-2 py-1.5">
<span className="text-xs px-1.5 py-0.5 rounded bg-gray-100 dark:bg-gray-700 text-gray-600 dark:text-gray-400">
{FIELD_LABELS[change.field] || change.field}
</span>
</td>
<td className="px-2 py-1.5"><span className="line-through text-red-500 dark:text-red-400 text-xs">{change.old}</span></td>
<td className="px-2 py-1.5"><span className="text-green-600 dark:text-green-400 font-medium text-xs">{change.new}</span></td>
</tr>
))}
</tbody>
</table>
</div>
)}
{/* Actions */}
<div className="flex items-center justify-between pt-2">
<p className="text-xs text-gray-400">
{changes.length > 0 ? `${accepted.size} von ${changes.length} ausgewaehlt` : ''}
</p>
<div className="flex gap-3">
{changes.length > 0 && (
<button onClick={onNext}
className="px-4 py-2 text-sm border border-gray-300 dark:border-gray-600 rounded-lg hover:bg-gray-50 dark:hover:bg-gray-700 transition-colors text-gray-600 dark:text-gray-400">
Alle ablehnen
</button>
)}
{changes.length > 0 ? (
<button onClick={applyChanges} disabled={applying || accepted.size === 0}
className="px-5 py-2 text-sm bg-teal-600 text-white rounded-lg hover:bg-teal-700 disabled:opacity-50 disabled:cursor-not-allowed transition-colors font-medium">
{applying ? 'Wird uebernommen...' : `${accepted.size} Korrektur${accepted.size !== 1 ? 'en' : ''} uebernehmen`}
</button>
) : (
<button onClick={onNext}
className="px-6 py-2.5 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors font-medium">
Weiter
</button>
)}
</div>
</div>
</div>
<LlmReviewCorrections
changes={changes}
accepted={accepted}
meta={meta}
totalDuration={totalDuration}
applying={applying}
onToggleChange={toggleChange}
onToggleAll={toggleAll}
onApply={applyChanges}
onNext={onNext}
/>
)}
</div>
)
}
/**
 * Renders the content of one table cell of a vocabulary row.
 *
 * If `rowChanges` contains a correction for `field`, the previous value is shown
 * struck through followed by the new value. Otherwise the plain `text` is shown,
 * or an em dash placeholder when `text` is empty.
 */
function CellContent({ text, field, rowChanges }: {
  text: string
  field: string
  rowChanges?: LlmChange[]
}) {
  // Only the first correction recorded for this field is displayed.
  const correction = rowChanges?.find(candidate => candidate.field === field)

  if (correction) {
    return (
      <span>
        <span className="line-through text-red-400 dark:text-red-500 text-xs mr-1">{correction.old}</span>
        <span className="text-green-600 dark:text-green-400 font-medium text-xs">{correction.new}</span>
      </span>
    )
  }

  // No correction: empty cells get a placeholder dash instead of blank space.
  if (!text) {
    return <span className="text-gray-300 dark:text-gray-600">&mdash;</span>
  }

  return <span className="text-gray-700 dark:text-gray-300 text-xs">{text}</span>
}
/**
 * Status marker shown in the last column of each table row.
 *
 * The switch stays exhaustive over `RowStatus`, so every status yields exactly one element:
 * a text badge for 'skipped' and 'corrected', a check mark for 'reviewed',
 * a pulsing dot for 'active' and an empty span for 'pending'.
 */
function StatusIcon({ status }: { status: RowStatus }) {
  switch (status) {
    case 'skipped':
      return (
        <span className="inline-flex items-center px-1.5 py-0.5 rounded text-[10px] font-medium bg-gray-100 dark:bg-gray-700 text-gray-500 dark:text-gray-400">skip</span>
      )
    case 'corrected':
      return (
        <span className="inline-flex items-center px-1.5 py-0.5 rounded text-[10px] font-medium bg-teal-100 dark:bg-teal-900/30 text-teal-700 dark:text-teal-400">korr.</span>
      )
    case 'reviewed':
      return (
        <svg className="w-4 h-4 text-green-500 inline-block" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={2}>
          <path strokeLinecap="round" strokeLinejoin="round" d="M5 13l4 4L19 7" />
        </svg>
      )
    case 'active':
      // The title doubles as a tooltip while the row is being checked.
      return <span className="inline-block w-3 h-3 rounded-full bg-yellow-400 animate-pulse" title="Wird geprueft" />
    case 'pending':
      return <span className="text-gray-300 dark:text-gray-600 text-xs"></span>
  }
}