Add Box-Grid-Review step (Step 11) to OCR pipeline
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 44s
CI / test-go-edu-search (push) Successful in 43s
CI / test-python-klausur (push) Failing after 2m52s
CI / test-python-agent-core (push) Successful in 36s
CI / test-nodejs-website (push) Successful in 37s

New pipeline step between Gutter Repair and Ground Truth that processes
embedded boxes (grammar tips, exercises) independently from the main grid.

Backend:
- cv_box_layout.py: classify_box_layout() detects flowing/columnar/
  bullet_list/header_only layout types per box
- build_box_zone_grid(): layout-aware grid building (single-column for
  flowing text, independent columns for tabular content)
- POST /sessions/{id}/build-box-grids endpoint with SmartSpellChecker
- Layout type overridable per box via request body

Frontend:
- StepBoxGridReview.tsx: shows each box with cropped image + editable
  GridTable. Layout type dropdown per box. Auto-builds on first load.
- Auto-skip when no boxes detected on page
- Pipeline steps updated: 13 steps (0-12), Ground Truth moved to 12

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-04-12 17:26:06 +02:00
parent 52637778b9
commit 5da9a550bf
6 changed files with 661 additions and 2 deletions

View File

@@ -16,6 +16,7 @@ import { StepStructure } from '@/components/ocr-kombi/StepStructure'
import { StepGridBuild } from '@/components/ocr-kombi/StepGridBuild'
import { StepGridReview } from '@/components/ocr-kombi/StepGridReview'
import { StepGutterRepair } from '@/components/ocr-kombi/StepGutterRepair'
import { StepBoxGridReview } from '@/components/ocr-kombi/StepBoxGridReview'
import { StepGroundTruth } from '@/components/ocr-kombi/StepGroundTruth'
import { useKombiPipeline } from './useKombiPipeline'
@@ -97,6 +98,8 @@ function OcrKombiContent() {
case 10:
return <StepGutterRepair sessionId={sessionId} onNext={handleNext} />
case 11:
return <StepBoxGridReview sessionId={sessionId} onNext={handleNext} />
case 12:
return (
<StepGroundTruth
sessionId={sessionId}

View File

@@ -40,6 +40,7 @@ export const KOMBI_V2_STEPS: PipelineStep[] = [
{ id: 'grid-build', name: 'Grid-Aufbau', icon: '🧱', status: 'pending' },
{ id: 'grid-review', name: 'Grid-Review', icon: '📊', status: 'pending' },
{ id: 'gutter-repair', name: 'Wortkorrektur', icon: '🩹', status: 'pending' },
{ id: 'box-review', name: 'Box-Review', icon: '📦', status: 'pending' },
{ id: 'ground-truth', name: 'Ground Truth', icon: '✅', status: 'pending' },
]
@@ -56,7 +57,8 @@ export const KOMBI_V2_UI_TO_DB: Record<number, number> = {
8: 10, // grid-build
9: 11, // grid-review
10: 11, // gutter-repair (shares DB step with grid-review)
11: 12, // ground-truth
11: 11, // box-review (shares DB step with grid-review)
12: 12, // ground-truth
}
/** Map from DB step to Kombi V2 UI step index */
@@ -70,7 +72,7 @@ export function dbStepToKombiV2Ui(dbStep: number): number {
if (dbStep === 9) return 7 // structure
if (dbStep === 10) return 8 // grid-build
if (dbStep === 11) return 9 // grid-review
return 11 // ground-truth
return 12 // ground-truth
}
/** Document group: groups multiple sessions from a multi-page upload */