Compare commits


12 Commits

Author SHA1 Message Date
Benjamin Admin
52b66ebe07 Fix NameError: _text_has_garbled_ipa not imported in grid_editor_helpers
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 26s
CI / test-go-edu-search (push) Successful in 26s
CI / test-python-klausur (push) Failing after 1m52s
CI / test-python-agent-core (push) Successful in 15s
CI / test-nodejs-website (push) Successful in 16s
After refactoring grid_editor_api.py into helpers, the function
_text_has_garbled_ipa was used in _detect_heading_rows_by_single_cell
but never imported from cv_ocr_engines. This caused HTTP 500 on
build-grid for sessions that trigger single-cell heading detection.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-24 15:11:29 +01:00
Benjamin Admin
424e5c51d4 fix: remove nested scrollbar in grid editor
Removed overflow-y-auto and maxHeight from the grid container div.
The page itself handles scrolling — nested scroll containers caused
the bottom rows to be cut off after editing.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-24 15:06:28 +01:00
Benjamin Admin
12b4c61bac refactor: extract grid helpers + generic CV-gated syllable insertion
1. Extracted 1367 lines of helper functions from grid_editor_api.py
   (3051→1620 lines) into grid_editor_helpers.py (filters, detectors,
   zone grid building).

2. Created cv_syllable_detect.py with generic CV+pyphen logic:
   - Checks EVERY word_box for vertical pipe lines (not just first word)
   - No article-column dependency — works with any dictionary layout
   - CV morphological detection gates pyphen insertion

3. Grid editor scroll: calc(100vh-200px) for reliable scrolling.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-24 14:39:33 +01:00
Benjamin Admin
d9b2aa82e9 fix: CV-gated syllable insertion + grid editor scroll
1. Syllable dividers now require CV validation: morphological vertical
   line detection checks if word_box image actually shows thin isolated
   pipe lines before applying pyphen. Only first word per cell gets
   pipes (matching dictionary print layout).

2. Grid editor scroll: changed maxHeight from 80vh to calc(100vh-200px)
   so editor remains scrollable after edits.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-24 14:31:16 +01:00
Benjamin Admin
364086b86e feat: auto-insert syllable dividers via pyphen on dictionary pages
OCR engines don't detect | pipe chars used as syllable dividers in
dictionaries. After dictionary detection (is_dict=True), use pyphen
(MIT) to insert syllable breaks into headword cells. Tries DE first,
then EN. Skips IPA content, short words, and cells already containing |.

Also adds pyphen>=0.16.0 to requirements.txt.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-24 14:17:26 +01:00
Benjamin Admin
fe754398c0 fix: Step 4f sidebar detection uses avg text length instead of fill ratio
Column_1 data showed avg_len=1.0 with 13 single-char cells (alphabet
letters from sidebar). Old fill_ratio check (76% > 35%) missed it.
New criteria: avg_len ≤ 1.5 AND ≥ 70% single chars → removes column.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-24 14:10:43 +01:00
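The Step 4f criteria above (avg_len ≤ 1.5 AND ≥ 70% single chars) can be sketched as a standalone check (a hypothetical helper for illustration, not the project's actual function):

```python
from typing import List

def is_alphabet_sidebar(cells: List[str]) -> bool:
    """Sketch of the commit's criteria: a thin edge column is an alphabet
    sidebar when its non-empty cells are almost all single characters."""
    texts = [c.strip() for c in cells if c.strip()]
    if not texts:
        return False
    avg_len = sum(len(t) for t in texts) / len(texts)
    single_ratio = sum(1 for t in texts if len(t) == 1) / len(texts)
    return avg_len <= 1.5 and single_ratio >= 0.70

# The Column_1 case from the commit: 13 single-char cells, avg_len=1.0
print(is_alphabet_sidebar(list("ABCDEFGHIJKLM")))  # prints True
```

Unlike the old fill_ratio check, this fires even when the sidebar column is densely filled, because it looks at text length rather than cell occupancy.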
Benjamin Admin
be86a7d14d fix: preserve pipe syllable dividers + detect alphabet sidebar columns
1. Pipe divider fix: Changed OCR char-confusion regex so | between
   letters (Ka|me|rad) is NOT converted to I. Only standalone/
   word-boundary pipes are converted (|ch → Ich, | want → I want).

2. Alphabet sidebar detection improvements:
   - _filter_decorative_margin() now considers 2-char words (OCR reads
     "Aa", "Bb" from sidebars), lowered min strip from 8→6
   - _filter_border_strip_words() lowered decorative threshold from 50%→45%
   - New step 4f: grid-level thin-edge-column filter as safety net —
     removes edge columns with <35% fill rate and >60% short text

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-24 13:52:11 +01:00
Benjamin Admin
19a5f69272 fix: make Grid Editor vertically scrollable so all rows are visible
The right panel (grid area) had no vertical overflow handling, causing
the last ~5 rows to be clipped and invisible. Added overflow-y-auto
with max-height 80vh, and removed overflow-hidden from the GridTable
wrapper that was cutting off content.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-24 13:33:52 +01:00
Benjamin Admin
ea09fc75df fix: resolve circular import with lazy import for _build_reference_snapshot
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-24 13:18:21 +01:00
Benjamin Admin
410d36f3de feat: save automatic grid snapshot before manual edits for GT comparison
- build-grid now saves the automatic OCR result as ground_truth.auto_grid_snapshot
- mark-ground-truth includes a correction_diff comparing auto vs corrected
- New endpoint GET /correction-diff returns detailed diff with per-col_type
  accuracy breakdown (english, german, ipa, etc.)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-24 13:16:44 +01:00
Benjamin Admin
72ce4420cb fix: advance uiStep past skipped orientation for page-split sub-sessions
Page-split sub-sessions (current_step=2) had orientation marked as skipped
but uiStep remained at 0 (orientation step), causing StepOrientation to
render for a sub-session that has no orientation data. Now advances to
uiStep=1 (deskew) when orientation is skipped.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-24 12:59:36 +01:00
Benjamin Admin
63dfb4d06f fix: replace reset useEffects with key prop for step component remount
The reset useEffects in StepOrientation/Deskew/Dewarp/Crop were clearing
orientationResult when sessionId changed (e.g. during handleOrientationComplete),
causing the right side of ImageCompareView to show nothing. Using key={sessionId}
on the step components instead forces React to remount with fresh state when
switching sessions, without interfering with the upload/orientation flow.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-24 12:20:50 +01:00
13 changed files with 1738 additions and 1414 deletions


@@ -136,6 +136,7 @@ export default function OcrOverlayPage() {
         if (uiStep < 4) uiStep = 4
       } else if (dbStep >= 2) {
         skipIds.push('orientation')
+        if (uiStep < 1) uiStep = 1 // advance past skipped orientation to deskew
       }
     }
   }
@@ -382,13 +383,13 @@ export default function OcrOverlayPage() {
     if (mode === 'paddle-direct' || mode === 'kombi') {
       switch (currentStep) {
         case 0:
-          return <StepOrientation sessionId={sessionId} onNext={handleOrientationComplete} onSubSessionsCreated={handleBoxSessionsCreated} />
+          return <StepOrientation key={sessionId} sessionId={sessionId} onNext={handleOrientationComplete} onSubSessionsCreated={handleBoxSessionsCreated} />
         case 1:
-          return <StepDeskew sessionId={sessionId} onNext={handleNext} />
+          return <StepDeskew key={sessionId} sessionId={sessionId} onNext={handleNext} />
         case 2:
-          return <StepDewarp sessionId={sessionId} onNext={handleNext} />
+          return <StepDewarp key={sessionId} sessionId={sessionId} onNext={handleNext} />
         case 3:
-          return <StepCrop sessionId={sessionId} onNext={handleNext} />
+          return <StepCrop key={sessionId} sessionId={sessionId} onNext={handleNext} />
         case 4:
           if (mode === 'kombi') {
             return (
@@ -420,13 +421,13 @@ export default function OcrOverlayPage() {
     }
     switch (currentStep) {
       case 0:
-        return <StepOrientation sessionId={sessionId} onNext={handleOrientationComplete} onSubSessionsCreated={handleBoxSessionsCreated} />
+        return <StepOrientation key={sessionId} sessionId={sessionId} onNext={handleOrientationComplete} onSubSessionsCreated={handleBoxSessionsCreated} />
      case 1:
-        return <StepDeskew sessionId={sessionId} onNext={handleNext} />
+        return <StepDeskew key={sessionId} sessionId={sessionId} onNext={handleNext} />
      case 2:
-        return <StepDewarp sessionId={sessionId} onNext={handleNext} />
+        return <StepDewarp key={sessionId} sessionId={sessionId} onNext={handleNext} />
      case 3:
-        return <StepCrop sessionId={sessionId} onNext={handleNext} />
+        return <StepCrop key={sessionId} sessionId={sessionId} onNext={handleNext} />
      case 4:
        return <StepRowDetection sessionId={sessionId} onNext={handleNext} />
      case 5:


@@ -108,6 +108,7 @@ export default function OcrPipelinePage() {
     } else if (dbStep >= 2) {
       // Page-split sub-session: parent orientation applied, skip only orientation
       if (!skipSteps.includes('orientation')) skipSteps.push('orientation')
+      if (uiStep < 1) uiStep = 1 // advance past skipped orientation to deskew
     }
     // dbStep === 1: page-split from original image, needs full pipeline
   }
@@ -397,13 +398,13 @@ export default function OcrPipelinePage() {
   const renderStep = () => {
     switch (currentStep) {
       case 0:
-        return <StepOrientation sessionId={sessionId} onNext={handleOrientationComplete} onSubSessionsCreated={handleBoxSessionsCreated} />
+        return <StepOrientation key={sessionId} sessionId={sessionId} onNext={handleOrientationComplete} onSubSessionsCreated={handleBoxSessionsCreated} />
      case 1:
-        return <StepDeskew sessionId={sessionId} onNext={handleNext} />
+        return <StepDeskew key={sessionId} sessionId={sessionId} onNext={handleNext} />
      case 2:
-        return <StepDewarp sessionId={sessionId} onNext={handleNext} />
+        return <StepDewarp key={sessionId} sessionId={sessionId} onNext={handleNext} />
      case 3:
-        return <StepCrop sessionId={sessionId} onNext={handleCropNext} />
+        return <StepCrop key={sessionId} sessionId={sessionId} onNext={handleCropNext} />
      case 4:
        return <StepColumnDetection sessionId={sessionId} onNext={handleNext} onBoxSessionsCreated={handleBoxSessionsCreated} />
      case 5:


@@ -17,13 +17,6 @@ export function StepCrop({ sessionId, onNext }: StepCropProps) {
   const [error, setError] = useState<string | null>(null)
   const [hasRun, setHasRun] = useState(false)
-  // Reset state when sessionId changes (e.g. switching sub-sessions)
-  useEffect(() => {
-    setCropResult(null)
-    setHasRun(false)
-    setError(null)
-  }, [sessionId])
   // Auto-trigger crop on mount
   useEffect(() => {
     if (!sessionId || hasRun) return


@@ -22,14 +22,6 @@ export function StepDeskew({ sessionId, onNext }: StepDeskewProps) {
   const [error, setError] = useState<string | null>(null)
   const [hasAutoRun, setHasAutoRun] = useState(false)
-  // Reset state when sessionId changes (e.g. switching sub-sessions)
-  useEffect(() => {
-    setSession(null)
-    setDeskewResult(null)
-    setHasAutoRun(false)
-    setError(null)
-  }, [sessionId])
   // Load session and auto-trigger deskew
   useEffect(() => {
     if (!sessionId || session) return


@@ -20,13 +20,6 @@ export function StepDewarp({ sessionId, onNext }: StepDewarpProps) {
   const [showGrid, setShowGrid] = useState(true)
   const [error, setError] = useState<string | null>(null)
-  // Reset state when sessionId changes (e.g. switching sub-sessions)
-  useEffect(() => {
-    setDewarpResult(null)
-    setDeskewResult(null)
-    setError(null)
-  }, [sessionId])
   // Load session info to get deskew_result (for fine-tuning init values)
   useEffect(() => {
     if (!sessionId) return


@@ -383,7 +383,7 @@ export function StepGridReview({ sessionId, onNext, saveRef }: StepGridReviewPro
             {group.map((zone) => (
               <div
                 key={zone.zone_index}
-                className={`${group.length > 1 ? 'flex-1 min-w-0' : ''} bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 overflow-hidden`}
+                className={`${group.length > 1 ? 'flex-1 min-w-0' : ''} bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700`}
               >
                 <GridTable
                   zone={zone}


@@ -30,14 +30,6 @@ export function StepOrientation({ sessionId: existingSessionId, onNext, onSubSes
   const [dragOver, setDragOver] = useState(false)
   const [sessionName, setSessionName] = useState('')
-  // Reset state when sessionId changes
-  useEffect(() => {
-    setSession(null)
-    setOrientationResult(null)
-    setPageSplitResult(null)
-    setError(null)
-  }, [existingSessionId])
   // Reload session data when navigating back
   useEffect(() => {
     if (!existingSessionId || session) return


@@ -481,8 +481,9 @@ _CHAR_CONFUSION_RULES = [
     (re.compile(r'\b1([a-z])'), r'I\1'),  # 1ch → Ich, 1want → Iwant
     # Standalone "1" → "I" (English pronoun), but NOT "1." or "1," (list number)
     (re.compile(r'(?<!\d)\b1\b(?![\d.,])'), 'I'),  # "1 want" → "I want"
-    # "|" → "I", but NOT "|." or "|," (those are "1." list prefixes → spell-checker handles them)
-    (re.compile(r'(?<!\|)\|(?!\||[.,])'), 'I'),  # |ch → Ich, | want → I want
+    # "|" → "I", but NOT when embedded between letters (syllable divider: Ka|me|rad)
+    # and NOT "|." or "|," (those are "1." list prefixes → spell-checker handles them)
+    (re.compile(r'(?<![a-zA-ZäöüÄÖÜß])\|(?!\||[.,])'), 'I'),  # |ch → Ich, | want → I want
 ]
 # Cross-language indicators: if DE has these, EN "1" is almost certainly "I"
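A quick sanity check of the widened lookbehind in the new rule (regex copied from the hunk above; the surrounding confusion-rule pipeline is not reproduced):

```python
import re

# New rule: "|" becomes "I" only when NOT preceded by a letter (incl. German
# umlauts), so embedded syllable dividers like Ka|me|rad survive untouched.
PIPE_TO_I = re.compile(r'(?<![a-zA-ZäöüÄÖÜß])\|(?!\||[.,])')

print(PIPE_TO_I.sub('I', '|ch gehe'))    # prints: Ich gehe
print(PIPE_TO_I.sub('I', '| want tea'))  # prints: I want tea
print(PIPE_TO_I.sub('I', 'Ka|me|rad'))   # prints: Ka|me|rad (unchanged)
```

The old rule only excluded doubled pipes, so a pipe between letters was still rewritten to I; the letter lookbehind is what preserves dictionary dividers.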


@@ -0,0 +1,155 @@
"""
CV-based syllable divider detection and insertion for dictionary pages.
Two-step approach:
1. CV: morphological vertical line detection checks if a word_box image
contains thin, isolated pipe-like vertical lines (syllable dividers).
2. pyphen: inserts syllable breaks at linguistically correct positions
for words where CV confirmed the presence of dividers.
Lizenz: Apache 2.0 (kommerziell nutzbar)
DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
"""
import logging
import re
from typing import Any, Dict, List
import cv2
import numpy as np
logger = logging.getLogger(__name__)
def _word_has_pipe_lines(img_gray: np.ndarray, wb: Dict) -> bool:
"""CV check: does this word_box image show thin vertical pipe dividers?
Uses morphological opening with a tall thin kernel to isolate vertical
structures, then filters for thin (≤4px), isolated contours that are
NOT at the word edges (those would be l, I, 1 etc.).
"""
x = wb.get("left", 0)
y = wb.get("top", 0)
w = wb.get("width", 0)
h = wb.get("height", 0)
if w < 30 or h < 12:
return False
ih, iw = img_gray.shape[:2]
y1, y2 = max(0, y), min(ih, y + h)
x1, x2 = max(0, x), min(iw, x + w)
roi = img_gray[y1:y2, x1:x2]
if roi.size == 0:
return False
rh, rw = roi.shape
# Binarize (ink = white on black background)
_, binary = cv2.threshold(
roi, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
)
# Morphological opening: keep only tall vertical structures (≥55% height)
kern_h = max(int(rh * 0.55), 8)
kernel = np.ones((kern_h, 1), np.uint8)
vertical = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel)
# Find surviving contours
contours, _ = cv2.findContours(
vertical, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
)
margin = max(int(rw * 0.08), 3)
for cnt in contours:
cx, cy, cw, ch = cv2.boundingRect(cnt)
if cw > 4:
continue # too wide for a pipe
if cx < margin or cx + cw > rw - margin:
continue # at word edge — likely l, I, 1
# Check isolation: adjacent columns should be mostly empty (ink-free)
left_zone = binary[cy:cy + ch, max(0, cx - 3):cx]
right_zone = binary[cy:cy + ch, cx + cw:min(rw, cx + cw + 3)]
left_ink = np.mean(left_zone) if left_zone.size else 255
right_ink = np.mean(right_zone) if right_zone.size else 255
if left_ink < 80 and right_ink < 80:
return True # isolated thin vertical line = pipe divider
return False
# IPA/phonetic bracket pattern — don't hyphenate transcriptions
_IPA_RE = re.compile(r'[\[\]ˈˌːʃʒθðŋɑɒæɔəɛɜɪʊʌ]')
def insert_syllable_dividers(
zones_data: List[Dict],
img_bgr: np.ndarray,
session_id: str,
) -> int:
"""Insert pipe syllable dividers into dictionary cells where CV confirms them.
For each cell on a dictionary page:
1. Check if ANY word_box has CV-detected pipe lines
2. If yes, apply pyphen to EACH word (≥4 chars) in the cell
3. Try DE hyphenation first, then EN
Returns the number of cells modified.
"""
try:
import pyphen
except ImportError:
logger.warning("pyphen not installed — skipping syllable insertion")
return 0
_hyph_de = pyphen.Pyphen(lang='de_DE')
_hyph_en = pyphen.Pyphen(lang='en_US')
img_gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
insertions = 0
for z in zones_data:
for cell in z.get("cells", []):
ct = cell.get("col_type", "")
if not ct.startswith("column_"):
continue
text = cell.get("text", "")
if not text or "|" in text:
continue
if _IPA_RE.search(text):
continue
# CV gate: check if ANY word_box in this cell has pipe lines
wbs = cell.get("word_boxes") or []
if not any(_word_has_pipe_lines(img_gray, wb) for wb in wbs):
continue
# Apply pyphen to each significant word in the cell
tokens = re.split(r'(\s+|[,;]+\s*)', text)
new_tokens = []
changed = False
for tok in tokens:
# Skip whitespace/punctuation separators
if re.match(r'^[\s,;]+$', tok):
new_tokens.append(tok)
continue
# Only hyphenate words ≥ 4 alpha chars
clean = re.sub(r'[().\-]', '', tok)
if len(clean) < 4 or not re.search(r'[a-zA-ZäöüÄÖÜß]', clean):
new_tokens.append(tok)
continue
# Try DE first, then EN
hyph = _hyph_de.inserted(tok, hyphen='|')
if '|' not in hyph:
hyph = _hyph_en.inserted(tok, hyphen='|')
if '|' in hyph and hyph != tok:
new_tokens.append(hyph)
changed = True
else:
new_tokens.append(tok)
if changed:
cell["text"] = ''.join(new_tokens)
insertions += 1
if insertions:
logger.info(
"build-grid session %s: inserted syllable dividers in %d cells "
"(CV-validated)",
session_id, insertions,
)
return insertions
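One detail worth noting in insert_syllable_dividers: the re.split pattern captures the separators, so the cell text reassembles losslessly after per-word hyphenation. A standalone illustration:

```python
import re

text = "der Kamerad, die Kameradin"
# Capturing group keeps whitespace/comma separators as their own tokens
tokens = re.split(r'(\s+|[,;]+\s*)', text)
print(tokens)  # ['der', ' ', 'Kamerad', ', ', 'die', ' ', 'Kameradin']
assert ''.join(tokens) == text  # joining restores the original exactly
```

Because separators round-trip, only the word tokens need to change; spacing and punctuation in the cell are never disturbed.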

File diff suppressed because it is too large.

File diff suppressed because it is too large.


@@ -258,9 +258,17 @@ async def mark_ground_truth(
     gt["build_grid_reference"] = reference
     await update_session_db(session_id, ground_truth=gt, current_step=11)
+    # Compare with auto-snapshot if available (shows what the user corrected)
+    auto_snapshot = gt.get("auto_grid_snapshot")
+    correction_diff = None
+    if auto_snapshot:
+        correction_diff = compare_grids(auto_snapshot, reference)
     logger.info(
-        "Ground truth marked for session %s: %d cells",
-        session_id, len(reference["cells"]),
+        "Ground truth marked for session %s: %d cells (corrections: %s)",
+        session_id,
+        len(reference["cells"]),
+        correction_diff["summary"] if correction_diff else "no auto-snapshot",
     )
     return {
@@ -268,6 +276,7 @@ async def mark_ground_truth(
         "session_id": session_id,
         "cells_saved": len(reference["cells"]),
         "summary": reference["summary"],
+        "correction_diff": correction_diff,
     }
@@ -289,6 +298,68 @@ async def unmark_ground_truth(session_id: str):
    return {"status": "ok", "session_id": session_id}


@router.get("/sessions/{session_id}/correction-diff")
async def get_correction_diff(session_id: str):
    """Compare automatic OCR grid with manually corrected ground truth.

    Returns a diff showing exactly which cells the user corrected,
    broken down by col_type (english, german, ipa, etc.).
    """
    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")
    gt = session.get("ground_truth") or {}
    auto_snapshot = gt.get("auto_grid_snapshot")
    reference = gt.get("build_grid_reference")
    if not auto_snapshot:
        raise HTTPException(
            status_code=404,
            detail="No auto_grid_snapshot found. Re-run build-grid to create one.",
        )
    if not reference:
        raise HTTPException(
            status_code=404,
            detail="No ground truth reference found. Mark as ground truth first.",
        )
    diff = compare_grids(auto_snapshot, reference)
    # Enrich with per-col_type breakdown
    col_type_stats: Dict[str, Dict[str, int]] = {}
    for cell_diff in diff.get("cell_diffs", []):
        if cell_diff["type"] != "text_change":
            continue
        # Find col_type from reference cells
        cell_id = cell_diff["cell_id"]
        ref_cell = next(
            (c for c in reference.get("cells", []) if c["cell_id"] == cell_id),
            None,
        )
        ct = ref_cell.get("col_type", "unknown") if ref_cell else "unknown"
        if ct not in col_type_stats:
            col_type_stats[ct] = {"total": 0, "corrected": 0}
        col_type_stats[ct]["corrected"] += 1
    # Count total cells per col_type from reference
    for cell in reference.get("cells", []):
        ct = cell.get("col_type", "unknown")
        if ct not in col_type_stats:
            col_type_stats[ct] = {"total": 0, "corrected": 0}
        col_type_stats[ct]["total"] += 1
    # Calculate accuracy per col_type
    for ct, stats in col_type_stats.items():
        total = stats["total"]
        corrected = stats["corrected"]
        stats["accuracy_pct"] = round((total - corrected) / total * 100, 1) if total > 0 else 100.0
    diff["col_type_breakdown"] = col_type_stats
    return diff


@router.get("/ground-truth-sessions")
async def list_ground_truth_sessions():
    """List all sessions that have a ground-truth reference."""

@@ -38,6 +38,9 @@ eng-to-ipa
# Spell-checker for rule-based OCR correction (MIT license)
pyspellchecker>=0.8.1
# Syllable hyphenation for dictionary pipe-divider insertion (MIT license)
pyphen>=0.16.0
# PostgreSQL (for metrics storage)
psycopg2-binary>=2.9.0
asyncpg>=0.29.0