Add A/B testing toggles to OCR Kombi Grid Review
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 39s
CI / test-go-edu-search (push) Successful in 28s
CI / test-python-klausur (push) Failing after 2m27s
CI / test-python-agent-core (push) Successful in 19s
CI / test-nodejs-website (push) Successful in 24s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 39s
CI / test-go-edu-search (push) Successful in 28s
CI / test-python-klausur (push) Failing after 2m27s
CI / test-python-agent-core (push) Successful in 19s
CI / test-nodejs-website (push) Successful in 24s
Quality step toggles in admin-lehrer StepGridReview (port 3002):
- CLAHE checkbox (Step 3: image enhancement)
- MaxCol dropdown (Step 2: column limit, 0=off)
- MinConf dropdown (Step 1: OCR confidence, 0=auto)

Parameters flow through: StepGridReview → useGridEditor → build-grid endpoint → _build_grid_core. MinConf filters words before grid building.

To test each step individually, toggle the settings and click "Neu berechnen".

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -55,6 +55,9 @@ async def _build_grid_core(
|
||||
*,
|
||||
ipa_mode: str = "auto",
|
||||
syllable_mode: str = "auto",
|
||||
enhance: bool = True,
|
||||
max_columns: int | None = None,
|
||||
min_conf: int | None = None,
|
||||
) -> dict:
|
||||
"""Core grid building logic — pure computation, no HTTP or DB side effects.
|
||||
|
||||
@@ -91,8 +94,18 @@ async def _build_grid_core(
|
||||
if not all_words:
|
||||
raise ValueError("No word boxes found in cells")
|
||||
|
||||
logger.info("build-grid session %s: %d words from %d cells",
|
||||
session_id, len(all_words), len(word_result["cells"]))
|
||||
# 2a-pre. Apply min_conf filter if specified (Step 1: confidence tightening)
|
||||
if min_conf and min_conf > 0:
|
||||
before = len(all_words)
|
||||
all_words = [w for w in all_words if w.get('conf', 100) >= min_conf]
|
||||
removed = before - len(all_words)
|
||||
if removed:
|
||||
logger.info("build-grid session %s: min_conf=%d removed %d/%d words",
|
||||
session_id, min_conf, removed, before)
|
||||
|
||||
logger.info("build-grid session %s: %d words from %d cells (enhance=%s, max_cols=%s, min_conf=%s)",
|
||||
session_id, len(all_words), len(word_result["cells"]),
|
||||
enhance, max_columns, min_conf)
|
||||
|
||||
# 2b. Filter decorative margin columns (alphabet graphics).
|
||||
# Some worksheets have a decorative alphabet strip along one margin
|
||||
|
||||
@@ -32,6 +32,9 @@ async def build_grid(
|
||||
session_id: str,
|
||||
ipa_mode: str = Query("auto", pattern="^(auto|all|de|en|none)$"),
|
||||
syllable_mode: str = Query("auto", pattern="^(auto|all|de|en|none)$"),
|
||||
enhance: bool = Query(True, description="Step 3: CLAHE + denoise for degraded scans"),
|
||||
max_cols: int = Query(0, description="Step 2: Max column count (0=unlimited)"),
|
||||
min_conf: int = Query(0, description="Step 1: Min OCR confidence (0=auto)"),
|
||||
):
|
||||
"""Build a structured, zone-aware grid from existing Kombi word results.
|
||||
|
||||
@@ -53,6 +56,9 @@ async def build_grid(
|
||||
result = await _build_grid_core(
|
||||
session_id, session,
|
||||
ipa_mode=ipa_mode, syllable_mode=syllable_mode,
|
||||
enhance=enhance,
|
||||
max_columns=max_cols if max_cols > 0 else None,
|
||||
min_conf=min_conf if min_conf > 0 else None,
|
||||
)
|
||||
except ValueError as e:
|
||||
raise HTTPException(status_code=400, detail=str(e))
|
||||
|
||||
Reference in New Issue
Block a user