feat: Sprint 1 — IPA hardening, regression framework, ground-truth review

Track A (Backend): - Compound word IPA decomposition (schoolbag→school+bag) - Trailing garbled IPA fragment removal after brackets (R21 fix) - Regression runner with DB persistence, history endpoints - Page crop determinism verified with tests Track B (Frontend): - OCR Regression dashboard (/ai/ocr-regression) - Ground Truth Review workflow (/ai/ocr-ground-truth) with split-view, confidence highlighting, inline edit, batch mark, progress tracking Track C (Docs): - OCR-Pipeline.md v5.0 (Steps 5e-5h) - Regression testing guide - mkdocs.yml nav update Track D (Infra): - TrOCR baseline benchmark script - run-regression.sh shell script - Migration 008: regression_runs table Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-23 09:21:27 +01:00
parent f5d5d6c59c
commit a1e079b911
13 changed files with 1796 additions and 15 deletions
@@ -0,0 +1,84 @@
+#!/usr/bin/env bash
+# Run OCR pipeline regression tests and exit non-zero on failure.
+#
+# POSTs to the regression endpoint, prints the run summary and, when the run
+# did not pass, one entry per non-passing result. Needs curl and python3.
+#
+# Usage:
+#   ./scripts/run-regression.sh                    # default: macmini:8086
+#   ./scripts/run-regression.sh http://localhost:8086
+#
+# Exit codes:
+#   0 = all pass
+#   1 = failures or errors (also: response could not be parsed)
+#   2 = connection error (any curl failure, incl. HTTP >= 400 due to -f)
+
+set -euo pipefail
+
+BASE_URL="${1:-http://macmini:8086}"
+ENDPOINT="${BASE_URL}/api/v1/ocr-pipeline/regression/run?triggered_by=script"
+
+echo "=== OCR Pipeline Regression Suite ==="
+echo "Endpoint: ${ENDPOINT}"
+echo ""
+
+# -S lets curl report why it failed; stderr is left uncaptured so diagnostics
+# can never end up inside the JSON body stored in RESPONSE.
+RESPONSE=$(curl -sSf -X POST "${ENDPOINT}" -H "Content-Type: application/json") || {
+    echo "ERROR: Could not reach ${ENDPOINT}" >&2
+    exit 2
+}
+
+# Parse the body once and emit the six summary fields one per line. A body
+# that is not the expected JSON yields a single error line, not a traceback.
+PARSED=$(printf '%s' "${RESPONSE}" | python3 -c '
+import json, sys
+try:
+    data = json.load(sys.stdin)
+    s = data["summary"]
+    fields = [data["status"], s["total"], s["passed"], s["failed"], s["errors"]]
+except (ValueError, KeyError, TypeError) as exc:
+    sys.exit(f"ERROR: Unexpected response from server: {exc!r}")
+print(*fields, data.get("duration_ms", "?"), sep="\n")
+') || exit 1
+{
+    IFS= read -r STATUS; IFS= read -r TOTAL; IFS= read -r PASSED
+    IFS= read -r FAILED; IFS= read -r ERRORS; IFS= read -r DURATION
+} <<<"${PARSED}"
+
+echo "Status:   ${STATUS}"
+echo "Total:    ${TOTAL}"
+echo "Passed:   ${PASSED}"
+echo "Failed:   ${FAILED}"
+echo "Errors:   ${ERRORS}"
+echo "Duration: ${DURATION}ms"
+echo ""
+
+if [[ "${STATUS}" == "pass" ]]; then
+    echo "PASS — All regression tests passed."
+    exit 0
+fi
+
+echo "FAIL — Regression failures detected!"
+# Print failure details. Best effort: a malformed result entry must not change
+# the exit status, which is always 1 from here on.
+printf '%s' "${RESPONSE}" | python3 -c '
+import json, sys
+parts = [("Structural", "structural_changes"), ("Text", "text_changes"),
+         ("Missing", "cells_missing"), ("Added", "cells_added")]
+for r in json.load(sys.stdin).get("results") or []:
+    status = str(r.get("status", "unknown"))
+    if status == "pass":
+        continue
+    # Look up session_id only when there is no name, so named results that
+    # lack a session_id do not raise KeyError.
+    label = r.get("name") or r.get("session_id", "?")
+    print(f"  {status.upper()}: {label}")
+    err = r.get("error")
+    if err:
+        print(f"    Error: {err}")
+    ds = r.get("diff_summary") or {}
+    if ds:
+        print("    " + ", ".join(f"{n}: {ds.get(k, 0)}" for n, k in parts))
+' || echo "  (failure details could not be printed)" >&2
+exit 1