Files
breakpilot-lehrer/scripts/run-regression.sh
Benjamin Admin a1e079b911
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 28s
CI / test-go-edu-search (push) Successful in 27s
CI / test-python-klausur (push) Failing after 1m55s
CI / test-python-agent-core (push) Successful in 16s
CI / test-nodejs-website (push) Successful in 19s
feat: Sprint 1 — IPA hardening, regression framework, ground-truth review
Track A (Backend):
- Compound word IPA decomposition (schoolbag→school+bag)
- Trailing garbled IPA fragment removal after brackets (R21 fix)
- Regression runner with DB persistence, history endpoints
- Page crop determinism verified with tests

Track B (Frontend):
- OCR Regression dashboard (/ai/ocr-regression)
- Ground Truth Review workflow (/ai/ocr-ground-truth)
  with split-view, confidence highlighting, inline edit,
  batch mark, progress tracking

Track C (Docs):
- OCR-Pipeline.md v5.0 (Steps 5e-5h)
- Regression testing guide
- mkdocs.yml nav update

Track D (Infra):
- TrOCR baseline benchmark script
- run-regression.sh shell script
- Migration 008: regression_runs table

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-23 09:21:27 +01:00

62 lines
2.2 KiB
Bash
Executable File

#!/usr/bin/env bash
# Run OCR pipeline regression tests and exit non-zero on failure.
#
# POSTs to the regression endpoint of the OCR pipeline service, prints a
# summary of the JSON response and, when the run did not pass, one entry per
# non-passing result.
#
# Usage:
#   ./scripts/run-regression.sh                        # default: macmini:8086
#   ./scripts/run-regression.sh http://localhost:8086
#
# Requires: curl, python3
#
# Exit codes:
#   0 = all pass
#   1 = failures or errors (including a response that cannot be parsed)
#   2 = connection error (curl could not complete the request, or the server
#       answered with an HTTP error status)
set -euo pipefail

BASE_URL="${1:-http://macmini:8086}"
ENDPOINT="${BASE_URL}/api/v1/ocr-pipeline/regression/run?triggered_by=script"

# Python program: reads the response JSON on stdin and prints the six summary
# fields, one per line, in the order they are read back below. Parsing once
# replaces six separate interpreter start-ups and lets a malformed response be
# reported with a single clear message instead of a traceback.
# NOTE: the program is single-quoted for the shell, so it must not contain
# single quotes.
readonly SUMMARY_PY='
import json
import sys

try:
    data = json.load(sys.stdin)
    summary = data["summary"]
    fields = [
        data["status"],
        summary["total"],
        summary["passed"],
        summary["failed"],
        summary["errors"],
        data.get("duration_ms", "?"),
    ]
except (ValueError, KeyError, TypeError, AttributeError) as exc:
    sys.exit("ERROR: Unexpected regression response: {!r}".format(exc))

for value in fields:
    print(value)
'

# Python program: reads the response JSON on stdin and prints one entry for
# every result whose status is not "pass".
# NOTE: single-quoted for the shell, so it must not contain single quotes.
readonly DETAILS_PY='
import json
import sys

data = json.load(sys.stdin)
for r in data.get("results") or []:
    status = str(r.get("status", "unknown"))
    if status == "pass":
        continue
    # Fall back to session_id only when name is absent, so a result without
    # a session_id does not abort the report.
    label = r["name"] if "name" in r else r.get("session_id", "?")
    print(" {}: {}".format(status.upper(), label))
    if "error" in r:
        print(" Error: {}".format(r["error"]))
    ds = r.get("diff_summary", {})
    if ds:
        print(" Structural: {}, Text: {}, Missing: {}, Added: {}".format(
            ds.get("structural_changes", 0),
            ds.get("text_changes", 0),
            ds.get("cells_missing", 0),
            ds.get("cells_added", 0),
        ))
'

echo "=== OCR Pipeline Regression Suite ==="
echo "Endpoint: ${ENDPOINT}"
echo ""

# -s hides the progress meter, -S still lets curl report its own error on
# stderr, -f turns an HTTP error status into a non-zero exit. stderr is
# deliberately not merged into RESPONSE so only the response body reaches the
# JSON parser.
RESPONSE=$(curl -sSf -X POST "${ENDPOINT}" -H "Content-Type: application/json") || {
  echo "ERROR: Could not reach ${ENDPOINT}" >&2
  exit 2
}

FIELDS=$(printf '%s' "${RESPONSE}" | python3 -c "${SUMMARY_PY}") || {
  echo "ERROR: Could not parse the response from ${ENDPOINT}" >&2
  exit 1
}

# The here-string appends a trailing newline, so all six reads succeed.
{
  IFS= read -r STATUS
  IFS= read -r TOTAL
  IFS= read -r PASSED
  IFS= read -r FAILED
  IFS= read -r ERRORS
  IFS= read -r DURATION
} <<<"${FIELDS}"

echo "Status: ${STATUS}"
echo "Total: ${TOTAL}"
echo "Passed: ${PASSED}"
echo "Failed: ${FAILED}"
echo "Errors: ${ERRORS}"
echo "Duration: ${DURATION}ms"
echo ""

if [[ "${STATUS}" == "pass" ]]; then
  echo "PASS — All regression tests passed."
  exit 0
fi

echo "FAIL — Regression failures detected!"
# Print failure details. A problem while printing them must not change the
# exit code: the run has already been classified as failed.
printf '%s' "${RESPONSE}" | python3 -c "${DETAILS_PY}" || {
  echo "WARNING: Could not print failure details" >&2
}
exit 1