feat: Sprint 1 — IPA hardening, regression framework, ground-truth review
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 28s
CI / test-go-edu-search (push) Successful in 27s
CI / test-python-klausur (push) Failing after 1m55s
CI / test-python-agent-core (push) Successful in 16s
CI / test-nodejs-website (push) Successful in 19s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 28s
CI / test-go-edu-search (push) Successful in 27s
CI / test-python-klausur (push) Failing after 1m55s
CI / test-python-agent-core (push) Successful in 16s
CI / test-nodejs-website (push) Successful in 19s
Track A (Backend): - Compound word IPA decomposition (schoolbag→school+bag) - Trailing garbled IPA fragment removal after brackets (R21 fix) - Regression runner with DB persistence, history endpoints - Page crop determinism verified with tests Track B (Frontend): - OCR Regression dashboard (/ai/ocr-regression) - Ground Truth Review workflow (/ai/ocr-ground-truth) with split-view, confidence highlighting, inline edit, batch mark, progress tracking Track C (Docs): - OCR-Pipeline.md v5.0 (Steps 5e-5h) - Regression testing guide - mkdocs.yml nav update Track D (Infra): - TrOCR baseline benchmark script - run-regression.sh shell script - Migration 008: regression_runs table Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -8,7 +8,11 @@ Lizenz: Apache 2.0
|
||||
DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
import uuid
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
@@ -16,6 +20,7 @@ from fastapi import APIRouter, HTTPException, Query
|
||||
|
||||
from grid_editor_api import _build_grid_core
|
||||
from ocr_pipeline_session_store import (
|
||||
get_pool,
|
||||
get_session_db,
|
||||
list_ground_truth_sessions_db,
|
||||
update_session_db,
|
||||
@@ -26,6 +31,60 @@ logger = logging.getLogger(__name__)
|
||||
router = APIRouter(prefix="/api/v1/ocr-pipeline", tags=["regression"])
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# DB persistence for regression runs
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
async def _init_regression_table():
    """Ensure the regression_runs table exists by applying migration 008.

    Reads ``migrations/008_regression_runs.sql`` (resolved relative to this
    module) and executes its contents on a pooled connection. When the
    migration file is absent, nothing is executed.

    NOTE(review): callers invoke this before every DB access, so the SQL in
    the migration is presumably idempotent (e.g. CREATE TABLE IF NOT EXISTS)
    -- confirm against the migration file.
    """
    migration_path = os.path.join(
        os.path.dirname(__file__),
        "migrations/008_regression_runs.sql",
    )
    # Read the file before touching the pool so no connection is held during
    # file I/O; the explicit encoding avoids locale-dependent decoding.
    try:
        with open(migration_path, "r", encoding="utf-8") as f:
            sql = f.read()
    except FileNotFoundError:
        # No migration shipped alongside this module: nothing to apply.
        return

    pool = await get_pool()
    async with pool.acquire() as conn:
        await conn.execute(sql)
|
||||
|
||||
|
||||
async def _persist_regression_run(
    status: str,
    summary: dict,
    results: list,
    duration_ms: int,
    triggered_by: str = "manual",
) -> str:
    """Store one regression run in the regression_runs table.

    Args:
        status: Overall outcome label of the run.
        summary: Counter dict; the keys "total", "passed", "failed" and
            "errors" are read, each defaulting to 0 when missing.
        results: Per-session result entries, stored as JSON.
        duration_ms: Wall-clock duration of the run in milliseconds.
        triggered_by: Origin label of the run.

    Returns:
        The newly generated run ID, or an empty string when anything goes
        wrong (persistence is best-effort and never raises).
    """
    counter_keys = ("total", "passed", "failed", "errors")
    try:
        await _init_regression_table()
        db_pool = await get_pool()
        new_run_id = str(uuid.uuid4())
        async with db_pool.acquire() as connection:
            counters = [summary.get(key, 0) for key in counter_keys]
            await connection.execute(
                """
                INSERT INTO regression_runs
                (id, status, total, passed, failed, errors, duration_ms, results, triggered_by)
                VALUES ($1, $2, $3, $4, $5, $6, $7, $8::jsonb, $9)
                """,
                new_run_id,
                status,
                *counters,
                duration_ms,
                json.dumps(results),
                triggered_by,
            )
        logger.info("Regression run %s persisted: %s", new_run_id, status)
        return new_run_id
    except Exception as exc:
        # Deliberately swallowed: a failed write must not fail the run itself.
        logger.warning("Failed to persist regression run: %s", exc)
        return ""
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -299,8 +358,11 @@ async def run_single_regression(session_id: str):
|
||||
|
||||
|
||||
@router.post("/regression/run")
|
||||
async def run_all_regressions():
|
||||
async def run_all_regressions(
|
||||
triggered_by: str = Query("manual", description="Who triggered: manual, script, ci"),
|
||||
):
|
||||
"""Re-run build_grid for ALL ground-truth sessions and compare."""
|
||||
start_time = time.monotonic()
|
||||
sessions = await list_ground_truth_sessions_db()
|
||||
|
||||
if not sessions:
|
||||
@@ -370,19 +432,105 @@ async def run_all_regressions():
|
||||
results.append(entry)
|
||||
|
||||
overall = "pass" if failed == 0 and errors == 0 else "fail"
|
||||
duration_ms = int((time.monotonic() - start_time) * 1000)
|
||||
|
||||
summary = {
|
||||
"total": len(results),
|
||||
"passed": passed,
|
||||
"failed": failed,
|
||||
"errors": errors,
|
||||
}
|
||||
|
||||
logger.info(
|
||||
"Regression suite: %s — %d passed, %d failed, %d errors (of %d)",
|
||||
overall, passed, failed, errors, len(results),
|
||||
"Regression suite: %s — %d passed, %d failed, %d errors (of %d) in %dms",
|
||||
overall, passed, failed, errors, len(results), duration_ms,
|
||||
)
|
||||
|
||||
# Persist to DB
|
||||
run_id = await _persist_regression_run(
|
||||
status=overall,
|
||||
summary=summary,
|
||||
results=results,
|
||||
duration_ms=duration_ms,
|
||||
triggered_by=triggered_by,
|
||||
)
|
||||
|
||||
return {
|
||||
"status": overall,
|
||||
"run_id": run_id,
|
||||
"duration_ms": duration_ms,
|
||||
"results": results,
|
||||
"summary": {
|
||||
"total": len(results),
|
||||
"passed": passed,
|
||||
"failed": failed,
|
||||
"errors": errors,
|
||||
},
|
||||
"summary": summary,
|
||||
}
|
||||
|
||||
|
||||
@router.get("/regression/history")
async def get_regression_history(
    limit: int = Query(20, ge=1, le=100),
):
    """Return the most recent regression runs, newest first.

    Args:
        limit: Maximum number of runs to return (1-100, default 20).

    Returns:
        A dict with "runs" (one summary dict per run, without the detailed
        results) and "count". On any failure the response is an empty list
        with count 0 plus an "error" message instead of an HTTP error.
    """
    # Columns copied into the response unchanged, in response order.
    passthrough_columns = (
        "status",
        "total",
        "passed",
        "failed",
        "errors",
        "duration_ms",
        "triggered_by",
    )
    try:
        await _init_regression_table()
        db_pool = await get_pool()
        async with db_pool.acquire() as connection:
            records = await connection.fetch(
                """
                SELECT id, run_at, status, total, passed, failed, errors,
                       duration_ms, triggered_by
                FROM regression_runs
                ORDER BY run_at DESC
                LIMIT $1
                """,
                limit,
            )

        history = []
        for record in records:
            started_at = record["run_at"]
            item = {
                "id": str(record["id"]),
                "run_at": started_at.isoformat() if started_at else None,
            }
            for column in passthrough_columns:
                item[column] = record[column]
            history.append(item)

        return {"runs": history, "count": len(records)}
    except Exception as exc:
        logger.warning("Failed to fetch regression history: %s", exc)
        return {"runs": [], "count": 0, "error": str(exc)}
|
||||
|
||||
|
||||
@router.get("/regression/history/{run_id}")
async def get_regression_run_detail(run_id: str):
    """Return one stored regression run including its detailed results.

    Args:
        run_id: ID of the run, as returned when the run was persisted.

    Returns:
        A dict with the run's metadata and counters plus "results", the
        decoded per-session result list (empty list when nothing is stored).

    Raises:
        HTTPException: 404 when no run with this ID exists; 500 (with the
            underlying error message as detail) on any other failure.
    """
    try:
        await _init_regression_table()
        pool = await get_pool()
        async with pool.acquire() as conn:
            row = await conn.fetchrow(
                "SELECT * FROM regression_runs WHERE id = $1",
                run_id,
            )
        if not row:
            raise HTTPException(status_code=404, detail="Run not found")

        # The driver may hand back the JSON column either as text or, when a
        # JSON codec is registered on the connection, as an already-decoded
        # Python object. Only decode the textual form.
        stored = row["results"]
        if not stored:
            results = []
        elif isinstance(stored, (str, bytes, bytearray)):
            results = json.loads(stored)
        else:
            results = stored

        return {
            "id": str(row["id"]),
            "run_at": row["run_at"].isoformat() if row["run_at"] else None,
            "status": row["status"],
            "total": row["total"],
            "passed": row["passed"],
            "failed": row["failed"],
            "errors": row["errors"],
            "duration_ms": row["duration_ms"],
            "triggered_by": row["triggered_by"],
            "results": results,
        }
    except HTTPException:
        # Let the deliberate 404 through untouched.
        raise
    except Exception as e:
        logger.warning("Failed to fetch regression run %s: %s", run_id, e)
        raise HTTPException(status_code=500, detail=str(e)) from e
|
||||
|
||||
Reference in New Issue
Block a user