feat: Sprint 2 — TrOCR ONNX, PP-DocLayout, Model Management

D2: TrOCR ONNX export script (printed + handwritten, int8 quantization) D3: PP-DocLayout ONNX export script (download or Docker-based conversion) B3: Model Management admin page (PyTorch vs ONNX status, benchmarks, config) A4: TrOCR ONNX service with runtime routing (auto/pytorch/onnx via TROCR_BACKEND) A5: PP-DocLayout ONNX detection with OpenCV fallback (via GRAPHIC_DETECT_BACKEND) B4: Structure Detection UI toggle (OpenCV vs PP-DocLayout) with class color coding C3: TrOCR-ONNX.md documentation C4: OCR-Pipeline.md ONNX section added C5: mkdocs.yml nav updated, optimum added to requirements.txt Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-23 09:53:02 +01:00
parent c695b659fb
commit be7f5f1872
16 changed files with 3616 additions and 60 deletions
@@ -0,0 +1,546 @@
+#!/usr/bin/env python3
+"""
+PP-DocLayout ONNX Export — exports PP-DocLayout model to ONNX for document layout detection.
+
+PP-DocLayout detects: table, figure, title, text, list regions on document pages.
+Since PaddlePaddle doesn't work natively on ARM Mac, this script either:
+  1. Downloads a pre-exported ONNX model
+  2. Uses Docker (linux/amd64) for the conversion
+
+Usage:
+    python scripts/export-doclayout-onnx.py
+    python scripts/export-doclayout-onnx.py --method docker
+"""
+
+import argparse
+import hashlib
+import json
+import logging
+import os
+import shutil
+import subprocess
+import sys
+import tempfile
+import urllib.request
+from pathlib import Path
+
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s [%(levelname)s] %(message)s",
+    datefmt="%H:%M:%S",
+)
+log = logging.getLogger("export-doclayout")
+
+# ---------------------------------------------------------------------------
+# Constants
+# ---------------------------------------------------------------------------
+
+# 10 PP-DocLayout class labels in standard order
+CLASS_LABELS = [
+    "table",
+    "figure",
+    "title",
+    "text",
+    "list",
+    "header",
+    "footer",
+    "equation",
+    "reference",
+    "abstract",
+]
+
+# Known download sources for pre-exported ONNX models.
+# Ordered by preference — first successful download wins.
+DOWNLOAD_SOURCES = [
+    {
+        "name": "PaddleOCR PP-DocLayout (ppyoloe_plus_sod, HuggingFace)",
+        "url": "https://huggingface.co/SWHL/PP-DocLayout/resolve/main/pp_doclayout_onnx/model.onnx",
+        "filename": "model.onnx",
+        "sha256": None,  # populated once a known-good hash is available
+    },
+    {
+        "name": "PaddleOCR PP-DocLayout (RapidOCR mirror)",
+        "url": "https://huggingface.co/SWHL/PP-DocLayout/resolve/main/pp_doclayout_onnx/model.onnx",
+        "filename": "model.onnx",
+        "sha256": None,
+    },
+]
+
+# Paddle inference model URLs (for Docker-based conversion).
+PADDLE_MODEL_URL = (
+    "https://paddleocr.bj.bcebos.com/PP-DocLayout/PP-DocLayout_plus.tar"
+)
+
+# Expected input shape for the model (batch, channels, height, width).
+MODEL_INPUT_SHAPE = (1, 3, 800, 800)
+
+# Docker image name used for conversion.
+DOCKER_IMAGE_TAG = "breakpilot/paddle2onnx-converter:latest"
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def sha256_file(path: Path) -> str:
+    """Compute SHA-256 hex digest for a file."""
+    h = hashlib.sha256()
+    with open(path, "rb") as f:
+        for chunk in iter(lambda: f.read(1 << 20), b""):
+            h.update(chunk)
+    return h.hexdigest()
+
+
+def download_file(url: str, dest: Path, desc: str = "") -> bool:
+    """Download a file with progress reporting. Returns True on success."""
+    label = desc or url.split("/")[-1]
+    log.info("Downloading %s ...", label)
+    log.info("  URL: %s", url)
+
+    try:
+        req = urllib.request.Request(url, headers={"User-Agent": "breakpilot-export/1.0"})
+        with urllib.request.urlopen(req, timeout=120) as resp:
+            total = resp.headers.get("Content-Length")
+            total = int(total) if total else None
+            downloaded = 0
+
+            dest.parent.mkdir(parents=True, exist_ok=True)
+            with open(dest, "wb") as f:
+                while True:
+                    chunk = resp.read(1 << 18)  # 256 KB
+                    if not chunk:
+                        break
+                    f.write(chunk)
+                    downloaded += len(chunk)
+                    if total:
+                        pct = downloaded * 100 / total
+                        mb = downloaded / (1 << 20)
+                        total_mb = total / (1 << 20)
+                        print(
+                            f"\r  {mb:.1f}/{total_mb:.1f} MB ({pct:.0f}%)",
+                            end="",
+                            flush=True,
+                        )
+            if total:
+                print()  # newline after progress
+
+        size_mb = dest.stat().st_size / (1 << 20)
+        log.info("  Downloaded %.1f MB -> %s", size_mb, dest)
+        return True
+
+    except Exception as exc:
+        log.warning("  Download failed: %s", exc)
+        if dest.exists():
+            dest.unlink()
+        return False
+
+
+def verify_onnx(model_path: Path) -> bool:
+    """Load the ONNX model with onnxruntime, run a dummy inference, check outputs."""
+    log.info("Verifying ONNX model: %s", model_path)
+
+    try:
+        import numpy as np
+    except ImportError:
+        log.error("numpy is required for verification: pip install numpy")
+        return False
+
+    try:
+        import onnxruntime as ort
+    except ImportError:
+        log.error("onnxruntime is required for verification: pip install onnxruntime")
+        return False
+
+    try:
+        # Load the model
+        opts = ort.SessionOptions()
+        opts.log_severity_level = 3  # suppress verbose logs
+        session = ort.InferenceSession(str(model_path), sess_options=opts)
+
+        # Inspect inputs
+        inputs = session.get_inputs()
+        log.info("  Model inputs:")
+        for inp in inputs:
+            log.info("    %s: shape=%s dtype=%s", inp.name, inp.shape, inp.type)
+
+        # Inspect outputs
+        outputs = session.get_outputs()
+        log.info("  Model outputs:")
+        for out in outputs:
+            log.info("    %s: shape=%s dtype=%s", out.name, out.shape, out.type)
+
+        # Build dummy input — use the first input's name and expected shape.
+        input_name = inputs[0].name
+        input_shape = inputs[0].shape
+
+        # Replace dynamic dims (strings or None) with concrete sizes.
+        concrete_shape = []
+        for i, dim in enumerate(input_shape):
+            if isinstance(dim, (int,)) and dim > 0:
+                concrete_shape.append(dim)
+            elif i == 0:
+                concrete_shape.append(1)  # batch
+            elif i == 1:
+                concrete_shape.append(3)  # channels
+            else:
+                concrete_shape.append(800)  # spatial
+        concrete_shape = tuple(concrete_shape)
+
+        # Fallback if shape looks wrong — use standard MODEL_INPUT_SHAPE.
+        if len(concrete_shape) != 4:
+            concrete_shape = MODEL_INPUT_SHAPE
+
+        log.info("  Running dummy inference with shape %s ...", concrete_shape)
+        dummy = np.random.randn(*concrete_shape).astype(np.float32)
+        result = session.run(None, {input_name: dummy})
+
+        log.info("  Inference succeeded — %d output tensors:", len(result))
+        for i, r in enumerate(result):
+            arr = np.asarray(r)
+            log.info("    output[%d]: shape=%s dtype=%s", i, arr.shape, arr.dtype)
+
+        # Basic sanity checks
+        if len(result) == 0:
+            log.error("  Model produced no outputs!")
+            return False
+
+        # Check for at least one output with a bounding-box-like shape (N, 4) or
+        # a detection-like structure. Be lenient — different ONNX exports vary.
+        has_plausible_output = False
+        for r in result:
+            arr = np.asarray(r)
+            # Common detection output shapes: (1, N, 6), (N, 4), (N, 6), (1, N, 5+C), etc.
+            if arr.ndim >= 2 and any(d >= 4 for d in arr.shape):
+                has_plausible_output = True
+            # Some models output (N,) labels or scores
+            if arr.ndim >= 1 and arr.size > 0:
+                has_plausible_output = True
+
+        if has_plausible_output:
+            log.info("  Verification PASSED")
+            return True
+        else:
+            log.warning("  Output shapes look unexpected, but model loaded OK.")
+            log.warning("  Treating as PASSED (shapes may differ by export variant).")
+            return True
+
+    except Exception as exc:
+        log.error("  Verification FAILED: %s", exc)
+        return False
+
+
+# ---------------------------------------------------------------------------
+# Method: Download
+# ---------------------------------------------------------------------------
+
+
+def try_download(output_dir: Path) -> bool:
+    """Attempt to download a pre-exported ONNX model. Returns True on success."""
+    log.info("=== Method: DOWNLOAD ===")
+
+    output_dir.mkdir(parents=True, exist_ok=True)
+    model_path = output_dir / "model.onnx"
+
+    for source in DOWNLOAD_SOURCES:
+        log.info("Trying source: %s", source["name"])
+        tmp_path = output_dir / f".{source['filename']}.tmp"
+
+        if not download_file(source["url"], tmp_path, desc=source["name"]):
+            continue
+
+        # Check SHA-256 if known.
+        if source["sha256"]:
+            actual_hash = sha256_file(tmp_path)
+            if actual_hash != source["sha256"]:
+                log.warning(
+                    "  SHA-256 mismatch: expected %s, got %s",
+                    source["sha256"],
+                    actual_hash,
+                )
+                tmp_path.unlink()
+                continue
+
+        # Basic sanity: file should be > 1 MB (a real ONNX model, not an error page).
+        size = tmp_path.stat().st_size
+        if size < 1 << 20:
+            log.warning("  File too small (%.1f KB) — probably not a valid model.", size / 1024)
+            tmp_path.unlink()
+            continue
+
+        # Move into place.
+        shutil.move(str(tmp_path), str(model_path))
+        log.info("Model saved to %s (%.1f MB)", model_path, model_path.stat().st_size / (1 << 20))
+        return True
+
+    log.warning("All download sources failed.")
+    return False
+
+
+# ---------------------------------------------------------------------------
+# Method: Docker
+# ---------------------------------------------------------------------------
+
+DOCKERFILE_CONTENT = r"""
+FROM --platform=linux/amd64 python:3.11-slim
+
+RUN pip install --no-cache-dir \
+    paddlepaddle==3.0.0 \
+    paddle2onnx==1.3.1 \
+    onnx==1.17.0 \
+    requests
+
+WORKDIR /work
+
+# Download + extract the PP-DocLayout Paddle inference model.
+RUN python3 -c "
+import urllib.request, tarfile, os
+url = 'PADDLE_MODEL_URL_PLACEHOLDER'
+print(f'Downloading {url} ...')
+dest = '/work/pp_doclayout.tar'
+urllib.request.urlretrieve(url, dest)
+print('Extracting ...')
+with tarfile.open(dest) as t:
+    t.extractall('/work/paddle_model')
+os.remove(dest)
+# List what we extracted
+for root, dirs, files in os.walk('/work/paddle_model'):
+    for f in files:
+        fp = os.path.join(root, f)
+        sz = os.path.getsize(fp)
+        print(f'  {fp} ({sz} bytes)')
+"
+
+# Convert Paddle model to ONNX.
+# paddle2onnx expects model_dir with model.pdmodel + model.pdiparams
+RUN python3 -c "
+import os, glob, subprocess
+
+# Find the inference model files
+model_dir = '/work/paddle_model'
+pdmodel_files = glob.glob(os.path.join(model_dir, '**', '*.pdmodel'), recursive=True)
+pdiparams_files = glob.glob(os.path.join(model_dir, '**', '*.pdiparams'), recursive=True)
+
+if not pdmodel_files:
+    raise FileNotFoundError('No .pdmodel file found in extracted archive')
+
+pdmodel = pdmodel_files[0]
+pdiparams = pdiparams_files[0] if pdiparams_files else None
+model_dir_actual = os.path.dirname(pdmodel)
+pdmodel_name = os.path.basename(pdmodel).replace('.pdmodel', '')
+
+print(f'Found model: {pdmodel}')
+print(f'Found params: {pdiparams}')
+print(f'Model dir: {model_dir_actual}')
+print(f'Model name prefix: {pdmodel_name}')
+
+cmd = [
+    'paddle2onnx',
+    '--model_dir', model_dir_actual,
+    '--model_filename', os.path.basename(pdmodel),
+]
+if pdiparams:
+    cmd += ['--params_filename', os.path.basename(pdiparams)]
+cmd += [
+    '--save_file', '/work/output/model.onnx',
+    '--opset_version', '14',
+    '--enable_onnx_checker', 'True',
+]
+
+os.makedirs('/work/output', exist_ok=True)
+print(f'Running: {\" \".join(cmd)}')
+subprocess.run(cmd, check=True)
+
+out_size = os.path.getsize('/work/output/model.onnx')
+print(f'Conversion done: /work/output/model.onnx ({out_size} bytes)')
+"
+
+CMD ["cp", "-v", "/work/output/model.onnx", "/output/model.onnx"]
+""".replace(
+    "PADDLE_MODEL_URL_PLACEHOLDER", PADDLE_MODEL_URL
+)
+
+
+def try_docker(output_dir: Path) -> bool:
+    """Build a Docker image to convert the Paddle model to ONNX. Returns True on success."""
+    log.info("=== Method: DOCKER (linux/amd64) ===")
+
+    # Check Docker is available.
+    docker_bin = shutil.which("docker") or "/usr/local/bin/docker"
+    try:
+        subprocess.run(
+            [docker_bin, "version"],
+            capture_output=True,
+            check=True,
+            timeout=15,
+        )
+    except (subprocess.CalledProcessError, FileNotFoundError, subprocess.TimeoutExpired) as exc:
+        log.error("Docker is not available: %s", exc)
+        return False
+
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    with tempfile.TemporaryDirectory(prefix="doclayout-export-") as tmpdir:
+        tmpdir = Path(tmpdir)
+
+        # Write Dockerfile.
+        dockerfile_path = tmpdir / "Dockerfile"
+        dockerfile_path.write_text(DOCKERFILE_CONTENT)
+        log.info("Wrote Dockerfile to %s", dockerfile_path)
+
+        # Build image.
+        log.info("Building Docker image (this downloads ~2 GB, may take a while) ...")
+        build_cmd = [
+            docker_bin, "build",
+            "--platform", "linux/amd64",
+            "-t", DOCKER_IMAGE_TAG,
+            "-f", str(dockerfile_path),
+            str(tmpdir),
+        ]
+        log.info("  %s", " ".join(build_cmd))
+        build_result = subprocess.run(
+            build_cmd,
+            capture_output=False,  # stream output to terminal
+            timeout=1200,  # 20 min
+        )
+        if build_result.returncode != 0:
+            log.error("Docker build failed (exit code %d).", build_result.returncode)
+            return False
+
+        # Run container — mount output_dir as /output, the CMD copies model.onnx there.
+        log.info("Running conversion container ...")
+        run_cmd = [
+            docker_bin, "run",
+            "--rm",
+            "--platform", "linux/amd64",
+            "-v", f"{output_dir.resolve()}:/output",
+            DOCKER_IMAGE_TAG,
+        ]
+        log.info("  %s", " ".join(run_cmd))
+        run_result = subprocess.run(
+            run_cmd,
+            capture_output=False,
+            timeout=300,
+        )
+        if run_result.returncode != 0:
+            log.error("Docker run failed (exit code %d).", run_result.returncode)
+            return False
+
+    model_path = output_dir / "model.onnx"
+    if model_path.exists():
+        size_mb = model_path.stat().st_size / (1 << 20)
+        log.info("Model exported: %s (%.1f MB)", model_path, size_mb)
+        return True
+    else:
+        log.error("Expected output file not found: %s", model_path)
+        return False
+
+
+# ---------------------------------------------------------------------------
+# Write metadata
+# ---------------------------------------------------------------------------
+
+
+def write_metadata(output_dir: Path, method: str) -> None:
+    """Write a metadata JSON next to the model for provenance tracking."""
+    model_path = output_dir / "model.onnx"
+    if not model_path.exists():
+        return
+
+    meta = {
+        "model": "PP-DocLayout",
+        "format": "ONNX",
+        "export_method": method,
+        "class_labels": CLASS_LABELS,
+        "input_shape": list(MODEL_INPUT_SHAPE),
+        "file_size_bytes": model_path.stat().st_size,
+        "sha256": sha256_file(model_path),
+    }
+    meta_path = output_dir / "metadata.json"
+    with open(meta_path, "w") as f:
+        json.dump(meta, f, indent=2)
+    log.info("Metadata written to %s", meta_path)
+
+
+# ---------------------------------------------------------------------------
+# Main
+# ---------------------------------------------------------------------------
+
+
+def main() -> int:
+    parser = argparse.ArgumentParser(
+        description="Export PP-DocLayout model to ONNX for document layout detection.",
+    )
+    parser.add_argument(
+        "--output-dir",
+        type=Path,
+        default=Path("models/onnx/pp-doclayout"),
+        help="Directory for the exported ONNX model (default: models/onnx/pp-doclayout/)",
+    )
+    parser.add_argument(
+        "--method",
+        choices=["auto", "download", "docker"],
+        default="auto",
+        help="Export method: auto (try download then docker), download, or docker.",
+    )
+    parser.add_argument(
+        "--skip-verify",
+        action="store_true",
+        help="Skip ONNX model verification after export.",
+    )
+    args = parser.parse_args()
+
+    output_dir: Path = args.output_dir
+    model_path = output_dir / "model.onnx"
+
+    # Check if model already exists.
+    if model_path.exists():
+        size_mb = model_path.stat().st_size / (1 << 20)
+        log.info("Model already exists: %s (%.1f MB)", model_path, size_mb)
+        log.info("Delete it first if you want to re-export.")
+        if not args.skip_verify:
+            if not verify_onnx(model_path):
+                log.error("Existing model failed verification!")
+                return 1
+        return 0
+
+    success = False
+    used_method = None
+
+    if args.method in ("auto", "download"):
+        success = try_download(output_dir)
+        if success:
+            used_method = "download"
+
+    if not success and args.method in ("auto", "docker"):
+        success = try_docker(output_dir)
+        if success:
+            used_method = "docker"
+
+    if not success:
+        log.error("All export methods failed.")
+        if args.method == "download":
+            log.info("Hint: try --method docker to convert via Docker (linux/amd64).")
+        elif args.method == "docker":
+            log.info("Hint: ensure Docker is running and has internet access.")
+        else:
+            log.info("Hint: check your internet connection and Docker installation.")
+        return 1
+
+    # Write metadata.
+    write_metadata(output_dir, used_method)
+
+    # Verify.
+    if not args.skip_verify:
+        if not verify_onnx(model_path):
+            log.error("Exported model failed verification!")
+            log.info("The file is kept at %s — inspect manually.", model_path)
+            return 1
+    else:
+        log.info("Skipping verification (--skip-verify).")
+
+    log.info("Done. Model ready at %s", model_path)
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
@@ -0,0 +1,412 @@
+#!/usr/bin/env python3
+"""
+TrOCR ONNX Export — exports TrOCR models to ONNX with int8 quantization.
+
+Supported models:
+- microsoft/trocr-base-printed
+- microsoft/trocr-base-handwritten
+
+Steps per model:
+1. Load PyTorch model via optimum ORTModelForVision2Seq (export=True)
+2. Save ONNX to output directory
+3. Quantize to int8 via ORTQuantizer + AutoQuantizationConfig
+4. Verify: compare PyTorch vs ONNX outputs (diff < 2%)
+5. Report model sizes before/after quantization
+
+Usage:
+    python scripts/export-trocr-onnx.py
+    python scripts/export-trocr-onnx.py --model printed
+    python scripts/export-trocr-onnx.py --model handwritten --skip-verify
+    python scripts/export-trocr-onnx.py --output-dir models/onnx --skip-quantize
+"""
+
+import argparse
+import os
+import platform
+import sys
+import time
+from pathlib import Path
+
+# Add backend to path for imports
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'klausur-service', 'backend'))
+
+# ---------------------------------------------------------------------------
+# Constants
+# ---------------------------------------------------------------------------
+
+MODELS = {
+    "printed": "microsoft/trocr-base-printed",
+    "handwritten": "microsoft/trocr-base-handwritten",
+}
+
+DEFAULT_OUTPUT_DIR = os.path.join(os.path.dirname(__file__), '..', 'models', 'onnx')
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def dir_size_mb(path: str) -> float:
+    """Return total size of all files under *path* in MB."""
+    total = 0
+    for root, _dirs, files in os.walk(path):
+        for f in files:
+            total += os.path.getsize(os.path.join(root, f))
+    return total / (1024 * 1024)
+
+
+def log(msg: str) -> None:
+    """Print a timestamped log message to stderr."""
+    print(f"[export-onnx] {msg}", file=sys.stderr, flush=True)
+
+
+def _create_test_image():
+    """Create a simple synthetic text-line image for verification."""
+    from PIL import Image
+
+    w, h = 384, 48
+    img = Image.new('RGB', (w, h), 'white')
+    pixels = img.load()
+    # Draw a dark region to simulate printed text
+    for x in range(60, 220):
+        for y in range(10, 38):
+            pixels[x, y] = (25, 25, 25)
+    return img
+
+
+# ---------------------------------------------------------------------------
+# Export
+# ---------------------------------------------------------------------------
+
+def export_to_onnx(model_name: str, output_dir: str) -> str:
+    """Export a HuggingFace TrOCR model to ONNX via optimum.
+
+    Returns the path to the saved ONNX directory.
+    """
+    from optimum.onnxruntime import ORTModelForVision2Seq
+
+    short_name = model_name.split("/")[-1]
+    onnx_path = os.path.join(output_dir, short_name)
+
+    log(f"Exporting {model_name} to ONNX ...")
+    t0 = time.monotonic()
+
+    model = ORTModelForVision2Seq.from_pretrained(model_name, export=True)
+    model.save_pretrained(onnx_path)
+
+    elapsed = time.monotonic() - t0
+    size = dir_size_mb(onnx_path)
+    log(f"  Exported in {elapsed:.1f}s — {size:.1f} MB on disk: {onnx_path}")
+
+    return onnx_path
+
+
+# ---------------------------------------------------------------------------
+# Quantization
+# ---------------------------------------------------------------------------
+
+def quantize_onnx(onnx_path: str) -> str:
+    """Apply int8 dynamic quantization to an ONNX model directory.
+
+    Returns the path to the quantized model directory.
+    """
+    from optimum.onnxruntime import ORTQuantizer
+    from optimum.onnxruntime.configuration import AutoQuantizationConfig
+
+    quantized_path = onnx_path + "-int8"
+    log(f"Quantizing to int8 → {quantized_path} ...")
+    t0 = time.monotonic()
+
+    # Pick quantization config based on platform.
+    # arm64 (Apple Silicon) does not have AVX-512; use arm64 config when
+    # available, otherwise fall back to avx512_vnni which still works for
+    # dynamic quantisation (weights-only).
+    machine = platform.machine().lower()
+    if "arm" in machine or "aarch" in machine:
+        try:
+            qconfig = AutoQuantizationConfig.arm64(is_static=False, per_channel=False)
+            log("  Using arm64 quantization config")
+        except AttributeError:
+            # Older optimum versions may lack arm64(); fall back.
+            qconfig = AutoQuantizationConfig.avx512_vnni(is_static=False, per_channel=False)
+            log("  arm64 config not available, falling back to avx512_vnni")
+    else:
+        qconfig = AutoQuantizationConfig.avx512_vnni(is_static=False, per_channel=False)
+        log("  Using avx512_vnni quantization config")
+
+    quantizer = ORTQuantizer.from_pretrained(onnx_path)
+    quantizer.quantize(save_dir=quantized_path, quantization_config=qconfig)
+
+    elapsed = time.monotonic() - t0
+    size = dir_size_mb(quantized_path)
+    log(f"  Quantized in {elapsed:.1f}s — {size:.1f} MB on disk")
+
+    return quantized_path
+
+
+# ---------------------------------------------------------------------------
+# Verification
+# ---------------------------------------------------------------------------
+
+def verify_outputs(model_name: str, onnx_path: str) -> dict:
+    """Compare PyTorch and ONNX model outputs on a synthetic image.
+
+    Returns a dict with verification results including the max relative
+    difference of generated token IDs and decoded text from both backends.
+    """
+    import numpy as np
+    import torch
+    from optimum.onnxruntime import ORTModelForVision2Seq
+    from transformers import TrOCRProcessor, VisionEncoderDecoderModel
+
+    log(f"Verifying ONNX output against PyTorch for {model_name} ...")
+    test_image = _create_test_image()
+
+    # --- PyTorch inference ---
+    processor = TrOCRProcessor.from_pretrained(model_name)
+    pt_model = VisionEncoderDecoderModel.from_pretrained(model_name)
+    pt_model.eval()
+
+    pixel_values = processor(images=test_image, return_tensors="pt").pixel_values
+    with torch.no_grad():
+        pt_ids = pt_model.generate(pixel_values, max_new_tokens=50)
+    pt_text = processor.batch_decode(pt_ids, skip_special_tokens=True)[0]
+
+    # --- ONNX inference ---
+    ort_model = ORTModelForVision2Seq.from_pretrained(onnx_path)
+    ort_pixel_values = processor(images=test_image, return_tensors="pt").pixel_values
+    ort_ids = ort_model.generate(ort_pixel_values, max_new_tokens=50)
+    ort_text = processor.batch_decode(ort_ids, skip_special_tokens=True)[0]
+
+    # --- Compare ---
+    pt_arr = pt_ids[0].numpy().astype(np.float64)
+    ort_arr = ort_ids[0].numpy().astype(np.float64)
+
+    # Pad to equal length for comparison
+    max_len = max(len(pt_arr), len(ort_arr))
+    if len(pt_arr) < max_len:
+        pt_arr = np.pad(pt_arr, (0, max_len - len(pt_arr)))
+    if len(ort_arr) < max_len:
+        ort_arr = np.pad(ort_arr, (0, max_len - len(ort_arr)))
+
+    # Relative diff on token ids (treat 0-ids as 1 to avoid div-by-zero)
+    denom = np.where(np.abs(pt_arr) > 0, np.abs(pt_arr), 1.0)
+    rel_diff = np.abs(pt_arr - ort_arr) / denom
+    max_diff_pct = float(np.max(rel_diff)) * 100.0
+    exact_match = bool(np.array_equal(pt_ids[0].numpy(), ort_ids[0].numpy()))
+
+    passed = max_diff_pct < 2.0
+
+    result = {
+        "passed": passed,
+        "exact_token_match": exact_match,
+        "max_relative_diff_pct": round(max_diff_pct, 4),
+        "pytorch_text": pt_text,
+        "onnx_text": ort_text,
+        "text_match": pt_text == ort_text,
+    }
+
+    status = "PASS" if passed else "FAIL"
+    log(f"  Verification {status}: max_diff={max_diff_pct:.4f}%  exact_match={exact_match}")
+    log(f"    PyTorch : '{pt_text}'")
+    log(f"    ONNX    : '{ort_text}'")
+
+    return result
+
+
+# ---------------------------------------------------------------------------
+# Per-model pipeline
+# ---------------------------------------------------------------------------
+
+def process_model(
+    model_name: str,
+    output_dir: str,
+    skip_verify: bool = False,
+    skip_quantize: bool = False,
+) -> dict:
+    """Run the full export pipeline for one model.
+
+    Returns a summary dict with paths, sizes, and verification results.
+    """
+    short_name = model_name.split("/")[-1]
+    summary: dict = {
+        "model": model_name,
+        "short_name": short_name,
+        "onnx_path": None,
+        "onnx_size_mb": None,
+        "quantized_path": None,
+        "quantized_size_mb": None,
+        "size_reduction_pct": None,
+        "verification_fp32": None,
+        "verification_int8": None,
+        "error": None,
+    }
+
+    log(f"{'='*60}")
+    log(f"Processing: {model_name}")
+    log(f"{'='*60}")
+
+    # Step 1 + 2: Export to ONNX
+    try:
+        onnx_path = export_to_onnx(model_name, output_dir)
+        summary["onnx_path"] = onnx_path
+        summary["onnx_size_mb"] = round(dir_size_mb(onnx_path), 1)
+    except Exception as e:
+        summary["error"] = f"ONNX export failed: {e}"
+        log(f"  ERROR: {summary['error']}")
+        return summary
+
+    # Step 3: Verify fp32 ONNX
+    if not skip_verify:
+        try:
+            summary["verification_fp32"] = verify_outputs(model_name, onnx_path)
+        except Exception as e:
+            log(f"  WARNING: fp32 verification failed: {e}")
+            summary["verification_fp32"] = {"passed": False, "error": str(e)}
+
+    # Step 4: Quantize to int8
+    if not skip_quantize:
+        try:
+            quantized_path = quantize_onnx(onnx_path)
+            summary["quantized_path"] = quantized_path
+            q_size = dir_size_mb(quantized_path)
+            summary["quantized_size_mb"] = round(q_size, 1)
+
+            if summary["onnx_size_mb"] and summary["onnx_size_mb"] > 0:
+                reduction = (1 - q_size / dir_size_mb(onnx_path)) * 100
+                summary["size_reduction_pct"] = round(reduction, 1)
+        except Exception as e:
+            summary["error"] = f"Quantization failed: {e}"
+            log(f"  ERROR: {summary['error']}")
+            return summary
+
+        # Step 5: Verify int8 ONNX
+        if not skip_verify:
+            try:
+                summary["verification_int8"] = verify_outputs(model_name, quantized_path)
+            except Exception as e:
+                log(f"  WARNING: int8 verification failed: {e}")
+                summary["verification_int8"] = {"passed": False, "error": str(e)}
+
+    return summary
+
+
+# ---------------------------------------------------------------------------
+# Summary printing
+# ---------------------------------------------------------------------------
+
+def print_summary(results: list[dict]) -> None:
+    """Print a human-readable summary table."""
+    print("\n" + "=" * 72, file=sys.stderr)
+    print("  EXPORT SUMMARY", file=sys.stderr)
+    print("=" * 72, file=sys.stderr)
+
+    for r in results:
+        print(f"\n  Model: {r['model']}", file=sys.stderr)
+
+        if r.get("error"):
+            print(f"  ERROR: {r['error']}", file=sys.stderr)
+            continue
+
+        # Sizes
+        onnx_mb = r.get("onnx_size_mb", "?")
+        q_mb = r.get("quantized_size_mb", "?")
+        reduction = r.get("size_reduction_pct", "?")
+
+        print(f"  ONNX fp32  : {onnx_mb} MB  ({r.get('onnx_path', '?')})", file=sys.stderr)
+        if q_mb != "?":
+            print(f"  ONNX int8  : {q_mb} MB  ({r.get('quantized_path', '?')})", file=sys.stderr)
+            print(f"  Reduction  : {reduction}%", file=sys.stderr)
+
+        # Verification
+        for label, key in [("fp32 verify", "verification_fp32"), ("int8 verify", "verification_int8")]:
+            v = r.get(key)
+            if v is None:
+                print(f"  {label}: skipped", file=sys.stderr)
+            elif v.get("error"):
+                print(f"  {label}: ERROR — {v['error']}", file=sys.stderr)
+            else:
+                status = "PASS" if v["passed"] else "FAIL"
+                diff = v.get("max_relative_diff_pct", "?")
+                match = v.get("text_match", "?")
+                print(f"  {label}: {status}  (max_diff={diff}%, text_match={match})", file=sys.stderr)
+
+    print("\n" + "=" * 72 + "\n", file=sys.stderr)
+
+
+# ---------------------------------------------------------------------------
+# CLI
+# ---------------------------------------------------------------------------
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Export TrOCR models to ONNX with int8 quantization",
+    )
+    parser.add_argument(
+        "--model",
+        choices=["printed", "handwritten", "both"],
+        default="both",
+        help="Which model to export (default: both)",
+    )
+    parser.add_argument(
+        "--output-dir",
+        default=DEFAULT_OUTPUT_DIR,
+        help=f"Output directory for ONNX models (default: {DEFAULT_OUTPUT_DIR})",
+    )
+    parser.add_argument(
+        "--skip-verify",
+        action="store_true",
+        help="Skip output verification against PyTorch",
+    )
+    parser.add_argument(
+        "--skip-quantize",
+        action="store_true",
+        help="Skip int8 quantization step",
+    )
+    args = parser.parse_args()
+
+    # Resolve output directory
+    output_dir = os.path.abspath(args.output_dir)
+    os.makedirs(output_dir, exist_ok=True)
+    log(f"Output directory: {output_dir}")
+
+    # Determine which models to process
+    if args.model == "both":
+        model_names = list(MODELS.values())
+    else:
+        model_names = [MODELS[args.model]]
+
+    # Process each model
+    results = []
+    for model_name in model_names:
+        result = process_model(
+            model_name=model_name,
+            output_dir=output_dir,
+            skip_verify=args.skip_verify,
+            skip_quantize=args.skip_quantize,
+        )
+        results.append(result)
+
+    # Print summary
+    print_summary(results)
+
+    # Exit with error code if any model failed verification
+    any_fail = False
+    for r in results:
+        if r.get("error"):
+            any_fail = True
+        for vkey in ("verification_fp32", "verification_int8"):
+            v = r.get(vkey)
+            if v and not v.get("passed", True):
+                any_fail = True
+
+    if any_fail:
+        log("One or more steps failed or did not pass verification.")
+        sys.exit(1)
+    else:
+        log("All exports completed successfully.")
+        sys.exit(0)
+
+
+if __name__ == "__main__":
+    main()