feat: Sprint 2 — TrOCR ONNX, PP-DocLayout, Model Management

D2: TrOCR ONNX export script (printed + handwritten, int8 quantization)
D3: PP-DocLayout ONNX export script (download or Docker-based conversion)
B3: Model Management admin page (PyTorch vs ONNX status, benchmarks, config)
A4: TrOCR ONNX service with runtime routing (auto/pytorch/onnx via TROCR_BACKEND)
A5: PP-DocLayout ONNX detection with OpenCV fallback (via GRAPHIC_DETECT_BACKEND)
B4: Structure Detection UI toggle (OpenCV vs PP-DocLayout) with class color coding
C3: TrOCR-ONNX.md documentation
C4: OCR-Pipeline.md ONNX section added
C5: mkdocs.yml nav updated, optimum added to requirements.txt

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-03-23 09:53:02 +01:00
parent c695b659fb
commit be7f5f1872
16 changed files with 3616 additions and 60 deletions

View File

@@ -120,6 +120,57 @@ def detect_graphic_elements(
if img_bgr is None:
return []
# ------------------------------------------------------------------
# Try PP-DocLayout ONNX first if available
# ------------------------------------------------------------------
import os
backend = os.environ.get("GRAPHIC_DETECT_BACKEND", "auto")
if backend in ("doclayout", "auto"):
try:
from cv_doclayout_detect import detect_layout_regions, is_doclayout_available
if is_doclayout_available():
regions = detect_layout_regions(img_bgr)
if regions:
_LABEL_TO_COLOR = {
"figure": ("image", "green", _COLOR_HEX.get("green", "#16a34a")),
"table": ("image", "blue", _COLOR_HEX.get("blue", "#2563eb")),
}
converted: List[GraphicElement] = []
for r in regions:
shape, color_name, color_hex = _LABEL_TO_COLOR.get(
r.label,
(r.label, "gray", _COLOR_HEX.get("gray", "#6b7280")),
)
converted.append(GraphicElement(
x=r.x,
y=r.y,
width=r.width,
height=r.height,
area=r.width * r.height,
shape=shape,
color_name=color_name,
color_hex=color_hex,
confidence=r.confidence,
contour=None,
))
converted.sort(key=lambda g: g.area, reverse=True)
result = converted[:max_elements]
if result:
shape_counts: Dict[str, int] = {}
for g in result:
shape_counts[g.shape] = shape_counts.get(g.shape, 0) + 1
logger.info(
"GraphicDetect (PP-DocLayout): %d elements (%s)",
len(result),
", ".join(f"{s}: {c}" for s, c in sorted(shape_counts.items())),
)
return result
except Exception as e:
logger.warning("PP-DocLayout failed, falling back to OpenCV: %s", e)
# ------------------------------------------------------------------
# OpenCV fallback (original logic)
# ------------------------------------------------------------------
h, w = img_bgr.shape[:2]
logger.debug("GraphicDetect: image %dx%d, %d word_boxes, %d detected_boxes",