feat: Sprint 2 — TrOCR ONNX, PP-DocLayout, Model Management

D2: TrOCR ONNX export script (printed + handwritten, int8 quantization)
D3: PP-DocLayout ONNX export script (download or Docker-based conversion)
B3: Model Management admin page (PyTorch vs ONNX status, benchmarks, config)
A4: TrOCR ONNX service with runtime routing (auto/pytorch/onnx via TROCR_BACKEND)
A5: PP-DocLayout ONNX detection with OpenCV fallback (via GRAPHIC_DETECT_BACKEND)
B4: Structure Detection UI toggle (OpenCV vs PP-DocLayout) with class color coding
C3: TrOCR-ONNX.md documentation
C4: OCR-Pipeline.md ONNX section added
C5: mkdocs.yml nav updated, optimum added to requirements.txt

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-23 09:53:02 +01:00
parent c695b659fb
commit be7f5f1872
16 changed files with 3616 additions and 60 deletions
--- a/klausur-service/backend/cv_graphic_detect.py
+++ b/klausur-service/backend/cv_graphic_detect.py
@@ -120,6 +120,57 @@ def detect_graphic_elements(
    if img_bgr is None:
        return []

+    # ------------------------------------------------------------------
+    # Try PP-DocLayout ONNX first if available
+    # ------------------------------------------------------------------
+    import os
+    backend = os.environ.get("GRAPHIC_DETECT_BACKEND", "auto")
+    if backend in ("doclayout", "auto"):
+        try:
+            from cv_doclayout_detect import detect_layout_regions, is_doclayout_available
+            if is_doclayout_available():
+                regions = detect_layout_regions(img_bgr)
+                if regions:
+                    _LABEL_TO_COLOR = {
+                        "figure": ("image", "green", _COLOR_HEX.get("green", "#16a34a")),
+                        "table":  ("image", "blue",  _COLOR_HEX.get("blue", "#2563eb")),
+                    }
+                    converted: List[GraphicElement] = []
+                    for r in regions:
+                        shape, color_name, color_hex = _LABEL_TO_COLOR.get(
+                            r.label,
+                            (r.label, "gray", _COLOR_HEX.get("gray", "#6b7280")),
+                        )
+                        converted.append(GraphicElement(
+                            x=r.x,
+                            y=r.y,
+                            width=r.width,
+                            height=r.height,
+                            area=r.width * r.height,
+                            shape=shape,
+                            color_name=color_name,
+                            color_hex=color_hex,
+                            confidence=r.confidence,
+                            contour=None,
+                        ))
+                    converted.sort(key=lambda g: g.area, reverse=True)
+                    result = converted[:max_elements]
+                    if result:
+                        shape_counts: Dict[str, int] = {}
+                        for g in result:
+                            shape_counts[g.shape] = shape_counts.get(g.shape, 0) + 1
+                        logger.info(
+                            "GraphicDetect (PP-DocLayout): %d elements (%s)",
+                            len(result),
+                            ", ".join(f"{s}: {c}" for s, c in sorted(shape_counts.items())),
+                        )
+                    return result
+        except Exception as e:
+            logger.warning("PP-DocLayout failed, falling back to OpenCV: %s", e)
+    # ------------------------------------------------------------------
+    # OpenCV fallback (original logic)
+    # ------------------------------------------------------------------
+
    h, w = img_bgr.shape[:2]

    logger.debug("GraphicDetect: image %dx%d, %d word_boxes, %d detected_boxes",