refactor: remove unused pages and backends (model-management, OCR legacy, GPU/vast.ai, video-chat, matrix)

Deleted pages:
- /ai/model-management (mock data only, no real backend)
- /ai/ocr-compare (old /vocab/ backend, replaced by ocr-kombi)
- /ai/ocr-pipeline (minimal session browser, redundant)
- /ai/ocr-overlay (legacy monolith, redundant)
- /ai/gpu (vast.ai GPU management, no longer used)
- /infrastructure/gpu (same)
- /communication/video-chat (moved to core)
- /communication/matrix (moved to core)

Deleted backends:
- backend-lehrer/infra/vast_client.py + vast_power.py
- backend-lehrer/meetings_api.py + jitsi_api.py
- website/app/api/admin/gpu/
- edu-search-service/scripts/vast_ai_extractor.py

Total: ~7,800 LOC removed. All code preserved in git history.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-23 13:14:12 +02:00
parent 5abdfa202e
commit f39cbe9283
30 changed files with 1089 additions and 9567 deletions
--- a/klausur-service/backend/tests/debug_shear.py
+++ b/klausur-service/backend/tests/debug_shear.py
@@ -0,0 +1,100 @@
+#!/usr/bin/env python3
+"""Debug script: analyze text line slopes on deskewed image to determine true residual shear."""
+import sys, math, asyncio
+sys.path.insert(0, "/app/backend")
+
+import cv2
+import numpy as np
+import pytesseract
+from ocr_pipeline_session_store import get_session_db
+
+# ID of the session that main() loads via get_session_db(); its stored
+# "deskewed_png" image is the one analyzed. Hard-coded: edit to inspect another session.
+SESSION_ID = "3dcb1897-09a6-4b80-91b5-7e4207980bf3"
+
+async def main():
+    """Measure the residual text-line skew of the session's deskewed image.
+
+    Loads the ``deskewed_png`` blob of session ``SESSION_ID``, fits a straight
+    line through the Tesseract word centers of every sufficiently wide text
+    line, and prints slope statistics in degrees. Afterwards it prints the
+    output of the four shear detectors imported from ``cv_vocab_pipeline`` and
+    a table converting candidate shear angles into pixel / millimetre shifts.
+
+    Returns early, after printing a message, when the session or the image
+    blob is missing, when the blob cannot be decoded, or when no usable text
+    line is found.
+    """
+    session = await get_session_db(SESSION_ID)
+    if not session:
+        print("Session not found")
+        return
+
+    deskewed_png = session.get("deskewed_png")
+    if not deskewed_png:
+        print("No deskewed_png stored")
+        return
+
+    arr = np.frombuffer(deskewed_png, dtype=np.uint8)
+    img = cv2.imdecode(arr, cv2.IMREAD_COLOR)
+    # cv2.imdecode reports undecodable data by returning None, not by raising.
+    if img is None:
+        print("Could not decode deskewed_png")
+        return
+    h, w = img.shape[:2]
+    print(f"Deskewed image: {w}x{h}")
+
+    # Detect text line slopes using Tesseract word positions
+    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+    data = pytesseract.image_to_data(gray, output_type=pytesseract.Output.DICT, config="--psm 6")
+
+    # Group word centers by (block, paragraph, line) so each key is one text line.
+    lines = {}
+    for i, raw_text in enumerate(data["text"]):
+        txt = (raw_text or "").strip()
+        # The confidence may arrive as a string (e.g. float-formatted values);
+        # normalize it so the threshold comparison cannot raise TypeError.
+        try:
+            conf = float(data["conf"][i])
+        except (TypeError, ValueError):
+            continue
+        if len(txt) < 2 or conf < 30:
+            continue
+        key = (data["block_num"][i], data["par_num"][i], data["line_num"][i])
+        cx = data["left"][i] + data["width"][i] / 2
+        cy = data["top"][i] + data["height"][i] / 2
+        lines.setdefault(key, []).append((cx, cy))
+
+    slopes = []
+    for pts in lines.values():
+        if len(pts) < 3:
+            continue
+        pts.sort(key=lambda p: p[0])
+        xs = np.array([p[0] for p in pts])
+        ys = np.array([p[1] for p in pts])
+        # Skip lines spanning less than 20% of the image width: too short a
+        # baseline to give a stable slope estimate.
+        if xs[-1] - xs[0] < w * 0.2:
+            continue
+        # Least-squares fit of y = slope * x + intercept through the word centers.
+        A = np.vstack([xs, np.ones(len(xs))]).T
+        slope = np.linalg.lstsq(A, ys, rcond=None)[0][0]
+        slopes.append(math.degrees(math.atan(slope)))
+
+    if not slopes:
+        print("No text lines detected")
+        return
+
+    # np.median averages the two middle values for an even number of lines.
+    median_slope = float(np.median(slopes))
+    mean_slope = sum(slopes) / len(slopes)
+    print(f"Text lines found: {len(slopes)}")
+    print(f"Median slope: {median_slope:.4f} deg")
+    print(f"Mean slope:   {mean_slope:.4f} deg")
+    print(f"Range: [{min(slopes):.4f}, {max(slopes):.4f}]")
+    print()
+    print("Individual line slopes:")
+    for line_slope in sorted(slopes):
+        print(f"  {line_slope:+.4f}")
+
+    # Also test the 4 dewarp methods directly
+    print("\n--- Dewarp method results on deskewed image ---")
+    from cv_vocab_pipeline import (
+        _detect_shear_angle, _detect_shear_by_projection,
+        _detect_shear_by_hough, _detect_shear_by_text_lines,
+    )
+    for name, fn in [
+        ("vertical_edge", _detect_shear_angle),
+        ("projection", _detect_shear_by_projection),
+        ("hough_lines", _detect_shear_by_hough),
+        ("text_lines", _detect_shear_by_text_lines),
+    ]:
+        r = fn(img)
+        print(f"  {name}: shear={r['shear_degrees']:.4f} conf={r['confidence']:.3f}")
+
+    # The user says "right side needs to come down 3mm"
+    # For a ~85mm wide image (1002px at ~300DPI), 3mm ~ 35px
+    # shear angle = atan(35 / 1556) ~ 1.29 degrees
+    # Let's check: what does the image look like if we apply 0.5, 1.0, 1.5 deg shear?
+    print("\n--- Pixel shift at right edge for various shear angles ---")
+    px_per_mm = w / 85.0  # assumes the image is ~85 mm wide, per the note above
+    for deg in [0.5, 0.8, 1.0, 1.3, 1.5, 2.0]:
+        shift_px = h * math.tan(math.radians(deg))
+        shift_mm = shift_px / px_per_mm  # approximate mm
+        print(f"  {deg:.1f} deg -> {shift_px:.0f}px shift -> ~{shift_mm:.1f}mm")
+
+# Start the analysis only when executed as a script, so that importing this
+# module (e.g. by a test collector or another tool) does not trigger a DB
+# lookup and an OCR run.
+if __name__ == "__main__":
+    asyncio.run(main())