refactor: remove unused pages and backends (model-management, OCR legacy, GPU/vast.ai, video-chat, matrix)
Deleted pages: - /ai/model-management (mock data only, no real backend) - /ai/ocr-compare (old /vocab/ backend, replaced by ocr-kombi) - /ai/ocr-pipeline (minimal session browser, redundant) - /ai/ocr-overlay (legacy monolith, redundant) - /ai/gpu (vast.ai GPU management, no longer used) - /infrastructure/gpu (same) - /communication/video-chat (moved to core) - /communication/matrix (moved to core) Deleted backends: - backend-lehrer/infra/vast_client.py + vast_power.py - backend-lehrer/meetings_api.py + jitsi_api.py - website/app/api/admin/gpu/ - edu-search-service/scripts/vast_ai_extractor.py Total: ~7,800 LOC removed. All code preserved in git history. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
100
klausur-service/backend/tests/debug_shear.py
Normal file
100
klausur-service/backend/tests/debug_shear.py
Normal file
@@ -0,0 +1,100 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Debug script: analyze text line slopes on deskewed image to determine true residual shear."""
|
||||
import sys, math, asyncio
|
||||
sys.path.insert(0, "/app/backend")
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
import pytesseract
|
||||
from ocr_pipeline_session_store import get_session_db
|
||||
|
||||
# Id of the OCR pipeline session whose stored "deskewed_png" is analyzed by main().
# Hard-coded for this debug run; change it to inspect a different session.
SESSION_ID = "3dcb1897-09a6-4b80-91b5-7e4207980bf3"
|
||||
|
||||
async def main():
    """Print residual-shear diagnostics for the deskewed image of SESSION_ID.

    Steps:
      1. Load the session via ``get_session_db`` and decode its
         ``deskewed_png`` bytes into a BGR image.
      2. Run Tesseract (``--psm 6``), group word centers by
         (block, paragraph, line) and fit a least-squares line through each
         group to obtain a per-text-line slope in degrees.
      3. Print median / mean / range and every individual slope.
      4. Print the result of the four shear detectors imported from
         ``cv_vocab_pipeline``.
      5. Print the pixel / approximate mm shift for several candidate
         shear angles.

    Returns ``None``.  The function returns early, after printing a message,
    when the session is missing, no image is stored, the image cannot be
    decoded, or no usable text line is found.
    """
    session = await get_session_db(SESSION_ID)
    if not session:
        print("Session not found")
        return

    deskewed_png = session.get("deskewed_png")
    if not deskewed_png:
        print("No deskewed_png stored")
        return

    arr = np.frombuffer(deskewed_png, dtype=np.uint8)
    img = cv2.imdecode(arr, cv2.IMREAD_COLOR)
    if img is None:
        # imdecode reports undecodable data by returning None instead of
        # raising, so guard before touching img.shape.
        print("deskewed_png could not be decoded")
        return
    h, w = img.shape[:2]
    print(f"Deskewed image: {w}x{h}")

    # Detect text line slopes using Tesseract word positions
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    data = pytesseract.image_to_data(gray, output_type=pytesseract.Output.DICT, config="--psm 6")

    lines = {}
    for i, raw_text in enumerate(data["text"]):
        txt = (raw_text or "").strip()
        if len(txt) < 2:
            continue
        # Depending on the pytesseract version, "conf" entries may be ints,
        # floats or strings (e.g. '-1'); normalize before comparing.
        try:
            conf = float(data["conf"][i])
        except (TypeError, ValueError):
            continue
        if conf < 30:
            continue
        key = (data["block_num"][i], data["par_num"][i], data["line_num"][i])
        cx = data["left"][i] + data["width"][i] / 2
        cy = data["top"][i] + data["height"][i] / 2
        lines.setdefault(key, []).append((cx, cy))

    slopes = []
    for pts in lines.values():
        # A line fit through fewer than three word centers is too noisy.
        if len(pts) < 3:
            continue
        pts.sort(key=lambda p: p[0])
        xs = np.array([p[0] for p in pts])
        ys = np.array([p[1] for p in pts])
        # Skip short lines: they must span at least 20% of the image width.
        if xs[-1] - xs[0] < w * 0.2:
            continue
        # Least-squares fit of y = slope * x + intercept.
        A = np.vstack([xs, np.ones(len(xs))]).T
        result = np.linalg.lstsq(A, ys, rcond=None)
        slope = result[0][0]
        angle_deg = math.degrees(math.atan(slope))
        slopes.append(angle_deg)

    if not slopes:
        print("No text lines detected")
        return

    # True median (average of the two middle values for an even count).
    median_slope = float(np.median(slopes))
    mean_slope = sum(slopes) / len(slopes)
    print(f"Text lines found: {len(slopes)}")
    print(f"Median slope: {median_slope:.4f} deg")
    print(f"Mean slope: {mean_slope:.4f} deg")
    print(f"Range: [{min(slopes):.4f}, {max(slopes):.4f}]")
    print()
    print("Individual line slopes:")
    for line_slope in sorted(slopes):
        print(f" {line_slope:+.4f}")

    # Also test the 4 dewarp methods directly
    print("\n--- Dewarp method results on deskewed image ---")
    # Imported lazily so the sys.path tweak at module top is already in effect.
    from cv_vocab_pipeline import (
        _detect_shear_angle, _detect_shear_by_projection,
        _detect_shear_by_hough, _detect_shear_by_text_lines,
    )
    for name, fn in [
        ("vertical_edge", _detect_shear_angle),
        ("projection", _detect_shear_by_projection),
        ("hough_lines", _detect_shear_by_hough),
        ("text_lines", _detect_shear_by_text_lines),
    ]:
        r = fn(img)
        print(f" {name}: shear={r['shear_degrees']:.4f} conf={r['confidence']:.3f}")

    # The user says "right side needs to come down 3mm"
    # For a ~85mm wide image (1002px at ~300DPI), 3mm ~ 35px
    # shear angle = atan(35 / 1556) ~ 1.29 degrees
    # Let's check: what does the image look like if we apply 0.5, 1.0, 1.5 deg shear?
    print("\n--- Pixel shift at right edge for various shear angles ---")
    for deg in [0.5, 0.8, 1.0, 1.3, 1.5, 2.0]:
        shift_px = h * math.tan(math.radians(deg))
        shift_mm = shift_px / (w / 85.0)  # approximate mm, assuming ~85mm image width
        print(f" {deg:.1f} deg -> {shift_px:.0f}px shift -> ~{shift_mm:.1f}mm")
|
||||
|
||||
# Run the analysis only when executed as a script, so that importing this
# module (e.g. by a test collector) does not trigger a DB lookup and OCR run.
if __name__ == "__main__":
    asyncio.run(main())
|
||||
Reference in New Issue
Block a user