feat(klausur-service): OCR-Pipeline Optimierungen (Improvements 2-4)
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 28s
CI / test-go-edu-search (push) Successful in 26s
CI / test-python-klausur (push) Failing after 1m46s
CI / test-python-agent-core (push) Successful in 16s
CI / test-nodejs-website (push) Successful in 16s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 28s
CI / test-go-edu-search (push) Successful in 26s
CI / test-python-klausur (push) Failing after 1m46s
CI / test-python-agent-core (push) Successful in 16s
CI / test-nodejs-website (push) Successful in 16s
## Improvement 2: VLM-basierter Dewarp
- Neuer Query-Parameter `method` für POST /sessions/{id}/dewarp
Optionen: ensemble (default) | vlm | cv
- `_detect_shear_with_vlm()`: fragt qwen2.5vl:32b per Ollama nach
dem Scherwinkel — gibt Zahlenwert + Konfidenz zurück
- `os`, `Query` zu ocr_pipeline_api.py Imports hinzugefügt
- `_apply_shear` aus cv_vocab_pipeline importiert
## Improvement 4: 3-Methoden Ensemble-Dewarp
- `_detect_shear_by_projection()`: Varianz-Sweep ±3° / 0.25°-Schritte
auf horizontalen Text-Zeilen-Projektionen (~30ms)
- `_detect_shear_by_hough()`: Gewichteter Median über HoughLinesP
auf Tabellen-Linien, Vorzeichen-Inversion (~20ms)
- `_ensemble_shear()`: Kombiniert alle 3 Methoden (conf >= 0.3),
Ausreißer-Filter bei >1° Abweichung, Bonus bei Agreement <0.5°
- `dewarp_image()` nutzt jetzt alle 3 Methoden parallel,
`use_ensemble: bool = True` für Rückwärtskompatibilität
- auto_dewarp Response enthält jetzt `detections`-Array
## Improvement 3: Vollautomatik-Endpoint
- POST /sessions/{id}/run-auto mit RunAutoRequest:
from_step (1-6), ocr_engine, pronunciation,
skip_llm_review, dewarp_method
- SSE-Streaming für alle 5+1 Schritte (deskew→dewarp→columns→rows→words→llm-review)
- Jeder Schritt: start / done / skipped / error Events
- Abschluss-Event: {steps_run, steps_skipped}
- LLM-Review-Fehler sind nicht-fatal (Pipeline läuft weiter)
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -17,6 +17,7 @@ DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
import uuid
|
||||
from dataclasses import asdict
|
||||
@@ -25,7 +26,7 @@ from typing import Any, Dict, List, Optional
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
from fastapi import APIRouter, File, Form, HTTPException, Request, UploadFile
|
||||
from fastapi import APIRouter, File, Form, HTTPException, Query, Request, UploadFile
|
||||
from fastapi.responses import Response, StreamingResponse
|
||||
from pydantic import BaseModel
|
||||
|
||||
@@ -50,6 +51,7 @@ from cv_vocab_pipeline import (
|
||||
deskew_image_by_word_alignment,
|
||||
detect_column_geometry,
|
||||
detect_row_geometry,
|
||||
_apply_shear,
|
||||
dewarp_image,
|
||||
dewarp_image_manual,
|
||||
llm_review_entries,
|
||||
@@ -544,9 +546,75 @@ async def save_deskew_ground_truth(session_id: str, req: DeskewGroundTruthReques
|
||||
# Dewarp Endpoints
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
async def _detect_shear_with_vlm(image_bytes: bytes) -> Dict[str, Any]:
    """Ask a vision LLM (qwen2.5vl:32b by default) to estimate the vertical shear angle.

    The encoded page image is sent to Ollama's ``/api/generate`` endpoint with a
    prompt asking whether the table/column borders tilt and by how many degrees.

    Args:
        image_bytes: Encoded (e.g. PNG) image of the deskewed page.

    Returns:
        Dict with keys ``method``, ``shear_degrees`` (clamped to [-3.0, 3.0]) and
        ``confidence`` (clamped to [0.0, 1.0]). ``confidence`` is 0.0 when Ollama
        is unreachable or the model's reply cannot be parsed, so callers can
        safely skip the correction.
    """
    # Local imports keep httpx optional for deployments that never use the VLM path.
    import httpx
    import base64
    import re

    ollama_base = os.getenv("OLLAMA_BASE_URL", "http://host.docker.internal:11434")
    model = os.getenv("OLLAMA_HTR_MODEL", "qwen2.5vl:32b")

    prompt = (
        "This is a scanned vocabulary worksheet. Look at the vertical borders of the table columns. "
        "Are they perfectly vertical, or do they tilt slightly? "
        "If they tilt, estimate the tilt angle in degrees (positive = top tilts right, negative = top tilts left). "
        "Reply with ONLY a JSON object like: {\"shear_degrees\": 1.2, \"confidence\": 0.8} "
        "Use confidence 0.0-1.0 based on how clearly you can see the tilt. "
        "If the columns look straight, return {\"shear_degrees\": 0.0, \"confidence\": 0.9}"
    )

    img_b64 = base64.b64encode(image_bytes).decode("utf-8")
    payload = {
        "model": model,
        "prompt": prompt,
        "images": [img_b64],
        "stream": False,
    }

    try:
        async with httpx.AsyncClient(timeout=60.0) as client:
            resp = await client.post(f"{ollama_base}/api/generate", json=payload)
            resp.raise_for_status()
            text = resp.json().get("response", "")

        # Parse JSON from response (may have surrounding text).
        # NOTE: uses the module-level `json` import; the previous redundant
        # function-local `import json` was removed.
        match = re.search(r'\{[^}]+\}', text)
        if match:
            data = json.loads(match.group(0))
            shear = float(data.get("shear_degrees", 0.0))
            conf = float(data.get("confidence", 0.0))
            # Clamp to a plausible range so a hallucinated value can't
            # produce an extreme shear correction.
            shear = max(-3.0, min(3.0, shear))
            conf = max(0.0, min(1.0, conf))
            return {"method": "vlm_qwen2.5vl", "shear_degrees": round(shear, 3), "confidence": round(conf, 2)}
    except Exception as e:
        logger.warning(f"VLM dewarp failed: {e}")

    # Fallback: neutral result with zero confidence (caller will skip shearing).
    return {"method": "vlm_qwen2.5vl", "shear_degrees": 0.0, "confidence": 0.0}
|
||||
|
||||
|
||||
@router.post("/sessions/{session_id}/dewarp")
|
||||
async def auto_dewarp(session_id: str):
|
||||
"""Detect and correct vertical shear on the deskewed image."""
|
||||
async def auto_dewarp(
|
||||
session_id: str,
|
||||
method: str = Query("ensemble", description="Detection method: ensemble | vlm | cv"),
|
||||
):
|
||||
"""Detect and correct vertical shear on the deskewed image.
|
||||
|
||||
Methods:
|
||||
- **ensemble** (default): 3-method CV ensemble (vertical edges + projection + Hough)
|
||||
- **cv**: CV ensemble only (same as ensemble)
|
||||
- **vlm**: Ask qwen2.5vl:32b to estimate the shear angle visually
|
||||
"""
|
||||
if method not in ("ensemble", "cv", "vlm"):
|
||||
raise HTTPException(status_code=400, detail="method must be one of: ensemble, cv, vlm")
|
||||
|
||||
if session_id not in _cache:
|
||||
await _load_session_to_cache(session_id)
|
||||
cached = _get_cached(session_id)
|
||||
@@ -556,7 +624,26 @@ async def auto_dewarp(session_id: str):
|
||||
raise HTTPException(status_code=400, detail="Deskew must be completed before dewarp")
|
||||
|
||||
t0 = time.time()
|
||||
dewarped_bgr, dewarp_info = dewarp_image(deskewed_bgr)
|
||||
|
||||
if method == "vlm":
|
||||
# Encode deskewed image to PNG for VLM
|
||||
success, png_buf = cv2.imencode(".png", deskewed_bgr)
|
||||
img_bytes = png_buf.tobytes() if success else b""
|
||||
vlm_det = await _detect_shear_with_vlm(img_bytes)
|
||||
shear_deg = vlm_det["shear_degrees"]
|
||||
if abs(shear_deg) >= 0.05 and vlm_det["confidence"] >= 0.3:
|
||||
dewarped_bgr = _apply_shear(deskewed_bgr, -shear_deg)
|
||||
else:
|
||||
dewarped_bgr = deskewed_bgr
|
||||
dewarp_info = {
|
||||
"method": vlm_det["method"],
|
||||
"shear_degrees": shear_deg,
|
||||
"confidence": vlm_det["confidence"],
|
||||
"detections": [vlm_det],
|
||||
}
|
||||
else:
|
||||
dewarped_bgr, dewarp_info = dewarp_image(deskewed_bgr)
|
||||
|
||||
duration = time.time() - t0
|
||||
|
||||
# Encode as PNG
|
||||
@@ -568,6 +655,7 @@ async def auto_dewarp(session_id: str):
|
||||
"shear_degrees": dewarp_info["shear_degrees"],
|
||||
"confidence": dewarp_info["confidence"],
|
||||
"duration_seconds": round(duration, 2),
|
||||
"detections": dewarp_info.get("detections", []),
|
||||
}
|
||||
|
||||
# Update cache
|
||||
@@ -2000,3 +2088,505 @@ async def remove_handwriting_endpoint(session_id: str, req: RemoveHandwritingReq
|
||||
"image_url": f"/api/v1/ocr-pipeline/sessions/{session_id}/image/clean",
|
||||
"session_id": session_id,
|
||||
}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Auto-Mode Endpoint (Improvement 3)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class RunAutoRequest(BaseModel):
    """Request body for POST /sessions/{id}/run-auto (full-auto pipeline run)."""

    # Pipeline step to start from: 1=deskew, 2=dewarp, 3=columns, 4=rows, 5=words, 6=llm-review
    from_step: int = 1
    # OCR engine selection: "auto" | "rapid" | "tesseract"
    ocr_engine: str = "auto"
    # Pronunciation variant used when normalizing phonetic brackets
    pronunciation: str = "british"
    # When True, step 6 (LLM review) is skipped entirely
    skip_llm_review: bool = False
    # Shear-detection method for step 2: "ensemble" | "vlm" | "cv"
    dewarp_method: str = "ensemble"
|
||||
|
||||
|
||||
async def _auto_sse_event(step: str, status: str, data: Dict[str, Any]) -> str:
|
||||
"""Format a single SSE event line."""
|
||||
import json as _json
|
||||
payload = {"step": step, "status": status, **data}
|
||||
return f"data: {_json.dumps(payload)}\n\n"
|
||||
|
||||
|
||||
@router.post("/sessions/{session_id}/run-auto")
async def run_auto(session_id: str, req: RunAutoRequest, request: Request):
    """Run the full OCR pipeline automatically from a given step, streaming SSE progress.

    Steps:
      1. Deskew — straighten the scan
      2. Dewarp — correct vertical shear (ensemble CV or VLM)
      3. Columns — detect column layout
      4. Rows — detect row layout
      5. Words — OCR each cell
      6. LLM review — correct OCR errors (optional)

    Already-completed steps are skipped unless `from_step` forces a rerun.
    Yields SSE events of the form:
        data: {"step": "deskew", "status": "start"|"done"|"skipped"|"error", ...}

    Final event:
        data: {"step": "complete", "status": "done", "steps_run": [...], "steps_skipped": [...]}

    Raises:
        HTTPException(400): invalid `from_step` or `dewarp_method` (validated
            before streaming starts so the client gets a normal HTTP error).
    """
    # `request` is currently unused in the body; kept for the FastAPI signature
    # (e.g. future client-disconnect checks) — do not remove without checking callers.
    if req.from_step < 1 or req.from_step > 6:
        raise HTTPException(status_code=400, detail="from_step must be 1-6")
    if req.dewarp_method not in ("ensemble", "vlm", "cv"):
        raise HTTPException(status_code=400, detail="dewarp_method must be: ensemble, vlm, cv")

    if session_id not in _cache:
        await _load_session_to_cache(session_id)

    async def _generate():
        """Async generator producing one SSE frame per pipeline event."""
        steps_run: List[str] = []        # step names that actually executed
        steps_skipped: List[str] = []    # step names skipped (from_step / no data / failure)
        error_step: Optional[str] = None

        session = await get_session_db(session_id)
        if not session:
            yield await _auto_sse_event("error", "error", {"message": f"Session {session_id} not found"})
            return

        cached = _get_cached(session_id)

        # -----------------------------------------------------------------
        # Step 1: Deskew
        # -----------------------------------------------------------------
        if req.from_step <= 1:
            yield await _auto_sse_event("deskew", "start", {})
            try:
                t0 = time.time()
                orig_bgr = cached.get("original_bgr")
                if orig_bgr is None:
                    raise ValueError("Original image not loaded")

                # Method 1: Hough lines (best-effort; falls back to no rotation)
                try:
                    deskewed_hough, angle_hough = deskew_image(orig_bgr.copy())
                except Exception:
                    deskewed_hough, angle_hough = orig_bgr, 0.0

                # Method 2: Word alignment (operates on encoded bytes)
                success_enc, png_orig = cv2.imencode(".png", orig_bgr)
                orig_bytes = png_orig.tobytes() if success_enc else b""
                try:
                    deskewed_wa_bytes, angle_wa = deskew_image_by_word_alignment(orig_bytes)
                except Exception:
                    deskewed_wa_bytes, angle_wa = orig_bytes, 0.0

                # Pick best method: prefer word alignment when it found an
                # equal-or-larger angle, or when Hough found almost nothing.
                if abs(angle_wa) >= abs(angle_hough) or abs(angle_hough) < 0.1:
                    method_used = "word_alignment"
                    angle_applied = angle_wa
                    wa_arr = np.frombuffer(deskewed_wa_bytes, dtype=np.uint8)
                    deskewed_bgr = cv2.imdecode(wa_arr, cv2.IMREAD_COLOR)
                    if deskewed_bgr is None:
                        # Decoding the word-alignment output failed — fall back to Hough.
                        deskewed_bgr = deskewed_hough
                        method_used = "hough"
                        angle_applied = angle_hough
                else:
                    method_used = "hough"
                    angle_applied = angle_hough
                    deskewed_bgr = deskewed_hough

                success, png_buf = cv2.imencode(".png", deskewed_bgr)
                deskewed_png = png_buf.tobytes() if success else b""

                deskew_result = {
                    "method_used": method_used,
                    "rotation_degrees": round(float(angle_applied), 3),
                    "duration_seconds": round(time.time() - t0, 2),
                }

                # Persist to both the in-process cache and the DB.
                cached["deskewed_bgr"] = deskewed_bgr
                cached["deskew_result"] = deskew_result
                await update_session_db(
                    session_id,
                    deskewed_png=deskewed_png,
                    deskew_result=deskew_result,
                    auto_rotation_degrees=float(angle_applied),
                    current_step=2,
                )
                session = await get_session_db(session_id)

                steps_run.append("deskew")
                yield await _auto_sse_event("deskew", "done", deskew_result)
            except Exception as e:
                # Fatal: subsequent steps need the deskewed image.
                logger.error(f"Auto-mode deskew failed for {session_id}: {e}")
                error_step = "deskew"
                yield await _auto_sse_event("deskew", "error", {"message": str(e)})
                yield await _auto_sse_event("complete", "error", {"error_step": error_step})
                return
        else:
            steps_skipped.append("deskew")
            yield await _auto_sse_event("deskew", "skipped", {"reason": "from_step > 1"})

        # -----------------------------------------------------------------
        # Step 2: Dewarp
        # -----------------------------------------------------------------
        if req.from_step <= 2:
            yield await _auto_sse_event("dewarp", "start", {"method": req.dewarp_method})
            try:
                t0 = time.time()
                deskewed_bgr = cached.get("deskewed_bgr")
                if deskewed_bgr is None:
                    raise ValueError("Deskewed image not available")

                if req.dewarp_method == "vlm":
                    success_enc, png_buf = cv2.imencode(".png", deskewed_bgr)
                    img_bytes = png_buf.tobytes() if success_enc else b""
                    vlm_det = await _detect_shear_with_vlm(img_bytes)
                    shear_deg = vlm_det["shear_degrees"]
                    # Apply only when the angle is meaningful and the VLM is
                    # confident enough; negated because _apply_shear corrects.
                    if abs(shear_deg) >= 0.05 and vlm_det["confidence"] >= 0.3:
                        dewarped_bgr = _apply_shear(deskewed_bgr, -shear_deg)
                    else:
                        dewarped_bgr = deskewed_bgr
                    dewarp_info = {
                        "method": vlm_det["method"],
                        "shear_degrees": shear_deg,
                        "confidence": vlm_det["confidence"],
                        "detections": [vlm_det],
                    }
                else:
                    # "ensemble" and "cv" both use the CV ensemble detector.
                    dewarped_bgr, dewarp_info = dewarp_image(deskewed_bgr)

                success_enc, png_buf = cv2.imencode(".png", dewarped_bgr)
                dewarped_png = png_buf.tobytes() if success_enc else b""

                dewarp_result = {
                    "method_used": dewarp_info["method"],
                    "shear_degrees": dewarp_info["shear_degrees"],
                    "confidence": dewarp_info["confidence"],
                    "duration_seconds": round(time.time() - t0, 2),
                    "detections": dewarp_info.get("detections", []),
                }

                cached["dewarped_bgr"] = dewarped_bgr
                cached["dewarp_result"] = dewarp_result
                await update_session_db(
                    session_id,
                    dewarped_png=dewarped_png,
                    dewarp_result=dewarp_result,
                    auto_shear_degrees=dewarp_info.get("shear_degrees", 0.0),
                    current_step=3,
                )
                session = await get_session_db(session_id)

                steps_run.append("dewarp")
                yield await _auto_sse_event("dewarp", "done", dewarp_result)
            except Exception as e:
                # Fatal: layout detection needs the dewarped image.
                logger.error(f"Auto-mode dewarp failed for {session_id}: {e}")
                error_step = "dewarp"
                yield await _auto_sse_event("dewarp", "error", {"message": str(e)})
                yield await _auto_sse_event("complete", "error", {"error_step": error_step})
                return
        else:
            steps_skipped.append("dewarp")
            yield await _auto_sse_event("dewarp", "skipped", {"reason": "from_step > 2"})

        # -----------------------------------------------------------------
        # Step 3: Columns
        # -----------------------------------------------------------------
        if req.from_step <= 3:
            yield await _auto_sse_event("columns", "start", {})
            try:
                t0 = time.time()
                dewarped_bgr = cached.get("dewarped_bgr")
                if dewarped_bgr is None:
                    raise ValueError("Dewarped image not available")

                ocr_img = create_ocr_image(dewarped_bgr)
                h, w = ocr_img.shape[:2]

                geo_result = detect_column_geometry(ocr_img, dewarped_bgr)
                if geo_result is None:
                    # Geometry detection failed — fall back to generic layout analysis
                    # and invalidate the geometry caches used by later steps.
                    layout_img = create_layout_image(dewarped_bgr)
                    regions = analyze_layout(layout_img, ocr_img)
                    cached["_word_dicts"] = None
                    cached["_inv"] = None
                    cached["_content_bounds"] = None
                else:
                    geometries, left_x, right_x, top_y, bottom_y, word_dicts, inv = geo_result
                    content_w = right_x - left_x
                    # Cache geometry artifacts so steps 4/5 can reuse them.
                    cached["_word_dicts"] = word_dicts
                    cached["_inv"] = inv
                    cached["_content_bounds"] = (left_x, right_x, top_y, bottom_y)

                    header_y, footer_y = _detect_header_footer_gaps(inv, w, h) if inv is not None else (None, None)
                    geometries = _detect_sub_columns(geometries, content_w, left_x=left_x,
                                                    top_y=top_y, header_y=header_y, footer_y=footer_y)
                    regions = classify_column_types(geometries, content_w, top_y, w, h, bottom_y,
                                                    left_x=left_x, right_x=right_x, inv=inv)

                columns = [asdict(r) for r in regions]
                column_result = {
                    "columns": columns,
                    "classification_methods": list({c.get("classification_method", "") for c in columns if c.get("classification_method")}),
                    "duration_seconds": round(time.time() - t0, 2),
                }

                # New columns invalidate downstream row/word results in the DB.
                cached["column_result"] = column_result
                await update_session_db(session_id, column_result=column_result,
                                        row_result=None, word_result=None, current_step=4)
                session = await get_session_db(session_id)

                steps_run.append("columns")
                yield await _auto_sse_event("columns", "done", {
                    "column_count": len(columns),
                    "duration_seconds": column_result["duration_seconds"],
                })
            except Exception as e:
                logger.error(f"Auto-mode columns failed for {session_id}: {e}")
                error_step = "columns"
                yield await _auto_sse_event("columns", "error", {"message": str(e)})
                yield await _auto_sse_event("complete", "error", {"error_step": error_step})
                return
        else:
            steps_skipped.append("columns")
            yield await _auto_sse_event("columns", "skipped", {"reason": "from_step > 3"})

        # -----------------------------------------------------------------
        # Step 4: Rows
        # -----------------------------------------------------------------
        if req.from_step <= 4:
            yield await _auto_sse_event("rows", "start", {})
            try:
                t0 = time.time()
                dewarped_bgr = cached.get("dewarped_bgr")
                session = await get_session_db(session_id)
                column_result = session.get("column_result") or cached.get("column_result")
                if not column_result or not column_result.get("columns"):
                    raise ValueError("Column detection must complete first")

                # NOTE(review): col_regions is not referenced again within this
                # step (detect_row_geometry below does not take it) — looks like
                # dead code; verify before removing.
                col_regions = [
                    PageRegion(
                        type=c["type"], x=c["x"], y=c["y"],
                        width=c["width"], height=c["height"],
                        classification_confidence=c.get("classification_confidence", 1.0),
                        classification_method=c.get("classification_method", ""),
                    )
                    for c in column_result["columns"]
                ]

                word_dicts = cached.get("_word_dicts")
                inv = cached.get("_inv")
                content_bounds = cached.get("_content_bounds")

                # Recompute geometry when the in-process cache was lost
                # (e.g. from_step == 4 on a freshly loaded session).
                if word_dicts is None or inv is None or content_bounds is None:
                    ocr_img_tmp = create_ocr_image(dewarped_bgr)
                    geo_result = detect_column_geometry(ocr_img_tmp, dewarped_bgr)
                    if geo_result is None:
                        raise ValueError("Column geometry detection failed — cannot detect rows")
                    _g, lx, rx, ty, by, word_dicts, inv = geo_result
                    cached["_word_dicts"] = word_dicts
                    cached["_inv"] = inv
                    cached["_content_bounds"] = (lx, rx, ty, by)
                    content_bounds = (lx, rx, ty, by)

                left_x, right_x, top_y, bottom_y = content_bounds
                row_geoms = detect_row_geometry(inv, word_dicts, left_x, right_x, top_y, bottom_y)

                row_list = [
                    {
                        "index": r.index, "x": r.x, "y": r.y,
                        "width": r.width, "height": r.height,
                        "word_count": r.word_count,
                        "row_type": r.row_type,
                        "gap_before": r.gap_before,
                    }
                    for r in row_geoms
                ]
                row_result = {
                    "rows": row_list,
                    "row_count": len(row_list),
                    "content_rows": len([r for r in row_geoms if r.row_type == "content"]),
                    "duration_seconds": round(time.time() - t0, 2),
                }

                cached["row_result"] = row_result
                await update_session_db(session_id, row_result=row_result, current_step=5)
                session = await get_session_db(session_id)

                steps_run.append("rows")
                yield await _auto_sse_event("rows", "done", {
                    "row_count": len(row_list),
                    "content_rows": row_result["content_rows"],
                    "duration_seconds": row_result["duration_seconds"],
                })
            except Exception as e:
                logger.error(f"Auto-mode rows failed for {session_id}: {e}")
                error_step = "rows"
                yield await _auto_sse_event("rows", "error", {"message": str(e)})
                yield await _auto_sse_event("complete", "error", {"error_step": error_step})
                return
        else:
            steps_skipped.append("rows")
            yield await _auto_sse_event("rows", "skipped", {"reason": "from_step > 4"})

        # -----------------------------------------------------------------
        # Step 5: Words (OCR)
        # -----------------------------------------------------------------
        if req.from_step <= 5:
            yield await _auto_sse_event("words", "start", {"engine": req.ocr_engine})
            try:
                t0 = time.time()
                dewarped_bgr = cached.get("dewarped_bgr")
                session = await get_session_db(session_id)

                # If either result is missing, the subscripts below raise and
                # the error is reported via the except branch.
                column_result = session.get("column_result") or cached.get("column_result")
                row_result = session.get("row_result") or cached.get("row_result")

                col_regions = [
                    PageRegion(
                        type=c["type"], x=c["x"], y=c["y"],
                        width=c["width"], height=c["height"],
                        classification_confidence=c.get("classification_confidence", 1.0),
                        classification_method=c.get("classification_method", ""),
                    )
                    for c in column_result["columns"]
                ]
                row_geoms = [
                    RowGeometry(
                        index=r["index"], x=r["x"], y=r["y"],
                        width=r["width"], height=r["height"],
                        word_count=r.get("word_count", 0), words=[],
                        row_type=r.get("row_type", "content"),
                        gap_before=r.get("gap_before", 0),
                    )
                    for r in row_result["rows"]
                ]

                # Re-attach cached word boxes to rows: a word belongs to the row
                # that contains its vertical midpoint (coordinates relative to top_y).
                word_dicts = cached.get("_word_dicts")
                if word_dicts is not None:
                    content_bounds = cached.get("_content_bounds")
                    top_y = content_bounds[2] if content_bounds else min(r.y for r in row_geoms)
                    for row in row_geoms:
                        row_y_rel = row.y - top_y
                        row_bottom_rel = row_y_rel + row.height
                        row.words = [
                            w for w in word_dicts
                            if row_y_rel <= w['top'] + w['height'] / 2 < row_bottom_rel
                        ]
                        row.word_count = len(row.words)

                ocr_img = create_ocr_image(dewarped_bgr)
                img_h, img_w = dewarped_bgr.shape[:2]

                cells, columns_meta = build_cell_grid(
                    ocr_img, col_regions, row_geoms, img_w, img_h,
                    ocr_engine=req.ocr_engine, img_bgr=dewarped_bgr,
                )
                duration = time.time() - t0

                # Vocab layout is detected via the presence of language columns.
                col_types = {c['type'] for c in columns_meta}
                is_vocab = bool(col_types & {'column_en', 'column_de'})
                n_content_rows = len([r for r in row_geoms if r.row_type == 'content'])
                used_engine = cells[0].get("ocr_engine", "tesseract") if cells else req.ocr_engine

                word_result_data = {
                    "cells": cells,
                    "grid_shape": {
                        "rows": n_content_rows,
                        "cols": len(columns_meta),
                        "total_cells": len(cells),
                    },
                    "columns_used": columns_meta,
                    "layout": "vocab" if is_vocab else "generic",
                    "image_width": img_w,
                    "image_height": img_h,
                    "duration_seconds": round(duration, 2),
                    "ocr_engine": used_engine,
                    "summary": {
                        "total_cells": len(cells),
                        "non_empty_cells": sum(1 for c in cells if c.get("text")),
                        "low_confidence": sum(1 for c in cells if 0 < c.get("confidence", 0) < 50),
                    },
                }

                if is_vocab:
                    # Post-process vocab layouts: cell grid → entries, then
                    # character-confusion and phonetic-bracket fixes.
                    entries = _cells_to_vocab_entries(cells, columns_meta)
                    entries = _fix_character_confusion(entries)
                    entries = _fix_phonetic_brackets(entries, pronunciation=req.pronunciation)
                    word_result_data["vocab_entries"] = entries
                    word_result_data["entries"] = entries
                    word_result_data["entry_count"] = len(entries)
                    word_result_data["summary"]["total_entries"] = len(entries)

                await update_session_db(session_id, word_result=word_result_data, current_step=6)
                cached["word_result"] = word_result_data
                session = await get_session_db(session_id)

                steps_run.append("words")
                yield await _auto_sse_event("words", "done", {
                    "total_cells": len(cells),
                    "layout": word_result_data["layout"],
                    "duration_seconds": round(duration, 2),
                    "ocr_engine": used_engine,
                    "summary": word_result_data["summary"],
                })
            except Exception as e:
                logger.error(f"Auto-mode words failed for {session_id}: {e}")
                error_step = "words"
                yield await _auto_sse_event("words", "error", {"message": str(e)})
                yield await _auto_sse_event("complete", "error", {"error_step": error_step})
                return
        else:
            steps_skipped.append("words")
            yield await _auto_sse_event("words", "skipped", {"reason": "from_step > 5"})

        # -----------------------------------------------------------------
        # Step 6: LLM Review (optional)
        # -----------------------------------------------------------------
        if req.from_step <= 6 and not req.skip_llm_review:
            yield await _auto_sse_event("llm_review", "start", {"model": OLLAMA_REVIEW_MODEL})
            try:
                session = await get_session_db(session_id)
                word_result = session.get("word_result") or cached.get("word_result")
                entries = word_result.get("entries") or word_result.get("vocab_entries") or []

                if not entries:
                    yield await _auto_sse_event("llm_review", "skipped", {"reason": "no entries"})
                    steps_skipped.append("llm_review")
                else:
                    reviewed = await llm_review_entries(entries)

                    # Re-read word_result before merging so concurrent updates
                    # are not overwritten wholesale.
                    session = await get_session_db(session_id)
                    word_result_updated = dict(session.get("word_result") or {})
                    word_result_updated["entries"] = reviewed
                    word_result_updated["vocab_entries"] = reviewed
                    word_result_updated["llm_reviewed"] = True
                    word_result_updated["llm_model"] = OLLAMA_REVIEW_MODEL

                    await update_session_db(session_id, word_result=word_result_updated, current_step=7)
                    cached["word_result"] = word_result_updated

                    steps_run.append("llm_review")
                    yield await _auto_sse_event("llm_review", "done", {
                        "entries_reviewed": len(reviewed),
                        "model": OLLAMA_REVIEW_MODEL,
                    })
            except Exception as e:
                # Non-fatal by design: OCR output is usable without LLM review.
                logger.warning(f"Auto-mode llm_review failed for {session_id} (non-fatal): {e}")
                yield await _auto_sse_event("llm_review", "error", {"message": str(e), "fatal": False})
                steps_skipped.append("llm_review")
        else:
            steps_skipped.append("llm_review")
            reason = "skipped by request" if req.skip_llm_review else "from_step > 6"
            yield await _auto_sse_event("llm_review", "skipped", {"reason": reason})

        # -----------------------------------------------------------------
        # Final event
        # -----------------------------------------------------------------
        yield await _auto_sse_event("complete", "done", {
            "steps_run": steps_run,
            "steps_skipped": steps_skipped,
        })

    return StreamingResponse(
        _generate(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            # Disable nginx proxy buffering so events reach the client immediately.
            "X-Accel-Buffering": "no",
        },
    )
|
||||
|
||||
Reference in New Issue
Block a user