Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 26s
CI / test-go-edu-search (push) Successful in 32s
CI / test-python-klausur (push) Failing after 2m21s
CI / test-python-agent-core (push) Successful in 28s
CI / test-nodejs-website (push) Successful in 20s
Step 1: scan_quality.py — Laplacian blur + contrast scoring, adjusts OCR confidence threshold (40 for good scans, 30 for degraded). Quality report included in API response + shown in frontend. Step 2: max_columns parameter in cv_words_first.py — limits column detection to 3 for vocab tables, preventing phantom columns D/E from degraded OCR fragments. Step 3: ocr_image_enhance.py — CLAHE contrast + bilateral filter denoising + unsharp mask, only for degraded scans (gated by quality score). Pattern from handwriting_htr_api.py. Frontend: quality info shown in extraction status after processing. Reprocess button now derives pages from vocabulary data. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
93 lines
3.0 KiB
Python
93 lines
3.0 KiB
Python
"""
|
|
OCR Image Enhancement — Improve scan quality before OCR.
|
|
|
|
Applies CLAHE contrast enhancement, bilateral filter denoising, and
unsharp-mask sharpening to degraded scans. The full pipeline only runs when
scan_quality.is_degraded is True; good scans get a light CLAHE pass only.
|
|
|
|
Pattern adapted from handwriting_htr_api.py (lines 50-68) and
|
|
cv_layout.py (lines 229-241).
|
|
|
|
All operations use OpenCV (Apache-2.0).
|
|
"""
|
|
|
|
import logging
|
|
|
|
import cv2
|
|
import numpy as np
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def _clahe_on_lightness(
    img_bgr: np.ndarray,
    clip_limit: float,
    tile_size: int,
) -> np.ndarray:
    """Apply CLAHE to the L channel of a BGR image and return a BGR image.

    The image is converted to LAB, CLAHE is applied to the L channel only,
    and the a/b channels are merged back as-is before converting to BGR.
    """
    lab = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2LAB)
    l_channel, a_channel, b_channel = cv2.split(lab)
    clahe = cv2.createCLAHE(
        clipLimit=clip_limit,
        tileGridSize=(tile_size, tile_size),
    )
    l_enhanced = clahe.apply(l_channel)
    lab_enhanced = cv2.merge([l_enhanced, a_channel, b_channel])
    return cv2.cvtColor(lab_enhanced, cv2.COLOR_LAB2BGR)


def enhance_for_ocr(
    img_bgr: np.ndarray,
    is_degraded: bool = False,
    clip_limit: float = 3.0,
    tile_size: int = 8,
    denoise_d: int = 9,
    denoise_sigma_color: float = 75,
    denoise_sigma_space: float = 75,
    sharpen: bool = True,
) -> np.ndarray:
    """Enhance image quality for OCR processing.

    Good scans (``is_degraded`` False) get a light CLAHE pass with a fixed
    clip limit of 2.0 on an 8x8 tile grid; every tuning parameter below is
    ignored on that path. Degraded scans go through the full pipeline:
    CLAHE, then bilateral-filter denoising, then an optional unsharp mask.

    Args:
        img_bgr: Input BGR image.
        is_degraded: Whether the scan is degraded (from ScanQualityReport).
        clip_limit: CLAHE clip limit (higher = more contrast). Degraded
            path only.
        tile_size: CLAHE tile grid size, used for both grid dimensions.
            Degraded path only.
        denoise_d: Bilateral filter diameter. Degraded path only.
        denoise_sigma_color: Bilateral filter sigma for color. Degraded
            path only.
        denoise_sigma_space: Bilateral filter sigma for space. Degraded
            path only.
        sharpen: Apply an unsharp mask after denoising. Degraded path only.

    Returns:
        Enhanced BGR image.
    """
    if not is_degraded:
        # Good scan: light, fixed-strength CLAHE only (preserves quality).
        result = _clahe_on_lightness(img_bgr, clip_limit=2.0, tile_size=8)
        logger.info("enhance_for_ocr: light CLAHE applied (good scan)")
        return result

    # Degraded scan: full enhancement pipeline.
    logger.info(
        "enhance_for_ocr: full enhancement "
        "(CLAHE clip=%s, denoise d=%s, sharpen=%s)",
        clip_limit,
        denoise_d,
        sharpen,
    )

    # 1. CLAHE on L-channel of LAB colorspace (preserves color for RapidOCR).
    enhanced = _clahe_on_lightness(
        img_bgr,
        clip_limit=clip_limit,
        tile_size=tile_size,
    )

    # 2. Bilateral filter: denoises while preserving edges.
    enhanced = cv2.bilateralFilter(
        enhanced,
        d=denoise_d,
        sigmaColor=denoise_sigma_color,
        sigmaSpace=denoise_sigma_space,
    )

    # 3. Unsharp mask for sharpening blurry text:
    #    1.5 * image - 0.5 * blurred == image + 0.5 * (image - blurred).
    if sharpen:
        # ksize (0, 0) leaves the kernel size to OpenCV, derived from sigma=3.
        blurred = cv2.GaussianBlur(enhanced, (0, 0), 3)
        enhanced = cv2.addWeighted(enhanced, 1.5, blurred, -0.5, 0)

    logger.info("enhance_for_ocr: full enhancement pipeline complete")
    return enhanced
|