Add scan quality scoring, column limit, image enhancement (Steps 1-3)
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 26s
CI / test-go-edu-search (push) Successful in 32s
CI / test-python-klausur (push) Failing after 2m21s
CI / test-python-agent-core (push) Successful in 28s
CI / test-nodejs-website (push) Successful in 20s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 26s
CI / test-go-edu-search (push) Successful in 32s
CI / test-python-klausur (push) Failing after 2m21s
CI / test-python-agent-core (push) Successful in 28s
CI / test-nodejs-website (push) Successful in 20s
Step 1: scan_quality.py — Laplacian blur + contrast scoring, adjusts OCR confidence threshold (40 for good scans, 30 for degraded). Quality report included in API response + shown in frontend. Step 2: max_columns parameter in cv_words_first.py — limits column detection to 3 for vocab tables, preventing phantom columns D/E from degraded OCR fragments. Step 3: ocr_image_enhance.py — CLAHE contrast + bilateral filter denoising + unsharp mask, only for degraded scans (gated by quality score). Pattern from handwriting_htr_api.py. Frontend: quality info shown in extraction status after processing. Reprocess button now derives pages from vocabulary data. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
102
klausur-service/backend/scan_quality.py
Normal file
102
klausur-service/backend/scan_quality.py
Normal file
@@ -0,0 +1,102 @@
|
||||
"""
|
||||
Scan Quality Assessment — Measures image quality before OCR.
|
||||
|
||||
Computes blur score, contrast score, and an overall quality rating.
|
||||
Used to gate enhancement steps and warn users about degraded scans.
|
||||
|
||||
All operations use OpenCV (Apache-2.0), no additional dependencies.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass, asdict
|
||||
from typing import Dict, Any
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Thresholds (empirically tuned on textbook scans).
# A scan is treated as degraded when it falls below either of the first two.
BLUR_THRESHOLD = 100.0      # Laplacian variance below this = blurry
CONTRAST_THRESHOLD = 40.0   # Grayscale stddev below this = low contrast

# Minimum OCR confidence recommended to callers, chosen by scan quality.
CONFIDENCE_GOOD = 40        # OCR min confidence for good scans
CONFIDENCE_DEGRADED = 30    # OCR min confidence for degraded scans
|
||||
|
||||
|
||||
@dataclass
class ScanQualityReport:
    """Result of scan quality assessment.

    A plain value object holding the raw measurements, the derived quality
    flags, and the OCR confidence threshold recommended for the scan.
    Field order is part of the interface (positional construction).
    """

    blur_score: float           # Laplacian variance (higher = sharper)
    contrast_score: float       # Grayscale std deviation (higher = more contrast)
    brightness: float           # Mean grayscale value (0-255)
    is_blurry: bool             # True if blur_score is below the blur threshold
    is_low_contrast: bool       # True if contrast_score is below the contrast threshold
    is_degraded: bool           # True if any quality issue detected
    quality_pct: int            # 0-100 overall quality estimate
    recommended_min_conf: int   # Recommended OCR confidence threshold

    def to_dict(self) -> Dict[str, Any]:
        """Return the report as a new dict keyed by field name."""
        return asdict(self)
|
||||
|
||||
|
||||
def score_scan_quality(img_bgr: np.ndarray) -> ScanQualityReport:
    """
    Assess the quality of a scanned image.

    Uses:
    - Laplacian variance for blur detection
    - Grayscale standard deviation for contrast
    - Mean brightness for exposure assessment

    Args:
        img_bgr: Image as a numpy array from OpenCV. Normally 3-channel BGR;
            2-D grayscale and 4-channel BGRA arrays are also accepted.

    Returns:
        ScanQualityReport with scores and recommendations
    """
    # Normalise to a single-channel image. Anything that is not plainly
    # grayscale or BGRA goes through the BGR conversion, so invalid input
    # still raises the same OpenCV error it always did.
    ndim = getattr(img_bgr, "ndim", 0)
    if ndim == 2:
        gray = img_bgr  # already grayscale, nothing to convert
    elif ndim == 3 and img_bgr.shape[2] == 4:
        gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGRA2GRAY)
    else:
        gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)

    # Blur detection: Laplacian variance
    # Higher = sharper edges = better quality
    laplacian = cv2.Laplacian(gray, cv2.CV_64F)
    blur_score = float(laplacian.var())

    # Contrast: standard deviation of grayscale
    contrast_score = float(np.std(gray))

    # Brightness: mean grayscale
    brightness = float(np.mean(gray))

    # Quality flags
    is_blurry = blur_score < BLUR_THRESHOLD
    is_low_contrast = contrast_score < CONTRAST_THRESHOLD
    is_degraded = is_blurry or is_low_contrast

    # Overall quality percentage: blur and contrast each contribute at most
    # 50 points, reaching their full share at the respective threshold.
    # Capping per component keeps one very strong metric from masking a
    # failing one, so a scan flagged as degraded never reports 100%.
    blur_pct = min(50.0, blur_score / BLUR_THRESHOLD * 50)
    contrast_pct = min(50.0, contrast_score / CONTRAST_THRESHOLD * 50)
    quality_pct = int(blur_pct + contrast_pct)

    # Recommended confidence threshold: accept lower-confidence OCR words on
    # degraded scans.
    recommended_min_conf = CONFIDENCE_DEGRADED if is_degraded else CONFIDENCE_GOOD

    report = ScanQualityReport(
        blur_score=round(blur_score, 1),
        contrast_score=round(contrast_score, 1),
        brightness=round(brightness, 1),
        is_blurry=is_blurry,
        is_low_contrast=is_low_contrast,
        is_degraded=is_degraded,
        quality_pct=quality_pct,
        recommended_min_conf=recommended_min_conf,
    )

    # Lazy %-formatting: same message text, only built if INFO is enabled.
    logger.info(
        "Scan quality: blur=%s contrast=%s quality=%s%% degraded=%s min_conf=%s",
        report.blur_score,
        report.contrast_score,
        report.quality_pct,
        report.is_degraded,
        report.recommended_min_conf,
    )

    return report
|
||||
Reference in New Issue
Block a user