backend-lehrer (11 files): - llm_gateway/routes/schools.py (867 → 5), recording_api.py (848 → 6) - messenger_api.py (840 → 5), print_generator.py (824 → 5) - unit_analytics_api.py (751 → 5), classroom/routes/context.py (726 → 4) - llm_gateway/routes/edu_search_seeds.py (710 → 4) klausur-service (12 files): - ocr_labeling_api.py (845 → 4), metrics_db.py (833 → 4) - legal_corpus_api.py (790 → 4), page_crop.py (758 → 3) - mail/ai_service.py (747 → 4), github_crawler.py (767 → 3) - trocr_service.py (730 → 4), full_compliance_pipeline.py (723 → 4) - dsfa_rag_api.py (715 → 4), ocr_pipeline_auto.py (705 → 4) website (6 pages): - audit-checklist (867 → 8), content (806 → 6) - screen-flow (790 → 4), scraper (789 → 5) - zeugnisse (776 → 5), modules (745 → 4) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
389 lines
12 KiB
Python
389 lines
12 KiB
Python
"""
|
|
Page Crop - Edge Detection Helpers
|
|
|
|
Spine shadow detection, gutter continuity analysis, projection-based
|
|
edge detection, and narrow-run filtering for content cropping.
|
|
|
|
Extracted from page_crop.py to keep files under 500 LOC.
|
|
License: Apache 2.0
|
|
"""
|
|
|
|
import logging
|
|
from typing import Optional, Tuple
|
|
|
|
import cv2
|
|
import numpy as np
|
|
|
|
# Module-level logger; handlers/levels are configured by the host application.
logger = logging.getLogger(__name__)

# Minimum ink density (fraction of pixels) to count a row/column as "content"
# when scanning binary projections for edges.
_INK_THRESHOLD = 0.003  # 0.3%

# Minimum run length (fraction of dimension) to keep — shorter runs are noise
# (specks, scanner dust) and are filtered out before edge detection.
_MIN_RUN_FRAC = 0.005  # 0.5%
|
|
|
|
|
|
def _spine_rise_ok(
    smoothed: np.ndarray,
    spine_idx: int,
    val_min: float,
    shadow_range: float,
    side: str,
) -> bool:
    """Check 4 of spine detection: brightness must rise toward the page.

    A real spine shadow is a valley: moving from the darkest column toward
    the page content (rightward for a left-side spine, leftward for a
    right-side spine), mean brightness should climb by at least 30% of the
    overall brightness range. Text plateaus stay flat and fail this check.

    Args:
        smoothed: Edge-trimmed, smoothed column-brightness profile.
        spine_idx: Index of the darkest column within ``smoothed``.
        val_min: Brightness at the darkest column.
        shadow_range: max - min brightness over the profile.
        side: 'left' or 'right' (selects content direction; also logging).

    Returns:
        True when the rise is sufficient, or when the measurement window
        is empty (check skipped); False when the profile fails the check.
    """
    trimmed_len = len(smoothed)
    rise_check_w = max(5, trimmed_len // 5)
    if side == "left":
        # Page content lies to the right of the valley.
        start = min(spine_idx + 5, trimmed_len - 1)
        end = min(start + rise_check_w, trimmed_len)
    else:  # right — page content lies to the left of the valley.
        end = max(spine_idx - 5, 0)
        start = max(end - rise_check_w, 0)
    if end <= start:
        # Window collapsed (valley sits at the profile edge): skip the check.
        return True
    rise = float(np.mean(smoothed[start:end])) - val_min
    if rise < shadow_range * 0.3:
        logger.debug(
            "%s edge: no spine (insufficient rise: %.0f, need %.0f)",
            side.capitalize(), rise, shadow_range * 0.3,
        )
        return False
    return True


def _detect_spine_shadow(
    gray: np.ndarray,
    search_region: np.ndarray,
    offset_x: int,
    w: int,
    side: str,
) -> Optional[int]:
    """Find the book spine center (darkest point) in a scanner shadow.

    The scanner produces a gray strip where the book spine presses against
    the glass. The darkest column in that strip is the spine center —
    that's where we crop.

    Distinguishes real spine shadows from text content by checking:
    1. Strong brightness range (> 40 levels)
    2. Darkest point is genuinely dark (< 180 mean brightness)
    3. The dark area is a NARROW valley, not a text-content plateau
    4. Brightness rises significantly toward the page content side
       (delegated to ``_spine_rise_ok``)

    Args:
        gray: Full grayscale image (for context).
        search_region: Column slice of the grayscale image to search in.
        offset_x: X offset of search_region relative to full image.
        w: Full image width.
        side: 'left' or 'right' (for logging and rise direction).

    Returns:
        X coordinate (in full image) of the spine center, or None.
    """
    region_w = search_region.shape[1]
    if region_w < 10:
        return None

    # Column-mean brightness in the search region.
    col_means = np.mean(search_region, axis=0).astype(np.float64)

    # Smooth with boxcar kernel (width = 1% of image width, min 5, forced odd).
    kernel_size = max(5, w // 100)
    if kernel_size % 2 == 0:
        kernel_size += 1
    kernel = np.ones(kernel_size) / kernel_size
    smoothed_raw = np.convolve(col_means, kernel, mode="same")

    # Trim convolution edge artifacts (edges are zero-padded -> artificially low).
    margin = kernel_size // 2
    if region_w <= 2 * margin + 10:
        return None
    smoothed = smoothed_raw[margin:region_w - margin]
    trim_offset = margin  # offset of smoothed[0] relative to search_region

    val_min = float(np.min(smoothed))
    val_max = float(np.max(smoothed))
    shadow_range = val_max - val_min

    # --- Check 1: Strong brightness gradient ---
    if shadow_range <= 40:
        logger.debug(
            "%s edge: no spine (range=%.0f <= 40)", side.capitalize(), shadow_range,
        )
        return None

    # --- Check 2: Darkest point must be genuinely dark ---
    if val_min > 180:
        logger.debug(
            "%s edge: no spine (darkest=%.0f > 180, likely text)",
            side.capitalize(), val_min,
        )
        return None

    spine_idx = int(np.argmin(smoothed))    # index in trimmed array
    spine_local = spine_idx + trim_offset   # index in search_region
    trimmed_len = len(smoothed)

    # --- Check 3: Valley width (spine is narrow, text plateau is wide) ---
    valley_thresh = val_min + shadow_range * 0.20
    valley_width = int(np.sum(smoothed < valley_thresh))
    max_valley_frac = 0.50
    if valley_width > trimmed_len * max_valley_frac:
        logger.debug(
            "%s edge: no spine (valley too wide: %d/%d = %.0f%%)",
            side.capitalize(), valley_width, trimmed_len,
            100.0 * valley_width / trimmed_len,
        )
        return None

    # --- Check 4: Brightness must rise toward page content ---
    if not _spine_rise_ok(smoothed, spine_idx, val_min, shadow_range, side):
        return None

    spine_x = offset_x + spine_local

    logger.info(
        "%s edge: spine center at x=%d (brightness=%.0f, range=%.0f, valley=%dpx)",
        side.capitalize(), spine_x, val_min, shadow_range, valley_width,
    )
    return spine_x
|
|
|
|
|
|
def _detect_gutter_continuity(
    gray: np.ndarray,
    search_region: np.ndarray,
    offset_x: int,
    w: int,
    side: str,
) -> Optional[int]:
    """Detect gutter shadow via vertical continuity analysis.

    Camera book scans produce a subtle brightness gradient at the gutter
    that is too faint for scanner-shadow detection (range < 40). However,
    the gutter shadow has a unique property: it runs **continuously from
    top to bottom** without interruption.

    Algorithm:
    1. Divide image into N horizontal strips (~60px each)
    2. For each column, compute what fraction of strips are darker than
       the page median (from the center 50% of the full image)
    3. A "gutter column" has >= 75% of strips darker than page_median - d
    4. Smooth the dark-fraction profile and find the transition point
    5. Validate: gutter band must be 0.5%-10% of image width

    Args:
        gray: Full grayscale image (used for the page-median estimate).
        search_region: Column slice of the grayscale image to search in.
        offset_x: X offset of search_region relative to the full image.
        w: Full image width.
        side: 'left' or 'right' (selects scan direction; also logging).

    Returns:
        X coordinate (in the full image) of the inner gutter boundary,
        or None when no plausible gutter is found.
    """
    region_h, region_w = search_region.shape[:2]
    # Too small to form strips / a meaningful per-column profile.
    if region_w < 20 or region_h < 100:
        return None

    # --- 1. Divide into horizontal strips ---
    strip_target_h = 60
    n_strips = max(10, region_h // strip_target_h)
    strip_h = region_h // n_strips

    # strip_means[s, x] = mean brightness of column x within strip s.
    strip_means = np.zeros((n_strips, region_w), dtype=np.float64)
    for s in range(n_strips):
        y0 = s * strip_h
        y1 = min((s + 1) * strip_h, region_h)
        strip_means[s] = np.mean(search_region[y0:y1, :], axis=0)

    # --- 2. Page median from center 50% of full image ---
    center_lo = w // 4
    center_hi = 3 * w // 4
    page_median = float(np.median(gray[:, center_lo:center_hi]))

    # Columns this far below the page median count as "dark".
    dark_thresh = page_median - 5.0

    # A dark page body makes the median-relative threshold unreliable; bail.
    if page_median < 180:
        return None

    # --- 3. Per-column dark fraction ---
    dark_count = np.sum(strip_means < dark_thresh, axis=0).astype(np.float64)
    dark_frac = dark_count / n_strips

    # --- 4. Smooth and find transition ---
    smooth_w = max(5, w // 100)
    if smooth_w % 2 == 0:
        smooth_w += 1  # force an odd width so the boxcar window is symmetric
    kernel = np.ones(smooth_w) / smooth_w
    frac_smooth = np.convolve(dark_frac, kernel, mode="same")

    # mode="same" zero-pads the edges; exclude that artifact margin.
    margin = smooth_w // 2
    if region_w <= 2 * margin + 10:
        return None

    transition_thresh = 0.50
    peak_frac = float(np.max(frac_smooth[margin:region_w - margin]))

    # The gutter must be dark across (nearly) all strips. NOTE(review): the
    # docstring says ">= 75%" but the code requires >= 0.70 — confirm intent.
    if peak_frac < 0.70:
        logger.debug(
            "%s gutter: peak dark fraction %.2f < 0.70", side.capitalize(), peak_frac,
        )
        return None

    peak_x = int(np.argmax(frac_smooth[margin:region_w - margin])) + margin
    gutter_inner = None

    # Walk from the peak toward the page body until the dark fraction drops
    # below 50%; the column just before the crossing is the inner boundary.
    if side == "right":
        for x in range(peak_x, margin, -1):
            if frac_smooth[x] < transition_thresh:
                gutter_inner = x + 1
                break
    else:
        for x in range(peak_x, region_w - margin):
            if frac_smooth[x] < transition_thresh:
                gutter_inner = x - 1
                break

    if gutter_inner is None:
        return None

    # --- 5. Validate gutter width ---
    # Width of the band between the image edge and the inner boundary.
    if side == "right":
        gutter_width = region_w - gutter_inner
    else:
        gutter_width = gutter_inner

    min_gutter = max(3, int(w * 0.005))  # 0.5% of full image width
    max_gutter = int(w * 0.10)           # 10% of full image width

    if gutter_width < min_gutter:
        logger.debug(
            "%s gutter: too narrow (%dpx < %dpx)", side.capitalize(),
            gutter_width, min_gutter,
        )
        return None

    if gutter_width > max_gutter:
        logger.debug(
            "%s gutter: too wide (%dpx > %dpx)", side.capitalize(),
            gutter_width, max_gutter,
        )
        return None

    # Mean brightness over the candidate gutter band.
    if side == "right":
        gutter_brightness = float(np.mean(strip_means[:, gutter_inner:]))
    else:
        gutter_brightness = float(np.mean(strip_means[:, :gutter_inner]))

    # Require a real (if subtle) darkening versus the page body.
    brightness_drop = page_median - gutter_brightness
    if brightness_drop < 3:
        logger.debug(
            "%s gutter: insufficient brightness drop (%.1f levels)",
            side.capitalize(), brightness_drop,
        )
        return None

    gutter_x = offset_x + gutter_inner

    logger.info(
        "%s gutter (continuity): x=%d, width=%dpx (%.1f%%), "
        "brightness=%.0f vs page=%.0f (drop=%.0f), frac@edge=%.2f",
        side.capitalize(), gutter_x, gutter_width,
        100.0 * gutter_width / w, gutter_brightness, page_median,
        brightness_drop, float(frac_smooth[gutter_inner]),
    )
    return gutter_x
|
|
|
|
|
|
def _detect_left_edge_shadow(
    gray: np.ndarray,
    binary: np.ndarray,
    w: int,
    h: int,
) -> int:
    """Detect the left content edge, accounting for book-spine shadow.

    Three strategies are attempted in order; the first that yields a
    coordinate wins:
    1. Scanner spine-shadow (dark gradient, range > 40)
    2. Camera gutter continuity (subtle shadow running top-to-bottom)
    3. Binary projection fallback (first ink column)
    """
    # Search only the leftmost quarter of the image.
    strip = gray[:, :max(1, w // 4)]
    for detect in (_detect_spine_shadow, _detect_gutter_continuity):
        edge = detect(gray, strip, 0, w, "left")
        if edge is not None:
            return edge
    return _detect_edge_projection(binary, axis=0, from_start=True, dim=w)
|
|
|
|
|
|
def _detect_right_edge_shadow(
    gray: np.ndarray,
    binary: np.ndarray,
    w: int,
    h: int,
) -> int:
    """Detect the right content edge, accounting for book-spine shadow.

    Three strategies are attempted in order; the first that yields a
    coordinate wins:
    1. Scanner spine-shadow (dark gradient, range > 40)
    2. Camera gutter continuity (subtle shadow running top-to-bottom)
    3. Binary projection fallback (last ink column)
    """
    # Search only the rightmost quarter of the image.
    x0 = w - max(1, w // 4)
    strip = gray[:, x0:]
    for detect in (_detect_spine_shadow, _detect_gutter_continuity):
        edge = detect(gray, strip, x0, w, "right")
        if edge is not None:
            return edge
    return _detect_edge_projection(binary, axis=0, from_start=False, dim=w)
|
|
|
|
|
|
def _detect_top_bottom_edges(binary: np.ndarray, w: int, h: int) -> Tuple[int, int]:
    """Locate the top and bottom content edges via binary row projection."""
    return (
        _detect_edge_projection(binary, axis=1, from_start=True, dim=h),
        _detect_edge_projection(binary, axis=1, from_start=False, dim=h),
    )
|
|
|
|
|
|
def _detect_edge_projection(
    binary: np.ndarray,
    axis: int,
    from_start: bool,
    dim: int,
) -> int:
    """Locate the first/last row or column whose ink density clears the threshold.

    axis=0 -> project vertically (column densities) -> returns x position
    axis=1 -> project horizontally (row densities) -> returns y position

    True-runs shorter than _MIN_RUN_FRAC of *dim* are dropped as noise
    before the edge is picked.
    """
    density = np.mean(binary, axis=axis) / 255.0
    inked = _filter_narrow_runs(
        density >= _INK_THRESHOLD,
        max(1, int(dim * _MIN_RUN_FRAC)),
    )
    hits = np.flatnonzero(inked)
    if hits.size == 0:
        # No content found: fall back to the full extent.
        return 0 if from_start else dim
    return int(hits[0] if from_start else hits[-1])
|
|
|
|
|
|
def _filter_narrow_runs(mask: np.ndarray, min_run: int) -> np.ndarray:
|
|
"""Remove True-runs shorter than min_run pixels."""
|
|
if min_run <= 1:
|
|
return mask
|
|
|
|
result = mask.copy()
|
|
n = len(result)
|
|
i = 0
|
|
while i < n:
|
|
if result[i]:
|
|
start = i
|
|
while i < n and result[i]:
|
|
i += 1
|
|
if i - start < min_run:
|
|
result[start:i] = False
|
|
else:
|
|
i += 1
|
|
return result
|