feat: add color detection for OCR word boxes
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 25s
CI / test-go-edu-search (push) Successful in 27s
CI / test-python-klausur (push) Failing after 1m51s
CI / test-nodejs-website (push) Has been cancelled
CI / test-python-agent-core (push) Has been cancelled
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 25s
CI / test-go-edu-search (push) Successful in 27s
CI / test-python-klausur (push) Failing after 1m51s
CI / test-nodejs-website (push) Has been cancelled
CI / test-python-agent-core (push) Has been cancelled
New cv_color_detect.py module:
- detect_word_colors(): annotates existing words with their text color (HSV analysis)
- recover_colored_text(): finds colored text regions missed by standard OCR (e.g. red "!" markers) using HSV masks + contour detection

Integrated into build-grid: words get color/color_name fields, and recovered colored regions are merged into the word list before grid building.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -20,6 +20,7 @@ import numpy as np
|
||||
from fastapi import APIRouter, HTTPException, Request
|
||||
|
||||
from cv_box_detect import detect_boxes, split_page_into_zones
|
||||
from cv_color_detect import detect_word_colors, recover_colored_text
|
||||
from cv_words_first import _cluster_rows, _build_cells
|
||||
from ocr_pipeline_session_store import (
|
||||
get_session_db,
|
||||
@@ -438,15 +439,30 @@ async def build_grid(session_id: str):
|
||||
|
||||
zones_data: List[Dict[str, Any]] = []
|
||||
boxes_detected = 0
|
||||
recovered_count = 0
|
||||
img_bgr = None
|
||||
|
||||
content_x, content_y, content_w, content_h = _get_content_bounds(all_words)
|
||||
|
||||
if img_png:
|
||||
# Decode image for box detection
|
||||
# Decode image for color detection + box detection
|
||||
arr = np.frombuffer(img_png, dtype=np.uint8)
|
||||
img_bgr = cv2.imdecode(arr, cv2.IMREAD_COLOR)
|
||||
|
||||
if img_bgr is not None:
|
||||
# --- Color detection: annotate existing words ---
|
||||
detect_word_colors(img_bgr, all_words)
|
||||
|
||||
# --- Recover colored text that OCR missed ---
|
||||
recovered = recover_colored_text(img_bgr, all_words)
|
||||
if recovered:
|
||||
recovered_count = len(recovered)
|
||||
all_words.extend(recovered)
|
||||
logger.info(
|
||||
"build-grid session %s: +%d recovered colored words",
|
||||
session_id, recovered_count,
|
||||
)
|
||||
|
||||
# Detect bordered boxes
|
||||
boxes = detect_boxes(
|
||||
img_bgr,
|
||||
@@ -529,6 +545,14 @@ async def build_grid(session_id: str):
|
||||
total_columns = sum(len(z.get("columns", [])) for z in zones_data)
|
||||
total_rows = sum(len(z.get("rows", [])) for z in zones_data)
|
||||
|
||||
# Collect color statistics from all word_boxes in cells
|
||||
color_stats: Dict[str, int] = {}
|
||||
for z in zones_data:
|
||||
for cell in z.get("cells", []):
|
||||
for wb in cell.get("word_boxes", []):
|
||||
cn = wb.get("color_name", "black")
|
||||
color_stats[cn] = color_stats.get(cn, 0) + 1
|
||||
|
||||
result = {
|
||||
"session_id": session_id,
|
||||
"image_width": img_w,
|
||||
@@ -541,6 +565,8 @@ async def build_grid(session_id: str):
|
||||
"total_rows": total_rows,
|
||||
"total_cells": total_cells,
|
||||
"total_words": len(all_words),
|
||||
"recovered_colored": recovered_count,
|
||||
"color_stats": color_stats,
|
||||
},
|
||||
"formatting": {
|
||||
"bold_columns": [],
|
||||
|
||||
Reference in New Issue
Block a user