fix: filter words and color recoveries inside graphic/image regions
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 30s
CI / test-go-edu-search (push) Successful in 31s
CI / test-python-klausur (push) Failing after 2m8s
CI / test-python-agent-core (push) Successful in 17s
CI / test-nodejs-website (push) Successful in 21s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 30s
CI / test-go-edu-search (push) Successful in 31s
CI / test-python-klausur (push) Failing after 2m8s
CI / test-python-agent-core (push) Successful in 17s
CI / test-nodejs-website (push) Successful in 21s
- Load structure_result from the session to get the detected graphic bounds.
- Exclude OCR words whose center falls inside a graphic region.
- Exclude recovered colored text whose center falls inside a graphic region.
- Reject color-recovery regions wider than 4x the median word height.

Fixes garbage characters (!, ?, •) in box zones and false OCR detections (N, ?) in image areas.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -256,6 +256,9 @@ def recover_colored_text(
|
|||||||
bx, by, bw, bh = cv2.boundingRect(cnt)
|
bx, by, bw, bh = cv2.boundingRect(cnt)
|
||||||
if bh < 6:
|
if bh < 6:
|
||||||
continue
|
continue
|
||||||
|
# Reject regions too wide to be single characters
|
||||||
|
if bw > median_h * 4:
|
||||||
|
continue
|
||||||
candidates.append((area, bx, by, bw, bh))
|
candidates.append((area, bx, by, bw, bh))
|
||||||
|
|
||||||
# Keep largest first, limited count
|
# Keep largest first, limited count
|
||||||
|
|||||||
@@ -613,6 +613,36 @@ async def build_grid(session_id: str):
|
|||||||
logger.info("build-grid session %s: %d words from %d cells",
|
logger.info("build-grid session %s: %d words from %d cells",
|
||||||
session_id, len(all_words), len(word_result["cells"]))
|
session_id, len(all_words), len(word_result["cells"]))
|
||||||
|
|
||||||
|
# 2b. Filter words inside detected graphic/image regions
|
||||||
|
structure_result = session.get("structure_result")
|
||||||
|
graphic_rects = []
|
||||||
|
if structure_result:
|
||||||
|
for g in structure_result.get("graphics", []):
|
||||||
|
graphic_rects.append({
|
||||||
|
"x": g["x"], "y": g["y"],
|
||||||
|
"w": g["w"], "h": g["h"],
|
||||||
|
})
|
||||||
|
if graphic_rects:
|
||||||
|
before = len(all_words)
|
||||||
|
filtered = []
|
||||||
|
for w in all_words:
|
||||||
|
w_cx = w["left"] + w.get("width", 0) / 2
|
||||||
|
w_cy = w["top"] + w.get("height", 0) / 2
|
||||||
|
inside = any(
|
||||||
|
gr["x"] <= w_cx <= gr["x"] + gr["w"]
|
||||||
|
and gr["y"] <= w_cy <= gr["y"] + gr["h"]
|
||||||
|
for gr in graphic_rects
|
||||||
|
)
|
||||||
|
if not inside:
|
||||||
|
filtered.append(w)
|
||||||
|
removed = before - len(filtered)
|
||||||
|
if removed:
|
||||||
|
all_words = filtered
|
||||||
|
logger.info(
|
||||||
|
"build-grid session %s: removed %d words inside %d graphic region(s)",
|
||||||
|
session_id, removed, len(graphic_rects),
|
||||||
|
)
|
||||||
|
|
||||||
# 3. Load image for box detection
|
# 3. Load image for box detection
|
||||||
img_png = await get_session_image(session_id, "cropped")
|
img_png = await get_session_image(session_id, "cropped")
|
||||||
if not img_png:
|
if not img_png:
|
||||||
@@ -635,6 +665,16 @@ async def build_grid(session_id: str):
|
|||||||
if img_bgr is not None:
|
if img_bgr is not None:
|
||||||
# --- Recover colored text that OCR missed (before grid building) ---
|
# --- Recover colored text that OCR missed (before grid building) ---
|
||||||
recovered = recover_colored_text(img_bgr, all_words)
|
recovered = recover_colored_text(img_bgr, all_words)
|
||||||
|
if recovered and graphic_rects:
|
||||||
|
# Filter recovered chars inside graphic regions
|
||||||
|
recovered = [
|
||||||
|
r for r in recovered
|
||||||
|
if not any(
|
||||||
|
gr["x"] <= r["left"] + r.get("width", 0) / 2 <= gr["x"] + gr["w"]
|
||||||
|
and gr["y"] <= r["top"] + r.get("height", 0) / 2 <= gr["y"] + gr["h"]
|
||||||
|
for gr in graphic_rects
|
||||||
|
)
|
||||||
|
]
|
||||||
if recovered:
|
if recovered:
|
||||||
recovered_count = len(recovered)
|
recovered_count = len(recovered)
|
||||||
all_words.extend(recovered)
|
all_words.extend(recovered)
|
||||||
|
|||||||
Reference in New Issue
Block a user