feat: ImageLayoutEditor, arrow-key nav, multi-select bold, wider columns
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 32s
CI / test-go-edu-search (push) Successful in 25s
CI / test-python-klausur (push) Failing after 1m52s
CI / test-python-agent-core (push) Successful in 15s
CI / test-nodejs-website (push) Successful in 18s

- New ImageLayoutEditor: SVG overlay on original scan with draggable
  column dividers, horizontal guidelines (margins/header/footer),
  double-click to add columns, x-button to delete
- GridTable: MIN_COL_WIDTH 40→80px for better readability
- Arrow up/down keys navigate between rows in the grid editor
- Ctrl+Click for multi-cell selection, Ctrl+B to toggle bold on selection
- getAdjacentCell works for cells that don't exist yet (new rows/cols)
- deleteColumn now merges x-boundaries correctly
- Session restore fix: grid_editor_result/structure_result in session GET
- Footer row 3-state cycle, auto-create cells for empty footer rows
- Grid save/build/GT-mark now advance current_step to 11

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-03-24 07:45:39 +01:00
parent 4e668660a7
commit 65f4ce1947
12 changed files with 1422 additions and 90 deletions

View File

@@ -1201,7 +1201,7 @@ def _filter_decorative_margin(
img_w: int,
log: Any,
session_id: str,
) -> None:
) -> Dict[str, Any]:
"""Remove words that belong to a decorative alphabet strip on a margin.
Some vocabulary worksheets have a vertical A-Z alphabet graphic along
@@ -1220,9 +1220,13 @@ def _filter_decorative_margin(
artifacts like "Vv" that belong to the same decorative element.
Modifies *words* in place.
Returns:
Dict with 'found' (bool), 'side' (str), 'letters_detected' (int).
"""
no_strip: Dict[str, Any] = {"found": False, "side": "", "letters_detected": 0}
if not words or img_w <= 0:
return
return no_strip
margin_cutoff = img_w * 0.30
# Phase 1: find candidate strips using single-char words
@@ -1278,6 +1282,9 @@ def _filter_decorative_margin(
"(strip x=%d-%d)",
session_id, removed, side, strip_x_lo, strip_x_hi,
)
return {"found": True, "side": side, "letters_detected": len(strip)}
return no_strip
def _filter_footer_words(
@@ -1427,7 +1434,11 @@ async def _build_grid_core(session_id: str, session: dict) -> dict:
# Some worksheets have a decorative alphabet strip along one margin
# (A-Z in a graphic). OCR reads these as single-char words aligned
# vertically. Detect and remove them before grid building.
_filter_decorative_margin(all_words, img_w, logger, session_id)
margin_strip_info = _filter_decorative_margin(all_words, img_w, logger, session_id)
margin_strip_detected = margin_strip_info.get("found", False)
# Read document_category from session (user-selected or auto-detected)
document_category = session.get("document_category")
# 2c. Filter footer rows (page numbers at the very bottom).
# Isolated short text in the bottom 5% of the page is typically a
@@ -1997,18 +2008,21 @@ async def _build_grid_core(session_id: str, session: dict) -> dict:
removed_pipes, z.get("zone_index", 0),
)
# Also strip pipe chars from word_box text and cell text that may remain
# from OCR reading syllable-separation marks (e.g. "zu|trau|en" → "zutrauen").
# Strip pipe chars ONLY from word_boxes/cells where the pipe is an
# OCR column-divider artifact. Preserve pipes that are embedded in
# words as syllable separators (e.g. "zu|trau|en") — these are
# intentional and used in dictionary Ground Truth.
for z in zones_data:
for cell in z.get("cells", []):
for wb in cell.get("word_boxes", []):
wbt = wb.get("text", "")
if "|" in wbt:
wb["text"] = wbt.replace("|", "")
# Only strip if the ENTIRE word_box is just pipe(s)
# (handled by _PIPE_RE above) — leave embedded pipes alone
text = cell.get("text", "")
if "|" in text:
cleaned = text.replace("|", "").strip()
if cleaned != text:
# Only strip leading/trailing pipes (OCR artifacts at cell edges)
cleaned = text.strip("|").strip()
if cleaned != text.strip():
cell["text"] = cleaned
# 4e. Detect and remove page-border decoration strips.
@@ -2668,6 +2682,63 @@ async def _build_grid_core(session_id: str, session: dict) -> dict:
)
font_size_suggestion = max(10, int(avg_row_height * 0.6))
# --- Dictionary detection on assembled grid ---
# Build lightweight ColumnGeometry-like structures from zone columns for
# dictionary signal scoring.
from cv_layout import _score_dictionary_signals
dict_detection: Dict[str, Any] = {"is_dictionary": False, "confidence": 0.0}
try:
from cv_vocab_types import ColumnGeometry
for z in zones_data:
zone_cells = z.get("cells", [])
zone_cols = z.get("columns", [])
if len(zone_cols) < 2 or len(zone_cells) < 10:
continue
# Build pseudo-ColumnGeometry per column
pseudo_geoms = []
for col in zone_cols:
ci = col["index"]
col_cells = [c for c in zone_cells if c.get("col_index") == ci]
# Flatten word_boxes into word dicts compatible with _score_language
col_words = []
for cell in col_cells:
for wb in cell.get("word_boxes") or []:
col_words.append({
"text": wb.get("text", ""),
"conf": wb.get("conf", 0),
"top": wb.get("top", 0),
"left": wb.get("left", 0),
"height": wb.get("height", 0),
"width": wb.get("width", 0),
})
# Fallback: use cell text if no word_boxes
if not cell.get("word_boxes") and cell.get("text"):
col_words.append({
"text": cell["text"],
"conf": cell.get("confidence", 50),
"top": cell.get("bbox_px", {}).get("y", 0),
"left": cell.get("bbox_px", {}).get("x", 0),
"height": cell.get("bbox_px", {}).get("h", 20),
"width": cell.get("bbox_px", {}).get("w", 50),
})
col_w = col.get("x_max_px", 0) - col.get("x_min_px", 0)
pseudo_geoms.append(ColumnGeometry(
index=ci, x=col.get("x_min_px", 0), y=0,
width=max(col_w, 1), height=img_h,
word_count=len(col_words), words=col_words,
width_ratio=col_w / max(img_w, 1),
))
if len(pseudo_geoms) >= 2:
dd = _score_dictionary_signals(
pseudo_geoms,
document_category=document_category,
margin_strip_detected=margin_strip_detected,
)
if dd["confidence"] > dict_detection["confidence"]:
dict_detection = dd
except Exception as e:
logger.warning("Dictionary detection failed: %s", e)
result = {
"session_id": session_id,
"image_width": img_w,
@@ -2693,6 +2764,13 @@ async def _build_grid_core(session_id: str, session: dict) -> dict:
"avg_row_height_px": round(avg_row_height, 1),
"font_size_suggestion_px": font_size_suggestion,
},
"dictionary_detection": {
"is_dictionary": dict_detection.get("is_dictionary", False),
"confidence": dict_detection.get("confidence", 0.0),
"signals": dict_detection.get("signals", {}),
"article_col_index": dict_detection.get("article_col_index"),
"headword_col_index": dict_detection.get("headword_col_index"),
},
"duration_seconds": round(duration, 2),
}
@@ -2722,8 +2800,8 @@ async def build_grid(session_id: str):
except ValueError as e:
raise HTTPException(status_code=400, detail=str(e))
# Persist to DB
await update_session_db(session_id, grid_editor_result=result)
# Persist to DB and advance current_step to 11 (reconstruction complete)
await update_session_db(session_id, grid_editor_result=result, current_step=11)
logger.info(
"build-grid session %s: %d zones, %d cols, %d rows, %d cells, "
@@ -2772,7 +2850,7 @@ async def save_grid(session_id: str, request: Request):
"edited": True,
}
await update_session_db(session_id, grid_editor_result=result)
await update_session_db(session_id, grid_editor_result=result, current_step=11)
logger.info("save-grid session %s: %d zones saved", session_id, len(body["zones"]))