From 45b83560fdf32634444131eb5ca4c8994d09c4e0 Mon Sep 17 00:00:00 2001
From: Benjamin Admin <benjaminadmin@MacBookPro.fritz.box>
Date: Fri, 20 Mar 2026 16:38:12 +0100
Subject: [PATCH] Vertical zone split: detect divider lines and create
 independent sub-zones

Pages with two side-by-side vocabulary columns separated by a vertical
black line are now split into independent sub-zones before row/column
detection. Each sub-zone gets its own rows, preventing misalignment from
different heading rhythms.

- _detect_vertical_dividers(): finds clusters of >=5 pipe word_boxes at
  consistent x positions spanning >=50% of zone height
- _split_zone_at_vertical_dividers(): creates left/middle/right PageZone
  sub-zones with layout_hint and vsplit_group metadata
- Column union skips vsplit zones (independent column sets)
- Frontend renders vsplit zones side by side via flex layout
- PageZone gets layout_hint + vsplit_group fields

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .../components/grid-editor/GridEditor.tsx     |  77 ++++++---
 admin-lehrer/components/grid-editor/types.ts  |   2 +
 klausur-service/backend/cv_vocab_types.py     |   2 +
 klausur-service/backend/grid_editor_api.py    | 153 +++++++++++++++++-
 4 files changed, 215 insertions(+), 19 deletions(-)
diff --git a/admin-lehrer/components/grid-editor/GridEditor.tsx b/admin-lehrer/components/grid-editor/GridEditor.tsx
index 680489a..2d2a25a 100644
--- a/admin-lehrer/components/grid-editor/GridEditor.tsx
+++ b/admin-lehrer/components/grid-editor/GridEditor.tsx
@@ -186,25 +186,66 @@ export function GridEditor({ sessionId, onNext }: GridEditorProps) {
         <GridImageOverlay sessionId={sessionId} grid={grid} />
       )}
 
-      {/* Zone tables */}
+      {/* Zone tables — group vsplit zones side by side */}
       <div className="space-y-4">
-        {grid.zones.map((zone) => (
-          <div
-            key={zone.zone_index}
-            className="bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 overflow-hidden"
-          >
-            <GridTable
-              zone={zone}
-              layoutMetrics={grid.layout_metrics}
-              selectedCell={selectedCell}
-              onSelectCell={setSelectedCell}
-              onCellTextChange={updateCellText}
-              onToggleColumnBold={toggleColumnBold}
-              onToggleRowHeader={toggleRowHeader}
-              onNavigate={handleNavigate}
-            />
-          </div>
-        ))}
+        {(() => {
+          // Group consecutive zones with same vsplit_group
+          const groups: typeof grid.zones[][] = []
+          for (const zone of grid.zones) {
+            const prev = groups[groups.length - 1]
+            if (
+              prev &&
+              zone.vsplit_group != null &&
+              prev[0].vsplit_group === zone.vsplit_group
+            ) {
+              prev.push(zone)
+            } else {
+              groups.push([zone])
+            }
+          }
+          return groups.map((group) =>
+            group.length === 1 ? (
+              <div
+                key={group[0].zone_index}
+                className="bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 overflow-hidden"
+              >
+                <GridTable
+                  zone={group[0]}
+                  layoutMetrics={grid.layout_metrics}
+                  selectedCell={selectedCell}
+                  onSelectCell={setSelectedCell}
+                  onCellTextChange={updateCellText}
+                  onToggleColumnBold={toggleColumnBold}
+                  onToggleRowHeader={toggleRowHeader}
+                  onNavigate={handleNavigate}
+                />
+              </div>
+            ) : (
+              <div
+                key={`vsplit-${group[0].vsplit_group}`}
+                className="flex gap-2"
+              >
+                {group.map((zone) => (
+                  <div
+                    key={zone.zone_index}
+                    className="flex-1 min-w-0 bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 overflow-hidden"
+                  >
+                    <GridTable
+                      zone={zone}
+                      layoutMetrics={grid.layout_metrics}
+                      selectedCell={selectedCell}
+                      onSelectCell={setSelectedCell}
+                      onCellTextChange={updateCellText}
+                      onToggleColumnBold={toggleColumnBold}
+                      onToggleRowHeader={toggleRowHeader}
+                      onNavigate={handleNavigate}
+                    />
+                  </div>
+                ))}
+              </div>
+            ),
+          )
+        })()}
       </div>
 
       {/* Tip */}
diff --git a/admin-lehrer/components/grid-editor/types.ts b/admin-lehrer/components/grid-editor/types.ts
index c963f8e..c7b9f6c 100644
--- a/admin-lehrer/components/grid-editor/types.ts
+++ b/admin-lehrer/components/grid-editor/types.ts
@@ -52,6 +52,8 @@ export interface GridZone {
   rows: GridRow[]
   cells: GridEditorCell[]
   header_rows: number[]
+  layout_hint?: 'left_of_vsplit' | 'right_of_vsplit' | 'middle_of_vsplit'
+  vsplit_group?: number
 }
 
 export interface BBox {
diff --git a/klausur-service/backend/cv_vocab_types.py b/klausur-service/backend/cv_vocab_types.py
index e28abc8..12989f9 100644
--- a/klausur-service/backend/cv_vocab_types.py
+++ b/klausur-service/backend/cv_vocab_types.py
@@ -179,3 +179,5 @@ class PageZone:
     box: Optional[DetectedBox] = None
     columns: List[ColumnGeometry] = field(default_factory=list)
     image_overlays: List[Dict] = field(default_factory=list)
+    layout_hint: Optional[str] = None   # 'left_of_vsplit', 'middle_of_vsplit', 'right_of_vsplit'
+    vsplit_group: Optional[int] = None  # group ID for side-by-side rendering
diff --git a/klausur-service/backend/grid_editor_api.py b/klausur-service/backend/grid_editor_api.py
index dff43c3..0de693a 100644
--- a/klausur-service/backend/grid_editor_api.py
+++ b/klausur-service/backend/grid_editor_api.py
@@ -449,6 +449,108 @@ def _words_in_zone(
     return result
 
 
+# ---------------------------------------------------------------------------
+# Vertical divider detection and zone splitting
+# ---------------------------------------------------------------------------
+
+_PIPE_RE_VSPLIT = re.compile(r"^\|+$")
+
+
+def _detect_vertical_dividers(
+    words: List[Dict],
+    zone_x: int,
+    zone_w: int,
+    zone_y: int,
+    zone_h: int,
+) -> List[float]:
+    """Detect vertical divider lines from pipe word_boxes at consistent x.
+
+    Returns sorted cluster-mean x-centers (empty if none); zone_y is currently unused.
+    """
+    if not words or zone_w <= 0 or zone_h <= 0:
+        return []
+
+    # Collect word_boxes whose stripped text consists only of "|" characters
+    pipes = [
+        w for w in words
+        if _PIPE_RE_VSPLIT.match((w.get("text") or "").strip())
+    ]
+    if len(pipes) < 5:
+        return []
+
+    # Chain-cluster sorted pipe x-centers: a gap > tolerance starts a new cluster
+    tolerance = max(15, int(zone_w * 0.02))
+    pipe_xs = sorted(w["left"] + w["width"] / 2 for w in pipes)
+
+    clusters: List[List[float]] = [[pipe_xs[0]]]
+    for x in pipe_xs[1:]:
+        if x - clusters[-1][-1] <= tolerance:
+            clusters[-1].append(x)
+        else:
+            clusters.append([x])
+
+    dividers: List[float] = []
+    for cluster in clusters:
+        if len(cluster) < 5:
+            continue
+        mean_x = sum(cluster) / len(cluster)
+        # Cluster mean must lie between 15% and 85% of zone width (relative to zone_x)
+        rel_pos = (mean_x - zone_x) / zone_w
+        if rel_pos < 0.15 or rel_pos > 0.85:
+            continue
+        # Vertical coverage: pipes within tolerance of mean_x must span >= 50% of zone height
+        cluster_pipes = [
+            w for w in pipes
+            if abs(w["left"] + w["width"] / 2 - mean_x) <= tolerance
+        ]
+        ys = [w["top"] for w in cluster_pipes] + [w["top"] + w["height"] for w in cluster_pipes]
+        y_span = max(ys) - min(ys) if ys else 0
+        if y_span < zone_h * 0.5:
+            continue
+        dividers.append(mean_x)
+
+    return sorted(dividers)
+
+
+def _split_zone_at_vertical_dividers(
+    zone: "PageZone",
+    divider_xs: List[float],
+    vsplit_group_id: int,
+) -> List["PageZone"]:
+    """Split a PageZone at divider x-positions into sub-zones; box and image_overlays are shared by reference, not copied."""
+    from cv_vocab_types import PageZone
+
+    boundaries = [zone.x] + divider_xs + [zone.x + zone.width]
+    hints = []
+    for i in range(len(boundaries) - 1):
+        if i == 0:
+            hints.append("left_of_vsplit")
+        elif i == len(boundaries) - 2:
+            hints.append("right_of_vsplit")
+        else:
+            hints.append("middle_of_vsplit")
+
+    sub_zones = []
+    for i in range(len(boundaries) - 1):
+        x_start = int(boundaries[i])
+        x_end = int(boundaries[i + 1])
+        sub = PageZone(
+            index=0,  # placeholder; the caller is expected to re-index
+            zone_type=zone.zone_type,
+            y=zone.y,
+            height=zone.height,
+            x=x_start,
+            width=x_end - x_start,
+            box=zone.box,
+            image_overlays=zone.image_overlays,
+            layout_hint=hints[i],
+            vsplit_group=vsplit_group_id,
+        )
+        sub_zones.append(sub)
+
+    return sub_zones
+
+
 def _merge_content_zones_across_boxes(
     zones: List,
     content_x: int,
@@ -1404,11 +1506,49 @@ async def _build_grid_core(session_id: str, session: dict) -> dict:
                     page_zones, content_x, content_w
                 )
 
+                # 3b. Detect vertical dividers and split content zones
+                vsplit_group_counter = 0
+                expanded_zones: List = []
+                for pz in page_zones:
+                    if pz.zone_type != "content":
+                        expanded_zones.append(pz)
+                        continue
+                    zone_words = _words_in_zone(
+                        all_words, pz.y, pz.height, pz.x, pz.width
+                    )
+                    divider_xs = _detect_vertical_dividers(
+                        zone_words, pz.x, pz.width, pz.y, pz.height
+                    )
+                    if divider_xs:
+                        sub_zones = _split_zone_at_vertical_dividers(
+                            pz, divider_xs, vsplit_group_counter
+                        )
+                        expanded_zones.extend(sub_zones)
+                        vsplit_group_counter += 1
+                        # Remove pipe words so they don't appear in sub-zones
+                        pipe_ids = set(
+                            id(w) for w in zone_words
+                            if _PIPE_RE_VSPLIT.match((w.get("text") or "").strip())
+                        )
+                        all_words[:] = [w for w in all_words if id(w) not in pipe_ids]
+                        logger.info(
+                            "build-grid: vertical split zone %d at x=%s → %d sub-zones",
+                            pz.index, [int(x) for x in divider_xs], len(sub_zones),
+                        )
+                    else:
+                        expanded_zones.append(pz)
+                # Re-index zones
+                for i, pz in enumerate(expanded_zones):
+                    pz.index = i
+                page_zones = expanded_zones
+
                 # --- Union columns from all content zones ---
                 # Each content zone detects columns independently.  Narrow
                 # columns (page refs, markers) may appear in only one zone.
                 # Merge column split-points from ALL content zones so every
                 # zone shares the full column set.
+                # NOTE: Zones from a vertical split are independent and must
+                # NOT share columns with each other.
 
                 # First pass: build grids per zone independently
                 zone_grids: List[Dict] = []
@@ -1459,8 +1599,11 @@ async def _build_grid_core(session_id: str, session: dict) -> dict:
                     zone_grids.append({"pz": pz, "words": zone_words, "grid": grid})
 
                 # Second pass: merge column boundaries from all content zones
+                # Exclude zones from vertical splits — they have independent columns.
                 content_zones = [
-                    zg for zg in zone_grids if zg["pz"].zone_type == "content"
+                    zg for zg in zone_grids
+                    if zg["pz"].zone_type == "content"
+                    and zg["pz"].vsplit_group is None
                 ]
                 if len(content_zones) > 1:
                     # Collect column split points (x_min of non-first columns)
@@ -1564,6 +1707,11 @@ async def _build_grid_core(session_id: str, session: dict) -> dict:
                     if pz.image_overlays:
                         zone_entry["image_overlays"] = pz.image_overlays
 
+                    if pz.layout_hint:
+                        zone_entry["layout_hint"] = pz.layout_hint
+                    if pz.vsplit_group is not None:
+                        zone_entry["vsplit_group"] = pz.vsplit_group
+
                     zones_data.append(zone_entry)
 
     # 4. Fallback: no boxes detected → single zone with all words
@@ -1719,8 +1867,11 @@ async def _build_grid_core(session_id: str, session: dict) -> dict:
     # OCR reads physical vertical divider lines as "|" or "||" characters.
     # These sit at consistent x positions near column boundaries and pollute
     # cell text.  Remove them from word_boxes and rebuild cell text.
+    # NOTE: Zones from a vertical split already had pipes removed in step 3b.
     _PIPE_RE = re.compile(r"^\|+$")
     for z in zones_data:
+        if z.get("vsplit_group") is not None:
+            continue  # pipes already removed before split
         removed_pipes = 0
         for cell in z.get("cells", []):
             wbs = cell.get("word_boxes") or []