Flexible inhaltsbasierte Spaltenerkennung (2-Phasen)
Ersetzt hardcodierte Positionsregeln durch ein zweistufiges System: Phase A erkennt die Spaltengeometrie (Clustering), Phase B klassifiziert die Spaltentypen anhand des Inhalts (Sprache/Rolle) mit einer 3-stufigen Fallback-Kette.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -648,8 +648,16 @@ async def detect_columns(session_id: str):
|
||||
duration = time.time() - t0
|
||||
|
||||
columns = [asdict(r) for r in regions]
|
||||
|
||||
# Determine classification methods used
|
||||
methods = list(set(
|
||||
c.get("classification_method", "") for c in columns
|
||||
if c.get("classification_method")
|
||||
))
|
||||
|
||||
column_result = {
|
||||
"columns": columns,
|
||||
"classification_methods": methods,
|
||||
"duration_seconds": round(duration, 2),
|
||||
}
|
||||
|
||||
@@ -742,6 +750,7 @@ async def _get_columns_overlay(session_id: str) -> Response:
|
||||
"column_en": (255, 180, 0), # Blue
|
||||
"column_de": (0, 200, 0), # Green
|
||||
"column_example": (0, 140, 255), # Orange
|
||||
"column_text": (200, 200, 0), # Cyan/Turquoise
|
||||
"page_ref": (200, 0, 200), # Purple
|
||||
"column_marker": (0, 0, 220), # Red
|
||||
"header": (128, 128, 128), # Gray
|
||||
@@ -760,8 +769,11 @@ async def _get_columns_overlay(session_id: str) -> Response:
|
||||
# Solid border
|
||||
cv2.rectangle(img, (x, y), (x + w, y + h), color, 3)
|
||||
|
||||
# Label
|
||||
# Label with confidence
|
||||
label = col.get("type", "unknown").replace("column_", "").upper()
|
||||
conf = col.get("classification_confidence")
|
||||
if conf is not None and conf < 1.0:
|
||||
label = f"{label} {int(conf * 100)}%"
|
||||
cv2.putText(img, label, (x + 10, y + 30),
|
||||
cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user