Extract page number as metadata instead of silently removing it
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 27s
CI / test-go-edu-search (push) Successful in 36s
CI / test-python-klausur (push) Failing after 2m9s
CI / test-python-agent-core (push) Successful in 17s
CI / test-nodejs-website (push) Successful in 21s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 27s
CI / test-go-edu-search (push) Successful in 36s
CI / test-python-klausur (push) Failing after 2m9s
CI / test-python-agent-core (push) Successful in 17s
CI / test-nodejs-website (push) Successful in 21s
`_filter_footer_words` now returns page number info (`text`, `y_pct`, `number`) instead of just removing footer words. The page number is included in the grid result as `page_number` and displayed in the frontend summary bar as "S. 233". This preserves page numbers for later page concatenation in the customer frontend while still removing them from the grid content.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -174,6 +174,11 @@ export function GridEditor({ sessionId, onNext }: GridEditorProps) {
|
||||
Woerterbuch ({Math.round(grid.dictionary_detection.confidence * 100)}%)
|
||||
</span>
|
||||
)}
|
||||
{grid.page_number?.text && (
|
||||
<span className="px-1.5 py-0.5 rounded bg-gray-100 dark:bg-gray-700 text-gray-600 dark:text-gray-300 border border-gray-200 dark:border-gray-600">
|
||||
S. {grid.page_number.text}
|
||||
</span>
|
||||
)}
|
||||
<span className="text-gray-400">
|
||||
{grid.duration_seconds.toFixed(1)}s
|
||||
</span>
|
||||
|
||||
@@ -20,6 +20,13 @@ export interface DictionaryDetection {
|
||||
headword_col_index: number | null
|
||||
}
|
||||
|
||||
/**
 * Page number extracted from the footer region of the scan.
 *
 * Carried on the grid result as `page_number`; the editor's summary bar
 * renders it as "S. " followed by `text`.
 */
|
||||
export interface PageNumber {
|
||||
text: string // footer text of the page number; shown after "S. " in the summary bar
|
||||
y_pct: number // NOTE(review): presumably the footer's vertical position as a percentage of page height — confirm against the backend
|
||||
number?: number // optional numeric form of the page number; NOTE(review): presumably omitted when `text` cannot be parsed — confirm
|
||||
}
|
||||
|
||||
/** A complete structured grid with zones, ready for the Excel-like editor. */
|
||||
export interface StructuredGrid {
|
||||
session_id: string
|
||||
@@ -31,6 +38,7 @@ export interface StructuredGrid {
|
||||
formatting: GridFormatting
|
||||
layout_metrics?: LayoutMetrics
|
||||
dictionary_detection?: DictionaryDetection
|
||||
page_number?: PageNumber | null
|
||||
duration_seconds: number
|
||||
edited?: boolean
|
||||
layout_dividers?: LayoutDividers
|
||||
|
||||
Reference in New Issue
Block a user