feat: improved OCR pipeline session manager with categories, thumbnails, pipeline logging
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 39s
CI / test-go-edu-search (push) Successful in 27s
CI / test-python-klausur (push) Failing after 1m48s
CI / test-python-agent-core (push) Successful in 17s
CI / test-nodejs-website (push) Successful in 20s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 39s
CI / test-go-edu-search (push) Successful in 27s
CI / test-python-klausur (push) Failing after 1m48s
CI / test-python-agent-core (push) Successful in 17s
CI / test-nodejs-website (push) Successful in 20s
- Add document_category (10 types) and pipeline_log JSONB columns - Session list: thumbnails, copyable IDs, category/doc_type badges - Inline category dropdown, bulk delete, pipeline step logging - New endpoints: thumbnail, delete-all, pipeline-log, categories - Cleared all 22 old test sessions Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -66,7 +66,9 @@ async def init_ocr_pipeline_tables():
|
||||
ADD COLUMN IF NOT EXISTS clean_png BYTEA,
|
||||
ADD COLUMN IF NOT EXISTS handwriting_removal_meta JSONB,
|
||||
ADD COLUMN IF NOT EXISTS doc_type VARCHAR(50),
|
||||
ADD COLUMN IF NOT EXISTS doc_type_result JSONB
|
||||
ADD COLUMN IF NOT EXISTS doc_type_result JSONB,
|
||||
ADD COLUMN IF NOT EXISTS document_category VARCHAR(50),
|
||||
ADD COLUMN IF NOT EXISTS pipeline_log JSONB
|
||||
""")
|
||||
|
||||
|
||||
@@ -91,6 +93,7 @@ async def create_session_db(
|
||||
deskew_result, dewarp_result, column_result, row_result,
|
||||
word_result, ground_truth, auto_shear_degrees,
|
||||
doc_type, doc_type_result,
|
||||
document_category, pipeline_log,
|
||||
created_at, updated_at
|
||||
""", uuid.UUID(session_id), name, filename, original_png)
|
||||
|
||||
@@ -106,6 +109,7 @@ async def get_session_db(session_id: str) -> Optional[Dict[str, Any]]:
|
||||
deskew_result, dewarp_result, column_result, row_result,
|
||||
word_result, ground_truth, auto_shear_degrees,
|
||||
doc_type, doc_type_result,
|
||||
document_category, pipeline_log,
|
||||
created_at, updated_at
|
||||
FROM ocr_pipeline_sessions WHERE id = $1
|
||||
""", uuid.UUID(session_id))
|
||||
@@ -151,9 +155,10 @@ async def update_session_db(session_id: str, **kwargs) -> Optional[Dict[str, Any
|
||||
'deskew_result', 'dewarp_result', 'column_result', 'row_result',
|
||||
'word_result', 'ground_truth', 'auto_shear_degrees',
|
||||
'doc_type', 'doc_type_result',
|
||||
'document_category', 'pipeline_log',
|
||||
}
|
||||
|
||||
jsonb_fields = {'deskew_result', 'dewarp_result', 'column_result', 'row_result', 'word_result', 'ground_truth', 'handwriting_removal_meta', 'doc_type_result'}
|
||||
jsonb_fields = {'deskew_result', 'dewarp_result', 'column_result', 'row_result', 'word_result', 'ground_truth', 'handwriting_removal_meta', 'doc_type_result', 'pipeline_log'}
|
||||
|
||||
for key, value in kwargs.items():
|
||||
if key in allowed_fields:
|
||||
@@ -180,6 +185,7 @@ async def update_session_db(session_id: str, **kwargs) -> Optional[Dict[str, Any
|
||||
deskew_result, dewarp_result, column_result, row_result,
|
||||
word_result, ground_truth, auto_shear_degrees,
|
||||
doc_type, doc_type_result,
|
||||
document_category, pipeline_log,
|
||||
created_at, updated_at
|
||||
""", *values)
|
||||
|
||||
@@ -194,6 +200,7 @@ async def list_sessions_db(limit: int = 50) -> List[Dict[str, Any]]:
|
||||
async with pool.acquire() as conn:
|
||||
rows = await conn.fetch("""
|
||||
SELECT id, name, filename, status, current_step,
|
||||
document_category, doc_type,
|
||||
created_at, updated_at
|
||||
FROM ocr_pipeline_sessions
|
||||
ORDER BY created_at DESC
|
||||
@@ -213,6 +220,18 @@ async def delete_session_db(session_id: str) -> bool:
|
||||
return result == "DELETE 1"
|
||||
|
||||
|
||||
async def delete_all_sessions_db() -> int:
|
||||
"""Delete all sessions. Returns number of deleted rows."""
|
||||
pool = await get_pool()
|
||||
async with pool.acquire() as conn:
|
||||
result = await conn.execute("DELETE FROM ocr_pipeline_sessions")
|
||||
# result is e.g. "DELETE 5"
|
||||
try:
|
||||
return int(result.split()[-1])
|
||||
except (ValueError, IndexError):
|
||||
return 0
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# HELPER
|
||||
# =============================================================================
|
||||
@@ -235,7 +254,7 @@ def _row_to_dict(row: asyncpg.Record) -> Dict[str, Any]:
|
||||
result[key] = result[key].isoformat()
|
||||
|
||||
# JSONB → parsed (asyncpg returns str for JSONB)
|
||||
for key in ['deskew_result', 'dewarp_result', 'column_result', 'row_result', 'word_result', 'ground_truth', 'doc_type_result']:
|
||||
for key in ['deskew_result', 'dewarp_result', 'column_result', 'row_result', 'word_result', 'ground_truth', 'doc_type_result', 'pipeline_log']:
|
||||
if key in result and result[key] is not None:
|
||||
if isinstance(result[key], str):
|
||||
result[key] = json.loads(result[key])
|
||||
|
||||
Reference in New Issue
Block a user