feat: Orientierung + Zuschneiden als Schritte 1-2 in OCR-Pipeline
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 28s
CI / test-go-edu-search (push) Successful in 27s
CI / test-python-klausur (push) Failing after 1m59s
CI / test-python-agent-core (push) Successful in 17s
CI / test-nodejs-website (push) Successful in 18s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 28s
CI / test-go-edu-search (push) Successful in 27s
CI / test-python-klausur (push) Failing after 1m59s
CI / test-python-agent-core (push) Successful in 17s
CI / test-nodejs-website (push) Successful in 18s
Zwei neue Wizard-Schritte vor Begradigung: - Step 1: Orientierungserkennung (0/90/180/270° via Tesseract OSD) - Step 2: Seitenrand-Erkennung und Zuschnitt (Scannerraender entfernen) Backend: - orientation_crop_api.py: POST /orientation, POST /crop, POST /crop/skip - page_crop.py: detect_and_crop_page() mit Format-Erkennung (A4/A5/Letter) - Session-Store: orientation_result, crop_result Felder - Pipeline nutzt zugeschnittenes Bild fuer Deskew/Dewarp Frontend: - StepOrientation.tsx: Upload + Auto-Orientierung + Vorher/Nachher - StepCrop.tsx: Auto-Crop + Format-Badge + Ueberspringen-Option - Pipeline-Stepper: 10 Schritte (war 8) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -68,7 +68,11 @@ async def init_ocr_pipeline_tables():
|
||||
ADD COLUMN IF NOT EXISTS doc_type VARCHAR(50),
|
||||
ADD COLUMN IF NOT EXISTS doc_type_result JSONB,
|
||||
ADD COLUMN IF NOT EXISTS document_category VARCHAR(50),
|
||||
ADD COLUMN IF NOT EXISTS pipeline_log JSONB
|
||||
ADD COLUMN IF NOT EXISTS pipeline_log JSONB,
|
||||
ADD COLUMN IF NOT EXISTS oriented_png BYTEA,
|
||||
ADD COLUMN IF NOT EXISTS cropped_png BYTEA,
|
||||
ADD COLUMN IF NOT EXISTS orientation_result JSONB,
|
||||
ADD COLUMN IF NOT EXISTS crop_result JSONB
|
||||
""")
|
||||
|
||||
|
||||
@@ -90,6 +94,7 @@ async def create_session_db(
|
||||
id, name, filename, original_png, status, current_step
|
||||
) VALUES ($1, $2, $3, $4, 'active', 1)
|
||||
RETURNING id, name, filename, status, current_step,
|
||||
orientation_result, crop_result,
|
||||
deskew_result, dewarp_result, column_result, row_result,
|
||||
word_result, ground_truth, auto_shear_degrees,
|
||||
doc_type, doc_type_result,
|
||||
@@ -106,6 +111,7 @@ async def get_session_db(session_id: str) -> Optional[Dict[str, Any]]:
|
||||
async with pool.acquire() as conn:
|
||||
row = await conn.fetchrow("""
|
||||
SELECT id, name, filename, status, current_step,
|
||||
orientation_result, crop_result,
|
||||
deskew_result, dewarp_result, column_result, row_result,
|
||||
word_result, ground_truth, auto_shear_degrees,
|
||||
doc_type, doc_type_result,
|
||||
@@ -123,6 +129,8 @@ async def get_session_image(session_id: str, image_type: str) -> Optional[bytes]
|
||||
"""Load a single image (BYTEA) from the session."""
|
||||
column_map = {
|
||||
"original": "original_png",
|
||||
"oriented": "oriented_png",
|
||||
"cropped": "cropped_png",
|
||||
"deskewed": "deskewed_png",
|
||||
"binarized": "binarized_png",
|
||||
"dewarped": "dewarped_png",
|
||||
@@ -150,15 +158,17 @@ async def update_session_db(session_id: str, **kwargs) -> Optional[Dict[str, Any
|
||||
|
||||
allowed_fields = {
|
||||
'name', 'filename', 'status', 'current_step',
|
||||
'original_png', 'deskewed_png', 'binarized_png', 'dewarped_png',
|
||||
'original_png', 'oriented_png', 'cropped_png',
|
||||
'deskewed_png', 'binarized_png', 'dewarped_png',
|
||||
'clean_png', 'handwriting_removal_meta',
|
||||
'orientation_result', 'crop_result',
|
||||
'deskew_result', 'dewarp_result', 'column_result', 'row_result',
|
||||
'word_result', 'ground_truth', 'auto_shear_degrees',
|
||||
'doc_type', 'doc_type_result',
|
||||
'document_category', 'pipeline_log',
|
||||
}
|
||||
|
||||
jsonb_fields = {'deskew_result', 'dewarp_result', 'column_result', 'row_result', 'word_result', 'ground_truth', 'handwriting_removal_meta', 'doc_type_result', 'pipeline_log'}
|
||||
jsonb_fields = {'orientation_result', 'crop_result', 'deskew_result', 'dewarp_result', 'column_result', 'row_result', 'word_result', 'ground_truth', 'handwriting_removal_meta', 'doc_type_result', 'pipeline_log'}
|
||||
|
||||
for key, value in kwargs.items():
|
||||
if key in allowed_fields:
|
||||
@@ -182,6 +192,7 @@ async def update_session_db(session_id: str, **kwargs) -> Optional[Dict[str, Any
|
||||
SET {', '.join(fields)}
|
||||
WHERE id = ${param_idx}
|
||||
RETURNING id, name, filename, status, current_step,
|
||||
orientation_result, crop_result,
|
||||
deskew_result, dewarp_result, column_result, row_result,
|
||||
word_result, ground_truth, auto_shear_degrees,
|
||||
doc_type, doc_type_result,
|
||||
@@ -254,7 +265,7 @@ def _row_to_dict(row: asyncpg.Record) -> Dict[str, Any]:
|
||||
result[key] = result[key].isoformat()
|
||||
|
||||
# JSONB → parsed (asyncpg returns str for JSONB)
|
||||
for key in ['deskew_result', 'dewarp_result', 'column_result', 'row_result', 'word_result', 'ground_truth', 'doc_type_result', 'pipeline_log']:
|
||||
for key in ['orientation_result', 'crop_result', 'deskew_result', 'dewarp_result', 'column_result', 'row_result', 'word_result', 'ground_truth', 'doc_type_result', 'pipeline_log']:
|
||||
if key in result and result[key] is not None:
|
||||
if isinstance(result[key], str):
|
||||
result[key] = json.loads(result[key])
|
||||
|
||||
Reference in New Issue
Block a user