feat(ocr): Add CV Document Reconstruction Pipeline for vocabulary extraction
New OCR method using classical Computer Vision: high-res rendering (432 DPI), deskew, dewarp, binarization, projection-profile layout analysis, multi-pass Tesseract OCR with region-specific PSM, and Y-coordinate line alignment. Includes bugfix for convert_pdf_to_image call (line 869) and 39 unit tests. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -93,6 +93,15 @@ const OCR_METHODS = {
|
|||||||
description: 'ARM64-nativ, Standard',
|
description: 'ARM64-nativ, Standard',
|
||||||
enabled: true,
|
enabled: true,
|
||||||
},
|
},
|
||||||
|
cv_pipeline: {
|
||||||
|
id: 'cv_pipeline',
|
||||||
|
name: 'Loesung E: Document Reconstruction',
|
||||||
|
shortName: 'E: Doc Recon',
|
||||||
|
model: 'opencv + tesseract (multi-pass)',
|
||||||
|
color: 'green',
|
||||||
|
description: 'CV-Pipeline: Deskew, Dewarp, Binarisierung, Multi-Pass OCR',
|
||||||
|
enabled: true,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
export default function OCRComparePage() {
|
export default function OCRComparePage() {
|
||||||
@@ -115,7 +124,7 @@ export default function OCRComparePage() {
|
|||||||
const [uploading, setUploading] = useState(false)
|
const [uploading, setUploading] = useState(false)
|
||||||
|
|
||||||
// Method Selection
|
// Method Selection
|
||||||
const [selectedMethods, setSelectedMethods] = useState<string[]>(['vision_llm', 'tesseract'])
|
const [selectedMethods, setSelectedMethods] = useState<string[]>(['vision_llm', 'tesseract', 'cv_pipeline'])
|
||||||
|
|
||||||
// QR Upload State
|
// QR Upload State
|
||||||
const [showQRModal, setShowQRModal] = useState(false)
|
const [showQRModal, setShowQRModal] = useState(false)
|
||||||
@@ -133,6 +142,9 @@ export default function OCRComparePage() {
|
|||||||
const [showGridOverlay, setShowGridOverlay] = useState(true)
|
const [showGridOverlay, setShowGridOverlay] = useState(true)
|
||||||
const [selectedCell, setSelectedCell] = useState<GridCell | null>(null)
|
const [selectedCell, setSelectedCell] = useState<GridCell | null>(null)
|
||||||
const [showCellDialog, setShowCellDialog] = useState(false)
|
const [showCellDialog, setShowCellDialog] = useState(false)
|
||||||
|
const [showMmGrid, setShowMmGrid] = useState(false)
|
||||||
|
const [showTextAtPosition, setShowTextAtPosition] = useState(false)
|
||||||
|
const [editableText, setEditableText] = useState(false)
|
||||||
|
|
||||||
// Block Review State
|
// Block Review State
|
||||||
const [blockReviewMode, setBlockReviewMode] = useState(false)
|
const [blockReviewMode, setBlockReviewMode] = useState(false)
|
||||||
@@ -651,6 +663,7 @@ export default function OCRComparePage() {
|
|||||||
blue: { bg: 'bg-blue-50', border: 'border-blue-300', text: 'text-blue-700' },
|
blue: { bg: 'bg-blue-50', border: 'border-blue-300', text: 'text-blue-700' },
|
||||||
red: { bg: 'bg-red-50', border: 'border-red-300', text: 'text-red-700' },
|
red: { bg: 'bg-red-50', border: 'border-red-300', text: 'text-red-700' },
|
||||||
purple: { bg: 'bg-purple-50', border: 'border-purple-300', text: 'text-purple-700' },
|
purple: { bg: 'bg-purple-50', border: 'border-purple-300', text: 'text-purple-700' },
|
||||||
|
green: { bg: 'bg-green-50', border: 'border-green-300', text: 'text-green-700' },
|
||||||
}
|
}
|
||||||
return colors[color]?.[type] || colors.slate[type]
|
return colors[color]?.[type] || colors.slate[type]
|
||||||
}
|
}
|
||||||
@@ -839,7 +852,7 @@ export default function OCRComparePage() {
|
|||||||
{/* Grid Analysis Button */}
|
{/* Grid Analysis Button */}
|
||||||
<button
|
<button
|
||||||
onClick={analyzeGrid}
|
onClick={analyzeGrid}
|
||||||
disabled={analyzingGrid}
|
disabled={analyzingGrid || !sessionId || !result}
|
||||||
className="w-full px-4 py-2 bg-teal-600 text-white rounded-lg font-medium hover:bg-teal-700 disabled:opacity-50 disabled:cursor-not-allowed text-sm"
|
className="w-full px-4 py-2 bg-teal-600 text-white rounded-lg font-medium hover:bg-teal-700 disabled:opacity-50 disabled:cursor-not-allowed text-sm"
|
||||||
>
|
>
|
||||||
{analyzingGrid ? (
|
{analyzingGrid ? (
|
||||||
@@ -874,6 +887,41 @@ export default function OCRComparePage() {
|
|||||||
<span className="text-slate-700">Grid-Overlay anzeigen</span>
|
<span className="text-slate-700">Grid-Overlay anzeigen</span>
|
||||||
</label>
|
</label>
|
||||||
|
|
||||||
|
<label className="flex items-center gap-2 text-sm cursor-pointer">
|
||||||
|
<input
|
||||||
|
type="checkbox"
|
||||||
|
checked={showMmGrid}
|
||||||
|
onChange={(e) => setShowMmGrid(e.target.checked)}
|
||||||
|
className="rounded"
|
||||||
|
/>
|
||||||
|
<span className="text-slate-700">1mm Raster anzeigen</span>
|
||||||
|
</label>
|
||||||
|
|
||||||
|
<label className="flex items-center gap-2 text-sm cursor-pointer">
|
||||||
|
<input
|
||||||
|
type="checkbox"
|
||||||
|
checked={showTextAtPosition}
|
||||||
|
onChange={(e) => {
|
||||||
|
setShowTextAtPosition(e.target.checked)
|
||||||
|
if (!e.target.checked) setEditableText(false)
|
||||||
|
}}
|
||||||
|
className="rounded"
|
||||||
|
/>
|
||||||
|
<span className="text-slate-700">Text an Originalposition</span>
|
||||||
|
</label>
|
||||||
|
|
||||||
|
{showTextAtPosition && (
|
||||||
|
<label className="flex items-center gap-2 text-sm cursor-pointer ml-5">
|
||||||
|
<input
|
||||||
|
type="checkbox"
|
||||||
|
checked={editableText}
|
||||||
|
onChange={(e) => setEditableText(e.target.checked)}
|
||||||
|
className="rounded"
|
||||||
|
/>
|
||||||
|
<span className="text-slate-700">Text bearbeitbar</span>
|
||||||
|
</label>
|
||||||
|
)}
|
||||||
|
|
||||||
{/* Block Review Button */}
|
{/* Block Review Button */}
|
||||||
{result && nonEmptyBlockCount > 0 && (
|
{result && nonEmptyBlockCount > 0 && (
|
||||||
<button
|
<button
|
||||||
@@ -1122,7 +1170,20 @@ export default function OCRComparePage() {
|
|||||||
selectedCell={selectedCell}
|
selectedCell={selectedCell}
|
||||||
showEmpty={false}
|
showEmpty={false}
|
||||||
showNumbers={blockReviewMode}
|
showNumbers={blockReviewMode}
|
||||||
showTextLabels={true}
|
showTextLabels={!showTextAtPosition}
|
||||||
|
showMmGrid={showMmGrid}
|
||||||
|
showTextAtPosition={showTextAtPosition}
|
||||||
|
editableText={editableText}
|
||||||
|
onCellTextChange={(cell, newText) => {
|
||||||
|
if (!gridData) return
|
||||||
|
const newCells = gridData.cells.map(row =>
|
||||||
|
row.map(c => c.row === cell.row && c.col === cell.col
|
||||||
|
? { ...c, text: newText, status: 'manual' as const }
|
||||||
|
: c
|
||||||
|
)
|
||||||
|
)
|
||||||
|
setGridData({ ...gridData, cells: newCells })
|
||||||
|
}}
|
||||||
highlightedBlockNumber={blockReviewMode ? currentBlockNumber : null}
|
highlightedBlockNumber={blockReviewMode ? currentBlockNumber : null}
|
||||||
className={`rounded-lg border border-slate-200 overflow-hidden ${isFullscreen ? 'max-h-[80vh] mx-auto' : 'w-full max-w-2xl mx-auto'}`}
|
className={`rounded-lg border border-slate-200 overflow-hidden ${isFullscreen ? 'max-h-[80vh] mx-auto' : 'w-full max-w-2xl mx-auto'}`}
|
||||||
/>
|
/>
|
||||||
@@ -1229,7 +1290,20 @@ export default function OCRComparePage() {
|
|||||||
selectedCell={selectedCell}
|
selectedCell={selectedCell}
|
||||||
showEmpty={false}
|
showEmpty={false}
|
||||||
showNumbers={blockReviewMode}
|
showNumbers={blockReviewMode}
|
||||||
showTextLabels={!blockReviewMode}
|
showTextLabels={!blockReviewMode && !showTextAtPosition}
|
||||||
|
showMmGrid={showMmGrid}
|
||||||
|
showTextAtPosition={showTextAtPosition}
|
||||||
|
editableText={editableText}
|
||||||
|
onCellTextChange={(cell, newText) => {
|
||||||
|
if (!gridData) return
|
||||||
|
const newCells = gridData.cells.map(row =>
|
||||||
|
row.map(c => c.row === cell.row && c.col === cell.col
|
||||||
|
? { ...c, text: newText, status: 'manual' as const }
|
||||||
|
: c
|
||||||
|
)
|
||||||
|
)
|
||||||
|
setGridData({ ...gridData, cells: newCells })
|
||||||
|
}}
|
||||||
highlightedBlockNumber={blockReviewMode ? currentBlockNumber : null}
|
highlightedBlockNumber={blockReviewMode ? currentBlockNumber : null}
|
||||||
className="rounded-lg border border-slate-200 overflow-hidden"
|
className="rounded-lg border border-slate-200 overflow-hidden"
|
||||||
/>
|
/>
|
||||||
|
|||||||
1019
klausur-service/backend/cv_vocab_pipeline.py
Normal file
1019
klausur-service/backend/cv_vocab_pipeline.py
Normal file
File diff suppressed because it is too large
Load Diff
569
klausur-service/backend/tests/test_cv_vocab_pipeline.py
Normal file
569
klausur-service/backend/tests/test_cv_vocab_pipeline.py
Normal file
@@ -0,0 +1,569 @@
|
|||||||
|
"""
|
||||||
|
Unit Tests for CV Vocab Pipeline (cv_vocab_pipeline.py)
|
||||||
|
|
||||||
|
Tests cover:
|
||||||
|
- Data classes (PageRegion, VocabRow, PipelineResult)
|
||||||
|
- Stage 2: Deskew image
|
||||||
|
- Stage 3: Dewarp (pass-through)
|
||||||
|
- Stage 4: Image preparation (OCR + Layout images)
|
||||||
|
- Stage 5: Layout analysis (content bounds, projection profiles, column detection)
|
||||||
|
- Stage 6: Multi-pass OCR region handling
|
||||||
|
- Stage 7: Line grouping and vocabulary matching
|
||||||
|
- Orchestrator (run_cv_pipeline)
|
||||||
|
|
||||||
|
DSGVO Note: All tests run locally with synthetic data. No external API calls.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
import numpy as np
|
||||||
|
from unittest.mock import AsyncMock, MagicMock, patch, PropertyMock
|
||||||
|
from dataclasses import asdict
|
||||||
|
|
||||||
|
# Import module under test
|
||||||
|
from cv_vocab_pipeline import (
|
||||||
|
PageRegion,
|
||||||
|
VocabRow,
|
||||||
|
PipelineResult,
|
||||||
|
deskew_image,
|
||||||
|
dewarp_image,
|
||||||
|
create_ocr_image,
|
||||||
|
create_layout_image,
|
||||||
|
_find_content_bounds,
|
||||||
|
analyze_layout,
|
||||||
|
_group_words_into_lines,
|
||||||
|
match_lines_to_vocab,
|
||||||
|
run_cv_pipeline,
|
||||||
|
CV2_AVAILABLE,
|
||||||
|
TESSERACT_AVAILABLE,
|
||||||
|
CV_PIPELINE_AVAILABLE,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================
|
||||||
|
# FIXTURES
|
||||||
|
# =============================================
|
||||||
|
|
||||||
|
@pytest.fixture
def white_image():
    """Provide a blank canvas: 200 rows x 300 columns x 3 channels, all 255."""
    return np.full((200, 300, 3), 255, dtype=np.uint8)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def text_like_image():
    """Provide a 400-row x 600-column white image with three columns of dark bars.

    Each column holds ten 15-pixel-high bars (value 30) spaced 30 rows apart,
    starting at row 50. The untouched white strips between the column spans
    act as the gaps separating the simulated EN / DE / Example columns.
    """
    canvas = np.full((400, 600, 3), 255, dtype=np.uint8)

    # (x_start, x_stop) of the dark bars for column 1 (EN), 2 (DE), 3 (Example).
    column_spans = ((30, 160), (220, 360), (420, 570))

    for x_start, x_stop in column_spans:
        for row in range(50, 350, 30):
            canvas[row:row + 15, x_start:x_stop, :] = 30  # dark "text line"

    return canvas
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def binary_image():
    """Provide a single-channel 400x600 image: white (255) with black (0) bands."""
    canvas = np.full((400, 600), 255, dtype=np.uint8)
    # Ten 15-row bands spanning columns 30..569 stand in for lines of text.
    for row in range(50, 350, 30):
        canvas[row:row + 15, 30:570] = 0
    return canvas
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def sample_words_column_en():
    """Provide three synthetic OCR word dicts tagged as the English column."""
    # (text, top, conf) — geometry other than ``top`` is shared by all words.
    entries = (
        ('achieve', 50, 90),
        ('improve', 80, 85),
        ('success', 110, 92),
    )
    return [
        {
            'text': text,
            'left': 30,
            'top': top,
            'width': 80,
            'height': 15,
            'conf': conf,
            'region_type': 'column_en',
        }
        for text, top, conf in entries
    ]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def sample_words_column_de():
    """Provide three synthetic OCR word dicts tagged as the German column.

    The ``top`` values sit 2 px below the matching English sample words.
    """
    # (text, top, conf) — geometry other than ``top`` is shared by all words.
    entries = (
        ('erreichen', 52, 88),
        ('verbessern', 82, 80),
        ('Erfolg', 112, 95),
    )
    return [
        {
            'text': text,
            'left': 220,
            'top': top,
            'width': 100,
            'height': 15,
            'conf': conf,
            'region_type': 'column_de',
        }
        for text, top, conf in entries
    ]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def sample_words_column_ex():
    """Provide four synthetic OCR word dicts tagged as the Example column."""
    # (text, left, top, width, conf)
    # NOTE(review): 'goals.' shares left=420 with 'She' and is only 2 px lower,
    # so it may be grouped onto the same line as the other words — confirm
    # whether a wrapped second line (larger ``top``) was intended.
    entries = (
        ('She', 420, 50, 30, 85),
        ('achieved', 455, 50, 70, 80),
        ('her', 530, 50, 30, 90),
        ('goals.', 420, 52, 50, 75),
    )
    return [
        {
            'text': text,
            'left': left,
            'top': top,
            'width': width,
            'height': 15,
            'conf': conf,
            'region_type': 'column_example',
        }
        for text, left, top, width, conf in entries
    ]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def sample_regions():
    """Provide a three-column ``PageRegion`` layout (EN, DE, Example)."""
    # (region type, x, width) — every region starts at y=50 and is 300 high.
    layout = (
        ('column_en', 0, 190),
        ('column_de', 210, 160),
        ('column_example', 410, 190),
    )
    return [
        PageRegion(type=kind, x=x_start, y=50, width=span, height=300)
        for kind, x_start, span in layout
    ]
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================
|
||||||
|
# DATA CLASS TESTS
|
||||||
|
# =============================================
|
||||||
|
|
||||||
|
class TestDataClasses:
    """Check field storage and default values of the pipeline data classes."""

    def test_page_region_creation(self):
        """Constructor keyword arguments are stored on the matching attributes."""
        region = PageRegion(type='column_en', x=10, y=20, width=100, height=200)
        assert region.type == 'column_en'
        assert (region.x, region.y) == (10, 20)
        assert (region.width, region.height) == (100, 200)

    def test_vocab_row_defaults(self):
        """A VocabRow built without arguments has empty texts and zero metrics."""
        blank = VocabRow()
        assert blank.english == ""
        assert blank.german == ""
        assert blank.example == ""
        assert blank.confidence == 0.0
        assert blank.y_position == 0

    def test_vocab_row_with_values(self):
        """Explicit VocabRow values are returned unchanged."""
        filled = VocabRow(
            english="test",
            german="Test",
            example="A test.",
            confidence=85.5,
            y_position=100,
        )
        assert filled.english == "test"
        assert filled.german == "Test"
        assert filled.confidence == 85.5

    def test_pipeline_result_defaults(self):
        """A fresh PipelineResult is empty, zeroed and carries no error."""
        fresh = PipelineResult()
        assert fresh.vocabulary == []
        assert fresh.word_count == 0
        assert fresh.columns_detected == 0
        assert fresh.duration_seconds == 0.0
        assert fresh.stages == {}
        assert fresh.error is None

    def test_pipeline_result_error(self):
        """The error message passed to the constructor is preserved."""
        message = "Something went wrong"
        failed = PipelineResult(error=message)
        assert failed.error == "Something went wrong"
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================
|
||||||
|
# STAGE 2: DESKEW TESTS
|
||||||
|
# =============================================
|
||||||
|
|
||||||
|
@pytest.mark.skipif(not CV2_AVAILABLE, reason="OpenCV not available")
class TestDeskew:
    """Stage 2: rotation correction via ``deskew_image``."""

    def test_deskew_straight_image(self, white_image):
        """A blank image yields a near-zero angle and an unchanged shape."""
        straightened, detected_angle = deskew_image(white_image)
        assert abs(detected_angle) < 0.1
        assert straightened.shape == white_image.shape

    def test_deskew_returns_tuple(self, white_image):
        """The return value is a 2-tuple of (ndarray, float)."""
        outcome = deskew_image(white_image)
        assert isinstance(outcome, tuple)
        assert len(outcome) == 2
        image_part, angle_part = outcome
        assert isinstance(image_part, np.ndarray)
        assert isinstance(angle_part, float)

    def test_deskew_preserves_shape(self, text_like_image):
        """The corrected image has the same shape as the input."""
        straightened = deskew_image(text_like_image)[0]
        assert straightened.shape == text_like_image.shape
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================
|
||||||
|
# STAGE 3: DEWARP TESTS
|
||||||
|
# =============================================
|
||||||
|
|
||||||
|
@pytest.mark.skipif(not CV2_AVAILABLE, reason="OpenCV not available")
class TestDewarp:
    """Stage 3: ``dewarp_image``, currently expected to be a pass-through."""

    def test_dewarp_passthrough(self, white_image):
        """The output pixels are identical to the input pixels."""
        dewarped = dewarp_image(white_image)
        np.testing.assert_array_equal(dewarped, white_image)

    def test_dewarp_preserves_shape(self, text_like_image):
        """The output shape equals the input shape."""
        dewarped = dewarp_image(text_like_image)
        assert dewarped.shape == text_like_image.shape
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================
|
||||||
|
# STAGE 4: IMAGE PREPARATION TESTS
|
||||||
|
# =============================================
|
||||||
|
|
||||||
|
@pytest.mark.skipif(not CV2_AVAILABLE, reason="OpenCV not available")
class TestImagePreparation:
    """Stage 4: creation of the OCR image and the layout image."""

    def test_create_ocr_image_returns_grayscale(self, text_like_image):
        """The OCR image is a 2-D (single-channel) uint8 array."""
        prepared = create_ocr_image(text_like_image)
        assert prepared.ndim == 2  # single channel
        assert prepared.dtype == np.uint8

    def test_create_ocr_image_is_binary(self, text_like_image):
        """Every pixel of the OCR image is either 0 or 255."""
        prepared = create_ocr_image(text_like_image)
        distinct_values = set(np.unique(prepared).tolist())
        assert distinct_values <= {0, 255}

    def test_create_layout_image_returns_grayscale(self, text_like_image):
        """The layout image is a 2-D (single-channel) uint8 array."""
        prepared = create_layout_image(text_like_image)
        assert prepared.ndim == 2
        assert prepared.dtype == np.uint8

    def test_create_layout_image_enhanced_contrast(self, text_like_image):
        """The layout image has the same shape as a plain grayscale conversion.

        NOTE: despite the test name, only the shapes are compared; pixel
        values and histograms are not inspected here.
        """
        import cv2

        plain_gray = cv2.cvtColor(text_like_image, cv2.COLOR_BGR2GRAY)
        prepared = create_layout_image(text_like_image)
        assert prepared.shape == plain_gray.shape
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================
|
||||||
|
# STAGE 5: LAYOUT ANALYSIS TESTS
|
||||||
|
# =============================================
|
||||||
|
|
||||||
|
@pytest.mark.skipif(not CV2_AVAILABLE, reason="OpenCV not available")
class TestContentBounds:
    """Stage 5 helper: ``_find_content_bounds`` on inverted images."""

    def test_empty_image(self):
        """With no content at all, the returned bounds stay inside the image.

        The input is an all-zero array (i.e. the inverse of a white page);
        only containment within 300x200 is asserted.
        """
        height, width = 200, 300
        inverted = np.zeros((height, width), dtype=np.uint8)
        left, right, top, bottom = _find_content_bounds(inverted)
        assert left >= 0
        assert right <= width
        assert top >= 0
        assert bottom <= height

    def test_centered_content(self):
        """A solid content block is enclosed by the returned bounds (2 px slack)."""
        inverted = np.zeros((400, 600), dtype=np.uint8)
        inverted[100:300, 50:550] = 255  # rows 100..299, columns 50..549
        left, right, top, bottom = _find_content_bounds(inverted)
        assert left <= 52  # ~50 with 2px margin
        assert right >= 548  # ~550 with 2px margin
        assert top <= 102
        assert bottom >= 298
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skipif(not CV2_AVAILABLE, reason="OpenCV not available")
class TestLayoutAnalysis:
    """Stage 5: column detection via ``analyze_layout``."""

    @staticmethod
    def _regions_for(image):
        """Prepare OCR and layout images from ``image`` and analyse the layout."""
        ocr_img = create_ocr_image(image)
        layout_img = create_layout_image(image)
        return analyze_layout(layout_img, ocr_img)

    @staticmethod
    def _columns_only(regions):
        """Keep only regions whose type starts with ``'column'``."""
        return [region for region in regions if region.type.startswith('column')]

    def test_returns_list_of_regions(self, text_like_image):
        """The result is a list whose items are all PageRegion instances."""
        found = self._regions_for(text_like_image)
        assert isinstance(found, list)
        assert all(isinstance(item, PageRegion) for item in found)

    def test_detects_columns(self, text_like_image):
        """A clear three-column image yields at least one column region."""
        found = self._regions_for(text_like_image)
        assert len(self._columns_only(found)) >= 1

    def test_single_column_fallback(self):
        """Full-width text still yields at least one column region."""
        page = np.full((400, 600, 3), 255, dtype=np.uint8)
        # Uniform bars across the whole width leave no vertical gaps.
        for row in range(50, 350, 20):
            page[row:row + 10, 20:580, :] = 30
        found = self._regions_for(page)
        # Should at least return 1 column (full page fallback)
        assert len(self._columns_only(found)) >= 1

    def test_region_types_are_valid(self, text_like_image):
        """Every returned region type belongs to the expected set."""
        allowed = {'column_en', 'column_de', 'column_example', 'header', 'footer'}
        for region in self._regions_for(text_like_image):
            assert region.type in allowed, f"Unexpected region type: {region.type}"
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================
|
||||||
|
# STAGE 7: LINE GROUPING TESTS
|
||||||
|
# =============================================
|
||||||
|
|
||||||
|
class TestLineGrouping:
    """Stage 7: grouping OCR words into lines with ``_group_words_into_lines``."""

    @staticmethod
    def _word(text, left, top, conf):
        """Build an OCR word dict with the fixed 50x15 box used in these tests."""
        return {
            'text': text,
            'left': left,
            'top': top,
            'width': 50,
            'height': 15,
            'conf': conf,
        }

    def test_empty_input(self):
        """No words in, no lines out."""
        grouped = _group_words_into_lines([])
        assert grouped == []

    def test_single_word(self):
        """One word produces exactly one line containing that word."""
        grouped = _group_words_into_lines([self._word('hello', 10, 50, 90)])
        assert len(grouped) == 1
        only_line = grouped[0]
        assert len(only_line) == 1
        assert only_line[0]['text'] == 'hello'

    def test_words_on_same_line(self):
        """Words 2 px apart vertically share a line at a 10 px tolerance."""
        sample = [
            self._word('hello', 10, 50, 90),
            self._word('world', 70, 52, 85),
        ]
        grouped = _group_words_into_lines(sample, y_tolerance_px=10)
        assert len(grouped) == 1
        assert len(grouped[0]) == 2

    def test_words_on_different_lines(self):
        """Words 50 px apart vertically form separate lines at a 20 px tolerance."""
        sample = [
            self._word('line1', 10, 50, 90),
            self._word('line2', 10, 100, 85),
            self._word('line3', 10, 150, 88),
        ]
        grouped = _group_words_into_lines(sample, y_tolerance_px=20)
        assert len(grouped) == 3

    def test_words_sorted_by_x_within_line(self):
        """Within one line, words come back ordered by their left coordinate."""
        sample = [
            self._word('world', 100, 50, 85),
            self._word('hello', 10, 52, 90),
        ]
        grouped = _group_words_into_lines(sample, y_tolerance_px=10)
        assert len(grouped) == 1
        first, second = grouped[0][0], grouped[0][1]
        assert first['text'] == 'hello'
        assert second['text'] == 'world'
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================
|
||||||
|
# STAGE 7: VOCABULARY MATCHING TESTS
|
||||||
|
# =============================================
|
||||||
|
|
||||||
|
class TestVocabMatching:
    """Stage 7: aligning per-column OCR words into rows with ``match_lines_to_vocab``."""

    def test_empty_results(self, sample_regions):
        """An empty OCR result mapping produces no vocabulary rows."""
        rows = match_lines_to_vocab({}, sample_regions)
        assert rows == []

    def test_en_only(self, sample_words_column_en, sample_regions):
        """English-only input gives one row per word with an empty German field."""
        rows = match_lines_to_vocab(
            {'column_en': sample_words_column_en},
            sample_regions,
        )
        assert len(rows) == 3
        for entry in rows:
            assert entry.english != ""
            assert entry.german == ""

    def test_en_de_matching(self, sample_words_column_en, sample_words_column_de, sample_regions):
        """English and German words at (nearly) the same Y end up in one row."""
        per_column = {
            'column_en': sample_words_column_en,
            'column_de': sample_words_column_de,
        }
        rows = match_lines_to_vocab(per_column, sample_regions, y_tolerance_px=25)
        assert len(rows) == 3
        # First entry should match achieve <-> erreichen
        top_row = rows[0]
        assert top_row.english == 'achieve'
        assert top_row.german == 'erreichen'

    def test_full_3_column_matching(self, sample_words_column_en, sample_words_column_de,
                                    sample_words_column_ex, sample_regions):
        """With all three columns present, the first row also carries example text."""
        per_column = {
            'column_en': sample_words_column_en,
            'column_de': sample_words_column_de,
            'column_example': sample_words_column_ex,
        }
        rows = match_lines_to_vocab(per_column, sample_regions, y_tolerance_px=25)
        assert len(rows) >= 1
        # First entry should have example text
        top_row = rows[0]
        assert top_row.english == 'achieve'
        assert top_row.example != ""

    def test_sorted_by_y_position(self, sample_words_column_en, sample_regions):
        """Rows are returned in ascending ``y_position`` order."""
        rows = match_lines_to_vocab(
            {'column_en': sample_words_column_en},
            sample_regions,
        )
        y_values = [entry.y_position for entry in rows]
        assert y_values == sorted(y_values)

    def test_skips_short_entries(self, sample_regions):
        """A one-character entry is dropped; the longer one survives."""
        shared = {'left': 30, 'height': 15, 'conf': 90, 'region_type': 'column_en'}
        too_short = {'text': 'a', 'top': 50, 'width': 10, **shared}
        long_enough = {'text': 'valid', 'top': 80, 'width': 50, **shared}
        rows = match_lines_to_vocab(
            {'column_en': [too_short, long_enough]},
            sample_regions,
        )
        assert len(rows) == 1
        assert rows[0].english == 'valid'

    def test_confidence_calculation(self, sample_words_column_en, sample_words_column_de, sample_regions):
        """Row confidence is about the mean of the matched columns' confidences."""
        per_column = {
            'column_en': sample_words_column_en,
            'column_de': sample_words_column_de,
        }
        rows = match_lines_to_vocab(per_column, sample_regions, y_tolerance_px=25)
        # First entry: EN conf=90, DE conf=88 → avg=89
        first_confidence = rows[0].confidence
        assert first_confidence > 0
        assert first_confidence == pytest.approx(89.0, abs=1.0)
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================
|
||||||
|
# ORCHESTRATOR TESTS
|
||||||
|
# =============================================
|
||||||
|
|
||||||
|
class TestOrchestrator:
|
||||||
|
"""Test run_cv_pipeline orchestrator."""
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_no_input_returns_error(self):
|
||||||
|
"""Pipeline without input should return error."""
|
||||||
|
result = await run_cv_pipeline()
|
||||||
|
assert result.error is not None
|
||||||
|
assert "No input data" in result.error
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_pipeline_unavailable(self):
|
||||||
|
"""When CV_PIPELINE_AVAILABLE is False, should return error."""
|
||||||
|
with patch('cv_vocab_pipeline.CV_PIPELINE_AVAILABLE', False):
|
||||||
|
result = await run_cv_pipeline(pdf_data=b"fake")
|
||||||
|
assert result.error is not None
|
||||||
|
assert "not available" in result.error
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
@pytest.mark.skipif(not CV2_AVAILABLE, reason="OpenCV not available")
|
||||||
|
async def test_pipeline_with_image_data(self):
|
||||||
|
"""Pipeline with a real synthetic image should run without errors."""
|
||||||
|
import cv2
|
||||||
|
# Create a simple test image (white with some text-like black bars)
|
||||||
|
img = np.ones((200, 300, 3), dtype=np.uint8) * 255
|
||||||
|
for y in range(30, 170, 25):
|
||||||
|
img[y:y+12, 20:280, :] = 30
|
||||||
|
_, img_bytes = cv2.imencode('.png', img)
|
||||||
|
image_data = img_bytes.tobytes()
|
||||||
|
|
||||||
|
with patch('cv_vocab_pipeline.pytesseract') as mock_tess:
|
||||||
|
# Mock Tesseract to return empty results
|
||||||
|
mock_tess.image_to_data.return_value = {
|
||||||
|
'text': [], 'conf': [], 'left': [], 'top': [],
|
||||||
|
'width': [], 'height': [],
|
||||||
|
}
|
||||||
|
mock_tess.Output.DICT = 'dict'
|
||||||
|
|
||||||
|
result = await run_cv_pipeline(image_data=image_data)
|
||||||
|
assert result.error is None
|
||||||
|
assert result.image_width == 300
|
||||||
|
assert result.image_height == 200
|
||||||
|
assert 'render' in result.stages
|
||||||
|
assert 'deskew' in result.stages
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
@pytest.mark.skipif(not CV2_AVAILABLE, reason="OpenCV not available")
|
||||||
|
async def test_pipeline_records_timing(self):
|
||||||
|
"""Pipeline should record timing for each stage."""
|
||||||
|
import cv2
|
||||||
|
img = np.ones((100, 150, 3), dtype=np.uint8) * 255
|
||||||
|
_, img_bytes = cv2.imencode('.png', img)
|
||||||
|
|
||||||
|
with patch('cv_vocab_pipeline.pytesseract') as mock_tess:
|
||||||
|
mock_tess.image_to_data.return_value = {
|
||||||
|
'text': [], 'conf': [], 'left': [], 'top': [],
|
||||||
|
'width': [], 'height': [],
|
||||||
|
}
|
||||||
|
mock_tess.Output.DICT = 'dict'
|
||||||
|
|
||||||
|
result = await run_cv_pipeline(image_data=img_bytes.tobytes())
|
||||||
|
assert result.duration_seconds >= 0
|
||||||
|
assert all(v >= 0 for v in result.stages.values())
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_pipeline_result_format(self):
    """A vocabulary entry stored on PipelineResult keeps the four expected keys."""
    sample_entry = {"english": "test", "german": "Test", "example": "A test.", "confidence": 90.0}

    result = PipelineResult()
    result.vocabulary = [sample_entry]

    assert len(result.vocabulary) == 1
    entry = result.vocabulary[0]
    for key in ("english", "german", "example", "confidence"):
        assert key in entry
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================
|
||||||
|
# INTEGRATION-STYLE TESTS (with mocked Tesseract)
|
||||||
|
# =============================================
|
||||||
|
|
||||||
|
@pytest.mark.skipif(not CV2_AVAILABLE, reason="OpenCV not available")
class TestStageIntegration:
    """Chain several pipeline stages together; no OCR engine is invoked here."""

    def test_image_prep_to_layout(self, text_like_image):
        """Stages 4→5: both prepared images keep the input size and yield at least one region."""
        ocr_img = create_ocr_image(text_like_image)
        layout_img = create_layout_image(text_like_image)

        source_hw = text_like_image.shape[:2]
        assert ocr_img.shape[:2] == source_hw
        assert layout_img.shape[:2] == source_hw

        regions = analyze_layout(layout_img, ocr_img)
        assert len(regions) >= 1

    def test_deskew_to_image_prep(self, text_like_image):
        """Stages 2→4: the deskewed image is accepted by both image-prep functions."""
        corrected, _angle = deskew_image(text_like_image)

        ocr_img = create_ocr_image(corrected)
        layout_img = create_layout_image(corrected)

        corrected_hw = corrected.shape[:2]
        assert ocr_img.shape[:2] == corrected_hw
        assert layout_img.shape[:2] == corrected_hw
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================
|
||||||
|
# RUN TESTS
|
||||||
|
# =============================================
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # pytest.main returns the session exit code; hand it to the interpreter so
    # that running this file directly fails (non-zero exit) when a test fails.
    raise SystemExit(pytest.main([__file__, "-v"]))
|
||||||
@@ -42,6 +42,93 @@ except ImportError:
|
|||||||
MINIO_AVAILABLE = False
|
MINIO_AVAILABLE = False
|
||||||
logger.warning("MinIO storage not available, using local storage")
|
logger.warning("MinIO storage not available, using local storage")
|
||||||
|
|
||||||
|
# Try to import Tesseract extractor
|
||||||
|
try:
|
||||||
|
from tesseract_vocab_extractor import (
|
||||||
|
extract_bounding_boxes, run_tesseract_pipeline,
|
||||||
|
match_positions_to_vocab, TESSERACT_AVAILABLE,
|
||||||
|
)
|
||||||
|
except ImportError:
|
||||||
|
TESSERACT_AVAILABLE = False
|
||||||
|
logger.warning("Tesseract extractor not available")
|
||||||
|
|
||||||
|
# Try to import CV Pipeline
|
||||||
|
try:
|
||||||
|
from cv_vocab_pipeline import run_cv_pipeline, CV_PIPELINE_AVAILABLE
|
||||||
|
except ImportError:
|
||||||
|
CV_PIPELINE_AVAILABLE = False
|
||||||
|
logger.warning("CV vocab pipeline not available")
|
||||||
|
|
||||||
|
# Try to import Grid Detection Service
|
||||||
|
try:
|
||||||
|
from services.grid_detection_service import GridDetectionService
|
||||||
|
GRID_SERVICE_AVAILABLE = True
|
||||||
|
except ImportError:
|
||||||
|
GRID_SERVICE_AVAILABLE = False
|
||||||
|
logger.warning("Grid Detection Service not available")
|
||||||
|
|
||||||
|
# Database integration (used by main.py lifespan)
|
||||||
|
try:
|
||||||
|
from vocab_session_store import (
|
||||||
|
DATABASE_URL, get_pool, init_vocab_tables,
|
||||||
|
list_sessions_db, get_session_db,
|
||||||
|
)
|
||||||
|
except ImportError:
|
||||||
|
DATABASE_URL = os.getenv("DATABASE_URL", "postgresql://breakpilot:breakpilot@postgres:5432/breakpilot_db")
|
||||||
|
get_pool = None
|
||||||
|
init_vocab_tables = None
|
||||||
|
list_sessions_db = None
|
||||||
|
get_session_db = None
|
||||||
|
|
||||||
|
_db_pool = None
|
||||||
|
|
||||||
|
|
||||||
|
def set_db_pool(pool):
    """Store ``pool`` in the module-level ``_db_pool`` slot.

    Intended to be called once from the main.py lifespan hook; any previously
    stored pool is simply replaced.
    """
    global _db_pool
    _db_pool = pool
|
||||||
|
|
||||||
|
|
||||||
|
async def _init_vocab_table():
    """Create the vocab session tables when the database store is importable.

    Best-effort: a failure inside ``init_vocab_tables`` is logged as a warning
    and swallowed so that startup can continue without the database.
    """
    if not init_vocab_tables:
        # The vocab_session_store import fallback leaves init_vocab_tables as
        # None. Nothing was created in that case, so do not report the table
        # as ready.
        logger.info("vocab_session_store not available, skipping vocab table init")
        return

    try:
        await init_vocab_tables()
        logger.info("vocab_session_cache table ready")
    except Exception as e:
        logger.warning(f"Failed to init vocab tables: {e}")
|
||||||
|
|
||||||
|
|
||||||
|
async def _load_all_sessions():
    """Populate the in-memory ``_sessions`` cache from the database.

    Requests up to 500 sessions from ``list_sessions_db`` and adds a summary
    entry for every session id that is not cached yet; existing entries are
    left untouched. Best-effort: any exception is logged as a warning and
    swallowed.
    """
    if not list_sessions_db:
        logger.info("Loaded 0 vocab sessions from database")
        return

    try:
        sessions = await list_sessions_db(limit=500)
        count = 0
        for s in sessions:
            sid = s.get("id") or s.get("session_id")
            if not sid or sid in _sessions:
                continue

            # ``s.get(key, default)`` only applies the default when the key is
            # missing. A column that is present but NULL comes back as None,
            # so fall back explicitly; this also avoids storing the literal
            # string "None" as created_at.
            created_at = s.get("created_at")
            _sessions[sid] = {
                "id": sid,
                "name": s.get("name") or "",
                "description": s.get("description") or "",
                "status": s.get("status") or "created",
                "vocabulary_count": s.get("vocabulary_count") or 0,
                "source_language": s.get("source_language") or "en",
                "target_language": s.get("target_language") or "de",
                "created_at": str(created_at) if created_at is not None else "",
            }
            count += 1
        logger.info(f"Loaded {count} vocab sessions from database")
    except Exception as e:
        logger.warning(f"Failed to load sessions from database: {e}")
|
||||||
|
|
||||||
|
|
||||||
router = APIRouter(prefix="/api/v1/vocab", tags=["Vocabulary Worksheets"])
|
router = APIRouter(prefix="/api/v1/vocab", tags=["Vocabulary Worksheets"])
|
||||||
|
|
||||||
# Local storage path
|
# Local storage path
|
||||||
@@ -786,7 +873,7 @@ async def upload_image(
|
|||||||
# Convert PDF to image if needed
|
# Convert PDF to image if needed
|
||||||
if is_pdf:
|
if is_pdf:
|
||||||
logger.info("Converting PDF to image...")
|
logger.info("Converting PDF to image...")
|
||||||
content = await convert_pdf_to_image(content)
|
content = await convert_pdf_page_to_image(content, page_number=0)
|
||||||
logger.info(f"PDF converted, image size: {len(content)} bytes")
|
logger.info(f"PDF converted, image size: {len(content)} bytes")
|
||||||
|
|
||||||
# Save image
|
# Save image
|
||||||
@@ -1066,8 +1153,12 @@ async def upload_pdf_get_info(
|
|||||||
|
|
||||||
|
|
||||||
@router.get("/sessions/{session_id}/pdf-thumbnail/{page_number}")
|
@router.get("/sessions/{session_id}/pdf-thumbnail/{page_number}")
|
||||||
async def get_pdf_thumbnail(session_id: str, page_number: int):
|
async def get_pdf_thumbnail(session_id: str, page_number: int, hires: bool = Query(False)):
|
||||||
"""Get a thumbnail image of a specific PDF page."""
|
"""Get a thumbnail image of a specific PDF page.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
hires: If True, return full-resolution image (zoom=2.0) instead of thumbnail (zoom=0.5).
|
||||||
|
"""
|
||||||
if session_id not in _sessions:
|
if session_id not in _sessions:
|
||||||
raise HTTPException(status_code=404, detail="Session not found")
|
raise HTTPException(status_code=404, detail="Session not found")
|
||||||
|
|
||||||
@@ -1077,7 +1168,7 @@ async def get_pdf_thumbnail(session_id: str, page_number: int):
|
|||||||
if not pdf_data:
|
if not pdf_data:
|
||||||
raise HTTPException(status_code=400, detail="No PDF uploaded for this session")
|
raise HTTPException(status_code=400, detail="No PDF uploaded for this session")
|
||||||
|
|
||||||
thumbnail = await convert_pdf_page_to_image(pdf_data, page_number, thumbnail=True)
|
thumbnail = await convert_pdf_page_to_image(pdf_data, page_number, thumbnail=not hires)
|
||||||
|
|
||||||
return StreamingResponse(
|
return StreamingResponse(
|
||||||
io.BytesIO(thumbnail),
|
io.BytesIO(thumbnail),
|
||||||
@@ -1085,6 +1176,45 @@ async def get_pdf_thumbnail(session_id: str, page_number: int):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/sessions/{session_id}/pdf-page-image/{page_number}")
async def get_pdf_page_image(session_id: str, page_number: int, zoom: float = Query(2.0, ge=0.5, le=4.0)):
    """Render one PDF page of a session as PNG at a caller-chosen resolution (editor view).

    Args:
        session_id: Key into the in-memory session cache.
        page_number: 0-indexed page to render.
        zoom: Scale factor applied to both axes, limited to 0.5-4.0. PyMuPDF
            renders at 72 DPI for zoom 1.0, so 0.5=36DPI, 1.0=72DPI,
            2.0=144DPI, 4.0=288DPI.

    Returns:
        StreamingResponse with the PNG bytes (``image/png``).

    Raises:
        HTTPException: 404 if the session is unknown, 400 if the session has
            no PDF or the page number is out of range, 500 if rendering fails.
    """
    if session_id not in _sessions:
        raise HTTPException(status_code=404, detail="Session not found")

    session = _sessions[session_id]
    pdf_data = session.get("pdf_data")

    if not pdf_data:
        raise HTTPException(status_code=400, detail="No PDF uploaded for this session")

    page_count = session.get("pdf_page_count", 1)
    if page_number < 0 or page_number >= page_count:
        raise HTTPException(status_code=400, detail=f"Invalid page number. PDF has {page_count} pages (0-indexed).")

    try:
        import fitz
        pdf_document = fitz.open(stream=pdf_data, filetype="pdf")
        try:
            page = pdf_document[page_number]
            mat = fitz.Matrix(zoom, zoom)
            pix = page.get_pixmap(matrix=mat)
            png_data = pix.tobytes("png")
        finally:
            # Release the document even when page lookup or rendering raises.
            pdf_document.close()
        logger.info(f"PDF page {page_number} rendered at zoom={zoom}: {len(png_data)} bytes")
    except Exception as e:
        logger.error(f"PDF page image failed: {e}")
        # Chain the cause so the original traceback is preserved.
        raise HTTPException(status_code=500, detail=f"PDF rendering failed: {str(e)}") from e

    return StreamingResponse(
        io.BytesIO(png_data),
        media_type="image/png",
    )
|
||||||
|
|
||||||
|
|
||||||
@router.post("/sessions/{session_id}/process-single-page/{page_number}")
|
@router.post("/sessions/{session_id}/process-single-page/{page_number}")
|
||||||
async def process_single_page(
|
async def process_single_page(
|
||||||
session_id: str,
|
session_id: str,
|
||||||
@@ -1436,9 +1566,122 @@ async def compare_ocr_methods(session_id: str, page_number: int):
|
|||||||
}
|
}
|
||||||
all_vocab_sets["vision_llm"] = set()
|
all_vocab_sets["vision_llm"] = set()
|
||||||
|
|
||||||
# --- Method: Local LLM (same as vision but noted separately if available) ---
|
# --- Method: Tesseract OCR (bounding boxes + vocab extraction) ---
|
||||||
# For now, we treat vision_llm as the primary method.
|
if TESSERACT_AVAILABLE:
|
||||||
# Tesseract method can be added here if tesseract_vocab_extractor is available.
|
try:
|
||||||
|
start = time.time()
|
||||||
|
tess_result = await run_tesseract_pipeline(image_data, lang="eng+deu")
|
||||||
|
duration = time.time() - start
|
||||||
|
|
||||||
|
tess_vocab = tess_result.get("vocabulary", [])
|
||||||
|
tess_words = tess_result.get("words", [])
|
||||||
|
|
||||||
|
# Store Tesseract words in session for later use (grid analysis, position matching)
|
||||||
|
session["tesseract_words"] = tess_words
|
||||||
|
session["tesseract_image_width"] = tess_result.get("image_width", 0)
|
||||||
|
session["tesseract_image_height"] = tess_result.get("image_height", 0)
|
||||||
|
session[f"tesseract_page_{page_number}"] = tess_result
|
||||||
|
|
||||||
|
vocab_list_tess = []
|
||||||
|
for v in tess_vocab:
|
||||||
|
vocab_list_tess.append({
|
||||||
|
"english": v.get("english", ""),
|
||||||
|
"german": v.get("german", ""),
|
||||||
|
"example": v.get("example", ""),
|
||||||
|
})
|
||||||
|
|
||||||
|
methods_results["tesseract"] = {
|
||||||
|
"name": "Tesseract OCR",
|
||||||
|
"model": "tesseract-ocr (eng+deu)",
|
||||||
|
"duration_seconds": round(duration, 1),
|
||||||
|
"vocabulary_count": len(vocab_list_tess),
|
||||||
|
"vocabulary": vocab_list_tess,
|
||||||
|
"confidence": 0.7 if tess_vocab else 0,
|
||||||
|
"success": len(vocab_list_tess) > 0,
|
||||||
|
"error": tess_result.get("error"),
|
||||||
|
"word_count": tess_result.get("word_count", 0),
|
||||||
|
"columns_detected": len(tess_result.get("columns", [])),
|
||||||
|
}
|
||||||
|
all_vocab_sets["tesseract"] = {
|
||||||
|
(v["english"].lower().strip(), v["german"].lower().strip())
|
||||||
|
for v in vocab_list_tess if v["english"] and v["german"]
|
||||||
|
}
|
||||||
|
|
||||||
|
# Fuzzy-match: attach Tesseract bounding boxes to Vision LLM results
|
||||||
|
if "vision_llm" in methods_results and methods_results["vision_llm"]["success"]:
|
||||||
|
llm_vocab_with_bbox = match_positions_to_vocab(
|
||||||
|
tess_words,
|
||||||
|
methods_results["vision_llm"]["vocabulary"],
|
||||||
|
tess_result.get("image_width", 1),
|
||||||
|
tess_result.get("image_height", 1),
|
||||||
|
)
|
||||||
|
methods_results["vision_llm"]["vocabulary"] = llm_vocab_with_bbox
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Tesseract failed: {e}")
|
||||||
|
import traceback
|
||||||
|
logger.debug(traceback.format_exc())
|
||||||
|
methods_results["tesseract"] = {
|
||||||
|
"name": "Tesseract OCR",
|
||||||
|
"model": "tesseract-ocr",
|
||||||
|
"duration_seconds": 0,
|
||||||
|
"vocabulary_count": 0,
|
||||||
|
"vocabulary": [],
|
||||||
|
"confidence": 0,
|
||||||
|
"success": False,
|
||||||
|
"error": str(e),
|
||||||
|
}
|
||||||
|
all_vocab_sets["tesseract"] = set()
|
||||||
|
|
||||||
|
# --- Method: CV Pipeline (Document Reconstruction) ---
|
||||||
|
if CV_PIPELINE_AVAILABLE:
|
||||||
|
try:
|
||||||
|
start = time.time()
|
||||||
|
cv_result = await run_cv_pipeline(pdf_data=pdf_data, page_number=page_number)
|
||||||
|
duration = time.time() - start
|
||||||
|
|
||||||
|
cv_vocab = cv_result.vocabulary if not cv_result.error else []
|
||||||
|
vocab_list_cv = []
|
||||||
|
for v in cv_vocab:
|
||||||
|
vocab_list_cv.append({
|
||||||
|
"english": v.get("english", ""),
|
||||||
|
"german": v.get("german", ""),
|
||||||
|
"example": v.get("example", ""),
|
||||||
|
})
|
||||||
|
|
||||||
|
methods_results["cv_pipeline"] = {
|
||||||
|
"name": "CV Pipeline (Document Reconstruction)",
|
||||||
|
"model": "opencv + tesseract (multi-pass)",
|
||||||
|
"duration_seconds": round(duration, 1),
|
||||||
|
"vocabulary_count": len(vocab_list_cv),
|
||||||
|
"vocabulary": vocab_list_cv,
|
||||||
|
"confidence": 0.8 if cv_vocab else 0,
|
||||||
|
"success": len(vocab_list_cv) > 0,
|
||||||
|
"error": cv_result.error,
|
||||||
|
"word_count": cv_result.word_count,
|
||||||
|
"columns_detected": cv_result.columns_detected,
|
||||||
|
"stages": cv_result.stages,
|
||||||
|
}
|
||||||
|
all_vocab_sets["cv_pipeline"] = {
|
||||||
|
(v["english"].lower().strip(), v["german"].lower().strip())
|
||||||
|
for v in vocab_list_cv if v["english"] and v["german"]
|
||||||
|
}
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"CV Pipeline failed: {e}")
|
||||||
|
import traceback
|
||||||
|
logger.debug(traceback.format_exc())
|
||||||
|
methods_results["cv_pipeline"] = {
|
||||||
|
"name": "CV Pipeline (Document Reconstruction)",
|
||||||
|
"model": "opencv + tesseract (multi-pass)",
|
||||||
|
"duration_seconds": 0,
|
||||||
|
"vocabulary_count": 0,
|
||||||
|
"vocabulary": [],
|
||||||
|
"confidence": 0,
|
||||||
|
"success": False,
|
||||||
|
"error": str(e),
|
||||||
|
}
|
||||||
|
all_vocab_sets["cv_pipeline"] = set()
|
||||||
|
|
||||||
# --- Build comparison ---
|
# --- Build comparison ---
|
||||||
all_unique = set()
|
all_unique = set()
|
||||||
@@ -1461,11 +1704,6 @@ async def compare_ocr_methods(session_id: str, page_number: int):
|
|||||||
# Find best method
|
# Find best method
|
||||||
best_method = max(all_vocab_sets, key=lambda m: len(all_vocab_sets[m])) if all_vocab_sets else "vision_llm"
|
best_method = max(all_vocab_sets, key=lambda m: len(all_vocab_sets[m])) if all_vocab_sets else "vision_llm"
|
||||||
|
|
||||||
# Save vocabulary from best method in session for grid analysis (no second Ollama call needed)
|
|
||||||
best_vocab_list = methods_results.get(best_method, {}).get("vocabulary", [])
|
|
||||||
session["comparison_vocabulary"] = best_vocab_list
|
|
||||||
session["comparison_page"] = page_number
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"session_id": session_id,
|
"session_id": session_id,
|
||||||
"page_number": page_number,
|
"page_number": page_number,
|
||||||
@@ -1484,67 +1722,208 @@ async def compare_ocr_methods(session_id: str, page_number: int):
|
|||||||
|
|
||||||
|
|
||||||
@router.post("/sessions/{session_id}/analyze-grid/{page_number}")
|
@router.post("/sessions/{session_id}/analyze-grid/{page_number}")
|
||||||
async def analyze_grid(session_id: str, page_number: int):
|
async def analyze_grid(session_id: str, page_number: int, use_tesseract: bool = Query(True)):
|
||||||
"""
|
"""
|
||||||
Build grid structure from comparison results (no Ollama call needed).
|
Analyze the grid/table structure of a vocabulary page.
|
||||||
|
|
||||||
Uses vocabulary stored in session by compare-ocr to compute
|
Hybrid approach:
|
||||||
the grid layout instantly.
|
1. If Tesseract bounding boxes are available (from compare-ocr), use them for
|
||||||
|
real spatial positions via GridDetectionService.
|
||||||
|
2. Otherwise fall back to Vision LLM for grid structure detection.
|
||||||
|
|
||||||
page_number is 0-indexed.
|
page_number is 0-indexed.
|
||||||
Returns GridData structure expected by the frontend GridOverlay component.
|
Returns GridData structure expected by the frontend GridOverlay component.
|
||||||
"""
|
"""
|
||||||
logger.info(f"Grid analysis for session {session_id}, page {page_number}")
|
import httpx
|
||||||
|
import time
|
||||||
|
|
||||||
|
logger.info(f"Grid analysis for session {session_id}, page {page_number} (use_tesseract={use_tesseract})")
|
||||||
|
|
||||||
if session_id not in _sessions:
|
if session_id not in _sessions:
|
||||||
raise HTTPException(status_code=404, detail="Session not found")
|
raise HTTPException(status_code=404, detail="Session not found")
|
||||||
|
|
||||||
session = _sessions[session_id]
|
session = _sessions[session_id]
|
||||||
|
pdf_data = session.get("pdf_data")
|
||||||
|
|
||||||
# Read vocabulary from session (saved by compare-ocr)
|
if not pdf_data:
|
||||||
vocab_list = session.get("comparison_vocabulary")
|
raise HTTPException(status_code=400, detail="No PDF uploaded for this session")
|
||||||
if not vocab_list:
|
|
||||||
return {"success": False, "error": "Bitte zuerst Vergleich starten, bevor die Grid-Analyse ausgefuehrt wird."}
|
page_count = session.get("pdf_page_count", 1)
|
||||||
|
if page_number < 0 or page_number >= page_count:
|
||||||
|
raise HTTPException(status_code=400, detail=f"Invalid page number.")
|
||||||
|
|
||||||
|
# Convert page to image
|
||||||
|
image_data = await convert_pdf_page_to_image(pdf_data, page_number, thumbnail=False)
|
||||||
|
|
||||||
|
# --- Strategy 1: Use Tesseract bounding boxes + GridDetectionService ---
|
||||||
|
tess_page_data = session.get(f"tesseract_page_{page_number}")
|
||||||
|
|
||||||
|
if use_tesseract and TESSERACT_AVAILABLE and GRID_SERVICE_AVAILABLE:
|
||||||
|
try:
|
||||||
|
# Run Tesseract if not already cached
|
||||||
|
if not tess_page_data:
|
||||||
|
logger.info("Running Tesseract for grid analysis (not cached)")
|
||||||
|
from tesseract_vocab_extractor import run_tesseract_pipeline as _run_tess
|
||||||
|
tess_page_data = await _run_tess(image_data, lang="eng+deu")
|
||||||
|
session[f"tesseract_page_{page_number}"] = tess_page_data
|
||||||
|
session["tesseract_words"] = tess_page_data.get("words", [])
|
||||||
|
session["tesseract_image_width"] = tess_page_data.get("image_width", 0)
|
||||||
|
session["tesseract_image_height"] = tess_page_data.get("image_height", 0)
|
||||||
|
|
||||||
|
tess_words = tess_page_data.get("words", [])
|
||||||
|
img_w = tess_page_data.get("image_width", 0)
|
||||||
|
img_h = tess_page_data.get("image_height", 0)
|
||||||
|
|
||||||
|
if tess_words and img_w > 0 and img_h > 0:
|
||||||
|
service = GridDetectionService()
|
||||||
|
regions = service.convert_tesseract_regions(tess_words, img_w, img_h)
|
||||||
|
|
||||||
|
if regions:
|
||||||
|
grid_result = service.detect_grid(regions)
|
||||||
|
grid_dict = grid_result.to_dict()
|
||||||
|
|
||||||
|
# Merge LLM text if available (better quality than Tesseract text)
|
||||||
|
# The LLM vocab was stored during compare-ocr
|
||||||
|
grid_dict["source"] = "tesseract+grid_service"
|
||||||
|
grid_dict["word_count"] = len(tess_words)
|
||||||
|
|
||||||
|
logger.info(f"Tesseract grid: {grid_result.rows}x{grid_result.columns}, "
|
||||||
|
f"{grid_result.stats.get('recognized', 0)} recognized")
|
||||||
|
|
||||||
|
return {"success": True, "grid": grid_dict}
|
||||||
|
|
||||||
|
logger.info("Tesseract data insufficient, falling back to LLM")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Tesseract grid analysis failed, falling back to LLM: {e}")
|
||||||
|
import traceback
|
||||||
|
logger.debug(traceback.format_exc())
|
||||||
|
|
||||||
|
# --- Strategy 2: Fall back to Vision LLM ---
|
||||||
|
image_base64 = base64.b64encode(image_data).decode("utf-8")
|
||||||
|
|
||||||
|
grid_prompt = """Analyze this textbook page image. It contains a vocabulary table/grid.
|
||||||
|
|
||||||
|
Your task: Identify the TABLE STRUCTURE and extract each cell's content.
|
||||||
|
|
||||||
|
Return a JSON object with this EXACT structure:
|
||||||
|
{
|
||||||
|
"rows": <number of rows>,
|
||||||
|
"columns": <number of columns>,
|
||||||
|
"column_types": ["english", "german", "example"],
|
||||||
|
"entries": [
|
||||||
|
{
|
||||||
|
"row": 0,
|
||||||
|
"col": 0,
|
||||||
|
"text": "the word or phrase in this cell",
|
||||||
|
"column_type": "english",
|
||||||
|
"confidence": 0.95
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
Rules:
|
||||||
|
- row and col are 0-indexed
|
||||||
|
- column_type is one of: "english", "german", "example", "unknown"
|
||||||
|
- Detect whether each column contains English words, German translations, or example sentences
|
||||||
|
- Include ALL non-empty cells
|
||||||
|
- confidence is 0.0-1.0 based on how clear the text is
|
||||||
|
- If a cell is empty, don't include it
|
||||||
|
- Return ONLY the JSON, no other text"""
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Check if example column is present (at least one vocab has non-empty example)
|
import asyncio
|
||||||
has_examples = any(v.get("example", "").strip() for v in vocab_list)
|
|
||||||
num_cols = 3 if has_examples else 2
|
|
||||||
column_types = ["english", "german", "example"] if has_examples else ["english", "german"]
|
|
||||||
|
|
||||||
num_rows = len(vocab_list)
|
raw_text = ""
|
||||||
if num_rows == 0:
|
max_retries = 3
|
||||||
return {"success": False, "error": "Keine Vokabeln im Vergleichsergebnis gefunden."}
|
for attempt in range(max_retries):
|
||||||
|
async with httpx.AsyncClient(timeout=300.0) as client:
|
||||||
|
response = await client.post(
|
||||||
|
f"{OLLAMA_URL}/api/chat",
|
||||||
|
json={
|
||||||
|
"model": VISION_MODEL,
|
||||||
|
"messages": [{"role": "user", "content": grid_prompt, "images": [image_base64]}],
|
||||||
|
"stream": False,
|
||||||
|
"options": {"temperature": 0.1, "num_predict": 8192},
|
||||||
|
},
|
||||||
|
timeout=300.0,
|
||||||
|
)
|
||||||
|
|
||||||
|
if response.status_code == 500 and attempt < max_retries - 1:
|
||||||
|
wait_time = 10 * (attempt + 1)
|
||||||
|
logger.warning(f"Ollama returned 500, retrying in {wait_time}s (attempt {attempt + 1}/{max_retries})")
|
||||||
|
await asyncio.sleep(wait_time)
|
||||||
|
continue
|
||||||
|
elif response.status_code != 200:
|
||||||
|
error_detail = response.text[:200] if response.text else "Unknown error"
|
||||||
|
return {"success": False, "error": f"Ollama Fehler ({response.status_code}): {error_detail}. Bitte erneut versuchen - evtl. laeuft noch ein anderer OCR-Request."}
|
||||||
|
|
||||||
|
raw_text = response.json().get("message", {}).get("content", "")
|
||||||
|
break
|
||||||
|
|
||||||
|
# Parse JSON from response
|
||||||
|
import re
|
||||||
|
json_match = re.search(r'\{[\s\S]*\}', raw_text)
|
||||||
|
if not json_match:
|
||||||
|
return {"success": False, "error": "Could not parse grid structure from LLM response"}
|
||||||
|
|
||||||
|
grid_raw = json.loads(json_match.group())
|
||||||
|
|
||||||
|
num_rows = grid_raw.get("rows", 0)
|
||||||
|
num_cols = grid_raw.get("columns", 0)
|
||||||
|
column_types = grid_raw.get("column_types", [])
|
||||||
|
entries = grid_raw.get("entries", [])
|
||||||
|
|
||||||
|
if num_rows == 0 or num_cols == 0:
|
||||||
|
return {"success": False, "error": "No grid structure detected"}
|
||||||
|
|
||||||
|
# Ensure column_types has the right length
|
||||||
|
while len(column_types) < num_cols:
|
||||||
|
column_types.append("unknown")
|
||||||
|
|
||||||
|
# Build cell grid with percentage-based coordinates
|
||||||
row_height = 100.0 / num_rows
|
row_height = 100.0 / num_rows
|
||||||
col_width = 100.0 / num_cols
|
col_width = 100.0 / num_cols
|
||||||
|
|
||||||
|
# Track which cells have content
|
||||||
|
cell_map = {}
|
||||||
|
for entry in entries:
|
||||||
|
r = entry.get("row", 0)
|
||||||
|
c = entry.get("col", 0)
|
||||||
|
cell_map[(r, c)] = entry
|
||||||
|
|
||||||
cells = []
|
cells = []
|
||||||
recognized_count = 0
|
recognized_count = 0
|
||||||
empty_count = 0
|
empty_count = 0
|
||||||
|
problematic_count = 0
|
||||||
|
|
||||||
for r, vocab in enumerate(vocab_list):
|
for r in range(num_rows):
|
||||||
row_cells = []
|
row_cells = []
|
||||||
english = vocab.get("english", "").strip()
|
for c in range(num_cols):
|
||||||
german = vocab.get("german", "").strip()
|
|
||||||
example = vocab.get("example", "").strip() if has_examples else None
|
|
||||||
|
|
||||||
col_values = [("english", english), ("german", german)]
|
|
||||||
if has_examples:
|
|
||||||
col_values.append(("example", example))
|
|
||||||
|
|
||||||
for c, (col_type, text) in enumerate(col_values):
|
|
||||||
x = c * col_width
|
x = c * col_width
|
||||||
y = r * row_height
|
y = r * row_height
|
||||||
|
|
||||||
if text:
|
if (r, c) in cell_map:
|
||||||
status = "recognized"
|
entry = cell_map[(r, c)]
|
||||||
recognized_count += 1
|
text = entry.get("text", "").strip()
|
||||||
conf = 0.9
|
conf = entry.get("confidence", 0.8)
|
||||||
|
col_type = entry.get("column_type", column_types[c] if c < len(column_types) else "unknown")
|
||||||
|
|
||||||
|
if text:
|
||||||
|
status = "recognized" if conf >= 0.5 else "problematic"
|
||||||
|
if status == "recognized":
|
||||||
|
recognized_count += 1
|
||||||
|
else:
|
||||||
|
problematic_count += 1
|
||||||
|
else:
|
||||||
|
status = "empty"
|
||||||
|
empty_count += 1
|
||||||
else:
|
else:
|
||||||
|
text = ""
|
||||||
|
conf = 0.0
|
||||||
|
col_type = column_types[c] if c < len(column_types) else "unknown"
|
||||||
status = "empty"
|
status = "empty"
|
||||||
empty_count += 1
|
empty_count += 1
|
||||||
conf = 0.0
|
|
||||||
|
|
||||||
row_cells.append({
|
row_cells.append({
|
||||||
"row": r,
|
"row": r,
|
||||||
@@ -1553,7 +1932,7 @@ async def analyze_grid(session_id: str, page_number: int):
|
|||||||
"y": round(y, 2),
|
"y": round(y, 2),
|
||||||
"width": round(col_width, 2),
|
"width": round(col_width, 2),
|
||||||
"height": round(row_height, 2),
|
"height": round(row_height, 2),
|
||||||
"text": text or "",
|
"text": text,
|
||||||
"confidence": conf,
|
"confidence": conf,
|
||||||
"status": status,
|
"status": status,
|
||||||
"column_type": col_type,
|
"column_type": col_type,
|
||||||
@@ -1561,8 +1940,9 @@ async def analyze_grid(session_id: str, page_number: int):
|
|||||||
cells.append(row_cells)
|
cells.append(row_cells)
|
||||||
|
|
||||||
total = num_rows * num_cols
|
total = num_rows * num_cols
|
||||||
coverage = recognized_count / max(total, 1)
|
coverage = (recognized_count + problematic_count) / max(total, 1)
|
||||||
|
|
||||||
|
# Column and row boundaries as percentages
|
||||||
col_boundaries = [round(c * col_width, 2) for c in range(num_cols + 1)]
|
col_boundaries = [round(c * col_width, 2) for c in range(num_cols + 1)]
|
||||||
row_boundaries = [round(r * row_height, 2) for r in range(num_rows + 1)]
|
row_boundaries = [round(r * row_height, 2) for r in range(num_rows + 1)]
|
||||||
|
|
||||||
@@ -1574,9 +1954,10 @@ async def analyze_grid(session_id: str, page_number: int):
|
|||||||
"column_boundaries": col_boundaries,
|
"column_boundaries": col_boundaries,
|
||||||
"row_boundaries": row_boundaries,
|
"row_boundaries": row_boundaries,
|
||||||
"deskew_angle": 0.0,
|
"deskew_angle": 0.0,
|
||||||
|
"source": "vision_llm",
|
||||||
"stats": {
|
"stats": {
|
||||||
"recognized": recognized_count,
|
"recognized": recognized_count,
|
||||||
"problematic": 0,
|
"problematic": problematic_count,
|
||||||
"empty": empty_count,
|
"empty": empty_count,
|
||||||
"manual": 0,
|
"manual": 0,
|
||||||
"total": total,
|
"total": total,
|
||||||
@@ -1586,6 +1967,9 @@ async def analyze_grid(session_id: str, page_number: int):
|
|||||||
|
|
||||||
return {"success": True, "grid": grid_data}
|
return {"success": True, "grid": grid_data}
|
||||||
|
|
||||||
|
except httpx.TimeoutException:
|
||||||
|
logger.error("Grid analysis timed out")
|
||||||
|
return {"success": False, "error": "Grid-Analyse Timeout (Ollama zu langsam)"}
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Grid analysis failed: {e}")
|
logger.error(f"Grid analysis failed: {e}")
|
||||||
import traceback
|
import traceback
|
||||||
|
|||||||
Reference in New Issue
Block a user