feat: PaddleOCR Remote-Engine (PP-OCRv5 Latin auf Hetzner x86_64)
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 31s
CI / test-go-edu-search (push) Successful in 29s
CI / test-python-klausur (push) Failing after 2m7s
CI / test-python-agent-core (push) Successful in 21s
CI / test-nodejs-website (push) Successful in 21s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 31s
CI / test-go-edu-search (push) Successful in 29s
CI / test-python-klausur (push) Failing after 2m7s
CI / test-python-agent-core (push) Successful in 21s
CI / test-nodejs-website (push) Successful in 21s
PaddleOCR als neue engine=paddle Option in der OCR-Pipeline. Microservice auf Hetzner (paddleocr-service/), async HTTP-Client (paddleocr_remote.py), Frontend-Dropdown, automatisch words_first. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
101
klausur-service/backend/tests/test_paddleocr_remote.py
Normal file
101
klausur-service/backend/tests/test_paddleocr_remote.py
Normal file
@@ -0,0 +1,101 @@
|
||||
"""Tests for the remote PaddleOCR client."""
|
||||
|
||||
import pytest
|
||||
from unittest.mock import AsyncMock, patch, MagicMock
|
||||
|
||||
|
||||
# Canned JSON body that the mocked HTTP response returns in the tests below:
# a list of two word entries (text, left/top/width/height box fields and a
# "conf" value) followed by the reported image width and height.
SAMPLE_RESPONSE = {
    "words": [
        {
            "text": "Hello",
            "left": 10,
            "top": 20,
            "width": 80,
            "height": 30,
            "conf": 95.2,
        },
        {
            "text": "World",
            "left": 100,
            "top": 20,
            "width": 90,
            "height": 30,
            "conf": 91.0,
        },
    ],
    "image_width": 640,
    "image_height": 480,
}
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_ocr_remote_paddle_success():
    """Successful OCR call returns word dicts and image dimensions.

    The HTTP layer is fully mocked: ``httpx.AsyncClient`` is patched to hand
    back ``mock_client``, whose ``post`` resolves to a response carrying
    ``SAMPLE_RESPONSE``. The test also checks that the configured API key is
    forwarded in the ``X-API-Key`` request header.
    """
    # Synchronous response object: .json() yields the canned payload and
    # .raise_for_status() is a no-op (i.e. the request "succeeded").
    mock_response = MagicMock()
    mock_response.json.return_value = SAMPLE_RESPONSE
    mock_response.raise_for_status = MagicMock()

    # Async client double. Entering it as an async context manager yields the
    # same mock, so `.post` is recorded on `mock_client` itself; __aexit__
    # returns False so exceptions from the `async with` body are not swallowed.
    mock_client = AsyncMock()
    mock_client.post.return_value = mock_response
    mock_client.__aenter__ = AsyncMock(return_value=mock_client)
    mock_client.__aexit__ = AsyncMock(return_value=False)

    with patch("services.paddleocr_remote.PADDLEOCR_REMOTE_URL", "https://example.com:8095"), \
         patch("services.paddleocr_remote.PADDLEOCR_API_KEY", "test-key"), \
         patch("httpx.AsyncClient", return_value=mock_client):

        from services.paddleocr_remote import ocr_remote_paddle

        words, w, h = await ocr_remote_paddle(b"fake-png-bytes", "test.png")

    assert len(words) == 2
    assert words[0]["text"] == "Hello"
    assert words[1]["text"] == "World"
    assert w == 640
    assert h == 480

    # Verify API key was sent
    post_call = mock_client.post.call_args
    assert post_call.kwargs["headers"]["X-API-Key"] == "test-key"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_ocr_remote_paddle_no_url():
    """Raises RuntimeError when PADDLEOCR_REMOTE_URL is not configured.

    The module-level ``PADDLEOCR_REMOTE_URL`` is patched to an empty string
    and the call is expected to fail with a ``RuntimeError`` whose message
    matches "PADDLEOCR_REMOTE_URL not configured".
    """
    with patch("services.paddleocr_remote.PADDLEOCR_REMOTE_URL", ""):
        from services.paddleocr_remote import ocr_remote_paddle

        # No HTTP mock is installed here; the test expects the error to be
        # raised by the call itself.
        with pytest.raises(RuntimeError, match="PADDLEOCR_REMOTE_URL not configured"):
            await ocr_remote_paddle(b"fake-png-bytes")
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_ocr_remote_paddle_no_api_key():
    """When no API key is set, no X-API-Key header is sent.

    ``PADDLEOCR_API_KEY`` is patched to an empty string; the call must still
    return the two words from ``SAMPLE_RESPONSE``, and the ``headers`` keyword
    passed to ``post`` must not contain an ``X-API-Key`` entry.
    """
    # Response double returning the canned payload; raise_for_status is a
    # no-op so the call is treated as successful.
    mock_response = MagicMock()
    mock_response.json.return_value = SAMPLE_RESPONSE
    mock_response.raise_for_status = MagicMock()

    # Async client double that yields itself from `async with` and does not
    # suppress exceptions on exit.
    mock_client = AsyncMock()
    mock_client.post.return_value = mock_response
    mock_client.__aenter__ = AsyncMock(return_value=mock_client)
    mock_client.__aexit__ = AsyncMock(return_value=False)

    with patch("services.paddleocr_remote.PADDLEOCR_REMOTE_URL", "https://example.com:8095"), \
         patch("services.paddleocr_remote.PADDLEOCR_API_KEY", ""), \
         patch("httpx.AsyncClient", return_value=mock_client):

        from services.paddleocr_remote import ocr_remote_paddle

        words, w, h = await ocr_remote_paddle(b"fake-png-bytes")

    assert len(words) == 2

    # Inspect the recorded POST: the headers mapping must omit the key header.
    post_call = mock_client.post.call_args
    assert "X-API-Key" not in post_call.kwargs["headers"]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_ocr_remote_paddle_http_error():
    """HTTP errors are raised to the caller.

    The mocked response's ``raise_for_status`` raises
    ``httpx.HTTPStatusError``; the test expects that exception to propagate
    out of ``ocr_remote_paddle`` unchanged in type.
    """
    import httpx

    # Response double whose status check fails with a 401-style error. The
    # request/response attached to the exception are placeholders.
    mock_response = MagicMock()
    mock_response.raise_for_status.side_effect = httpx.HTTPStatusError(
        "401 Unauthorized", request=MagicMock(), response=MagicMock()
    )

    # Async client double that yields itself from `async with`; __aexit__
    # returns False so the HTTPStatusError is not swallowed on exit.
    mock_client = AsyncMock()
    mock_client.post.return_value = mock_response
    mock_client.__aenter__ = AsyncMock(return_value=mock_client)
    mock_client.__aexit__ = AsyncMock(return_value=False)

    with patch("services.paddleocr_remote.PADDLEOCR_REMOTE_URL", "https://example.com:8095"), \
         patch("services.paddleocr_remote.PADDLEOCR_API_KEY", "wrong-key"), \
         patch("httpx.AsyncClient", return_value=mock_client):

        from services.paddleocr_remote import ocr_remote_paddle

        with pytest.raises(httpx.HTTPStatusError):
            await ocr_remote_paddle(b"fake-png-bytes")
|
||||
Reference in New Issue
Block a user