"""Tests for EmbeddingClient.chunk_text() — ChunkResult with metadata (D2).""" from unittest.mock import AsyncMock, MagicMock, patch import pytest from embedding_client import ChunkResult, EmbeddingClient @pytest.fixture def client(): with patch("embedding_client.settings") as mock_settings: mock_settings.EMBEDDING_SERVICE_URL = "http://localhost:8087" return EmbeddingClient() def _mock_response(json_data: dict, status_code: int = 200): """Create a mock httpx response (sync methods like .json() and .raise_for_status()).""" resp = MagicMock() resp.status_code = status_code resp.json.return_value = json_data return resp @pytest.mark.asyncio async def test_chunk_text_returns_chunk_result(client): """chunk_text returns ChunkResult with both chunks and metadata.""" mock_json = { "chunks": ["chunk1 text", "chunk2 text"], "chunks_with_metadata": [ { "text": "chunk1 text", "section": "§ 25", "section_title": "Informationspflichten", "paragraph": "Abs. 1", "paragraph_num": 1, "page": None, "index": 0, }, { "text": "chunk2 text", "section": "§ 25", "section_title": "Informationspflichten", "paragraph": "Abs. 2", "paragraph_num": 2, "page": None, "index": 1, }, ], "count": 2, "strategy": "recursive", } with patch("httpx.AsyncClient") as mock_client_cls: mock_client = AsyncMock() mock_client.post.return_value = _mock_response(mock_json) mock_client.__aenter__ = AsyncMock(return_value=mock_client) mock_client.__aexit__ = AsyncMock(return_value=False) mock_client_cls.return_value = mock_client result = await client.chunk_text("some legal text") assert isinstance(result, ChunkResult) assert result.chunks == ["chunk1 text", "chunk2 text"] assert len(result.chunks_with_metadata) == 2 assert result.chunks_with_metadata[0]["section"] == "§ 25" assert result.chunks_with_metadata[1]["paragraph"] == "Abs. 2" @pytest.mark.asyncio async def test_chunk_text_without_metadata_field(client): """Embedding service response without chunks_with_metadata → empty list.""" mock_json = { "chunks": ["chunk1"], "count": 1, "strategy": "semantic", } with patch("httpx.AsyncClient") as mock_client_cls: mock_client = AsyncMock() mock_client.post.return_value = _mock_response(mock_json) mock_client.__aenter__ = AsyncMock(return_value=mock_client) mock_client.__aexit__ = AsyncMock(return_value=False) mock_client_cls.return_value = mock_client result = await client.chunk_text("text", strategy="semantic") assert isinstance(result, ChunkResult) assert result.chunks == ["chunk1"] assert result.chunks_with_metadata == [] @pytest.mark.asyncio async def test_chunk_text_with_null_metadata(client): """chunks_with_metadata: null in response → empty list.""" mock_json = { "chunks": ["chunk1"], "chunks_with_metadata": None, "count": 1, "strategy": "recursive", } with patch("httpx.AsyncClient") as mock_client_cls: mock_client = AsyncMock() mock_client.post.return_value = _mock_response(mock_json) mock_client.__aenter__ = AsyncMock(return_value=mock_client) mock_client.__aexit__ = AsyncMock(return_value=False) mock_client_cls.return_value = mock_client result = await client.chunk_text("text") assert result.chunks_with_metadata == [] @pytest.mark.asyncio async def test_chunk_text_empty(client): """Empty text → empty chunks and metadata.""" mock_json = { "chunks": [], "chunks_with_metadata": [], "count": 0, "strategy": "recursive", } with patch("httpx.AsyncClient") as mock_client_cls: mock_client = AsyncMock() mock_client.post.return_value = _mock_response(mock_json) mock_client.__aenter__ = AsyncMock(return_value=mock_client) mock_client.__aexit__ = AsyncMock(return_value=False) mock_client_cls.return_value = mock_client result = await client.chunk_text("") assert result.chunks == [] assert result.chunks_with_metadata == []