feat(rag): optimize RAG pipeline — JSON-Mode, CoT, Hybrid Search, Re-Ranking, Cross-Reg Dedup, chunk 1024

Phase 1 (LLM Quality):
- Add format=json to all Ollama payloads (obligation_extractor, control_generator, citation_backfill)
- Add Chain-of-Thought analysis steps to Pass 0a/0b system prompts

Phase 2 (Retrieval Quality):
- Hybrid search via Qdrant Query API with RRF fusion + automatic text index (legal_rag.go)
- Fallback to dense-only search if Query API unavailable
- Cross-encoder re-ranking with BGE Reranker v2 (RERANK_ENABLED=false by default)
- CPU-only PyTorch dependency to keep Docker image small

Phase 3 (Data Layer):
- Cross-regulation dedup pass (threshold 0.95) links controls across regulations
- DedupResult.link_type field distinguishes dedup_merge vs cross_regulation
- Chunk size defaults updated 512/50 → 1024/128 for new ingestions only
- Existing collections and controls are NOT affected

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-21 11:49:43 +01:00
parent c3a53fe5d2
commit c52dbdb8f1
24 changed files with 2620 additions and 139 deletions
@@ -937,3 +937,36 @@ class TestConstants:

    def test_candidate_threshold_is_60(self):
        assert EMBEDDING_CANDIDATE_THRESHOLD == 0.60
+
+
+# =============================================================================
+# Tests: Ollama JSON-Mode
+# =============================================================================
+
+
+class TestOllamaJsonMode:
+    """Verify that Ollama payloads include format=json."""
+
+    @pytest.mark.asyncio
+    async def test_ollama_payload_contains_format_json(self):
+        """_llm_ollama must send format='json' in the request payload."""
+        from compliance.services.obligation_extractor import _llm_ollama
+
+        mock_response = MagicMock()  # canned reply handed back by the mocked post()
+        mock_response.status_code = 200
+        mock_response.json.return_value = {
+            "message": {"content": '{"test": true}'}
+        }
+
+        with patch("compliance.services.obligation_extractor.httpx.AsyncClient") as mock_cls:  # swap AsyncClient for a mock while this block runs
+            mock_client = AsyncMock()
+            mock_client.post.return_value = mock_response
+            mock_cls.return_value.__aenter__ = AsyncMock(return_value=mock_client)  # entering the patched client as an async context manager returns mock_client
+            mock_cls.return_value.__aexit__ = AsyncMock(return_value=False)  # falsy result: exceptions raised inside the context are not suppressed
+
+            await _llm_ollama("test prompt", "system prompt")
+
+            mock_client.post.assert_called_once()
+            call_kwargs = mock_client.post.call_args  # recorded (args, kwargs) of the single post() call
+            payload = call_kwargs.kwargs.get("json") or call_kwargs[1].get("json")  # both operands read the `json=` keyword; index [1] is the kwargs mapping
+            assert payload["format"] == "json"  # raises TypeError instead if no `json=` keyword was passed (payload is None)