feat(rag): optimize RAG pipeline — JSON-Mode, CoT, Hybrid Search, Re-Ranking, Cross-Reg Dedup, chunk 1024
Some checks failed
CI/CD / go-lint (push) Has been skipped
CI/CD / python-lint (push) Has been skipped
CI/CD / nodejs-lint (push) Has been skipped
CI/CD / test-go-ai-compliance (push) Failing after 42s
CI/CD / test-python-backend-compliance (push) Successful in 1m38s
CI/CD / test-python-document-crawler (push) Successful in 20s
CI/CD / test-python-dsms-gateway (push) Successful in 17s
CI/CD / validate-canonical-controls (push) Successful in 10s
CI/CD / Deploy (push) Has been skipped

Phase 1 (LLM Quality):
- Add format=json to all Ollama payloads (obligation_extractor, control_generator, citation_backfill)
- Add Chain-of-Thought analysis steps to Pass 0a/0b system prompts

Phase 2 (Retrieval Quality):
- Hybrid search via Qdrant Query API with RRF fusion + automatic text index (legal_rag.go)
- Fallback to dense-only search if Query API unavailable
- Cross-encoder re-ranking with BGE Reranker v2 (RERANK_ENABLED=false by default)
- CPU-only PyTorch dependency to keep Docker image small

Phase 3 (Data Layer):
- Cross-regulation dedup pass (threshold 0.95) links controls across regulations
- DedupResult.link_type field distinguishes dedup_merge vs cross_regulation
- Chunk size defaults updated 512/50 → 1024/128 for new ingestions only
- Existing collections and controls are NOT affected

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-03-21 11:49:43 +01:00
parent c3a53fe5d2
commit c52dbdb8f1
24 changed files with 2620 additions and 139 deletions

View File

@@ -32,11 +32,13 @@ func TestSearchCollection_UsesCorrectCollection(t *testing.T) {
// Parse qdrant mock host/port
client := &LegalRAGClient{
qdrantURL: qdrantMock.URL,
ollamaURL: ollamaMock.URL,
embeddingModel: "bge-m3",
collection: "bp_compliance_ce",
httpClient: http.DefaultClient,
qdrantURL: qdrantMock.URL,
ollamaURL: ollamaMock.URL,
embeddingModel: "bge-m3",
collection: "bp_compliance_ce",
textIndexEnsured: make(map[string]bool),
hybridEnabled: false, // dense-only for this test
httpClient: http.DefaultClient,
}
// Test with explicit collection
@@ -69,11 +71,13 @@ func TestSearchCollection_FallbackDefault(t *testing.T) {
defer qdrantMock.Close()
client := &LegalRAGClient{
qdrantURL: qdrantMock.URL,
ollamaURL: ollamaMock.URL,
embeddingModel: "bge-m3",
collection: "bp_compliance_ce",
httpClient: http.DefaultClient,
qdrantURL: qdrantMock.URL,
ollamaURL: ollamaMock.URL,
embeddingModel: "bge-m3",
collection: "bp_compliance_ce",
textIndexEnsured: make(map[string]bool),
hybridEnabled: false,
httpClient: http.DefaultClient,
}
// Test with empty collection (should fall back to default)
@@ -140,8 +144,9 @@ func TestScrollChunks_ReturnsChunksAndNextOffset(t *testing.T) {
defer qdrantMock.Close()
client := &LegalRAGClient{
qdrantURL: qdrantMock.URL,
httpClient: http.DefaultClient,
qdrantURL: qdrantMock.URL,
textIndexEnsured: make(map[string]bool),
httpClient: http.DefaultClient,
}
chunks, nextOffset, err := client.ScrollChunks(context.Background(), "bp_compliance_ce", "", 100)
@@ -196,8 +201,9 @@ func TestScrollChunks_EmptyCollection_ReturnsEmpty(t *testing.T) {
defer qdrantMock.Close()
client := &LegalRAGClient{
qdrantURL: qdrantMock.URL,
httpClient: http.DefaultClient,
qdrantURL: qdrantMock.URL,
textIndexEnsured: make(map[string]bool),
httpClient: http.DefaultClient,
}
chunks, nextOffset, err := client.ScrollChunks(context.Background(), "bp_compliance_ce", "", 100)
@@ -230,8 +236,9 @@ func TestScrollChunks_WithOffset_SendsOffset(t *testing.T) {
defer qdrantMock.Close()
client := &LegalRAGClient{
qdrantURL: qdrantMock.URL,
httpClient: http.DefaultClient,
qdrantURL: qdrantMock.URL,
textIndexEnsured: make(map[string]bool),
httpClient: http.DefaultClient,
}
_, _, err := client.ScrollChunks(context.Background(), "bp_compliance_ce", "some-offset-id", 50)
@@ -263,9 +270,10 @@ func TestScrollChunks_SendsAPIKey(t *testing.T) {
defer qdrantMock.Close()
client := &LegalRAGClient{
qdrantURL: qdrantMock.URL,
qdrantAPIKey: "test-api-key-123",
httpClient: http.DefaultClient,
qdrantURL: qdrantMock.URL,
qdrantAPIKey: "test-api-key-123",
textIndexEnsured: make(map[string]bool),
httpClient: http.DefaultClient,
}
_, _, err := client.ScrollChunks(context.Background(), "bp_compliance_ce", "", 10)
@@ -310,11 +318,13 @@ func TestSearch_StillWorks(t *testing.T) {
defer qdrantMock.Close()
client := &LegalRAGClient{
qdrantURL: qdrantMock.URL,
ollamaURL: ollamaMock.URL,
embeddingModel: "bge-m3",
collection: "bp_compliance_ce",
httpClient: http.DefaultClient,
qdrantURL: qdrantMock.URL,
ollamaURL: ollamaMock.URL,
embeddingModel: "bge-m3",
collection: "bp_compliance_ce",
textIndexEnsured: make(map[string]bool),
hybridEnabled: false,
httpClient: http.DefaultClient,
}
results, err := client.Search(context.Background(), "DSGVO Art. 35", nil, 5)
@@ -334,3 +344,257 @@ func TestSearch_StillWorks(t *testing.T) {
t.Errorf("Expected default collection in URL, got: %s", requestedURL)
}
}
// --- Hybrid Search Tests ---
// TestHybridSearch_UsesQueryAPI checks the hybrid path of Search. With
// hybridEnabled set, the test expects a request whose path contains "/index",
// a request to "/points/query" whose body carries a "prefetch" field and
// query.fusion == "rrf", and the single hit served by the mock to be returned
// as the search result.
func TestHybridSearch_UsesQueryAPI(t *testing.T) {
	var requestedPaths []string

	ollamaMock := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if err := json.NewEncoder(w).Encode(ollamaEmbeddingResponse{
			Embedding: make([]float64, 1024),
		}); err != nil {
			t.Errorf("encoding Ollama mock response: %v", err)
		}
	}))
	defer ollamaMock.Close()

	qdrantMock := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		requestedPaths = append(requestedPaths, r.URL.Path)

		if strings.Contains(r.URL.Path, "/index") {
			// Text index creation — return OK
			w.WriteHeader(http.StatusOK)
			w.Write([]byte(`{"result":{"operation_id":1,"status":"completed"}}`))
			return
		}

		if strings.Contains(r.URL.Path, "/points/query") {
			// Verify the query request body has prefetch + fusion.
			// A body that cannot be decoded is reported as such instead of
			// surfacing as a misleading "missing field" failure below.
			var reqBody map[string]interface{}
			if err := json.NewDecoder(r.Body).Decode(&reqBody); err != nil {
				t.Errorf("Query request body is not valid JSON: %v", err)
				http.Error(w, "invalid JSON body", http.StatusBadRequest)
				return
			}

			if _, ok := reqBody["prefetch"]; !ok {
				t.Error("Query request missing 'prefetch' field")
			}
			queryField, ok := reqBody["query"].(map[string]interface{})
			if !ok || queryField["fusion"] != "rrf" {
				t.Error("Query request missing 'query.fusion=rrf'")
			}

			if err := json.NewEncoder(w).Encode(qdrantQueryResponse{
				Result: []qdrantSearchHit{
					{
						ID:    "1",
						Score: 0.88,
						Payload: map[string]interface{}{
							"chunk_text":         "Hybrid result",
							"regulation_id":      "eu_2016_679",
							"regulation_name_de": "DSGVO",
							"regulation_short":   "DSGVO",
							"category":           "regulation",
							"source":             "https://example.com",
						},
					},
				},
			}); err != nil {
				t.Errorf("encoding Qdrant query mock response: %v", err)
			}
			return
		}

		// Fallback: should not reach dense search
		t.Error("Unexpected dense search call when hybrid succeeded")
		if err := json.NewEncoder(w).Encode(qdrantSearchResponse{Result: []qdrantSearchHit{}}); err != nil {
			t.Errorf("encoding Qdrant search mock response: %v", err)
		}
	}))
	defer qdrantMock.Close()

	client := &LegalRAGClient{
		qdrantURL:        qdrantMock.URL,
		ollamaURL:        ollamaMock.URL,
		embeddingModel:   "bge-m3",
		collection:       "bp_compliance_ce",
		textIndexEnsured: make(map[string]bool),
		hybridEnabled:    true,
		httpClient:       http.DefaultClient,
	}

	results, err := client.Search(context.Background(), "DSGVO Art. 35", nil, 5)
	if err != nil {
		t.Fatalf("Hybrid search failed: %v", err)
	}
	if len(results) != 1 {
		t.Fatalf("Expected 1 result, got %d", len(results))
	}
	if results[0].Text != "Hybrid result" {
		t.Errorf("Expected 'Hybrid result', got '%s'", results[0].Text)
	}

	// Verify that both the text index request and the Query API request
	// were actually observed by the mock.
	hasIndex := false
	hasQuery := false
	for _, p := range requestedPaths {
		if strings.Contains(p, "/index") {
			hasIndex = true
		}
		if strings.Contains(p, "/points/query") {
			hasQuery = true
		}
	}
	if !hasIndex {
		t.Error("Expected text index creation call")
	}
	if !hasQuery {
		t.Error("Expected Query API call")
	}
}
// TestHybridSearch_FallbackToDense runs Search with hybridEnabled set against
// a Qdrant mock that answers the text-index request with 400 and serves
// results only on /points/search. The test passes when the dense hit comes
// back as the single result and /points/search was requested.
func TestHybridSearch_FallbackToDense(t *testing.T) {
	var seenPaths []string

	embedHandler := func(w http.ResponseWriter, r *http.Request) {
		json.NewEncoder(w).Encode(ollamaEmbeddingResponse{
			Embedding: make([]float64, 1024),
		})
	}
	ollamaMock := httptest.NewServer(http.HandlerFunc(embedHandler))
	defer ollamaMock.Close()

	qdrantHandler := func(w http.ResponseWriter, r *http.Request) {
		path := r.URL.Path
		seenPaths = append(seenPaths, path)

		switch {
		case strings.Contains(path, "/index"):
			// Simulate text index failure (old Qdrant version)
			w.WriteHeader(http.StatusBadRequest)
			w.Write([]byte(`{"status":{"error":"not supported"}}`))

		case strings.Contains(path, "/points/search"):
			// Dense fallback
			payload := map[string]interface{}{
				"chunk_text":         "Dense fallback result",
				"regulation_id":      "eu_2016_679",
				"regulation_name_de": "DSGVO",
				"regulation_short":   "DSGVO",
				"category":           "regulation",
				"source":             "https://example.com",
			}
			json.NewEncoder(w).Encode(qdrantSearchResponse{
				Result: []qdrantSearchHit{
					{ID: "2", Score: 0.90, Payload: payload},
				},
			})

		default:
			// Any other endpoint is answered with a server error.
			w.WriteHeader(http.StatusInternalServerError)
		}
	}
	qdrantMock := httptest.NewServer(http.HandlerFunc(qdrantHandler))
	defer qdrantMock.Close()

	client := &LegalRAGClient{
		httpClient:       http.DefaultClient,
		qdrantURL:        qdrantMock.URL,
		ollamaURL:        ollamaMock.URL,
		embeddingModel:   "bge-m3",
		collection:       "bp_compliance_ce",
		hybridEnabled:    true,
		textIndexEnsured: make(map[string]bool),
	}

	results, err := client.Search(context.Background(), "test query", nil, 5)
	if err != nil {
		t.Fatalf("Fallback search failed: %v", err)
	}
	if got := len(results); got != 1 {
		t.Fatalf("Expected 1 result, got %d", got)
	}
	if text := results[0].Text; text != "Dense fallback result" {
		t.Errorf("Expected 'Dense fallback result', got '%s'", text)
	}

	// Verify it fell back to dense search
	fellBack := false
	for _, p := range seenPaths {
		if strings.Contains(p, "/points/search") {
			fellBack = true
			break
		}
	}
	if !fellBack {
		t.Error("Expected fallback to dense /points/search")
	}
}
// TestEnsureTextIndex_OnlyCalledOnce calls ensureTextIndex three times for the
// same collection and asserts that the Qdrant mock saw exactly one request
// whose path contains "/index".
func TestEnsureTextIndex_OnlyCalledOnce(t *testing.T) {
	indexRequests := 0

	handler := func(w http.ResponseWriter, r *http.Request) {
		if !strings.Contains(r.URL.Path, "/index") {
			// Any non-index request gets an empty result list.
			w.WriteHeader(http.StatusOK)
			w.Write([]byte(`{"result":[]}`))
			return
		}
		indexRequests++
		w.WriteHeader(http.StatusOK)
		w.Write([]byte(`{"result":{"operation_id":1,"status":"completed"}}`))
	}
	qdrantMock := httptest.NewServer(http.HandlerFunc(handler))
	defer qdrantMock.Close()

	client := &LegalRAGClient{
		httpClient:       http.DefaultClient,
		qdrantURL:        qdrantMock.URL,
		textIndexEnsured: make(map[string]bool),
	}

	ctx := context.Background()
	// The return value is intentionally discarded: only the number of
	// requests reaching the mock is under test here.
	for attempt := 0; attempt < 3; attempt++ {
		_ = client.ensureTextIndex(ctx, "test_collection")
	}

	if indexRequests != 1 {
		t.Errorf("Expected ensureTextIndex to call Qdrant exactly once, called %d times", indexRequests)
	}
}
// TestHybridDisabled_UsesDenseOnly runs Search with hybridEnabled set to false
// and fails if the Qdrant mock received any request whose path contains
// "/points/query" or "/index".
func TestHybridDisabled_UsesDenseOnly(t *testing.T) {
	var seenPaths []string

	embedHandler := func(w http.ResponseWriter, r *http.Request) {
		resp := ollamaEmbeddingResponse{Embedding: make([]float64, 1024)}
		json.NewEncoder(w).Encode(resp)
	}
	ollamaMock := httptest.NewServer(http.HandlerFunc(embedHandler))
	defer ollamaMock.Close()

	// The Qdrant mock records every path and always answers with an empty
	// search result, regardless of the endpoint.
	qdrantHandler := func(w http.ResponseWriter, r *http.Request) {
		seenPaths = append(seenPaths, r.URL.Path)
		empty := qdrantSearchResponse{Result: []qdrantSearchHit{}}
		json.NewEncoder(w).Encode(empty)
	}
	qdrantMock := httptest.NewServer(http.HandlerFunc(qdrantHandler))
	defer qdrantMock.Close()

	client := &LegalRAGClient{
		httpClient:       http.DefaultClient,
		qdrantURL:        qdrantMock.URL,
		ollamaURL:        ollamaMock.URL,
		embeddingModel:   "bge-m3",
		collection:       "bp_compliance_ce",
		hybridEnabled:    false,
		textIndexEnsured: make(map[string]bool),
	}

	if _, err := client.Search(context.Background(), "test", nil, 5); err != nil {
		t.Fatalf("Search failed: %v", err)
	}

	for i := range seenPaths {
		path := seenPaths[i]
		if strings.Contains(path, "/points/query") {
			t.Error("Query API should not be called when hybrid is disabled")
		}
		if strings.Contains(path, "/index") {
			t.Error("Text index should not be created when hybrid is disabled")
		}
	}
}