This repository has been archived on 2026-02-15. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
BreakPilot Dev 19855efacc
Some checks failed
Tests / Go Tests (push) Has been cancelled
Tests / Python Tests (push) Has been cancelled
Tests / Integration Tests (push) Has been cancelled
Tests / Go Lint (push) Has been cancelled
Tests / Python Lint (push) Has been cancelled
Tests / Security Scan (push) Has been cancelled
Tests / All Checks Passed (push) Has been cancelled
Security Scanning / Secret Scanning (push) Has been cancelled
Security Scanning / Dependency Vulnerability Scan (push) Has been cancelled
Security Scanning / Go Security Scan (push) Has been cancelled
Security Scanning / Python Security Scan (push) Has been cancelled
Security Scanning / Node.js Security Scan (push) Has been cancelled
Security Scanning / Docker Image Security (push) Has been cancelled
Security Scanning / Security Summary (push) Has been cancelled
CI/CD Pipeline / Go Tests (push) Has been cancelled
CI/CD Pipeline / Python Tests (push) Has been cancelled
CI/CD Pipeline / Website Tests (push) Has been cancelled
CI/CD Pipeline / Linting (push) Has been cancelled
CI/CD Pipeline / Security Scan (push) Has been cancelled
CI/CD Pipeline / Docker Build & Push (push) Has been cancelled
CI/CD Pipeline / Integration Tests (push) Has been cancelled
CI/CD Pipeline / Deploy to Staging (push) Has been cancelled
CI/CD Pipeline / Deploy to Production (push) Has been cancelled
CI/CD Pipeline / CI Summary (push) Has been cancelled
ci/woodpecker/manual/build-ci-image Pipeline was successful
ci/woodpecker/manual/main Pipeline failed
feat: BreakPilot PWA - Full codebase (clean push without large binaries)
All services: admin-v2, studio-v2, website, ai-compliance-sdk,
consent-service, klausur-service, voice-service, and infrastructure.
Large PDFs and compiled binaries excluded via .gitignore.
2026-02-11 13:25:58 +01:00

149 lines
4.8 KiB
Python

"""
Re-Ranking Module for RAG Quality Improvement
NOTE: This module delegates ML-heavy operations to the embedding-service via HTTP.
Implements two-stage retrieval:
1. Initial retrieval with bi-encoder (fast, many results)
2. Re-ranking with cross-encoder (slower, but much more accurate)
This consistently improves RAG accuracy by 20-35% and reduces hallucinations.
Supported re-rankers (configured in embedding-service):
- local: sentence-transformers cross-encoder (default, no API key needed)
- cohere: Cohere Rerank API (requires COHERE_API_KEY)
"""
import os
import logging
from typing import List, Tuple
# Logger named after this module.
logger = logging.getLogger(__name__)

# Settings are read from the environment once, at import time.
# They exist for backward compatibility - the actual configuration lives in
# embedding-service.
EMBEDDING_SERVICE_URL = os.environ.get(
    "EMBEDDING_SERVICE_URL", "http://embedding-service:8087"
)
# Timeout passed to the HTTP client used for re-rank requests.
EMBEDDING_SERVICE_TIMEOUT = float(
    os.environ.get("EMBEDDING_SERVICE_TIMEOUT", "60.0")
)
RERANKER_BACKEND = os.environ.get("RERANKER_BACKEND", "local")
# An empty string means no Cohere key is configured.
COHERE_API_KEY = os.environ.get("COHERE_API_KEY", "")
LOCAL_RERANKER_MODEL = os.environ.get(
    "LOCAL_RERANKER_MODEL", "BAAI/bge-reranker-v2-m3"
)
class RerankerError(Exception):
    """Raised when a re-ranking request cannot be completed."""
async def rerank_documents(
    query: str,
    documents: List[str],
    top_k: int = 5
) -> List[Tuple[int, float, str]]:
    """
    Re-rank documents using embedding-service.

    Posts the query and candidate texts to the service's ``/rerank`` endpoint
    and converts every entry of the returned ``results`` list into a tuple.

    Args:
        query: The search query
        documents: List of document texts to re-rank
        top_k: Number of top results to return

    Returns:
        List of (original_index, score, text) tuples, in the order delivered
        by the service (expected to be sorted by score descending).
        An empty list is returned without any request when ``documents``
        is empty.

    Raises:
        RerankerError: On timeout, on a non-success HTTP status, or on any
            other failure while requesting or parsing the response. The
            underlying exception is attached as ``__cause__``.
    """
    if not documents:
        return []

    # Lazy import: httpx is only needed once a request is actually made.
    import httpx

    try:
        async with httpx.AsyncClient(timeout=EMBEDDING_SERVICE_TIMEOUT) as client:
            response = await client.post(
                f"{EMBEDDING_SERVICE_URL}/rerank",
                json={
                    "query": query,
                    "documents": documents,
                    "top_k": top_k
                }
            )
            response.raise_for_status()
            data = response.json()
            return [
                (r["index"], r["score"], r["text"])
                for r in data["results"]
            ]
    except httpx.TimeoutException as e:
        # Chain explicitly so the original exception is kept as the cause.
        raise RerankerError("Embedding service timeout during re-ranking") from e
    except httpx.HTTPStatusError as e:
        raise RerankerError(
            f"Re-ranking error: {e.response.status_code} - {e.response.text}"
        ) from e
    except Exception as e:
        # Boundary handler: transport errors and malformed payloads
        # (missing keys, invalid JSON) all surface as RerankerError.
        raise RerankerError(f"Failed to re-rank documents: {e}") from e
async def rerank_search_results(
    query: str,
    results: List[dict],
    text_field: str = "text",
    top_k: int = 5
) -> List[dict]:
    """
    Re-rank a list of search-result dicts by the text stored in each of them.

    Convenience wrapper around rerank_documents for re-ranking Qdrant
    search results.

    Args:
        query: The search query
        results: List of search result dicts
        text_field: Key in each dict holding the text to rank on; a dict
            without that key contributes an empty string
        top_k: Number of top results to return

    Returns:
        Shallow copies of the selected result dicts, in the order returned by
        rerank_documents, each with two added keys: 'rerank_score' (the
        re-ranker score) and 'original_rank' (the dict's index in ``results``).
        An empty list when ``results`` is empty.
    """
    if not results:
        return []

    def _annotated(position, relevance):
        # Copy first so the caller's dict is never mutated.
        entry = results[position].copy()
        entry["rerank_score"] = relevance
        entry["original_rank"] = position
        return entry

    candidate_texts = [hit.get(text_field, "") for hit in results]
    ranking = await rerank_documents(query, candidate_texts, top_k)
    return [_annotated(position, relevance) for position, relevance, _ in ranking]
def get_reranker_info() -> dict:
    """
    Describe the current reranker configuration.

    Queries the embedding-service ``/models`` endpoint with a 5 second
    timeout. On a 200 response the reported ``reranker_backend`` and
    ``reranker_model`` are used, falling back to the local settings for
    missing keys. On any other status or any exception, the locally
    configured values are returned with ``embedding_service_available``
    set to False. Exceptions are logged as warnings, never raised.

    Returns:
        Dict with the keys backend, model, model_license, commercial_safe,
        cohere_configured, embedding_service_url and
        embedding_service_available.
    """
    import httpx

    # Locally configured view, returned as-is when the service gives no
    # usable answer.
    offline_info = {
        "backend": RERANKER_BACKEND,
        "model": LOCAL_RERANKER_MODEL,
        "model_license": "Unknown (embedding-service unavailable)",
        "commercial_safe": True,
        "cohere_configured": bool(COHERE_API_KEY),
        "embedding_service_url": EMBEDDING_SERVICE_URL,
        "embedding_service_available": False,
    }

    try:
        with httpx.Client(timeout=5.0) as client:
            reply = client.get(f"{EMBEDDING_SERVICE_URL}/models")
            if reply.status_code == 200:
                payload = reply.json()
                live_overrides = {
                    "backend": payload.get("reranker_backend", RERANKER_BACKEND),
                    "model": payload.get("reranker_model", LOCAL_RERANKER_MODEL),
                    "model_license": "See embedding-service",
                    "embedding_service_available": True,
                }
                # Overriding existing keys keeps the original key order.
                return {**offline_info, **live_overrides}
    except Exception as exc:
        logger.warning(f"Could not reach embedding-service: {exc}")

    return offline_info