Some checks failed
CI/CD / go-lint (push) Has been skipped
CI/CD / python-lint (push) Has been skipped
CI/CD / nodejs-lint (push) Has been skipped
CI/CD / test-go-ai-compliance (push) Failing after 42s
CI/CD / test-python-backend-compliance (push) Successful in 1m38s
CI/CD / test-python-document-crawler (push) Successful in 20s
CI/CD / test-python-dsms-gateway (push) Successful in 17s
CI/CD / validate-canonical-controls (push) Successful in 10s
CI/CD / Deploy (push) Has been skipped
Phase 1 (LLM Quality): - Add format=json to all Ollama payloads (obligation_extractor, control_generator, citation_backfill) - Add Chain-of-Thought analysis steps to Pass 0a/0b system prompts Phase 2 (Retrieval Quality): - Hybrid search via Qdrant Query API with RRF fusion + automatic text index (legal_rag.go) - Fallback to dense-only search if Query API unavailable - Cross-encoder re-ranking with BGE Reranker v2 (RERANK_ENABLED=false by default) - CPU-only PyTorch dependency to keep Docker image small Phase 3 (Data Layer): - Cross-regulation dedup pass (threshold 0.95) links controls across regulations - DedupResult.link_type field distinguishes dedup_merge vs cross_regulation - Chunk size defaults updated 512/50 → 1024/128 for new ingestions only - Existing collections and controls are NOT affected Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
86 lines
2.5 KiB
Python
86 lines
2.5 KiB
Python
"""
|
|
Cross-Encoder Re-Ranking for RAG Search Results.
|
|
|
|
Uses BGE Reranker v2 (BAAI/bge-reranker-v2-m3, MIT license) to re-rank
|
|
search results from Qdrant for improved retrieval quality.
|
|
|
|
Lazy-loads the model on first use. Disabled by default (RERANK_ENABLED=false).
|
|
"""
|
|
|
|
import logging
|
|
import os
|
|
from typing import Optional
|
|
|
|
# Module-level logger, named after this module per project convention.
logger = logging.getLogger(__name__)

# Feature flag: re-ranking is opt-in and stays off unless RERANK_ENABLED=true.
RERANK_ENABLED = os.environ.get("RERANK_ENABLED", "false").lower() == "true"

# Hugging Face model id used for cross-encoder scoring (MIT-licensed BGE v2).
RERANK_MODEL = os.environ.get("RERANK_MODEL", "BAAI/bge-reranker-v2-m3")
|
|
|
|
|
|
class Reranker:
    """Cross-encoder reranker using sentence-transformers."""

    def __init__(self, model_name: str = RERANK_MODEL):
        # Model loading is deferred until the first rerank() call so that
        # merely constructing a Reranker stays cheap.
        self._model = None  # Lazy init
        self._model_name = model_name

    def _ensure_model(self) -> None:
        """Load model on first use."""
        if self._model is not None:
            return
        try:
            from sentence_transformers import CrossEncoder

            logger.info("Loading reranker model: %s", self._model_name)
            self._model = CrossEncoder(self._model_name)
            logger.info("Reranker model loaded successfully")
        except ImportError:
            logger.error(
                "sentence-transformers not installed. "
                "Install with: pip install sentence-transformers"
            )
            raise
        except Exception as e:
            logger.error("Failed to load reranker model: %s", e)
            raise

    def rerank(
        self, query: str, texts: list[str], top_k: int = 5
    ) -> list[int]:
        """
        Return indices of top_k texts sorted by relevance (highest first).

        Args:
            query: The search query.
            texts: List of candidate texts to re-rank.
            top_k: Number of top results to return.

        Returns:
            List of indices into the original texts list, sorted by relevance.
        """
        # Guard clause: nothing to score.
        if not texts:
            return []

        self._ensure_model()

        # The cross-encoder scores each (query, candidate) pair jointly.
        candidate_pairs = [[query, candidate] for candidate in texts]
        relevance_scores = self._model.predict(candidate_pairs)

        # Rank candidate indices by their score, best first, and keep top_k.
        by_score_desc = sorted(
            range(len(relevance_scores)),
            key=relevance_scores.__getitem__,
            reverse=True,
        )
        return by_score_desc[:top_k]
|
|
|
|
|
|
# Process-wide shared instance; created lazily on first enabled access.
_reranker: Optional[Reranker] = None


def get_reranker() -> Optional[Reranker]:
    """Get the shared reranker instance. Returns None if disabled."""
    global _reranker
    # Feature flag wins: when disabled, never construct (or return) a model.
    if not RERANK_ENABLED:
        return None
    if _reranker is None:
        _reranker = Reranker()  # First enabled call builds the singleton.
    return _reranker
|