Control-Pipeline (Pass 0a/0b, BatchDedup, Generator) als eigenstaendiger Service in Core, damit Compliance-Repo unabhaengig refakturiert werden kann. Schreibt weiterhin ins compliance-Schema der shared PostgreSQL. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
86 lines
2.5 KiB
Python
86 lines
2.5 KiB
Python
"""
|
|
Cross-Encoder Re-Ranking for RAG Search Results.
|
|
|
|
Uses BGE Reranker v2 (BAAI/bge-reranker-v2-m3, MIT license) to re-rank
|
|
search results from Qdrant for improved retrieval quality.
|
|
|
|
Lazy-loads the model on first use. Disabled by default (RERANK_ENABLED=false).
|
|
"""
|
|
|
|
import logging
|
|
import os
|
|
from typing import Optional
|
|
|
|
# Module-level logger, standard getLogger(__name__) pattern.
logger = logging.getLogger(__name__)

# Feature flag: reranking is opt-in. Only the literal string "true"
# (case-insensitive) enables it; anything else leaves it off.
# Both env vars are read once, at import time.
RERANK_ENABLED = os.getenv("RERANK_ENABLED", "false").lower() == "true"

# Hugging Face model id of the cross-encoder to load (MIT-licensed BGE reranker).
RERANK_MODEL = os.getenv("RERANK_MODEL", "BAAI/bge-reranker-v2-m3")
|
|
|
|
|
|
class Reranker:
    """Cross-encoder reranker using sentence-transformers.

    Wraps a lazily-loaded CrossEncoder so the (large) model is only pulled
    into memory the first time :meth:`rerank` actually needs it.
    """

    def __init__(self, model_name: Optional[str] = None):
        """Create a reranker without loading the model yet.

        Args:
            model_name: Hugging Face model id. Defaults to ``RERANK_MODEL``,
                resolved at call time rather than bound at import time so the
                class stays usable even if the constant changes.
        """
        # Lazy init: the CrossEncoder instance is created in _ensure_model().
        self._model = None
        self._model_name: str = model_name if model_name is not None else RERANK_MODEL

    def _ensure_model(self) -> None:
        """Load the model on first use; no-op on subsequent calls.

        Raises:
            ImportError: If sentence-transformers is not installed.
            Exception: Re-raised from the underlying loader on any failure.
        """
        if self._model is not None:
            return
        try:
            # Imported here so the heavy dependency stays optional at import time.
            from sentence_transformers import CrossEncoder

            logger.info("Loading reranker model: %s", self._model_name)
            self._model = CrossEncoder(self._model_name)
            logger.info("Reranker model loaded successfully")
        except ImportError:
            # logger.exception records the traceback, unlike logger.error.
            logger.exception(
                "sentence-transformers not installed. "
                "Install with: pip install sentence-transformers"
            )
            raise
        except Exception as e:
            logger.exception("Failed to load reranker model: %s", e)
            raise

    def rerank(
        self, query: str, texts: list[str], top_k: int = 5
    ) -> list[int]:
        """
        Return indices of top_k texts sorted by relevance (highest first).

        Args:
            query: The search query.
            texts: List of candidate texts to re-rank.
            top_k: Number of top results to return.

        Returns:
            List of indices into the original texts list, sorted by relevance.
            Empty when texts is empty or top_k <= 0.
        """
        # Guard both degenerate cases up front. Previously a negative top_k
        # slipped through to ranked[:top_k] and returned almost every index.
        if not texts or top_k <= 0:
            return []

        self._ensure_model()

        # CrossEncoder scores each (query, candidate) pair jointly.
        pairs = [[query, text] for text in texts]
        scores = self._model.predict(pairs)

        # Sort indices by score descending and keep the k best.
        ranked = sorted(range(len(scores)), key=lambda i: scores[i], reverse=True)
        return ranked[:top_k]
|
|
|
|
|
|
# Process-wide singleton, created lazily by get_reranker().
_reranker: Optional[Reranker] = None


def get_reranker() -> Optional[Reranker]:
    """Return the shared Reranker instance, or None when reranking is disabled.

    The instance is constructed on the first call after the RERANK_ENABLED
    flag allows it, then reused for the lifetime of the process.
    """
    global _reranker
    if RERANK_ENABLED:
        if _reranker is None:
            _reranker = Reranker()
        return _reranker
    return None
|