klausur-service (11 files): - cv_gutter_repair, ocr_pipeline_regression, upload_api - ocr_pipeline_sessions, smart_spell, nru_worksheet_generator - ocr_pipeline_overlays, mail/aggregator, zeugnis_api - cv_syllable_detect, self_rag backend-lehrer (17 files): - classroom_engine/suggestions, generators/quiz_generator - worksheets_api, llm_gateway/comparison, state_engine_api - classroom/models (→ 4 submodules), services/file_processor - alerts_agent/api/wizard+digests+routes, content_generators/pdf - classroom/routes/sessions, llm_gateway/inference - classroom_engine/analytics, auth/keycloak_auth - alerts_agent/processing/rule_engine, ai_processor/print_versions agent-core (5 files): - brain/memory_store, brain/knowledge_graph, brain/context_manager - orchestrator/supervisor, sessions/session_manager admin-lehrer (5 components): - GridOverlay, StepGridReview, DevOpsPipelineSidebar - DataFlowDiagram, sbom/wizard/page website (2 files): - DependencyMap, lehrer/abitur-archiv Other: nibis_ingestion, grid_detection_service, export-doclayout-onnx Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
256 lines
8.7 KiB
Python
256 lines
8.7 KiB
Python
"""
|
|
Self-RAG Retrieval — query reformulation, retrieval loop, info.
|
|
|
|
Extracted from self_rag.py for modularity.
|
|
|
|
IMPORTANT: Self-RAG is DISABLED by default for privacy reasons!
|
|
When enabled, search queries and retrieved documents are sent to OpenAI API
|
|
for relevance grading and query reformulation.
|
|
"""
|
|
|
|
import os
import re
from typing import List, Dict, Optional

import httpx

from self_rag_grading import (
    SELF_RAG_ENABLED,
    OPENAI_API_KEY,
    SELF_RAG_MODEL,
    RELEVANCE_THRESHOLD,
    GROUNDING_THRESHOLD,
    MAX_RETRIEVAL_ATTEMPTS,
    RetrievalDecision,
    filter_relevant_documents,
    decide_retrieval_strategy,
)
|
async def reformulate_query(
    original_query: str,
    context: Optional[str] = None,
    previous_results_summary: Optional[str] = None,
) -> str:
    """Reformulate a search query to improve retrieval quality.

    Uses the OpenAI chat API (model ``SELF_RAG_MODEL``) when an API key is
    configured; otherwise falls back to a purely local expansion of common
    German exam abbreviations.

    Args:
        original_query: The query that produced poor results.
        context: Optional context string (e.g. subject, niveau).
        previous_results_summary: Short note on why the previous retrieval
            attempt failed, fed to the LLM as a hint.

    Returns:
        The reformulated query, or ``original_query`` unchanged on any
        API error, non-200 response, or exception.
    """
    if not OPENAI_API_KEY:
        # Offline fallback: annotate known abbreviations with their expansion.
        # Replace only on word boundaries so words that merely *contain* an
        # abbreviation are left intact (naive substring replacement would
        # turn "Abitur" into "Abi (Abitur)tur").
        expansions = {
            "EA": "erhoehtes Anforderungsniveau",
            "eA": "erhoehtes Anforderungsniveau",
            "GA": "grundlegendes Anforderungsniveau",
            "gA": "grundlegendes Anforderungsniveau",
            "AFB": "Anforderungsbereich",
            "Abi": "Abitur",
        }
        reformulated = original_query
        for abbr, expansion in expansions.items():
            reformulated = re.sub(
                rf"\b{re.escape(abbr)}\b",
                f"{abbr} ({expansion})",
                reformulated,
            )
        return reformulated

    prompt = f"""Du bist ein Experte fuer deutsche Bildungsstandards und Pruefungsanforderungen.

Die folgende Suchanfrage hat keine guten Ergebnisse geliefert:
ORIGINAL: {original_query}

{f"KONTEXT: {context}" if context else ""}
{f"PROBLEM MIT VORHERIGEN ERGEBNISSEN: {previous_results_summary}" if previous_results_summary else ""}

Formuliere die Anfrage so um, dass sie:
1. Formellere/technischere Begriffe verwendet (wie in offiziellen Dokumenten)
2. Relevante Synonyme oder verwandte Begriffe einschliesst
3. Spezifischer auf Erwartungshorizonte/Bewertungskriterien ausgerichtet ist

Antworte NUR mit der umformulierten Suchanfrage, ohne Erklaerung."""

    try:
        async with httpx.AsyncClient() as client:
            response = await client.post(
                "https://api.openai.com/v1/chat/completions",
                headers={
                    "Authorization": f"Bearer {OPENAI_API_KEY}",
                    "Content-Type": "application/json"
                },
                json={
                    "model": SELF_RAG_MODEL,
                    "messages": [{"role": "user", "content": prompt}],
                    "max_tokens": 100,
                    "temperature": 0.3,
                },
                timeout=30.0
            )

        if response.status_code != 200:
            # Degrade gracefully: a failed reformulation must not break retrieval.
            return original_query

        return response.json()["choices"][0]["message"]["content"].strip()

    except Exception:
        # Network/parsing errors also degrade gracefully to the original query.
        return original_query
|
async def self_rag_retrieve(
    query: str,
    search_func,
    subject: Optional[str] = None,
    niveau: Optional[str] = None,
    initial_top_k: int = 10,
    final_top_k: int = 5,
    **search_kwargs
) -> Dict:
    """
    Perform Self-RAG enhanced retrieval with reflection and correction.

    This implements a retrieval loop that:
    1. Retrieves initial documents
    2. Grades them for relevance
    3. Decides if more retrieval is needed
    4. Reformulates query if necessary
    5. Returns filtered, high-quality context

    Args:
        query: The search query
        search_func: Async function to perform the actual search; called as
            ``await search_func(query=..., limit=..., **search_kwargs)`` and
            expected to return a list of documents (or a falsy value for none)
        subject: Optional subject context (passed to query reformulation)
        niveau: Optional niveau context
            (NOTE(review): accepted but not used anywhere in this body — confirm)
        initial_top_k: Number of documents for initial retrieval
        final_top_k: Maximum documents to return
        **search_kwargs: Additional args for search_func

    Returns:
        Dict with results, metadata, and reflection trace. Keys vary by exit
        path: the quick-exit path (Self-RAG disabled) returns only
        ``results``/``self_rag_enabled``/``query_used``; all other paths add
        attempt/decision/trace metadata, and non-SUFFICIENT exits add a
        ``warning`` string.
    """
    if not SELF_RAG_ENABLED:
        # Privacy-preserving default: plain search, no data sent for grading.
        results = await search_func(query=query, limit=final_top_k, **search_kwargs)
        return {
            "results": results,
            "self_rag_enabled": False,
            "query_used": query,
        }

    # Reflection trace: one dict appended per attempt, mutated in place as
    # grading/decision info for that attempt becomes available.
    trace: List[Dict] = []
    current_query = query
    attempt = 1

    while attempt <= MAX_RETRIEVAL_ATTEMPTS:
        # Step 1: Retrieve documents (over-fetch with initial_top_k so the
        # relevance filter has candidates to discard)
        results = await search_func(query=current_query, limit=initial_top_k, **search_kwargs)

        trace.append({
            "attempt": attempt,
            "query": current_query,
            "retrieved_count": len(results) if results else 0,
        })

        if not results:
            # Nothing found: skip grading, reformulate (from the ORIGINAL
            # query, not the current one) and retry if attempts remain.
            attempt += 1
            if attempt <= MAX_RETRIEVAL_ATTEMPTS:
                current_query = await reformulate_query(
                    query,
                    context=f"Fach: {subject}" if subject else None,
                    previous_results_summary="Keine Dokumente gefunden"
                )
                trace[-1]["action"] = "reformulate"
                trace[-1]["new_query"] = current_query
            continue

        # Step 2: Grade documents for relevance (external LLM call; splits
        # results into relevant vs. filtered-out)
        relevant, filtered = await filter_relevant_documents(current_query, results)

        trace[-1]["relevant_count"] = len(relevant)
        trace[-1]["filtered_count"] = len(filtered)

        # Step 3: Decide what to do
        decision, decision_meta = await decide_retrieval_strategy(
            current_query, relevant, attempt
        )

        trace[-1]["decision"] = decision.value
        trace[-1]["decision_meta"] = decision_meta

        if decision == RetrievalDecision.SUFFICIENT:
            # We have good context, return it
            return {
                "results": relevant[:final_top_k],
                "self_rag_enabled": True,
                "query_used": current_query,
                # original_query is only reported when a reformulation happened
                "original_query": query if current_query != query else None,
                "attempts": attempt,
                "decision": decision.value,
                "trace": trace,
                "filtered_out_count": len(filtered),
            }

        elif decision == RetrievalDecision.REFORMULATE:
            # Reformulate and try again, telling the LLM how weak the last
            # batch was (average relevance score from the decision metadata)
            avg_score = decision_meta.get("avg_relevance", 0)
            current_query = await reformulate_query(
                query,
                context=f"Fach: {subject}" if subject else None,
                previous_results_summary=f"Durchschnittliche Relevanz: {avg_score:.2f}"
            )
            trace[-1]["action"] = "reformulate"
            trace[-1]["new_query"] = current_query

        elif decision == RetrievalDecision.NEEDS_MORE:
            # Retrieve more with expanded query (appends assessment-oriented
            # keywords rather than calling the LLM)
            current_query = f"{current_query} Bewertungskriterien Anforderungen"
            trace[-1]["action"] = "expand_query"
            trace[-1]["new_query"] = current_query

        elif decision == RetrievalDecision.FALLBACK:
            # Return what we have, even if not ideal; prefer graded-relevant
            # docs but fall back to the raw results if none passed the filter
            return {
                "results": (relevant or results)[:final_top_k],
                "self_rag_enabled": True,
                "query_used": current_query,
                "original_query": query if current_query != query else None,
                "attempts": attempt,
                "decision": decision.value,
                "warning": "Fallback mode - low relevance context",
                "trace": trace,
            }

        attempt += 1

    # Max attempts reached: return the last (ungraded-order) results, if any.
    # attempt was incremented past the limit, so report attempt - 1.
    return {
        "results": results[:final_top_k] if results else [],
        "self_rag_enabled": True,
        "query_used": current_query,
        "original_query": query if current_query != query else None,
        "attempts": attempt - 1,
        "decision": "max_attempts",
        "warning": "Max retrieval attempts reached",
        "trace": trace,
    }
|
def get_self_rag_info() -> dict:
    """Return a snapshot of the current Self-RAG configuration.

    Covers the enabled flag, model/threshold settings, the supported feature
    list, and an explicit privacy note about external data transfer.
    """
    feature_names = [
        "document_grading",
        "relevance_filtering",
        "query_reformulation",
        "answer_grounding_check",
        "retrieval_decision",
    ]
    info = {
        "enabled": SELF_RAG_ENABLED,
        "llm_configured": bool(OPENAI_API_KEY),
        "model": SELF_RAG_MODEL,
        "relevance_threshold": RELEVANCE_THRESHOLD,
        "grounding_threshold": GROUNDING_THRESHOLD,
        "max_retrieval_attempts": MAX_RETRIEVAL_ATTEMPTS,
        "features": feature_names,
    }
    # Privacy posture: grading always ships queries/documents to OpenAI,
    # which is why the feature is shipped disabled.
    info["sends_data_externally"] = True  # ALWAYS true when enabled
    info["privacy_warning"] = (
        "When enabled, queries and documents are sent to OpenAI API for grading"
    )
    info["default_enabled"] = False  # Disabled by default for privacy
    return info