""" Self-RAG Retrieval — query reformulation, retrieval loop, info. Extracted from self_rag.py for modularity. IMPORTANT: Self-RAG is DISABLED by default for privacy reasons! When enabled, search queries and retrieved documents are sent to OpenAI API for relevance grading and query reformulation. """ import os from typing import List, Dict, Optional import httpx from self_rag_grading import ( SELF_RAG_ENABLED, OPENAI_API_KEY, SELF_RAG_MODEL, RELEVANCE_THRESHOLD, GROUNDING_THRESHOLD, MAX_RETRIEVAL_ATTEMPTS, RetrievalDecision, filter_relevant_documents, decide_retrieval_strategy, ) async def reformulate_query( original_query: str, context: Optional[str] = None, previous_results_summary: Optional[str] = None, ) -> str: """ Reformulate a query to improve retrieval. Uses LLM to generate a better query based on: - Original query - Optional context (subject, niveau, etc.) - Summary of why previous retrieval failed """ if not OPENAI_API_KEY: # Simple reformulation: expand abbreviations, add synonyms reformulated = original_query expansions = { "EA": "erhoehtes Anforderungsniveau", "eA": "erhoehtes Anforderungsniveau", "GA": "grundlegendes Anforderungsniveau", "gA": "grundlegendes Anforderungsniveau", "AFB": "Anforderungsbereich", "Abi": "Abitur", } for abbr, expansion in expansions.items(): if abbr in original_query: reformulated = reformulated.replace(abbr, f"{abbr} ({expansion})") return reformulated prompt = f"""Du bist ein Experte fuer deutsche Bildungsstandards und Pruefungsanforderungen. Die folgende Suchanfrage hat keine guten Ergebnisse geliefert: ORIGINAL: {original_query} {f"KONTEXT: {context}" if context else ""} {f"PROBLEM MIT VORHERIGEN ERGEBNISSEN: {previous_results_summary}" if previous_results_summary else ""} Formuliere die Anfrage so um, dass sie: 1. Formellere/technischere Begriffe verwendet (wie in offiziellen Dokumenten) 2. Relevante Synonyme oder verwandte Begriffe einschliesst 3. Spezifischer auf Erwartungshorizonte/Bewertungskriterien ausgerichtet ist Antworte NUR mit der umformulierten Suchanfrage, ohne Erklaerung.""" try: async with httpx.AsyncClient() as client: response = await client.post( "https://api.openai.com/v1/chat/completions", headers={ "Authorization": f"Bearer {OPENAI_API_KEY}", "Content-Type": "application/json" }, json={ "model": SELF_RAG_MODEL, "messages": [{"role": "user", "content": prompt}], "max_tokens": 100, "temperature": 0.3, }, timeout=30.0 ) if response.status_code != 200: return original_query return response.json()["choices"][0]["message"]["content"].strip() except Exception: return original_query async def self_rag_retrieve( query: str, search_func, subject: Optional[str] = None, niveau: Optional[str] = None, initial_top_k: int = 10, final_top_k: int = 5, **search_kwargs ) -> Dict: """ Perform Self-RAG enhanced retrieval with reflection and correction. This implements a retrieval loop that: 1. Retrieves initial documents 2. Grades them for relevance 3. Decides if more retrieval is needed 4. Reformulates query if necessary 5. Returns filtered, high-quality context Args: query: The search query search_func: Async function to perform the actual search subject: Optional subject context niveau: Optional niveau context initial_top_k: Number of documents for initial retrieval final_top_k: Maximum documents to return **search_kwargs: Additional args for search_func Returns: Dict with results, metadata, and reflection trace """ if not SELF_RAG_ENABLED: # Fall back to simple search results = await search_func(query=query, limit=final_top_k, **search_kwargs) return { "results": results, "self_rag_enabled": False, "query_used": query, } trace = [] current_query = query attempt = 1 while attempt <= MAX_RETRIEVAL_ATTEMPTS: # Step 1: Retrieve documents results = await search_func(query=current_query, limit=initial_top_k, **search_kwargs) trace.append({ "attempt": attempt, "query": current_query, "retrieved_count": len(results) if results else 0, }) if not results: attempt += 1 if attempt <= MAX_RETRIEVAL_ATTEMPTS: current_query = await reformulate_query( query, context=f"Fach: {subject}" if subject else None, previous_results_summary="Keine Dokumente gefunden" ) trace[-1]["action"] = "reformulate" trace[-1]["new_query"] = current_query continue # Step 2: Grade documents for relevance relevant, filtered = await filter_relevant_documents(current_query, results) trace[-1]["relevant_count"] = len(relevant) trace[-1]["filtered_count"] = len(filtered) # Step 3: Decide what to do decision, decision_meta = await decide_retrieval_strategy( current_query, relevant, attempt ) trace[-1]["decision"] = decision.value trace[-1]["decision_meta"] = decision_meta if decision == RetrievalDecision.SUFFICIENT: # We have good context, return it return { "results": relevant[:final_top_k], "self_rag_enabled": True, "query_used": current_query, "original_query": query if current_query != query else None, "attempts": attempt, "decision": decision.value, "trace": trace, "filtered_out_count": len(filtered), } elif decision == RetrievalDecision.REFORMULATE: # Reformulate and try again avg_score = decision_meta.get("avg_relevance", 0) current_query = await reformulate_query( query, context=f"Fach: {subject}" if subject else None, previous_results_summary=f"Durchschnittliche Relevanz: {avg_score:.2f}" ) trace[-1]["action"] = "reformulate" trace[-1]["new_query"] = current_query elif decision == RetrievalDecision.NEEDS_MORE: # Retrieve more with expanded query current_query = f"{current_query} Bewertungskriterien Anforderungen" trace[-1]["action"] = "expand_query" trace[-1]["new_query"] = current_query elif decision == RetrievalDecision.FALLBACK: # Return what we have, even if not ideal return { "results": (relevant or results)[:final_top_k], "self_rag_enabled": True, "query_used": current_query, "original_query": query if current_query != query else None, "attempts": attempt, "decision": decision.value, "warning": "Fallback mode - low relevance context", "trace": trace, } attempt += 1 # Max attempts reached return { "results": results[:final_top_k] if results else [], "self_rag_enabled": True, "query_used": current_query, "original_query": query if current_query != query else None, "attempts": attempt - 1, "decision": "max_attempts", "warning": "Max retrieval attempts reached", "trace": trace, } def get_self_rag_info() -> dict: """Get information about Self-RAG configuration.""" return { "enabled": SELF_RAG_ENABLED, "llm_configured": bool(OPENAI_API_KEY), "model": SELF_RAG_MODEL, "relevance_threshold": RELEVANCE_THRESHOLD, "grounding_threshold": GROUNDING_THRESHOLD, "max_retrieval_attempts": MAX_RETRIEVAL_ATTEMPTS, "features": [ "document_grading", "relevance_filtering", "query_reformulation", "answer_grounding_check", "retrieval_decision", ], "sends_data_externally": True, # ALWAYS true when enabled "privacy_warning": "When enabled, queries and documents are sent to OpenAI API for grading", "default_enabled": False, # Disabled by default for privacy }