[split-required] Split final 43 files (500-668 LOC) to complete refactoring
klausur-service (11 files): - cv_gutter_repair, ocr_pipeline_regression, upload_api - ocr_pipeline_sessions, smart_spell, nru_worksheet_generator - ocr_pipeline_overlays, mail/aggregator, zeugnis_api - cv_syllable_detect, self_rag backend-lehrer (17 files): - classroom_engine/suggestions, generators/quiz_generator - worksheets_api, llm_gateway/comparison, state_engine_api - classroom/models (→ 4 submodules), services/file_processor - alerts_agent/api/wizard+digests+routes, content_generators/pdf - classroom/routes/sessions, llm_gateway/inference - classroom_engine/analytics, auth/keycloak_auth - alerts_agent/processing/rule_engine, ai_processor/print_versions agent-core (5 files): - brain/memory_store, brain/knowledge_graph, brain/context_manager - orchestrator/supervisor, sessions/session_manager admin-lehrer (5 components): - GridOverlay, StepGridReview, DevOpsPipelineSidebar - DataFlowDiagram, sbom/wizard/page website (2 files): - DependencyMap, lehrer/abitur-archiv Other: nibis_ingestion, grid_detection_service, export-doclayout-onnx Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
255
klausur-service/backend/self_rag_retrieval.py
Normal file
255
klausur-service/backend/self_rag_retrieval.py
Normal file
@@ -0,0 +1,255 @@
|
||||
"""
|
||||
Self-RAG Retrieval — query reformulation, retrieval loop, info.
|
||||
|
||||
Extracted from self_rag.py for modularity.
|
||||
|
||||
IMPORTANT: Self-RAG is DISABLED by default for privacy reasons!
|
||||
When enabled, search queries and retrieved documents are sent to OpenAI API
|
||||
for relevance grading and query reformulation.
|
||||
"""
|
||||
|
||||
import os
import re
from typing import List, Dict, Optional

import httpx

from self_rag_grading import (
    SELF_RAG_ENABLED,
    OPENAI_API_KEY,
    SELF_RAG_MODEL,
    RELEVANCE_THRESHOLD,
    GROUNDING_THRESHOLD,
    MAX_RETRIEVAL_ATTEMPTS,
    RetrievalDecision,
    filter_relevant_documents,
    decide_retrieval_strategy,
)
|
||||
|
||||
|
||||
async def reformulate_query(
    original_query: str,
    context: Optional[str] = None,
    previous_results_summary: Optional[str] = None,
) -> str:
    """
    Reformulate a query to improve retrieval.

    Uses an LLM to generate a better query based on:
    - Original query
    - Optional context (subject, niveau, etc.)
    - Summary of why previous retrieval failed

    Without an OpenAI API key, falls back to a purely local rewrite that
    expands common German exam abbreviations in place.

    Args:
        original_query: The query that produced poor results.
        context: Optional extra context string (e.g. "Fach: Mathematik").
        previous_results_summary: Short description of why the previous
            retrieval attempt failed.

    Returns:
        The reformulated query; on any LLM error or non-200 response the
        original query is returned unchanged.
    """
    if not OPENAI_API_KEY:
        # Local fallback: expand abbreviations with word-boundary matching.
        # Plain substring replace corrupted words containing an abbreviation,
        # e.g. "Abitur" -> "Abi (Abitur)tur" and "TEAM" -> "TE(A ...)M"-style
        # artifacts; \b anchors prevent that.
        expansions = {
            "EA": "erhoehtes Anforderungsniveau",
            "eA": "erhoehtes Anforderungsniveau",
            "GA": "grundlegendes Anforderungsniveau",
            "gA": "grundlegendes Anforderungsniveau",
            "AFB": "Anforderungsbereich",
            "Abi": "Abitur",
        }
        reformulated = original_query
        for abbr, expansion in expansions.items():
            pattern = r"\b" + re.escape(abbr) + r"\b"
            reformulated = re.sub(pattern, f"{abbr} ({expansion})", reformulated)
        return reformulated

    prompt = f"""Du bist ein Experte fuer deutsche Bildungsstandards und Pruefungsanforderungen.

Die folgende Suchanfrage hat keine guten Ergebnisse geliefert:
ORIGINAL: {original_query}

{f"KONTEXT: {context}" if context else ""}
{f"PROBLEM MIT VORHERIGEN ERGEBNISSEN: {previous_results_summary}" if previous_results_summary else ""}

Formuliere die Anfrage so um, dass sie:
1. Formellere/technischere Begriffe verwendet (wie in offiziellen Dokumenten)
2. Relevante Synonyme oder verwandte Begriffe einschliesst
3. Spezifischer auf Erwartungshorizonte/Bewertungskriterien ausgerichtet ist

Antworte NUR mit der umformulierten Suchanfrage, ohne Erklaerung."""

    try:
        async with httpx.AsyncClient() as client:
            response = await client.post(
                "https://api.openai.com/v1/chat/completions",
                headers={
                    "Authorization": f"Bearer {OPENAI_API_KEY}",
                    "Content-Type": "application/json"
                },
                json={
                    "model": SELF_RAG_MODEL,
                    "messages": [{"role": "user", "content": prompt}],
                    "max_tokens": 100,
                    "temperature": 0.3,
                },
                timeout=30.0
            )

            # Any non-success status: keep the original query rather than fail.
            if response.status_code != 200:
                return original_query

            return response.json()["choices"][0]["message"]["content"].strip()

    except Exception:
        # Deliberate best-effort: network/parse errors must not break retrieval.
        return original_query
|
||||
|
||||
|
||||
async def self_rag_retrieve(
    query: str,
    search_func,
    subject: Optional[str] = None,
    niveau: Optional[str] = None,
    initial_top_k: int = 10,
    final_top_k: int = 5,
    **search_kwargs
) -> Dict:
    """
    Perform Self-RAG enhanced retrieval with reflection and correction.

    This implements a retrieval loop that:
    1. Retrieves initial documents
    2. Grades them for relevance
    3. Decides if more retrieval is needed
    4. Reformulates query if necessary
    5. Returns filtered, high-quality context

    Args:
        query: The search query
        search_func: Async function to perform the actual search
        subject: Optional subject context
        niveau: Optional niveau context
        initial_top_k: Number of documents for initial retrieval
        final_top_k: Maximum documents to return
        **search_kwargs: Additional args for search_func

    Returns:
        Dict with results, metadata, and reflection trace
    """
    # NOTE(review): `niveau` is accepted but never read in this function —
    # confirm whether it should be forwarded to reformulate_query's context.
    if not SELF_RAG_ENABLED:
        # Fall back to simple search — no external LLM calls are made.
        results = await search_func(query=query, limit=final_top_k, **search_kwargs)
        return {
            "results": results,
            "self_rag_enabled": False,
            "query_used": query,
        }

    trace = []  # per-attempt reflection log, returned to the caller
    current_query = query
    attempt = 1

    # NOTE(review): assumes MAX_RETRIEVAL_ATTEMPTS >= 1; if it were 0 the
    # loop body never runs and `results` below would be unbound.
    while attempt <= MAX_RETRIEVAL_ATTEMPTS:
        # Step 1: Retrieve documents
        results = await search_func(query=current_query, limit=initial_top_k, **search_kwargs)

        trace.append({
            "attempt": attempt,
            "query": current_query,
            "retrieved_count": len(results) if results else 0,
        })

        if not results:
            # Nothing came back: reformulate (always from the ORIGINAL query,
            # not the possibly-drifted current one) and retry, unless we are
            # already out of attempts — then the loop condition ends the loop.
            attempt += 1
            if attempt <= MAX_RETRIEVAL_ATTEMPTS:
                current_query = await reformulate_query(
                    query,
                    context=f"Fach: {subject}" if subject else None,
                    previous_results_summary="Keine Dokumente gefunden"
                )
                trace[-1]["action"] = "reformulate"
                trace[-1]["new_query"] = current_query
            continue

        # Step 2: Grade documents for relevance
        relevant, filtered = await filter_relevant_documents(current_query, results)

        trace[-1]["relevant_count"] = len(relevant)
        trace[-1]["filtered_count"] = len(filtered)

        # Step 3: Decide what to do
        decision, decision_meta = await decide_retrieval_strategy(
            current_query, relevant, attempt
        )

        trace[-1]["decision"] = decision.value
        trace[-1]["decision_meta"] = decision_meta

        if decision == RetrievalDecision.SUFFICIENT:
            # We have good context, return it
            return {
                "results": relevant[:final_top_k],
                "self_rag_enabled": True,
                "query_used": current_query,
                # original_query is only reported when the query drifted
                "original_query": query if current_query != query else None,
                "attempts": attempt,
                "decision": decision.value,
                "trace": trace,
                "filtered_out_count": len(filtered),
            }

        elif decision == RetrievalDecision.REFORMULATE:
            # Reformulate and try again (again starting from the original query)
            avg_score = decision_meta.get("avg_relevance", 0)
            current_query = await reformulate_query(
                query,
                context=f"Fach: {subject}" if subject else None,
                previous_results_summary=f"Durchschnittliche Relevanz: {avg_score:.2f}"
            )
            trace[-1]["action"] = "reformulate"
            trace[-1]["new_query"] = current_query

        elif decision == RetrievalDecision.NEEDS_MORE:
            # Retrieve more with expanded query — appends fixed German terms
            # steering the search toward grading criteria documents.
            current_query = f"{current_query} Bewertungskriterien Anforderungen"
            trace[-1]["action"] = "expand_query"
            trace[-1]["new_query"] = current_query

        elif decision == RetrievalDecision.FALLBACK:
            # Return what we have, even if not ideal; prefer the graded
            # `relevant` subset, else the raw results.
            return {
                "results": (relevant or results)[:final_top_k],
                "self_rag_enabled": True,
                "query_used": current_query,
                "original_query": query if current_query != query else None,
                "attempts": attempt,
                "decision": decision.value,
                "warning": "Fallback mode - low relevance context",
                "trace": trace,
            }

        attempt += 1

    # Max attempts reached — return the last (ungraded) result set as-is.
    return {
        "results": results[:final_top_k] if results else [],
        "self_rag_enabled": True,
        "query_used": current_query,
        "original_query": query if current_query != query else None,
        "attempts": attempt - 1,
        "decision": "max_attempts",
        "warning": "Max retrieval attempts reached",
        "trace": trace,
    }
|
||||
|
||||
|
||||
def get_self_rag_info() -> dict:
    """Get information about Self-RAG configuration."""
    supported_features = [
        "document_grading",
        "relevance_filtering",
        "query_reformulation",
        "answer_grounding_check",
        "retrieval_decision",
    ]
    info = {
        "enabled": SELF_RAG_ENABLED,
        "llm_configured": bool(OPENAI_API_KEY),
        "model": SELF_RAG_MODEL,
        "relevance_threshold": RELEVANCE_THRESHOLD,
        "grounding_threshold": GROUNDING_THRESHOLD,
        "max_retrieval_attempts": MAX_RETRIEVAL_ATTEMPTS,
        "features": supported_features,
    }
    # External data transfer is inherent to the feature: grading always
    # talks to the OpenAI API, hence the privacy-first default of off.
    info["sends_data_externally"] = True  # ALWAYS true when enabled
    info["privacy_warning"] = "When enabled, queries and documents are sent to OpenAI API for grading"
    info["default_enabled"] = False  # Disabled by default for privacy
    return info
|
||||
Reference in New Issue
Block a user