klausur-service (11 files): - cv_gutter_repair, ocr_pipeline_regression, upload_api - ocr_pipeline_sessions, smart_spell, nru_worksheet_generator - ocr_pipeline_overlays, mail/aggregator, zeugnis_api - cv_syllable_detect, self_rag backend-lehrer (17 files): - classroom_engine/suggestions, generators/quiz_generator - worksheets_api, llm_gateway/comparison, state_engine_api - classroom/models (→ 4 submodules), services/file_processor - alerts_agent/api/wizard+digests+routes, content_generators/pdf - classroom/routes/sessions, llm_gateway/inference - classroom_engine/analytics, auth/keycloak_auth - alerts_agent/processing/rule_engine, ai_processor/print_versions agent-core (5 files): - brain/memory_store, brain/knowledge_graph, brain/context_manager - orchestrator/supervisor, sessions/session_manager admin-lehrer (5 components): - GridOverlay, StepGridReview, DevOpsPipelineSidebar - DataFlowDiagram, sbom/wizard/page website (2 files): - DependencyMap, lehrer/abitur-archiv Other: nibis_ingestion, grid_detection_service, export-doclayout-onnx Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
256 lines
8.7 KiB
Python
256 lines
8.7 KiB
Python
"""
|
|
Self-RAG Retrieval — query reformulation, retrieval loop, info.
|
|
|
|
Extracted from self_rag.py for modularity.
|
|
|
|
IMPORTANT: Self-RAG is DISABLED by default for privacy reasons!
|
|
When enabled, search queries and retrieved documents are sent to OpenAI API
|
|
for relevance grading and query reformulation.
|
|
"""
|
|
|
|
import os
import re
from typing import List, Dict, Optional

import httpx

from self_rag_grading import (
    SELF_RAG_ENABLED,
    OPENAI_API_KEY,
    SELF_RAG_MODEL,
    RELEVANCE_THRESHOLD,
    GROUNDING_THRESHOLD,
    MAX_RETRIEVAL_ATTEMPTS,
    RetrievalDecision,
    filter_relevant_documents,
    decide_retrieval_strategy,
)
|
async def reformulate_query(
    original_query: str,
    context: Optional[str] = None,
    previous_results_summary: Optional[str] = None,
) -> str:
    """Reformulate a search query to improve retrieval quality.

    Uses the OpenAI chat API (model ``SELF_RAG_MODEL``) when an API key is
    configured; otherwise falls back to a purely local expansion of common
    German exam abbreviations.

    Args:
        original_query: The query that produced poor results.
        context: Optional context string (e.g. subject, niveau).
        previous_results_summary: Short note on why the previous retrieval
            attempt failed, fed to the LLM as a hint.

    Returns:
        The reformulated query, or ``original_query`` unchanged on any
        API error, non-200 response, or exception.
    """
    if not OPENAI_API_KEY:
        # Offline fallback: annotate known abbreviations with their expansion.
        # Replace only on word boundaries so words that merely *contain* an
        # abbreviation are left intact (naive substring replacement would
        # turn "Abitur" into "Abi (Abitur)tur").
        expansions = {
            "EA": "erhoehtes Anforderungsniveau",
            "eA": "erhoehtes Anforderungsniveau",
            "GA": "grundlegendes Anforderungsniveau",
            "gA": "grundlegendes Anforderungsniveau",
            "AFB": "Anforderungsbereich",
            "Abi": "Abitur",
        }
        reformulated = original_query
        for abbr, expansion in expansions.items():
            reformulated = re.sub(
                rf"\b{re.escape(abbr)}\b",
                f"{abbr} ({expansion})",
                reformulated,
            )
        return reformulated

    prompt = f"""Du bist ein Experte fuer deutsche Bildungsstandards und Pruefungsanforderungen.

Die folgende Suchanfrage hat keine guten Ergebnisse geliefert:
ORIGINAL: {original_query}

{f"KONTEXT: {context}" if context else ""}
{f"PROBLEM MIT VORHERIGEN ERGEBNISSEN: {previous_results_summary}" if previous_results_summary else ""}

Formuliere die Anfrage so um, dass sie:
1. Formellere/technischere Begriffe verwendet (wie in offiziellen Dokumenten)
2. Relevante Synonyme oder verwandte Begriffe einschliesst
3. Spezifischer auf Erwartungshorizonte/Bewertungskriterien ausgerichtet ist

Antworte NUR mit der umformulierten Suchanfrage, ohne Erklaerung."""

    try:
        async with httpx.AsyncClient() as client:
            response = await client.post(
                "https://api.openai.com/v1/chat/completions",
                headers={
                    "Authorization": f"Bearer {OPENAI_API_KEY}",
                    "Content-Type": "application/json"
                },
                json={
                    "model": SELF_RAG_MODEL,
                    "messages": [{"role": "user", "content": prompt}],
                    "max_tokens": 100,
                    "temperature": 0.3,
                },
                timeout=30.0
            )

        if response.status_code != 200:
            # Degrade gracefully: a failed reformulation must not break retrieval.
            return original_query

        return response.json()["choices"][0]["message"]["content"].strip()

    except Exception:
        # Network/parsing errors also degrade gracefully to the original query.
        return original_query
|
async def self_rag_retrieve(
    query: str,
    search_func,
    subject: Optional[str] = None,
    niveau: Optional[str] = None,
    initial_top_k: int = 10,
    final_top_k: int = 5,
    **search_kwargs
) -> Dict:
    """
    Perform Self-RAG enhanced retrieval with reflection and correction.

    This implements a retrieval loop that:
    1. Retrieves initial documents
    2. Grades them for relevance
    3. Decides if more retrieval is needed
    4. Reformulates query if necessary
    5. Returns filtered, high-quality context

    Args:
        query: The search query
        search_func: Async function to perform the actual search; called as
            ``await search_func(query=..., limit=..., **search_kwargs)`` and
            expected to return a list of documents (or a falsy value for none)
        subject: Optional subject context (passed to query reformulation)
        niveau: Optional niveau context
            (NOTE(review): accepted but not used anywhere in this body — confirm)
        initial_top_k: Number of documents for initial retrieval
        final_top_k: Maximum documents to return
        **search_kwargs: Additional args for search_func

    Returns:
        Dict with results, metadata, and reflection trace. Keys vary by exit
        path: the quick-exit path (Self-RAG disabled) returns only
        ``results``/``self_rag_enabled``/``query_used``; all other paths add
        attempt/decision/trace metadata, and non-SUFFICIENT exits add a
        ``warning`` string.
    """
    if not SELF_RAG_ENABLED:
        # Privacy-preserving default: plain search, no data sent for grading.
        results = await search_func(query=query, limit=final_top_k, **search_kwargs)
        return {
            "results": results,
            "self_rag_enabled": False,
            "query_used": query,
        }

    # Reflection trace: one dict appended per attempt, mutated in place as
    # grading/decision info for that attempt becomes available.
    trace: List[Dict] = []
    current_query = query
    attempt = 1

    while attempt <= MAX_RETRIEVAL_ATTEMPTS:
        # Step 1: Retrieve documents (over-fetch with initial_top_k so the
        # relevance filter has candidates to discard)
        results = await search_func(query=current_query, limit=initial_top_k, **search_kwargs)

        trace.append({
            "attempt": attempt,
            "query": current_query,
            "retrieved_count": len(results) if results else 0,
        })

        if not results:
            # Nothing found: skip grading, reformulate (from the ORIGINAL
            # query, not the current one) and retry if attempts remain.
            attempt += 1
            if attempt <= MAX_RETRIEVAL_ATTEMPTS:
                current_query = await reformulate_query(
                    query,
                    context=f"Fach: {subject}" if subject else None,
                    previous_results_summary="Keine Dokumente gefunden"
                )
                trace[-1]["action"] = "reformulate"
                trace[-1]["new_query"] = current_query
            continue

        # Step 2: Grade documents for relevance (external LLM call; splits
        # results into relevant vs. filtered-out)
        relevant, filtered = await filter_relevant_documents(current_query, results)

        trace[-1]["relevant_count"] = len(relevant)
        trace[-1]["filtered_count"] = len(filtered)

        # Step 3: Decide what to do
        decision, decision_meta = await decide_retrieval_strategy(
            current_query, relevant, attempt
        )

        trace[-1]["decision"] = decision.value
        trace[-1]["decision_meta"] = decision_meta

        if decision == RetrievalDecision.SUFFICIENT:
            # We have good context, return it
            return {
                "results": relevant[:final_top_k],
                "self_rag_enabled": True,
                "query_used": current_query,
                # original_query is only reported when a reformulation happened
                "original_query": query if current_query != query else None,
                "attempts": attempt,
                "decision": decision.value,
                "trace": trace,
                "filtered_out_count": len(filtered),
            }

        elif decision == RetrievalDecision.REFORMULATE:
            # Reformulate and try again, telling the LLM how weak the last
            # batch was (average relevance score from the decision metadata)
            avg_score = decision_meta.get("avg_relevance", 0)
            current_query = await reformulate_query(
                query,
                context=f"Fach: {subject}" if subject else None,
                previous_results_summary=f"Durchschnittliche Relevanz: {avg_score:.2f}"
            )
            trace[-1]["action"] = "reformulate"
            trace[-1]["new_query"] = current_query

        elif decision == RetrievalDecision.NEEDS_MORE:
            # Retrieve more with expanded query (appends assessment-oriented
            # keywords rather than calling the LLM)
            current_query = f"{current_query} Bewertungskriterien Anforderungen"
            trace[-1]["action"] = "expand_query"
            trace[-1]["new_query"] = current_query

        elif decision == RetrievalDecision.FALLBACK:
            # Return what we have, even if not ideal; prefer graded-relevant
            # docs but fall back to the raw results if none passed the filter
            return {
                "results": (relevant or results)[:final_top_k],
                "self_rag_enabled": True,
                "query_used": current_query,
                "original_query": query if current_query != query else None,
                "attempts": attempt,
                "decision": decision.value,
                "warning": "Fallback mode - low relevance context",
                "trace": trace,
            }

        attempt += 1

    # Max attempts reached: return the last (ungraded-order) results, if any.
    # attempt was incremented past the limit, so report attempt - 1.
    return {
        "results": results[:final_top_k] if results else [],
        "self_rag_enabled": True,
        "query_used": current_query,
        "original_query": query if current_query != query else None,
        "attempts": attempt - 1,
        "decision": "max_attempts",
        "warning": "Max retrieval attempts reached",
        "trace": trace,
    }
|
def get_self_rag_info() -> dict:
    """Return a snapshot of the current Self-RAG configuration.

    Covers the enabled flag, model/threshold settings, the supported feature
    list, and an explicit privacy note about external data transfer.
    """
    feature_names = [
        "document_grading",
        "relevance_filtering",
        "query_reformulation",
        "answer_grounding_check",
        "retrieval_decision",
    ]
    info = {
        "enabled": SELF_RAG_ENABLED,
        "llm_configured": bool(OPENAI_API_KEY),
        "model": SELF_RAG_MODEL,
        "relevance_threshold": RELEVANCE_THRESHOLD,
        "grounding_threshold": GROUNDING_THRESHOLD,
        "max_retrieval_attempts": MAX_RETRIEVAL_ATTEMPTS,
        "features": feature_names,
    }
    # Privacy posture: grading always ships queries/documents to OpenAI,
    # which is why the feature is shipped disabled.
    info["sends_data_externally"] = True  # ALWAYS true when enabled
    info["privacy_warning"] = (
        "When enabled, queries and documents are sent to OpenAI API for grading"
    )
    info["default_enabled"] = False  # Disabled by default for privacy
    return info