backend-lehrer (11 files): - llm_gateway/routes/schools.py (867 → 5), recording_api.py (848 → 6) - messenger_api.py (840 → 5), print_generator.py (824 → 5) - unit_analytics_api.py (751 → 5), classroom/routes/context.py (726 → 4) - llm_gateway/routes/edu_search_seeds.py (710 → 4) klausur-service (12 files): - ocr_labeling_api.py (845 → 4), metrics_db.py (833 → 4) - legal_corpus_api.py (790 → 4), page_crop.py (758 → 3) - mail/ai_service.py (747 → 4), github_crawler.py (767 → 3) - trocr_service.py (730 → 4), full_compliance_pipeline.py (723 → 4) - dsfa_rag_api.py (715 → 4), ocr_pipeline_auto.py (705 → 4) website (6 pages): - audit-checklist (867 → 8), content (806 → 6) - screen-flow (790 → 4), scraper (789 → 5) - zeugnisse (776 → 5), modules (745 → 4) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
460 lines
15 KiB
Python
460 lines
15 KiB
Python
"""
|
|
PostgreSQL Metrics Database - Core Operations
|
|
|
|
Connection pool, table initialization, feedback storage, search logging,
|
|
upload history, metrics calculation, and relevance judgments.
|
|
|
|
Extracted from metrics_db.py to keep files under 500 LOC.
|
|
"""
|
|
|
|
import os
|
|
from typing import Optional, List, Dict
|
|
from datetime import datetime, timedelta
|
|
|
|
# Connection string for the metrics database. When DATABASE_URL is not set
# in the environment, a local test database is used (e.g. for CI).
DATABASE_URL = os.environ.get(
    "DATABASE_URL",
    "postgresql://test:test@localhost:5432/test_metrics",
)

# Module-wide connection pool; stays None until get_pool() creates it.
_pool = None
|
|
|
|
|
|
async def get_pool():
    """Return the shared asyncpg pool, creating it on first use.

    Returns:
        The module-level pool, or None when asyncpg cannot be imported or
        the pool cannot be created. In both failure cases a warning is
        printed and the cached pool stays unset, so the next call tries again.
    """
    global _pool

    # Fast path: the pool was already created by an earlier call.
    if _pool is not None:
        return _pool

    try:
        # Imported lazily so the module can be loaded without asyncpg.
        import asyncpg

        _pool = await asyncpg.create_pool(DATABASE_URL, min_size=2, max_size=10)
    except ImportError:
        print("Warning: asyncpg not installed. Metrics storage disabled.")
        return None
    except Exception as e:
        print(f"Warning: Failed to connect to PostgreSQL: {e}")
        return None

    return _pool
|
|
|
|
|
|
|
|
# =============================================================================
|
|
# Feedback Storage
|
|
# =============================================================================
|
|
|
|
async def store_feedback(
    result_id: str,
    rating: int,
    query_text: Optional[str] = None,
    collection_name: Optional[str] = None,
    score: Optional[float] = None,
    notes: Optional[str] = None,
    user_id: Optional[str] = None,
) -> bool:
    """Insert one feedback row into rag_search_feedback.

    Returns:
        True when the insert succeeded; False when no pool is available or
        the insert raised (the error is printed, not propagated).
    """
    pool = await get_pool()
    if pool is None:
        return False

    insert_sql = """
        INSERT INTO rag_search_feedback
        (result_id, query_text, collection_name, score, rating, notes, user_id)
        VALUES ($1, $2, $3, $4, $5, $6, $7)
    """
    # Order must match the column list in the statement above.
    values = (result_id, query_text, collection_name, score, rating, notes, user_id)

    try:
        async with pool.acquire() as conn:
            await conn.execute(insert_sql, *values)
    except Exception as e:
        print(f"Failed to store feedback: {e}")
        return False
    return True
|
|
|
|
|
|
async def log_search(
    query_text: str,
    collection_name: str,
    result_count: int,
    latency_ms: int,
    top_score: Optional[float] = None,
    filters: Optional[Dict] = None,
) -> bool:
    """Insert one search-log row into rag_search_logs.

    A non-empty ``filters`` dict is stored as a JSON string; a missing or
    empty one is stored as NULL.

    Returns:
        True when the insert succeeded; False when no pool is available or
        anything inside the insert path raised (the error is printed).
    """
    pool = await get_pool()
    if pool is None:
        return False

    insert_sql = """
        INSERT INTO rag_search_logs
        (query_text, collection_name, result_count, latency_ms, top_score, filters)
        VALUES ($1, $2, $3, $4, $5, $6)
    """

    try:
        import json

        async with pool.acquire() as conn:
            # Serialization failures are reported like any other insert error.
            filters_json = None if not filters else json.dumps(filters)
            await conn.execute(
                insert_sql,
                query_text,
                collection_name,
                result_count,
                latency_ms,
                top_score,
                filters_json,
            )
    except Exception as e:
        print(f"Failed to log search: {e}")
        return False
    return True
|
|
|
|
|
|
async def log_upload(
    filename: str,
    collection_name: str,
    year: int,
    pdfs_extracted: int,
    minio_path: Optional[str] = None,
    uploaded_by: Optional[str] = None,
) -> bool:
    """Insert one upload record into rag_upload_history.

    Returns:
        True when the insert succeeded; False when no pool is available or
        the insert raised (the error is printed, not propagated).
    """
    pool = await get_pool()
    if pool is None:
        return False

    insert_sql = """
        INSERT INTO rag_upload_history
        (filename, collection_name, year, pdfs_extracted, minio_path, uploaded_by)
        VALUES ($1, $2, $3, $4, $5, $6)
    """
    # Order must match the column list in the statement above.
    values = (filename, collection_name, year, pdfs_extracted, minio_path, uploaded_by)

    try:
        async with pool.acquire() as conn:
            await conn.execute(insert_sql, *values)
    except Exception as e:
        print(f"Failed to log upload: {e}")
        return False
    return True
|
|
|
|
|
|
# =============================================================================
|
|
# Metrics Calculation
|
|
# =============================================================================
|
|
|
|
async def calculate_metrics(
    collection_name: Optional[str] = None,
    days: int = 7,
) -> Dict:
    """
    Calculate RAG quality metrics from stored feedback and search logs.

    Args:
        collection_name: If given (non-empty), restrict every query to this
            collection; otherwise all collections are included.
        days: Size of the look-back window, counted back from now.

    Returns:
        On success, a dict with:
            connected: True
            period_days: the ``days`` argument
            precision_at_5: share of feedback rows with rating >= 4
            recall_at_10: heuristic estimate (precision_at_5 * 1.1), capped
                at 1.0; it is not computed from relevance judgments
            mrr: average rating divided by 5 (a proxy, not a true MRR)
            avg_latency_ms, total_searches: taken from rag_search_logs
            total_ratings: number of feedback rows in the window
            error_rate: percentage of feedback rows with rating <= 2
            score_distribution: percentage of scored feedback per score bucket
            rating_distribution: count per rating (falsy ratings are skipped)
        On failure, ``{"error": <message>, "connected": False}``.
    """
    pool = await get_pool()
    if pool is None:
        return {"error": "Database not available", "connected": False}

    try:
        async with pool.acquire() as conn:
            since = datetime.now() - timedelta(days=days)

            # The filter text is a fixed literal; the collection name itself
            # is always passed as a bound parameter, never interpolated.
            collection_filter = ""
            params = [since]
            if collection_name:
                collection_filter = "AND collection_name = $2"
                params.append(collection_name)

            total_feedback = await conn.fetchval(
                f"""
                SELECT COUNT(*) FROM rag_search_feedback
                WHERE created_at >= $1 {collection_filter}
                """,
                *params
            )

            rating_dist = await conn.fetch(
                f"""
                SELECT rating, COUNT(*) as count
                FROM rag_search_feedback
                WHERE created_at >= $1 {collection_filter}
                GROUP BY rating
                ORDER BY rating DESC
                """,
                *params
            )

            avg_rating = await conn.fetchval(
                f"""
                SELECT AVG(rating) FROM rag_search_feedback
                WHERE created_at >= $1 {collection_filter}
                """,
                *params
            )

            score_dist = await conn.fetch(
                f"""
                SELECT
                    CASE
                        WHEN score >= 0.9 THEN '0.9+'
                        WHEN score >= 0.7 THEN '0.7-0.9'
                        WHEN score >= 0.5 THEN '0.5-0.7'
                        ELSE '<0.5'
                    END as range,
                    COUNT(*) as count
                FROM rag_search_feedback
                WHERE created_at >= $1 AND score IS NOT NULL {collection_filter}
                GROUP BY range
                ORDER BY range DESC
                """,
                *params
            )

            # Same filter as above: rag_search_logs uses the same
            # collection_name column, so no rewriting of the filter is needed.
            latency_stats = await conn.fetchrow(
                f"""
                SELECT
                    AVG(latency_ms) as avg_latency,
                    COUNT(*) as total_searches,
                    AVG(result_count) as avg_results
                FROM rag_search_logs
                WHERE created_at >= $1 {collection_filter}
                """,
                *params
            )

            precision_at_5 = await conn.fetchval(
                f"""
                SELECT
                    CASE WHEN COUNT(*) > 0
                        THEN CAST(SUM(CASE WHEN rating >= 4 THEN 1 ELSE 0 END) AS FLOAT) / COUNT(*)
                        ELSE 0 END
                FROM rag_search_feedback
                WHERE created_at >= $1 {collection_filter}
                """,
                *params
            ) or 0
            precision_at_5 = float(precision_at_5)

            # AVG() may come back as decimal.Decimal (presumably rating is an
            # integer column - TODO confirm against the schema). Decimal cannot
            # be divided by a float, so coerce before doing float arithmetic.
            mrr = float(avg_rating or 0) / 5.0

            error_count = sum(
                r['count'] for r in rating_dist if r['rating'] and r['rating'] <= 2
            )
            error_rate = (error_count / total_feedback * 100) if total_feedback else 0

            # Every GROUP BY row has count >= 1, so total_scored is non-zero
            # whenever there is at least one row to iterate over.
            total_scored = sum(s['count'] for s in score_dist)
            score_distribution = {
                s['range']: round(s['count'] / total_scored * 100)
                for s in score_dist
            }

            # Heuristic recall estimate; capped so it stays a valid ratio.
            recall_estimate = min(precision_at_5 * 1.1, 1.0)

            return {
                "connected": True,
                "period_days": days,
                "precision_at_5": round(precision_at_5, 2),
                "recall_at_10": round(recall_estimate, 2),
                "mrr": round(mrr, 2),
                "avg_latency_ms": round(latency_stats['avg_latency'] or 0),
                "total_ratings": total_feedback,
                "total_searches": latency_stats['total_searches'] or 0,
                "error_rate": round(error_rate, 1),
                "score_distribution": score_distribution,
                "rating_distribution": {
                    str(r['rating']): r['count'] for r in rating_dist if r['rating']
                },
            }

    except Exception as e:
        print(f"Failed to calculate metrics: {e}")
        return {"error": str(e), "connected": False}
|
|
|
|
|
|
async def get_recent_feedback(limit: int = 20) -> List[Dict]:
    """Return the most recent feedback rows, newest first.

    Args:
        limit: Maximum number of rows to return.

    Returns:
        A list of dicts, one per row, with ``created_at`` rendered as an ISO
        string (or None). An empty list is returned when no pool is
        available or the query fails (the error is printed).
    """
    pool = await get_pool()
    if pool is None:
        return []

    query = """
        SELECT result_id, rating, query_text, collection_name, score, notes, created_at
        FROM rag_search_feedback
        ORDER BY created_at DESC
        LIMIT $1
    """
    # Columns copied through unchanged, in output order.
    plain_columns = ("result_id", "rating", "query_text", "collection_name", "score", "notes")

    try:
        async with pool.acquire() as conn:
            rows = await conn.fetch(query, limit)

        entries = []
        for row in rows:
            entry = {column: row[column] for column in plain_columns}
            created = row['created_at']
            entry["created_at"] = created.isoformat() if created else None
            entries.append(entry)
        return entries
    except Exception as e:
        print(f"Failed to get recent feedback: {e}")
        return []
|
|
|
|
|
|
async def get_upload_history(limit: int = 20) -> List[Dict]:
    """Return the most recent upload records, newest first.

    Args:
        limit: Maximum number of rows to return.

    Returns:
        A list of dicts, one per row, with ``created_at`` rendered as an ISO
        string (or None). An empty list is returned when no pool is
        available or the query fails (the error is printed).
    """
    pool = await get_pool()
    if pool is None:
        return []

    query = """
        SELECT filename, collection_name, year, pdfs_extracted, minio_path, uploaded_by, created_at
        FROM rag_upload_history
        ORDER BY created_at DESC
        LIMIT $1
    """
    # Columns copied through unchanged, in output order.
    plain_columns = (
        "filename",
        "collection_name",
        "year",
        "pdfs_extracted",
        "minio_path",
        "uploaded_by",
    )

    try:
        async with pool.acquire() as conn:
            rows = await conn.fetch(query, limit)

        history = []
        for row in rows:
            record = {column: row[column] for column in plain_columns}
            created = row['created_at']
            record["created_at"] = created.isoformat() if created else None
            history.append(record)
        return history
    except Exception as e:
        print(f"Failed to get upload history: {e}")
        return []
|
|
|
|
|
|
# =============================================================================
|
|
# Relevance Judgments (Binary Precision/Recall)
|
|
# =============================================================================
|
|
|
|
async def store_relevance_judgment(
    query_id: str,
    query_text: str,
    result_id: str,
    is_relevant: bool,
    result_rank: Optional[int] = None,
    collection_name: Optional[str] = None,
    user_id: Optional[str] = None,
) -> bool:
    """Insert one binary relevance judgment into rag_relevance_judgments.

    The statement uses ON CONFLICT DO NOTHING, so a conflicting row is
    skipped without error and the call still reports success.

    Returns:
        True when the statement ran without error; False when no pool is
        available or the statement raised (the error is printed).
    """
    pool = await get_pool()
    if pool is None:
        return False

    insert_sql = """
        INSERT INTO rag_relevance_judgments
        (query_id, query_text, result_id, result_rank, is_relevant, collection_name, user_id)
        VALUES ($1, $2, $3, $4, $5, $6, $7)
        ON CONFLICT DO NOTHING
    """
    # Order must match the column list in the statement above (note that it
    # differs from the parameter order of this function).
    values = (
        query_id,
        query_text,
        result_id,
        result_rank,
        is_relevant,
        collection_name,
        user_id,
    )

    try:
        async with pool.acquire() as conn:
            await conn.execute(insert_sql, *values)
    except Exception as e:
        print(f"Failed to store relevance judgment: {e}")
        return False
    return True
|
|
|
|
|
|
async def calculate_precision_recall(
    collection_name: Optional[str] = None,
    days: int = 7,
    k: int = 10,
) -> Dict:
    """
    Calculate Precision@k and Recall@k from binary relevance judgments.

    Both values are computed per query_id and then averaged over queries:

    Precision@k = (relevant judged results in top k) / (judged results in top k)
    Recall@k = (relevant judged results in top k) / (all relevant judged results)

    Judgments without a rank (result_rank IS NULL) are counted as being
    inside the top k. Note that the precision denominator is the number of
    judged results in the top k, not k itself.

    Args:
        collection_name: If given (non-empty), restrict every query to this
            collection; otherwise all collections are included.
        days: Size of the look-back window, counted back from now.
        k: Rank cut-off.

    Returns:
        On success, a dict with connected, period_days, k, precision_at_k,
        recall_at_k, f1_score, total_judgments and unique_queries.
        On failure, ``{"error": <message>, "connected": False}``.
    """
    pool = await get_pool()
    if pool is None:
        return {"error": "Database not available", "connected": False}

    try:
        async with pool.acquire() as conn:
            since = datetime.now() - timedelta(days=days)

            # Two parameter layouts are needed because placeholder numbers
            # must line up with the arguments actually passed:
            #   ranked queries: $1 = since, $2 = k, $3 = collection_name
            #   count queries:  $1 = since, $2 = collection_name
            ranked_filter = ""
            ranked_params = [since, k]
            count_filter = ""
            count_params = [since]
            if collection_name:
                ranked_filter = "AND collection_name = $3"
                ranked_params.append(collection_name)
                count_filter = "AND collection_name = $2"
                count_params.append(collection_name)

            precision_result = await conn.fetchval(
                f"""
                WITH query_precision AS (
                    SELECT
                        query_id,
                        COUNT(CASE WHEN is_relevant THEN 1 END)::FLOAT /
                            GREATEST(COUNT(*), 1) as precision
                    FROM rag_relevance_judgments
                    WHERE created_at >= $1
                        AND (result_rank IS NULL OR result_rank <= $2)
                        {ranked_filter}
                    GROUP BY query_id
                )
                SELECT AVG(precision) FROM query_precision
                """,
                *ranked_params
            ) or 0

            recall_result = await conn.fetchval(
                f"""
                WITH query_recall AS (
                    SELECT
                        query_id,
                        COUNT(CASE WHEN is_relevant AND (result_rank IS NULL OR result_rank <= $2) THEN 1 END)::FLOAT /
                            GREATEST(COUNT(CASE WHEN is_relevant THEN 1 END), 1) as recall
                    FROM rag_relevance_judgments
                    WHERE created_at >= $1
                        {ranked_filter}
                    GROUP BY query_id
                )
                SELECT AVG(recall) FROM query_recall
                """,
                *ranked_params
            ) or 0

            total_judgments = await conn.fetchval(
                f"""
                SELECT COUNT(*) FROM rag_relevance_judgments
                WHERE created_at >= $1 {count_filter}
                """,
                *count_params
            )

            unique_queries = await conn.fetchval(
                f"""
                SELECT COUNT(DISTINCT query_id) FROM rag_relevance_judgments
                WHERE created_at >= $1 {count_filter}
                """,
                *count_params
            )

            precision_result = float(precision_result)
            recall_result = float(recall_result)
            # The floor on the denominator avoids dividing by zero when both
            # precision and recall are 0.
            f1_score = (
                2 * precision_result * recall_result
                / max(precision_result + recall_result, 0.001)
            )

            return {
                "connected": True,
                "period_days": days,
                "k": k,
                "precision_at_k": round(precision_result, 3),
                "recall_at_k": round(recall_result, 3),
                "f1_score": round(f1_score, 3),
                "total_judgments": total_judgments or 0,
                "unique_queries": unique_queries or 0,
            }

    except Exception as e:
        print(f"Failed to calculate precision/recall: {e}")
        return {"error": str(e), "connected": False}
|