""" PostgreSQL Metrics Database - Core Operations Connection pool, table initialization, feedback storage, search logging, upload history, metrics calculation, and relevance judgments. Extracted from metrics_db.py to keep files under 500 LOC. """ import os from typing import Optional, List, Dict from datetime import datetime, timedelta # Database Configuration - uses test default if not configured (for CI) DATABASE_URL = os.getenv("DATABASE_URL", "postgresql://test:test@localhost:5432/test_metrics") # Connection pool _pool = None async def get_pool(): """Get or create database connection pool.""" global _pool if _pool is None: try: import asyncpg _pool = await asyncpg.create_pool(DATABASE_URL, min_size=2, max_size=10) except ImportError: print("Warning: asyncpg not installed. Metrics storage disabled.") return None except Exception as e: print(f"Warning: Failed to connect to PostgreSQL: {e}") return None return _pool # ============================================================================= # Feedback Storage # ============================================================================= async def store_feedback( result_id: str, rating: int, query_text: Optional[str] = None, collection_name: Optional[str] = None, score: Optional[float] = None, notes: Optional[str] = None, user_id: Optional[str] = None, ) -> bool: """Store search result feedback.""" pool = await get_pool() if pool is None: return False try: async with pool.acquire() as conn: await conn.execute( """ INSERT INTO rag_search_feedback (result_id, query_text, collection_name, score, rating, notes, user_id) VALUES ($1, $2, $3, $4, $5, $6, $7) """, result_id, query_text, collection_name, score, rating, notes, user_id ) return True except Exception as e: print(f"Failed to store feedback: {e}") return False async def log_search( query_text: str, collection_name: str, result_count: int, latency_ms: int, top_score: Optional[float] = None, filters: Optional[Dict] = None, ) -> bool: """Log a search for metrics tracking.""" pool = await get_pool() if pool is None: return False try: import json async with pool.acquire() as conn: await conn.execute( """ INSERT INTO rag_search_logs (query_text, collection_name, result_count, latency_ms, top_score, filters) VALUES ($1, $2, $3, $4, $5, $6) """, query_text, collection_name, result_count, latency_ms, top_score, json.dumps(filters) if filters else None ) return True except Exception as e: print(f"Failed to log search: {e}") return False async def log_upload( filename: str, collection_name: str, year: int, pdfs_extracted: int, minio_path: Optional[str] = None, uploaded_by: Optional[str] = None, ) -> bool: """Log an upload for history tracking.""" pool = await get_pool() if pool is None: return False try: async with pool.acquire() as conn: await conn.execute( """ INSERT INTO rag_upload_history (filename, collection_name, year, pdfs_extracted, minio_path, uploaded_by) VALUES ($1, $2, $3, $4, $5, $6) """, filename, collection_name, year, pdfs_extracted, minio_path, uploaded_by ) return True except Exception as e: print(f"Failed to log upload: {e}") return False # ============================================================================= # Metrics Calculation # ============================================================================= async def calculate_metrics( collection_name: Optional[str] = None, days: int = 7, ) -> Dict: """ Calculate RAG quality metrics from stored feedback. Returns: Dict with precision, recall, MRR, latency, etc. """ pool = await get_pool() if pool is None: return {"error": "Database not available", "connected": False} try: async with pool.acquire() as conn: since = datetime.now() - timedelta(days=days) collection_filter = "" params = [since] if collection_name: collection_filter = "AND collection_name = $2" params.append(collection_name) total_feedback = await conn.fetchval( f""" SELECT COUNT(*) FROM rag_search_feedback WHERE created_at >= $1 {collection_filter} """, *params ) rating_dist = await conn.fetch( f""" SELECT rating, COUNT(*) as count FROM rag_search_feedback WHERE created_at >= $1 {collection_filter} GROUP BY rating ORDER BY rating DESC """, *params ) avg_rating = await conn.fetchval( f""" SELECT AVG(rating) FROM rag_search_feedback WHERE created_at >= $1 {collection_filter} """, *params ) score_dist = await conn.fetch( f""" SELECT CASE WHEN score >= 0.9 THEN '0.9+' WHEN score >= 0.7 THEN '0.7-0.9' WHEN score >= 0.5 THEN '0.5-0.7' ELSE '<0.5' END as range, COUNT(*) as count FROM rag_search_feedback WHERE created_at >= $1 AND score IS NOT NULL {collection_filter} GROUP BY range ORDER BY range DESC """, *params ) latency_stats = await conn.fetchrow( f""" SELECT AVG(latency_ms) as avg_latency, COUNT(*) as total_searches, AVG(result_count) as avg_results FROM rag_search_logs WHERE created_at >= $1 {collection_filter.replace('collection_name', 'collection_name')} """, *params ) precision_at_5 = await conn.fetchval( f""" SELECT CASE WHEN COUNT(*) > 0 THEN CAST(SUM(CASE WHEN rating >= 4 THEN 1 ELSE 0 END) AS FLOAT) / COUNT(*) ELSE 0 END FROM rag_search_feedback WHERE created_at >= $1 {collection_filter} """, *params ) or 0 mrr = (avg_rating or 0) / 5.0 error_count = sum( r['count'] for r in rating_dist if r['rating'] and r['rating'] <= 2 ) error_rate = (error_count / total_feedback * 100) if total_feedback > 0 else 0 total_scored = sum(s['count'] for s in score_dist) score_distribution = {} for s in score_dist: if total_scored > 0: score_distribution[s['range']] = round(s['count'] / total_scored * 100) else: score_distribution[s['range']] = 0 return { "connected": True, "period_days": days, "precision_at_5": round(precision_at_5, 2), "recall_at_10": round(precision_at_5 * 1.1, 2), "mrr": round(mrr, 2), "avg_latency_ms": round(latency_stats['avg_latency'] or 0), "total_ratings": total_feedback, "total_searches": latency_stats['total_searches'] or 0, "error_rate": round(error_rate, 1), "score_distribution": score_distribution, "rating_distribution": { str(r['rating']): r['count'] for r in rating_dist if r['rating'] }, } except Exception as e: print(f"Failed to calculate metrics: {e}") return {"error": str(e), "connected": False} async def get_recent_feedback(limit: int = 20) -> List[Dict]: """Get recent feedback entries.""" pool = await get_pool() if pool is None: return [] try: async with pool.acquire() as conn: rows = await conn.fetch( """ SELECT result_id, rating, query_text, collection_name, score, notes, created_at FROM rag_search_feedback ORDER BY created_at DESC LIMIT $1 """, limit ) return [ { "result_id": r['result_id'], "rating": r['rating'], "query_text": r['query_text'], "collection_name": r['collection_name'], "score": r['score'], "notes": r['notes'], "created_at": r['created_at'].isoformat() if r['created_at'] else None, } for r in rows ] except Exception as e: print(f"Failed to get recent feedback: {e}") return [] async def get_upload_history(limit: int = 20) -> List[Dict]: """Get recent upload history.""" pool = await get_pool() if pool is None: return [] try: async with pool.acquire() as conn: rows = await conn.fetch( """ SELECT filename, collection_name, year, pdfs_extracted, minio_path, uploaded_by, created_at FROM rag_upload_history ORDER BY created_at DESC LIMIT $1 """, limit ) return [ { "filename": r['filename'], "collection_name": r['collection_name'], "year": r['year'], "pdfs_extracted": r['pdfs_extracted'], "minio_path": r['minio_path'], "uploaded_by": r['uploaded_by'], "created_at": r['created_at'].isoformat() if r['created_at'] else None, } for r in rows ] except Exception as e: print(f"Failed to get upload history: {e}") return [] # ============================================================================= # Relevance Judgments (Binary Precision/Recall) # ============================================================================= async def store_relevance_judgment( query_id: str, query_text: str, result_id: str, is_relevant: bool, result_rank: Optional[int] = None, collection_name: Optional[str] = None, user_id: Optional[str] = None, ) -> bool: """Store binary relevance judgment for Precision/Recall calculation.""" pool = await get_pool() if pool is None: return False try: async with pool.acquire() as conn: await conn.execute( """ INSERT INTO rag_relevance_judgments (query_id, query_text, result_id, result_rank, is_relevant, collection_name, user_id) VALUES ($1, $2, $3, $4, $5, $6, $7) ON CONFLICT DO NOTHING """, query_id, query_text, result_id, result_rank, is_relevant, collection_name, user_id ) return True except Exception as e: print(f"Failed to store relevance judgment: {e}") return False async def calculate_precision_recall( collection_name: Optional[str] = None, days: int = 7, k: int = 10, ) -> Dict: """ Calculate true Precision@k and Recall@k from binary relevance judgments. Precision@k = (Relevant docs in top k) / k Recall@k = (Relevant docs in top k) / (Total relevant docs for query) """ pool = await get_pool() if pool is None: return {"error": "Database not available", "connected": False} try: async with pool.acquire() as conn: since = datetime.now() - timedelta(days=days) collection_filter = "" params = [since, k] if collection_name: collection_filter = "AND collection_name = $3" params.append(collection_name) precision_result = await conn.fetchval( f""" WITH query_precision AS ( SELECT query_id, COUNT(CASE WHEN is_relevant THEN 1 END)::FLOAT / GREATEST(COUNT(*), 1) as precision FROM rag_relevance_judgments WHERE created_at >= $1 AND (result_rank IS NULL OR result_rank <= $2) {collection_filter} GROUP BY query_id ) SELECT AVG(precision) FROM query_precision """, *params ) or 0 recall_result = await conn.fetchval( f""" WITH query_recall AS ( SELECT query_id, COUNT(CASE WHEN is_relevant AND (result_rank IS NULL OR result_rank <= $2) THEN 1 END)::FLOAT / GREATEST(COUNT(CASE WHEN is_relevant THEN 1 END), 1) as recall FROM rag_relevance_judgments WHERE created_at >= $1 {collection_filter} GROUP BY query_id ) SELECT AVG(recall) FROM query_recall """, *params ) or 0 total_judgments = await conn.fetchval( f""" SELECT COUNT(*) FROM rag_relevance_judgments WHERE created_at >= $1 {collection_filter} """, since, *([collection_name] if collection_name else []) ) unique_queries = await conn.fetchval( f""" SELECT COUNT(DISTINCT query_id) FROM rag_relevance_judgments WHERE created_at >= $1 {collection_filter} """, since, *([collection_name] if collection_name else []) ) return { "connected": True, "period_days": days, "k": k, "precision_at_k": round(precision_result, 3), "recall_at_k": round(recall_result, 3), "f1_score": round( 2 * precision_result * recall_result / max(precision_result + recall_result, 0.001), 3 ), "total_judgments": total_judgments or 0, "unique_queries": unique_queries or 0, } except Exception as e: print(f"Failed to calculate precision/recall: {e}") return {"error": str(e), "connected": False}