""" PostgreSQL Metrics Database Service Stores search feedback, calculates quality metrics (Precision, Recall, MRR). """ import os from typing import Optional, List, Dict from datetime import datetime, timedelta import asyncio # Database Configuration - uses test default if not configured (for CI) DATABASE_URL = os.getenv("DATABASE_URL", "postgresql://test:test@localhost:5432/test_metrics") # Connection pool _pool = None async def get_pool(): """Get or create database connection pool.""" global _pool if _pool is None: try: import asyncpg _pool = await asyncpg.create_pool(DATABASE_URL, min_size=2, max_size=10) except ImportError: print("Warning: asyncpg not installed. Metrics storage disabled.") return None except Exception as e: print(f"Warning: Failed to connect to PostgreSQL: {e}") return None return _pool async def init_metrics_tables() -> bool: """Initialize metrics tables in PostgreSQL.""" pool = await get_pool() if pool is None: return False create_tables_sql = """ -- RAG Search Feedback Table CREATE TABLE IF NOT EXISTS rag_search_feedback ( id SERIAL PRIMARY KEY, result_id VARCHAR(255) NOT NULL, query_text TEXT, collection_name VARCHAR(100), score FLOAT, rating INTEGER CHECK (rating >= 1 AND rating <= 5), notes TEXT, user_id VARCHAR(100), created_at TIMESTAMP DEFAULT NOW() ); -- Index for efficient querying CREATE INDEX IF NOT EXISTS idx_feedback_created_at ON rag_search_feedback(created_at); CREATE INDEX IF NOT EXISTS idx_feedback_collection ON rag_search_feedback(collection_name); CREATE INDEX IF NOT EXISTS idx_feedback_rating ON rag_search_feedback(rating); -- RAG Search Logs Table (for latency tracking) CREATE TABLE IF NOT EXISTS rag_search_logs ( id SERIAL PRIMARY KEY, query_text TEXT NOT NULL, collection_name VARCHAR(100), result_count INTEGER, latency_ms INTEGER, top_score FLOAT, filters JSONB, created_at TIMESTAMP DEFAULT NOW() ); CREATE INDEX IF NOT EXISTS idx_search_logs_created_at ON rag_search_logs(created_at); -- RAG Upload History Table CREATE TABLE IF NOT EXISTS rag_upload_history ( id SERIAL PRIMARY KEY, filename VARCHAR(500) NOT NULL, collection_name VARCHAR(100), year INTEGER, pdfs_extracted INTEGER, minio_path VARCHAR(1000), uploaded_by VARCHAR(100), created_at TIMESTAMP DEFAULT NOW() ); CREATE INDEX IF NOT EXISTS idx_upload_history_created_at ON rag_upload_history(created_at); -- Binäre Relevanz-Judgments für echte Precision/Recall CREATE TABLE IF NOT EXISTS rag_relevance_judgments ( id SERIAL PRIMARY KEY, query_id VARCHAR(255) NOT NULL, query_text TEXT NOT NULL, result_id VARCHAR(255) NOT NULL, result_rank INTEGER, is_relevant BOOLEAN NOT NULL, collection_name VARCHAR(100), user_id VARCHAR(100), created_at TIMESTAMP DEFAULT NOW() ); CREATE INDEX IF NOT EXISTS idx_relevance_query ON rag_relevance_judgments(query_id); CREATE INDEX IF NOT EXISTS idx_relevance_created_at ON rag_relevance_judgments(created_at); -- Zeugnisse Source Tracking CREATE TABLE IF NOT EXISTS zeugnis_sources ( id VARCHAR(36) PRIMARY KEY, bundesland VARCHAR(10) NOT NULL, name VARCHAR(255) NOT NULL, base_url TEXT, license_type VARCHAR(50) NOT NULL, training_allowed BOOLEAN DEFAULT FALSE, verified_by VARCHAR(100), verified_at TIMESTAMP, created_at TIMESTAMP DEFAULT NOW(), updated_at TIMESTAMP DEFAULT NOW() ); CREATE INDEX IF NOT EXISTS idx_zeugnis_sources_bundesland ON zeugnis_sources(bundesland); -- Zeugnisse Seed URLs CREATE TABLE IF NOT EXISTS zeugnis_seed_urls ( id VARCHAR(36) PRIMARY KEY, source_id VARCHAR(36) REFERENCES zeugnis_sources(id), url TEXT NOT NULL, doc_type VARCHAR(50), status VARCHAR(20) DEFAULT 'pending', last_crawled TIMESTAMP, error_message TEXT, created_at TIMESTAMP DEFAULT NOW() ); CREATE INDEX IF NOT EXISTS idx_zeugnis_seed_urls_source ON zeugnis_seed_urls(source_id); CREATE INDEX IF NOT EXISTS idx_zeugnis_seed_urls_status ON zeugnis_seed_urls(status); -- Zeugnisse Documents CREATE TABLE IF NOT EXISTS zeugnis_documents ( id VARCHAR(36) PRIMARY KEY, seed_url_id VARCHAR(36) REFERENCES zeugnis_seed_urls(id), title VARCHAR(500), url TEXT NOT NULL, content_hash VARCHAR(64), minio_path TEXT, training_allowed BOOLEAN DEFAULT FALSE, indexed_in_qdrant BOOLEAN DEFAULT FALSE, file_size INTEGER, content_type VARCHAR(100), created_at TIMESTAMP DEFAULT NOW(), updated_at TIMESTAMP DEFAULT NOW() ); CREATE INDEX IF NOT EXISTS idx_zeugnis_documents_seed ON zeugnis_documents(seed_url_id); CREATE INDEX IF NOT EXISTS idx_zeugnis_documents_hash ON zeugnis_documents(content_hash); -- Zeugnisse Document Versions CREATE TABLE IF NOT EXISTS zeugnis_document_versions ( id VARCHAR(36) PRIMARY KEY, document_id VARCHAR(36) REFERENCES zeugnis_documents(id), version INTEGER NOT NULL, content_hash VARCHAR(64), minio_path TEXT, change_summary TEXT, created_at TIMESTAMP DEFAULT NOW() ); CREATE INDEX IF NOT EXISTS idx_zeugnis_versions_doc ON zeugnis_document_versions(document_id); -- Zeugnisse Usage Events (Audit Trail) CREATE TABLE IF NOT EXISTS zeugnis_usage_events ( id VARCHAR(36) PRIMARY KEY, document_id VARCHAR(36) REFERENCES zeugnis_documents(id), event_type VARCHAR(50) NOT NULL, user_id VARCHAR(100), details JSONB, created_at TIMESTAMP DEFAULT NOW() ); CREATE INDEX IF NOT EXISTS idx_zeugnis_events_doc ON zeugnis_usage_events(document_id); CREATE INDEX IF NOT EXISTS idx_zeugnis_events_type ON zeugnis_usage_events(event_type); CREATE INDEX IF NOT EXISTS idx_zeugnis_events_created ON zeugnis_usage_events(created_at); -- Crawler Queue CREATE TABLE IF NOT EXISTS zeugnis_crawler_queue ( id VARCHAR(36) PRIMARY KEY, source_id VARCHAR(36) REFERENCES zeugnis_sources(id), priority INTEGER DEFAULT 5, status VARCHAR(20) DEFAULT 'pending', started_at TIMESTAMP, completed_at TIMESTAMP, documents_found INTEGER DEFAULT 0, documents_indexed INTEGER DEFAULT 0, error_count INTEGER DEFAULT 0, created_at TIMESTAMP DEFAULT NOW() ); CREATE INDEX IF NOT EXISTS idx_crawler_queue_status ON zeugnis_crawler_queue(status); """ try: async with pool.acquire() as conn: await conn.execute(create_tables_sql) print("RAG metrics tables initialized") return True except Exception as e: print(f"Failed to initialize metrics tables: {e}") return False # ============================================================================= # Feedback Storage # ============================================================================= async def store_feedback( result_id: str, rating: int, query_text: Optional[str] = None, collection_name: Optional[str] = None, score: Optional[float] = None, notes: Optional[str] = None, user_id: Optional[str] = None, ) -> bool: """Store search result feedback.""" pool = await get_pool() if pool is None: return False try: async with pool.acquire() as conn: await conn.execute( """ INSERT INTO rag_search_feedback (result_id, query_text, collection_name, score, rating, notes, user_id) VALUES ($1, $2, $3, $4, $5, $6, $7) """, result_id, query_text, collection_name, score, rating, notes, user_id ) return True except Exception as e: print(f"Failed to store feedback: {e}") return False async def log_search( query_text: str, collection_name: str, result_count: int, latency_ms: int, top_score: Optional[float] = None, filters: Optional[Dict] = None, ) -> bool: """Log a search for metrics tracking.""" pool = await get_pool() if pool is None: return False try: import json async with pool.acquire() as conn: await conn.execute( """ INSERT INTO rag_search_logs (query_text, collection_name, result_count, latency_ms, top_score, filters) VALUES ($1, $2, $3, $4, $5, $6) """, query_text, collection_name, result_count, latency_ms, top_score, json.dumps(filters) if filters else None ) return True except Exception as e: print(f"Failed to log search: {e}") return False async def log_upload( filename: str, collection_name: str, year: int, pdfs_extracted: int, minio_path: Optional[str] = None, uploaded_by: Optional[str] = None, ) -> bool: """Log an upload for history tracking.""" pool = await get_pool() if pool is None: return False try: async with pool.acquire() as conn: await conn.execute( """ INSERT INTO rag_upload_history (filename, collection_name, year, pdfs_extracted, minio_path, uploaded_by) VALUES ($1, $2, $3, $4, $5, $6) """, filename, collection_name, year, pdfs_extracted, minio_path, uploaded_by ) return True except Exception as e: print(f"Failed to log upload: {e}") return False # ============================================================================= # Metrics Calculation # ============================================================================= async def calculate_metrics( collection_name: Optional[str] = None, days: int = 7, ) -> Dict: """ Calculate RAG quality metrics from stored feedback. Returns: Dict with precision, recall, MRR, latency, etc. """ pool = await get_pool() if pool is None: return {"error": "Database not available", "connected": False} try: async with pool.acquire() as conn: # Date filter since = datetime.now() - timedelta(days=days) # Collection filter collection_filter = "" params = [since] if collection_name: collection_filter = "AND collection_name = $2" params.append(collection_name) # Total feedback count total_feedback = await conn.fetchval( f""" SELECT COUNT(*) FROM rag_search_feedback WHERE created_at >= $1 {collection_filter} """, *params ) # Rating distribution rating_dist = await conn.fetch( f""" SELECT rating, COUNT(*) as count FROM rag_search_feedback WHERE created_at >= $1 {collection_filter} GROUP BY rating ORDER BY rating DESC """, *params ) # Average rating (proxy for precision) avg_rating = await conn.fetchval( f""" SELECT AVG(rating) FROM rag_search_feedback WHERE created_at >= $1 {collection_filter} """, *params ) # Score distribution score_dist = await conn.fetch( f""" SELECT CASE WHEN score >= 0.9 THEN '0.9+' WHEN score >= 0.7 THEN '0.7-0.9' WHEN score >= 0.5 THEN '0.5-0.7' ELSE '<0.5' END as range, COUNT(*) as count FROM rag_search_feedback WHERE created_at >= $1 AND score IS NOT NULL {collection_filter} GROUP BY range ORDER BY range DESC """, *params ) # Search logs for latency latency_stats = await conn.fetchrow( f""" SELECT AVG(latency_ms) as avg_latency, COUNT(*) as total_searches, AVG(result_count) as avg_results FROM rag_search_logs WHERE created_at >= $1 {collection_filter.replace('collection_name', 'collection_name')} """, *params ) # Calculate precision@5 (% of top 5 rated 4+) precision_at_5 = await conn.fetchval( f""" SELECT CASE WHEN COUNT(*) > 0 THEN CAST(SUM(CASE WHEN rating >= 4 THEN 1 ELSE 0 END) AS FLOAT) / COUNT(*) ELSE 0 END FROM rag_search_feedback WHERE created_at >= $1 {collection_filter} """, *params ) or 0 # Calculate MRR (Mean Reciprocal Rank) - simplified # Using average rating as proxy for relevance mrr = (avg_rating or 0) / 5.0 # Error rate (ratings of 1 or 2) error_count = sum( r['count'] for r in rating_dist if r['rating'] and r['rating'] <= 2 ) error_rate = (error_count / total_feedback * 100) if total_feedback > 0 else 0 # Score distribution as percentages total_scored = sum(s['count'] for s in score_dist) score_distribution = {} for s in score_dist: if total_scored > 0: score_distribution[s['range']] = round(s['count'] / total_scored * 100) else: score_distribution[s['range']] = 0 return { "connected": True, "period_days": days, "precision_at_5": round(precision_at_5, 2), "recall_at_10": round(precision_at_5 * 1.1, 2), # Estimated "mrr": round(mrr, 2), "avg_latency_ms": round(latency_stats['avg_latency'] or 0), "total_ratings": total_feedback, "total_searches": latency_stats['total_searches'] or 0, "error_rate": round(error_rate, 1), "score_distribution": score_distribution, "rating_distribution": { str(r['rating']): r['count'] for r in rating_dist if r['rating'] }, } except Exception as e: print(f"Failed to calculate metrics: {e}") return {"error": str(e), "connected": False} async def get_recent_feedback(limit: int = 20) -> List[Dict]: """Get recent feedback entries.""" pool = await get_pool() if pool is None: return [] try: async with pool.acquire() as conn: rows = await conn.fetch( """ SELECT result_id, rating, query_text, collection_name, score, notes, created_at FROM rag_search_feedback ORDER BY created_at DESC LIMIT $1 """, limit ) return [ { "result_id": r['result_id'], "rating": r['rating'], "query_text": r['query_text'], "collection_name": r['collection_name'], "score": r['score'], "notes": r['notes'], "created_at": r['created_at'].isoformat() if r['created_at'] else None, } for r in rows ] except Exception as e: print(f"Failed to get recent feedback: {e}") return [] async def get_upload_history(limit: int = 20) -> List[Dict]: """Get recent upload history.""" pool = await get_pool() if pool is None: return [] try: async with pool.acquire() as conn: rows = await conn.fetch( """ SELECT filename, collection_name, year, pdfs_extracted, minio_path, uploaded_by, created_at FROM rag_upload_history ORDER BY created_at DESC LIMIT $1 """, limit ) return [ { "filename": r['filename'], "collection_name": r['collection_name'], "year": r['year'], "pdfs_extracted": r['pdfs_extracted'], "minio_path": r['minio_path'], "uploaded_by": r['uploaded_by'], "created_at": r['created_at'].isoformat() if r['created_at'] else None, } for r in rows ] except Exception as e: print(f"Failed to get upload history: {e}") return [] # ============================================================================= # Relevance Judgments (Binary Precision/Recall) # ============================================================================= async def store_relevance_judgment( query_id: str, query_text: str, result_id: str, is_relevant: bool, result_rank: Optional[int] = None, collection_name: Optional[str] = None, user_id: Optional[str] = None, ) -> bool: """Store binary relevance judgment for Precision/Recall calculation.""" pool = await get_pool() if pool is None: return False try: async with pool.acquire() as conn: await conn.execute( """ INSERT INTO rag_relevance_judgments (query_id, query_text, result_id, result_rank, is_relevant, collection_name, user_id) VALUES ($1, $2, $3, $4, $5, $6, $7) ON CONFLICT DO NOTHING """, query_id, query_text, result_id, result_rank, is_relevant, collection_name, user_id ) return True except Exception as e: print(f"Failed to store relevance judgment: {e}") return False async def calculate_precision_recall( collection_name: Optional[str] = None, days: int = 7, k: int = 10, ) -> Dict: """ Calculate true Precision@k and Recall@k from binary relevance judgments. Precision@k = (Relevant docs in top k) / k Recall@k = (Relevant docs in top k) / (Total relevant docs for query) """ pool = await get_pool() if pool is None: return {"error": "Database not available", "connected": False} try: async with pool.acquire() as conn: since = datetime.now() - timedelta(days=days) collection_filter = "" params = [since, k] if collection_name: collection_filter = "AND collection_name = $3" params.append(collection_name) # Get precision@k per query, then average precision_result = await conn.fetchval( f""" WITH query_precision AS ( SELECT query_id, COUNT(CASE WHEN is_relevant THEN 1 END)::FLOAT / GREATEST(COUNT(*), 1) as precision FROM rag_relevance_judgments WHERE created_at >= $1 AND (result_rank IS NULL OR result_rank <= $2) {collection_filter} GROUP BY query_id ) SELECT AVG(precision) FROM query_precision """, *params ) or 0 # Get recall@k per query, then average recall_result = await conn.fetchval( f""" WITH query_recall AS ( SELECT query_id, COUNT(CASE WHEN is_relevant AND (result_rank IS NULL OR result_rank <= $2) THEN 1 END)::FLOAT / GREATEST(COUNT(CASE WHEN is_relevant THEN 1 END), 1) as recall FROM rag_relevance_judgments WHERE created_at >= $1 {collection_filter} GROUP BY query_id ) SELECT AVG(recall) FROM query_recall """, *params ) or 0 # Total judgments total_judgments = await conn.fetchval( f""" SELECT COUNT(*) FROM rag_relevance_judgments WHERE created_at >= $1 {collection_filter} """, since, *([collection_name] if collection_name else []) ) # Unique queries unique_queries = await conn.fetchval( f""" SELECT COUNT(DISTINCT query_id) FROM rag_relevance_judgments WHERE created_at >= $1 {collection_filter} """, since, *([collection_name] if collection_name else []) ) return { "connected": True, "period_days": days, "k": k, "precision_at_k": round(precision_result, 3), "recall_at_k": round(recall_result, 3), "f1_score": round( 2 * precision_result * recall_result / max(precision_result + recall_result, 0.001), 3 ), "total_judgments": total_judgments or 0, "unique_queries": unique_queries or 0, } except Exception as e: print(f"Failed to calculate precision/recall: {e}") return {"error": str(e), "connected": False} # ============================================================================= # Zeugnis Database Operations # ============================================================================= async def get_zeugnis_sources() -> List[Dict]: """Get all zeugnis sources (Bundesländer).""" pool = await get_pool() if pool is None: return [] try: async with pool.acquire() as conn: rows = await conn.fetch( """ SELECT id, bundesland, name, base_url, license_type, training_allowed, verified_by, verified_at, created_at, updated_at FROM zeugnis_sources ORDER BY bundesland """ ) return [dict(r) for r in rows] except Exception as e: print(f"Failed to get zeugnis sources: {e}") return [] async def upsert_zeugnis_source( id: str, bundesland: str, name: str, license_type: str, training_allowed: bool, base_url: Optional[str] = None, verified_by: Optional[str] = None, ) -> bool: """Insert or update a zeugnis source.""" pool = await get_pool() if pool is None: return False try: async with pool.acquire() as conn: await conn.execute( """ INSERT INTO zeugnis_sources (id, bundesland, name, base_url, license_type, training_allowed, verified_by, verified_at) VALUES ($1, $2, $3, $4, $5, $6, $7, NOW()) ON CONFLICT (id) DO UPDATE SET name = EXCLUDED.name, base_url = EXCLUDED.base_url, license_type = EXCLUDED.license_type, training_allowed = EXCLUDED.training_allowed, verified_by = EXCLUDED.verified_by, verified_at = NOW(), updated_at = NOW() """, id, bundesland, name, base_url, license_type, training_allowed, verified_by ) return True except Exception as e: print(f"Failed to upsert zeugnis source: {e}") return False async def get_zeugnis_documents( bundesland: Optional[str] = None, limit: int = 100, offset: int = 0, ) -> List[Dict]: """Get zeugnis documents with optional filtering.""" pool = await get_pool() if pool is None: return [] try: async with pool.acquire() as conn: if bundesland: rows = await conn.fetch( """ SELECT d.*, s.bundesland, s.name as source_name FROM zeugnis_documents d JOIN zeugnis_seed_urls u ON d.seed_url_id = u.id JOIN zeugnis_sources s ON u.source_id = s.id WHERE s.bundesland = $1 ORDER BY d.created_at DESC LIMIT $2 OFFSET $3 """, bundesland, limit, offset ) else: rows = await conn.fetch( """ SELECT d.*, s.bundesland, s.name as source_name FROM zeugnis_documents d JOIN zeugnis_seed_urls u ON d.seed_url_id = u.id JOIN zeugnis_sources s ON u.source_id = s.id ORDER BY d.created_at DESC LIMIT $1 OFFSET $2 """, limit, offset ) return [dict(r) for r in rows] except Exception as e: print(f"Failed to get zeugnis documents: {e}") return [] async def get_zeugnis_stats() -> Dict: """Get zeugnis crawler statistics.""" pool = await get_pool() if pool is None: return {"error": "Database not available"} try: async with pool.acquire() as conn: # Total sources sources = await conn.fetchval("SELECT COUNT(*) FROM zeugnis_sources") # Total documents documents = await conn.fetchval("SELECT COUNT(*) FROM zeugnis_documents") # Indexed documents indexed = await conn.fetchval( "SELECT COUNT(*) FROM zeugnis_documents WHERE indexed_in_qdrant = true" ) # Training allowed training_allowed = await conn.fetchval( "SELECT COUNT(*) FROM zeugnis_documents WHERE training_allowed = true" ) # Per Bundesland stats per_bundesland = await conn.fetch( """ SELECT s.bundesland, s.name, s.training_allowed, COUNT(d.id) as doc_count FROM zeugnis_sources s LEFT JOIN zeugnis_seed_urls u ON s.id = u.source_id LEFT JOIN zeugnis_documents d ON u.id = d.seed_url_id GROUP BY s.bundesland, s.name, s.training_allowed ORDER BY s.bundesland """ ) # Active crawls active_crawls = await conn.fetchval( "SELECT COUNT(*) FROM zeugnis_crawler_queue WHERE status = 'running'" ) return { "total_sources": sources or 0, "total_documents": documents or 0, "indexed_documents": indexed or 0, "training_allowed_documents": training_allowed or 0, "active_crawls": active_crawls or 0, "per_bundesland": [dict(r) for r in per_bundesland], } except Exception as e: print(f"Failed to get zeugnis stats: {e}") return {"error": str(e)} async def log_zeugnis_event( document_id: str, event_type: str, user_id: Optional[str] = None, details: Optional[Dict] = None, ) -> bool: """Log a zeugnis usage event for audit trail.""" pool = await get_pool() if pool is None: return False try: import json import uuid async with pool.acquire() as conn: await conn.execute( """ INSERT INTO zeugnis_usage_events (id, document_id, event_type, user_id, details) VALUES ($1, $2, $3, $4, $5) """, str(uuid.uuid4()), document_id, event_type, user_id, json.dumps(details) if details else None ) return True except Exception as e: print(f"Failed to log zeugnis event: {e}") return False