Files
breakpilot-lehrer/klausur-service/backend/metrics_db.py
Benjamin Boenisch 5a31f52310 Initial commit: breakpilot-lehrer - Lehrer KI Platform
Services: Admin-Lehrer, Backend-Lehrer, Studio v2, Website,
Klausur-Service, School-Service, Voice-Service, Geo-Service,
BreakPilot Drive, Agent-Core

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-11 23:47:26 +01:00

834 lines
28 KiB
Python

"""
PostgreSQL Metrics Database Service
Stores search feedback, calculates quality metrics (Precision, Recall, MRR).
"""
import os
from typing import Optional, List, Dict
from datetime import datetime, timedelta
import asyncio
# Database configuration: the DSN is read from the DATABASE_URL environment
# variable; when that variable is unset, a local test database DSN is used
# as the default (intended for CI).
DATABASE_URL = os.getenv("DATABASE_URL", "postgresql://test:test@localhost:5432/test_metrics")
# Module-wide connection pool; remains None until get_pool() creates it.
_pool = None
async def get_pool():
    """
    Return the shared asyncpg connection pool, creating it on first use.

    Returns:
        The pool object, or None when asyncpg is not installed or the pool
        could not be created. A failure is reported via print() and nothing
        is cached, so the next call attempts the connection again.
    """
    global _pool
    if _pool is not None:
        return _pool
    try:
        # Imported lazily so that a missing asyncpg only disables metrics
        # storage instead of breaking the import of this module.
        import asyncpg
        _pool = await asyncpg.create_pool(DATABASE_URL, min_size=2, max_size=10)
    except ImportError:
        print("Warning: asyncpg not installed. Metrics storage disabled.")
        return None
    except Exception as exc:
        print(f"Warning: Failed to connect to PostgreSQL: {exc}")
        return None
    return _pool
async def init_metrics_tables() -> bool:
    """
    Create all metrics and zeugnis tables (and their indexes) if missing.

    Every statement uses IF NOT EXISTS, so the function can be called
    repeatedly.

    Returns:
        True when the DDL script ran successfully, False when no pool is
        available or the script failed (the error is printed).
    """
    pool = await get_pool()
    if pool is None:
        return False
    schema_ddl = """
        -- RAG Search Feedback Table
        CREATE TABLE IF NOT EXISTS rag_search_feedback (
            id SERIAL PRIMARY KEY,
            result_id VARCHAR(255) NOT NULL,
            query_text TEXT,
            collection_name VARCHAR(100),
            score FLOAT,
            rating INTEGER CHECK (rating >= 1 AND rating <= 5),
            notes TEXT,
            user_id VARCHAR(100),
            created_at TIMESTAMP DEFAULT NOW()
        );
        -- Index for efficient querying
        CREATE INDEX IF NOT EXISTS idx_feedback_created_at ON rag_search_feedback(created_at);
        CREATE INDEX IF NOT EXISTS idx_feedback_collection ON rag_search_feedback(collection_name);
        CREATE INDEX IF NOT EXISTS idx_feedback_rating ON rag_search_feedback(rating);
        -- RAG Search Logs Table (for latency tracking)
        CREATE TABLE IF NOT EXISTS rag_search_logs (
            id SERIAL PRIMARY KEY,
            query_text TEXT NOT NULL,
            collection_name VARCHAR(100),
            result_count INTEGER,
            latency_ms INTEGER,
            top_score FLOAT,
            filters JSONB,
            created_at TIMESTAMP DEFAULT NOW()
        );
        CREATE INDEX IF NOT EXISTS idx_search_logs_created_at ON rag_search_logs(created_at);
        -- RAG Upload History Table
        CREATE TABLE IF NOT EXISTS rag_upload_history (
            id SERIAL PRIMARY KEY,
            filename VARCHAR(500) NOT NULL,
            collection_name VARCHAR(100),
            year INTEGER,
            pdfs_extracted INTEGER,
            minio_path VARCHAR(1000),
            uploaded_by VARCHAR(100),
            created_at TIMESTAMP DEFAULT NOW()
        );
        CREATE INDEX IF NOT EXISTS idx_upload_history_created_at ON rag_upload_history(created_at);
        -- Binäre Relevanz-Judgments für echte Precision/Recall
        CREATE TABLE IF NOT EXISTS rag_relevance_judgments (
            id SERIAL PRIMARY KEY,
            query_id VARCHAR(255) NOT NULL,
            query_text TEXT NOT NULL,
            result_id VARCHAR(255) NOT NULL,
            result_rank INTEGER,
            is_relevant BOOLEAN NOT NULL,
            collection_name VARCHAR(100),
            user_id VARCHAR(100),
            created_at TIMESTAMP DEFAULT NOW()
        );
        CREATE INDEX IF NOT EXISTS idx_relevance_query ON rag_relevance_judgments(query_id);
        CREATE INDEX IF NOT EXISTS idx_relevance_created_at ON rag_relevance_judgments(created_at);
        -- Zeugnisse Source Tracking
        CREATE TABLE IF NOT EXISTS zeugnis_sources (
            id VARCHAR(36) PRIMARY KEY,
            bundesland VARCHAR(10) NOT NULL,
            name VARCHAR(255) NOT NULL,
            base_url TEXT,
            license_type VARCHAR(50) NOT NULL,
            training_allowed BOOLEAN DEFAULT FALSE,
            verified_by VARCHAR(100),
            verified_at TIMESTAMP,
            created_at TIMESTAMP DEFAULT NOW(),
            updated_at TIMESTAMP DEFAULT NOW()
        );
        CREATE INDEX IF NOT EXISTS idx_zeugnis_sources_bundesland ON zeugnis_sources(bundesland);
        -- Zeugnisse Seed URLs
        CREATE TABLE IF NOT EXISTS zeugnis_seed_urls (
            id VARCHAR(36) PRIMARY KEY,
            source_id VARCHAR(36) REFERENCES zeugnis_sources(id),
            url TEXT NOT NULL,
            doc_type VARCHAR(50),
            status VARCHAR(20) DEFAULT 'pending',
            last_crawled TIMESTAMP,
            error_message TEXT,
            created_at TIMESTAMP DEFAULT NOW()
        );
        CREATE INDEX IF NOT EXISTS idx_zeugnis_seed_urls_source ON zeugnis_seed_urls(source_id);
        CREATE INDEX IF NOT EXISTS idx_zeugnis_seed_urls_status ON zeugnis_seed_urls(status);
        -- Zeugnisse Documents
        CREATE TABLE IF NOT EXISTS zeugnis_documents (
            id VARCHAR(36) PRIMARY KEY,
            seed_url_id VARCHAR(36) REFERENCES zeugnis_seed_urls(id),
            title VARCHAR(500),
            url TEXT NOT NULL,
            content_hash VARCHAR(64),
            minio_path TEXT,
            training_allowed BOOLEAN DEFAULT FALSE,
            indexed_in_qdrant BOOLEAN DEFAULT FALSE,
            file_size INTEGER,
            content_type VARCHAR(100),
            created_at TIMESTAMP DEFAULT NOW(),
            updated_at TIMESTAMP DEFAULT NOW()
        );
        CREATE INDEX IF NOT EXISTS idx_zeugnis_documents_seed ON zeugnis_documents(seed_url_id);
        CREATE INDEX IF NOT EXISTS idx_zeugnis_documents_hash ON zeugnis_documents(content_hash);
        -- Zeugnisse Document Versions
        CREATE TABLE IF NOT EXISTS zeugnis_document_versions (
            id VARCHAR(36) PRIMARY KEY,
            document_id VARCHAR(36) REFERENCES zeugnis_documents(id),
            version INTEGER NOT NULL,
            content_hash VARCHAR(64),
            minio_path TEXT,
            change_summary TEXT,
            created_at TIMESTAMP DEFAULT NOW()
        );
        CREATE INDEX IF NOT EXISTS idx_zeugnis_versions_doc ON zeugnis_document_versions(document_id);
        -- Zeugnisse Usage Events (Audit Trail)
        CREATE TABLE IF NOT EXISTS zeugnis_usage_events (
            id VARCHAR(36) PRIMARY KEY,
            document_id VARCHAR(36) REFERENCES zeugnis_documents(id),
            event_type VARCHAR(50) NOT NULL,
            user_id VARCHAR(100),
            details JSONB,
            created_at TIMESTAMP DEFAULT NOW()
        );
        CREATE INDEX IF NOT EXISTS idx_zeugnis_events_doc ON zeugnis_usage_events(document_id);
        CREATE INDEX IF NOT EXISTS idx_zeugnis_events_type ON zeugnis_usage_events(event_type);
        CREATE INDEX IF NOT EXISTS idx_zeugnis_events_created ON zeugnis_usage_events(created_at);
        -- Crawler Queue
        CREATE TABLE IF NOT EXISTS zeugnis_crawler_queue (
            id VARCHAR(36) PRIMARY KEY,
            source_id VARCHAR(36) REFERENCES zeugnis_sources(id),
            priority INTEGER DEFAULT 5,
            status VARCHAR(20) DEFAULT 'pending',
            started_at TIMESTAMP,
            completed_at TIMESTAMP,
            documents_found INTEGER DEFAULT 0,
            documents_indexed INTEGER DEFAULT 0,
            error_count INTEGER DEFAULT 0,
            created_at TIMESTAMP DEFAULT NOW()
        );
        CREATE INDEX IF NOT EXISTS idx_crawler_queue_status ON zeugnis_crawler_queue(status);
    """
    try:
        # The whole script is sent in one execute() call without arguments.
        async with pool.acquire() as connection:
            await connection.execute(schema_ddl)
        print("RAG metrics tables initialized")
        return True
    except Exception as exc:
        print(f"Failed to initialize metrics tables: {exc}")
        return False
# =============================================================================
# Feedback Storage
# =============================================================================
async def store_feedback(
    result_id: str,
    rating: int,
    query_text: Optional[str] = None,
    collection_name: Optional[str] = None,
    score: Optional[float] = None,
    notes: Optional[str] = None,
    user_id: Optional[str] = None,
) -> bool:
    """
    Persist one feedback rating for a search result.

    Args:
        result_id: Identifier of the rated search result.
        rating: Rating value; the table's CHECK constraint accepts 1 to 5.
        query_text: Query that produced the result, if known.
        collection_name: Collection the result came from, if known.
        score: Retrieval score of the result, if known.
        notes: Free-text remark from the rater.
        user_id: Identifier of the rater.

    Returns:
        True when the row was inserted, False when no pool is available or
        the insert failed (the error is printed).
    """
    pool = await get_pool()
    if pool is None:
        return False
    insert_sql = """
        INSERT INTO rag_search_feedback
        (result_id, query_text, collection_name, score, rating, notes, user_id)
        VALUES ($1, $2, $3, $4, $5, $6, $7)
    """
    # Order matches the column list above (rating is the fifth column).
    values = (result_id, query_text, collection_name, score, rating, notes, user_id)
    try:
        async with pool.acquire() as connection:
            await connection.execute(insert_sql, *values)
        return True
    except Exception as exc:
        print(f"Failed to store feedback: {exc}")
        return False
async def log_search(
    query_text: str,
    collection_name: str,
    result_count: int,
    latency_ms: int,
    top_score: Optional[float] = None,
    filters: Optional[Dict] = None,
) -> bool:
    """
    Record one executed search in rag_search_logs.

    Args:
        query_text: The search query.
        collection_name: Collection that was searched.
        result_count: Number of results returned.
        latency_ms: Search latency in milliseconds.
        top_score: Score of the best result, if any.
        filters: Filters applied to the search; stored as JSON. None and an
            empty dict are both stored as NULL.

    Returns:
        True when the row was inserted, False when no pool is available or
        serialization/insert failed (the error is printed).
    """
    pool = await get_pool()
    if pool is None:
        return False
    insert_sql = """
        INSERT INTO rag_search_logs
        (query_text, collection_name, result_count, latency_ms, top_score, filters)
        VALUES ($1, $2, $3, $4, $5, $6)
    """
    try:
        import json
        async with pool.acquire() as connection:
            # Serialized inside the try block so an unserializable filter
            # dict is reported like any other failure.
            filters_json = json.dumps(filters) if filters else None
            await connection.execute(
                insert_sql,
                query_text,
                collection_name,
                result_count,
                latency_ms,
                top_score,
                filters_json,
            )
        return True
    except Exception as exc:
        print(f"Failed to log search: {exc}")
        return False
async def log_upload(
    filename: str,
    collection_name: str,
    year: int,
    pdfs_extracted: int,
    minio_path: Optional[str] = None,
    uploaded_by: Optional[str] = None,
) -> bool:
    """
    Record one upload in rag_upload_history.

    Args:
        filename: Name of the uploaded file.
        collection_name: Collection the upload belongs to.
        year: Year associated with the upload.
        pdfs_extracted: Number of PDFs extracted from the upload.
        minio_path: Storage path of the upload, if known.
        uploaded_by: Identifier of the uploader, if known.

    Returns:
        True when the row was inserted, False when no pool is available or
        the insert failed (the error is printed).
    """
    pool = await get_pool()
    if pool is None:
        return False
    insert_sql = """
        INSERT INTO rag_upload_history
        (filename, collection_name, year, pdfs_extracted, minio_path, uploaded_by)
        VALUES ($1, $2, $3, $4, $5, $6)
    """
    values = (filename, collection_name, year, pdfs_extracted, minio_path, uploaded_by)
    try:
        async with pool.acquire() as connection:
            await connection.execute(insert_sql, *values)
        return True
    except Exception as exc:
        print(f"Failed to log upload: {exc}")
        return False
# =============================================================================
# Metrics Calculation
# =============================================================================
async def calculate_metrics(
    collection_name: Optional[str] = None,
    days: int = 7,
) -> Dict:
    """
    Calculate RAG quality metrics from stored feedback and search logs.

    Args:
        collection_name: Restrict every query to this collection. None (or
            an empty string) means all collections.
        days: Size of the look-back window in days.

    Returns:
        On success a dict with the keys connected, period_days,
        precision_at_5, recall_at_10, mrr, avg_latency_ms, total_ratings,
        total_searches, error_rate, score_distribution and
        rating_distribution. When no pool is available or a query fails,
        a dict with "error" and "connected": False.

    Note:
        precision_at_5, recall_at_10 and mrr are proxies derived from the
        star ratings, not rank-based retrieval metrics:
        precision_at_5 is the share of ratings >= 4, recall_at_10 is that
        share scaled by 1.1 (capped at 1.0), and mrr is the average rating
        divided by 5.
    """
    pool = await get_pool()
    if pool is None:
        return {"error": "Database not available", "connected": False}
    try:
        # Start of the look-back window.
        # NOTE(review): naive local time is compared against TIMESTAMP
        # columns filled by NOW(); assumes app and database share a
        # timezone - confirm.
        since = datetime.now() - timedelta(days=days)
        # Optional collection filter; the text is a fixed fragment and the
        # value itself is always passed as a bound parameter.
        collection_filter = ""
        params = [since]
        if collection_name:
            collection_filter = "AND collection_name = $2"
            params.append(collection_name)

        async with pool.acquire() as conn:
            # Total feedback count
            total_feedback = await conn.fetchval(
                f"""
                SELECT COUNT(*) FROM rag_search_feedback
                WHERE created_at >= $1 {collection_filter}
                """,
                *params
            )
            # Rating distribution
            rating_dist = await conn.fetch(
                f"""
                SELECT rating, COUNT(*) as count
                FROM rag_search_feedback
                WHERE created_at >= $1 {collection_filter}
                GROUP BY rating
                ORDER BY rating DESC
                """,
                *params
            )
            # Average rating (proxy for precision)
            avg_rating = await conn.fetchval(
                f"""
                SELECT AVG(rating) FROM rag_search_feedback
                WHERE created_at >= $1 {collection_filter}
                """,
                *params
            )
            # Score distribution
            score_dist = await conn.fetch(
                f"""
                SELECT
                    CASE
                        WHEN score >= 0.9 THEN '0.9+'
                        WHEN score >= 0.7 THEN '0.7-0.9'
                        WHEN score >= 0.5 THEN '0.5-0.7'
                        ELSE '<0.5'
                    END as range,
                    COUNT(*) as count
                FROM rag_search_feedback
                WHERE created_at >= $1 AND score IS NOT NULL {collection_filter}
                GROUP BY range
                ORDER BY range DESC
                """,
                *params
            )
            # Search logs for latency; rag_search_logs has the same
            # collection_name column, so the filter is reused unchanged.
            latency_stats = await conn.fetchrow(
                f"""
                SELECT
                    AVG(latency_ms) as avg_latency,
                    COUNT(*) as total_searches,
                    AVG(result_count) as avg_results
                FROM rag_search_logs
                WHERE created_at >= $1 {collection_filter}
                """,
                *params
            )
            # Share of ratings that are 4 or better (reported as precision@5)
            precision_at_5 = await conn.fetchval(
                f"""
                SELECT
                    CASE WHEN COUNT(*) > 0
                        THEN CAST(SUM(CASE WHEN rating >= 4 THEN 1 ELSE 0 END) AS FLOAT) / COUNT(*)
                        ELSE 0 END
                FROM rag_search_feedback
                WHERE created_at >= $1 {collection_filter}
                """,
                *params
            ) or 0

        precision_at_5 = float(precision_at_5)
        # AVG() over an INTEGER column is NUMERIC in PostgreSQL and arrives
        # as decimal.Decimal; Decimal / float raises TypeError, so convert
        # to float before any arithmetic.
        mean_rating = float(avg_rating or 0)
        # Simplified MRR: average rating as proxy for relevance
        mrr = mean_rating / 5.0
        # Estimated recall; capped because a recall above 1.0 is meaningless.
        recall_estimate = min(precision_at_5 * 1.1, 1.0)
        # Error rate (ratings of 1 or 2)
        error_count = sum(
            r['count'] for r in rating_dist if r['rating'] and r['rating'] <= 2
        )
        error_rate = (error_count / total_feedback * 100) if total_feedback else 0
        # Score distribution as percentages. Every returned group has a
        # count of at least 1, so total_scored is non-zero whenever the
        # comprehension has something to iterate over.
        total_scored = sum(s['count'] for s in score_dist)
        score_distribution = {
            s['range']: round(s['count'] / total_scored * 100)
            for s in score_dist
        }
        return {
            "connected": True,
            "period_days": days,
            "precision_at_5": round(precision_at_5, 2),
            "recall_at_10": round(recall_estimate, 2),  # Estimated
            "mrr": round(mrr, 2),
            "avg_latency_ms": round(latency_stats['avg_latency'] or 0),
            "total_ratings": total_feedback,
            "total_searches": latency_stats['total_searches'] or 0,
            "error_rate": round(error_rate, 1),
            "score_distribution": score_distribution,
            "rating_distribution": {
                str(r['rating']): r['count'] for r in rating_dist if r['rating']
            },
        }
    except Exception as e:
        print(f"Failed to calculate metrics: {e}")
        return {"error": str(e), "connected": False}
async def get_recent_feedback(limit: int = 20) -> List[Dict]:
    """
    Fetch the newest feedback rows, most recent first.

    Args:
        limit: Maximum number of rows to return.

    Returns:
        A list of dicts with the keys result_id, rating, query_text,
        collection_name, score, notes and created_at (ISO-8601 string or
        None). An empty list when no pool is available or the query failed
        (the error is printed).
    """
    pool = await get_pool()
    if pool is None:
        return []
    select_sql = """
        SELECT result_id, rating, query_text, collection_name, score, notes, created_at
        FROM rag_search_feedback
        ORDER BY created_at DESC
        LIMIT $1
    """
    try:
        async with pool.acquire() as connection:
            rows = await connection.fetch(select_sql, limit)
        entries = []
        for row in rows:
            created = row['created_at']
            entries.append({
                "result_id": row['result_id'],
                "rating": row['rating'],
                "query_text": row['query_text'],
                "collection_name": row['collection_name'],
                "score": row['score'],
                "notes": row['notes'],
                "created_at": created.isoformat() if created else None,
            })
        return entries
    except Exception as exc:
        print(f"Failed to get recent feedback: {exc}")
        return []
async def get_upload_history(limit: int = 20) -> List[Dict]:
    """
    Fetch the newest upload history rows, most recent first.

    Args:
        limit: Maximum number of rows to return.

    Returns:
        A list of dicts with the keys filename, collection_name, year,
        pdfs_extracted, minio_path, uploaded_by and created_at (ISO-8601
        string or None). An empty list when no pool is available or the
        query failed (the error is printed).
    """
    pool = await get_pool()
    if pool is None:
        return []
    select_sql = """
        SELECT filename, collection_name, year, pdfs_extracted, minio_path, uploaded_by, created_at
        FROM rag_upload_history
        ORDER BY created_at DESC
        LIMIT $1
    """
    try:
        async with pool.acquire() as connection:
            rows = await connection.fetch(select_sql, limit)
        uploads = []
        for row in rows:
            created = row['created_at']
            uploads.append({
                "filename": row['filename'],
                "collection_name": row['collection_name'],
                "year": row['year'],
                "pdfs_extracted": row['pdfs_extracted'],
                "minio_path": row['minio_path'],
                "uploaded_by": row['uploaded_by'],
                "created_at": created.isoformat() if created else None,
            })
        return uploads
    except Exception as exc:
        print(f"Failed to get upload history: {exc}")
        return []
# =============================================================================
# Relevance Judgments (Binary Precision/Recall)
# =============================================================================
async def store_relevance_judgment(
    query_id: str,
    query_text: str,
    result_id: str,
    is_relevant: bool,
    result_rank: Optional[int] = None,
    collection_name: Optional[str] = None,
    user_id: Optional[str] = None,
) -> bool:
    """
    Persist one binary relevance judgment for a (query, result) pair.

    Args:
        query_id: Identifier grouping all judgments of one query.
        query_text: The query text.
        result_id: Identifier of the judged result.
        is_relevant: Whether the result was judged relevant.
        result_rank: Rank of the result in the result list, if known.
        collection_name: Collection the result came from, if known.
        user_id: Identifier of the judge, if known.

    Returns:
        True when the statement executed, False when no pool is available
        or the insert failed (the error is printed).
    """
    pool = await get_pool()
    if pool is None:
        return False
    # NOTE(review): ON CONFLICT DO NOTHING only skips rows that violate a
    # unique constraint; the table DDL in this module declares none besides
    # the serial primary key, so duplicates are presumably still inserted -
    # confirm against the deployed schema.
    insert_sql = """
        INSERT INTO rag_relevance_judgments
        (query_id, query_text, result_id, result_rank, is_relevant, collection_name, user_id)
        VALUES ($1, $2, $3, $4, $5, $6, $7)
        ON CONFLICT DO NOTHING
    """
    values = (
        query_id, query_text, result_id, result_rank,
        is_relevant, collection_name, user_id,
    )
    try:
        async with pool.acquire() as connection:
            await connection.execute(insert_sql, *values)
        return True
    except Exception as exc:
        print(f"Failed to store relevance judgment: {exc}")
        return False
async def calculate_precision_recall(
    collection_name: Optional[str] = None,
    days: int = 7,
    k: int = 10,
) -> Dict:
    """
    Calculate Precision@k and Recall@k from binary relevance judgments.

    Both metrics are computed per query_id and then averaged over queries.
    Judgments without a rank (result_rank IS NULL) count as inside the top k.

    Precision@k = (relevant judgments in top k) / (judgments in top k)
    Recall@k    = (relevant judgments in top k) / (all relevant judgments)

    The precision denominator is the number of judged results within the
    top k, not k itself. A query without any relevant judgment contributes
    a recall of 0.

    Args:
        collection_name: Restrict the calculation to this collection. None
            (or an empty string) means all collections.
        days: Size of the look-back window in days.
        k: Rank cut-off.

    Returns:
        On success a dict with the keys connected, period_days, k,
        precision_at_k, recall_at_k, f1_score, total_judgments and
        unique_queries. When no pool is available or a query fails, a dict
        with "error" and "connected": False.
    """
    pool = await get_pool()
    if pool is None:
        return {"error": "Database not available", "connected": False}
    try:
        since = datetime.now() - timedelta(days=days)
        # Two parameter layouts are needed: the precision/recall queries
        # bind (since, k[, collection]) while the count queries bind
        # (since[, collection]). Each layout gets its own placeholder
        # number for the collection so that the SQL text and the argument
        # list always agree.
        ranked_filter = ""
        ranked_params = [since, k]
        count_filter = ""
        count_params = [since]
        if collection_name:
            ranked_filter = "AND collection_name = $3"
            ranked_params.append(collection_name)
            count_filter = "AND collection_name = $2"
            count_params.append(collection_name)

        async with pool.acquire() as conn:
            # Get precision@k per query, then average
            precision_result = await conn.fetchval(
                f"""
                WITH query_precision AS (
                    SELECT
                        query_id,
                        COUNT(CASE WHEN is_relevant THEN 1 END)::FLOAT /
                            GREATEST(COUNT(*), 1) as precision
                    FROM rag_relevance_judgments
                    WHERE created_at >= $1
                        AND (result_rank IS NULL OR result_rank <= $2)
                        {ranked_filter}
                    GROUP BY query_id
                )
                SELECT AVG(precision) FROM query_precision
                """,
                *ranked_params
            ) or 0
            # Get recall@k per query, then average
            recall_result = await conn.fetchval(
                f"""
                WITH query_recall AS (
                    SELECT
                        query_id,
                        COUNT(CASE WHEN is_relevant AND (result_rank IS NULL OR result_rank <= $2) THEN 1 END)::FLOAT /
                            GREATEST(COUNT(CASE WHEN is_relevant THEN 1 END), 1) as recall
                    FROM rag_relevance_judgments
                    WHERE created_at >= $1
                        {ranked_filter}
                    GROUP BY query_id
                )
                SELECT AVG(recall) FROM query_recall
                """,
                *ranked_params
            ) or 0
            # Total judgments
            total_judgments = await conn.fetchval(
                f"""
                SELECT COUNT(*) FROM rag_relevance_judgments
                WHERE created_at >= $1 {count_filter}
                """,
                *count_params
            )
            # Unique queries
            unique_queries = await conn.fetchval(
                f"""
                SELECT COUNT(DISTINCT query_id) FROM rag_relevance_judgments
                WHERE created_at >= $1 {count_filter}
                """,
                *count_params
            )

        # The 0.001 floor avoids a division by zero when both metrics are 0.
        f1_score = (
            2 * precision_result * recall_result
            / max(precision_result + recall_result, 0.001)
        )
        return {
            "connected": True,
            "period_days": days,
            "k": k,
            "precision_at_k": round(precision_result, 3),
            "recall_at_k": round(recall_result, 3),
            "f1_score": round(f1_score, 3),
            "total_judgments": total_judgments or 0,
            "unique_queries": unique_queries or 0,
        }
    except Exception as e:
        print(f"Failed to calculate precision/recall: {e}")
        return {"error": str(e), "connected": False}
# =============================================================================
# Zeugnis Database Operations
# =============================================================================
async def get_zeugnis_sources() -> List[Dict]:
    """
    Fetch every row of zeugnis_sources, ordered by bundesland.

    Returns:
        One dict per source with the selected columns as keys. An empty
        list when no pool is available or the query failed (the error is
        printed).
    """
    pool = await get_pool()
    if pool is None:
        return []
    select_sql = """
        SELECT id, bundesland, name, base_url, license_type, training_allowed,
            verified_by, verified_at, created_at, updated_at
        FROM zeugnis_sources
        ORDER BY bundesland
    """
    try:
        async with pool.acquire() as connection:
            records = await connection.fetch(select_sql)
        return list(map(dict, records))
    except Exception as exc:
        print(f"Failed to get zeugnis sources: {exc}")
        return []
async def upsert_zeugnis_source(
    id: str,
    bundesland: str,
    name: str,
    license_type: str,
    training_allowed: bool,
    base_url: Optional[str] = None,
    verified_by: Optional[str] = None,
) -> bool:
    """
    Insert a zeugnis source, or update the existing row with the same id.

    On conflict the columns name, base_url, license_type, training_allowed
    and verified_by are overwritten and verified_at/updated_at are set to
    NOW(); bundesland is left as stored.

    Args:
        id: Primary key of the source.
        bundesland: Federal-state code of the source.
        name: Display name of the source.
        license_type: License classification of the source.
        training_allowed: Whether the source may be used for training.
        base_url: Base URL of the source, if known.
        verified_by: Identifier of the verifier, if known.

    Returns:
        True when the statement executed, False when no pool is available
        or the statement failed (the error is printed).
    """
    pool = await get_pool()
    if pool is None:
        return False
    upsert_sql = """
        INSERT INTO zeugnis_sources (id, bundesland, name, base_url, license_type, training_allowed, verified_by, verified_at)
        VALUES ($1, $2, $3, $4, $5, $6, $7, NOW())
        ON CONFLICT (id) DO UPDATE SET
            name = EXCLUDED.name,
            base_url = EXCLUDED.base_url,
            license_type = EXCLUDED.license_type,
            training_allowed = EXCLUDED.training_allowed,
            verified_by = EXCLUDED.verified_by,
            verified_at = NOW(),
            updated_at = NOW()
    """
    values = (id, bundesland, name, base_url, license_type, training_allowed, verified_by)
    try:
        async with pool.acquire() as connection:
            await connection.execute(upsert_sql, *values)
        return True
    except Exception as exc:
        print(f"Failed to upsert zeugnis source: {exc}")
        return False
async def get_zeugnis_documents(
    bundesland: Optional[str] = None,
    limit: int = 100,
    offset: int = 0,
) -> List[Dict]:
    """
    Fetch zeugnis documents joined with their source, newest first.

    Args:
        bundesland: When given (non-empty), only documents whose source has
            this bundesland are returned.
        limit: Maximum number of rows to return.
        offset: Number of rows to skip.

    Returns:
        One dict per document containing all zeugnis_documents columns plus
        bundesland and source_name. An empty list when no pool is available
        or the query failed (the error is printed).
    """
    pool = await get_pool()
    if pool is None:
        return []
    # Pick the statement and its arguments first, then run a single fetch.
    if bundesland:
        select_sql = """
            SELECT d.*, s.bundesland, s.name as source_name
            FROM zeugnis_documents d
            JOIN zeugnis_seed_urls u ON d.seed_url_id = u.id
            JOIN zeugnis_sources s ON u.source_id = s.id
            WHERE s.bundesland = $1
            ORDER BY d.created_at DESC
            LIMIT $2 OFFSET $3
        """
        query_args = (bundesland, limit, offset)
    else:
        select_sql = """
            SELECT d.*, s.bundesland, s.name as source_name
            FROM zeugnis_documents d
            JOIN zeugnis_seed_urls u ON d.seed_url_id = u.id
            JOIN zeugnis_sources s ON u.source_id = s.id
            ORDER BY d.created_at DESC
            LIMIT $1 OFFSET $2
        """
        query_args = (limit, offset)
    try:
        async with pool.acquire() as connection:
            records = await connection.fetch(select_sql, *query_args)
        return list(map(dict, records))
    except Exception as exc:
        print(f"Failed to get zeugnis documents: {exc}")
        return []
async def get_zeugnis_stats() -> Dict:
    """
    Collect summary statistics about zeugnis sources, documents and crawls.

    Returns:
        On success a dict with the keys total_sources, total_documents,
        indexed_documents, training_allowed_documents, active_crawls and
        per_bundesland (a list of dicts with bundesland, name,
        training_allowed and doc_count). When no pool is available or a
        query fails, a dict with only an "error" key.
    """
    pool = await get_pool()
    if pool is None:
        return {"error": "Database not available"}
    try:
        async with pool.acquire() as connection:
            # Plain row counts
            source_total = await connection.fetchval("SELECT COUNT(*) FROM zeugnis_sources")
            document_total = await connection.fetchval("SELECT COUNT(*) FROM zeugnis_documents")
            indexed_total = await connection.fetchval(
                "SELECT COUNT(*) FROM zeugnis_documents WHERE indexed_in_qdrant = true"
            )
            trainable_total = await connection.fetchval(
                "SELECT COUNT(*) FROM zeugnis_documents WHERE training_allowed = true"
            )
            # Document count per source; LEFT JOINs keep sources that have
            # no seed URLs or documents (doc_count 0).
            bundesland_rows = await connection.fetch(
                """
                SELECT s.bundesland, s.name, s.training_allowed, COUNT(d.id) as doc_count
                FROM zeugnis_sources s
                LEFT JOIN zeugnis_seed_urls u ON s.id = u.source_id
                LEFT JOIN zeugnis_documents d ON u.id = d.seed_url_id
                GROUP BY s.bundesland, s.name, s.training_allowed
                ORDER BY s.bundesland
                """
            )
            # Queue entries currently marked as running
            running_crawls = await connection.fetchval(
                "SELECT COUNT(*) FROM zeugnis_crawler_queue WHERE status = 'running'"
            )
        stats = {
            "total_sources": source_total or 0,
            "total_documents": document_total or 0,
            "indexed_documents": indexed_total or 0,
            "training_allowed_documents": trainable_total or 0,
            "active_crawls": running_crawls or 0,
            "per_bundesland": list(map(dict, bundesland_rows)),
        }
        return stats
    except Exception as exc:
        print(f"Failed to get zeugnis stats: {exc}")
        return {"error": str(exc)}
async def log_zeugnis_event(
    document_id: str,
    event_type: str,
    user_id: Optional[str] = None,
    details: Optional[Dict] = None,
) -> bool:
    """
    Append one entry to the zeugnis usage audit trail.

    A fresh UUID4 string is generated as the event id.

    Args:
        document_id: Identifier of the document the event refers to.
        event_type: Kind of event being recorded.
        user_id: Identifier of the acting user, if known.
        details: Extra event data; stored as JSON. None and an empty dict
            are both stored as NULL.

    Returns:
        True when the row was inserted, False when no pool is available or
        serialization/insert failed (the error is printed).
    """
    pool = await get_pool()
    if pool is None:
        return False
    insert_sql = """
        INSERT INTO zeugnis_usage_events (id, document_id, event_type, user_id, details)
        VALUES ($1, $2, $3, $4, $5)
    """
    try:
        import json
        import uuid
        async with pool.acquire() as connection:
            event_id = str(uuid.uuid4())
            # Serialized inside the try block so an unserializable details
            # dict is reported like any other failure.
            details_json = json.dumps(details) if details else None
            await connection.execute(
                insert_sql,
                event_id,
                document_id,
                event_type,
                user_id,
                details_json,
            )
        return True
    except Exception as exc:
        print(f"Failed to log zeugnis event: {exc}")
        return False