""" PostgreSQL Metrics Database - Schema Initialization Table creation DDL for all metrics, feedback, and zeugnis tables. Extracted from metrics_db_core.py to keep files under 500 LOC. """ from metrics_db_core import get_pool async def init_metrics_tables() -> bool: """Initialize metrics tables in PostgreSQL.""" pool = await get_pool() if pool is None: return False create_tables_sql = """ -- RAG Search Feedback Table CREATE TABLE IF NOT EXISTS rag_search_feedback ( id SERIAL PRIMARY KEY, result_id VARCHAR(255) NOT NULL, query_text TEXT, collection_name VARCHAR(100), score FLOAT, rating INTEGER CHECK (rating >= 1 AND rating <= 5), notes TEXT, user_id VARCHAR(100), created_at TIMESTAMP DEFAULT NOW() ); -- Index for efficient querying CREATE INDEX IF NOT EXISTS idx_feedback_created_at ON rag_search_feedback(created_at); CREATE INDEX IF NOT EXISTS idx_feedback_collection ON rag_search_feedback(collection_name); CREATE INDEX IF NOT EXISTS idx_feedback_rating ON rag_search_feedback(rating); -- RAG Search Logs Table (for latency tracking) CREATE TABLE IF NOT EXISTS rag_search_logs ( id SERIAL PRIMARY KEY, query_text TEXT NOT NULL, collection_name VARCHAR(100), result_count INTEGER, latency_ms INTEGER, top_score FLOAT, filters JSONB, created_at TIMESTAMP DEFAULT NOW() ); CREATE INDEX IF NOT EXISTS idx_search_logs_created_at ON rag_search_logs(created_at); -- RAG Upload History Table CREATE TABLE IF NOT EXISTS rag_upload_history ( id SERIAL PRIMARY KEY, filename VARCHAR(500) NOT NULL, collection_name VARCHAR(100), year INTEGER, pdfs_extracted INTEGER, minio_path VARCHAR(1000), uploaded_by VARCHAR(100), created_at TIMESTAMP DEFAULT NOW() ); CREATE INDEX IF NOT EXISTS idx_upload_history_created_at ON rag_upload_history(created_at); -- Binaere Relevanz-Judgments fuer echte Precision/Recall CREATE TABLE IF NOT EXISTS rag_relevance_judgments ( id SERIAL PRIMARY KEY, query_id VARCHAR(255) NOT NULL, query_text TEXT NOT NULL, result_id VARCHAR(255) NOT NULL, result_rank INTEGER, is_relevant BOOLEAN NOT NULL, collection_name VARCHAR(100), user_id VARCHAR(100), created_at TIMESTAMP DEFAULT NOW() ); CREATE INDEX IF NOT EXISTS idx_relevance_query ON rag_relevance_judgments(query_id); CREATE INDEX IF NOT EXISTS idx_relevance_created_at ON rag_relevance_judgments(created_at); -- Zeugnisse Source Tracking CREATE TABLE IF NOT EXISTS zeugnis_sources ( id VARCHAR(36) PRIMARY KEY, bundesland VARCHAR(10) NOT NULL, name VARCHAR(255) NOT NULL, base_url TEXT, license_type VARCHAR(50) NOT NULL, training_allowed BOOLEAN DEFAULT FALSE, verified_by VARCHAR(100), verified_at TIMESTAMP, created_at TIMESTAMP DEFAULT NOW(), updated_at TIMESTAMP DEFAULT NOW() ); CREATE INDEX IF NOT EXISTS idx_zeugnis_sources_bundesland ON zeugnis_sources(bundesland); -- Zeugnisse Seed URLs CREATE TABLE IF NOT EXISTS zeugnis_seed_urls ( id VARCHAR(36) PRIMARY KEY, source_id VARCHAR(36) REFERENCES zeugnis_sources(id), url TEXT NOT NULL, doc_type VARCHAR(50), status VARCHAR(20) DEFAULT 'pending', last_crawled TIMESTAMP, error_message TEXT, created_at TIMESTAMP DEFAULT NOW() ); CREATE INDEX IF NOT EXISTS idx_zeugnis_seed_urls_source ON zeugnis_seed_urls(source_id); CREATE INDEX IF NOT EXISTS idx_zeugnis_seed_urls_status ON zeugnis_seed_urls(status); -- Zeugnisse Documents CREATE TABLE IF NOT EXISTS zeugnis_documents ( id VARCHAR(36) PRIMARY KEY, seed_url_id VARCHAR(36) REFERENCES zeugnis_seed_urls(id), title VARCHAR(500), url TEXT NOT NULL, content_hash VARCHAR(64), minio_path TEXT, training_allowed BOOLEAN DEFAULT FALSE, indexed_in_qdrant BOOLEAN DEFAULT FALSE, file_size INTEGER, content_type VARCHAR(100), created_at TIMESTAMP DEFAULT NOW(), updated_at TIMESTAMP DEFAULT NOW() ); CREATE INDEX IF NOT EXISTS idx_zeugnis_documents_seed ON zeugnis_documents(seed_url_id); CREATE INDEX IF NOT EXISTS idx_zeugnis_documents_hash ON zeugnis_documents(content_hash); -- Zeugnisse Document Versions CREATE TABLE IF NOT EXISTS zeugnis_document_versions ( id VARCHAR(36) PRIMARY KEY, document_id VARCHAR(36) REFERENCES zeugnis_documents(id), version INTEGER NOT NULL, content_hash VARCHAR(64), minio_path TEXT, change_summary TEXT, created_at TIMESTAMP DEFAULT NOW() ); CREATE INDEX IF NOT EXISTS idx_zeugnis_versions_doc ON zeugnis_document_versions(document_id); -- Zeugnisse Usage Events (Audit Trail) CREATE TABLE IF NOT EXISTS zeugnis_usage_events ( id VARCHAR(36) PRIMARY KEY, document_id VARCHAR(36) REFERENCES zeugnis_documents(id), event_type VARCHAR(50) NOT NULL, user_id VARCHAR(100), details JSONB, created_at TIMESTAMP DEFAULT NOW() ); CREATE INDEX IF NOT EXISTS idx_zeugnis_events_doc ON zeugnis_usage_events(document_id); CREATE INDEX IF NOT EXISTS idx_zeugnis_events_type ON zeugnis_usage_events(event_type); CREATE INDEX IF NOT EXISTS idx_zeugnis_events_created ON zeugnis_usage_events(created_at); -- Crawler Queue CREATE TABLE IF NOT EXISTS zeugnis_crawler_queue ( id VARCHAR(36) PRIMARY KEY, source_id VARCHAR(36) REFERENCES zeugnis_sources(id), priority INTEGER DEFAULT 5, status VARCHAR(20) DEFAULT 'pending', started_at TIMESTAMP, completed_at TIMESTAMP, documents_found INTEGER DEFAULT 0, documents_indexed INTEGER DEFAULT 0, error_count INTEGER DEFAULT 0, created_at TIMESTAMP DEFAULT NOW() ); CREATE INDEX IF NOT EXISTS idx_crawler_queue_status ON zeugnis_crawler_queue(status); """ try: async with pool.acquire() as conn: await conn.execute(create_tables_sql) print("RAG metrics tables initialized") return True except Exception as e: print(f"Failed to initialize metrics tables: {e}") return False