[split-required] Split 700-870 LOC files across all services
backend-lehrer (11 files):
- llm_gateway/routes/schools.py (867 → 5), recording_api.py (848 → 6)
- messenger_api.py (840 → 5), print_generator.py (824 → 5)
- unit_analytics_api.py (751 → 5), classroom/routes/context.py (726 → 4)
- llm_gateway/routes/edu_search_seeds.py (710 → 4)

klausur-service (12 files):
- ocr_labeling_api.py (845 → 4), metrics_db.py (833 → 4)
- legal_corpus_api.py (790 → 4), page_crop.py (758 → 3)
- mail/ai_service.py (747 → 4), github_crawler.py (767 → 3)
- trocr_service.py (730 → 4), full_compliance_pipeline.py (723 → 4)
- dsfa_rag_api.py (715 → 4), ocr_pipeline_auto.py (705 → 4)

website (6 pages):
- audit-checklist (867 → 8), content (806 → 6)
- screen-flow (790 → 4), scraper (789 → 5)
- zeugnisse (776 → 5), modules (745 → 4)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
182
klausur-service/backend/metrics_db_schema.py
Normal file
182
klausur-service/backend/metrics_db_schema.py
Normal file
@@ -0,0 +1,182 @@
|
||||
"""
|
||||
PostgreSQL Metrics Database - Schema Initialization
|
||||
|
||||
Table creation DDL for all metrics, feedback, and zeugnis tables.
|
||||
|
||||
Extracted from metrics_db_core.py to keep files under 500 LOC.
|
||||
"""
|
||||
|
||||
from metrics_db_core import get_pool
|
||||
|
||||
|
||||
async def init_metrics_tables() -> bool:
    """Create the metrics, feedback and Zeugnis tables if they are missing.

    Every statement in the DDL script uses ``IF NOT EXISTS``, so the tables
    and indexes are only created when absent.

    Returns:
        True when the DDL script executed without raising.
        False when ``get_pool()`` yields ``None`` or when any exception is
        raised while running the script (the error is printed, not re-raised).
    """
    # The complete schema script; it is sent to the server in one execute() call.
    ddl = """
    -- RAG Search Feedback Table
    CREATE TABLE IF NOT EXISTS rag_search_feedback (
        id SERIAL PRIMARY KEY,
        result_id VARCHAR(255) NOT NULL,
        query_text TEXT,
        collection_name VARCHAR(100),
        score FLOAT,
        rating INTEGER CHECK (rating >= 1 AND rating <= 5),
        notes TEXT,
        user_id VARCHAR(100),
        created_at TIMESTAMP DEFAULT NOW()
    );

    -- Index for efficient querying
    CREATE INDEX IF NOT EXISTS idx_feedback_created_at ON rag_search_feedback(created_at);
    CREATE INDEX IF NOT EXISTS idx_feedback_collection ON rag_search_feedback(collection_name);
    CREATE INDEX IF NOT EXISTS idx_feedback_rating ON rag_search_feedback(rating);

    -- RAG Search Logs Table (for latency tracking)
    CREATE TABLE IF NOT EXISTS rag_search_logs (
        id SERIAL PRIMARY KEY,
        query_text TEXT NOT NULL,
        collection_name VARCHAR(100),
        result_count INTEGER,
        latency_ms INTEGER,
        top_score FLOAT,
        filters JSONB,
        created_at TIMESTAMP DEFAULT NOW()
    );

    CREATE INDEX IF NOT EXISTS idx_search_logs_created_at ON rag_search_logs(created_at);

    -- RAG Upload History Table
    CREATE TABLE IF NOT EXISTS rag_upload_history (
        id SERIAL PRIMARY KEY,
        filename VARCHAR(500) NOT NULL,
        collection_name VARCHAR(100),
        year INTEGER,
        pdfs_extracted INTEGER,
        minio_path VARCHAR(1000),
        uploaded_by VARCHAR(100),
        created_at TIMESTAMP DEFAULT NOW()
    );

    CREATE INDEX IF NOT EXISTS idx_upload_history_created_at ON rag_upload_history(created_at);

    -- Binaere Relevanz-Judgments fuer echte Precision/Recall
    CREATE TABLE IF NOT EXISTS rag_relevance_judgments (
        id SERIAL PRIMARY KEY,
        query_id VARCHAR(255) NOT NULL,
        query_text TEXT NOT NULL,
        result_id VARCHAR(255) NOT NULL,
        result_rank INTEGER,
        is_relevant BOOLEAN NOT NULL,
        collection_name VARCHAR(100),
        user_id VARCHAR(100),
        created_at TIMESTAMP DEFAULT NOW()
    );

    CREATE INDEX IF NOT EXISTS idx_relevance_query ON rag_relevance_judgments(query_id);
    CREATE INDEX IF NOT EXISTS idx_relevance_created_at ON rag_relevance_judgments(created_at);

    -- Zeugnisse Source Tracking
    CREATE TABLE IF NOT EXISTS zeugnis_sources (
        id VARCHAR(36) PRIMARY KEY,
        bundesland VARCHAR(10) NOT NULL,
        name VARCHAR(255) NOT NULL,
        base_url TEXT,
        license_type VARCHAR(50) NOT NULL,
        training_allowed BOOLEAN DEFAULT FALSE,
        verified_by VARCHAR(100),
        verified_at TIMESTAMP,
        created_at TIMESTAMP DEFAULT NOW(),
        updated_at TIMESTAMP DEFAULT NOW()
    );

    CREATE INDEX IF NOT EXISTS idx_zeugnis_sources_bundesland ON zeugnis_sources(bundesland);

    -- Zeugnisse Seed URLs
    CREATE TABLE IF NOT EXISTS zeugnis_seed_urls (
        id VARCHAR(36) PRIMARY KEY,
        source_id VARCHAR(36) REFERENCES zeugnis_sources(id),
        url TEXT NOT NULL,
        doc_type VARCHAR(50),
        status VARCHAR(20) DEFAULT 'pending',
        last_crawled TIMESTAMP,
        error_message TEXT,
        created_at TIMESTAMP DEFAULT NOW()
    );

    CREATE INDEX IF NOT EXISTS idx_zeugnis_seed_urls_source ON zeugnis_seed_urls(source_id);
    CREATE INDEX IF NOT EXISTS idx_zeugnis_seed_urls_status ON zeugnis_seed_urls(status);

    -- Zeugnisse Documents
    CREATE TABLE IF NOT EXISTS zeugnis_documents (
        id VARCHAR(36) PRIMARY KEY,
        seed_url_id VARCHAR(36) REFERENCES zeugnis_seed_urls(id),
        title VARCHAR(500),
        url TEXT NOT NULL,
        content_hash VARCHAR(64),
        minio_path TEXT,
        training_allowed BOOLEAN DEFAULT FALSE,
        indexed_in_qdrant BOOLEAN DEFAULT FALSE,
        file_size INTEGER,
        content_type VARCHAR(100),
        created_at TIMESTAMP DEFAULT NOW(),
        updated_at TIMESTAMP DEFAULT NOW()
    );

    CREATE INDEX IF NOT EXISTS idx_zeugnis_documents_seed ON zeugnis_documents(seed_url_id);
    CREATE INDEX IF NOT EXISTS idx_zeugnis_documents_hash ON zeugnis_documents(content_hash);

    -- Zeugnisse Document Versions
    CREATE TABLE IF NOT EXISTS zeugnis_document_versions (
        id VARCHAR(36) PRIMARY KEY,
        document_id VARCHAR(36) REFERENCES zeugnis_documents(id),
        version INTEGER NOT NULL,
        content_hash VARCHAR(64),
        minio_path TEXT,
        change_summary TEXT,
        created_at TIMESTAMP DEFAULT NOW()
    );

    CREATE INDEX IF NOT EXISTS idx_zeugnis_versions_doc ON zeugnis_document_versions(document_id);

    -- Zeugnisse Usage Events (Audit Trail)
    CREATE TABLE IF NOT EXISTS zeugnis_usage_events (
        id VARCHAR(36) PRIMARY KEY,
        document_id VARCHAR(36) REFERENCES zeugnis_documents(id),
        event_type VARCHAR(50) NOT NULL,
        user_id VARCHAR(100),
        details JSONB,
        created_at TIMESTAMP DEFAULT NOW()
    );

    CREATE INDEX IF NOT EXISTS idx_zeugnis_events_doc ON zeugnis_usage_events(document_id);
    CREATE INDEX IF NOT EXISTS idx_zeugnis_events_type ON zeugnis_usage_events(event_type);
    CREATE INDEX IF NOT EXISTS idx_zeugnis_events_created ON zeugnis_usage_events(created_at);

    -- Crawler Queue
    CREATE TABLE IF NOT EXISTS zeugnis_crawler_queue (
        id VARCHAR(36) PRIMARY KEY,
        source_id VARCHAR(36) REFERENCES zeugnis_sources(id),
        priority INTEGER DEFAULT 5,
        status VARCHAR(20) DEFAULT 'pending',
        started_at TIMESTAMP,
        completed_at TIMESTAMP,
        documents_found INTEGER DEFAULT 0,
        documents_indexed INTEGER DEFAULT 0,
        error_count INTEGER DEFAULT 0,
        created_at TIMESTAMP DEFAULT NOW()
    );

    CREATE INDEX IF NOT EXISTS idx_crawler_queue_status ON zeugnis_crawler_queue(status);
    """

    db_pool = await get_pool()
    if db_pool is None:
        # No pool available: report failure instead of raising.
        return False

    try:
        async with db_pool.acquire() as connection:
            await connection.execute(ddl)
        print("RAG metrics tables initialized")
        return True
    except Exception as exc:
        # Best-effort initialisation: surface the error text, signal failure.
        print(f"Failed to initialize metrics tables: {exc}")
        return False
|
||||
Reference in New Issue
Block a user