backend-lehrer (11 files): - llm_gateway/routes/schools.py (867 → 5), recording_api.py (848 → 6) - messenger_api.py (840 → 5), print_generator.py (824 → 5) - unit_analytics_api.py (751 → 5), classroom/routes/context.py (726 → 4) - llm_gateway/routes/edu_search_seeds.py (710 → 4) klausur-service (12 files): - ocr_labeling_api.py (845 → 4), metrics_db.py (833 → 4) - legal_corpus_api.py (790 → 4), page_crop.py (758 → 3) - mail/ai_service.py (747 → 4), github_crawler.py (767 → 3) - trocr_service.py (730 → 4), full_compliance_pipeline.py (723 → 4) - dsfa_rag_api.py (715 → 4), ocr_pipeline_auto.py (705 → 4) website (6 pages): - audit-checklist (867 → 8), content (806 → 6) - screen-flow (790 → 4), scraper (789 → 5) - zeugnisse (776 → 5), modules (745 → 4) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
183 lines
6.4 KiB
Python
183 lines
6.4 KiB
Python
"""
|
|
PostgreSQL Metrics Database - Schema Initialization
|
|
|
|
Table creation DDL for all metrics, feedback, and zeugnis tables.
|
|
|
|
Extracted from metrics_db_core.py to keep files under 500 LOC.
|
|
"""
|
|
|
|
from metrics_db_core import get_pool
|
|
|
|
|
|
async def init_metrics_tables() -> bool:
    """Create the metrics, feedback and zeugnis tables if they are missing.

    The whole schema is sent to PostgreSQL as one multi-statement script.
    Every statement uses ``IF NOT EXISTS``, so calling this again against an
    already initialized database does not fail on existing objects.

    Tables created by the script:
        rag_search_feedback, rag_search_logs, rag_upload_history,
        rag_relevance_judgments, zeugnis_sources, zeugnis_seed_urls,
        zeugnis_documents, zeugnis_document_versions, zeugnis_usage_events,
        zeugnis_crawler_queue (each with its supporting indexes).

    Returns:
        True when the script ran without raising; False when ``get_pool()``
        returned ``None`` or when executing the script raised an exception
        (the error is printed, not re-raised).
    """
    # Referenced tables (zeugnis_sources -> zeugnis_seed_urls ->
    # zeugnis_documents -> versions/usage events) are declared before the
    # tables whose foreign keys point at them, so statement order matters.
    schema_ddl = """
    -- RAG Search Feedback Table
    CREATE TABLE IF NOT EXISTS rag_search_feedback (
        id SERIAL PRIMARY KEY,
        result_id VARCHAR(255) NOT NULL,
        query_text TEXT,
        collection_name VARCHAR(100),
        score FLOAT,
        rating INTEGER CHECK (rating >= 1 AND rating <= 5),
        notes TEXT,
        user_id VARCHAR(100),
        created_at TIMESTAMP DEFAULT NOW()
    );

    -- Index for efficient querying
    CREATE INDEX IF NOT EXISTS idx_feedback_created_at ON rag_search_feedback(created_at);
    CREATE INDEX IF NOT EXISTS idx_feedback_collection ON rag_search_feedback(collection_name);
    CREATE INDEX IF NOT EXISTS idx_feedback_rating ON rag_search_feedback(rating);

    -- RAG Search Logs Table (for latency tracking)
    CREATE TABLE IF NOT EXISTS rag_search_logs (
        id SERIAL PRIMARY KEY,
        query_text TEXT NOT NULL,
        collection_name VARCHAR(100),
        result_count INTEGER,
        latency_ms INTEGER,
        top_score FLOAT,
        filters JSONB,
        created_at TIMESTAMP DEFAULT NOW()
    );

    CREATE INDEX IF NOT EXISTS idx_search_logs_created_at ON rag_search_logs(created_at);

    -- RAG Upload History Table
    CREATE TABLE IF NOT EXISTS rag_upload_history (
        id SERIAL PRIMARY KEY,
        filename VARCHAR(500) NOT NULL,
        collection_name VARCHAR(100),
        year INTEGER,
        pdfs_extracted INTEGER,
        minio_path VARCHAR(1000),
        uploaded_by VARCHAR(100),
        created_at TIMESTAMP DEFAULT NOW()
    );

    CREATE INDEX IF NOT EXISTS idx_upload_history_created_at ON rag_upload_history(created_at);

    -- Binaere Relevanz-Judgments fuer echte Precision/Recall
    CREATE TABLE IF NOT EXISTS rag_relevance_judgments (
        id SERIAL PRIMARY KEY,
        query_id VARCHAR(255) NOT NULL,
        query_text TEXT NOT NULL,
        result_id VARCHAR(255) NOT NULL,
        result_rank INTEGER,
        is_relevant BOOLEAN NOT NULL,
        collection_name VARCHAR(100),
        user_id VARCHAR(100),
        created_at TIMESTAMP DEFAULT NOW()
    );

    CREATE INDEX IF NOT EXISTS idx_relevance_query ON rag_relevance_judgments(query_id);
    CREATE INDEX IF NOT EXISTS idx_relevance_created_at ON rag_relevance_judgments(created_at);

    -- Zeugnisse Source Tracking
    CREATE TABLE IF NOT EXISTS zeugnis_sources (
        id VARCHAR(36) PRIMARY KEY,
        bundesland VARCHAR(10) NOT NULL,
        name VARCHAR(255) NOT NULL,
        base_url TEXT,
        license_type VARCHAR(50) NOT NULL,
        training_allowed BOOLEAN DEFAULT FALSE,
        verified_by VARCHAR(100),
        verified_at TIMESTAMP,
        created_at TIMESTAMP DEFAULT NOW(),
        updated_at TIMESTAMP DEFAULT NOW()
    );

    CREATE INDEX IF NOT EXISTS idx_zeugnis_sources_bundesland ON zeugnis_sources(bundesland);

    -- Zeugnisse Seed URLs
    CREATE TABLE IF NOT EXISTS zeugnis_seed_urls (
        id VARCHAR(36) PRIMARY KEY,
        source_id VARCHAR(36) REFERENCES zeugnis_sources(id),
        url TEXT NOT NULL,
        doc_type VARCHAR(50),
        status VARCHAR(20) DEFAULT 'pending',
        last_crawled TIMESTAMP,
        error_message TEXT,
        created_at TIMESTAMP DEFAULT NOW()
    );

    CREATE INDEX IF NOT EXISTS idx_zeugnis_seed_urls_source ON zeugnis_seed_urls(source_id);
    CREATE INDEX IF NOT EXISTS idx_zeugnis_seed_urls_status ON zeugnis_seed_urls(status);

    -- Zeugnisse Documents
    CREATE TABLE IF NOT EXISTS zeugnis_documents (
        id VARCHAR(36) PRIMARY KEY,
        seed_url_id VARCHAR(36) REFERENCES zeugnis_seed_urls(id),
        title VARCHAR(500),
        url TEXT NOT NULL,
        content_hash VARCHAR(64),
        minio_path TEXT,
        training_allowed BOOLEAN DEFAULT FALSE,
        indexed_in_qdrant BOOLEAN DEFAULT FALSE,
        file_size INTEGER,
        content_type VARCHAR(100),
        created_at TIMESTAMP DEFAULT NOW(),
        updated_at TIMESTAMP DEFAULT NOW()
    );

    CREATE INDEX IF NOT EXISTS idx_zeugnis_documents_seed ON zeugnis_documents(seed_url_id);
    CREATE INDEX IF NOT EXISTS idx_zeugnis_documents_hash ON zeugnis_documents(content_hash);

    -- Zeugnisse Document Versions
    CREATE TABLE IF NOT EXISTS zeugnis_document_versions (
        id VARCHAR(36) PRIMARY KEY,
        document_id VARCHAR(36) REFERENCES zeugnis_documents(id),
        version INTEGER NOT NULL,
        content_hash VARCHAR(64),
        minio_path TEXT,
        change_summary TEXT,
        created_at TIMESTAMP DEFAULT NOW()
    );

    CREATE INDEX IF NOT EXISTS idx_zeugnis_versions_doc ON zeugnis_document_versions(document_id);

    -- Zeugnisse Usage Events (Audit Trail)
    CREATE TABLE IF NOT EXISTS zeugnis_usage_events (
        id VARCHAR(36) PRIMARY KEY,
        document_id VARCHAR(36) REFERENCES zeugnis_documents(id),
        event_type VARCHAR(50) NOT NULL,
        user_id VARCHAR(100),
        details JSONB,
        created_at TIMESTAMP DEFAULT NOW()
    );

    CREATE INDEX IF NOT EXISTS idx_zeugnis_events_doc ON zeugnis_usage_events(document_id);
    CREATE INDEX IF NOT EXISTS idx_zeugnis_events_type ON zeugnis_usage_events(event_type);
    CREATE INDEX IF NOT EXISTS idx_zeugnis_events_created ON zeugnis_usage_events(created_at);

    -- Crawler Queue
    CREATE TABLE IF NOT EXISTS zeugnis_crawler_queue (
        id VARCHAR(36) PRIMARY KEY,
        source_id VARCHAR(36) REFERENCES zeugnis_sources(id),
        priority INTEGER DEFAULT 5,
        status VARCHAR(20) DEFAULT 'pending',
        started_at TIMESTAMP,
        completed_at TIMESTAMP,
        documents_found INTEGER DEFAULT 0,
        documents_indexed INTEGER DEFAULT 0,
        error_count INTEGER DEFAULT 0,
        created_at TIMESTAMP DEFAULT NOW()
    );

    CREATE INDEX IF NOT EXISTS idx_crawler_queue_status ON zeugnis_crawler_queue(status);
    """

    db_pool = await get_pool()
    if db_pool is None:
        # No pool available: nothing can be created, report failure.
        return False

    initialized = False
    try:
        async with db_pool.acquire() as connection:
            await connection.execute(schema_ddl)
        print("RAG metrics tables initialized")
        initialized = True
    except Exception as exc:
        # Best-effort initialization: report the problem and signal failure
        # through the return value instead of propagating the exception.
        print(f"Failed to initialize metrics tables: {exc}")
    return initialized
|