[split-required] Split 700-870 LOC files across all services

backend-lehrer (11 files):
- llm_gateway/routes/schools.py (867 → 5), recording_api.py (848 → 6)
- messenger_api.py (840 → 5), print_generator.py (824 → 5)
- unit_analytics_api.py (751 → 5), classroom/routes/context.py (726 → 4)
- llm_gateway/routes/edu_search_seeds.py (710 → 4)

klausur-service (12 files):
- ocr_labeling_api.py (845 → 4), metrics_db.py (833 → 4)
- legal_corpus_api.py (790 → 4), page_crop.py (758 → 3)
- mail/ai_service.py (747 → 4), github_crawler.py (767 → 3)
- trocr_service.py (730 → 4), full_compliance_pipeline.py (723 → 4)
- dsfa_rag_api.py (715 → 4), ocr_pipeline_auto.py (705 → 4)

website (6 pages):
- audit-checklist (867 → 8), content (806 → 6)
- screen-flow (790 → 4), scraper (789 → 5)
- zeugnisse (776 → 5), modules (745 → 4)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-04-25 08:01:18 +02:00
parent b6983ab1dc
commit 34da9f4cda
106 changed files with 16500 additions and 16947 deletions

View File

@@ -0,0 +1,182 @@
"""
PostgreSQL Metrics Database - Schema Initialization
Table creation DDL for all metrics, feedback, and zeugnis tables.
Extracted from metrics_db_core.py to keep files under 500 LOC.
"""
from metrics_db_core import get_pool
async def init_metrics_tables() -> bool:
    """Create the metrics, feedback, and Zeugnis tables and their indexes.

    Every statement in the DDL script uses ``IF NOT EXISTS``, and the whole
    script is sent to the database in a single ``execute`` call on a
    connection acquired from the pool returned by ``get_pool()``.

    Returns:
        ``True`` when the script ran without raising. ``False`` when
        ``get_pool()`` returned ``None`` or when acquiring the connection or
        executing the script raised; in the latter case the error is printed.
    """
    pool = await get_pool()
    if pool is None:
        # No pool available: nothing to run the DDL against.
        return False

    # The literal is kept flush-left so its content is independent of the
    # indentation of this function.
    schema_ddl = """
-- RAG Search Feedback Table
CREATE TABLE IF NOT EXISTS rag_search_feedback (
id SERIAL PRIMARY KEY,
result_id VARCHAR(255) NOT NULL,
query_text TEXT,
collection_name VARCHAR(100),
score FLOAT,
rating INTEGER CHECK (rating >= 1 AND rating <= 5),
notes TEXT,
user_id VARCHAR(100),
created_at TIMESTAMP DEFAULT NOW()
);
-- Index for efficient querying
CREATE INDEX IF NOT EXISTS idx_feedback_created_at ON rag_search_feedback(created_at);
CREATE INDEX IF NOT EXISTS idx_feedback_collection ON rag_search_feedback(collection_name);
CREATE INDEX IF NOT EXISTS idx_feedback_rating ON rag_search_feedback(rating);
-- RAG Search Logs Table (for latency tracking)
CREATE TABLE IF NOT EXISTS rag_search_logs (
id SERIAL PRIMARY KEY,
query_text TEXT NOT NULL,
collection_name VARCHAR(100),
result_count INTEGER,
latency_ms INTEGER,
top_score FLOAT,
filters JSONB,
created_at TIMESTAMP DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_search_logs_created_at ON rag_search_logs(created_at);
-- RAG Upload History Table
CREATE TABLE IF NOT EXISTS rag_upload_history (
id SERIAL PRIMARY KEY,
filename VARCHAR(500) NOT NULL,
collection_name VARCHAR(100),
year INTEGER,
pdfs_extracted INTEGER,
minio_path VARCHAR(1000),
uploaded_by VARCHAR(100),
created_at TIMESTAMP DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_upload_history_created_at ON rag_upload_history(created_at);
-- Binaere Relevanz-Judgments fuer echte Precision/Recall
CREATE TABLE IF NOT EXISTS rag_relevance_judgments (
id SERIAL PRIMARY KEY,
query_id VARCHAR(255) NOT NULL,
query_text TEXT NOT NULL,
result_id VARCHAR(255) NOT NULL,
result_rank INTEGER,
is_relevant BOOLEAN NOT NULL,
collection_name VARCHAR(100),
user_id VARCHAR(100),
created_at TIMESTAMP DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_relevance_query ON rag_relevance_judgments(query_id);
CREATE INDEX IF NOT EXISTS idx_relevance_created_at ON rag_relevance_judgments(created_at);
-- Zeugnisse Source Tracking
CREATE TABLE IF NOT EXISTS zeugnis_sources (
id VARCHAR(36) PRIMARY KEY,
bundesland VARCHAR(10) NOT NULL,
name VARCHAR(255) NOT NULL,
base_url TEXT,
license_type VARCHAR(50) NOT NULL,
training_allowed BOOLEAN DEFAULT FALSE,
verified_by VARCHAR(100),
verified_at TIMESTAMP,
created_at TIMESTAMP DEFAULT NOW(),
updated_at TIMESTAMP DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_zeugnis_sources_bundesland ON zeugnis_sources(bundesland);
-- Zeugnisse Seed URLs
CREATE TABLE IF NOT EXISTS zeugnis_seed_urls (
id VARCHAR(36) PRIMARY KEY,
source_id VARCHAR(36) REFERENCES zeugnis_sources(id),
url TEXT NOT NULL,
doc_type VARCHAR(50),
status VARCHAR(20) DEFAULT 'pending',
last_crawled TIMESTAMP,
error_message TEXT,
created_at TIMESTAMP DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_zeugnis_seed_urls_source ON zeugnis_seed_urls(source_id);
CREATE INDEX IF NOT EXISTS idx_zeugnis_seed_urls_status ON zeugnis_seed_urls(status);
-- Zeugnisse Documents
CREATE TABLE IF NOT EXISTS zeugnis_documents (
id VARCHAR(36) PRIMARY KEY,
seed_url_id VARCHAR(36) REFERENCES zeugnis_seed_urls(id),
title VARCHAR(500),
url TEXT NOT NULL,
content_hash VARCHAR(64),
minio_path TEXT,
training_allowed BOOLEAN DEFAULT FALSE,
indexed_in_qdrant BOOLEAN DEFAULT FALSE,
file_size INTEGER,
content_type VARCHAR(100),
created_at TIMESTAMP DEFAULT NOW(),
updated_at TIMESTAMP DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_zeugnis_documents_seed ON zeugnis_documents(seed_url_id);
CREATE INDEX IF NOT EXISTS idx_zeugnis_documents_hash ON zeugnis_documents(content_hash);
-- Zeugnisse Document Versions
CREATE TABLE IF NOT EXISTS zeugnis_document_versions (
id VARCHAR(36) PRIMARY KEY,
document_id VARCHAR(36) REFERENCES zeugnis_documents(id),
version INTEGER NOT NULL,
content_hash VARCHAR(64),
minio_path TEXT,
change_summary TEXT,
created_at TIMESTAMP DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_zeugnis_versions_doc ON zeugnis_document_versions(document_id);
-- Zeugnisse Usage Events (Audit Trail)
CREATE TABLE IF NOT EXISTS zeugnis_usage_events (
id VARCHAR(36) PRIMARY KEY,
document_id VARCHAR(36) REFERENCES zeugnis_documents(id),
event_type VARCHAR(50) NOT NULL,
user_id VARCHAR(100),
details JSONB,
created_at TIMESTAMP DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_zeugnis_events_doc ON zeugnis_usage_events(document_id);
CREATE INDEX IF NOT EXISTS idx_zeugnis_events_type ON zeugnis_usage_events(event_type);
CREATE INDEX IF NOT EXISTS idx_zeugnis_events_created ON zeugnis_usage_events(created_at);
-- Crawler Queue
CREATE TABLE IF NOT EXISTS zeugnis_crawler_queue (
id VARCHAR(36) PRIMARY KEY,
source_id VARCHAR(36) REFERENCES zeugnis_sources(id),
priority INTEGER DEFAULT 5,
status VARCHAR(20) DEFAULT 'pending',
started_at TIMESTAMP,
completed_at TIMESTAMP,
documents_found INTEGER DEFAULT 0,
documents_indexed INTEGER DEFAULT 0,
error_count INTEGER DEFAULT 0,
created_at TIMESTAMP DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_crawler_queue_status ON zeugnis_crawler_queue(status);
"""

    # Single exit point: the flag only flips once the script has run and the
    # success message has been printed.
    initialized = False
    try:
        async with pool.acquire() as connection:
            await connection.execute(schema_ddl)
        print("RAG metrics tables initialized")
        initialized = True
    except Exception as e:
        # Boundary handler: report the failure and signal it via the return
        # value instead of propagating to the caller.
        print(f"Failed to initialize metrics tables: {e}")
    return initialized