[split-required] Split 700-870 LOC files across all services
backend-lehrer (11 files): - llm_gateway/routes/schools.py (867 → 5), recording_api.py (848 → 6) - messenger_api.py (840 → 5), print_generator.py (824 → 5) - unit_analytics_api.py (751 → 5), classroom/routes/context.py (726 → 4) - llm_gateway/routes/edu_search_seeds.py (710 → 4) klausur-service (12 files): - ocr_labeling_api.py (845 → 4), metrics_db.py (833 → 4) - legal_corpus_api.py (790 → 4), page_crop.py (758 → 3) - mail/ai_service.py (747 → 4), github_crawler.py (767 → 3) - trocr_service.py (730 → 4), full_compliance_pipeline.py (723 → 4) - dsfa_rag_api.py (715 → 4), ocr_pipeline_auto.py (705 → 4) website (6 pages): - audit-checklist (867 → 8), content (806 → 6) - screen-flow (790 → 4), scraper (789 → 5) - zeugnisse (776 → 5), modules (745 → 4) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
193
klausur-service/backend/metrics_db_zeugnis.py
Normal file
193
klausur-service/backend/metrics_db_zeugnis.py
Normal file
@@ -0,0 +1,193 @@
|
||||
"""
|
||||
PostgreSQL Metrics Database - Zeugnis Operations
|
||||
|
||||
Zeugnis source management, document queries, statistics, and event logging.
|
||||
|
||||
Extracted from metrics_db.py to keep files under 500 LOC.
|
||||
"""
|
||||
|
||||
from typing import Optional, List, Dict
|
||||
|
||||
from metrics_db_core import get_pool
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Zeugnis Database Operations
|
||||
# =============================================================================
|
||||
|
||||
async def get_zeugnis_sources() -> List[Dict]:
    """Return every zeugnis source (one row per Bundesland), ordered by Bundesland.

    Returns:
        A list of plain dicts with the source columns (id, bundesland, name,
        base_url, license_type, training_allowed, verified_by, verified_at,
        created_at, updated_at). Empty list when the pool is unavailable or
        the query fails (best-effort: errors are printed, not raised).
    """
    pool = await get_pool()
    if pool is None:
        return []

    query = """
                SELECT id, bundesland, name, base_url, license_type, training_allowed,
                       verified_by, verified_at, created_at, updated_at
                FROM zeugnis_sources
                ORDER BY bundesland
                """
    try:
        async with pool.acquire() as conn:
            records = await conn.fetch(query)
        return [dict(record) for record in records]
    except Exception as exc:
        print(f"Failed to get zeugnis sources: {exc}")
        return []
|
||||
|
||||
|
||||
async def upsert_zeugnis_source(
    id: str,
    bundesland: str,
    name: str,
    license_type: str,
    training_allowed: bool,
    base_url: Optional[str] = None,
    verified_by: Optional[str] = None,
) -> bool:
    """Insert a zeugnis source, or update it in place when the id already exists.

    On conflict (same ``id``) all mutable columns are overwritten and both
    ``verified_at`` and ``updated_at`` are bumped to NOW().

    Returns:
        True on success; False when the pool is unavailable or the statement
        fails (best-effort: errors are printed, not raised).
    """
    pool = await get_pool()
    if pool is None:
        return False

    statement = """
                INSERT INTO zeugnis_sources (id, bundesland, name, base_url, license_type, training_allowed, verified_by, verified_at)
                VALUES ($1, $2, $3, $4, $5, $6, $7, NOW())
                ON CONFLICT (id) DO UPDATE SET
                    name = EXCLUDED.name,
                    base_url = EXCLUDED.base_url,
                    license_type = EXCLUDED.license_type,
                    training_allowed = EXCLUDED.training_allowed,
                    verified_by = EXCLUDED.verified_by,
                    verified_at = NOW(),
                    updated_at = NOW()
                """
    try:
        async with pool.acquire() as conn:
            await conn.execute(
                statement,
                id,
                bundesland,
                name,
                base_url,
                license_type,
                training_allowed,
                verified_by,
            )
        return True
    except Exception as exc:
        print(f"Failed to upsert zeugnis source: {exc}")
        return False
|
||||
|
||||
|
||||
async def get_zeugnis_documents(
    bundesland: Optional[str] = None,
    limit: int = 100,
    offset: int = 0,
) -> List[Dict]:
    """Get zeugnis documents (newest first), optionally filtered by Bundesland.

    Args:
        bundesland: When truthy, only documents whose source matches this
            Bundesland are returned.
        limit: Maximum number of rows to return.
        offset: Number of rows to skip (for pagination).

    Returns:
        A list of plain dicts: all ``zeugnis_documents`` columns plus the
        joined ``bundesland`` and ``source_name``. Empty list when the pool
        is unavailable or the query fails (best-effort: errors are printed,
        not raised).
    """
    pool = await get_pool()
    if pool is None:
        return []

    # The SELECT/JOIN prefix was previously duplicated verbatim in both
    # branches; build it once and append only the varying tail.
    base_query = """
                SELECT d.*, s.bundesland, s.name as source_name
                FROM zeugnis_documents d
                JOIN zeugnis_seed_urls u ON d.seed_url_id = u.id
                JOIN zeugnis_sources s ON u.source_id = s.id
                """

    try:
        async with pool.acquire() as conn:
            if bundesland:
                rows = await conn.fetch(
                    base_query
                    + """
                WHERE s.bundesland = $1
                ORDER BY d.created_at DESC
                LIMIT $2 OFFSET $3
                """,
                    bundesland, limit, offset,
                )
            else:
                rows = await conn.fetch(
                    base_query
                    + """
                ORDER BY d.created_at DESC
                LIMIT $1 OFFSET $2
                """,
                    limit, offset,
                )
            return [dict(r) for r in rows]
    except Exception as e:
        print(f"Failed to get zeugnis documents: {e}")
        return []
|
||||
|
||||
|
||||
async def get_zeugnis_stats() -> Dict:
    """Return crawler-wide zeugnis statistics.

    Returns:
        A dict with total/indexed/training-allowed document counts, the
        number of currently running crawls, and a per-Bundesland breakdown.
        On failure (or when the pool is unavailable) a dict with a single
        ``"error"`` key is returned instead of raising.
    """
    pool = await get_pool()
    if pool is None:
        return {"error": "Database not available"}

    per_bundesland_query = """
                SELECT s.bundesland, s.name, s.training_allowed, COUNT(d.id) as doc_count
                FROM zeugnis_sources s
                LEFT JOIN zeugnis_seed_urls u ON s.id = u.source_id
                LEFT JOIN zeugnis_documents d ON u.id = d.seed_url_id
                GROUP BY s.bundesland, s.name, s.training_allowed
                ORDER BY s.bundesland
                """

    try:
        async with pool.acquire() as conn:
            source_count = await conn.fetchval("SELECT COUNT(*) FROM zeugnis_sources")
            document_count = await conn.fetchval("SELECT COUNT(*) FROM zeugnis_documents")
            indexed_count = await conn.fetchval(
                "SELECT COUNT(*) FROM zeugnis_documents WHERE indexed_in_qdrant = true"
            )
            training_count = await conn.fetchval(
                "SELECT COUNT(*) FROM zeugnis_documents WHERE training_allowed = true"
            )
            breakdown = await conn.fetch(per_bundesland_query)
            running_count = await conn.fetchval(
                "SELECT COUNT(*) FROM zeugnis_crawler_queue WHERE status = 'running'"
            )

        # COUNT(*) never yields NULL in practice; the `or 0` fallbacks also
        # cover a missing/closed connection returning None.
        return {
            "total_sources": source_count or 0,
            "total_documents": document_count or 0,
            "indexed_documents": indexed_count or 0,
            "training_allowed_documents": training_count or 0,
            "active_crawls": running_count or 0,
            "per_bundesland": [dict(row) for row in breakdown],
        }
    except Exception as exc:
        print(f"Failed to get zeugnis stats: {exc}")
        return {"error": str(exc)}
|
||||
|
||||
|
||||
async def log_zeugnis_event(
    document_id: str,
    event_type: str,
    user_id: Optional[str] = None,
    details: Optional[Dict] = None,
) -> bool:
    """Log a zeugnis usage event for audit trail.

    Args:
        document_id: The document the event refers to.
        event_type: Free-form event category string.
        user_id: Acting user, if known.
        details: Optional payload; stored as JSON text, NULL when omitted.

    Returns:
        True when the row was inserted; False when the pool is unavailable
        or the insert fails (best-effort: errors are printed, not raised).
    """
    pool = await get_pool()
    if pool is None:
        return False

    try:
        # Local imports kept function-scoped, matching the original layout.
        import json
        import uuid

        event_id = str(uuid.uuid4())
        payload = json.dumps(details) if details else None
        async with pool.acquire() as conn:
            await conn.execute(
                """
                INSERT INTO zeugnis_usage_events (id, document_id, event_type, user_id, details)
                VALUES ($1, $2, $3, $4, $5)
                """,
                event_id,
                document_id,
                event_type,
                user_id,
                payload,
            )
        return True
    except Exception as exc:
        print(f"Failed to log zeugnis event: {exc}")
        return False
|
||||
Reference in New Issue
Block a user