Files
breakpilot-lehrer/klausur-service/backend/metrics_db_zeugnis.py
Benjamin Admin 34da9f4cda [split-required] Split 700-870 LOC files across all services
backend-lehrer (11 files):
- llm_gateway/routes/schools.py (867 → 5), recording_api.py (848 → 6)
- messenger_api.py (840 → 5), print_generator.py (824 → 5)
- unit_analytics_api.py (751 → 5), classroom/routes/context.py (726 → 4)
- llm_gateway/routes/edu_search_seeds.py (710 → 4)

klausur-service (12 files):
- ocr_labeling_api.py (845 → 4), metrics_db.py (833 → 4)
- legal_corpus_api.py (790 → 4), page_crop.py (758 → 3)
- mail/ai_service.py (747 → 4), github_crawler.py (767 → 3)
- trocr_service.py (730 → 4), full_compliance_pipeline.py (723 → 4)
- dsfa_rag_api.py (715 → 4), ocr_pipeline_auto.py (705 → 4)

website (6 pages):
- audit-checklist (867 → 8), content (806 → 6)
- screen-flow (790 → 4), scraper (789 → 5)
- zeugnisse (776 → 5), modules (745 → 4)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-25 08:01:18 +02:00

194 lines
6.4 KiB
Python

"""
PostgreSQL Metrics Database - Zeugnis Operations
Zeugnis source management, document queries, statistics, and event logging.
Extracted from metrics_db.py to keep files under 500 LOC.
"""
from typing import Optional, List, Dict
from metrics_db_core import get_pool
# =============================================================================
# Zeugnis Database Operations
# =============================================================================
async def get_zeugnis_sources() -> List[Dict]:
    """Return every zeugnis source row as a plain dict, sorted by bundesland.

    Returns:
        One dict per row of ``zeugnis_sources`` with the selected columns as
        keys. An empty list is returned when no connection pool is available
        or when the query fails; failures are printed, never raised.
    """
    db_pool = await get_pool()
    if db_pool is None:
        return []

    query = """
        SELECT id, bundesland, name, base_url, license_type, training_allowed,
               verified_by, verified_at, created_at, updated_at
        FROM zeugnis_sources
        ORDER BY bundesland
    """
    try:
        async with db_pool.acquire() as connection:
            records = await connection.fetch(query)
            # Convert driver records into ordinary dicts for the callers.
            return list(map(dict, records))
    except Exception as e:
        # Best-effort read: report the problem and fall back to "no sources".
        print(f"Failed to get zeugnis sources: {e}")
        return []
async def upsert_zeugnis_source(
    id: str,
    bundesland: str,
    name: str,
    license_type: str,
    training_allowed: bool,
    base_url: Optional[str] = None,
    verified_by: Optional[str] = None,
) -> bool:
    """Create the zeugnis source row for ``id`` or overwrite the existing one.

    A new row stores all given values and stamps ``verified_at`` with the
    database's ``NOW()``. When a row with the same ``id`` already exists, its
    ``name``, ``base_url``, ``license_type``, ``training_allowed`` and
    ``verified_by`` are replaced and ``verified_at`` / ``updated_at`` are set
    to ``NOW()``. The conflict branch does not touch ``bundesland``, so an
    existing row keeps its stored value.

    Returns:
        ``True`` when the statement ran, ``False`` when no connection pool is
        available or the statement failed (the failure is printed).
    """
    db_pool = await get_pool()
    if db_pool is None:
        return False

    statement = """
        INSERT INTO zeugnis_sources (id, bundesland, name, base_url, license_type, training_allowed, verified_by, verified_at)
        VALUES ($1, $2, $3, $4, $5, $6, $7, NOW())
        ON CONFLICT (id) DO UPDATE SET
            name = EXCLUDED.name,
            base_url = EXCLUDED.base_url,
            license_type = EXCLUDED.license_type,
            training_allowed = EXCLUDED.training_allowed,
            verified_by = EXCLUDED.verified_by,
            verified_at = NOW(),
            updated_at = NOW()
    """
    # Order matters: these bind to $1..$7 in the statement above.
    bound_values = (
        id,
        bundesland,
        name,
        base_url,
        license_type,
        training_allowed,
        verified_by,
    )
    try:
        async with db_pool.acquire() as connection:
            await connection.execute(statement, *bound_values)
            return True
    except Exception as e:
        print(f"Failed to upsert zeugnis source: {e}")
        return False
async def get_zeugnis_documents(
    bundesland: Optional[str] = None,
    limit: int = 100,
    offset: int = 0,
) -> List[Dict]:
    """Return one page of zeugnis documents, newest first.

    Each row carries all columns of ``zeugnis_documents`` plus the
    ``bundesland`` and ``source_name`` of the source reached through
    ``zeugnis_seed_urls``.

    Args:
        bundesland: When truthy, only documents whose source has this
            ``bundesland`` are returned. ``None`` or an empty string means
            no filter.
        limit: Maximum number of rows to return.
        offset: Number of rows to skip before the first returned row.

    Returns:
        A list of dicts, one per row. An empty list is returned when no
        connection pool is available or the query fails (the failure is
        printed, not raised).
    """
    pool = await get_pool()
    if pool is None:
        return []

    # Build the bound parameters and the optional filter together so the
    # filtered and unfiltered variants share one query text and cannot drift.
    params: List = []
    where_clause = ""
    if bundesland:
        params.append(bundesland)
        where_clause = "WHERE s.bundesland = $1"
    params.extend((limit, offset))
    limit_pos = len(params) - 1
    offset_pos = len(params)

    # Only constant fragments and placeholder numbers are interpolated here;
    # every caller-supplied value is still passed as a bound parameter.
    # d.id is a tiebreaker: without it, rows sharing the same created_at may
    # be returned in a different order per query, so LIMIT/OFFSET paging
    # could repeat or skip documents.
    query = f"""
        SELECT d.*, s.bundesland, s.name as source_name
        FROM zeugnis_documents d
        JOIN zeugnis_seed_urls u ON d.seed_url_id = u.id
        JOIN zeugnis_sources s ON u.source_id = s.id
        {where_clause}
        ORDER BY d.created_at DESC, d.id DESC
        LIMIT ${limit_pos} OFFSET ${offset_pos}
    """
    try:
        async with pool.acquire() as conn:
            rows = await conn.fetch(query, *params)
            return [dict(r) for r in rows]
    except Exception as e:
        print(f"Failed to get zeugnis documents: {e}")
        return []
async def get_zeugnis_stats() -> Dict:
    """Collect summary counters for the zeugnis crawler.

    Returns:
        On success a dict with the keys ``total_sources``,
        ``total_documents``, ``indexed_documents``,
        ``training_allowed_documents``, ``active_crawls`` (all integers,
        ``0`` when the database returns nothing) and ``per_bundesland`` (a
        list of dicts with ``bundesland``, ``name``, ``training_allowed`` and
        ``doc_count``). When no connection pool is available or a query
        fails, a dict with the single key ``error`` is returned instead.
    """
    pool = await get_pool()
    if pool is None:
        return {"error": "Database not available"}
    try:
        async with pool.acquire() as conn:
            sources = await conn.fetchval("SELECT COUNT(*) FROM zeugnis_sources")
            # All three document counters come from one statement, so they
            # are computed from the same snapshot in a single pass instead
            # of three separate scans that could disagree under concurrent
            # writes (e.g. indexed > total).
            doc_counts = await conn.fetchrow(
                """
                SELECT
                    COUNT(*) AS total,
                    COUNT(*) FILTER (WHERE indexed_in_qdrant = true) AS indexed,
                    COUNT(*) FILTER (WHERE training_allowed = true) AS training_allowed
                FROM zeugnis_documents
                """
            )
            # LEFT JOINs keep sources that have no seed URLs or documents;
            # COUNT(d.id) reports 0 for them.
            per_bundesland = await conn.fetch(
                """
                SELECT s.bundesland, s.name, s.training_allowed, COUNT(d.id) as doc_count
                FROM zeugnis_sources s
                LEFT JOIN zeugnis_seed_urls u ON s.id = u.source_id
                LEFT JOIN zeugnis_documents d ON u.id = d.seed_url_id
                GROUP BY s.bundesland, s.name, s.training_allowed
                ORDER BY s.bundesland
                """
            )
            active_crawls = await conn.fetchval(
                "SELECT COUNT(*) FROM zeugnis_crawler_queue WHERE status = 'running'"
            )
            return {
                "total_sources": sources or 0,
                "total_documents": doc_counts["total"] or 0,
                "indexed_documents": doc_counts["indexed"] or 0,
                "training_allowed_documents": doc_counts["training_allowed"] or 0,
                "active_crawls": active_crawls or 0,
                "per_bundesland": [dict(r) for r in per_bundesland],
            }
    except Exception as e:
        print(f"Failed to get zeugnis stats: {e}")
        return {"error": str(e)}
async def log_zeugnis_event(
    document_id: str,
    event_type: str,
    user_id: Optional[str] = None,
    details: Optional[Dict] = None,
) -> bool:
    """Record one zeugnis usage event for the audit trail.

    Inserts a row into ``zeugnis_usage_events`` with a freshly generated
    UUID4 string as ``id``.

    Args:
        document_id: Identifier of the document the event refers to.
        event_type: Event label stored as given.
        user_id: Optional identifier of the acting user.
        details: Optional extra data, stored as a JSON string. A falsy value
            (``None`` or an empty dict) is stored as NULL. Values that JSON
            cannot represent natively are stored as their ``str()`` form.

    Returns:
        ``True`` when the row was inserted, ``False`` when no connection pool
        is available or the insert failed (the failure is printed).
    """
    import json
    import uuid

    pool = await get_pool()
    if pool is None:
        return False
    try:
        # default=str keeps the audit event when details contains values such
        # as datetime, UUID or Decimal; plain json.dumps would raise TypeError
        # and the event would be dropped.
        details_json = json.dumps(details, default=str) if details else None
        async with pool.acquire() as conn:
            await conn.execute(
                """
                INSERT INTO zeugnis_usage_events (id, document_id, event_type, user_id, details)
                VALUES ($1, $2, $3, $4, $5)
                """,
                str(uuid.uuid4()), document_id, event_type, user_id,
                details_json,
            )
            return True
    except Exception as e:
        print(f"Failed to log zeugnis event: {e}")
        return False