backend-lehrer (11 files): - llm_gateway/routes/schools.py (867 → 5), recording_api.py (848 → 6) - messenger_api.py (840 → 5), print_generator.py (824 → 5) - unit_analytics_api.py (751 → 5), classroom/routes/context.py (726 → 4) - llm_gateway/routes/edu_search_seeds.py (710 → 4) klausur-service (12 files): - ocr_labeling_api.py (845 → 4), metrics_db.py (833 → 4) - legal_corpus_api.py (790 → 4), page_crop.py (758 → 3) - mail/ai_service.py (747 → 4), github_crawler.py (767 → 3) - trocr_service.py (730 → 4), full_compliance_pipeline.py (723 → 4) - dsfa_rag_api.py (715 → 4), ocr_pipeline_auto.py (705 → 4) website (6 pages): - audit-checklist (867 → 8), content (806 → 6) - screen-flow (790 → 4), scraper (789 → 5) - zeugnisse (776 → 5), modules (745 → 4) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
194 lines
6.4 KiB
Python
194 lines
6.4 KiB
Python
"""
|
|
PostgreSQL Metrics Database - Zeugnis Operations
|
|
|
|
Zeugnis source management, document queries, statistics, and event logging.
|
|
|
|
Extracted from metrics_db.py to keep files under 500 LOC.
|
|
"""
|
|
|
|
from typing import Optional, List, Dict
|
|
|
|
from metrics_db_core import get_pool
|
|
|
|
|
|
# =============================================================================
|
|
# Zeugnis Database Operations
|
|
# =============================================================================
|
|
|
|
async def get_zeugnis_sources() -> List[Dict]:
    """Return all rows of ``zeugnis_sources``, ordered by ``bundesland``.

    Returns:
        One plain ``dict`` per row with the selected columns (id, bundesland,
        name, base_url, license_type, training_allowed, verified_by,
        verified_at, created_at, updated_at). An empty list is returned when
        ``get_pool()`` yields ``None`` or when the query fails; failures are
        printed, not raised.
    """
    db_pool = await get_pool()
    if db_pool is None:
        # No database available: behave as if there were no sources.
        return []

    try:
        async with db_pool.acquire() as connection:
            records = await connection.fetch(
                """
                SELECT id, bundesland, name, base_url, license_type, training_allowed,
                       verified_by, verified_at, created_at, updated_at
                FROM zeugnis_sources
                ORDER BY bundesland
                """
            )
            return list(map(dict, records))
    except Exception as e:
        # Best-effort read: report the problem and fall back to an empty result.
        print(f"Failed to get zeugnis sources: {e}")
        return []
|
|
|
|
|
|
async def upsert_zeugnis_source(
    id: str,
    bundesland: str,
    name: str,
    license_type: str,
    training_allowed: bool,
    base_url: Optional[str] = None,
    verified_by: Optional[str] = None,
) -> bool:
    """Create a ``zeugnis_sources`` row, or refresh it if ``id`` already exists.

    On insert all given values are stored and ``verified_at`` is set to
    ``NOW()``. On conflict on ``id`` the columns name, base_url, license_type,
    training_allowed and verified_by are overwritten and both ``verified_at``
    and ``updated_at`` are set to ``NOW()``.

    NOTE(review): ``bundesland`` is only written on insert; it is not part of
    the ``DO UPDATE SET`` list. Confirm that this is intended.

    Args:
        id: Primary key of the source (conflict target).
        bundesland: Bundesland the source belongs to.
        name: Name of the source.
        license_type: License identifier stored with the source.
        training_allowed: Flag stored in the ``training_allowed`` column.
        base_url: Optional base URL of the source.
        verified_by: Optional identifier of whoever verified the source.

    Returns:
        ``True`` when the statement executed, ``False`` when ``get_pool()``
        yields ``None`` or the statement raised (the error is printed).
    """
    db_pool = await get_pool()
    if db_pool is None:
        return False

    try:
        async with db_pool.acquire() as connection:
            await connection.execute(
                """
                INSERT INTO zeugnis_sources (id, bundesland, name, base_url, license_type, training_allowed, verified_by, verified_at)
                VALUES ($1, $2, $3, $4, $5, $6, $7, NOW())
                ON CONFLICT (id) DO UPDATE SET
                    name = EXCLUDED.name,
                    base_url = EXCLUDED.base_url,
                    license_type = EXCLUDED.license_type,
                    training_allowed = EXCLUDED.training_allowed,
                    verified_by = EXCLUDED.verified_by,
                    verified_at = NOW(),
                    updated_at = NOW()
                """,
                # Positional parameters $1..$7, in placeholder order.
                id,
                bundesland,
                name,
                base_url,
                license_type,
                training_allowed,
                verified_by,
            )
            return True
    except Exception as e:
        # Failures are reported via the return value, never raised.
        print(f"Failed to upsert zeugnis source: {e}")
        return False
|
|
|
|
|
|
async def get_zeugnis_documents(
    bundesland: Optional[str] = None,
    limit: int = 100,
    offset: int = 0,
) -> List[Dict]:
    """Fetch a page of zeugnis documents, newest first.

    Each document row is joined through ``zeugnis_seed_urls`` to
    ``zeugnis_sources`` so that the result also carries the source's
    ``bundesland`` and its name (as ``source_name``).

    Args:
        bundesland: When truthy, only documents whose source has this
            ``bundesland`` are returned. ``None`` or an empty string disables
            the filter.
        limit: Value passed to the SQL ``LIMIT`` clause.
        offset: Value passed to the SQL ``OFFSET`` clause.

    Returns:
        One plain ``dict`` per row. An empty list is returned when
        ``get_pool()`` yields ``None`` or when the query fails; failures are
        printed, not raised.
    """
    db_pool = await get_pool()
    if db_pool is None:
        return []

    try:
        async with db_pool.acquire() as connection:
            if not bundesland:
                # Unfiltered listing across all sources.
                records = await connection.fetch(
                    """
                    SELECT d.*, s.bundesland, s.name as source_name
                    FROM zeugnis_documents d
                    JOIN zeugnis_seed_urls u ON d.seed_url_id = u.id
                    JOIN zeugnis_sources s ON u.source_id = s.id
                    ORDER BY d.created_at DESC
                    LIMIT $1 OFFSET $2
                    """,
                    limit, offset
                )
            else:
                # Restrict to the requested Bundesland.
                records = await connection.fetch(
                    """
                    SELECT d.*, s.bundesland, s.name as source_name
                    FROM zeugnis_documents d
                    JOIN zeugnis_seed_urls u ON d.seed_url_id = u.id
                    JOIN zeugnis_sources s ON u.source_id = s.id
                    WHERE s.bundesland = $1
                    ORDER BY d.created_at DESC
                    LIMIT $2 OFFSET $3
                    """,
                    bundesland, limit, offset
                )
            return list(map(dict, records))
    except Exception as e:
        # Best-effort read: report the problem and fall back to an empty result.
        print(f"Failed to get zeugnis documents: {e}")
        return []
|
|
|
|
|
|
async def get_zeugnis_stats() -> Dict:
    """Collect summary counters for the zeugnis tables.

    Returns:
        On success a dict with the keys ``total_sources``,
        ``total_documents``, ``indexed_documents`` (documents with
        ``indexed_in_qdrant = true``), ``training_allowed_documents``
        (documents with ``training_allowed = true``), ``active_crawls``
        (queue entries with status ``'running'``) and ``per_bundesland``
        (list of dicts with bundesland, name, training_allowed and
        doc_count per source). Counter values fall back to ``0`` when the
        query result is falsy.

        When ``get_pool()`` yields ``None`` the result is
        ``{"error": "Database not available"}``; when any query raises, the
        error is printed and ``{"error": str(e)}`` is returned.
    """
    db_pool = await get_pool()
    if db_pool is None:
        return {"error": "Database not available"}

    try:
        async with db_pool.acquire() as connection:
            # Plain table-wide counters.
            source_count = await connection.fetchval("SELECT COUNT(*) FROM zeugnis_sources")
            document_count = await connection.fetchval("SELECT COUNT(*) FROM zeugnis_documents")

            indexed_count = await connection.fetchval(
                "SELECT COUNT(*) FROM zeugnis_documents WHERE indexed_in_qdrant = true"
            )

            trainable_count = await connection.fetchval(
                "SELECT COUNT(*) FROM zeugnis_documents WHERE training_allowed = true"
            )

            # LEFT JOINs keep sources that have no seed URLs or documents yet
            # (their doc_count is 0).
            bundesland_rows = await connection.fetch(
                """
                SELECT s.bundesland, s.name, s.training_allowed, COUNT(d.id) as doc_count
                FROM zeugnis_sources s
                LEFT JOIN zeugnis_seed_urls u ON s.id = u.source_id
                LEFT JOIN zeugnis_documents d ON u.id = d.seed_url_id
                GROUP BY s.bundesland, s.name, s.training_allowed
                ORDER BY s.bundesland
                """
            )

            running_crawls = await connection.fetchval(
                "SELECT COUNT(*) FROM zeugnis_crawler_queue WHERE status = 'running'"
            )

            stats = {
                "total_sources": source_count or 0,
                "total_documents": document_count or 0,
                "indexed_documents": indexed_count or 0,
                "training_allowed_documents": trainable_count or 0,
                "active_crawls": running_crawls or 0,
                "per_bundesland": list(map(dict, bundesland_rows)),
            }
            return stats
    except Exception as e:
        # Surface the failure to the caller as data instead of raising.
        print(f"Failed to get zeugnis stats: {e}")
        return {"error": str(e)}
|
|
|
|
|
|
async def log_zeugnis_event(
    document_id: str,
    event_type: str,
    user_id: Optional[str] = None,
    details: Optional[Dict] = None,
) -> bool:
    """Append one row to ``zeugnis_usage_events`` (audit trail).

    A fresh UUID4 string is generated as the row id. ``details`` is stored as
    a JSON string; a falsy value (``None`` or an empty dict) is stored as
    ``NULL``.

    Args:
        document_id: Id of the document the event refers to.
        event_type: Event type label stored with the row.
        user_id: Optional id of the acting user.
        details: Optional JSON-serialisable payload.

    Returns:
        ``True`` when the insert executed, ``False`` when ``get_pool()``
        yields ``None`` or anything inside the insert path raised (the error
        is printed, not propagated).
    """
    db_pool = await get_pool()
    if db_pool is None:
        return False

    try:
        import json
        import uuid

        async with db_pool.acquire() as connection:
            event_id = str(uuid.uuid4())

            # Falsy details (None or {}) are written as NULL, not as JSON.
            payload = None
            if details:
                payload = json.dumps(details)

            await connection.execute(
                """
                INSERT INTO zeugnis_usage_events (id, document_id, event_type, user_id, details)
                VALUES ($1, $2, $3, $4, $5)
                """,
                event_id,
                document_id,
                event_type,
                user_id,
                payload,
            )
            return True
    except Exception as e:
        # Audit logging must not break the caller; report and signal failure.
        print(f"Failed to log zeugnis event: {e}")
        return False
|