Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 28s
CI / test-go-edu-search (push) Successful in 28s
CI / test-python-klausur (push) Failing after 2m22s
CI / test-python-agent-core (push) Successful in 21s
CI / test-nodejs-website (push) Successful in 23s
korrektur/ zeugnis/ admin/ compliance/ worksheet/ training/ metrics/ 52 shims, relative imports, RAG untouched. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
194 lines
6.4 KiB
Python
194 lines
6.4 KiB
Python
"""
|
|
PostgreSQL Metrics Database - Zeugnis Operations

Zeugnis source management, document queries, statistics, and event logging.

Extracted from metrics_db.py to keep files under 500 LOC.
|
|
"""
|
|
|
|
from typing import Optional, List, Dict
|
|
|
|
from .db_core import get_pool
|
|
|
|
|
|
# =============================================================================
|
|
# Zeugnis Database Operations
|
|
# =============================================================================
|
|
|
|
async def get_zeugnis_sources() -> List[Dict]:
    """
    Return all rows of ``zeugnis_sources`` (one per federal state entry).

    Rows are ordered by ``bundesland`` and converted to plain dicts.

    Returns:
        A list of dicts with the selected source columns. An empty list is
        returned when no connection pool is available or when the query
        raises; in the latter case the error is printed, not propagated.
    """
    query = """
        SELECT id, bundesland, name, base_url, license_type, training_allowed,
               verified_by, verified_at, created_at, updated_at
        FROM zeugnis_sources
        ORDER BY bundesland
    """

    db_pool = await get_pool()
    if db_pool is None:
        return []

    try:
        async with db_pool.acquire() as connection:
            records = await connection.fetch(query)
            return [dict(record) for record in records]
    except Exception as exc:
        # Best-effort read: callers get an empty result instead of an error.
        print(f"Failed to get zeugnis sources: {exc}")
        return []
|
|
|
|
|
|
async def upsert_zeugnis_source(
    id: str,
    bundesland: str,
    name: str,
    license_type: str,
    training_allowed: bool,
    base_url: Optional[str] = None,
    verified_by: Optional[str] = None,
) -> bool:
    """
    Insert a row into ``zeugnis_sources`` or update it when ``id`` exists.

    On conflict the name, base URL, license type, training flag and verifier
    are overwritten and both ``verified_at`` and ``updated_at`` are set to
    ``NOW()``.

    NOTE(review): ``bundesland`` is written on insert but is not part of the
    ON CONFLICT update list -- confirm that this is intentional.

    Args:
        id: Primary key of the source row.
        bundesland: Federal state value stored on insert.
        name: Display name of the source.
        license_type: License identifier stored with the source.
        training_allowed: Flag stored in the ``training_allowed`` column.
        base_url: Optional base URL of the source.
        verified_by: Optional identifier of who verified the source.

    Returns:
        True when the statement executed, False when no pool is available or
        the statement raised (the error is printed, not propagated).
    """
    statement = """
        INSERT INTO zeugnis_sources (id, bundesland, name, base_url, license_type, training_allowed, verified_by, verified_at)
        VALUES ($1, $2, $3, $4, $5, $6, $7, NOW())
        ON CONFLICT (id) DO UPDATE SET
            name = EXCLUDED.name,
            base_url = EXCLUDED.base_url,
            license_type = EXCLUDED.license_type,
            training_allowed = EXCLUDED.training_allowed,
            verified_by = EXCLUDED.verified_by,
            verified_at = NOW(),
            updated_at = NOW()
    """
    # Order must line up with the $1..$7 placeholders in the statement.
    bound_values = (
        id,
        bundesland,
        name,
        base_url,
        license_type,
        training_allowed,
        verified_by,
    )

    db_pool = await get_pool()
    if db_pool is None:
        return False

    try:
        async with db_pool.acquire() as connection:
            await connection.execute(statement, *bound_values)
    except Exception as exc:
        print(f"Failed to upsert zeugnis source: {exc}")
        return False
    return True
|
|
|
|
|
|
async def get_zeugnis_documents(
    bundesland: Optional[str] = None,
    limit: int = 100,
    offset: int = 0,
) -> List[Dict]:
    """
    Return zeugnis documents, newest first, optionally filtered by state.

    Each document row is joined through ``zeugnis_seed_urls`` to its
    ``zeugnis_sources`` row so the result also carries the source's
    ``bundesland`` and its name (as ``source_name``).

    Args:
        bundesland: When truthy, only documents whose source has this
            ``bundesland`` value are returned. ``None`` or an empty string
            disables the filter.
        limit: Maximum number of rows to return.
        offset: Number of rows to skip before the first returned row.

    Returns:
        A list of dicts, one per document row. An empty list is returned when
        no connection pool is available or when the query raises (the error
        is printed, not propagated).
    """
    db_pool = await get_pool()
    if db_pool is None:
        return []

    # The statement is assembled from fixed fragments only; caller-supplied
    # values are always bound as parameters, never placed in the SQL text.
    # limit/offset keep positions $1/$2 in both variants so the optional
    # filter can simply be appended as $3.
    sql_parts = [
        "SELECT d.*, s.bundesland, s.name as source_name",
        "FROM zeugnis_documents d",
        "JOIN zeugnis_seed_urls u ON d.seed_url_id = u.id",
        "JOIN zeugnis_sources s ON u.source_id = s.id",
    ]
    args: list = [limit, offset]
    if bundesland:
        sql_parts.append("WHERE s.bundesland = $3")
        args.append(bundesland)
    sql_parts.append("ORDER BY d.created_at DESC")
    sql_parts.append("LIMIT $1 OFFSET $2")
    query = "\n".join(sql_parts)

    try:
        async with db_pool.acquire() as conn:
            rows = await conn.fetch(query, *args)
            return [dict(row) for row in rows]
    except Exception as e:
        # Best-effort read: callers get an empty result instead of an error.
        print(f"Failed to get zeugnis documents: {e}")
        return []
|
|
|
|
|
|
async def get_zeugnis_stats() -> Dict:
    """
    Collect aggregate counts about zeugnis sources, documents and crawls.

    Returns:
        On success a dict with the keys ``total_sources``,
        ``total_documents``, ``indexed_documents``,
        ``training_allowed_documents``, ``active_crawls`` and
        ``per_bundesland`` (a list of dicts with bundesland, name,
        training_allowed and doc_count). When no pool is available or a query
        raises, a dict with a single ``error`` key is returned instead.
    """
    db_pool = await get_pool()
    if db_pool is None:
        return {"error": "Database not available"}

    # LEFT JOINs keep sources that have no seed URLs or documents; their
    # doc_count is 0 because COUNT(d.id) ignores NULLs.
    per_bundesland_sql = """
        SELECT s.bundesland, s.name, s.training_allowed, COUNT(d.id) as doc_count
        FROM zeugnis_sources s
        LEFT JOIN zeugnis_seed_urls u ON s.id = u.source_id
        LEFT JOIN zeugnis_documents d ON u.id = d.seed_url_id
        GROUP BY s.bundesland, s.name, s.training_allowed
        ORDER BY s.bundesland
    """

    try:
        async with db_pool.acquire() as connection:
            source_count = await connection.fetchval(
                "SELECT COUNT(*) FROM zeugnis_sources"
            )
            document_count = await connection.fetchval(
                "SELECT COUNT(*) FROM zeugnis_documents"
            )
            indexed_count = await connection.fetchval(
                "SELECT COUNT(*) FROM zeugnis_documents WHERE indexed_in_qdrant = true"
            )
            trainable_count = await connection.fetchval(
                "SELECT COUNT(*) FROM zeugnis_documents WHERE training_allowed = true"
            )
            breakdown_rows = await connection.fetch(per_bundesland_sql)
            running_crawls = await connection.fetchval(
                "SELECT COUNT(*) FROM zeugnis_crawler_queue WHERE status = 'running'"
            )

            # ``or 0`` normalises a None result to zero.
            stats = {
                "total_sources": source_count or 0,
                "total_documents": document_count or 0,
                "indexed_documents": indexed_count or 0,
                "training_allowed_documents": trainable_count or 0,
                "active_crawls": running_crawls or 0,
                "per_bundesland": [dict(row) for row in breakdown_rows],
            }
            return stats
    except Exception as exc:
        print(f"Failed to get zeugnis stats: {exc}")
        return {"error": str(exc)}
|
|
|
|
|
|
async def log_zeugnis_event(
    document_id: str,
    event_type: str,
    user_id: Optional[str] = None,
    details: Optional[Dict] = None,
) -> bool:
    """
    Record a usage event for a zeugnis document (audit trail).

    A new row with a freshly generated UUID4 string as ``id`` is inserted
    into ``zeugnis_usage_events``.

    Args:
        document_id: Identifier of the document the event refers to.
        event_type: Event type value stored with the row.
        user_id: Optional identifier of the acting user.
        details: Optional extra data; serialised with ``json.dumps`` when
            truthy, otherwise passed as None (this includes an empty dict).

    Returns:
        True when the insert executed, False when no pool is available or
        anything in the insert path raised (the error is printed, not
        propagated).
    """
    db_pool = await get_pool()
    if db_pool is None:
        return False

    insert_sql = """
        INSERT INTO zeugnis_usage_events (id, document_id, event_type, user_id, details)
        VALUES ($1, $2, $3, $4, $5)
    """

    try:
        import json
        import uuid

        event_id = str(uuid.uuid4())
        if details:
            details_payload = json.dumps(details)
        else:
            details_payload = None

        async with db_pool.acquire() as connection:
            await connection.execute(
                insert_sql,
                event_id,
                document_id,
                event_type,
                user_id,
                details_payload,
            )
    except Exception as exc:
        print(f"Failed to log zeugnis event: {exc}")
        return False
    return True
|