""" PostgreSQL Metrics Database - Zeugnis Operations Zeugnis source management, document queries, statistics, and event logging. Extracted from metrics_db.py to keep files under 500 LOC. """ from typing import Optional, List, Dict from metrics_db_core import get_pool # ============================================================================= # Zeugnis Database Operations # ============================================================================= async def get_zeugnis_sources() -> List[Dict]: """Get all zeugnis sources (Bundeslaender).""" pool = await get_pool() if pool is None: return [] try: async with pool.acquire() as conn: rows = await conn.fetch( """ SELECT id, bundesland, name, base_url, license_type, training_allowed, verified_by, verified_at, created_at, updated_at FROM zeugnis_sources ORDER BY bundesland """ ) return [dict(r) for r in rows] except Exception as e: print(f"Failed to get zeugnis sources: {e}") return [] async def upsert_zeugnis_source( id: str, bundesland: str, name: str, license_type: str, training_allowed: bool, base_url: Optional[str] = None, verified_by: Optional[str] = None, ) -> bool: """Insert or update a zeugnis source.""" pool = await get_pool() if pool is None: return False try: async with pool.acquire() as conn: await conn.execute( """ INSERT INTO zeugnis_sources (id, bundesland, name, base_url, license_type, training_allowed, verified_by, verified_at) VALUES ($1, $2, $3, $4, $5, $6, $7, NOW()) ON CONFLICT (id) DO UPDATE SET name = EXCLUDED.name, base_url = EXCLUDED.base_url, license_type = EXCLUDED.license_type, training_allowed = EXCLUDED.training_allowed, verified_by = EXCLUDED.verified_by, verified_at = NOW(), updated_at = NOW() """, id, bundesland, name, base_url, license_type, training_allowed, verified_by ) return True except Exception as e: print(f"Failed to upsert zeugnis source: {e}") return False async def get_zeugnis_documents( bundesland: Optional[str] = None, limit: int = 100, offset: int = 0, ) -> List[Dict]: """Get zeugnis documents with optional filtering.""" pool = await get_pool() if pool is None: return [] try: async with pool.acquire() as conn: if bundesland: rows = await conn.fetch( """ SELECT d.*, s.bundesland, s.name as source_name FROM zeugnis_documents d JOIN zeugnis_seed_urls u ON d.seed_url_id = u.id JOIN zeugnis_sources s ON u.source_id = s.id WHERE s.bundesland = $1 ORDER BY d.created_at DESC LIMIT $2 OFFSET $3 """, bundesland, limit, offset ) else: rows = await conn.fetch( """ SELECT d.*, s.bundesland, s.name as source_name FROM zeugnis_documents d JOIN zeugnis_seed_urls u ON d.seed_url_id = u.id JOIN zeugnis_sources s ON u.source_id = s.id ORDER BY d.created_at DESC LIMIT $1 OFFSET $2 """, limit, offset ) return [dict(r) for r in rows] except Exception as e: print(f"Failed to get zeugnis documents: {e}") return [] async def get_zeugnis_stats() -> Dict: """Get zeugnis crawler statistics.""" pool = await get_pool() if pool is None: return {"error": "Database not available"} try: async with pool.acquire() as conn: sources = await conn.fetchval("SELECT COUNT(*) FROM zeugnis_sources") documents = await conn.fetchval("SELECT COUNT(*) FROM zeugnis_documents") indexed = await conn.fetchval( "SELECT COUNT(*) FROM zeugnis_documents WHERE indexed_in_qdrant = true" ) training_allowed = await conn.fetchval( "SELECT COUNT(*) FROM zeugnis_documents WHERE training_allowed = true" ) per_bundesland = await conn.fetch( """ SELECT s.bundesland, s.name, s.training_allowed, COUNT(d.id) as doc_count FROM zeugnis_sources s LEFT JOIN zeugnis_seed_urls u ON s.id = u.source_id LEFT JOIN zeugnis_documents d ON u.id = d.seed_url_id GROUP BY s.bundesland, s.name, s.training_allowed ORDER BY s.bundesland """ ) active_crawls = await conn.fetchval( "SELECT COUNT(*) FROM zeugnis_crawler_queue WHERE status = 'running'" ) return { "total_sources": sources or 0, "total_documents": documents or 0, "indexed_documents": indexed or 0, "training_allowed_documents": training_allowed or 0, "active_crawls": active_crawls or 0, "per_bundesland": [dict(r) for r in per_bundesland], } except Exception as e: print(f"Failed to get zeugnis stats: {e}") return {"error": str(e)} async def log_zeugnis_event( document_id: str, event_type: str, user_id: Optional[str] = None, details: Optional[Dict] = None, ) -> bool: """Log a zeugnis usage event for audit trail.""" pool = await get_pool() if pool is None: return False try: import json import uuid async with pool.acquire() as conn: await conn.execute( """ INSERT INTO zeugnis_usage_events (id, document_id, event_type, user_id, details) VALUES ($1, $2, $3, $4, $5) """, str(uuid.uuid4()), document_id, event_type, user_id, json.dumps(details) if details else None ) return True except Exception as e: print(f"Failed to log zeugnis event: {e}") return False