""" Zeugnis API Sources — source and seed URL management endpoints. Extracted from zeugnis_api.py for modularity. """ from typing import Optional, List from fastapi import APIRouter, HTTPException from pydantic import BaseModel from zeugnis_models import ( ZeugnisSourceCreate, ZeugnisSourceVerify, SeedUrlCreate, LicenseType, DocType, BUNDESLAENDER, generate_id, get_training_allowed, get_bundesland_name, get_license_for_bundesland, ) from metrics_db import ( get_zeugnis_sources, upsert_zeugnis_source, get_pool, ) router = APIRouter(prefix="/api/v1/admin/zeugnis", tags=["Zeugnis Crawler"]) # ============================================================================= # Sources Endpoints # ============================================================================= @router.get("/sources", response_model=List[dict]) async def list_sources(): """Get all zeugnis sources (Bundeslaender).""" sources = await get_zeugnis_sources() if not sources: # Return default sources if none exist return [ { "id": None, "bundesland": code, "name": info["name"], "base_url": None, "license_type": str(get_license_for_bundesland(code).value), "training_allowed": get_training_allowed(code), "verified_by": None, "verified_at": None, "created_at": None, "updated_at": None, } for code, info in BUNDESLAENDER.items() ] return sources @router.post("/sources", response_model=dict) async def create_source(source: ZeugnisSourceCreate): """Create or update a zeugnis source.""" source_id = generate_id() success = await upsert_zeugnis_source( id=source_id, bundesland=source.bundesland, name=source.name, license_type=source.license_type.value, training_allowed=source.training_allowed, base_url=source.base_url, ) if not success: raise HTTPException(status_code=500, detail="Failed to create source") return {"id": source_id, "success": True} @router.put("/sources/{source_id}/verify", response_model=dict) async def verify_source(source_id: str, verification: ZeugnisSourceVerify): """Verify a source's 
license status.""" pool = await get_pool() if not pool: raise HTTPException(status_code=503, detail="Database not available") try: async with pool.acquire() as conn: await conn.execute( """ UPDATE zeugnis_sources SET license_type = $2, training_allowed = $3, verified_by = $4, verified_at = NOW(), updated_at = NOW() WHERE id = $1 """, source_id, verification.license_type.value, verification.training_allowed, verification.verified_by ) return {"success": True, "source_id": source_id} except Exception as e: raise HTTPException(status_code=500, detail=str(e)) @router.get("/sources/{bundesland}", response_model=dict) async def get_source_by_bundesland(bundesland: str): """Get source details for a specific Bundesland.""" pool = await get_pool() if not pool: # Return default info if bundesland not in BUNDESLAENDER: raise HTTPException(status_code=404, detail=f"Bundesland not found: {bundesland}") return { "bundesland": bundesland, "name": get_bundesland_name(bundesland), "training_allowed": get_training_allowed(bundesland), "license_type": get_license_for_bundesland(bundesland).value, "document_count": 0, } try: async with pool.acquire() as conn: source = await conn.fetchrow( "SELECT * FROM zeugnis_sources WHERE bundesland = $1", bundesland ) if source: doc_count = await conn.fetchval( """ SELECT COUNT(*) FROM zeugnis_documents d JOIN zeugnis_seed_urls u ON d.seed_url_id = u.id WHERE u.source_id = $1 """, source["id"] ) return {**dict(source), "document_count": doc_count or 0} # Return default return { "bundesland": bundesland, "name": get_bundesland_name(bundesland), "training_allowed": get_training_allowed(bundesland), "license_type": get_license_for_bundesland(bundesland).value, "document_count": 0, } except Exception as e: raise HTTPException(status_code=500, detail=str(e)) # ============================================================================= # Seed URLs Endpoints # ============================================================================= 

@router.get("/sources/{source_id}/urls", response_model=List[dict])
async def list_seed_urls(source_id: str):
    """Get all seed URLs for a source, ordered by ``created_at``.

    Returns an empty list when no database pool is available.

    Raises:
        HTTPException: 500 on any database error.
    """
    pool = await get_pool()
    if not pool:
        return []

    try:
        async with pool.acquire() as conn:
            rows = await conn.fetch(
                "SELECT * FROM zeugnis_seed_urls WHERE source_id = $1 ORDER BY created_at",
                source_id,
            )
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
    return [dict(row) for row in rows]


@router.post("/sources/{source_id}/urls", response_model=dict)
async def add_seed_url(source_id: str, seed_url: SeedUrlCreate):
    """Add a new seed URL to a source.

    The row is inserted with a freshly generated id and status ``'pending'``.

    Raises:
        HTTPException: 503 if no database pool is available, 500 on any
            database error.
    """
    pool = await get_pool()
    if not pool:
        raise HTTPException(status_code=503, detail="Database not available")

    url_id = generate_id()
    try:
        async with pool.acquire() as conn:
            await conn.execute(
                " INSERT INTO zeugnis_seed_urls (id, source_id, url, doc_type, status)"
                " VALUES ($1, $2, $3, $4, 'pending') ",
                url_id,
                source_id,
                seed_url.url,
                seed_url.doc_type.value,
            )
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
    return {"id": url_id, "success": True}


@router.delete("/urls/{url_id}", response_model=dict)
async def delete_seed_url(url_id: str):
    """Delete a seed URL.

    Raises:
        HTTPException: 503 if no database pool is available, 404 if no seed
            URL with *url_id* exists, 500 on any database error.
    """
    pool = await get_pool()
    if not pool:
        raise HTTPException(status_code=503, detail="Database not available")

    try:
        async with pool.acquire() as conn:
            status = await conn.execute(
                "DELETE FROM zeugnis_seed_urls WHERE id = $1",
                url_id,
            )
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e

    # The command status looks like "DELETE <n>"; a count of zero means
    # nothing was removed and must not be reported as success.
    if isinstance(status, str) and status.rpartition(" ")[2] == "0":
        raise HTTPException(
            status_code=404, detail=f"Seed URL not found: {url_id}"
        )
    return {"success": True}


# =============================================================================
# Initialization Endpoint
# =============================================================================


@router.post("/init", response_model=dict)
async def initialize_sources():
    """Initialize default sources from BUNDESLAENDER.

    Calls ``upsert_zeugnis_source`` once per entry with a newly generated id
    and the license / training defaults for that code. ``sources_created``
    counts the upserts that reported success.

    Raises:
        HTTPException: 503 if no database pool is available, 500 if any
            upsert raises.
    """
    # The pool is fetched only as an availability check; the upsert helper
    # is called without it.
    pool = await get_pool()
    if not pool:
        raise HTTPException(status_code=503, detail="Database not available")

    created = 0
    try:
        for code, info in BUNDESLAENDER.items():
            succeeded = await upsert_zeugnis_source(
                id=generate_id(),
                bundesland=code,
                name=info["name"],
                license_type=get_license_for_bundesland(code).value,
                training_allowed=get_training_allowed(code),
            )
            if succeeded:
                created += 1
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
    return {"success": True, "sources_created": created}