klausur-service (11 files): - cv_gutter_repair, ocr_pipeline_regression, upload_api - ocr_pipeline_sessions, smart_spell, nru_worksheet_generator - ocr_pipeline_overlays, mail/aggregator, zeugnis_api - cv_syllable_detect, self_rag backend-lehrer (17 files): - classroom_engine/suggestions, generators/quiz_generator - worksheets_api, llm_gateway/comparison, state_engine_api - classroom/models (→ 4 submodules), services/file_processor - alerts_agent/api/wizard+digests+routes, content_generators/pdf - classroom/routes/sessions, llm_gateway/inference - classroom_engine/analytics, auth/keycloak_auth - alerts_agent/processing/rule_engine, ai_processor/print_versions agent-core (5 files): - brain/memory_store, brain/knowledge_graph, brain/context_manager - orchestrator/supervisor, sessions/session_manager admin-lehrer (5 components): - GridOverlay, StepGridReview, DevOpsPipelineSidebar - DataFlowDiagram, sbom/wizard/page website (2 files): - DependencyMap, lehrer/abitur-archiv Other: nibis_ingestion, grid_detection_service, export-doclayout-onnx Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
233 lines
7.9 KiB
Python
233 lines
7.9 KiB
Python
"""
|
|
Zeugnis API Sources — source and seed URL management endpoints.
|
|
|
|
Extracted from zeugnis_api.py for modularity.
|
|
"""
|
|
|
|
from typing import Optional, List
|
|
from fastapi import APIRouter, HTTPException
|
|
from pydantic import BaseModel
|
|
|
|
from zeugnis_models import (
|
|
ZeugnisSourceCreate, ZeugnisSourceVerify,
|
|
SeedUrlCreate,
|
|
LicenseType, DocType,
|
|
BUNDESLAENDER,
|
|
generate_id, get_training_allowed, get_bundesland_name, get_license_for_bundesland,
|
|
)
|
|
from metrics_db import (
|
|
get_zeugnis_sources, upsert_zeugnis_source, get_pool,
|
|
)
|
|
|
|
|
|
# Router that every endpoint in this module registers on; all paths below
# are relative to the admin zeugnis prefix.
router = APIRouter(
    prefix="/api/v1/admin/zeugnis",
    tags=["Zeugnis Crawler"],
)
|
|
|
|
|
|
# =============================================================================
|
|
# Sources Endpoints
|
|
# =============================================================================
|
|
|
|
@router.get("/sources", response_model=List[dict])
async def list_sources():
    """Return all zeugnis sources, one per Bundesland.

    Whatever ``get_zeugnis_sources()`` yields is returned unchanged when it
    is non-empty. Otherwise a placeholder entry is synthesized for every
    item of ``BUNDESLAENDER``, with all persistence-related fields
    (``id``, ``base_url``, verification and timestamp fields) set to ``None``.
    """
    stored = await get_zeugnis_sources()
    if stored:
        return stored

    # Nothing stored yet: build unsaved defaults from the static table.
    defaults = []
    for code, info in BUNDESLAENDER.items():
        entry = {
            "id": None,
            "bundesland": code,
            "name": info["name"],
            "base_url": None,
            "license_type": str(get_license_for_bundesland(code).value),
            "training_allowed": get_training_allowed(code),
            "verified_by": None,
            "verified_at": None,
            "created_at": None,
            "updated_at": None,
        }
        defaults.append(entry)
    return defaults
|
|
|
|
|
|
@router.post("/sources", response_model=dict)
async def create_source(source: ZeugnisSourceCreate):
    """Create or update a zeugnis source.

    A fresh id is generated and passed, together with the request fields,
    to ``upsert_zeugnis_source``.

    Returns:
        ``{"id": <generated id>, "success": True}`` when the upsert reports
        success.

    Raises:
        HTTPException: 500 when the upsert returns a falsy result.
    """
    new_id = generate_id()
    upsert_args = {
        "id": new_id,
        "bundesland": source.bundesland,
        "name": source.name,
        "license_type": source.license_type.value,
        "training_allowed": source.training_allowed,
        "base_url": source.base_url,
    }
    if await upsert_zeugnis_source(**upsert_args):
        return {"id": new_id, "success": True}
    raise HTTPException(status_code=500, detail="Failed to create source")
|
|
|
|
|
|
@router.put("/sources/{source_id}/verify", response_model=dict)
async def verify_source(source_id: str, verification: ZeugnisSourceVerify):
    """Verify a source's license status.

    Updates ``license_type``, ``training_allowed`` and ``verified_by`` of the
    row in ``zeugnis_sources`` whose id is ``source_id`` and stamps
    ``verified_at`` / ``updated_at`` with the database's ``NOW()``.

    Args:
        source_id: Id of the source row to verify.
        verification: Request body carrying the license type, the
            training-allowed flag and the verifier's name.

    Returns:
        ``{"success": True, "source_id": source_id}`` when a row was updated.

    Raises:
        HTTPException: 503 when no database pool is available, 404 when no
            source with ``source_id`` exists, 500 when the query fails.
    """
    pool = await get_pool()
    if not pool:
        raise HTTPException(status_code=503, detail="Database not available")

    try:
        async with pool.acquire() as conn:
            # RETURNING id yields no row when nothing matched, so a None
            # result distinguishes "unknown source" from a real update.
            # NOTE(review): relies on fetchval returning None for an empty
            # result set, as the other fetchval call in this module assumes.
            updated_id = await conn.fetchval(
                """
                UPDATE zeugnis_sources
                SET license_type = $2,
                    training_allowed = $3,
                    verified_by = $4,
                    verified_at = NOW(),
                    updated_at = NOW()
                WHERE id = $1
                RETURNING id
                """,
                source_id, verification.license_type.value,
                verification.training_allowed, verification.verified_by
            )
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e

    # Raised outside the try block so the broad handler above cannot turn
    # this 404 into a 500.
    if updated_id is None:
        raise HTTPException(status_code=404, detail=f"Source not found: {source_id}")
    return {"success": True, "source_id": source_id}
|
|
|
|
|
|
def _default_source_info(bundesland: str) -> dict:
    """Build the fallback payload for a Bundesland without a stored source row."""
    return {
        "bundesland": bundesland,
        "name": get_bundesland_name(bundesland),
        "training_allowed": get_training_allowed(bundesland),
        "license_type": get_license_for_bundesland(bundesland).value,
        "document_count": 0,
    }


@router.get("/sources/{bundesland}", response_model=dict)
async def get_source_by_bundesland(bundesland: str):
    """Get source details for a specific Bundesland.

    When a row for ``bundesland`` exists in ``zeugnis_sources`` it is returned
    together with ``document_count``, the number of ``zeugnis_documents``
    reachable through the source's seed URLs. When the database is
    unavailable or holds no such row, default information derived from the
    static Bundesland helpers is returned instead.

    Args:
        bundesland: Bundesland code taken from the URL path.

    Returns:
        The stored row plus ``document_count``, or the default payload with
        ``document_count`` set to 0.

    Raises:
        HTTPException: 404 when there is no stored row and ``bundesland`` is
            not a key of ``BUNDESLAENDER``; 500 when a query fails.
    """
    pool = await get_pool()
    if pool:
        try:
            async with pool.acquire() as conn:
                source = await conn.fetchrow(
                    "SELECT * FROM zeugnis_sources WHERE bundesland = $1",
                    bundesland
                )
                if source:
                    doc_count = await conn.fetchval(
                        """
                        SELECT COUNT(*) FROM zeugnis_documents d
                        JOIN zeugnis_seed_urls u ON d.seed_url_id = u.id
                        WHERE u.source_id = $1
                        """,
                        source["id"]
                    )
                    return {**dict(source), "document_count": doc_count or 0}
        except Exception as e:
            raise HTTPException(status_code=500, detail=str(e)) from e

    # No database, or no stored row: fall back to defaults. Unknown codes are
    # rejected the same way on both paths, so the response does not depend on
    # whether the database happens to be reachable.
    if bundesland not in BUNDESLAENDER:
        raise HTTPException(status_code=404, detail=f"Bundesland not found: {bundesland}")
    return _default_source_info(bundesland)
|
|
|
|
|
|
# =============================================================================
|
|
# Seed URLs Endpoints
|
|
# =============================================================================
|
|
|
|
@router.get("/sources/{source_id}/urls", response_model=List[dict])
async def list_seed_urls(source_id: str):
    """List the seed URLs registered for one source, ordered by ``created_at``.

    Returns an empty list when no database pool is available.

    Raises:
        HTTPException: 500 when the query fails.
    """
    pool = await get_pool()
    if not pool:
        return []

    query = "SELECT * FROM zeugnis_seed_urls WHERE source_id = $1 ORDER BY created_at"
    try:
        async with pool.acquire() as conn:
            records = await conn.fetch(query, source_id)
            return list(map(dict, records))
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
|
|
|
|
|
|
@router.post("/sources/{source_id}/urls", response_model=dict)
async def add_seed_url(source_id: str, seed_url: SeedUrlCreate):
    """Register a new seed URL under a source.

    The row is inserted into ``zeugnis_seed_urls`` with a freshly generated
    id and the status ``'pending'``.

    Returns:
        ``{"id": <generated id>, "success": True}``.

    Raises:
        HTTPException: 503 when no database pool is available, 500 when the
            insert fails.
    """
    pool = await get_pool()
    if not pool:
        raise HTTPException(status_code=503, detail="Database not available")

    new_url_id = generate_id()
    insert_sql = """
                INSERT INTO zeugnis_seed_urls (id, source_id, url, doc_type, status)
                VALUES ($1, $2, $3, $4, 'pending')
                """
    try:
        async with pool.acquire() as conn:
            await conn.execute(
                insert_sql,
                new_url_id,
                source_id,
                seed_url.url,
                seed_url.doc_type.value,
            )
        return {"id": new_url_id, "success": True}
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
|
|
|
|
|
|
@router.delete("/urls/{url_id}", response_model=dict)
async def delete_seed_url(url_id: str):
    """Remove the seed URL with the given id.

    Returns:
        ``{"success": True}`` once the DELETE statement has run; the result
        of the statement is not inspected.

    Raises:
        HTTPException: 503 when no database pool is available, 500 when the
            statement fails.
    """
    pool = await get_pool()
    if not pool:
        raise HTTPException(status_code=503, detail="Database not available")

    delete_sql = "DELETE FROM zeugnis_seed_urls WHERE id = $1"
    try:
        async with pool.acquire() as conn:
            await conn.execute(delete_sql, url_id)
        return {"success": True}
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
|
|
|
|
|
|
# =============================================================================
|
|
# Initialization Endpoint
|
|
# =============================================================================
|
|
|
|
@router.post("/init", response_model=dict)
async def initialize_sources():
    """Seed the source table with one entry per item of ``BUNDESLAENDER``.

    Each entry is upserted with a freshly generated id plus the license type
    and training flag derived from the Bundesland code.

    Returns:
        ``{"success": True, "sources_created": n}`` where ``n`` counts the
        upserts that reported success.

    Raises:
        HTTPException: 503 when no database pool is available, 500 when any
            step raises.
    """
    pool = await get_pool()
    if not pool:
        raise HTTPException(status_code=503, detail="Database not available")

    try:
        outcomes = []
        for code, info in BUNDESLAENDER.items():
            ok = await upsert_zeugnis_source(
                id=generate_id(),
                bundesland=code,
                name=info["name"],
                license_type=get_license_for_bundesland(code).value,
                training_allowed=get_training_allowed(code),
            )
            outcomes.append(ok)

        # Only truthy upsert results count as created.
        created = sum(1 for ok in outcomes if ok)
        return {"success": True, "sources_created": created}
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
|