[split-required] Split final 43 files (500-668 LOC) to complete refactoring
klausur-service (11 files): - cv_gutter_repair, ocr_pipeline_regression, upload_api - ocr_pipeline_sessions, smart_spell, nru_worksheet_generator - ocr_pipeline_overlays, mail/aggregator, zeugnis_api - cv_syllable_detect, self_rag backend-lehrer (17 files): - classroom_engine/suggestions, generators/quiz_generator - worksheets_api, llm_gateway/comparison, state_engine_api - classroom/models (→ 4 submodules), services/file_processor - alerts_agent/api/wizard+digests+routes, content_generators/pdf - classroom/routes/sessions, llm_gateway/inference - classroom_engine/analytics, auth/keycloak_auth - alerts_agent/processing/rule_engine, ai_processor/print_versions agent-core (5 files): - brain/memory_store, brain/knowledge_graph, brain/context_manager - orchestrator/supervisor, sessions/session_manager admin-lehrer (5 components): - GridOverlay, StepGridReview, DevOpsPipelineSidebar - DataFlowDiagram, sbom/wizard/page website (2 files): - DependencyMap, lehrer/abitur-archiv Other: nibis_ingestion, grid_detection_service, export-doclayout-onnx Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
232
klausur-service/backend/zeugnis_api_sources.py
Normal file
232
klausur-service/backend/zeugnis_api_sources.py
Normal file
@@ -0,0 +1,232 @@
|
||||
"""
|
||||
Zeugnis API Sources — source and seed URL management endpoints.
|
||||
|
||||
Extracted from zeugnis_api.py for modularity.
|
||||
"""
|
||||
|
||||
from typing import Optional, List
|
||||
from fastapi import APIRouter, HTTPException
|
||||
from pydantic import BaseModel
|
||||
|
||||
from zeugnis_models import (
|
||||
ZeugnisSourceCreate, ZeugnisSourceVerify,
|
||||
SeedUrlCreate,
|
||||
LicenseType, DocType,
|
||||
BUNDESLAENDER,
|
||||
generate_id, get_training_allowed, get_bundesland_name, get_license_for_bundesland,
|
||||
)
|
||||
from metrics_db import (
|
||||
get_zeugnis_sources, upsert_zeugnis_source, get_pool,
|
||||
)
|
||||
|
||||
|
||||
# Router collecting the zeugnis source and seed-URL endpoints defined in this
# module; every route below is served under the /api/v1/admin/zeugnis prefix.
router = APIRouter(prefix="/api/v1/admin/zeugnis", tags=["Zeugnis Crawler"])
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Sources Endpoints
|
||||
# =============================================================================
|
||||
|
||||
@router.get("/sources", response_model=List[dict])
async def list_sources():
    """Return the stored zeugnis sources, or per-Bundesland placeholders.

    Returns:
        Whatever ``get_zeugnis_sources()`` yields when that result is truthy.
        Otherwise one placeholder dict per entry of ``BUNDESLAENDER`` whose
        persistence fields (``id``, ``base_url``, ``verified_*``, timestamps)
        are all ``None``.
    """
    stored = await get_zeugnis_sources()
    if stored:
        return stored

    # Nothing stored yet: synthesize a default record for every Bundesland.
    placeholders = []
    for code, info in BUNDESLAENDER.items():
        placeholders.append(
            {
                "id": None,
                "bundesland": code,
                "name": info["name"],
                "base_url": None,
                "license_type": str(get_license_for_bundesland(code).value),
                "training_allowed": get_training_allowed(code),
                "verified_by": None,
                "verified_at": None,
                "created_at": None,
                "updated_at": None,
            }
        )
    return placeholders
|
||||
|
||||
|
||||
@router.post("/sources", response_model=dict)
async def create_source(source: ZeugnisSourceCreate):
    """Store a zeugnis source under a freshly generated id.

    Args:
        source: Request payload; its ``bundesland``, ``name``,
            ``license_type``, ``training_allowed`` and ``base_url`` fields are
            forwarded to ``upsert_zeugnis_source``.

    Returns:
        ``{"id": <generated id>, "success": True}``.

    Raises:
        HTTPException: 500 when ``upsert_zeugnis_source`` returns a falsy value.
    """
    new_id = generate_id()
    payload = dict(
        id=new_id,
        bundesland=source.bundesland,
        name=source.name,
        license_type=source.license_type.value,
        training_allowed=source.training_allowed,
        base_url=source.base_url,
    )
    stored = await upsert_zeugnis_source(**payload)
    if stored:
        return {"id": new_id, "success": True}
    raise HTTPException(status_code=500, detail="Failed to create source")
|
||||
|
||||
|
||||
@router.put("/sources/{source_id}/verify", response_model=dict)
async def verify_source(source_id: str, verification: ZeugnisSourceVerify):
    """Record a license verification for an existing source.

    Updates ``license_type``, ``training_allowed`` and ``verified_by`` on the
    ``zeugnis_sources`` row whose ``id`` equals ``source_id`` and stamps
    ``verified_at`` / ``updated_at`` with ``NOW()``.

    Args:
        source_id: Primary key of the source row to update.
        verification: Payload carrying the verified license type, the
            training permission and the verifier's identity.

    Returns:
        ``{"success": True, "source_id": source_id}``.

    Raises:
        HTTPException: 503 when no database pool is available, 404 when the
            UPDATE matched no row, 500 for any other failure.
    """
    pool = await get_pool()
    if not pool:
        raise HTTPException(status_code=503, detail="Database not available")

    try:
        async with pool.acquire() as conn:
            status = await conn.execute(
                """
                UPDATE zeugnis_sources
                SET license_type = $2,
                training_allowed = $3,
                verified_by = $4,
                verified_at = NOW(),
                updated_at = NOW()
                WHERE id = $1
                """,
                source_id, verification.license_type.value,
                verification.training_allowed, verification.verified_by
            )
        # NOTE(review): assumes an asyncpg-style connection whose execute()
        # returns the command tag (e.g. "UPDATE 1") — confirm against
        # metrics_db.get_pool. Any other return value keeps the old behavior.
        if status == "UPDATE 0":
            # Without this check an unknown id was reported as verified even
            # though no row had been touched.
            raise HTTPException(status_code=404, detail=f"Source not found: {source_id}")
        return {"success": True, "source_id": source_id}
    except HTTPException:
        # Keep deliberate HTTP errors intact instead of re-wrapping them as 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
|
||||
|
||||
|
||||
def _default_source_info(bundesland: str) -> dict:
    """Build the fallback record for a Bundesland that has no stored source.

    Raises:
        HTTPException: 404 when ``bundesland`` is not a key of ``BUNDESLAENDER``.
    """
    if bundesland not in BUNDESLAENDER:
        raise HTTPException(status_code=404, detail=f"Bundesland not found: {bundesland}")
    return {
        "bundesland": bundesland,
        "name": get_bundesland_name(bundesland),
        "training_allowed": get_training_allowed(bundesland),
        "license_type": get_license_for_bundesland(bundesland).value,
        "document_count": 0,
    }


@router.get("/sources/{bundesland}", response_model=dict)
async def get_source_by_bundesland(bundesland: str):
    """Get source details for a specific Bundesland.

    A stored ``zeugnis_sources`` row takes precedence and is returned with an
    added ``document_count`` (documents joined through the source's seed
    URLs). Without a stored row, or without a database pool, a default record
    derived from ``BUNDESLAENDER`` is returned.

    Args:
        bundesland: Bundesland code taken from the URL path.

    Returns:
        The stored row as a dict plus ``document_count``, or the default
        record with ``document_count`` set to 0.

    Raises:
        HTTPException: 404 when there is no stored row and the code is not a
            known Bundesland, 500 when the database lookup fails.
    """
    pool = await get_pool()
    if not pool:
        return _default_source_info(bundesland)

    try:
        async with pool.acquire() as conn:
            source = await conn.fetchrow(
                "SELECT * FROM zeugnis_sources WHERE bundesland = $1",
                bundesland
            )
            if source:
                doc_count = await conn.fetchval(
                    """
                    SELECT COUNT(*) FROM zeugnis_documents d
                    JOIN zeugnis_seed_urls u ON d.seed_url_id = u.id
                    WHERE u.source_id = $1
                    """,
                    source["id"]
                )
                # COUNT(*) should never be NULL; `or 0` is kept as a safeguard.
                return {**dict(source), "document_count": doc_count or 0}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e

    # No stored row: use the same defaults (and the same 404 check for unknown
    # codes) as the no-database path. This sits outside the try block so the
    # 404 is not re-wrapped as a 500.
    return _default_source_info(bundesland)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Seed URLs Endpoints
|
||||
# =============================================================================
|
||||
|
||||
@router.get("/sources/{source_id}/urls", response_model=List[dict])
async def list_seed_urls(source_id: str):
    """List the seed URLs registered for one source, ordered by creation time.

    Args:
        source_id: Value matched against ``zeugnis_seed_urls.source_id``.

    Returns:
        One dict per matching row; an empty list when no database pool is
        available.

    Raises:
        HTTPException: 500 when the query or the row conversion fails.
    """
    pool = await get_pool()
    if pool:
        query = "SELECT * FROM zeugnis_seed_urls WHERE source_id = $1 ORDER BY created_at"
        try:
            async with pool.acquire() as conn:
                records = await conn.fetch(query, source_id)
                return list(map(dict, records))
        except Exception as e:
            raise HTTPException(status_code=500, detail=str(e))

    # No pool: report "no URLs" rather than an error.
    return []
|
||||
|
||||
|
||||
@router.post("/sources/{source_id}/urls", response_model=dict)
async def add_seed_url(source_id: str, seed_url: SeedUrlCreate):
    """Insert a seed URL for a source with status ``'pending'``.

    Args:
        source_id: Id stored in the new row's ``source_id`` column.
        seed_url: Payload providing ``url`` and ``doc_type``.

    Returns:
        ``{"id": <generated id>, "success": True}``.

    Raises:
        HTTPException: 503 when no database pool is available, 500 when the
            INSERT fails.
    """
    pool = await get_pool()
    if not pool:
        raise HTTPException(status_code=503, detail="Database not available")

    new_id = generate_id()
    insert_sql = """
        INSERT INTO zeugnis_seed_urls (id, source_id, url, doc_type, status)
        VALUES ($1, $2, $3, $4, 'pending')
        """
    try:
        async with pool.acquire() as conn:
            await conn.execute(
                insert_sql,
                new_id,
                source_id,
                seed_url.url,
                seed_url.doc_type.value,
            )
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
    else:
        return {"id": new_id, "success": True}
|
||||
|
||||
|
||||
@router.delete("/urls/{url_id}", response_model=dict)
async def delete_seed_url(url_id: str):
    """Delete the seed URL row whose ``id`` equals ``url_id``.

    Args:
        url_id: Primary key of the ``zeugnis_seed_urls`` row to remove.

    Returns:
        ``{"success": True}`` once the DELETE statement has run; the number of
        affected rows is not inspected.

    Raises:
        HTTPException: 503 when no database pool is available, 500 when the
            DELETE fails.
    """
    pool = await get_pool()
    if not pool:
        raise HTTPException(status_code=503, detail="Database not available")

    delete_sql = "DELETE FROM zeugnis_seed_urls WHERE id = $1"
    try:
        async with pool.acquire() as conn:
            await conn.execute(delete_sql, url_id)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
    else:
        return {"success": True}
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Initialization Endpoint
|
||||
# =============================================================================
|
||||
|
||||
@router.post("/init", response_model=dict)
async def initialize_sources():
    """Upsert one default source per entry of ``BUNDESLAENDER``.

    Each source receives a freshly generated id together with the license
    type and training flag looked up for its Bundesland code.

    Returns:
        ``{"success": True, "sources_created": n}`` where ``n`` counts the
        upserts that returned a truthy value.

    Raises:
        HTTPException: 503 when no database pool is available, 500 when any
            upsert raises.
    """
    pool = await get_pool()
    if not pool:
        raise HTTPException(status_code=503, detail="Database not available")

    try:
        outcomes = []
        # Upserts run one after another; the first exception aborts the loop.
        for code, info in BUNDESLAENDER.items():
            outcome = await upsert_zeugnis_source(
                id=generate_id(),
                bundesland=code,
                name=info["name"],
                license_type=get_license_for_bundesland(code).value,
                training_allowed=get_training_allowed(code),
            )
            outcomes.append(outcome)
        created = sum(1 for ok in outcomes if ok)
        return {"success": True, "sources_created": created}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
||||
Reference in New Issue
Block a user