[split-required] Split final 43 files (500-668 LOC) to complete refactoring

klausur-service (11 files):
- cv_gutter_repair, ocr_pipeline_regression, upload_api
- ocr_pipeline_sessions, smart_spell, nru_worksheet_generator
- ocr_pipeline_overlays, mail/aggregator, zeugnis_api
- cv_syllable_detect, self_rag

backend-lehrer (17 files):
- classroom_engine/suggestions, generators/quiz_generator
- worksheets_api, llm_gateway/comparison, state_engine_api
- classroom/models (→ 4 submodules), services/file_processor
- alerts_agent/api/wizard+digests+routes, content_generators/pdf
- classroom/routes/sessions, llm_gateway/inference
- classroom_engine/analytics, auth/keycloak_auth
- alerts_agent/processing/rule_engine, ai_processor/print_versions

agent-core (5 files):
- brain/memory_store, brain/knowledge_graph, brain/context_manager
- orchestrator/supervisor, sessions/session_manager

admin-lehrer (5 components):
- GridOverlay, StepGridReview, DevOpsPipelineSidebar
- DataFlowDiagram, sbom/wizard/page

website (2 files):
- DependencyMap, lehrer/abitur-archiv

Other: nibis_ingestion, grid_detection_service, export-doclayout-onnx

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-04-25 09:41:42 +02:00
parent 451365a312
commit bd4b956e3c
113 changed files with 13790 additions and 14148 deletions

View File

@@ -0,0 +1,232 @@
"""
Zeugnis API Sources — source and seed URL management endpoints.
Extracted from zeugnis_api.py for modularity.
"""
from typing import Optional, List
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
from zeugnis_models import (
ZeugnisSourceCreate, ZeugnisSourceVerify,
SeedUrlCreate,
LicenseType, DocType,
BUNDESLAENDER,
generate_id, get_training_allowed, get_bundesland_name, get_license_for_bundesland,
)
from metrics_db import (
get_zeugnis_sources, upsert_zeugnis_source, get_pool,
)
# Router collecting the source, seed-URL and init endpoints defined below;
# every route is mounted under /api/v1/admin/zeugnis and tagged "Zeugnis Crawler".
router = APIRouter(prefix="/api/v1/admin/zeugnis", tags=["Zeugnis Crawler"])
# =============================================================================
# Sources Endpoints
# =============================================================================
@router.get("/sources", response_model=List[dict])
async def list_sources():
    """List the zeugnis sources, one per Bundesland.

    Returns whatever ``get_zeugnis_sources()`` yields. When that result is
    empty, a placeholder entry is synthesised for every item in
    ``BUNDESLAENDER`` instead: license and training flags come from the
    per-Bundesland helpers, while id, base URL, verification and timestamp
    fields are ``None``.
    """
    stored = await get_zeugnis_sources()
    if stored:
        return stored

    # Nothing stored yet: fall back to one default record per known Bundesland.
    defaults = []
    for land_code, land_info in BUNDESLAENDER.items():
        defaults.append({
            "id": None,
            "bundesland": land_code,
            "name": land_info["name"],
            "base_url": None,
            "license_type": str(get_license_for_bundesland(land_code).value),
            "training_allowed": get_training_allowed(land_code),
            "verified_by": None,
            "verified_at": None,
            "created_at": None,
            "updated_at": None,
        })
    return defaults
@router.post("/sources", response_model=dict)
async def create_source(source: ZeugnisSourceCreate):
    """Store a zeugnis source via ``upsert_zeugnis_source``.

    A fresh id is generated for every call and passed to the upsert together
    with the fields of ``source``.

    Returns:
        ``{"id": <generated id>, "success": True}`` when the upsert reports success.

    Raises:
        HTTPException: 500 when the upsert reports failure.
    """
    new_id = generate_id()
    payload = {
        "id": new_id,
        "bundesland": source.bundesland,
        "name": source.name,
        "license_type": source.license_type.value,
        "training_allowed": source.training_allowed,
        "base_url": source.base_url,
    }
    stored = await upsert_zeugnis_source(**payload)
    if stored:
        return {"id": new_id, "success": True}
    raise HTTPException(status_code=500, detail="Failed to create source")
@router.put("/sources/{source_id}/verify", response_model=dict)
async def verify_source(source_id: str, verification: ZeugnisSourceVerify):
    """Record the verified license status of a source.

    Updates ``license_type``, ``training_allowed`` and ``verified_by`` of the
    ``zeugnis_sources`` row with the given id and stamps ``verified_at`` /
    ``updated_at`` with the database's ``NOW()``.

    Args:
        source_id: Id of the source row to update.
        verification: New license type, training flag and verifier name.

    Returns:
        ``{"success": True, "source_id": source_id}`` when a row was updated.

    Raises:
        HTTPException: 503 when no database pool is available, 404 when no
            source with ``source_id`` exists, 500 when the update itself fails.
    """
    pool = await get_pool()
    if not pool:
        raise HTTPException(status_code=503, detail="Database not available")

    try:
        async with pool.acquire() as conn:
            # RETURNING lets us distinguish "row updated" from "no such row";
            # a plain UPDATE matching zero rows would otherwise look like success.
            updated_id = await conn.fetchval(
                """
                UPDATE zeugnis_sources
                SET license_type = $2,
                    training_allowed = $3,
                    verified_by = $4,
                    verified_at = NOW(),
                    updated_at = NOW()
                WHERE id = $1
                RETURNING id
                """,
                source_id, verification.license_type.value,
                verification.training_allowed, verification.verified_by
            )
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e

    # Raised outside the try block so the 404 is not rewrapped as a 500.
    if updated_id is None:
        raise HTTPException(status_code=404, detail=f"Source not found: {source_id}")
    return {"success": True, "source_id": source_id}
@router.get("/sources/{bundesland}", response_model=dict)
async def get_source_by_bundesland(bundesland: str):
    """Get source details for a specific Bundesland.

    When a database pool is available and a ``zeugnis_sources`` row exists for
    ``bundesland``, that row is returned together with ``document_count``, the
    number of ``zeugnis_documents`` reachable through the source's seed URLs.
    Otherwise default information derived from the per-Bundesland helpers is
    returned with ``document_count`` 0.

    Args:
        bundesland: Bundesland code as used for the keys of ``BUNDESLAENDER``.

    Raises:
        HTTPException: 404 when no stored row exists and ``bundesland`` is not
            a key of ``BUNDESLAENDER``; 500 when the database lookup fails.
    """
    pool = await get_pool()
    source = None
    doc_count = 0

    if pool:
        try:
            async with pool.acquire() as conn:
                source = await conn.fetchrow(
                    "SELECT * FROM zeugnis_sources WHERE bundesland = $1",
                    bundesland
                )
                if source:
                    doc_count = await conn.fetchval(
                        """
                        SELECT COUNT(*) FROM zeugnis_documents d
                        JOIN zeugnis_seed_urls u ON d.seed_url_id = u.id
                        WHERE u.source_id = $1
                        """,
                        source["id"]
                    )
        except Exception as e:
            raise HTTPException(status_code=500, detail=str(e)) from e

    if source:
        return {**dict(source), "document_count": doc_count or 0}

    # No stored row (or no database): fall back to defaults, but only for a
    # known Bundesland. Previously only the no-database path performed this
    # check, so the same unknown code gave different answers depending on
    # whether the pool was available.
    if bundesland not in BUNDESLAENDER:
        raise HTTPException(status_code=404, detail=f"Bundesland not found: {bundesland}")
    return {
        "bundesland": bundesland,
        "name": get_bundesland_name(bundesland),
        "training_allowed": get_training_allowed(bundesland),
        "license_type": get_license_for_bundesland(bundesland).value,
        "document_count": 0,
    }
# =============================================================================
# Seed URLs Endpoints
# =============================================================================
@router.get("/sources/{source_id}/urls", response_model=List[dict])
async def list_seed_urls(source_id: str):
    """Return the seed URLs registered for one source, ordered by ``created_at``.

    Each ``zeugnis_seed_urls`` row is converted to a plain dict. Without a
    database pool the result is an empty list rather than an error.

    Raises:
        HTTPException: 500 when the query fails.
    """
    pool = await get_pool()
    if pool:
        query = "SELECT * FROM zeugnis_seed_urls WHERE source_id = $1 ORDER BY created_at"
        try:
            async with pool.acquire() as conn:
                records = await conn.fetch(query, source_id)
                return list(map(dict, records))
        except Exception as e:
            raise HTTPException(status_code=500, detail=str(e))
    return []
@router.post("/sources/{source_id}/urls", response_model=dict)
async def add_seed_url(source_id: str, seed_url: SeedUrlCreate):
    """Insert a seed URL for a source with status ``'pending'``.

    Args:
        source_id: Id stored in the new row's ``source_id`` column.
        seed_url: URL and document type of the new seed entry.

    Returns:
        ``{"id": <generated id>, "success": True}`` after the insert.

    Raises:
        HTTPException: 503 when no database pool is available, 500 when the
            insert fails.
    """
    pool = await get_pool()
    if not pool:
        raise HTTPException(status_code=503, detail="Database not available")

    new_url_id = generate_id()
    try:
        async with pool.acquire() as conn:
            await conn.execute(
                """
                INSERT INTO zeugnis_seed_urls (id, source_id, url, doc_type, status)
                VALUES ($1, $2, $3, $4, 'pending')
                """,
                new_url_id, source_id, seed_url.url, seed_url.doc_type.value
            )
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
    else:
        return {"id": new_url_id, "success": True}
@router.delete("/urls/{url_id}", response_model=dict)
async def delete_seed_url(url_id: str):
    """Delete the seed URL row with the given id.

    Returns ``{"success": True}`` once the DELETE statement has run; the
    number of affected rows is not inspected.

    Raises:
        HTTPException: 503 when no database pool is available, 500 when the
            statement fails.
    """
    pool = await get_pool()
    if not pool:
        raise HTTPException(status_code=503, detail="Database not available")

    delete_sql = "DELETE FROM zeugnis_seed_urls WHERE id = $1"
    try:
        async with pool.acquire() as conn:
            await conn.execute(delete_sql, url_id)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
    else:
        return {"success": True}
# =============================================================================
# Initialization Endpoint
# =============================================================================
@router.post("/init", response_model=dict)
async def initialize_sources():
    """Upsert one default source for every entry in ``BUNDESLAENDER``.

    Each Bundesland gets a freshly generated id and the license/training
    values from the per-Bundesland helpers. The upserts run one after another.

    Returns:
        ``{"success": True, "sources_created": n}`` where ``n`` counts the
        upserts that reported success.

    Raises:
        HTTPException: 503 when no database pool is available, 500 when any
            upsert raises.
    """
    # The pool is fetched only as an availability check; the upsert helper
    # is called without it.
    if not await get_pool():
        raise HTTPException(status_code=503, detail="Database not available")

    try:
        outcomes = [
            await upsert_zeugnis_source(
                id=generate_id(),
                bundesland=land_code,
                name=land_info["name"],
                license_type=get_license_for_bundesland(land_code).value,
                training_allowed=get_training_allowed(land_code),
            )
            for land_code, land_info in BUNDESLAENDER.items()
        ]
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

    return {"success": True, "sources_created": sum(1 for ok in outcomes if ok)}