[split-required] Split 700-870 LOC files across all services

backend-lehrer (11 files):
- llm_gateway/routes/schools.py (867 → 5), recording_api.py (848 → 6)
- messenger_api.py (840 → 5), print_generator.py (824 → 5)
- unit_analytics_api.py (751 → 5), classroom/routes/context.py (726 → 4)
- llm_gateway/routes/edu_search_seeds.py (710 → 4)

klausur-service (12 files):
- ocr_labeling_api.py (845 → 4), metrics_db.py (833 → 4)
- legal_corpus_api.py (790 → 4), page_crop.py (758 → 3)
- mail/ai_service.py (747 → 4), github_crawler.py (767 → 3)
- trocr_service.py (730 → 4), full_compliance_pipeline.py (723 → 4)
- dsfa_rag_api.py (715 → 4), ocr_pipeline_auto.py (705 → 4)

website (6 pages):
- audit-checklist (867 → 8), content (806 → 6)
- screen-flow (790 → 4), scraper (789 → 5)
- zeugnisse (776 → 5), modules (745 → 4)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-04-25 08:01:18 +02:00
parent b6983ab1dc
commit 34da9f4cda
106 changed files with 16500 additions and 16947 deletions

View File

@@ -0,0 +1,386 @@
"""
EduSearch Seeds CRUD Routes.
List, get, create, update, delete, and bulk import for seed URLs.
"""
import asyncio
import logging
import os
from datetime import datetime
from typing import Optional, List

import asyncpg
from fastapi import APIRouter, HTTPException, Query
from .edu_search_models import (
CategoryResponse,
SeedCreate,
SeedUpdate,
SeedResponse,
SeedsListResponse,
BulkImportRequest,
BulkImportResponse,
)
logger = logging.getLogger(__name__)
router = APIRouter(tags=["edu-search"])
# Database connection pool
_pool: Optional[asyncpg.Pool] = None
async def get_db_pool() -> asyncpg.Pool:
"""Get or create database connection pool."""
global _pool
if _pool is None:
database_url = os.environ.get("DATABASE_URL")
if not database_url:
raise RuntimeError("DATABASE_URL nicht konfiguriert - bitte via Vault oder Umgebungsvariable setzen")
_pool = await asyncpg.create_pool(database_url, min_size=2, max_size=10)
return _pool
@router.get("/categories", response_model=List[CategoryResponse])
async def list_categories():
"""List all seed categories."""
pool = await get_db_pool()
async with pool.acquire() as conn:
rows = await conn.fetch("""
SELECT id, name, display_name, description, icon, sort_order, is_active
FROM edu_search_categories
WHERE is_active = TRUE
ORDER BY sort_order
""")
return [
CategoryResponse(
id=str(row["id"]),
name=row["name"],
display_name=row["display_name"],
description=row["description"],
icon=row["icon"],
sort_order=row["sort_order"],
is_active=row["is_active"],
)
for row in rows
]
@router.get("/seeds", response_model=SeedsListResponse)
async def list_seeds(
category: Optional[str] = Query(None, description="Filter by category name"),
state: Optional[str] = Query(None, description="Filter by state code"),
enabled: Optional[bool] = Query(None, description="Filter by enabled status"),
search: Optional[str] = Query(None, description="Search in name/url"),
page: int = Query(1, ge=1),
page_size: int = Query(50, ge=1, le=200),
):
"""List seeds with optional filtering and pagination."""
pool = await get_db_pool()
async with pool.acquire() as conn:
# Build WHERE clause
conditions = []
params = []
param_idx = 1
if category:
conditions.append(f"c.name = ${param_idx}")
params.append(category)
param_idx += 1
if state:
conditions.append(f"s.state = ${param_idx}")
params.append(state)
param_idx += 1
if enabled is not None:
conditions.append(f"s.enabled = ${param_idx}")
params.append(enabled)
param_idx += 1
if search:
conditions.append(f"(s.name ILIKE ${param_idx} OR s.url ILIKE ${param_idx})")
params.append(f"%{search}%")
param_idx += 1
where_clause = " AND ".join(conditions) if conditions else "TRUE"
# Count total
count_query = f"""
SELECT COUNT(*) FROM edu_search_seeds s
LEFT JOIN edu_search_categories c ON s.category_id = c.id
WHERE {where_clause}
"""
total = await conn.fetchval(count_query, *params)
# Get paginated results
offset = (page - 1) * page_size
params.extend([page_size, offset])
query = f"""
SELECT
s.id, s.url, s.name, s.description,
c.name as category, c.display_name as category_display_name,
s.source_type, s.scope, s.state, s.trust_boost, s.enabled,
s.crawl_depth, s.crawl_frequency, s.last_crawled_at,
s.last_crawl_status, s.last_crawl_docs, s.total_documents,
s.created_at, s.updated_at
FROM edu_search_seeds s
LEFT JOIN edu_search_categories c ON s.category_id = c.id
WHERE {where_clause}
ORDER BY c.sort_order, s.name
LIMIT ${param_idx} OFFSET ${param_idx + 1}
"""
rows = await conn.fetch(query, *params)
seeds = [_row_to_seed_response(row) for row in rows]
return SeedsListResponse(
seeds=seeds,
total=total,
page=page,
page_size=page_size,
)
@router.get("/seeds/{seed_id}", response_model=SeedResponse)
async def get_seed(seed_id: str):
"""Get a single seed by ID."""
pool = await get_db_pool()
async with pool.acquire() as conn:
row = await conn.fetchrow("""
SELECT
s.id, s.url, s.name, s.description,
c.name as category, c.display_name as category_display_name,
s.source_type, s.scope, s.state, s.trust_boost, s.enabled,
s.crawl_depth, s.crawl_frequency, s.last_crawled_at,
s.last_crawl_status, s.last_crawl_docs, s.total_documents,
s.created_at, s.updated_at
FROM edu_search_seeds s
LEFT JOIN edu_search_categories c ON s.category_id = c.id
WHERE s.id = $1
""", seed_id)
if not row:
raise HTTPException(status_code=404, detail="Seed nicht gefunden")
return _row_to_seed_response(row)
@router.post("/seeds", response_model=SeedResponse, status_code=201)
async def create_seed(seed: SeedCreate):
"""Create a new seed URL."""
pool = await get_db_pool()
async with pool.acquire() as conn:
category_id = None
if seed.category_name:
category_id = await conn.fetchval(
"SELECT id FROM edu_search_categories WHERE name = $1",
seed.category_name
)
try:
row = await conn.fetchrow("""
INSERT INTO edu_search_seeds (
url, name, description, category_id, source_type, scope,
state, trust_boost, enabled, crawl_depth, crawl_frequency
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11)
RETURNING id, created_at, updated_at
""",
seed.url, seed.name, seed.description, category_id,
seed.source_type, seed.scope, seed.state, seed.trust_boost,
seed.enabled, seed.crawl_depth, seed.crawl_frequency
)
except asyncpg.UniqueViolationError:
raise HTTPException(status_code=409, detail="URL existiert bereits")
return SeedResponse(
id=str(row["id"]),
url=seed.url,
name=seed.name,
description=seed.description,
category=seed.category_name,
category_display_name=None,
source_type=seed.source_type,
scope=seed.scope,
state=seed.state,
trust_boost=seed.trust_boost,
enabled=seed.enabled,
crawl_depth=seed.crawl_depth,
crawl_frequency=seed.crawl_frequency,
last_crawled_at=None,
last_crawl_status=None,
last_crawl_docs=0,
total_documents=0,
created_at=row["created_at"],
updated_at=row["updated_at"],
)
@router.put("/seeds/{seed_id}", response_model=SeedResponse)
async def update_seed(seed_id: str, seed: SeedUpdate):
"""Update an existing seed."""
pool = await get_db_pool()
async with pool.acquire() as conn:
updates = []
params = []
param_idx = 1
if seed.url is not None:
updates.append(f"url = ${param_idx}")
params.append(seed.url)
param_idx += 1
if seed.name is not None:
updates.append(f"name = ${param_idx}")
params.append(seed.name)
param_idx += 1
if seed.description is not None:
updates.append(f"description = ${param_idx}")
params.append(seed.description)
param_idx += 1
if seed.category_name is not None:
category_id = await conn.fetchval(
"SELECT id FROM edu_search_categories WHERE name = $1",
seed.category_name
)
updates.append(f"category_id = ${param_idx}")
params.append(category_id)
param_idx += 1
if seed.source_type is not None:
updates.append(f"source_type = ${param_idx}")
params.append(seed.source_type)
param_idx += 1
if seed.scope is not None:
updates.append(f"scope = ${param_idx}")
params.append(seed.scope)
param_idx += 1
if seed.state is not None:
updates.append(f"state = ${param_idx}")
params.append(seed.state)
param_idx += 1
if seed.trust_boost is not None:
updates.append(f"trust_boost = ${param_idx}")
params.append(seed.trust_boost)
param_idx += 1
if seed.enabled is not None:
updates.append(f"enabled = ${param_idx}")
params.append(seed.enabled)
param_idx += 1
if seed.crawl_depth is not None:
updates.append(f"crawl_depth = ${param_idx}")
params.append(seed.crawl_depth)
param_idx += 1
if seed.crawl_frequency is not None:
updates.append(f"crawl_frequency = ${param_idx}")
params.append(seed.crawl_frequency)
param_idx += 1
if not updates:
raise HTTPException(status_code=400, detail="Keine Felder zum Aktualisieren")
updates.append("updated_at = NOW()")
params.append(seed_id)
query = f"""
UPDATE edu_search_seeds
SET {", ".join(updates)}
WHERE id = ${param_idx}
RETURNING id
"""
result = await conn.fetchrow(query, *params)
if not result:
raise HTTPException(status_code=404, detail="Seed nicht gefunden")
# Return updated seed
return await get_seed(seed_id)
@router.delete("/seeds/{seed_id}")
async def delete_seed(seed_id: str):
"""Delete a seed."""
pool = await get_db_pool()
async with pool.acquire() as conn:
result = await conn.execute(
"DELETE FROM edu_search_seeds WHERE id = $1",
seed_id
)
if result == "DELETE 0":
raise HTTPException(status_code=404, detail="Seed nicht gefunden")
return {"status": "deleted", "id": seed_id}
@router.post("/seeds/bulk-import", response_model=BulkImportResponse)
async def bulk_import_seeds(request: BulkImportRequest):
"""Bulk import seeds (skip duplicates)."""
pool = await get_db_pool()
imported = 0
skipped = 0
errors = []
async with pool.acquire() as conn:
# Pre-fetch all category IDs
categories = {}
rows = await conn.fetch("SELECT id, name FROM edu_search_categories")
for row in rows:
categories[row["name"]] = row["id"]
for seed in request.seeds:
try:
category_id = categories.get(seed.category_name) if seed.category_name else None
await conn.execute("""
INSERT INTO edu_search_seeds (
url, name, description, category_id, source_type, scope,
state, trust_boost, enabled, crawl_depth, crawl_frequency
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11)
ON CONFLICT (url) DO NOTHING
""",
seed.url, seed.name, seed.description, category_id,
seed.source_type, seed.scope, seed.state, seed.trust_boost,
seed.enabled, seed.crawl_depth, seed.crawl_frequency
)
imported += 1
except asyncpg.UniqueViolationError:
skipped += 1
except Exception as e:
errors.append(f"{seed.url}: {str(e)}")
return BulkImportResponse(imported=imported, skipped=skipped, errors=errors)
def _row_to_seed_response(row) -> SeedResponse:
"""Convert a database row to SeedResponse."""
return SeedResponse(
id=str(row["id"]),
url=row["url"],
name=row["name"],
description=row["description"],
category=row["category"],
category_display_name=row["category_display_name"],
source_type=row["source_type"],
scope=row["scope"],
state=row["state"],
trust_boost=float(row["trust_boost"]),
enabled=row["enabled"],
crawl_depth=row["crawl_depth"],
crawl_frequency=row["crawl_frequency"],
last_crawled_at=row["last_crawled_at"],
last_crawl_status=row["last_crawl_status"],
last_crawl_docs=row["last_crawl_docs"] or 0,
total_documents=row["total_documents"] or 0,
created_at=row["created_at"],
updated_at=row["updated_at"],
)