[split-required] Split 700-870 LOC files across all services
backend-lehrer (11 files): - llm_gateway/routes/schools.py (867 → 5), recording_api.py (848 → 6) - messenger_api.py (840 → 5), print_generator.py (824 → 5) - unit_analytics_api.py (751 → 5), classroom/routes/context.py (726 → 4) - llm_gateway/routes/edu_search_seeds.py (710 → 4) klausur-service (12 files): - ocr_labeling_api.py (845 → 4), metrics_db.py (833 → 4) - legal_corpus_api.py (790 → 4), page_crop.py (758 → 3) - mail/ai_service.py (747 → 4), github_crawler.py (767 → 3) - trocr_service.py (730 → 4), full_compliance_pipeline.py (723 → 4) - dsfa_rag_api.py (715 → 4), ocr_pipeline_auto.py (705 → 4) website (6 pages): - audit-checklist (867 → 8), content (806 → 6) - screen-flow (790 → 4), scraper (789 → 5) - zeugnisse (776 → 5), modules (745 → 4) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
386
backend-lehrer/llm_gateway/routes/edu_search_crud.py
Normal file
386
backend-lehrer/llm_gateway/routes/edu_search_crud.py
Normal file
@@ -0,0 +1,386 @@
|
||||
"""
|
||||
EduSearch Seeds CRUD Routes.
|
||||
|
||||
List, get, create, update, delete, and bulk import for seed URLs.
|
||||
"""
|
||||
|
||||
import os
|
||||
import logging
|
||||
from typing import Optional, List
|
||||
from datetime import datetime
|
||||
|
||||
from fastapi import APIRouter, HTTPException, Query
|
||||
import asyncpg
|
||||
|
||||
from .edu_search_models import (
|
||||
CategoryResponse,
|
||||
SeedCreate,
|
||||
SeedUpdate,
|
||||
SeedResponse,
|
||||
SeedsListResponse,
|
||||
BulkImportRequest,
|
||||
BulkImportResponse,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(tags=["edu-search"])
|
||||
|
||||
# Database connection pool
|
||||
_pool: Optional[asyncpg.Pool] = None
|
||||
|
||||
|
||||
async def get_db_pool() -> asyncpg.Pool:
|
||||
"""Get or create database connection pool."""
|
||||
global _pool
|
||||
if _pool is None:
|
||||
database_url = os.environ.get("DATABASE_URL")
|
||||
if not database_url:
|
||||
raise RuntimeError("DATABASE_URL nicht konfiguriert - bitte via Vault oder Umgebungsvariable setzen")
|
||||
_pool = await asyncpg.create_pool(database_url, min_size=2, max_size=10)
|
||||
return _pool
|
||||
|
||||
|
||||
@router.get("/categories", response_model=List[CategoryResponse])
|
||||
async def list_categories():
|
||||
"""List all seed categories."""
|
||||
pool = await get_db_pool()
|
||||
async with pool.acquire() as conn:
|
||||
rows = await conn.fetch("""
|
||||
SELECT id, name, display_name, description, icon, sort_order, is_active
|
||||
FROM edu_search_categories
|
||||
WHERE is_active = TRUE
|
||||
ORDER BY sort_order
|
||||
""")
|
||||
return [
|
||||
CategoryResponse(
|
||||
id=str(row["id"]),
|
||||
name=row["name"],
|
||||
display_name=row["display_name"],
|
||||
description=row["description"],
|
||||
icon=row["icon"],
|
||||
sort_order=row["sort_order"],
|
||||
is_active=row["is_active"],
|
||||
)
|
||||
for row in rows
|
||||
]
|
||||
|
||||
|
||||
@router.get("/seeds", response_model=SeedsListResponse)
|
||||
async def list_seeds(
|
||||
category: Optional[str] = Query(None, description="Filter by category name"),
|
||||
state: Optional[str] = Query(None, description="Filter by state code"),
|
||||
enabled: Optional[bool] = Query(None, description="Filter by enabled status"),
|
||||
search: Optional[str] = Query(None, description="Search in name/url"),
|
||||
page: int = Query(1, ge=1),
|
||||
page_size: int = Query(50, ge=1, le=200),
|
||||
):
|
||||
"""List seeds with optional filtering and pagination."""
|
||||
pool = await get_db_pool()
|
||||
async with pool.acquire() as conn:
|
||||
# Build WHERE clause
|
||||
conditions = []
|
||||
params = []
|
||||
param_idx = 1
|
||||
|
||||
if category:
|
||||
conditions.append(f"c.name = ${param_idx}")
|
||||
params.append(category)
|
||||
param_idx += 1
|
||||
|
||||
if state:
|
||||
conditions.append(f"s.state = ${param_idx}")
|
||||
params.append(state)
|
||||
param_idx += 1
|
||||
|
||||
if enabled is not None:
|
||||
conditions.append(f"s.enabled = ${param_idx}")
|
||||
params.append(enabled)
|
||||
param_idx += 1
|
||||
|
||||
if search:
|
||||
conditions.append(f"(s.name ILIKE ${param_idx} OR s.url ILIKE ${param_idx})")
|
||||
params.append(f"%{search}%")
|
||||
param_idx += 1
|
||||
|
||||
where_clause = " AND ".join(conditions) if conditions else "TRUE"
|
||||
|
||||
# Count total
|
||||
count_query = f"""
|
||||
SELECT COUNT(*) FROM edu_search_seeds s
|
||||
LEFT JOIN edu_search_categories c ON s.category_id = c.id
|
||||
WHERE {where_clause}
|
||||
"""
|
||||
total = await conn.fetchval(count_query, *params)
|
||||
|
||||
# Get paginated results
|
||||
offset = (page - 1) * page_size
|
||||
params.extend([page_size, offset])
|
||||
|
||||
query = f"""
|
||||
SELECT
|
||||
s.id, s.url, s.name, s.description,
|
||||
c.name as category, c.display_name as category_display_name,
|
||||
s.source_type, s.scope, s.state, s.trust_boost, s.enabled,
|
||||
s.crawl_depth, s.crawl_frequency, s.last_crawled_at,
|
||||
s.last_crawl_status, s.last_crawl_docs, s.total_documents,
|
||||
s.created_at, s.updated_at
|
||||
FROM edu_search_seeds s
|
||||
LEFT JOIN edu_search_categories c ON s.category_id = c.id
|
||||
WHERE {where_clause}
|
||||
ORDER BY c.sort_order, s.name
|
||||
LIMIT ${param_idx} OFFSET ${param_idx + 1}
|
||||
"""
|
||||
|
||||
rows = await conn.fetch(query, *params)
|
||||
|
||||
seeds = [_row_to_seed_response(row) for row in rows]
|
||||
|
||||
return SeedsListResponse(
|
||||
seeds=seeds,
|
||||
total=total,
|
||||
page=page,
|
||||
page_size=page_size,
|
||||
)
|
||||
|
||||
|
||||
@router.get("/seeds/{seed_id}", response_model=SeedResponse)
|
||||
async def get_seed(seed_id: str):
|
||||
"""Get a single seed by ID."""
|
||||
pool = await get_db_pool()
|
||||
async with pool.acquire() as conn:
|
||||
row = await conn.fetchrow("""
|
||||
SELECT
|
||||
s.id, s.url, s.name, s.description,
|
||||
c.name as category, c.display_name as category_display_name,
|
||||
s.source_type, s.scope, s.state, s.trust_boost, s.enabled,
|
||||
s.crawl_depth, s.crawl_frequency, s.last_crawled_at,
|
||||
s.last_crawl_status, s.last_crawl_docs, s.total_documents,
|
||||
s.created_at, s.updated_at
|
||||
FROM edu_search_seeds s
|
||||
LEFT JOIN edu_search_categories c ON s.category_id = c.id
|
||||
WHERE s.id = $1
|
||||
""", seed_id)
|
||||
|
||||
if not row:
|
||||
raise HTTPException(status_code=404, detail="Seed nicht gefunden")
|
||||
|
||||
return _row_to_seed_response(row)
|
||||
|
||||
|
||||
@router.post("/seeds", response_model=SeedResponse, status_code=201)
|
||||
async def create_seed(seed: SeedCreate):
|
||||
"""Create a new seed URL."""
|
||||
pool = await get_db_pool()
|
||||
async with pool.acquire() as conn:
|
||||
category_id = None
|
||||
if seed.category_name:
|
||||
category_id = await conn.fetchval(
|
||||
"SELECT id FROM edu_search_categories WHERE name = $1",
|
||||
seed.category_name
|
||||
)
|
||||
|
||||
try:
|
||||
row = await conn.fetchrow("""
|
||||
INSERT INTO edu_search_seeds (
|
||||
url, name, description, category_id, source_type, scope,
|
||||
state, trust_boost, enabled, crawl_depth, crawl_frequency
|
||||
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11)
|
||||
RETURNING id, created_at, updated_at
|
||||
""",
|
||||
seed.url, seed.name, seed.description, category_id,
|
||||
seed.source_type, seed.scope, seed.state, seed.trust_boost,
|
||||
seed.enabled, seed.crawl_depth, seed.crawl_frequency
|
||||
)
|
||||
except asyncpg.UniqueViolationError:
|
||||
raise HTTPException(status_code=409, detail="URL existiert bereits")
|
||||
|
||||
return SeedResponse(
|
||||
id=str(row["id"]),
|
||||
url=seed.url,
|
||||
name=seed.name,
|
||||
description=seed.description,
|
||||
category=seed.category_name,
|
||||
category_display_name=None,
|
||||
source_type=seed.source_type,
|
||||
scope=seed.scope,
|
||||
state=seed.state,
|
||||
trust_boost=seed.trust_boost,
|
||||
enabled=seed.enabled,
|
||||
crawl_depth=seed.crawl_depth,
|
||||
crawl_frequency=seed.crawl_frequency,
|
||||
last_crawled_at=None,
|
||||
last_crawl_status=None,
|
||||
last_crawl_docs=0,
|
||||
total_documents=0,
|
||||
created_at=row["created_at"],
|
||||
updated_at=row["updated_at"],
|
||||
)
|
||||
|
||||
|
||||
@router.put("/seeds/{seed_id}", response_model=SeedResponse)
|
||||
async def update_seed(seed_id: str, seed: SeedUpdate):
|
||||
"""Update an existing seed."""
|
||||
pool = await get_db_pool()
|
||||
async with pool.acquire() as conn:
|
||||
updates = []
|
||||
params = []
|
||||
param_idx = 1
|
||||
|
||||
if seed.url is not None:
|
||||
updates.append(f"url = ${param_idx}")
|
||||
params.append(seed.url)
|
||||
param_idx += 1
|
||||
|
||||
if seed.name is not None:
|
||||
updates.append(f"name = ${param_idx}")
|
||||
params.append(seed.name)
|
||||
param_idx += 1
|
||||
|
||||
if seed.description is not None:
|
||||
updates.append(f"description = ${param_idx}")
|
||||
params.append(seed.description)
|
||||
param_idx += 1
|
||||
|
||||
if seed.category_name is not None:
|
||||
category_id = await conn.fetchval(
|
||||
"SELECT id FROM edu_search_categories WHERE name = $1",
|
||||
seed.category_name
|
||||
)
|
||||
updates.append(f"category_id = ${param_idx}")
|
||||
params.append(category_id)
|
||||
param_idx += 1
|
||||
|
||||
if seed.source_type is not None:
|
||||
updates.append(f"source_type = ${param_idx}")
|
||||
params.append(seed.source_type)
|
||||
param_idx += 1
|
||||
|
||||
if seed.scope is not None:
|
||||
updates.append(f"scope = ${param_idx}")
|
||||
params.append(seed.scope)
|
||||
param_idx += 1
|
||||
|
||||
if seed.state is not None:
|
||||
updates.append(f"state = ${param_idx}")
|
||||
params.append(seed.state)
|
||||
param_idx += 1
|
||||
|
||||
if seed.trust_boost is not None:
|
||||
updates.append(f"trust_boost = ${param_idx}")
|
||||
params.append(seed.trust_boost)
|
||||
param_idx += 1
|
||||
|
||||
if seed.enabled is not None:
|
||||
updates.append(f"enabled = ${param_idx}")
|
||||
params.append(seed.enabled)
|
||||
param_idx += 1
|
||||
|
||||
if seed.crawl_depth is not None:
|
||||
updates.append(f"crawl_depth = ${param_idx}")
|
||||
params.append(seed.crawl_depth)
|
||||
param_idx += 1
|
||||
|
||||
if seed.crawl_frequency is not None:
|
||||
updates.append(f"crawl_frequency = ${param_idx}")
|
||||
params.append(seed.crawl_frequency)
|
||||
param_idx += 1
|
||||
|
||||
if not updates:
|
||||
raise HTTPException(status_code=400, detail="Keine Felder zum Aktualisieren")
|
||||
|
||||
updates.append("updated_at = NOW()")
|
||||
params.append(seed_id)
|
||||
|
||||
query = f"""
|
||||
UPDATE edu_search_seeds
|
||||
SET {", ".join(updates)}
|
||||
WHERE id = ${param_idx}
|
||||
RETURNING id
|
||||
"""
|
||||
|
||||
result = await conn.fetchrow(query, *params)
|
||||
if not result:
|
||||
raise HTTPException(status_code=404, detail="Seed nicht gefunden")
|
||||
|
||||
# Return updated seed
|
||||
return await get_seed(seed_id)
|
||||
|
||||
|
||||
@router.delete("/seeds/{seed_id}")
|
||||
async def delete_seed(seed_id: str):
|
||||
"""Delete a seed."""
|
||||
pool = await get_db_pool()
|
||||
async with pool.acquire() as conn:
|
||||
result = await conn.execute(
|
||||
"DELETE FROM edu_search_seeds WHERE id = $1",
|
||||
seed_id
|
||||
)
|
||||
if result == "DELETE 0":
|
||||
raise HTTPException(status_code=404, detail="Seed nicht gefunden")
|
||||
|
||||
return {"status": "deleted", "id": seed_id}
|
||||
|
||||
|
||||
@router.post("/seeds/bulk-import", response_model=BulkImportResponse)
|
||||
async def bulk_import_seeds(request: BulkImportRequest):
|
||||
"""Bulk import seeds (skip duplicates)."""
|
||||
pool = await get_db_pool()
|
||||
imported = 0
|
||||
skipped = 0
|
||||
errors = []
|
||||
|
||||
async with pool.acquire() as conn:
|
||||
# Pre-fetch all category IDs
|
||||
categories = {}
|
||||
rows = await conn.fetch("SELECT id, name FROM edu_search_categories")
|
||||
for row in rows:
|
||||
categories[row["name"]] = row["id"]
|
||||
|
||||
for seed in request.seeds:
|
||||
try:
|
||||
category_id = categories.get(seed.category_name) if seed.category_name else None
|
||||
|
||||
await conn.execute("""
|
||||
INSERT INTO edu_search_seeds (
|
||||
url, name, description, category_id, source_type, scope,
|
||||
state, trust_boost, enabled, crawl_depth, crawl_frequency
|
||||
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11)
|
||||
ON CONFLICT (url) DO NOTHING
|
||||
""",
|
||||
seed.url, seed.name, seed.description, category_id,
|
||||
seed.source_type, seed.scope, seed.state, seed.trust_boost,
|
||||
seed.enabled, seed.crawl_depth, seed.crawl_frequency
|
||||
)
|
||||
imported += 1
|
||||
except asyncpg.UniqueViolationError:
|
||||
skipped += 1
|
||||
except Exception as e:
|
||||
errors.append(f"{seed.url}: {str(e)}")
|
||||
|
||||
return BulkImportResponse(imported=imported, skipped=skipped, errors=errors)
|
||||
|
||||
|
||||
def _row_to_seed_response(row) -> SeedResponse:
|
||||
"""Convert a database row to SeedResponse."""
|
||||
return SeedResponse(
|
||||
id=str(row["id"]),
|
||||
url=row["url"],
|
||||
name=row["name"],
|
||||
description=row["description"],
|
||||
category=row["category"],
|
||||
category_display_name=row["category_display_name"],
|
||||
source_type=row["source_type"],
|
||||
scope=row["scope"],
|
||||
state=row["state"],
|
||||
trust_boost=float(row["trust_boost"]),
|
||||
enabled=row["enabled"],
|
||||
crawl_depth=row["crawl_depth"],
|
||||
crawl_frequency=row["crawl_frequency"],
|
||||
last_crawled_at=row["last_crawled_at"],
|
||||
last_crawl_status=row["last_crawl_status"],
|
||||
last_crawl_docs=row["last_crawl_docs"] or 0,
|
||||
total_documents=row["total_documents"] or 0,
|
||||
created_at=row["created_at"],
|
||||
updated_at=row["updated_at"],
|
||||
)
|
||||
Reference in New Issue
Block a user