[split-required] Split 700-870 LOC files across all services
backend-lehrer (11 files): - llm_gateway/routes/schools.py (867 → 5), recording_api.py (848 → 6) - messenger_api.py (840 → 5), print_generator.py (824 → 5) - unit_analytics_api.py (751 → 5), classroom/routes/context.py (726 → 4) - llm_gateway/routes/edu_search_seeds.py (710 → 4) klausur-service (12 files): - ocr_labeling_api.py (845 → 4), metrics_db.py (833 → 4) - legal_corpus_api.py (790 → 4), page_crop.py (758 → 3) - mail/ai_service.py (747 → 4), github_crawler.py (767 → 3) - trocr_service.py (730 → 4), full_compliance_pipeline.py (723 → 4) - dsfa_rag_api.py (715 → 4), ocr_pipeline_auto.py (705 → 4) website (6 pages): - audit-checklist (867 → 8), content (806 → 6) - screen-flow (790 → 4), scraper (789 → 5) - zeugnisse (776 → 5), modules (745 → 4) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
386
backend-lehrer/llm_gateway/routes/edu_search_crud.py
Normal file
386
backend-lehrer/llm_gateway/routes/edu_search_crud.py
Normal file
@@ -0,0 +1,386 @@
|
||||
"""
|
||||
EduSearch Seeds CRUD Routes.
|
||||
|
||||
List, get, create, update, delete, and bulk import for seed URLs.
|
||||
"""
|
||||
|
||||
import os
|
||||
import logging
|
||||
from typing import Optional, List
|
||||
from datetime import datetime
|
||||
|
||||
from fastapi import APIRouter, HTTPException, Query
|
||||
import asyncpg
|
||||
|
||||
from .edu_search_models import (
|
||||
CategoryResponse,
|
||||
SeedCreate,
|
||||
SeedUpdate,
|
||||
SeedResponse,
|
||||
SeedsListResponse,
|
||||
BulkImportRequest,
|
||||
BulkImportResponse,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(tags=["edu-search"])
|
||||
|
||||
# Database connection pool
|
||||
_pool: Optional[asyncpg.Pool] = None
|
||||
|
||||
|
||||
async def get_db_pool() -> asyncpg.Pool:
|
||||
"""Get or create database connection pool."""
|
||||
global _pool
|
||||
if _pool is None:
|
||||
database_url = os.environ.get("DATABASE_URL")
|
||||
if not database_url:
|
||||
raise RuntimeError("DATABASE_URL nicht konfiguriert - bitte via Vault oder Umgebungsvariable setzen")
|
||||
_pool = await asyncpg.create_pool(database_url, min_size=2, max_size=10)
|
||||
return _pool
|
||||
|
||||
|
||||
@router.get("/categories", response_model=List[CategoryResponse])
|
||||
async def list_categories():
|
||||
"""List all seed categories."""
|
||||
pool = await get_db_pool()
|
||||
async with pool.acquire() as conn:
|
||||
rows = await conn.fetch("""
|
||||
SELECT id, name, display_name, description, icon, sort_order, is_active
|
||||
FROM edu_search_categories
|
||||
WHERE is_active = TRUE
|
||||
ORDER BY sort_order
|
||||
""")
|
||||
return [
|
||||
CategoryResponse(
|
||||
id=str(row["id"]),
|
||||
name=row["name"],
|
||||
display_name=row["display_name"],
|
||||
description=row["description"],
|
||||
icon=row["icon"],
|
||||
sort_order=row["sort_order"],
|
||||
is_active=row["is_active"],
|
||||
)
|
||||
for row in rows
|
||||
]
|
||||
|
||||
|
||||
@router.get("/seeds", response_model=SeedsListResponse)
|
||||
async def list_seeds(
|
||||
category: Optional[str] = Query(None, description="Filter by category name"),
|
||||
state: Optional[str] = Query(None, description="Filter by state code"),
|
||||
enabled: Optional[bool] = Query(None, description="Filter by enabled status"),
|
||||
search: Optional[str] = Query(None, description="Search in name/url"),
|
||||
page: int = Query(1, ge=1),
|
||||
page_size: int = Query(50, ge=1, le=200),
|
||||
):
|
||||
"""List seeds with optional filtering and pagination."""
|
||||
pool = await get_db_pool()
|
||||
async with pool.acquire() as conn:
|
||||
# Build WHERE clause
|
||||
conditions = []
|
||||
params = []
|
||||
param_idx = 1
|
||||
|
||||
if category:
|
||||
conditions.append(f"c.name = ${param_idx}")
|
||||
params.append(category)
|
||||
param_idx += 1
|
||||
|
||||
if state:
|
||||
conditions.append(f"s.state = ${param_idx}")
|
||||
params.append(state)
|
||||
param_idx += 1
|
||||
|
||||
if enabled is not None:
|
||||
conditions.append(f"s.enabled = ${param_idx}")
|
||||
params.append(enabled)
|
||||
param_idx += 1
|
||||
|
||||
if search:
|
||||
conditions.append(f"(s.name ILIKE ${param_idx} OR s.url ILIKE ${param_idx})")
|
||||
params.append(f"%{search}%")
|
||||
param_idx += 1
|
||||
|
||||
where_clause = " AND ".join(conditions) if conditions else "TRUE"
|
||||
|
||||
# Count total
|
||||
count_query = f"""
|
||||
SELECT COUNT(*) FROM edu_search_seeds s
|
||||
LEFT JOIN edu_search_categories c ON s.category_id = c.id
|
||||
WHERE {where_clause}
|
||||
"""
|
||||
total = await conn.fetchval(count_query, *params)
|
||||
|
||||
# Get paginated results
|
||||
offset = (page - 1) * page_size
|
||||
params.extend([page_size, offset])
|
||||
|
||||
query = f"""
|
||||
SELECT
|
||||
s.id, s.url, s.name, s.description,
|
||||
c.name as category, c.display_name as category_display_name,
|
||||
s.source_type, s.scope, s.state, s.trust_boost, s.enabled,
|
||||
s.crawl_depth, s.crawl_frequency, s.last_crawled_at,
|
||||
s.last_crawl_status, s.last_crawl_docs, s.total_documents,
|
||||
s.created_at, s.updated_at
|
||||
FROM edu_search_seeds s
|
||||
LEFT JOIN edu_search_categories c ON s.category_id = c.id
|
||||
WHERE {where_clause}
|
||||
ORDER BY c.sort_order, s.name
|
||||
LIMIT ${param_idx} OFFSET ${param_idx + 1}
|
||||
"""
|
||||
|
||||
rows = await conn.fetch(query, *params)
|
||||
|
||||
seeds = [_row_to_seed_response(row) for row in rows]
|
||||
|
||||
return SeedsListResponse(
|
||||
seeds=seeds,
|
||||
total=total,
|
||||
page=page,
|
||||
page_size=page_size,
|
||||
)
|
||||
|
||||
|
||||
@router.get("/seeds/{seed_id}", response_model=SeedResponse)
|
||||
async def get_seed(seed_id: str):
|
||||
"""Get a single seed by ID."""
|
||||
pool = await get_db_pool()
|
||||
async with pool.acquire() as conn:
|
||||
row = await conn.fetchrow("""
|
||||
SELECT
|
||||
s.id, s.url, s.name, s.description,
|
||||
c.name as category, c.display_name as category_display_name,
|
||||
s.source_type, s.scope, s.state, s.trust_boost, s.enabled,
|
||||
s.crawl_depth, s.crawl_frequency, s.last_crawled_at,
|
||||
s.last_crawl_status, s.last_crawl_docs, s.total_documents,
|
||||
s.created_at, s.updated_at
|
||||
FROM edu_search_seeds s
|
||||
LEFT JOIN edu_search_categories c ON s.category_id = c.id
|
||||
WHERE s.id = $1
|
||||
""", seed_id)
|
||||
|
||||
if not row:
|
||||
raise HTTPException(status_code=404, detail="Seed nicht gefunden")
|
||||
|
||||
return _row_to_seed_response(row)
|
||||
|
||||
|
||||
@router.post("/seeds", response_model=SeedResponse, status_code=201)
|
||||
async def create_seed(seed: SeedCreate):
|
||||
"""Create a new seed URL."""
|
||||
pool = await get_db_pool()
|
||||
async with pool.acquire() as conn:
|
||||
category_id = None
|
||||
if seed.category_name:
|
||||
category_id = await conn.fetchval(
|
||||
"SELECT id FROM edu_search_categories WHERE name = $1",
|
||||
seed.category_name
|
||||
)
|
||||
|
||||
try:
|
||||
row = await conn.fetchrow("""
|
||||
INSERT INTO edu_search_seeds (
|
||||
url, name, description, category_id, source_type, scope,
|
||||
state, trust_boost, enabled, crawl_depth, crawl_frequency
|
||||
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11)
|
||||
RETURNING id, created_at, updated_at
|
||||
""",
|
||||
seed.url, seed.name, seed.description, category_id,
|
||||
seed.source_type, seed.scope, seed.state, seed.trust_boost,
|
||||
seed.enabled, seed.crawl_depth, seed.crawl_frequency
|
||||
)
|
||||
except asyncpg.UniqueViolationError:
|
||||
raise HTTPException(status_code=409, detail="URL existiert bereits")
|
||||
|
||||
return SeedResponse(
|
||||
id=str(row["id"]),
|
||||
url=seed.url,
|
||||
name=seed.name,
|
||||
description=seed.description,
|
||||
category=seed.category_name,
|
||||
category_display_name=None,
|
||||
source_type=seed.source_type,
|
||||
scope=seed.scope,
|
||||
state=seed.state,
|
||||
trust_boost=seed.trust_boost,
|
||||
enabled=seed.enabled,
|
||||
crawl_depth=seed.crawl_depth,
|
||||
crawl_frequency=seed.crawl_frequency,
|
||||
last_crawled_at=None,
|
||||
last_crawl_status=None,
|
||||
last_crawl_docs=0,
|
||||
total_documents=0,
|
||||
created_at=row["created_at"],
|
||||
updated_at=row["updated_at"],
|
||||
)
|
||||
|
||||
|
||||
@router.put("/seeds/{seed_id}", response_model=SeedResponse)
|
||||
async def update_seed(seed_id: str, seed: SeedUpdate):
|
||||
"""Update an existing seed."""
|
||||
pool = await get_db_pool()
|
||||
async with pool.acquire() as conn:
|
||||
updates = []
|
||||
params = []
|
||||
param_idx = 1
|
||||
|
||||
if seed.url is not None:
|
||||
updates.append(f"url = ${param_idx}")
|
||||
params.append(seed.url)
|
||||
param_idx += 1
|
||||
|
||||
if seed.name is not None:
|
||||
updates.append(f"name = ${param_idx}")
|
||||
params.append(seed.name)
|
||||
param_idx += 1
|
||||
|
||||
if seed.description is not None:
|
||||
updates.append(f"description = ${param_idx}")
|
||||
params.append(seed.description)
|
||||
param_idx += 1
|
||||
|
||||
if seed.category_name is not None:
|
||||
category_id = await conn.fetchval(
|
||||
"SELECT id FROM edu_search_categories WHERE name = $1",
|
||||
seed.category_name
|
||||
)
|
||||
updates.append(f"category_id = ${param_idx}")
|
||||
params.append(category_id)
|
||||
param_idx += 1
|
||||
|
||||
if seed.source_type is not None:
|
||||
updates.append(f"source_type = ${param_idx}")
|
||||
params.append(seed.source_type)
|
||||
param_idx += 1
|
||||
|
||||
if seed.scope is not None:
|
||||
updates.append(f"scope = ${param_idx}")
|
||||
params.append(seed.scope)
|
||||
param_idx += 1
|
||||
|
||||
if seed.state is not None:
|
||||
updates.append(f"state = ${param_idx}")
|
||||
params.append(seed.state)
|
||||
param_idx += 1
|
||||
|
||||
if seed.trust_boost is not None:
|
||||
updates.append(f"trust_boost = ${param_idx}")
|
||||
params.append(seed.trust_boost)
|
||||
param_idx += 1
|
||||
|
||||
if seed.enabled is not None:
|
||||
updates.append(f"enabled = ${param_idx}")
|
||||
params.append(seed.enabled)
|
||||
param_idx += 1
|
||||
|
||||
if seed.crawl_depth is not None:
|
||||
updates.append(f"crawl_depth = ${param_idx}")
|
||||
params.append(seed.crawl_depth)
|
||||
param_idx += 1
|
||||
|
||||
if seed.crawl_frequency is not None:
|
||||
updates.append(f"crawl_frequency = ${param_idx}")
|
||||
params.append(seed.crawl_frequency)
|
||||
param_idx += 1
|
||||
|
||||
if not updates:
|
||||
raise HTTPException(status_code=400, detail="Keine Felder zum Aktualisieren")
|
||||
|
||||
updates.append("updated_at = NOW()")
|
||||
params.append(seed_id)
|
||||
|
||||
query = f"""
|
||||
UPDATE edu_search_seeds
|
||||
SET {", ".join(updates)}
|
||||
WHERE id = ${param_idx}
|
||||
RETURNING id
|
||||
"""
|
||||
|
||||
result = await conn.fetchrow(query, *params)
|
||||
if not result:
|
||||
raise HTTPException(status_code=404, detail="Seed nicht gefunden")
|
||||
|
||||
# Return updated seed
|
||||
return await get_seed(seed_id)
|
||||
|
||||
|
||||
@router.delete("/seeds/{seed_id}")
|
||||
async def delete_seed(seed_id: str):
|
||||
"""Delete a seed."""
|
||||
pool = await get_db_pool()
|
||||
async with pool.acquire() as conn:
|
||||
result = await conn.execute(
|
||||
"DELETE FROM edu_search_seeds WHERE id = $1",
|
||||
seed_id
|
||||
)
|
||||
if result == "DELETE 0":
|
||||
raise HTTPException(status_code=404, detail="Seed nicht gefunden")
|
||||
|
||||
return {"status": "deleted", "id": seed_id}
|
||||
|
||||
|
||||
@router.post("/seeds/bulk-import", response_model=BulkImportResponse)
|
||||
async def bulk_import_seeds(request: BulkImportRequest):
|
||||
"""Bulk import seeds (skip duplicates)."""
|
||||
pool = await get_db_pool()
|
||||
imported = 0
|
||||
skipped = 0
|
||||
errors = []
|
||||
|
||||
async with pool.acquire() as conn:
|
||||
# Pre-fetch all category IDs
|
||||
categories = {}
|
||||
rows = await conn.fetch("SELECT id, name FROM edu_search_categories")
|
||||
for row in rows:
|
||||
categories[row["name"]] = row["id"]
|
||||
|
||||
for seed in request.seeds:
|
||||
try:
|
||||
category_id = categories.get(seed.category_name) if seed.category_name else None
|
||||
|
||||
await conn.execute("""
|
||||
INSERT INTO edu_search_seeds (
|
||||
url, name, description, category_id, source_type, scope,
|
||||
state, trust_boost, enabled, crawl_depth, crawl_frequency
|
||||
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11)
|
||||
ON CONFLICT (url) DO NOTHING
|
||||
""",
|
||||
seed.url, seed.name, seed.description, category_id,
|
||||
seed.source_type, seed.scope, seed.state, seed.trust_boost,
|
||||
seed.enabled, seed.crawl_depth, seed.crawl_frequency
|
||||
)
|
||||
imported += 1
|
||||
except asyncpg.UniqueViolationError:
|
||||
skipped += 1
|
||||
except Exception as e:
|
||||
errors.append(f"{seed.url}: {str(e)}")
|
||||
|
||||
return BulkImportResponse(imported=imported, skipped=skipped, errors=errors)
|
||||
|
||||
|
||||
def _row_to_seed_response(row) -> SeedResponse:
|
||||
"""Convert a database row to SeedResponse."""
|
||||
return SeedResponse(
|
||||
id=str(row["id"]),
|
||||
url=row["url"],
|
||||
name=row["name"],
|
||||
description=row["description"],
|
||||
category=row["category"],
|
||||
category_display_name=row["category_display_name"],
|
||||
source_type=row["source_type"],
|
||||
scope=row["scope"],
|
||||
state=row["state"],
|
||||
trust_boost=float(row["trust_boost"]),
|
||||
enabled=row["enabled"],
|
||||
crawl_depth=row["crawl_depth"],
|
||||
crawl_frequency=row["crawl_frequency"],
|
||||
last_crawled_at=row["last_crawled_at"],
|
||||
last_crawl_status=row["last_crawl_status"],
|
||||
last_crawl_docs=row["last_crawl_docs"] or 0,
|
||||
total_documents=row["total_documents"] or 0,
|
||||
created_at=row["created_at"],
|
||||
updated_at=row["updated_at"],
|
||||
)
|
||||
137
backend-lehrer/llm_gateway/routes/edu_search_models.py
Normal file
137
backend-lehrer/llm_gateway/routes/edu_search_models.py
Normal file
@@ -0,0 +1,137 @@
|
||||
"""
|
||||
EduSearch Seeds Pydantic Models.
|
||||
|
||||
Request/Response models for the education search seed URL API.
|
||||
"""
|
||||
|
||||
from typing import Optional, List
|
||||
from datetime import datetime
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class CategoryResponse(BaseModel):
|
||||
"""Category response model."""
|
||||
id: str
|
||||
name: str
|
||||
display_name: str
|
||||
description: Optional[str] = None
|
||||
icon: Optional[str] = None
|
||||
sort_order: int
|
||||
is_active: bool
|
||||
|
||||
|
||||
class SeedBase(BaseModel):
|
||||
"""Base seed model for creation/update."""
|
||||
url: str = Field(..., max_length=500)
|
||||
name: str = Field(..., max_length=255)
|
||||
description: Optional[str] = None
|
||||
category_name: Optional[str] = Field(None, description="Category name (federal, states, etc.)")
|
||||
source_type: str = Field("GOV", description="GOV, EDU, UNI, etc.")
|
||||
scope: str = Field("FEDERAL", description="FEDERAL, STATE, etc.")
|
||||
state: Optional[str] = Field(None, max_length=5, description="State code (BW, BY, etc.)")
|
||||
trust_boost: float = Field(0.50, ge=0.0, le=1.0)
|
||||
enabled: bool = True
|
||||
crawl_depth: int = Field(2, ge=1, le=5)
|
||||
crawl_frequency: str = Field("weekly", description="hourly, daily, weekly, monthly")
|
||||
|
||||
|
||||
class SeedCreate(SeedBase):
|
||||
"""Seed creation model."""
|
||||
pass
|
||||
|
||||
|
||||
class SeedUpdate(BaseModel):
|
||||
"""Seed update model (all fields optional)."""
|
||||
url: Optional[str] = Field(None, max_length=500)
|
||||
name: Optional[str] = Field(None, max_length=255)
|
||||
description: Optional[str] = None
|
||||
category_name: Optional[str] = None
|
||||
source_type: Optional[str] = None
|
||||
scope: Optional[str] = None
|
||||
state: Optional[str] = Field(None, max_length=5)
|
||||
trust_boost: Optional[float] = Field(None, ge=0.0, le=1.0)
|
||||
enabled: Optional[bool] = None
|
||||
crawl_depth: Optional[int] = Field(None, ge=1, le=5)
|
||||
crawl_frequency: Optional[str] = None
|
||||
|
||||
|
||||
class SeedResponse(BaseModel):
|
||||
"""Seed response model."""
|
||||
id: str
|
||||
url: str
|
||||
name: str
|
||||
description: Optional[str] = None
|
||||
category: Optional[str] = None
|
||||
category_display_name: Optional[str] = None
|
||||
source_type: str
|
||||
scope: str
|
||||
state: Optional[str] = None
|
||||
trust_boost: float
|
||||
enabled: bool
|
||||
crawl_depth: int
|
||||
crawl_frequency: str
|
||||
last_crawled_at: Optional[datetime] = None
|
||||
last_crawl_status: Optional[str] = None
|
||||
last_crawl_docs: int = 0
|
||||
total_documents: int = 0
|
||||
created_at: datetime
|
||||
updated_at: datetime
|
||||
|
||||
|
||||
class SeedsListResponse(BaseModel):
|
||||
"""List response with pagination info."""
|
||||
seeds: List[SeedResponse]
|
||||
total: int
|
||||
page: int
|
||||
page_size: int
|
||||
|
||||
|
||||
class StatsResponse(BaseModel):
|
||||
"""Crawl statistics response."""
|
||||
total_seeds: int
|
||||
enabled_seeds: int
|
||||
total_documents: int
|
||||
seeds_by_category: dict
|
||||
seeds_by_state: dict
|
||||
last_crawl_time: Optional[datetime] = None
|
||||
|
||||
|
||||
class BulkImportRequest(BaseModel):
|
||||
"""Bulk import request."""
|
||||
seeds: List[SeedCreate]
|
||||
|
||||
|
||||
class BulkImportResponse(BaseModel):
|
||||
"""Bulk import response."""
|
||||
imported: int
|
||||
skipped: int
|
||||
errors: List[str]
|
||||
|
||||
|
||||
class CrawlStatusUpdate(BaseModel):
|
||||
"""Crawl status update from edu-search-service."""
|
||||
seed_url: str = Field(..., description="The seed URL that was crawled")
|
||||
status: str = Field(..., description="Crawl status: success, error, partial")
|
||||
documents_crawled: int = Field(0, ge=0, description="Number of documents crawled")
|
||||
error_message: Optional[str] = Field(None, description="Error message if status is error")
|
||||
crawl_duration_seconds: float = Field(0.0, ge=0.0, description="Duration of the crawl in seconds")
|
||||
|
||||
|
||||
class CrawlStatusResponse(BaseModel):
|
||||
"""Response for crawl status update."""
|
||||
success: bool
|
||||
seed_url: str
|
||||
message: str
|
||||
|
||||
|
||||
class BulkCrawlStatusUpdate(BaseModel):
|
||||
"""Bulk crawl status update."""
|
||||
updates: List[CrawlStatusUpdate]
|
||||
|
||||
|
||||
class BulkCrawlStatusResponse(BaseModel):
|
||||
"""Response for bulk crawl status update."""
|
||||
updated: int
|
||||
failed: int
|
||||
errors: List[str]
|
||||
@@ -1,710 +1,58 @@
|
||||
"""
|
||||
EduSearch Seeds API Routes.
|
||||
EduSearch Seeds API Routes — Barrel Re-export.
|
||||
|
||||
Split into submodules:
|
||||
- edu_search_models.py — Pydantic request/response models
|
||||
- edu_search_crud.py — CRUD endpoints (list, get, create, update, delete, bulk import)
|
||||
- edu_search_status.py — Stats, export for crawler, crawl status feedback
|
||||
|
||||
CRUD operations for managing education search crawler seed URLs.
|
||||
Direct database access to PostgreSQL.
|
||||
"""
|
||||
|
||||
import os
|
||||
import logging
|
||||
from typing import Optional, List
|
||||
from datetime import datetime
|
||||
from uuid import UUID
|
||||
from fastapi import APIRouter
|
||||
|
||||
from fastapi import APIRouter, HTTPException, Depends, Query
|
||||
from pydantic import BaseModel, Field, HttpUrl
|
||||
import asyncpg
|
||||
from .edu_search_crud import router as _crud_router, get_db_pool
|
||||
from .edu_search_status import router as _status_router
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
# Re-export models for consumers that import types from this module
|
||||
from .edu_search_models import (
|
||||
CategoryResponse,
|
||||
SeedBase,
|
||||
SeedCreate,
|
||||
SeedUpdate,
|
||||
SeedResponse,
|
||||
SeedsListResponse,
|
||||
StatsResponse,
|
||||
BulkImportRequest,
|
||||
BulkImportResponse,
|
||||
CrawlStatusUpdate,
|
||||
CrawlStatusResponse,
|
||||
BulkCrawlStatusUpdate,
|
||||
BulkCrawlStatusResponse,
|
||||
)
|
||||
|
||||
# Combine both sub-routers into a single router for backwards compatibility.
|
||||
# The consumer imports `from .edu_search_seeds import router as edu_search_seeds_router`.
|
||||
router = APIRouter(prefix="/edu-search", tags=["edu-search"])
|
||||
|
||||
# Database connection pool
|
||||
_pool: Optional[asyncpg.Pool] = None
|
||||
|
||||
|
||||
async def get_db_pool() -> asyncpg.Pool:
|
||||
"""Get or create database connection pool."""
|
||||
global _pool
|
||||
if _pool is None:
|
||||
database_url = os.environ.get("DATABASE_URL")
|
||||
if not database_url:
|
||||
raise RuntimeError("DATABASE_URL nicht konfiguriert - bitte via Vault oder Umgebungsvariable setzen")
|
||||
_pool = await asyncpg.create_pool(database_url, min_size=2, max_size=10)
|
||||
return _pool
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Pydantic Models
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class CategoryResponse(BaseModel):
|
||||
"""Category response model."""
|
||||
id: str
|
||||
name: str
|
||||
display_name: str
|
||||
description: Optional[str] = None
|
||||
icon: Optional[str] = None
|
||||
sort_order: int
|
||||
is_active: bool
|
||||
|
||||
|
||||
class SeedBase(BaseModel):
|
||||
"""Base seed model for creation/update."""
|
||||
url: str = Field(..., max_length=500)
|
||||
name: str = Field(..., max_length=255)
|
||||
description: Optional[str] = None
|
||||
category_name: Optional[str] = Field(None, description="Category name (federal, states, etc.)")
|
||||
source_type: str = Field("GOV", description="GOV, EDU, UNI, etc.")
|
||||
scope: str = Field("FEDERAL", description="FEDERAL, STATE, etc.")
|
||||
state: Optional[str] = Field(None, max_length=5, description="State code (BW, BY, etc.)")
|
||||
trust_boost: float = Field(0.50, ge=0.0, le=1.0)
|
||||
enabled: bool = True
|
||||
crawl_depth: int = Field(2, ge=1, le=5)
|
||||
crawl_frequency: str = Field("weekly", description="hourly, daily, weekly, monthly")
|
||||
|
||||
|
||||
class SeedCreate(SeedBase):
|
||||
"""Seed creation model."""
|
||||
pass
|
||||
|
||||
|
||||
class SeedUpdate(BaseModel):
|
||||
"""Seed update model (all fields optional)."""
|
||||
url: Optional[str] = Field(None, max_length=500)
|
||||
name: Optional[str] = Field(None, max_length=255)
|
||||
description: Optional[str] = None
|
||||
category_name: Optional[str] = None
|
||||
source_type: Optional[str] = None
|
||||
scope: Optional[str] = None
|
||||
state: Optional[str] = Field(None, max_length=5)
|
||||
trust_boost: Optional[float] = Field(None, ge=0.0, le=1.0)
|
||||
enabled: Optional[bool] = None
|
||||
crawl_depth: Optional[int] = Field(None, ge=1, le=5)
|
||||
crawl_frequency: Optional[str] = None
|
||||
|
||||
|
||||
class SeedResponse(BaseModel):
|
||||
"""Seed response model."""
|
||||
id: str
|
||||
url: str
|
||||
name: str
|
||||
description: Optional[str] = None
|
||||
category: Optional[str] = None
|
||||
category_display_name: Optional[str] = None
|
||||
source_type: str
|
||||
scope: str
|
||||
state: Optional[str] = None
|
||||
trust_boost: float
|
||||
enabled: bool
|
||||
crawl_depth: int
|
||||
crawl_frequency: str
|
||||
last_crawled_at: Optional[datetime] = None
|
||||
last_crawl_status: Optional[str] = None
|
||||
last_crawl_docs: int = 0
|
||||
total_documents: int = 0
|
||||
created_at: datetime
|
||||
updated_at: datetime
|
||||
|
||||
|
||||
class SeedsListResponse(BaseModel):
|
||||
"""List response with pagination info."""
|
||||
seeds: List[SeedResponse]
|
||||
total: int
|
||||
page: int
|
||||
page_size: int
|
||||
|
||||
|
||||
class StatsResponse(BaseModel):
|
||||
"""Crawl statistics response."""
|
||||
total_seeds: int
|
||||
enabled_seeds: int
|
||||
total_documents: int
|
||||
seeds_by_category: dict
|
||||
seeds_by_state: dict
|
||||
last_crawl_time: Optional[datetime] = None
|
||||
|
||||
|
||||
class BulkImportRequest(BaseModel):
|
||||
"""Bulk import request."""
|
||||
seeds: List[SeedCreate]
|
||||
|
||||
|
||||
class BulkImportResponse(BaseModel):
|
||||
"""Bulk import response."""
|
||||
imported: int
|
||||
skipped: int
|
||||
errors: List[str]
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# API Endpoints
|
||||
# =============================================================================
|
||||
|
||||
|
||||
@router.get("/categories", response_model=List[CategoryResponse])
|
||||
async def list_categories():
|
||||
"""List all seed categories."""
|
||||
pool = await get_db_pool()
|
||||
async with pool.acquire() as conn:
|
||||
rows = await conn.fetch("""
|
||||
SELECT id, name, display_name, description, icon, sort_order, is_active
|
||||
FROM edu_search_categories
|
||||
WHERE is_active = TRUE
|
||||
ORDER BY sort_order
|
||||
""")
|
||||
return [
|
||||
CategoryResponse(
|
||||
id=str(row["id"]),
|
||||
name=row["name"],
|
||||
display_name=row["display_name"],
|
||||
description=row["description"],
|
||||
icon=row["icon"],
|
||||
sort_order=row["sort_order"],
|
||||
is_active=row["is_active"],
|
||||
)
|
||||
for row in rows
|
||||
]
|
||||
|
||||
|
||||
@router.get("/seeds", response_model=SeedsListResponse)
|
||||
async def list_seeds(
|
||||
category: Optional[str] = Query(None, description="Filter by category name"),
|
||||
state: Optional[str] = Query(None, description="Filter by state code"),
|
||||
enabled: Optional[bool] = Query(None, description="Filter by enabled status"),
|
||||
search: Optional[str] = Query(None, description="Search in name/url"),
|
||||
page: int = Query(1, ge=1),
|
||||
page_size: int = Query(50, ge=1, le=200),
|
||||
):
|
||||
"""List seeds with optional filtering and pagination."""
|
||||
pool = await get_db_pool()
|
||||
async with pool.acquire() as conn:
|
||||
# Build WHERE clause
|
||||
conditions = []
|
||||
params = []
|
||||
param_idx = 1
|
||||
|
||||
if category:
|
||||
conditions.append(f"c.name = ${param_idx}")
|
||||
params.append(category)
|
||||
param_idx += 1
|
||||
|
||||
if state:
|
||||
conditions.append(f"s.state = ${param_idx}")
|
||||
params.append(state)
|
||||
param_idx += 1
|
||||
|
||||
if enabled is not None:
|
||||
conditions.append(f"s.enabled = ${param_idx}")
|
||||
params.append(enabled)
|
||||
param_idx += 1
|
||||
|
||||
if search:
|
||||
conditions.append(f"(s.name ILIKE ${param_idx} OR s.url ILIKE ${param_idx})")
|
||||
params.append(f"%{search}%")
|
||||
param_idx += 1
|
||||
|
||||
where_clause = " AND ".join(conditions) if conditions else "TRUE"
|
||||
|
||||
# Count total
|
||||
count_query = f"""
|
||||
SELECT COUNT(*) FROM edu_search_seeds s
|
||||
LEFT JOIN edu_search_categories c ON s.category_id = c.id
|
||||
WHERE {where_clause}
|
||||
"""
|
||||
total = await conn.fetchval(count_query, *params)
|
||||
|
||||
# Get paginated results
|
||||
offset = (page - 1) * page_size
|
||||
params.extend([page_size, offset])
|
||||
|
||||
query = f"""
|
||||
SELECT
|
||||
s.id, s.url, s.name, s.description,
|
||||
c.name as category, c.display_name as category_display_name,
|
||||
s.source_type, s.scope, s.state, s.trust_boost, s.enabled,
|
||||
s.crawl_depth, s.crawl_frequency, s.last_crawled_at,
|
||||
s.last_crawl_status, s.last_crawl_docs, s.total_documents,
|
||||
s.created_at, s.updated_at
|
||||
FROM edu_search_seeds s
|
||||
LEFT JOIN edu_search_categories c ON s.category_id = c.id
|
||||
WHERE {where_clause}
|
||||
ORDER BY c.sort_order, s.name
|
||||
LIMIT ${param_idx} OFFSET ${param_idx + 1}
|
||||
"""
|
||||
|
||||
rows = await conn.fetch(query, *params)
|
||||
|
||||
seeds = [
|
||||
SeedResponse(
|
||||
id=str(row["id"]),
|
||||
url=row["url"],
|
||||
name=row["name"],
|
||||
description=row["description"],
|
||||
category=row["category"],
|
||||
category_display_name=row["category_display_name"],
|
||||
source_type=row["source_type"],
|
||||
scope=row["scope"],
|
||||
state=row["state"],
|
||||
trust_boost=float(row["trust_boost"]),
|
||||
enabled=row["enabled"],
|
||||
crawl_depth=row["crawl_depth"],
|
||||
crawl_frequency=row["crawl_frequency"],
|
||||
last_crawled_at=row["last_crawled_at"],
|
||||
last_crawl_status=row["last_crawl_status"],
|
||||
last_crawl_docs=row["last_crawl_docs"] or 0,
|
||||
total_documents=row["total_documents"] or 0,
|
||||
created_at=row["created_at"],
|
||||
updated_at=row["updated_at"],
|
||||
)
|
||||
for row in rows
|
||||
]
|
||||
|
||||
return SeedsListResponse(
|
||||
seeds=seeds,
|
||||
total=total,
|
||||
page=page,
|
||||
page_size=page_size,
|
||||
)
|
||||
|
||||
|
||||
@router.get("/seeds/{seed_id}", response_model=SeedResponse)
|
||||
async def get_seed(seed_id: str):
|
||||
"""Get a single seed by ID."""
|
||||
pool = await get_db_pool()
|
||||
async with pool.acquire() as conn:
|
||||
row = await conn.fetchrow("""
|
||||
SELECT
|
||||
s.id, s.url, s.name, s.description,
|
||||
c.name as category, c.display_name as category_display_name,
|
||||
s.source_type, s.scope, s.state, s.trust_boost, s.enabled,
|
||||
s.crawl_depth, s.crawl_frequency, s.last_crawled_at,
|
||||
s.last_crawl_status, s.last_crawl_docs, s.total_documents,
|
||||
s.created_at, s.updated_at
|
||||
FROM edu_search_seeds s
|
||||
LEFT JOIN edu_search_categories c ON s.category_id = c.id
|
||||
WHERE s.id = $1
|
||||
""", seed_id)
|
||||
|
||||
if not row:
|
||||
raise HTTPException(status_code=404, detail="Seed nicht gefunden")
|
||||
|
||||
return SeedResponse(
|
||||
id=str(row["id"]),
|
||||
url=row["url"],
|
||||
name=row["name"],
|
||||
description=row["description"],
|
||||
category=row["category"],
|
||||
category_display_name=row["category_display_name"],
|
||||
source_type=row["source_type"],
|
||||
scope=row["scope"],
|
||||
state=row["state"],
|
||||
trust_boost=float(row["trust_boost"]),
|
||||
enabled=row["enabled"],
|
||||
crawl_depth=row["crawl_depth"],
|
||||
crawl_frequency=row["crawl_frequency"],
|
||||
last_crawled_at=row["last_crawled_at"],
|
||||
last_crawl_status=row["last_crawl_status"],
|
||||
last_crawl_docs=row["last_crawl_docs"] or 0,
|
||||
total_documents=row["total_documents"] or 0,
|
||||
created_at=row["created_at"],
|
||||
updated_at=row["updated_at"],
|
||||
)
|
||||
|
||||
|
||||
@router.post("/seeds", response_model=SeedResponse, status_code=201)
|
||||
async def create_seed(seed: SeedCreate):
|
||||
"""Create a new seed URL."""
|
||||
pool = await get_db_pool()
|
||||
async with pool.acquire() as conn:
|
||||
# Get category ID if provided
|
||||
category_id = None
|
||||
if seed.category_name:
|
||||
category_id = await conn.fetchval(
|
||||
"SELECT id FROM edu_search_categories WHERE name = $1",
|
||||
seed.category_name
|
||||
)
|
||||
|
||||
try:
|
||||
row = await conn.fetchrow("""
|
||||
INSERT INTO edu_search_seeds (
|
||||
url, name, description, category_id, source_type, scope,
|
||||
state, trust_boost, enabled, crawl_depth, crawl_frequency
|
||||
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11)
|
||||
RETURNING id, created_at, updated_at
|
||||
""",
|
||||
seed.url, seed.name, seed.description, category_id,
|
||||
seed.source_type, seed.scope, seed.state, seed.trust_boost,
|
||||
seed.enabled, seed.crawl_depth, seed.crawl_frequency
|
||||
)
|
||||
except asyncpg.UniqueViolationError:
|
||||
raise HTTPException(status_code=409, detail="URL existiert bereits")
|
||||
|
||||
return SeedResponse(
|
||||
id=str(row["id"]),
|
||||
url=seed.url,
|
||||
name=seed.name,
|
||||
description=seed.description,
|
||||
category=seed.category_name,
|
||||
category_display_name=None,
|
||||
source_type=seed.source_type,
|
||||
scope=seed.scope,
|
||||
state=seed.state,
|
||||
trust_boost=seed.trust_boost,
|
||||
enabled=seed.enabled,
|
||||
crawl_depth=seed.crawl_depth,
|
||||
crawl_frequency=seed.crawl_frequency,
|
||||
last_crawled_at=None,
|
||||
last_crawl_status=None,
|
||||
last_crawl_docs=0,
|
||||
total_documents=0,
|
||||
created_at=row["created_at"],
|
||||
updated_at=row["updated_at"],
|
||||
)
|
||||
|
||||
|
||||
@router.put("/seeds/{seed_id}", response_model=SeedResponse)
|
||||
async def update_seed(seed_id: str, seed: SeedUpdate):
|
||||
"""Update an existing seed."""
|
||||
pool = await get_db_pool()
|
||||
async with pool.acquire() as conn:
|
||||
# Build update statement dynamically
|
||||
updates = []
|
||||
params = []
|
||||
param_idx = 1
|
||||
|
||||
if seed.url is not None:
|
||||
updates.append(f"url = ${param_idx}")
|
||||
params.append(seed.url)
|
||||
param_idx += 1
|
||||
|
||||
if seed.name is not None:
|
||||
updates.append(f"name = ${param_idx}")
|
||||
params.append(seed.name)
|
||||
param_idx += 1
|
||||
|
||||
if seed.description is not None:
|
||||
updates.append(f"description = ${param_idx}")
|
||||
params.append(seed.description)
|
||||
param_idx += 1
|
||||
|
||||
if seed.category_name is not None:
|
||||
category_id = await conn.fetchval(
|
||||
"SELECT id FROM edu_search_categories WHERE name = $1",
|
||||
seed.category_name
|
||||
)
|
||||
updates.append(f"category_id = ${param_idx}")
|
||||
params.append(category_id)
|
||||
param_idx += 1
|
||||
|
||||
if seed.source_type is not None:
|
||||
updates.append(f"source_type = ${param_idx}")
|
||||
params.append(seed.source_type)
|
||||
param_idx += 1
|
||||
|
||||
if seed.scope is not None:
|
||||
updates.append(f"scope = ${param_idx}")
|
||||
params.append(seed.scope)
|
||||
param_idx += 1
|
||||
|
||||
if seed.state is not None:
|
||||
updates.append(f"state = ${param_idx}")
|
||||
params.append(seed.state)
|
||||
param_idx += 1
|
||||
|
||||
if seed.trust_boost is not None:
|
||||
updates.append(f"trust_boost = ${param_idx}")
|
||||
params.append(seed.trust_boost)
|
||||
param_idx += 1
|
||||
|
||||
if seed.enabled is not None:
|
||||
updates.append(f"enabled = ${param_idx}")
|
||||
params.append(seed.enabled)
|
||||
param_idx += 1
|
||||
|
||||
if seed.crawl_depth is not None:
|
||||
updates.append(f"crawl_depth = ${param_idx}")
|
||||
params.append(seed.crawl_depth)
|
||||
param_idx += 1
|
||||
|
||||
if seed.crawl_frequency is not None:
|
||||
updates.append(f"crawl_frequency = ${param_idx}")
|
||||
params.append(seed.crawl_frequency)
|
||||
param_idx += 1
|
||||
|
||||
if not updates:
|
||||
raise HTTPException(status_code=400, detail="Keine Felder zum Aktualisieren")
|
||||
|
||||
updates.append("updated_at = NOW()")
|
||||
params.append(seed_id)
|
||||
|
||||
query = f"""
|
||||
UPDATE edu_search_seeds
|
||||
SET {", ".join(updates)}
|
||||
WHERE id = ${param_idx}
|
||||
RETURNING id
|
||||
"""
|
||||
|
||||
result = await conn.fetchrow(query, *params)
|
||||
if not result:
|
||||
raise HTTPException(status_code=404, detail="Seed nicht gefunden")
|
||||
|
||||
# Return updated seed
|
||||
return await get_seed(seed_id)
|
||||
|
||||
|
||||
@router.delete("/seeds/{seed_id}")
|
||||
async def delete_seed(seed_id: str):
|
||||
"""Delete a seed."""
|
||||
pool = await get_db_pool()
|
||||
async with pool.acquire() as conn:
|
||||
result = await conn.execute(
|
||||
"DELETE FROM edu_search_seeds WHERE id = $1",
|
||||
seed_id
|
||||
)
|
||||
if result == "DELETE 0":
|
||||
raise HTTPException(status_code=404, detail="Seed nicht gefunden")
|
||||
|
||||
return {"status": "deleted", "id": seed_id}
|
||||
|
||||
|
||||
@router.post("/seeds/bulk-import", response_model=BulkImportResponse)
|
||||
async def bulk_import_seeds(request: BulkImportRequest):
|
||||
"""Bulk import seeds (skip duplicates)."""
|
||||
pool = await get_db_pool()
|
||||
imported = 0
|
||||
skipped = 0
|
||||
errors = []
|
||||
|
||||
async with pool.acquire() as conn:
|
||||
# Pre-fetch all category IDs
|
||||
categories = {}
|
||||
rows = await conn.fetch("SELECT id, name FROM edu_search_categories")
|
||||
for row in rows:
|
||||
categories[row["name"]] = row["id"]
|
||||
|
||||
for seed in request.seeds:
|
||||
try:
|
||||
category_id = categories.get(seed.category_name) if seed.category_name else None
|
||||
|
||||
await conn.execute("""
|
||||
INSERT INTO edu_search_seeds (
|
||||
url, name, description, category_id, source_type, scope,
|
||||
state, trust_boost, enabled, crawl_depth, crawl_frequency
|
||||
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11)
|
||||
ON CONFLICT (url) DO NOTHING
|
||||
""",
|
||||
seed.url, seed.name, seed.description, category_id,
|
||||
seed.source_type, seed.scope, seed.state, seed.trust_boost,
|
||||
seed.enabled, seed.crawl_depth, seed.crawl_frequency
|
||||
)
|
||||
imported += 1
|
||||
except asyncpg.UniqueViolationError:
|
||||
skipped += 1
|
||||
except Exception as e:
|
||||
errors.append(f"{seed.url}: {str(e)}")
|
||||
|
||||
return BulkImportResponse(imported=imported, skipped=skipped, errors=errors)
|
||||
|
||||
|
||||
@router.get("/stats", response_model=StatsResponse)
|
||||
async def get_stats():
|
||||
"""Get crawl statistics."""
|
||||
pool = await get_db_pool()
|
||||
async with pool.acquire() as conn:
|
||||
# Basic counts
|
||||
total = await conn.fetchval("SELECT COUNT(*) FROM edu_search_seeds")
|
||||
enabled = await conn.fetchval("SELECT COUNT(*) FROM edu_search_seeds WHERE enabled = TRUE")
|
||||
total_docs = await conn.fetchval("SELECT COALESCE(SUM(total_documents), 0) FROM edu_search_seeds")
|
||||
|
||||
# By category
|
||||
cat_rows = await conn.fetch("""
|
||||
SELECT c.name, COUNT(s.id) as count
|
||||
FROM edu_search_categories c
|
||||
LEFT JOIN edu_search_seeds s ON c.id = s.category_id
|
||||
GROUP BY c.name
|
||||
""")
|
||||
by_category = {row["name"]: row["count"] for row in cat_rows}
|
||||
|
||||
# By state
|
||||
state_rows = await conn.fetch("""
|
||||
SELECT COALESCE(state, 'federal') as state, COUNT(*) as count
|
||||
FROM edu_search_seeds
|
||||
GROUP BY state
|
||||
""")
|
||||
by_state = {row["state"]: row["count"] for row in state_rows}
|
||||
|
||||
# Last crawl time
|
||||
last_crawl = await conn.fetchval(
|
||||
"SELECT MAX(last_crawled_at) FROM edu_search_seeds"
|
||||
)
|
||||
|
||||
return StatsResponse(
|
||||
total_seeds=total,
|
||||
enabled_seeds=enabled,
|
||||
total_documents=total_docs,
|
||||
seeds_by_category=by_category,
|
||||
seeds_by_state=by_state,
|
||||
last_crawl_time=last_crawl,
|
||||
)
|
||||
|
||||
|
||||
# Export for external use (edu-search-service)
|
||||
@router.get("/seeds/export/for-crawler")
|
||||
async def export_seeds_for_crawler():
|
||||
"""Export enabled seeds in format suitable for crawler."""
|
||||
pool = await get_db_pool()
|
||||
async with pool.acquire() as conn:
|
||||
rows = await conn.fetch("""
|
||||
SELECT
|
||||
s.url, s.trust_boost, s.source_type, s.scope, s.state,
|
||||
s.crawl_depth, c.name as category
|
||||
FROM edu_search_seeds s
|
||||
LEFT JOIN edu_search_categories c ON s.category_id = c.id
|
||||
WHERE s.enabled = TRUE
|
||||
ORDER BY s.trust_boost DESC
|
||||
""")
|
||||
|
||||
return {
|
||||
"seeds": [
|
||||
{
|
||||
"url": row["url"],
|
||||
"trust": float(row["trust_boost"]),
|
||||
"source": row["source_type"],
|
||||
"scope": row["scope"],
|
||||
"state": row["state"],
|
||||
"depth": row["crawl_depth"],
|
||||
"category": row["category"],
|
||||
}
|
||||
for row in rows
|
||||
],
|
||||
"total": len(rows),
|
||||
"exported_at": datetime.utcnow().isoformat(),
|
||||
}
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Crawl Status Feedback (from edu-search-service)
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class CrawlStatusUpdate(BaseModel):
|
||||
"""Crawl status update from edu-search-service."""
|
||||
seed_url: str = Field(..., description="The seed URL that was crawled")
|
||||
status: str = Field(..., description="Crawl status: success, error, partial")
|
||||
documents_crawled: int = Field(0, ge=0, description="Number of documents crawled")
|
||||
error_message: Optional[str] = Field(None, description="Error message if status is error")
|
||||
crawl_duration_seconds: float = Field(0.0, ge=0.0, description="Duration of the crawl in seconds")
|
||||
|
||||
|
||||
class CrawlStatusResponse(BaseModel):
|
||||
"""Response for crawl status update."""
|
||||
success: bool
|
||||
seed_url: str
|
||||
message: str
|
||||
|
||||
|
||||
@router.post("/seeds/crawl-status", response_model=CrawlStatusResponse)
|
||||
async def update_crawl_status(update: CrawlStatusUpdate):
|
||||
"""Update crawl status for a seed URL (called by edu-search-service)."""
|
||||
pool = await get_db_pool()
|
||||
async with pool.acquire() as conn:
|
||||
# Find the seed by URL
|
||||
seed = await conn.fetchrow(
|
||||
"SELECT id, total_documents FROM edu_search_seeds WHERE url = $1",
|
||||
update.seed_url
|
||||
)
|
||||
|
||||
if not seed:
|
||||
raise HTTPException(
|
||||
status_code=404,
|
||||
detail=f"Seed nicht gefunden: {update.seed_url}"
|
||||
)
|
||||
|
||||
# Update the seed with crawl status
|
||||
new_total = (seed["total_documents"] or 0) + update.documents_crawled
|
||||
|
||||
await conn.execute("""
|
||||
UPDATE edu_search_seeds
|
||||
SET
|
||||
last_crawled_at = NOW(),
|
||||
last_crawl_status = $2,
|
||||
last_crawl_docs = $3,
|
||||
total_documents = $4,
|
||||
updated_at = NOW()
|
||||
WHERE id = $1
|
||||
""", seed["id"], update.status, update.documents_crawled, new_total)
|
||||
|
||||
logger.info(
|
||||
f"Crawl status updated: {update.seed_url} - "
|
||||
f"status={update.status}, docs={update.documents_crawled}, "
|
||||
f"duration={update.crawl_duration_seconds:.1f}s"
|
||||
)
|
||||
|
||||
return CrawlStatusResponse(
|
||||
success=True,
|
||||
seed_url=update.seed_url,
|
||||
message=f"Status aktualisiert: {update.documents_crawled} Dokumente gecrawlt"
|
||||
)
|
||||
|
||||
|
||||
class BulkCrawlStatusUpdate(BaseModel):
|
||||
"""Bulk crawl status update."""
|
||||
updates: List[CrawlStatusUpdate]
|
||||
|
||||
|
||||
class BulkCrawlStatusResponse(BaseModel):
|
||||
"""Response for bulk crawl status update."""
|
||||
updated: int
|
||||
failed: int
|
||||
errors: List[str]
|
||||
|
||||
|
||||
@router.post("/seeds/crawl-status/bulk", response_model=BulkCrawlStatusResponse)
|
||||
async def bulk_update_crawl_status(request: BulkCrawlStatusUpdate):
|
||||
"""Bulk update crawl status for multiple seeds."""
|
||||
pool = await get_db_pool()
|
||||
updated = 0
|
||||
failed = 0
|
||||
errors = []
|
||||
|
||||
async with pool.acquire() as conn:
|
||||
for update in request.updates:
|
||||
try:
|
||||
seed = await conn.fetchrow(
|
||||
"SELECT id, total_documents FROM edu_search_seeds WHERE url = $1",
|
||||
update.seed_url
|
||||
)
|
||||
|
||||
if not seed:
|
||||
failed += 1
|
||||
errors.append(f"Seed nicht gefunden: {update.seed_url}")
|
||||
continue
|
||||
|
||||
new_total = (seed["total_documents"] or 0) + update.documents_crawled
|
||||
|
||||
await conn.execute("""
|
||||
UPDATE edu_search_seeds
|
||||
SET
|
||||
last_crawled_at = NOW(),
|
||||
last_crawl_status = $2,
|
||||
last_crawl_docs = $3,
|
||||
total_documents = $4,
|
||||
updated_at = NOW()
|
||||
WHERE id = $1
|
||||
""", seed["id"], update.status, update.documents_crawled, new_total)
|
||||
|
||||
updated += 1
|
||||
|
||||
except Exception as e:
|
||||
failed += 1
|
||||
errors.append(f"{update.seed_url}: {str(e)}")
|
||||
|
||||
logger.info(f"Bulk crawl status update: {updated} updated, {failed} failed")
|
||||
|
||||
return BulkCrawlStatusResponse(
|
||||
updated=updated,
|
||||
failed=failed,
|
||||
errors=errors
|
||||
)
|
||||
router.include_router(_crud_router)
|
||||
router.include_router(_status_router)
|
||||
|
||||
__all__ = [
|
||||
"router",
|
||||
"get_db_pool",
|
||||
# Models
|
||||
"CategoryResponse",
|
||||
"SeedBase",
|
||||
"SeedCreate",
|
||||
"SeedUpdate",
|
||||
"SeedResponse",
|
||||
"SeedsListResponse",
|
||||
"StatsResponse",
|
||||
"BulkImportRequest",
|
||||
"BulkImportResponse",
|
||||
"CrawlStatusUpdate",
|
||||
"CrawlStatusResponse",
|
||||
"BulkCrawlStatusUpdate",
|
||||
"BulkCrawlStatusResponse",
|
||||
]
|
||||
|
||||
198
backend-lehrer/llm_gateway/routes/edu_search_status.py
Normal file
198
backend-lehrer/llm_gateway/routes/edu_search_status.py
Normal file
@@ -0,0 +1,198 @@
|
||||
"""
|
||||
EduSearch Seeds Stats & Crawl Status Routes.
|
||||
|
||||
Statistics, export for crawler, and crawl status feedback endpoints.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import List
|
||||
from datetime import datetime
|
||||
|
||||
from fastapi import APIRouter, HTTPException
|
||||
import asyncpg
|
||||
|
||||
from .edu_search_models import (
|
||||
StatsResponse,
|
||||
CrawlStatusUpdate,
|
||||
CrawlStatusResponse,
|
||||
BulkCrawlStatusUpdate,
|
||||
BulkCrawlStatusResponse,
|
||||
)
|
||||
from .edu_search_crud import get_db_pool
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(tags=["edu-search"])
|
||||
|
||||
|
||||
@router.get("/stats", response_model=StatsResponse)
|
||||
async def get_stats():
|
||||
"""Get crawl statistics."""
|
||||
pool = await get_db_pool()
|
||||
async with pool.acquire() as conn:
|
||||
# Basic counts
|
||||
total = await conn.fetchval("SELECT COUNT(*) FROM edu_search_seeds")
|
||||
enabled = await conn.fetchval("SELECT COUNT(*) FROM edu_search_seeds WHERE enabled = TRUE")
|
||||
total_docs = await conn.fetchval("SELECT COALESCE(SUM(total_documents), 0) FROM edu_search_seeds")
|
||||
|
||||
# By category
|
||||
cat_rows = await conn.fetch("""
|
||||
SELECT c.name, COUNT(s.id) as count
|
||||
FROM edu_search_categories c
|
||||
LEFT JOIN edu_search_seeds s ON c.id = s.category_id
|
||||
GROUP BY c.name
|
||||
""")
|
||||
by_category = {row["name"]: row["count"] for row in cat_rows}
|
||||
|
||||
# By state
|
||||
state_rows = await conn.fetch("""
|
||||
SELECT COALESCE(state, 'federal') as state, COUNT(*) as count
|
||||
FROM edu_search_seeds
|
||||
GROUP BY state
|
||||
""")
|
||||
by_state = {row["state"]: row["count"] for row in state_rows}
|
||||
|
||||
# Last crawl time
|
||||
last_crawl = await conn.fetchval(
|
||||
"SELECT MAX(last_crawled_at) FROM edu_search_seeds"
|
||||
)
|
||||
|
||||
return StatsResponse(
|
||||
total_seeds=total,
|
||||
enabled_seeds=enabled,
|
||||
total_documents=total_docs,
|
||||
seeds_by_category=by_category,
|
||||
seeds_by_state=by_state,
|
||||
last_crawl_time=last_crawl,
|
||||
)
|
||||
|
||||
|
||||
# Export for external use (edu-search-service)
|
||||
@router.get("/seeds/export/for-crawler")
|
||||
async def export_seeds_for_crawler():
|
||||
"""Export enabled seeds in format suitable for crawler."""
|
||||
pool = await get_db_pool()
|
||||
async with pool.acquire() as conn:
|
||||
rows = await conn.fetch("""
|
||||
SELECT
|
||||
s.url, s.trust_boost, s.source_type, s.scope, s.state,
|
||||
s.crawl_depth, c.name as category
|
||||
FROM edu_search_seeds s
|
||||
LEFT JOIN edu_search_categories c ON s.category_id = c.id
|
||||
WHERE s.enabled = TRUE
|
||||
ORDER BY s.trust_boost DESC
|
||||
""")
|
||||
|
||||
return {
|
||||
"seeds": [
|
||||
{
|
||||
"url": row["url"],
|
||||
"trust": float(row["trust_boost"]),
|
||||
"source": row["source_type"],
|
||||
"scope": row["scope"],
|
||||
"state": row["state"],
|
||||
"depth": row["crawl_depth"],
|
||||
"category": row["category"],
|
||||
}
|
||||
for row in rows
|
||||
],
|
||||
"total": len(rows),
|
||||
"exported_at": datetime.utcnow().isoformat(),
|
||||
}
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Crawl Status Feedback (from edu-search-service)
|
||||
# =============================================================================
|
||||
|
||||
@router.post("/seeds/crawl-status", response_model=CrawlStatusResponse)
|
||||
async def update_crawl_status(update: CrawlStatusUpdate):
|
||||
"""Update crawl status for a seed URL (called by edu-search-service)."""
|
||||
pool = await get_db_pool()
|
||||
async with pool.acquire() as conn:
|
||||
# Find the seed by URL
|
||||
seed = await conn.fetchrow(
|
||||
"SELECT id, total_documents FROM edu_search_seeds WHERE url = $1",
|
||||
update.seed_url
|
||||
)
|
||||
|
||||
if not seed:
|
||||
raise HTTPException(
|
||||
status_code=404,
|
||||
detail=f"Seed nicht gefunden: {update.seed_url}"
|
||||
)
|
||||
|
||||
# Update the seed with crawl status
|
||||
new_total = (seed["total_documents"] or 0) + update.documents_crawled
|
||||
|
||||
await conn.execute("""
|
||||
UPDATE edu_search_seeds
|
||||
SET
|
||||
last_crawled_at = NOW(),
|
||||
last_crawl_status = $2,
|
||||
last_crawl_docs = $3,
|
||||
total_documents = $4,
|
||||
updated_at = NOW()
|
||||
WHERE id = $1
|
||||
""", seed["id"], update.status, update.documents_crawled, new_total)
|
||||
|
||||
logger.info(
|
||||
f"Crawl status updated: {update.seed_url} - "
|
||||
f"status={update.status}, docs={update.documents_crawled}, "
|
||||
f"duration={update.crawl_duration_seconds:.1f}s"
|
||||
)
|
||||
|
||||
return CrawlStatusResponse(
|
||||
success=True,
|
||||
seed_url=update.seed_url,
|
||||
message=f"Status aktualisiert: {update.documents_crawled} Dokumente gecrawlt"
|
||||
)
|
||||
|
||||
|
||||
@router.post("/seeds/crawl-status/bulk", response_model=BulkCrawlStatusResponse)
|
||||
async def bulk_update_crawl_status(request: BulkCrawlStatusUpdate):
|
||||
"""Bulk update crawl status for multiple seeds."""
|
||||
pool = await get_db_pool()
|
||||
updated = 0
|
||||
failed = 0
|
||||
errors = []
|
||||
|
||||
async with pool.acquire() as conn:
|
||||
for update in request.updates:
|
||||
try:
|
||||
seed = await conn.fetchrow(
|
||||
"SELECT id, total_documents FROM edu_search_seeds WHERE url = $1",
|
||||
update.seed_url
|
||||
)
|
||||
|
||||
if not seed:
|
||||
failed += 1
|
||||
errors.append(f"Seed nicht gefunden: {update.seed_url}")
|
||||
continue
|
||||
|
||||
new_total = (seed["total_documents"] or 0) + update.documents_crawled
|
||||
|
||||
await conn.execute("""
|
||||
UPDATE edu_search_seeds
|
||||
SET
|
||||
last_crawled_at = NOW(),
|
||||
last_crawl_status = $2,
|
||||
last_crawl_docs = $3,
|
||||
total_documents = $4,
|
||||
updated_at = NOW()
|
||||
WHERE id = $1
|
||||
""", seed["id"], update.status, update.documents_crawled, new_total)
|
||||
|
||||
updated += 1
|
||||
|
||||
except Exception as e:
|
||||
failed += 1
|
||||
errors.append(f"{update.seed_url}: {str(e)}")
|
||||
|
||||
logger.info(f"Bulk crawl status update: {updated} updated, {failed} failed")
|
||||
|
||||
return BulkCrawlStatusResponse(
|
||||
updated=updated,
|
||||
failed=failed,
|
||||
errors=errors
|
||||
)
|
||||
@@ -1,867 +1,38 @@
|
||||
"""
|
||||
Schools API Routes.
|
||||
Schools API Routes — Barrel Re-export.
|
||||
|
||||
CRUD operations for managing German schools (~40,000 schools).
|
||||
Direct database access to PostgreSQL.
|
||||
Split into:
|
||||
- schools_models.py: Pydantic models
|
||||
- schools_db.py: Database connection pool
|
||||
- schools_crud.py: School CRUD & stats routes
|
||||
- schools_staff.py: Staff CRUD & search routes
|
||||
"""
|
||||
|
||||
import os
|
||||
import logging
|
||||
from typing import Optional, List
|
||||
from datetime import datetime
|
||||
from uuid import UUID
|
||||
from fastapi import APIRouter
|
||||
|
||||
from fastapi import APIRouter, HTTPException, Query
|
||||
from pydantic import BaseModel, Field
|
||||
import asyncpg
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
from .schools_crud import router as _crud_router
|
||||
from .schools_staff import router as _staff_router
|
||||
|
||||
# Single router that merges both sub-module routers
|
||||
router = APIRouter(prefix="/schools", tags=["schools"])
|
||||
|
||||
# Database connection pool
|
||||
_pool: Optional[asyncpg.Pool] = None
|
||||
|
||||
|
||||
async def get_db_pool() -> asyncpg.Pool:
|
||||
"""Get or create database connection pool."""
|
||||
global _pool
|
||||
if _pool is None:
|
||||
database_url = os.environ.get(
|
||||
"DATABASE_URL",
|
||||
"postgresql://breakpilot:breakpilot123@postgres:5432/breakpilot_db"
|
||||
)
|
||||
_pool = await asyncpg.create_pool(database_url, min_size=2, max_size=10)
|
||||
return _pool
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Pydantic Models
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class SchoolTypeResponse(BaseModel):
|
||||
"""School type response model."""
|
||||
id: str
|
||||
name: str
|
||||
name_short: Optional[str] = None
|
||||
category: Optional[str] = None
|
||||
description: Optional[str] = None
|
||||
|
||||
|
||||
class SchoolBase(BaseModel):
|
||||
"""Base school model for creation/update."""
|
||||
name: str = Field(..., max_length=255)
|
||||
school_number: Optional[str] = Field(None, max_length=20)
|
||||
school_type_id: Optional[str] = None
|
||||
school_type_raw: Optional[str] = None
|
||||
state: str = Field(..., max_length=10)
|
||||
district: Optional[str] = None
|
||||
city: Optional[str] = None
|
||||
postal_code: Optional[str] = None
|
||||
street: Optional[str] = None
|
||||
address_full: Optional[str] = None
|
||||
latitude: Optional[float] = None
|
||||
longitude: Optional[float] = None
|
||||
website: Optional[str] = None
|
||||
email: Optional[str] = None
|
||||
phone: Optional[str] = None
|
||||
fax: Optional[str] = None
|
||||
principal_name: Optional[str] = None
|
||||
principal_title: Optional[str] = None
|
||||
principal_email: Optional[str] = None
|
||||
principal_phone: Optional[str] = None
|
||||
secretary_name: Optional[str] = None
|
||||
secretary_email: Optional[str] = None
|
||||
secretary_phone: Optional[str] = None
|
||||
student_count: Optional[int] = None
|
||||
teacher_count: Optional[int] = None
|
||||
class_count: Optional[int] = None
|
||||
founded_year: Optional[int] = None
|
||||
is_public: bool = True
|
||||
is_all_day: Optional[bool] = None
|
||||
has_inclusion: Optional[bool] = None
|
||||
languages: Optional[List[str]] = None
|
||||
specializations: Optional[List[str]] = None
|
||||
source: Optional[str] = None
|
||||
source_url: Optional[str] = None
|
||||
|
||||
|
||||
class SchoolCreate(SchoolBase):
|
||||
"""School creation model."""
|
||||
pass
|
||||
|
||||
|
||||
class SchoolUpdate(BaseModel):
|
||||
"""School update model (all fields optional)."""
|
||||
name: Optional[str] = Field(None, max_length=255)
|
||||
school_number: Optional[str] = None
|
||||
school_type_id: Optional[str] = None
|
||||
state: Optional[str] = None
|
||||
district: Optional[str] = None
|
||||
city: Optional[str] = None
|
||||
postal_code: Optional[str] = None
|
||||
street: Optional[str] = None
|
||||
website: Optional[str] = None
|
||||
email: Optional[str] = None
|
||||
phone: Optional[str] = None
|
||||
principal_name: Optional[str] = None
|
||||
student_count: Optional[int] = None
|
||||
teacher_count: Optional[int] = None
|
||||
is_active: Optional[bool] = None
|
||||
|
||||
|
||||
class SchoolResponse(BaseModel):
|
||||
"""School response model."""
|
||||
id: str
|
||||
name: str
|
||||
school_number: Optional[str] = None
|
||||
school_type: Optional[str] = None
|
||||
school_type_short: Optional[str] = None
|
||||
school_category: Optional[str] = None
|
||||
state: str
|
||||
district: Optional[str] = None
|
||||
city: Optional[str] = None
|
||||
postal_code: Optional[str] = None
|
||||
street: Optional[str] = None
|
||||
address_full: Optional[str] = None
|
||||
latitude: Optional[float] = None
|
||||
longitude: Optional[float] = None
|
||||
website: Optional[str] = None
|
||||
email: Optional[str] = None
|
||||
phone: Optional[str] = None
|
||||
fax: Optional[str] = None
|
||||
principal_name: Optional[str] = None
|
||||
principal_email: Optional[str] = None
|
||||
student_count: Optional[int] = None
|
||||
teacher_count: Optional[int] = None
|
||||
is_public: bool = True
|
||||
is_all_day: Optional[bool] = None
|
||||
staff_count: int = 0
|
||||
source: Optional[str] = None
|
||||
crawled_at: Optional[datetime] = None
|
||||
is_active: bool = True
|
||||
created_at: datetime
|
||||
updated_at: datetime
|
||||
|
||||
|
||||
class SchoolsListResponse(BaseModel):
|
||||
"""List response with pagination info."""
|
||||
schools: List[SchoolResponse]
|
||||
total: int
|
||||
page: int
|
||||
page_size: int
|
||||
|
||||
|
||||
class SchoolStaffBase(BaseModel):
|
||||
"""Base school staff model."""
|
||||
first_name: Optional[str] = None
|
||||
last_name: str
|
||||
full_name: Optional[str] = None
|
||||
title: Optional[str] = None
|
||||
position: Optional[str] = None
|
||||
position_type: Optional[str] = None
|
||||
subjects: Optional[List[str]] = None
|
||||
email: Optional[str] = None
|
||||
phone: Optional[str] = None
|
||||
|
||||
|
||||
class SchoolStaffCreate(SchoolStaffBase):
|
||||
"""School staff creation model."""
|
||||
school_id: str
|
||||
|
||||
|
||||
class SchoolStaffResponse(SchoolStaffBase):
|
||||
"""School staff response model."""
|
||||
id: str
|
||||
school_id: str
|
||||
school_name: Optional[str] = None
|
||||
profile_url: Optional[str] = None
|
||||
photo_url: Optional[str] = None
|
||||
is_active: bool = True
|
||||
created_at: datetime
|
||||
|
||||
|
||||
class SchoolStaffListResponse(BaseModel):
|
||||
"""Staff list response."""
|
||||
staff: List[SchoolStaffResponse]
|
||||
total: int
|
||||
|
||||
|
||||
class SchoolStatsResponse(BaseModel):
|
||||
"""School statistics response."""
|
||||
total_schools: int
|
||||
total_staff: int
|
||||
schools_by_state: dict
|
||||
schools_by_type: dict
|
||||
schools_with_website: int
|
||||
schools_with_email: int
|
||||
schools_with_principal: int
|
||||
total_students: int
|
||||
total_teachers: int
|
||||
last_crawl_time: Optional[datetime] = None
|
||||
|
||||
|
||||
class BulkImportRequest(BaseModel):
|
||||
"""Bulk import request."""
|
||||
schools: List[SchoolCreate]
|
||||
|
||||
|
||||
class BulkImportResponse(BaseModel):
|
||||
"""Bulk import response."""
|
||||
imported: int
|
||||
updated: int
|
||||
skipped: int
|
||||
errors: List[str]
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# School Type Endpoints
|
||||
# =============================================================================
|
||||
|
||||
|
||||
@router.get("/types", response_model=List[SchoolTypeResponse])
|
||||
async def list_school_types():
|
||||
"""List all school types."""
|
||||
pool = await get_db_pool()
|
||||
async with pool.acquire() as conn:
|
||||
rows = await conn.fetch("""
|
||||
SELECT id, name, name_short, category, description
|
||||
FROM school_types
|
||||
ORDER BY category, name
|
||||
""")
|
||||
return [
|
||||
SchoolTypeResponse(
|
||||
id=str(row["id"]),
|
||||
name=row["name"],
|
||||
name_short=row["name_short"],
|
||||
category=row["category"],
|
||||
description=row["description"],
|
||||
)
|
||||
for row in rows
|
||||
]
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# School Endpoints
|
||||
# =============================================================================
|
||||
|
||||
|
||||
@router.get("", response_model=SchoolsListResponse)
|
||||
async def list_schools(
|
||||
state: Optional[str] = Query(None, description="Filter by state code (BW, BY, etc.)"),
|
||||
school_type: Optional[str] = Query(None, description="Filter by school type name"),
|
||||
city: Optional[str] = Query(None, description="Filter by city"),
|
||||
district: Optional[str] = Query(None, description="Filter by district"),
|
||||
postal_code: Optional[str] = Query(None, description="Filter by postal code prefix"),
|
||||
search: Optional[str] = Query(None, description="Search in name, city"),
|
||||
has_email: Optional[bool] = Query(None, description="Filter schools with email"),
|
||||
has_website: Optional[bool] = Query(None, description="Filter schools with website"),
|
||||
is_public: Optional[bool] = Query(None, description="Filter public/private schools"),
|
||||
page: int = Query(1, ge=1),
|
||||
page_size: int = Query(50, ge=1, le=200),
|
||||
):
|
||||
"""List schools with optional filtering and pagination."""
|
||||
pool = await get_db_pool()
|
||||
async with pool.acquire() as conn:
|
||||
# Build WHERE clause
|
||||
conditions = ["s.is_active = TRUE"]
|
||||
params = []
|
||||
param_idx = 1
|
||||
|
||||
if state:
|
||||
conditions.append(f"s.state = ${param_idx}")
|
||||
params.append(state.upper())
|
||||
param_idx += 1
|
||||
|
||||
if school_type:
|
||||
conditions.append(f"st.name = ${param_idx}")
|
||||
params.append(school_type)
|
||||
param_idx += 1
|
||||
|
||||
if city:
|
||||
conditions.append(f"LOWER(s.city) = LOWER(${param_idx})")
|
||||
params.append(city)
|
||||
param_idx += 1
|
||||
|
||||
if district:
|
||||
conditions.append(f"LOWER(s.district) LIKE LOWER(${param_idx})")
|
||||
params.append(f"%{district}%")
|
||||
param_idx += 1
|
||||
|
||||
if postal_code:
|
||||
conditions.append(f"s.postal_code LIKE ${param_idx}")
|
||||
params.append(f"{postal_code}%")
|
||||
param_idx += 1
|
||||
|
||||
if search:
|
||||
conditions.append(f"""
|
||||
(LOWER(s.name) LIKE LOWER(${param_idx})
|
||||
OR LOWER(s.city) LIKE LOWER(${param_idx})
|
||||
OR LOWER(s.district) LIKE LOWER(${param_idx}))
|
||||
""")
|
||||
params.append(f"%{search}%")
|
||||
param_idx += 1
|
||||
|
||||
if has_email is not None:
|
||||
if has_email:
|
||||
conditions.append("s.email IS NOT NULL")
|
||||
else:
|
||||
conditions.append("s.email IS NULL")
|
||||
|
||||
if has_website is not None:
|
||||
if has_website:
|
||||
conditions.append("s.website IS NOT NULL")
|
||||
else:
|
||||
conditions.append("s.website IS NULL")
|
||||
|
||||
if is_public is not None:
|
||||
conditions.append(f"s.is_public = ${param_idx}")
|
||||
params.append(is_public)
|
||||
param_idx += 1
|
||||
|
||||
where_clause = " AND ".join(conditions)
|
||||
|
||||
# Count total
|
||||
count_query = f"""
|
||||
SELECT COUNT(*) FROM schools s
|
||||
LEFT JOIN school_types st ON s.school_type_id = st.id
|
||||
WHERE {where_clause}
|
||||
"""
|
||||
total = await conn.fetchval(count_query, *params)
|
||||
|
||||
# Fetch schools
|
||||
offset = (page - 1) * page_size
|
||||
query = f"""
|
||||
SELECT
|
||||
s.id, s.name, s.school_number, s.state, s.district, s.city,
|
||||
s.postal_code, s.street, s.address_full, s.latitude, s.longitude,
|
||||
s.website, s.email, s.phone, s.fax,
|
||||
s.principal_name, s.principal_email,
|
||||
s.student_count, s.teacher_count,
|
||||
s.is_public, s.is_all_day, s.source, s.crawled_at,
|
||||
s.is_active, s.created_at, s.updated_at,
|
||||
st.name as school_type, st.name_short as school_type_short, st.category as school_category,
|
||||
(SELECT COUNT(*) FROM school_staff ss WHERE ss.school_id = s.id AND ss.is_active = TRUE) as staff_count
|
||||
FROM schools s
|
||||
LEFT JOIN school_types st ON s.school_type_id = st.id
|
||||
WHERE {where_clause}
|
||||
ORDER BY s.state, s.city, s.name
|
||||
LIMIT ${param_idx} OFFSET ${param_idx + 1}
|
||||
"""
|
||||
params.extend([page_size, offset])
|
||||
rows = await conn.fetch(query, *params)
|
||||
|
||||
schools = [
|
||||
SchoolResponse(
|
||||
id=str(row["id"]),
|
||||
name=row["name"],
|
||||
school_number=row["school_number"],
|
||||
school_type=row["school_type"],
|
||||
school_type_short=row["school_type_short"],
|
||||
school_category=row["school_category"],
|
||||
state=row["state"],
|
||||
district=row["district"],
|
||||
city=row["city"],
|
||||
postal_code=row["postal_code"],
|
||||
street=row["street"],
|
||||
address_full=row["address_full"],
|
||||
latitude=row["latitude"],
|
||||
longitude=row["longitude"],
|
||||
website=row["website"],
|
||||
email=row["email"],
|
||||
phone=row["phone"],
|
||||
fax=row["fax"],
|
||||
principal_name=row["principal_name"],
|
||||
principal_email=row["principal_email"],
|
||||
student_count=row["student_count"],
|
||||
teacher_count=row["teacher_count"],
|
||||
is_public=row["is_public"],
|
||||
is_all_day=row["is_all_day"],
|
||||
staff_count=row["staff_count"],
|
||||
source=row["source"],
|
||||
crawled_at=row["crawled_at"],
|
||||
is_active=row["is_active"],
|
||||
created_at=row["created_at"],
|
||||
updated_at=row["updated_at"],
|
||||
)
|
||||
for row in rows
|
||||
]
|
||||
|
||||
return SchoolsListResponse(
|
||||
schools=schools,
|
||||
total=total,
|
||||
page=page,
|
||||
page_size=page_size,
|
||||
)
|
||||
|
||||
|
||||
@router.get("/stats", response_model=SchoolStatsResponse)
|
||||
async def get_school_stats():
|
||||
"""Get school statistics."""
|
||||
pool = await get_db_pool()
|
||||
async with pool.acquire() as conn:
|
||||
# Total schools and staff
|
||||
totals = await conn.fetchrow("""
|
||||
SELECT
|
||||
(SELECT COUNT(*) FROM schools WHERE is_active = TRUE) as total_schools,
|
||||
(SELECT COUNT(*) FROM school_staff WHERE is_active = TRUE) as total_staff,
|
||||
(SELECT COUNT(*) FROM schools WHERE is_active = TRUE AND website IS NOT NULL) as with_website,
|
||||
(SELECT COUNT(*) FROM schools WHERE is_active = TRUE AND email IS NOT NULL) as with_email,
|
||||
(SELECT COUNT(*) FROM schools WHERE is_active = TRUE AND principal_name IS NOT NULL) as with_principal,
|
||||
(SELECT COALESCE(SUM(student_count), 0) FROM schools WHERE is_active = TRUE) as total_students,
|
||||
(SELECT COALESCE(SUM(teacher_count), 0) FROM schools WHERE is_active = TRUE) as total_teachers,
|
||||
(SELECT MAX(crawled_at) FROM schools) as last_crawl
|
||||
""")
|
||||
|
||||
# By state
|
||||
state_rows = await conn.fetch("""
|
||||
SELECT state, COUNT(*) as count
|
||||
FROM schools
|
||||
WHERE is_active = TRUE
|
||||
GROUP BY state
|
||||
ORDER BY state
|
||||
""")
|
||||
schools_by_state = {row["state"]: row["count"] for row in state_rows}
|
||||
|
||||
# By type
|
||||
type_rows = await conn.fetch("""
|
||||
SELECT COALESCE(st.name, 'Unbekannt') as type_name, COUNT(*) as count
|
||||
FROM schools s
|
||||
LEFT JOIN school_types st ON s.school_type_id = st.id
|
||||
WHERE s.is_active = TRUE
|
||||
GROUP BY st.name
|
||||
ORDER BY count DESC
|
||||
""")
|
||||
schools_by_type = {row["type_name"]: row["count"] for row in type_rows}
|
||||
|
||||
return SchoolStatsResponse(
|
||||
total_schools=totals["total_schools"],
|
||||
total_staff=totals["total_staff"],
|
||||
schools_by_state=schools_by_state,
|
||||
schools_by_type=schools_by_type,
|
||||
schools_with_website=totals["with_website"],
|
||||
schools_with_email=totals["with_email"],
|
||||
schools_with_principal=totals["with_principal"],
|
||||
total_students=totals["total_students"],
|
||||
total_teachers=totals["total_teachers"],
|
||||
last_crawl_time=totals["last_crawl"],
|
||||
)
|
||||
|
||||
|
||||
@router.get("/{school_id}", response_model=SchoolResponse)
|
||||
async def get_school(school_id: str):
|
||||
"""Get a single school by ID."""
|
||||
pool = await get_db_pool()
|
||||
async with pool.acquire() as conn:
|
||||
row = await conn.fetchrow("""
|
||||
SELECT
|
||||
s.id, s.name, s.school_number, s.state, s.district, s.city,
|
||||
s.postal_code, s.street, s.address_full, s.latitude, s.longitude,
|
||||
s.website, s.email, s.phone, s.fax,
|
||||
s.principal_name, s.principal_email,
|
||||
s.student_count, s.teacher_count,
|
||||
s.is_public, s.is_all_day, s.source, s.crawled_at,
|
||||
s.is_active, s.created_at, s.updated_at,
|
||||
st.name as school_type, st.name_short as school_type_short, st.category as school_category,
|
||||
(SELECT COUNT(*) FROM school_staff ss WHERE ss.school_id = s.id AND ss.is_active = TRUE) as staff_count
|
||||
FROM schools s
|
||||
LEFT JOIN school_types st ON s.school_type_id = st.id
|
||||
WHERE s.id = $1
|
||||
""", school_id)
|
||||
|
||||
if not row:
|
||||
raise HTTPException(status_code=404, detail="School not found")
|
||||
|
||||
return SchoolResponse(
|
||||
id=str(row["id"]),
|
||||
name=row["name"],
|
||||
school_number=row["school_number"],
|
||||
school_type=row["school_type"],
|
||||
school_type_short=row["school_type_short"],
|
||||
school_category=row["school_category"],
|
||||
state=row["state"],
|
||||
district=row["district"],
|
||||
city=row["city"],
|
||||
postal_code=row["postal_code"],
|
||||
street=row["street"],
|
||||
address_full=row["address_full"],
|
||||
latitude=row["latitude"],
|
||||
longitude=row["longitude"],
|
||||
website=row["website"],
|
||||
email=row["email"],
|
||||
phone=row["phone"],
|
||||
fax=row["fax"],
|
||||
principal_name=row["principal_name"],
|
||||
principal_email=row["principal_email"],
|
||||
student_count=row["student_count"],
|
||||
teacher_count=row["teacher_count"],
|
||||
is_public=row["is_public"],
|
||||
is_all_day=row["is_all_day"],
|
||||
staff_count=row["staff_count"],
|
||||
source=row["source"],
|
||||
crawled_at=row["crawled_at"],
|
||||
is_active=row["is_active"],
|
||||
created_at=row["created_at"],
|
||||
updated_at=row["updated_at"],
|
||||
)
|
||||
|
||||
|
||||
@router.post("/bulk-import", response_model=BulkImportResponse)
|
||||
async def bulk_import_schools(request: BulkImportRequest):
|
||||
"""Bulk import schools. Updates existing schools based on school_number + state."""
|
||||
pool = await get_db_pool()
|
||||
imported = 0
|
||||
updated = 0
|
||||
skipped = 0
|
||||
errors = []
|
||||
|
||||
async with pool.acquire() as conn:
|
||||
# Get school type mapping
|
||||
type_rows = await conn.fetch("SELECT id, name FROM school_types")
|
||||
type_map = {row["name"].lower(): str(row["id"]) for row in type_rows}
|
||||
|
||||
for school in request.schools:
|
||||
try:
|
||||
# Find school type ID
|
||||
school_type_id = None
|
||||
if school.school_type_raw:
|
||||
school_type_id = type_map.get(school.school_type_raw.lower())
|
||||
|
||||
# Check if school exists (by school_number + state, or by name + city + state)
|
||||
existing = None
|
||||
if school.school_number:
|
||||
existing = await conn.fetchrow(
|
||||
"SELECT id FROM schools WHERE school_number = $1 AND state = $2",
|
||||
school.school_number, school.state
|
||||
)
|
||||
if not existing and school.city:
|
||||
existing = await conn.fetchrow(
|
||||
"SELECT id FROM schools WHERE LOWER(name) = LOWER($1) AND LOWER(city) = LOWER($2) AND state = $3",
|
||||
school.name, school.city, school.state
|
||||
)
|
||||
|
||||
if existing:
|
||||
# Update existing school
|
||||
await conn.execute("""
|
||||
UPDATE schools SET
|
||||
name = $2,
|
||||
school_type_id = COALESCE($3, school_type_id),
|
||||
school_type_raw = COALESCE($4, school_type_raw),
|
||||
district = COALESCE($5, district),
|
||||
city = COALESCE($6, city),
|
||||
postal_code = COALESCE($7, postal_code),
|
||||
street = COALESCE($8, street),
|
||||
address_full = COALESCE($9, address_full),
|
||||
latitude = COALESCE($10, latitude),
|
||||
longitude = COALESCE($11, longitude),
|
||||
website = COALESCE($12, website),
|
||||
email = COALESCE($13, email),
|
||||
phone = COALESCE($14, phone),
|
||||
fax = COALESCE($15, fax),
|
||||
principal_name = COALESCE($16, principal_name),
|
||||
principal_title = COALESCE($17, principal_title),
|
||||
principal_email = COALESCE($18, principal_email),
|
||||
principal_phone = COALESCE($19, principal_phone),
|
||||
student_count = COALESCE($20, student_count),
|
||||
teacher_count = COALESCE($21, teacher_count),
|
||||
is_public = $22,
|
||||
source = COALESCE($23, source),
|
||||
source_url = COALESCE($24, source_url),
|
||||
updated_at = NOW()
|
||||
WHERE id = $1
|
||||
""",
|
||||
existing["id"],
|
||||
school.name,
|
||||
school_type_id,
|
||||
school.school_type_raw,
|
||||
school.district,
|
||||
school.city,
|
||||
school.postal_code,
|
||||
school.street,
|
||||
school.address_full,
|
||||
school.latitude,
|
||||
school.longitude,
|
||||
school.website,
|
||||
school.email,
|
||||
school.phone,
|
||||
school.fax,
|
||||
school.principal_name,
|
||||
school.principal_title,
|
||||
school.principal_email,
|
||||
school.principal_phone,
|
||||
school.student_count,
|
||||
school.teacher_count,
|
||||
school.is_public,
|
||||
school.source,
|
||||
school.source_url,
|
||||
)
|
||||
updated += 1
|
||||
else:
|
||||
# Insert new school
|
||||
await conn.execute("""
|
||||
INSERT INTO schools (
|
||||
name, school_number, school_type_id, school_type_raw,
|
||||
state, district, city, postal_code, street, address_full,
|
||||
latitude, longitude, website, email, phone, fax,
|
||||
principal_name, principal_title, principal_email, principal_phone,
|
||||
student_count, teacher_count, is_public,
|
||||
source, source_url, crawled_at
|
||||
) VALUES (
|
||||
$1, $2, $3, $4, $5, $6, $7, $8, $9, $10,
|
||||
$11, $12, $13, $14, $15, $16, $17, $18, $19, $20,
|
||||
$21, $22, $23, $24, $25, NOW()
|
||||
)
|
||||
""",
|
||||
school.name,
|
||||
school.school_number,
|
||||
school_type_id,
|
||||
school.school_type_raw,
|
||||
school.state,
|
||||
school.district,
|
||||
school.city,
|
||||
school.postal_code,
|
||||
school.street,
|
||||
school.address_full,
|
||||
school.latitude,
|
||||
school.longitude,
|
||||
school.website,
|
||||
school.email,
|
||||
school.phone,
|
||||
school.fax,
|
||||
school.principal_name,
|
||||
school.principal_title,
|
||||
school.principal_email,
|
||||
school.principal_phone,
|
||||
school.student_count,
|
||||
school.teacher_count,
|
||||
school.is_public,
|
||||
school.source,
|
||||
school.source_url,
|
||||
)
|
||||
imported += 1
|
||||
|
||||
except Exception as e:
|
||||
errors.append(f"Error importing {school.name}: {str(e)}")
|
||||
if len(errors) > 100:
|
||||
errors.append("... (more errors truncated)")
|
||||
break
|
||||
|
||||
return BulkImportResponse(
|
||||
imported=imported,
|
||||
updated=updated,
|
||||
skipped=skipped,
|
||||
errors=errors[:100],
|
||||
)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# School Staff Endpoints
|
||||
# =============================================================================
|
||||
|
||||
|
||||
@router.get("/{school_id}/staff", response_model=SchoolStaffListResponse)
|
||||
async def get_school_staff(school_id: str):
|
||||
"""Get staff members for a school."""
|
||||
pool = await get_db_pool()
|
||||
async with pool.acquire() as conn:
|
||||
rows = await conn.fetch("""
|
||||
SELECT
|
||||
ss.id, ss.school_id, ss.first_name, ss.last_name, ss.full_name,
|
||||
ss.title, ss.position, ss.position_type, ss.subjects,
|
||||
ss.email, ss.phone, ss.profile_url, ss.photo_url,
|
||||
ss.is_active, ss.created_at,
|
||||
s.name as school_name
|
||||
FROM school_staff ss
|
||||
JOIN schools s ON ss.school_id = s.id
|
||||
WHERE ss.school_id = $1 AND ss.is_active = TRUE
|
||||
ORDER BY
|
||||
CASE ss.position_type
|
||||
WHEN 'principal' THEN 1
|
||||
WHEN 'vice_principal' THEN 2
|
||||
WHEN 'secretary' THEN 3
|
||||
ELSE 4
|
||||
END,
|
||||
ss.last_name
|
||||
""", school_id)
|
||||
|
||||
staff = [
|
||||
SchoolStaffResponse(
|
||||
id=str(row["id"]),
|
||||
school_id=str(row["school_id"]),
|
||||
school_name=row["school_name"],
|
||||
first_name=row["first_name"],
|
||||
last_name=row["last_name"],
|
||||
full_name=row["full_name"],
|
||||
title=row["title"],
|
||||
position=row["position"],
|
||||
position_type=row["position_type"],
|
||||
subjects=row["subjects"],
|
||||
email=row["email"],
|
||||
phone=row["phone"],
|
||||
profile_url=row["profile_url"],
|
||||
photo_url=row["photo_url"],
|
||||
is_active=row["is_active"],
|
||||
created_at=row["created_at"],
|
||||
)
|
||||
for row in rows
|
||||
]
|
||||
|
||||
return SchoolStaffListResponse(
|
||||
staff=staff,
|
||||
total=len(staff),
|
||||
)
|
||||
|
||||
|
||||
@router.post("/{school_id}/staff", response_model=SchoolStaffResponse)
|
||||
async def create_school_staff(school_id: str, staff: SchoolStaffBase):
|
||||
"""Add a staff member to a school."""
|
||||
pool = await get_db_pool()
|
||||
async with pool.acquire() as conn:
|
||||
# Verify school exists
|
||||
school = await conn.fetchrow("SELECT name FROM schools WHERE id = $1", school_id)
|
||||
if not school:
|
||||
raise HTTPException(status_code=404, detail="School not found")
|
||||
|
||||
# Create full name
|
||||
full_name = staff.full_name
|
||||
if not full_name:
|
||||
parts = []
|
||||
if staff.title:
|
||||
parts.append(staff.title)
|
||||
if staff.first_name:
|
||||
parts.append(staff.first_name)
|
||||
parts.append(staff.last_name)
|
||||
full_name = " ".join(parts)
|
||||
|
||||
row = await conn.fetchrow("""
|
||||
INSERT INTO school_staff (
|
||||
school_id, first_name, last_name, full_name, title,
|
||||
position, position_type, subjects, email, phone
|
||||
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)
|
||||
RETURNING id, created_at
|
||||
""",
|
||||
school_id,
|
||||
staff.first_name,
|
||||
staff.last_name,
|
||||
full_name,
|
||||
staff.title,
|
||||
staff.position,
|
||||
staff.position_type,
|
||||
staff.subjects,
|
||||
staff.email,
|
||||
staff.phone,
|
||||
)
|
||||
|
||||
return SchoolStaffResponse(
|
||||
id=str(row["id"]),
|
||||
school_id=school_id,
|
||||
school_name=school["name"],
|
||||
first_name=staff.first_name,
|
||||
last_name=staff.last_name,
|
||||
full_name=full_name,
|
||||
title=staff.title,
|
||||
position=staff.position,
|
||||
position_type=staff.position_type,
|
||||
subjects=staff.subjects,
|
||||
email=staff.email,
|
||||
phone=staff.phone,
|
||||
is_active=True,
|
||||
created_at=row["created_at"],
|
||||
)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Search Endpoints
|
||||
# =============================================================================
|
||||
|
||||
|
||||
@router.get("/search/staff", response_model=SchoolStaffListResponse)
|
||||
async def search_school_staff(
|
||||
q: Optional[str] = Query(None, description="Search query"),
|
||||
state: Optional[str] = Query(None, description="Filter by state"),
|
||||
position_type: Optional[str] = Query(None, description="Filter by position type"),
|
||||
has_email: Optional[bool] = Query(None, description="Only staff with email"),
|
||||
page: int = Query(1, ge=1),
|
||||
page_size: int = Query(50, ge=1, le=200),
|
||||
):
|
||||
"""Search school staff across all schools."""
|
||||
pool = await get_db_pool()
|
||||
async with pool.acquire() as conn:
|
||||
conditions = ["ss.is_active = TRUE", "s.is_active = TRUE"]
|
||||
params = []
|
||||
param_idx = 1
|
||||
|
||||
if q:
|
||||
conditions.append(f"""
|
||||
(LOWER(ss.full_name) LIKE LOWER(${param_idx})
|
||||
OR LOWER(ss.last_name) LIKE LOWER(${param_idx})
|
||||
OR LOWER(s.name) LIKE LOWER(${param_idx}))
|
||||
""")
|
||||
params.append(f"%{q}%")
|
||||
param_idx += 1
|
||||
|
||||
if state:
|
||||
conditions.append(f"s.state = ${param_idx}")
|
||||
params.append(state.upper())
|
||||
param_idx += 1
|
||||
|
||||
if position_type:
|
||||
conditions.append(f"ss.position_type = ${param_idx}")
|
||||
params.append(position_type)
|
||||
param_idx += 1
|
||||
|
||||
if has_email is not None and has_email:
|
||||
conditions.append("ss.email IS NOT NULL")
|
||||
|
||||
where_clause = " AND ".join(conditions)
|
||||
|
||||
# Count total
|
||||
total = await conn.fetchval(f"""
|
||||
SELECT COUNT(*) FROM school_staff ss
|
||||
JOIN schools s ON ss.school_id = s.id
|
||||
WHERE {where_clause}
|
||||
""", *params)
|
||||
|
||||
# Fetch staff
|
||||
offset = (page - 1) * page_size
|
||||
rows = await conn.fetch(f"""
|
||||
SELECT
|
||||
ss.id, ss.school_id, ss.first_name, ss.last_name, ss.full_name,
|
||||
ss.title, ss.position, ss.position_type, ss.subjects,
|
||||
ss.email, ss.phone, ss.profile_url, ss.photo_url,
|
||||
ss.is_active, ss.created_at,
|
||||
s.name as school_name
|
||||
FROM school_staff ss
|
||||
JOIN schools s ON ss.school_id = s.id
|
||||
WHERE {where_clause}
|
||||
ORDER BY ss.last_name, ss.first_name
|
||||
LIMIT ${param_idx} OFFSET ${param_idx + 1}
|
||||
""", *params, page_size, offset)
|
||||
|
||||
staff = [
|
||||
SchoolStaffResponse(
|
||||
id=str(row["id"]),
|
||||
school_id=str(row["school_id"]),
|
||||
school_name=row["school_name"],
|
||||
first_name=row["first_name"],
|
||||
last_name=row["last_name"],
|
||||
full_name=row["full_name"],
|
||||
title=row["title"],
|
||||
position=row["position"],
|
||||
position_type=row["position_type"],
|
||||
subjects=row["subjects"],
|
||||
email=row["email"],
|
||||
phone=row["phone"],
|
||||
profile_url=row["profile_url"],
|
||||
photo_url=row["photo_url"],
|
||||
is_active=row["is_active"],
|
||||
created_at=row["created_at"],
|
||||
)
|
||||
for row in rows
|
||||
]
|
||||
|
||||
return SchoolStaffListResponse(
|
||||
staff=staff,
|
||||
total=total,
|
||||
)
|
||||
router.include_router(_crud_router)
|
||||
router.include_router(_staff_router)
|
||||
|
||||
# Re-export models for any external consumers
|
||||
from .schools_models import ( # noqa: E402, F401
|
||||
SchoolTypeResponse,
|
||||
SchoolBase,
|
||||
SchoolCreate,
|
||||
SchoolUpdate,
|
||||
SchoolResponse,
|
||||
SchoolsListResponse,
|
||||
SchoolStaffBase,
|
||||
SchoolStaffCreate,
|
||||
SchoolStaffResponse,
|
||||
SchoolStaffListResponse,
|
||||
SchoolStatsResponse,
|
||||
BulkImportRequest,
|
||||
BulkImportResponse,
|
||||
)
|
||||
from .schools_db import get_db_pool # noqa: E402, F401
|
||||
|
||||
464
backend-lehrer/llm_gateway/routes/schools_crud.py
Normal file
464
backend-lehrer/llm_gateway/routes/schools_crud.py
Normal file
@@ -0,0 +1,464 @@
|
||||
"""
|
||||
Schools API - School CRUD & Stats Routes.
|
||||
|
||||
List, get, stats, and bulk-import endpoints for schools.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import APIRouter, HTTPException, Query
|
||||
|
||||
from .schools_db import get_db_pool
|
||||
from .schools_models import (
|
||||
SchoolResponse,
|
||||
SchoolsListResponse,
|
||||
SchoolStatsResponse,
|
||||
SchoolTypeResponse,
|
||||
BulkImportRequest,
|
||||
BulkImportResponse,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(tags=["schools"])
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# School Type Endpoints
|
||||
# =============================================================================
|
||||
|
||||
|
||||
@router.get("/types", response_model=list[SchoolTypeResponse])
|
||||
async def list_school_types():
|
||||
"""List all school types."""
|
||||
pool = await get_db_pool()
|
||||
async with pool.acquire() as conn:
|
||||
rows = await conn.fetch("""
|
||||
SELECT id, name, name_short, category, description
|
||||
FROM school_types
|
||||
ORDER BY category, name
|
||||
""")
|
||||
return [
|
||||
SchoolTypeResponse(
|
||||
id=str(row["id"]),
|
||||
name=row["name"],
|
||||
name_short=row["name_short"],
|
||||
category=row["category"],
|
||||
description=row["description"],
|
||||
)
|
||||
for row in rows
|
||||
]
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# School Endpoints
|
||||
# =============================================================================
|
||||
|
||||
|
||||
@router.get("", response_model=SchoolsListResponse)
|
||||
async def list_schools(
|
||||
state: Optional[str] = Query(None, description="Filter by state code (BW, BY, etc.)"),
|
||||
school_type: Optional[str] = Query(None, description="Filter by school type name"),
|
||||
city: Optional[str] = Query(None, description="Filter by city"),
|
||||
district: Optional[str] = Query(None, description="Filter by district"),
|
||||
postal_code: Optional[str] = Query(None, description="Filter by postal code prefix"),
|
||||
search: Optional[str] = Query(None, description="Search in name, city"),
|
||||
has_email: Optional[bool] = Query(None, description="Filter schools with email"),
|
||||
has_website: Optional[bool] = Query(None, description="Filter schools with website"),
|
||||
is_public: Optional[bool] = Query(None, description="Filter public/private schools"),
|
||||
page: int = Query(1, ge=1),
|
||||
page_size: int = Query(50, ge=1, le=200),
|
||||
):
|
||||
"""List schools with optional filtering and pagination."""
|
||||
pool = await get_db_pool()
|
||||
async with pool.acquire() as conn:
|
||||
# Build WHERE clause
|
||||
conditions = ["s.is_active = TRUE"]
|
||||
params = []
|
||||
param_idx = 1
|
||||
|
||||
if state:
|
||||
conditions.append(f"s.state = ${param_idx}")
|
||||
params.append(state.upper())
|
||||
param_idx += 1
|
||||
|
||||
if school_type:
|
||||
conditions.append(f"st.name = ${param_idx}")
|
||||
params.append(school_type)
|
||||
param_idx += 1
|
||||
|
||||
if city:
|
||||
conditions.append(f"LOWER(s.city) = LOWER(${param_idx})")
|
||||
params.append(city)
|
||||
param_idx += 1
|
||||
|
||||
if district:
|
||||
conditions.append(f"LOWER(s.district) LIKE LOWER(${param_idx})")
|
||||
params.append(f"%{district}%")
|
||||
param_idx += 1
|
||||
|
||||
if postal_code:
|
||||
conditions.append(f"s.postal_code LIKE ${param_idx}")
|
||||
params.append(f"{postal_code}%")
|
||||
param_idx += 1
|
||||
|
||||
if search:
|
||||
conditions.append(f"""
|
||||
(LOWER(s.name) LIKE LOWER(${param_idx})
|
||||
OR LOWER(s.city) LIKE LOWER(${param_idx})
|
||||
OR LOWER(s.district) LIKE LOWER(${param_idx}))
|
||||
""")
|
||||
params.append(f"%{search}%")
|
||||
param_idx += 1
|
||||
|
||||
if has_email is not None:
|
||||
if has_email:
|
||||
conditions.append("s.email IS NOT NULL")
|
||||
else:
|
||||
conditions.append("s.email IS NULL")
|
||||
|
||||
if has_website is not None:
|
||||
if has_website:
|
||||
conditions.append("s.website IS NOT NULL")
|
||||
else:
|
||||
conditions.append("s.website IS NULL")
|
||||
|
||||
if is_public is not None:
|
||||
conditions.append(f"s.is_public = ${param_idx}")
|
||||
params.append(is_public)
|
||||
param_idx += 1
|
||||
|
||||
where_clause = " AND ".join(conditions)
|
||||
|
||||
# Count total
|
||||
count_query = f"""
|
||||
SELECT COUNT(*) FROM schools s
|
||||
LEFT JOIN school_types st ON s.school_type_id = st.id
|
||||
WHERE {where_clause}
|
||||
"""
|
||||
total = await conn.fetchval(count_query, *params)
|
||||
|
||||
# Fetch schools
|
||||
offset = (page - 1) * page_size
|
||||
query = f"""
|
||||
SELECT
|
||||
s.id, s.name, s.school_number, s.state, s.district, s.city,
|
||||
s.postal_code, s.street, s.address_full, s.latitude, s.longitude,
|
||||
s.website, s.email, s.phone, s.fax,
|
||||
s.principal_name, s.principal_email,
|
||||
s.student_count, s.teacher_count,
|
||||
s.is_public, s.is_all_day, s.source, s.crawled_at,
|
||||
s.is_active, s.created_at, s.updated_at,
|
||||
st.name as school_type, st.name_short as school_type_short, st.category as school_category,
|
||||
(SELECT COUNT(*) FROM school_staff ss WHERE ss.school_id = s.id AND ss.is_active = TRUE) as staff_count
|
||||
FROM schools s
|
||||
LEFT JOIN school_types st ON s.school_type_id = st.id
|
||||
WHERE {where_clause}
|
||||
ORDER BY s.state, s.city, s.name
|
||||
LIMIT ${param_idx} OFFSET ${param_idx + 1}
|
||||
"""
|
||||
params.extend([page_size, offset])
|
||||
rows = await conn.fetch(query, *params)
|
||||
|
||||
schools = [
|
||||
SchoolResponse(
|
||||
id=str(row["id"]),
|
||||
name=row["name"],
|
||||
school_number=row["school_number"],
|
||||
school_type=row["school_type"],
|
||||
school_type_short=row["school_type_short"],
|
||||
school_category=row["school_category"],
|
||||
state=row["state"],
|
||||
district=row["district"],
|
||||
city=row["city"],
|
||||
postal_code=row["postal_code"],
|
||||
street=row["street"],
|
||||
address_full=row["address_full"],
|
||||
latitude=row["latitude"],
|
||||
longitude=row["longitude"],
|
||||
website=row["website"],
|
||||
email=row["email"],
|
||||
phone=row["phone"],
|
||||
fax=row["fax"],
|
||||
principal_name=row["principal_name"],
|
||||
principal_email=row["principal_email"],
|
||||
student_count=row["student_count"],
|
||||
teacher_count=row["teacher_count"],
|
||||
is_public=row["is_public"],
|
||||
is_all_day=row["is_all_day"],
|
||||
staff_count=row["staff_count"],
|
||||
source=row["source"],
|
||||
crawled_at=row["crawled_at"],
|
||||
is_active=row["is_active"],
|
||||
created_at=row["created_at"],
|
||||
updated_at=row["updated_at"],
|
||||
)
|
||||
for row in rows
|
||||
]
|
||||
|
||||
return SchoolsListResponse(
|
||||
schools=schools,
|
||||
total=total,
|
||||
page=page,
|
||||
page_size=page_size,
|
||||
)
|
||||
|
||||
|
||||
@router.get("/stats", response_model=SchoolStatsResponse)
|
||||
async def get_school_stats():
|
||||
"""Get school statistics."""
|
||||
pool = await get_db_pool()
|
||||
async with pool.acquire() as conn:
|
||||
# Total schools and staff
|
||||
totals = await conn.fetchrow("""
|
||||
SELECT
|
||||
(SELECT COUNT(*) FROM schools WHERE is_active = TRUE) as total_schools,
|
||||
(SELECT COUNT(*) FROM school_staff WHERE is_active = TRUE) as total_staff,
|
||||
(SELECT COUNT(*) FROM schools WHERE is_active = TRUE AND website IS NOT NULL) as with_website,
|
||||
(SELECT COUNT(*) FROM schools WHERE is_active = TRUE AND email IS NOT NULL) as with_email,
|
||||
(SELECT COUNT(*) FROM schools WHERE is_active = TRUE AND principal_name IS NOT NULL) as with_principal,
|
||||
(SELECT COALESCE(SUM(student_count), 0) FROM schools WHERE is_active = TRUE) as total_students,
|
||||
(SELECT COALESCE(SUM(teacher_count), 0) FROM schools WHERE is_active = TRUE) as total_teachers,
|
||||
(SELECT MAX(crawled_at) FROM schools) as last_crawl
|
||||
""")
|
||||
|
||||
# By state
|
||||
state_rows = await conn.fetch("""
|
||||
SELECT state, COUNT(*) as count
|
||||
FROM schools
|
||||
WHERE is_active = TRUE
|
||||
GROUP BY state
|
||||
ORDER BY state
|
||||
""")
|
||||
schools_by_state = {row["state"]: row["count"] for row in state_rows}
|
||||
|
||||
# By type
|
||||
type_rows = await conn.fetch("""
|
||||
SELECT COALESCE(st.name, 'Unbekannt') as type_name, COUNT(*) as count
|
||||
FROM schools s
|
||||
LEFT JOIN school_types st ON s.school_type_id = st.id
|
||||
WHERE s.is_active = TRUE
|
||||
GROUP BY st.name
|
||||
ORDER BY count DESC
|
||||
""")
|
||||
schools_by_type = {row["type_name"]: row["count"] for row in type_rows}
|
||||
|
||||
return SchoolStatsResponse(
|
||||
total_schools=totals["total_schools"],
|
||||
total_staff=totals["total_staff"],
|
||||
schools_by_state=schools_by_state,
|
||||
schools_by_type=schools_by_type,
|
||||
schools_with_website=totals["with_website"],
|
||||
schools_with_email=totals["with_email"],
|
||||
schools_with_principal=totals["with_principal"],
|
||||
total_students=totals["total_students"],
|
||||
total_teachers=totals["total_teachers"],
|
||||
last_crawl_time=totals["last_crawl"],
|
||||
)
|
||||
|
||||
|
||||
@router.get("/{school_id}", response_model=SchoolResponse)
|
||||
async def get_school(school_id: str):
|
||||
"""Get a single school by ID."""
|
||||
pool = await get_db_pool()
|
||||
async with pool.acquire() as conn:
|
||||
row = await conn.fetchrow("""
|
||||
SELECT
|
||||
s.id, s.name, s.school_number, s.state, s.district, s.city,
|
||||
s.postal_code, s.street, s.address_full, s.latitude, s.longitude,
|
||||
s.website, s.email, s.phone, s.fax,
|
||||
s.principal_name, s.principal_email,
|
||||
s.student_count, s.teacher_count,
|
||||
s.is_public, s.is_all_day, s.source, s.crawled_at,
|
||||
s.is_active, s.created_at, s.updated_at,
|
||||
st.name as school_type, st.name_short as school_type_short, st.category as school_category,
|
||||
(SELECT COUNT(*) FROM school_staff ss WHERE ss.school_id = s.id AND ss.is_active = TRUE) as staff_count
|
||||
FROM schools s
|
||||
LEFT JOIN school_types st ON s.school_type_id = st.id
|
||||
WHERE s.id = $1
|
||||
""", school_id)
|
||||
|
||||
if not row:
|
||||
raise HTTPException(status_code=404, detail="School not found")
|
||||
|
||||
return SchoolResponse(
|
||||
id=str(row["id"]),
|
||||
name=row["name"],
|
||||
school_number=row["school_number"],
|
||||
school_type=row["school_type"],
|
||||
school_type_short=row["school_type_short"],
|
||||
school_category=row["school_category"],
|
||||
state=row["state"],
|
||||
district=row["district"],
|
||||
city=row["city"],
|
||||
postal_code=row["postal_code"],
|
||||
street=row["street"],
|
||||
address_full=row["address_full"],
|
||||
latitude=row["latitude"],
|
||||
longitude=row["longitude"],
|
||||
website=row["website"],
|
||||
email=row["email"],
|
||||
phone=row["phone"],
|
||||
fax=row["fax"],
|
||||
principal_name=row["principal_name"],
|
||||
principal_email=row["principal_email"],
|
||||
student_count=row["student_count"],
|
||||
teacher_count=row["teacher_count"],
|
||||
is_public=row["is_public"],
|
||||
is_all_day=row["is_all_day"],
|
||||
staff_count=row["staff_count"],
|
||||
source=row["source"],
|
||||
crawled_at=row["crawled_at"],
|
||||
is_active=row["is_active"],
|
||||
created_at=row["created_at"],
|
||||
updated_at=row["updated_at"],
|
||||
)
|
||||
|
||||
|
||||
@router.post("/bulk-import", response_model=BulkImportResponse)
|
||||
async def bulk_import_schools(request: BulkImportRequest):
|
||||
"""Bulk import schools. Updates existing schools based on school_number + state."""
|
||||
pool = await get_db_pool()
|
||||
imported = 0
|
||||
updated = 0
|
||||
skipped = 0
|
||||
errors = []
|
||||
|
||||
async with pool.acquire() as conn:
|
||||
# Get school type mapping
|
||||
type_rows = await conn.fetch("SELECT id, name FROM school_types")
|
||||
type_map = {row["name"].lower(): str(row["id"]) for row in type_rows}
|
||||
|
||||
for school in request.schools:
|
||||
try:
|
||||
# Find school type ID
|
||||
school_type_id = None
|
||||
if school.school_type_raw:
|
||||
school_type_id = type_map.get(school.school_type_raw.lower())
|
||||
|
||||
# Check if school exists (by school_number + state, or by name + city + state)
|
||||
existing = None
|
||||
if school.school_number:
|
||||
existing = await conn.fetchrow(
|
||||
"SELECT id FROM schools WHERE school_number = $1 AND state = $2",
|
||||
school.school_number, school.state
|
||||
)
|
||||
if not existing and school.city:
|
||||
existing = await conn.fetchrow(
|
||||
"SELECT id FROM schools WHERE LOWER(name) = LOWER($1) AND LOWER(city) = LOWER($2) AND state = $3",
|
||||
school.name, school.city, school.state
|
||||
)
|
||||
|
||||
if existing:
|
||||
# Update existing school
|
||||
await conn.execute("""
|
||||
UPDATE schools SET
|
||||
name = $2,
|
||||
school_type_id = COALESCE($3, school_type_id),
|
||||
school_type_raw = COALESCE($4, school_type_raw),
|
||||
district = COALESCE($5, district),
|
||||
city = COALESCE($6, city),
|
||||
postal_code = COALESCE($7, postal_code),
|
||||
street = COALESCE($8, street),
|
||||
address_full = COALESCE($9, address_full),
|
||||
latitude = COALESCE($10, latitude),
|
||||
longitude = COALESCE($11, longitude),
|
||||
website = COALESCE($12, website),
|
||||
email = COALESCE($13, email),
|
||||
phone = COALESCE($14, phone),
|
||||
fax = COALESCE($15, fax),
|
||||
principal_name = COALESCE($16, principal_name),
|
||||
principal_title = COALESCE($17, principal_title),
|
||||
principal_email = COALESCE($18, principal_email),
|
||||
principal_phone = COALESCE($19, principal_phone),
|
||||
student_count = COALESCE($20, student_count),
|
||||
teacher_count = COALESCE($21, teacher_count),
|
||||
is_public = $22,
|
||||
source = COALESCE($23, source),
|
||||
source_url = COALESCE($24, source_url),
|
||||
updated_at = NOW()
|
||||
WHERE id = $1
|
||||
""",
|
||||
existing["id"],
|
||||
school.name,
|
||||
school_type_id,
|
||||
school.school_type_raw,
|
||||
school.district,
|
||||
school.city,
|
||||
school.postal_code,
|
||||
school.street,
|
||||
school.address_full,
|
||||
school.latitude,
|
||||
school.longitude,
|
||||
school.website,
|
||||
school.email,
|
||||
school.phone,
|
||||
school.fax,
|
||||
school.principal_name,
|
||||
school.principal_title,
|
||||
school.principal_email,
|
||||
school.principal_phone,
|
||||
school.student_count,
|
||||
school.teacher_count,
|
||||
school.is_public,
|
||||
school.source,
|
||||
school.source_url,
|
||||
)
|
||||
updated += 1
|
||||
else:
|
||||
# Insert new school
|
||||
await conn.execute("""
|
||||
INSERT INTO schools (
|
||||
name, school_number, school_type_id, school_type_raw,
|
||||
state, district, city, postal_code, street, address_full,
|
||||
latitude, longitude, website, email, phone, fax,
|
||||
principal_name, principal_title, principal_email, principal_phone,
|
||||
student_count, teacher_count, is_public,
|
||||
source, source_url, crawled_at
|
||||
) VALUES (
|
||||
$1, $2, $3, $4, $5, $6, $7, $8, $9, $10,
|
||||
$11, $12, $13, $14, $15, $16, $17, $18, $19, $20,
|
||||
$21, $22, $23, $24, $25, NOW()
|
||||
)
|
||||
""",
|
||||
school.name,
|
||||
school.school_number,
|
||||
school_type_id,
|
||||
school.school_type_raw,
|
||||
school.state,
|
||||
school.district,
|
||||
school.city,
|
||||
school.postal_code,
|
||||
school.street,
|
||||
school.address_full,
|
||||
school.latitude,
|
||||
school.longitude,
|
||||
school.website,
|
||||
school.email,
|
||||
school.phone,
|
||||
school.fax,
|
||||
school.principal_name,
|
||||
school.principal_title,
|
||||
school.principal_email,
|
||||
school.principal_phone,
|
||||
school.student_count,
|
||||
school.teacher_count,
|
||||
school.is_public,
|
||||
school.source,
|
||||
school.source_url,
|
||||
)
|
||||
imported += 1
|
||||
|
||||
except Exception as e:
|
||||
errors.append(f"Error importing {school.name}: {str(e)}")
|
||||
if len(errors) > 100:
|
||||
errors.append("... (more errors truncated)")
|
||||
break
|
||||
|
||||
return BulkImportResponse(
|
||||
imported=imported,
|
||||
updated=updated,
|
||||
skipped=skipped,
|
||||
errors=errors[:100],
|
||||
)
|
||||
25
backend-lehrer/llm_gateway/routes/schools_db.py
Normal file
25
backend-lehrer/llm_gateway/routes/schools_db.py
Normal file
@@ -0,0 +1,25 @@
|
||||
"""
|
||||
Schools API - Database Connection.
|
||||
|
||||
Shared database pool for school endpoints.
|
||||
"""
|
||||
|
||||
import os
|
||||
from typing import Optional
|
||||
|
||||
import asyncpg
|
||||
|
||||
# Database connection pool
|
||||
_pool: Optional[asyncpg.Pool] = None
|
||||
|
||||
|
||||
async def get_db_pool() -> asyncpg.Pool:
|
||||
"""Get or create database connection pool."""
|
||||
global _pool
|
||||
if _pool is None:
|
||||
database_url = os.environ.get(
|
||||
"DATABASE_URL",
|
||||
"postgresql://breakpilot:breakpilot123@postgres:5432/breakpilot_db"
|
||||
)
|
||||
_pool = await asyncpg.create_pool(database_url, min_size=2, max_size=10)
|
||||
return _pool
|
||||
200
backend-lehrer/llm_gateway/routes/schools_models.py
Normal file
200
backend-lehrer/llm_gateway/routes/schools_models.py
Normal file
@@ -0,0 +1,200 @@
|
||||
"""
|
||||
Schools API - Pydantic Models.
|
||||
|
||||
Data models for school and school staff endpoints.
|
||||
"""
|
||||
|
||||
from typing import Optional, List
|
||||
from datetime import datetime
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# School Type Models
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class SchoolTypeResponse(BaseModel):
|
||||
"""School type response model."""
|
||||
id: str
|
||||
name: str
|
||||
name_short: Optional[str] = None
|
||||
category: Optional[str] = None
|
||||
description: Optional[str] = None
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# School Models
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class SchoolBase(BaseModel):
|
||||
"""Base school model for creation/update."""
|
||||
name: str = Field(..., max_length=255)
|
||||
school_number: Optional[str] = Field(None, max_length=20)
|
||||
school_type_id: Optional[str] = None
|
||||
school_type_raw: Optional[str] = None
|
||||
state: str = Field(..., max_length=10)
|
||||
district: Optional[str] = None
|
||||
city: Optional[str] = None
|
||||
postal_code: Optional[str] = None
|
||||
street: Optional[str] = None
|
||||
address_full: Optional[str] = None
|
||||
latitude: Optional[float] = None
|
||||
longitude: Optional[float] = None
|
||||
website: Optional[str] = None
|
||||
email: Optional[str] = None
|
||||
phone: Optional[str] = None
|
||||
fax: Optional[str] = None
|
||||
principal_name: Optional[str] = None
|
||||
principal_title: Optional[str] = None
|
||||
principal_email: Optional[str] = None
|
||||
principal_phone: Optional[str] = None
|
||||
secretary_name: Optional[str] = None
|
||||
secretary_email: Optional[str] = None
|
||||
secretary_phone: Optional[str] = None
|
||||
student_count: Optional[int] = None
|
||||
teacher_count: Optional[int] = None
|
||||
class_count: Optional[int] = None
|
||||
founded_year: Optional[int] = None
|
||||
is_public: bool = True
|
||||
is_all_day: Optional[bool] = None
|
||||
has_inclusion: Optional[bool] = None
|
||||
languages: Optional[List[str]] = None
|
||||
specializations: Optional[List[str]] = None
|
||||
source: Optional[str] = None
|
||||
source_url: Optional[str] = None
|
||||
|
||||
|
||||
class SchoolCreate(SchoolBase):
|
||||
"""School creation model."""
|
||||
pass
|
||||
|
||||
|
||||
class SchoolUpdate(BaseModel):
|
||||
"""School update model (all fields optional)."""
|
||||
name: Optional[str] = Field(None, max_length=255)
|
||||
school_number: Optional[str] = None
|
||||
school_type_id: Optional[str] = None
|
||||
state: Optional[str] = None
|
||||
district: Optional[str] = None
|
||||
city: Optional[str] = None
|
||||
postal_code: Optional[str] = None
|
||||
street: Optional[str] = None
|
||||
website: Optional[str] = None
|
||||
email: Optional[str] = None
|
||||
phone: Optional[str] = None
|
||||
principal_name: Optional[str] = None
|
||||
student_count: Optional[int] = None
|
||||
teacher_count: Optional[int] = None
|
||||
is_active: Optional[bool] = None
|
||||
|
||||
|
||||
class SchoolResponse(BaseModel):
|
||||
"""School response model."""
|
||||
id: str
|
||||
name: str
|
||||
school_number: Optional[str] = None
|
||||
school_type: Optional[str] = None
|
||||
school_type_short: Optional[str] = None
|
||||
school_category: Optional[str] = None
|
||||
state: str
|
||||
district: Optional[str] = None
|
||||
city: Optional[str] = None
|
||||
postal_code: Optional[str] = None
|
||||
street: Optional[str] = None
|
||||
address_full: Optional[str] = None
|
||||
latitude: Optional[float] = None
|
||||
longitude: Optional[float] = None
|
||||
website: Optional[str] = None
|
||||
email: Optional[str] = None
|
||||
phone: Optional[str] = None
|
||||
fax: Optional[str] = None
|
||||
principal_name: Optional[str] = None
|
||||
principal_email: Optional[str] = None
|
||||
student_count: Optional[int] = None
|
||||
teacher_count: Optional[int] = None
|
||||
is_public: bool = True
|
||||
is_all_day: Optional[bool] = None
|
||||
staff_count: int = 0
|
||||
source: Optional[str] = None
|
||||
crawled_at: Optional[datetime] = None
|
||||
is_active: bool = True
|
||||
created_at: datetime
|
||||
updated_at: datetime
|
||||
|
||||
|
||||
class SchoolsListResponse(BaseModel):
|
||||
"""List response with pagination info."""
|
||||
schools: List[SchoolResponse]
|
||||
total: int
|
||||
page: int
|
||||
page_size: int
|
||||
|
||||
|
||||
class SchoolStatsResponse(BaseModel):
|
||||
"""School statistics response."""
|
||||
total_schools: int
|
||||
total_staff: int
|
||||
schools_by_state: dict
|
||||
schools_by_type: dict
|
||||
schools_with_website: int
|
||||
schools_with_email: int
|
||||
schools_with_principal: int
|
||||
total_students: int
|
||||
total_teachers: int
|
||||
last_crawl_time: Optional[datetime] = None
|
||||
|
||||
|
||||
class BulkImportRequest(BaseModel):
|
||||
"""Bulk import request."""
|
||||
schools: List[SchoolCreate]
|
||||
|
||||
|
||||
class BulkImportResponse(BaseModel):
|
||||
"""Bulk import response."""
|
||||
imported: int
|
||||
updated: int
|
||||
skipped: int
|
||||
errors: List[str]
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# School Staff Models
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class SchoolStaffBase(BaseModel):
|
||||
"""Base school staff model."""
|
||||
first_name: Optional[str] = None
|
||||
last_name: str
|
||||
full_name: Optional[str] = None
|
||||
title: Optional[str] = None
|
||||
position: Optional[str] = None
|
||||
position_type: Optional[str] = None
|
||||
subjects: Optional[List[str]] = None
|
||||
email: Optional[str] = None
|
||||
phone: Optional[str] = None
|
||||
|
||||
|
||||
class SchoolStaffCreate(SchoolStaffBase):
|
||||
"""School staff creation model."""
|
||||
school_id: str
|
||||
|
||||
|
||||
class SchoolStaffResponse(SchoolStaffBase):
|
||||
"""School staff response model."""
|
||||
id: str
|
||||
school_id: str
|
||||
school_name: Optional[str] = None
|
||||
profile_url: Optional[str] = None
|
||||
photo_url: Optional[str] = None
|
||||
is_active: bool = True
|
||||
created_at: datetime
|
||||
|
||||
|
||||
class SchoolStaffListResponse(BaseModel):
|
||||
"""Staff list response."""
|
||||
staff: List[SchoolStaffResponse]
|
||||
total: int
|
||||
233
backend-lehrer/llm_gateway/routes/schools_staff.py
Normal file
233
backend-lehrer/llm_gateway/routes/schools_staff.py
Normal file
@@ -0,0 +1,233 @@
|
||||
"""
|
||||
Schools API - Staff Routes.
|
||||
|
||||
CRUD and search endpoints for school staff members.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import APIRouter, HTTPException, Query
|
||||
|
||||
from .schools_db import get_db_pool
|
||||
from .schools_models import (
|
||||
SchoolStaffBase,
|
||||
SchoolStaffResponse,
|
||||
SchoolStaffListResponse,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(tags=["schools"])
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# School Staff Endpoints
|
||||
# =============================================================================
|
||||
|
||||
|
||||
@router.get("/{school_id}/staff", response_model=SchoolStaffListResponse)
|
||||
async def get_school_staff(school_id: str):
|
||||
"""Get staff members for a school."""
|
||||
pool = await get_db_pool()
|
||||
async with pool.acquire() as conn:
|
||||
rows = await conn.fetch("""
|
||||
SELECT
|
||||
ss.id, ss.school_id, ss.first_name, ss.last_name, ss.full_name,
|
||||
ss.title, ss.position, ss.position_type, ss.subjects,
|
||||
ss.email, ss.phone, ss.profile_url, ss.photo_url,
|
||||
ss.is_active, ss.created_at,
|
||||
s.name as school_name
|
||||
FROM school_staff ss
|
||||
JOIN schools s ON ss.school_id = s.id
|
||||
WHERE ss.school_id = $1 AND ss.is_active = TRUE
|
||||
ORDER BY
|
||||
CASE ss.position_type
|
||||
WHEN 'principal' THEN 1
|
||||
WHEN 'vice_principal' THEN 2
|
||||
WHEN 'secretary' THEN 3
|
||||
ELSE 4
|
||||
END,
|
||||
ss.last_name
|
||||
""", school_id)
|
||||
|
||||
staff = [
|
||||
SchoolStaffResponse(
|
||||
id=str(row["id"]),
|
||||
school_id=str(row["school_id"]),
|
||||
school_name=row["school_name"],
|
||||
first_name=row["first_name"],
|
||||
last_name=row["last_name"],
|
||||
full_name=row["full_name"],
|
||||
title=row["title"],
|
||||
position=row["position"],
|
||||
position_type=row["position_type"],
|
||||
subjects=row["subjects"],
|
||||
email=row["email"],
|
||||
phone=row["phone"],
|
||||
profile_url=row["profile_url"],
|
||||
photo_url=row["photo_url"],
|
||||
is_active=row["is_active"],
|
||||
created_at=row["created_at"],
|
||||
)
|
||||
for row in rows
|
||||
]
|
||||
|
||||
return SchoolStaffListResponse(
|
||||
staff=staff,
|
||||
total=len(staff),
|
||||
)
|
||||
|
||||
|
||||
@router.post("/{school_id}/staff", response_model=SchoolStaffResponse)
|
||||
async def create_school_staff(school_id: str, staff: SchoolStaffBase):
|
||||
"""Add a staff member to a school."""
|
||||
pool = await get_db_pool()
|
||||
async with pool.acquire() as conn:
|
||||
# Verify school exists
|
||||
school = await conn.fetchrow("SELECT name FROM schools WHERE id = $1", school_id)
|
||||
if not school:
|
||||
raise HTTPException(status_code=404, detail="School not found")
|
||||
|
||||
# Create full name
|
||||
full_name = staff.full_name
|
||||
if not full_name:
|
||||
parts = []
|
||||
if staff.title:
|
||||
parts.append(staff.title)
|
||||
if staff.first_name:
|
||||
parts.append(staff.first_name)
|
||||
parts.append(staff.last_name)
|
||||
full_name = " ".join(parts)
|
||||
|
||||
row = await conn.fetchrow("""
|
||||
INSERT INTO school_staff (
|
||||
school_id, first_name, last_name, full_name, title,
|
||||
position, position_type, subjects, email, phone
|
||||
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)
|
||||
RETURNING id, created_at
|
||||
""",
|
||||
school_id,
|
||||
staff.first_name,
|
||||
staff.last_name,
|
||||
full_name,
|
||||
staff.title,
|
||||
staff.position,
|
||||
staff.position_type,
|
||||
staff.subjects,
|
||||
staff.email,
|
||||
staff.phone,
|
||||
)
|
||||
|
||||
return SchoolStaffResponse(
|
||||
id=str(row["id"]),
|
||||
school_id=school_id,
|
||||
school_name=school["name"],
|
||||
first_name=staff.first_name,
|
||||
last_name=staff.last_name,
|
||||
full_name=full_name,
|
||||
title=staff.title,
|
||||
position=staff.position,
|
||||
position_type=staff.position_type,
|
||||
subjects=staff.subjects,
|
||||
email=staff.email,
|
||||
phone=staff.phone,
|
||||
is_active=True,
|
||||
created_at=row["created_at"],
|
||||
)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Search Endpoints
|
||||
# =============================================================================
|
||||
|
||||
|
||||
@router.get("/search/staff", response_model=SchoolStaffListResponse)
|
||||
async def search_school_staff(
|
||||
q: Optional[str] = Query(None, description="Search query"),
|
||||
state: Optional[str] = Query(None, description="Filter by state"),
|
||||
position_type: Optional[str] = Query(None, description="Filter by position type"),
|
||||
has_email: Optional[bool] = Query(None, description="Only staff with email"),
|
||||
page: int = Query(1, ge=1),
|
||||
page_size: int = Query(50, ge=1, le=200),
|
||||
):
|
||||
"""Search school staff across all schools."""
|
||||
pool = await get_db_pool()
|
||||
async with pool.acquire() as conn:
|
||||
conditions = ["ss.is_active = TRUE", "s.is_active = TRUE"]
|
||||
params = []
|
||||
param_idx = 1
|
||||
|
||||
if q:
|
||||
conditions.append(f"""
|
||||
(LOWER(ss.full_name) LIKE LOWER(${param_idx})
|
||||
OR LOWER(ss.last_name) LIKE LOWER(${param_idx})
|
||||
OR LOWER(s.name) LIKE LOWER(${param_idx}))
|
||||
""")
|
||||
params.append(f"%{q}%")
|
||||
param_idx += 1
|
||||
|
||||
if state:
|
||||
conditions.append(f"s.state = ${param_idx}")
|
||||
params.append(state.upper())
|
||||
param_idx += 1
|
||||
|
||||
if position_type:
|
||||
conditions.append(f"ss.position_type = ${param_idx}")
|
||||
params.append(position_type)
|
||||
param_idx += 1
|
||||
|
||||
if has_email is not None and has_email:
|
||||
conditions.append("ss.email IS NOT NULL")
|
||||
|
||||
where_clause = " AND ".join(conditions)
|
||||
|
||||
# Count total
|
||||
total = await conn.fetchval(f"""
|
||||
SELECT COUNT(*) FROM school_staff ss
|
||||
JOIN schools s ON ss.school_id = s.id
|
||||
WHERE {where_clause}
|
||||
""", *params)
|
||||
|
||||
# Fetch staff
|
||||
offset = (page - 1) * page_size
|
||||
rows = await conn.fetch(f"""
|
||||
SELECT
|
||||
ss.id, ss.school_id, ss.first_name, ss.last_name, ss.full_name,
|
||||
ss.title, ss.position, ss.position_type, ss.subjects,
|
||||
ss.email, ss.phone, ss.profile_url, ss.photo_url,
|
||||
ss.is_active, ss.created_at,
|
||||
s.name as school_name
|
||||
FROM school_staff ss
|
||||
JOIN schools s ON ss.school_id = s.id
|
||||
WHERE {where_clause}
|
||||
ORDER BY ss.last_name, ss.first_name
|
||||
LIMIT ${param_idx} OFFSET ${param_idx + 1}
|
||||
""", *params, page_size, offset)
|
||||
|
||||
staff = [
|
||||
SchoolStaffResponse(
|
||||
id=str(row["id"]),
|
||||
school_id=str(row["school_id"]),
|
||||
school_name=row["school_name"],
|
||||
first_name=row["first_name"],
|
||||
last_name=row["last_name"],
|
||||
full_name=row["full_name"],
|
||||
title=row["title"],
|
||||
position=row["position"],
|
||||
position_type=row["position_type"],
|
||||
subjects=row["subjects"],
|
||||
email=row["email"],
|
||||
phone=row["phone"],
|
||||
profile_url=row["profile_url"],
|
||||
photo_url=row["photo_url"],
|
||||
is_active=row["is_active"],
|
||||
created_at=row["created_at"],
|
||||
)
|
||||
for row in rows
|
||||
]
|
||||
|
||||
return SchoolStaffListResponse(
|
||||
staff=staff,
|
||||
total=total,
|
||||
)
|
||||
Reference in New Issue
Block a user