[split-required] Split 700-870 LOC files across all services

backend-lehrer (11 files):
- llm_gateway/routes/schools.py (867 → 5), recording_api.py (848 → 6)
- messenger_api.py (840 → 5), print_generator.py (824 → 5)
- unit_analytics_api.py (751 → 5), classroom/routes/context.py (726 → 4)
- llm_gateway/routes/edu_search_seeds.py (710 → 4)

klausur-service (12 files):
- ocr_labeling_api.py (845 → 4), metrics_db.py (833 → 4)
- legal_corpus_api.py (790 → 4), page_crop.py (758 → 3)
- mail/ai_service.py (747 → 4), github_crawler.py (767 → 3)
- trocr_service.py (730 → 4), full_compliance_pipeline.py (723 → 4)
- dsfa_rag_api.py (715 → 4), ocr_pipeline_auto.py (705 → 4)

website (6 pages):
- audit-checklist (867 → 8), content (806 → 6)
- screen-flow (790 → 4), scraper (789 → 5)
- zeugnisse (776 → 5), modules (745 → 4)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-04-25 08:01:18 +02:00
parent b6983ab1dc
commit 34da9f4cda
106 changed files with 16500 additions and 16947 deletions

View File

@@ -0,0 +1,386 @@
"""
EduSearch Seeds CRUD Routes.
List, get, create, update, delete, and bulk import for seed URLs.
"""
import os
import logging
from typing import Optional, List
from datetime import datetime
from fastapi import APIRouter, HTTPException, Query
import asyncpg
from .edu_search_models import (
CategoryResponse,
SeedCreate,
SeedUpdate,
SeedResponse,
SeedsListResponse,
BulkImportRequest,
BulkImportResponse,
)
logger = logging.getLogger(__name__)
router = APIRouter(tags=["edu-search"])
# Database connection pool
_pool: Optional[asyncpg.Pool] = None
async def get_db_pool() -> asyncpg.Pool:
"""Get or create database connection pool."""
global _pool
if _pool is None:
database_url = os.environ.get("DATABASE_URL")
if not database_url:
raise RuntimeError("DATABASE_URL nicht konfiguriert - bitte via Vault oder Umgebungsvariable setzen")
_pool = await asyncpg.create_pool(database_url, min_size=2, max_size=10)
return _pool
@router.get("/categories", response_model=List[CategoryResponse])
async def list_categories():
"""List all seed categories."""
pool = await get_db_pool()
async with pool.acquire() as conn:
rows = await conn.fetch("""
SELECT id, name, display_name, description, icon, sort_order, is_active
FROM edu_search_categories
WHERE is_active = TRUE
ORDER BY sort_order
""")
return [
CategoryResponse(
id=str(row["id"]),
name=row["name"],
display_name=row["display_name"],
description=row["description"],
icon=row["icon"],
sort_order=row["sort_order"],
is_active=row["is_active"],
)
for row in rows
]
@router.get("/seeds", response_model=SeedsListResponse)
async def list_seeds(
category: Optional[str] = Query(None, description="Filter by category name"),
state: Optional[str] = Query(None, description="Filter by state code"),
enabled: Optional[bool] = Query(None, description="Filter by enabled status"),
search: Optional[str] = Query(None, description="Search in name/url"),
page: int = Query(1, ge=1),
page_size: int = Query(50, ge=1, le=200),
):
"""List seeds with optional filtering and pagination."""
pool = await get_db_pool()
async with pool.acquire() as conn:
# Build WHERE clause
conditions = []
params = []
param_idx = 1
if category:
conditions.append(f"c.name = ${param_idx}")
params.append(category)
param_idx += 1
if state:
conditions.append(f"s.state = ${param_idx}")
params.append(state)
param_idx += 1
if enabled is not None:
conditions.append(f"s.enabled = ${param_idx}")
params.append(enabled)
param_idx += 1
if search:
conditions.append(f"(s.name ILIKE ${param_idx} OR s.url ILIKE ${param_idx})")
params.append(f"%{search}%")
param_idx += 1
where_clause = " AND ".join(conditions) if conditions else "TRUE"
# Count total
count_query = f"""
SELECT COUNT(*) FROM edu_search_seeds s
LEFT JOIN edu_search_categories c ON s.category_id = c.id
WHERE {where_clause}
"""
total = await conn.fetchval(count_query, *params)
# Get paginated results
offset = (page - 1) * page_size
params.extend([page_size, offset])
query = f"""
SELECT
s.id, s.url, s.name, s.description,
c.name as category, c.display_name as category_display_name,
s.source_type, s.scope, s.state, s.trust_boost, s.enabled,
s.crawl_depth, s.crawl_frequency, s.last_crawled_at,
s.last_crawl_status, s.last_crawl_docs, s.total_documents,
s.created_at, s.updated_at
FROM edu_search_seeds s
LEFT JOIN edu_search_categories c ON s.category_id = c.id
WHERE {where_clause}
ORDER BY c.sort_order, s.name
LIMIT ${param_idx} OFFSET ${param_idx + 1}
"""
rows = await conn.fetch(query, *params)
seeds = [_row_to_seed_response(row) for row in rows]
return SeedsListResponse(
seeds=seeds,
total=total,
page=page,
page_size=page_size,
)
@router.get("/seeds/{seed_id}", response_model=SeedResponse)
async def get_seed(seed_id: str):
"""Get a single seed by ID."""
pool = await get_db_pool()
async with pool.acquire() as conn:
row = await conn.fetchrow("""
SELECT
s.id, s.url, s.name, s.description,
c.name as category, c.display_name as category_display_name,
s.source_type, s.scope, s.state, s.trust_boost, s.enabled,
s.crawl_depth, s.crawl_frequency, s.last_crawled_at,
s.last_crawl_status, s.last_crawl_docs, s.total_documents,
s.created_at, s.updated_at
FROM edu_search_seeds s
LEFT JOIN edu_search_categories c ON s.category_id = c.id
WHERE s.id = $1
""", seed_id)
if not row:
raise HTTPException(status_code=404, detail="Seed nicht gefunden")
return _row_to_seed_response(row)
@router.post("/seeds", response_model=SeedResponse, status_code=201)
async def create_seed(seed: SeedCreate):
"""Create a new seed URL."""
pool = await get_db_pool()
async with pool.acquire() as conn:
category_id = None
if seed.category_name:
category_id = await conn.fetchval(
"SELECT id FROM edu_search_categories WHERE name = $1",
seed.category_name
)
try:
row = await conn.fetchrow("""
INSERT INTO edu_search_seeds (
url, name, description, category_id, source_type, scope,
state, trust_boost, enabled, crawl_depth, crawl_frequency
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11)
RETURNING id, created_at, updated_at
""",
seed.url, seed.name, seed.description, category_id,
seed.source_type, seed.scope, seed.state, seed.trust_boost,
seed.enabled, seed.crawl_depth, seed.crawl_frequency
)
except asyncpg.UniqueViolationError:
raise HTTPException(status_code=409, detail="URL existiert bereits")
return SeedResponse(
id=str(row["id"]),
url=seed.url,
name=seed.name,
description=seed.description,
category=seed.category_name,
category_display_name=None,
source_type=seed.source_type,
scope=seed.scope,
state=seed.state,
trust_boost=seed.trust_boost,
enabled=seed.enabled,
crawl_depth=seed.crawl_depth,
crawl_frequency=seed.crawl_frequency,
last_crawled_at=None,
last_crawl_status=None,
last_crawl_docs=0,
total_documents=0,
created_at=row["created_at"],
updated_at=row["updated_at"],
)
@router.put("/seeds/{seed_id}", response_model=SeedResponse)
async def update_seed(seed_id: str, seed: SeedUpdate):
"""Update an existing seed."""
pool = await get_db_pool()
async with pool.acquire() as conn:
updates = []
params = []
param_idx = 1
if seed.url is not None:
updates.append(f"url = ${param_idx}")
params.append(seed.url)
param_idx += 1
if seed.name is not None:
updates.append(f"name = ${param_idx}")
params.append(seed.name)
param_idx += 1
if seed.description is not None:
updates.append(f"description = ${param_idx}")
params.append(seed.description)
param_idx += 1
if seed.category_name is not None:
category_id = await conn.fetchval(
"SELECT id FROM edu_search_categories WHERE name = $1",
seed.category_name
)
updates.append(f"category_id = ${param_idx}")
params.append(category_id)
param_idx += 1
if seed.source_type is not None:
updates.append(f"source_type = ${param_idx}")
params.append(seed.source_type)
param_idx += 1
if seed.scope is not None:
updates.append(f"scope = ${param_idx}")
params.append(seed.scope)
param_idx += 1
if seed.state is not None:
updates.append(f"state = ${param_idx}")
params.append(seed.state)
param_idx += 1
if seed.trust_boost is not None:
updates.append(f"trust_boost = ${param_idx}")
params.append(seed.trust_boost)
param_idx += 1
if seed.enabled is not None:
updates.append(f"enabled = ${param_idx}")
params.append(seed.enabled)
param_idx += 1
if seed.crawl_depth is not None:
updates.append(f"crawl_depth = ${param_idx}")
params.append(seed.crawl_depth)
param_idx += 1
if seed.crawl_frequency is not None:
updates.append(f"crawl_frequency = ${param_idx}")
params.append(seed.crawl_frequency)
param_idx += 1
if not updates:
raise HTTPException(status_code=400, detail="Keine Felder zum Aktualisieren")
updates.append("updated_at = NOW()")
params.append(seed_id)
query = f"""
UPDATE edu_search_seeds
SET {", ".join(updates)}
WHERE id = ${param_idx}
RETURNING id
"""
result = await conn.fetchrow(query, *params)
if not result:
raise HTTPException(status_code=404, detail="Seed nicht gefunden")
# Return updated seed
return await get_seed(seed_id)
@router.delete("/seeds/{seed_id}")
async def delete_seed(seed_id: str):
"""Delete a seed."""
pool = await get_db_pool()
async with pool.acquire() as conn:
result = await conn.execute(
"DELETE FROM edu_search_seeds WHERE id = $1",
seed_id
)
if result == "DELETE 0":
raise HTTPException(status_code=404, detail="Seed nicht gefunden")
return {"status": "deleted", "id": seed_id}
@router.post("/seeds/bulk-import", response_model=BulkImportResponse)
async def bulk_import_seeds(request: BulkImportRequest):
"""Bulk import seeds (skip duplicates)."""
pool = await get_db_pool()
imported = 0
skipped = 0
errors = []
async with pool.acquire() as conn:
# Pre-fetch all category IDs
categories = {}
rows = await conn.fetch("SELECT id, name FROM edu_search_categories")
for row in rows:
categories[row["name"]] = row["id"]
for seed in request.seeds:
try:
category_id = categories.get(seed.category_name) if seed.category_name else None
await conn.execute("""
INSERT INTO edu_search_seeds (
url, name, description, category_id, source_type, scope,
state, trust_boost, enabled, crawl_depth, crawl_frequency
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11)
ON CONFLICT (url) DO NOTHING
""",
seed.url, seed.name, seed.description, category_id,
seed.source_type, seed.scope, seed.state, seed.trust_boost,
seed.enabled, seed.crawl_depth, seed.crawl_frequency
)
imported += 1
except asyncpg.UniqueViolationError:
skipped += 1
except Exception as e:
errors.append(f"{seed.url}: {str(e)}")
return BulkImportResponse(imported=imported, skipped=skipped, errors=errors)
def _row_to_seed_response(row) -> SeedResponse:
"""Convert a database row to SeedResponse."""
return SeedResponse(
id=str(row["id"]),
url=row["url"],
name=row["name"],
description=row["description"],
category=row["category"],
category_display_name=row["category_display_name"],
source_type=row["source_type"],
scope=row["scope"],
state=row["state"],
trust_boost=float(row["trust_boost"]),
enabled=row["enabled"],
crawl_depth=row["crawl_depth"],
crawl_frequency=row["crawl_frequency"],
last_crawled_at=row["last_crawled_at"],
last_crawl_status=row["last_crawl_status"],
last_crawl_docs=row["last_crawl_docs"] or 0,
total_documents=row["total_documents"] or 0,
created_at=row["created_at"],
updated_at=row["updated_at"],
)

View File

@@ -0,0 +1,137 @@
"""
EduSearch Seeds Pydantic Models.
Request/Response models for the education search seed URL API.
"""
from typing import Optional, List
from datetime import datetime
from pydantic import BaseModel, Field
class CategoryResponse(BaseModel):
"""Category response model."""
id: str
name: str
display_name: str
description: Optional[str] = None
icon: Optional[str] = None
sort_order: int
is_active: bool
class SeedBase(BaseModel):
"""Base seed model for creation/update."""
url: str = Field(..., max_length=500)
name: str = Field(..., max_length=255)
description: Optional[str] = None
category_name: Optional[str] = Field(None, description="Category name (federal, states, etc.)")
source_type: str = Field("GOV", description="GOV, EDU, UNI, etc.")
scope: str = Field("FEDERAL", description="FEDERAL, STATE, etc.")
state: Optional[str] = Field(None, max_length=5, description="State code (BW, BY, etc.)")
trust_boost: float = Field(0.50, ge=0.0, le=1.0)
enabled: bool = True
crawl_depth: int = Field(2, ge=1, le=5)
crawl_frequency: str = Field("weekly", description="hourly, daily, weekly, monthly")
class SeedCreate(SeedBase):
"""Seed creation model."""
pass
class SeedUpdate(BaseModel):
"""Seed update model (all fields optional)."""
url: Optional[str] = Field(None, max_length=500)
name: Optional[str] = Field(None, max_length=255)
description: Optional[str] = None
category_name: Optional[str] = None
source_type: Optional[str] = None
scope: Optional[str] = None
state: Optional[str] = Field(None, max_length=5)
trust_boost: Optional[float] = Field(None, ge=0.0, le=1.0)
enabled: Optional[bool] = None
crawl_depth: Optional[int] = Field(None, ge=1, le=5)
crawl_frequency: Optional[str] = None
class SeedResponse(BaseModel):
"""Seed response model."""
id: str
url: str
name: str
description: Optional[str] = None
category: Optional[str] = None
category_display_name: Optional[str] = None
source_type: str
scope: str
state: Optional[str] = None
trust_boost: float
enabled: bool
crawl_depth: int
crawl_frequency: str
last_crawled_at: Optional[datetime] = None
last_crawl_status: Optional[str] = None
last_crawl_docs: int = 0
total_documents: int = 0
created_at: datetime
updated_at: datetime
class SeedsListResponse(BaseModel):
"""List response with pagination info."""
seeds: List[SeedResponse]
total: int
page: int
page_size: int
class StatsResponse(BaseModel):
"""Crawl statistics response."""
total_seeds: int
enabled_seeds: int
total_documents: int
seeds_by_category: dict
seeds_by_state: dict
last_crawl_time: Optional[datetime] = None
class BulkImportRequest(BaseModel):
"""Bulk import request."""
seeds: List[SeedCreate]
class BulkImportResponse(BaseModel):
"""Bulk import response."""
imported: int
skipped: int
errors: List[str]
class CrawlStatusUpdate(BaseModel):
"""Crawl status update from edu-search-service."""
seed_url: str = Field(..., description="The seed URL that was crawled")
status: str = Field(..., description="Crawl status: success, error, partial")
documents_crawled: int = Field(0, ge=0, description="Number of documents crawled")
error_message: Optional[str] = Field(None, description="Error message if status is error")
crawl_duration_seconds: float = Field(0.0, ge=0.0, description="Duration of the crawl in seconds")
class CrawlStatusResponse(BaseModel):
"""Response for crawl status update."""
success: bool
seed_url: str
message: str
class BulkCrawlStatusUpdate(BaseModel):
"""Bulk crawl status update."""
updates: List[CrawlStatusUpdate]
class BulkCrawlStatusResponse(BaseModel):
"""Response for bulk crawl status update."""
updated: int
failed: int
errors: List[str]

View File

@@ -1,710 +1,58 @@
"""
EduSearch Seeds API Routes.
EduSearch Seeds API Routes — Barrel Re-export.
Split into submodules:
- edu_search_models.py — Pydantic request/response models
- edu_search_crud.py — CRUD endpoints (list, get, create, update, delete, bulk import)
- edu_search_status.py — Stats, export for crawler, crawl status feedback
CRUD operations for managing education search crawler seed URLs.
Direct database access to PostgreSQL.
"""
import os
import logging
from typing import Optional, List
from datetime import datetime
from uuid import UUID
from fastapi import APIRouter
from fastapi import APIRouter, HTTPException, Depends, Query
from pydantic import BaseModel, Field, HttpUrl
import asyncpg
from .edu_search_crud import router as _crud_router, get_db_pool
from .edu_search_status import router as _status_router
logger = logging.getLogger(__name__)
# Re-export models for consumers that import types from this module
from .edu_search_models import (
CategoryResponse,
SeedBase,
SeedCreate,
SeedUpdate,
SeedResponse,
SeedsListResponse,
StatsResponse,
BulkImportRequest,
BulkImportResponse,
CrawlStatusUpdate,
CrawlStatusResponse,
BulkCrawlStatusUpdate,
BulkCrawlStatusResponse,
)
# Combine both sub-routers into a single router for backwards compatibility.
# The consumer imports `from .edu_search_seeds import router as edu_search_seeds_router`.
router = APIRouter(prefix="/edu-search", tags=["edu-search"])
# Database connection pool
_pool: Optional[asyncpg.Pool] = None
async def get_db_pool() -> asyncpg.Pool:
"""Get or create database connection pool."""
global _pool
if _pool is None:
database_url = os.environ.get("DATABASE_URL")
if not database_url:
raise RuntimeError("DATABASE_URL nicht konfiguriert - bitte via Vault oder Umgebungsvariable setzen")
_pool = await asyncpg.create_pool(database_url, min_size=2, max_size=10)
return _pool
# =============================================================================
# Pydantic Models
# =============================================================================
class CategoryResponse(BaseModel):
"""Category response model."""
id: str
name: str
display_name: str
description: Optional[str] = None
icon: Optional[str] = None
sort_order: int
is_active: bool
class SeedBase(BaseModel):
"""Base seed model for creation/update."""
url: str = Field(..., max_length=500)
name: str = Field(..., max_length=255)
description: Optional[str] = None
category_name: Optional[str] = Field(None, description="Category name (federal, states, etc.)")
source_type: str = Field("GOV", description="GOV, EDU, UNI, etc.")
scope: str = Field("FEDERAL", description="FEDERAL, STATE, etc.")
state: Optional[str] = Field(None, max_length=5, description="State code (BW, BY, etc.)")
trust_boost: float = Field(0.50, ge=0.0, le=1.0)
enabled: bool = True
crawl_depth: int = Field(2, ge=1, le=5)
crawl_frequency: str = Field("weekly", description="hourly, daily, weekly, monthly")
class SeedCreate(SeedBase):
"""Seed creation model."""
pass
class SeedUpdate(BaseModel):
"""Seed update model (all fields optional)."""
url: Optional[str] = Field(None, max_length=500)
name: Optional[str] = Field(None, max_length=255)
description: Optional[str] = None
category_name: Optional[str] = None
source_type: Optional[str] = None
scope: Optional[str] = None
state: Optional[str] = Field(None, max_length=5)
trust_boost: Optional[float] = Field(None, ge=0.0, le=1.0)
enabled: Optional[bool] = None
crawl_depth: Optional[int] = Field(None, ge=1, le=5)
crawl_frequency: Optional[str] = None
class SeedResponse(BaseModel):
"""Seed response model."""
id: str
url: str
name: str
description: Optional[str] = None
category: Optional[str] = None
category_display_name: Optional[str] = None
source_type: str
scope: str
state: Optional[str] = None
trust_boost: float
enabled: bool
crawl_depth: int
crawl_frequency: str
last_crawled_at: Optional[datetime] = None
last_crawl_status: Optional[str] = None
last_crawl_docs: int = 0
total_documents: int = 0
created_at: datetime
updated_at: datetime
class SeedsListResponse(BaseModel):
"""List response with pagination info."""
seeds: List[SeedResponse]
total: int
page: int
page_size: int
class StatsResponse(BaseModel):
"""Crawl statistics response."""
total_seeds: int
enabled_seeds: int
total_documents: int
seeds_by_category: dict
seeds_by_state: dict
last_crawl_time: Optional[datetime] = None
class BulkImportRequest(BaseModel):
"""Bulk import request."""
seeds: List[SeedCreate]
class BulkImportResponse(BaseModel):
"""Bulk import response."""
imported: int
skipped: int
errors: List[str]
# =============================================================================
# API Endpoints
# =============================================================================
@router.get("/categories", response_model=List[CategoryResponse])
async def list_categories():
"""List all seed categories."""
pool = await get_db_pool()
async with pool.acquire() as conn:
rows = await conn.fetch("""
SELECT id, name, display_name, description, icon, sort_order, is_active
FROM edu_search_categories
WHERE is_active = TRUE
ORDER BY sort_order
""")
return [
CategoryResponse(
id=str(row["id"]),
name=row["name"],
display_name=row["display_name"],
description=row["description"],
icon=row["icon"],
sort_order=row["sort_order"],
is_active=row["is_active"],
)
for row in rows
]
@router.get("/seeds", response_model=SeedsListResponse)
async def list_seeds(
category: Optional[str] = Query(None, description="Filter by category name"),
state: Optional[str] = Query(None, description="Filter by state code"),
enabled: Optional[bool] = Query(None, description="Filter by enabled status"),
search: Optional[str] = Query(None, description="Search in name/url"),
page: int = Query(1, ge=1),
page_size: int = Query(50, ge=1, le=200),
):
"""List seeds with optional filtering and pagination."""
pool = await get_db_pool()
async with pool.acquire() as conn:
# Build WHERE clause
conditions = []
params = []
param_idx = 1
if category:
conditions.append(f"c.name = ${param_idx}")
params.append(category)
param_idx += 1
if state:
conditions.append(f"s.state = ${param_idx}")
params.append(state)
param_idx += 1
if enabled is not None:
conditions.append(f"s.enabled = ${param_idx}")
params.append(enabled)
param_idx += 1
if search:
conditions.append(f"(s.name ILIKE ${param_idx} OR s.url ILIKE ${param_idx})")
params.append(f"%{search}%")
param_idx += 1
where_clause = " AND ".join(conditions) if conditions else "TRUE"
# Count total
count_query = f"""
SELECT COUNT(*) FROM edu_search_seeds s
LEFT JOIN edu_search_categories c ON s.category_id = c.id
WHERE {where_clause}
"""
total = await conn.fetchval(count_query, *params)
# Get paginated results
offset = (page - 1) * page_size
params.extend([page_size, offset])
query = f"""
SELECT
s.id, s.url, s.name, s.description,
c.name as category, c.display_name as category_display_name,
s.source_type, s.scope, s.state, s.trust_boost, s.enabled,
s.crawl_depth, s.crawl_frequency, s.last_crawled_at,
s.last_crawl_status, s.last_crawl_docs, s.total_documents,
s.created_at, s.updated_at
FROM edu_search_seeds s
LEFT JOIN edu_search_categories c ON s.category_id = c.id
WHERE {where_clause}
ORDER BY c.sort_order, s.name
LIMIT ${param_idx} OFFSET ${param_idx + 1}
"""
rows = await conn.fetch(query, *params)
seeds = [
SeedResponse(
id=str(row["id"]),
url=row["url"],
name=row["name"],
description=row["description"],
category=row["category"],
category_display_name=row["category_display_name"],
source_type=row["source_type"],
scope=row["scope"],
state=row["state"],
trust_boost=float(row["trust_boost"]),
enabled=row["enabled"],
crawl_depth=row["crawl_depth"],
crawl_frequency=row["crawl_frequency"],
last_crawled_at=row["last_crawled_at"],
last_crawl_status=row["last_crawl_status"],
last_crawl_docs=row["last_crawl_docs"] or 0,
total_documents=row["total_documents"] or 0,
created_at=row["created_at"],
updated_at=row["updated_at"],
)
for row in rows
]
return SeedsListResponse(
seeds=seeds,
total=total,
page=page,
page_size=page_size,
)
@router.get("/seeds/{seed_id}", response_model=SeedResponse)
async def get_seed(seed_id: str):
"""Get a single seed by ID."""
pool = await get_db_pool()
async with pool.acquire() as conn:
row = await conn.fetchrow("""
SELECT
s.id, s.url, s.name, s.description,
c.name as category, c.display_name as category_display_name,
s.source_type, s.scope, s.state, s.trust_boost, s.enabled,
s.crawl_depth, s.crawl_frequency, s.last_crawled_at,
s.last_crawl_status, s.last_crawl_docs, s.total_documents,
s.created_at, s.updated_at
FROM edu_search_seeds s
LEFT JOIN edu_search_categories c ON s.category_id = c.id
WHERE s.id = $1
""", seed_id)
if not row:
raise HTTPException(status_code=404, detail="Seed nicht gefunden")
return SeedResponse(
id=str(row["id"]),
url=row["url"],
name=row["name"],
description=row["description"],
category=row["category"],
category_display_name=row["category_display_name"],
source_type=row["source_type"],
scope=row["scope"],
state=row["state"],
trust_boost=float(row["trust_boost"]),
enabled=row["enabled"],
crawl_depth=row["crawl_depth"],
crawl_frequency=row["crawl_frequency"],
last_crawled_at=row["last_crawled_at"],
last_crawl_status=row["last_crawl_status"],
last_crawl_docs=row["last_crawl_docs"] or 0,
total_documents=row["total_documents"] or 0,
created_at=row["created_at"],
updated_at=row["updated_at"],
)
@router.post("/seeds", response_model=SeedResponse, status_code=201)
async def create_seed(seed: SeedCreate):
"""Create a new seed URL."""
pool = await get_db_pool()
async with pool.acquire() as conn:
# Get category ID if provided
category_id = None
if seed.category_name:
category_id = await conn.fetchval(
"SELECT id FROM edu_search_categories WHERE name = $1",
seed.category_name
)
try:
row = await conn.fetchrow("""
INSERT INTO edu_search_seeds (
url, name, description, category_id, source_type, scope,
state, trust_boost, enabled, crawl_depth, crawl_frequency
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11)
RETURNING id, created_at, updated_at
""",
seed.url, seed.name, seed.description, category_id,
seed.source_type, seed.scope, seed.state, seed.trust_boost,
seed.enabled, seed.crawl_depth, seed.crawl_frequency
)
except asyncpg.UniqueViolationError:
raise HTTPException(status_code=409, detail="URL existiert bereits")
return SeedResponse(
id=str(row["id"]),
url=seed.url,
name=seed.name,
description=seed.description,
category=seed.category_name,
category_display_name=None,
source_type=seed.source_type,
scope=seed.scope,
state=seed.state,
trust_boost=seed.trust_boost,
enabled=seed.enabled,
crawl_depth=seed.crawl_depth,
crawl_frequency=seed.crawl_frequency,
last_crawled_at=None,
last_crawl_status=None,
last_crawl_docs=0,
total_documents=0,
created_at=row["created_at"],
updated_at=row["updated_at"],
)
@router.put("/seeds/{seed_id}", response_model=SeedResponse)
async def update_seed(seed_id: str, seed: SeedUpdate):
"""Update an existing seed."""
pool = await get_db_pool()
async with pool.acquire() as conn:
# Build update statement dynamically
updates = []
params = []
param_idx = 1
if seed.url is not None:
updates.append(f"url = ${param_idx}")
params.append(seed.url)
param_idx += 1
if seed.name is not None:
updates.append(f"name = ${param_idx}")
params.append(seed.name)
param_idx += 1
if seed.description is not None:
updates.append(f"description = ${param_idx}")
params.append(seed.description)
param_idx += 1
if seed.category_name is not None:
category_id = await conn.fetchval(
"SELECT id FROM edu_search_categories WHERE name = $1",
seed.category_name
)
updates.append(f"category_id = ${param_idx}")
params.append(category_id)
param_idx += 1
if seed.source_type is not None:
updates.append(f"source_type = ${param_idx}")
params.append(seed.source_type)
param_idx += 1
if seed.scope is not None:
updates.append(f"scope = ${param_idx}")
params.append(seed.scope)
param_idx += 1
if seed.state is not None:
updates.append(f"state = ${param_idx}")
params.append(seed.state)
param_idx += 1
if seed.trust_boost is not None:
updates.append(f"trust_boost = ${param_idx}")
params.append(seed.trust_boost)
param_idx += 1
if seed.enabled is not None:
updates.append(f"enabled = ${param_idx}")
params.append(seed.enabled)
param_idx += 1
if seed.crawl_depth is not None:
updates.append(f"crawl_depth = ${param_idx}")
params.append(seed.crawl_depth)
param_idx += 1
if seed.crawl_frequency is not None:
updates.append(f"crawl_frequency = ${param_idx}")
params.append(seed.crawl_frequency)
param_idx += 1
if not updates:
raise HTTPException(status_code=400, detail="Keine Felder zum Aktualisieren")
updates.append("updated_at = NOW()")
params.append(seed_id)
query = f"""
UPDATE edu_search_seeds
SET {", ".join(updates)}
WHERE id = ${param_idx}
RETURNING id
"""
result = await conn.fetchrow(query, *params)
if not result:
raise HTTPException(status_code=404, detail="Seed nicht gefunden")
# Return updated seed
return await get_seed(seed_id)
@router.delete("/seeds/{seed_id}")
async def delete_seed(seed_id: str):
"""Delete a seed."""
pool = await get_db_pool()
async with pool.acquire() as conn:
result = await conn.execute(
"DELETE FROM edu_search_seeds WHERE id = $1",
seed_id
)
if result == "DELETE 0":
raise HTTPException(status_code=404, detail="Seed nicht gefunden")
return {"status": "deleted", "id": seed_id}
@router.post("/seeds/bulk-import", response_model=BulkImportResponse)
async def bulk_import_seeds(request: BulkImportRequest):
"""Bulk import seeds (skip duplicates)."""
pool = await get_db_pool()
imported = 0
skipped = 0
errors = []
async with pool.acquire() as conn:
# Pre-fetch all category IDs
categories = {}
rows = await conn.fetch("SELECT id, name FROM edu_search_categories")
for row in rows:
categories[row["name"]] = row["id"]
for seed in request.seeds:
try:
category_id = categories.get(seed.category_name) if seed.category_name else None
await conn.execute("""
INSERT INTO edu_search_seeds (
url, name, description, category_id, source_type, scope,
state, trust_boost, enabled, crawl_depth, crawl_frequency
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11)
ON CONFLICT (url) DO NOTHING
""",
seed.url, seed.name, seed.description, category_id,
seed.source_type, seed.scope, seed.state, seed.trust_boost,
seed.enabled, seed.crawl_depth, seed.crawl_frequency
)
imported += 1
except asyncpg.UniqueViolationError:
skipped += 1
except Exception as e:
errors.append(f"{seed.url}: {str(e)}")
return BulkImportResponse(imported=imported, skipped=skipped, errors=errors)
@router.get("/stats", response_model=StatsResponse)
async def get_stats():
"""Get crawl statistics."""
pool = await get_db_pool()
async with pool.acquire() as conn:
# Basic counts
total = await conn.fetchval("SELECT COUNT(*) FROM edu_search_seeds")
enabled = await conn.fetchval("SELECT COUNT(*) FROM edu_search_seeds WHERE enabled = TRUE")
total_docs = await conn.fetchval("SELECT COALESCE(SUM(total_documents), 0) FROM edu_search_seeds")
# By category
cat_rows = await conn.fetch("""
SELECT c.name, COUNT(s.id) as count
FROM edu_search_categories c
LEFT JOIN edu_search_seeds s ON c.id = s.category_id
GROUP BY c.name
""")
by_category = {row["name"]: row["count"] for row in cat_rows}
# By state
state_rows = await conn.fetch("""
SELECT COALESCE(state, 'federal') as state, COUNT(*) as count
FROM edu_search_seeds
GROUP BY state
""")
by_state = {row["state"]: row["count"] for row in state_rows}
# Last crawl time
last_crawl = await conn.fetchval(
"SELECT MAX(last_crawled_at) FROM edu_search_seeds"
)
return StatsResponse(
total_seeds=total,
enabled_seeds=enabled,
total_documents=total_docs,
seeds_by_category=by_category,
seeds_by_state=by_state,
last_crawl_time=last_crawl,
)
# Export for external use (edu-search-service)
@router.get("/seeds/export/for-crawler")
async def export_seeds_for_crawler():
"""Export enabled seeds in format suitable for crawler."""
pool = await get_db_pool()
async with pool.acquire() as conn:
rows = await conn.fetch("""
SELECT
s.url, s.trust_boost, s.source_type, s.scope, s.state,
s.crawl_depth, c.name as category
FROM edu_search_seeds s
LEFT JOIN edu_search_categories c ON s.category_id = c.id
WHERE s.enabled = TRUE
ORDER BY s.trust_boost DESC
""")
return {
"seeds": [
{
"url": row["url"],
"trust": float(row["trust_boost"]),
"source": row["source_type"],
"scope": row["scope"],
"state": row["state"],
"depth": row["crawl_depth"],
"category": row["category"],
}
for row in rows
],
"total": len(rows),
"exported_at": datetime.utcnow().isoformat(),
}
# =============================================================================
# Crawl Status Feedback (from edu-search-service)
# =============================================================================
class CrawlStatusUpdate(BaseModel):
"""Crawl status update from edu-search-service."""
seed_url: str = Field(..., description="The seed URL that was crawled")
status: str = Field(..., description="Crawl status: success, error, partial")
documents_crawled: int = Field(0, ge=0, description="Number of documents crawled")
error_message: Optional[str] = Field(None, description="Error message if status is error")
crawl_duration_seconds: float = Field(0.0, ge=0.0, description="Duration of the crawl in seconds")
class CrawlStatusResponse(BaseModel):
"""Response for crawl status update."""
success: bool
seed_url: str
message: str
@router.post("/seeds/crawl-status", response_model=CrawlStatusResponse)
async def update_crawl_status(update: CrawlStatusUpdate):
"""Update crawl status for a seed URL (called by edu-search-service)."""
pool = await get_db_pool()
async with pool.acquire() as conn:
# Find the seed by URL
seed = await conn.fetchrow(
"SELECT id, total_documents FROM edu_search_seeds WHERE url = $1",
update.seed_url
)
if not seed:
raise HTTPException(
status_code=404,
detail=f"Seed nicht gefunden: {update.seed_url}"
)
# Update the seed with crawl status
new_total = (seed["total_documents"] or 0) + update.documents_crawled
await conn.execute("""
UPDATE edu_search_seeds
SET
last_crawled_at = NOW(),
last_crawl_status = $2,
last_crawl_docs = $3,
total_documents = $4,
updated_at = NOW()
WHERE id = $1
""", seed["id"], update.status, update.documents_crawled, new_total)
logger.info(
f"Crawl status updated: {update.seed_url} - "
f"status={update.status}, docs={update.documents_crawled}, "
f"duration={update.crawl_duration_seconds:.1f}s"
)
return CrawlStatusResponse(
success=True,
seed_url=update.seed_url,
message=f"Status aktualisiert: {update.documents_crawled} Dokumente gecrawlt"
)
class BulkCrawlStatusUpdate(BaseModel):
"""Bulk crawl status update."""
updates: List[CrawlStatusUpdate]
class BulkCrawlStatusResponse(BaseModel):
"""Response for bulk crawl status update."""
updated: int
failed: int
errors: List[str]
@router.post("/seeds/crawl-status/bulk", response_model=BulkCrawlStatusResponse)
async def bulk_update_crawl_status(request: BulkCrawlStatusUpdate):
"""Bulk update crawl status for multiple seeds."""
pool = await get_db_pool()
updated = 0
failed = 0
errors = []
async with pool.acquire() as conn:
for update in request.updates:
try:
seed = await conn.fetchrow(
"SELECT id, total_documents FROM edu_search_seeds WHERE url = $1",
update.seed_url
)
if not seed:
failed += 1
errors.append(f"Seed nicht gefunden: {update.seed_url}")
continue
new_total = (seed["total_documents"] or 0) + update.documents_crawled
await conn.execute("""
UPDATE edu_search_seeds
SET
last_crawled_at = NOW(),
last_crawl_status = $2,
last_crawl_docs = $3,
total_documents = $4,
updated_at = NOW()
WHERE id = $1
""", seed["id"], update.status, update.documents_crawled, new_total)
updated += 1
except Exception as e:
failed += 1
errors.append(f"{update.seed_url}: {str(e)}")
logger.info(f"Bulk crawl status update: {updated} updated, {failed} failed")
return BulkCrawlStatusResponse(
updated=updated,
failed=failed,
errors=errors
)
router.include_router(_crud_router)
router.include_router(_status_router)
__all__ = [
"router",
"get_db_pool",
# Models
"CategoryResponse",
"SeedBase",
"SeedCreate",
"SeedUpdate",
"SeedResponse",
"SeedsListResponse",
"StatsResponse",
"BulkImportRequest",
"BulkImportResponse",
"CrawlStatusUpdate",
"CrawlStatusResponse",
"BulkCrawlStatusUpdate",
"BulkCrawlStatusResponse",
]

View File

@@ -0,0 +1,198 @@
"""
EduSearch Seeds Stats & Crawl Status Routes.
Statistics, export for crawler, and crawl status feedback endpoints.
"""
import logging
from typing import List
from datetime import datetime
from fastapi import APIRouter, HTTPException
import asyncpg
from .edu_search_models import (
StatsResponse,
CrawlStatusUpdate,
CrawlStatusResponse,
BulkCrawlStatusUpdate,
BulkCrawlStatusResponse,
)
from .edu_search_crud import get_db_pool
logger = logging.getLogger(__name__)
router = APIRouter(tags=["edu-search"])
@router.get("/stats", response_model=StatsResponse)
async def get_stats():
"""Get crawl statistics."""
pool = await get_db_pool()
async with pool.acquire() as conn:
# Basic counts
total = await conn.fetchval("SELECT COUNT(*) FROM edu_search_seeds")
enabled = await conn.fetchval("SELECT COUNT(*) FROM edu_search_seeds WHERE enabled = TRUE")
total_docs = await conn.fetchval("SELECT COALESCE(SUM(total_documents), 0) FROM edu_search_seeds")
# By category
cat_rows = await conn.fetch("""
SELECT c.name, COUNT(s.id) as count
FROM edu_search_categories c
LEFT JOIN edu_search_seeds s ON c.id = s.category_id
GROUP BY c.name
""")
by_category = {row["name"]: row["count"] for row in cat_rows}
# By state
state_rows = await conn.fetch("""
SELECT COALESCE(state, 'federal') as state, COUNT(*) as count
FROM edu_search_seeds
GROUP BY state
""")
by_state = {row["state"]: row["count"] for row in state_rows}
# Last crawl time
last_crawl = await conn.fetchval(
"SELECT MAX(last_crawled_at) FROM edu_search_seeds"
)
return StatsResponse(
total_seeds=total,
enabled_seeds=enabled,
total_documents=total_docs,
seeds_by_category=by_category,
seeds_by_state=by_state,
last_crawl_time=last_crawl,
)
# Export for external use (edu-search-service)
@router.get("/seeds/export/for-crawler")
async def export_seeds_for_crawler():
"""Export enabled seeds in format suitable for crawler."""
pool = await get_db_pool()
async with pool.acquire() as conn:
rows = await conn.fetch("""
SELECT
s.url, s.trust_boost, s.source_type, s.scope, s.state,
s.crawl_depth, c.name as category
FROM edu_search_seeds s
LEFT JOIN edu_search_categories c ON s.category_id = c.id
WHERE s.enabled = TRUE
ORDER BY s.trust_boost DESC
""")
return {
"seeds": [
{
"url": row["url"],
"trust": float(row["trust_boost"]),
"source": row["source_type"],
"scope": row["scope"],
"state": row["state"],
"depth": row["crawl_depth"],
"category": row["category"],
}
for row in rows
],
"total": len(rows),
"exported_at": datetime.utcnow().isoformat(),
}
# =============================================================================
# Crawl Status Feedback (from edu-search-service)
# =============================================================================
@router.post("/seeds/crawl-status", response_model=CrawlStatusResponse)
async def update_crawl_status(update: CrawlStatusUpdate):
"""Update crawl status for a seed URL (called by edu-search-service)."""
pool = await get_db_pool()
async with pool.acquire() as conn:
# Find the seed by URL
seed = await conn.fetchrow(
"SELECT id, total_documents FROM edu_search_seeds WHERE url = $1",
update.seed_url
)
if not seed:
raise HTTPException(
status_code=404,
detail=f"Seed nicht gefunden: {update.seed_url}"
)
# Update the seed with crawl status
new_total = (seed["total_documents"] or 0) + update.documents_crawled
await conn.execute("""
UPDATE edu_search_seeds
SET
last_crawled_at = NOW(),
last_crawl_status = $2,
last_crawl_docs = $3,
total_documents = $4,
updated_at = NOW()
WHERE id = $1
""", seed["id"], update.status, update.documents_crawled, new_total)
logger.info(
f"Crawl status updated: {update.seed_url} - "
f"status={update.status}, docs={update.documents_crawled}, "
f"duration={update.crawl_duration_seconds:.1f}s"
)
return CrawlStatusResponse(
success=True,
seed_url=update.seed_url,
message=f"Status aktualisiert: {update.documents_crawled} Dokumente gecrawlt"
)
@router.post("/seeds/crawl-status/bulk", response_model=BulkCrawlStatusResponse)
async def bulk_update_crawl_status(request: BulkCrawlStatusUpdate):
"""Bulk update crawl status for multiple seeds."""
pool = await get_db_pool()
updated = 0
failed = 0
errors = []
async with pool.acquire() as conn:
for update in request.updates:
try:
seed = await conn.fetchrow(
"SELECT id, total_documents FROM edu_search_seeds WHERE url = $1",
update.seed_url
)
if not seed:
failed += 1
errors.append(f"Seed nicht gefunden: {update.seed_url}")
continue
new_total = (seed["total_documents"] or 0) + update.documents_crawled
await conn.execute("""
UPDATE edu_search_seeds
SET
last_crawled_at = NOW(),
last_crawl_status = $2,
last_crawl_docs = $3,
total_documents = $4,
updated_at = NOW()
WHERE id = $1
""", seed["id"], update.status, update.documents_crawled, new_total)
updated += 1
except Exception as e:
failed += 1
errors.append(f"{update.seed_url}: {str(e)}")
logger.info(f"Bulk crawl status update: {updated} updated, {failed} failed")
return BulkCrawlStatusResponse(
updated=updated,
failed=failed,
errors=errors
)

View File

@@ -1,867 +1,38 @@
"""
Schools API Routes.
Schools API Routes — Barrel Re-export.
CRUD operations for managing German schools (~40,000 schools).
Direct database access to PostgreSQL.
Split into:
- schools_models.py: Pydantic models
- schools_db.py: Database connection pool
- schools_crud.py: School CRUD & stats routes
- schools_staff.py: Staff CRUD & search routes
"""
import os
import logging
from typing import Optional, List
from datetime import datetime
from uuid import UUID
from fastapi import APIRouter
from fastapi import APIRouter, HTTPException, Query
from pydantic import BaseModel, Field
import asyncpg
logger = logging.getLogger(__name__)
from .schools_crud import router as _crud_router
from .schools_staff import router as _staff_router
# Single router that merges both sub-module routers
router = APIRouter(prefix="/schools", tags=["schools"])
# Database connection pool
_pool: Optional[asyncpg.Pool] = None
async def get_db_pool() -> asyncpg.Pool:
"""Get or create database connection pool."""
global _pool
if _pool is None:
database_url = os.environ.get(
"DATABASE_URL",
"postgresql://breakpilot:breakpilot123@postgres:5432/breakpilot_db"
)
_pool = await asyncpg.create_pool(database_url, min_size=2, max_size=10)
return _pool
# =============================================================================
# Pydantic Models
# =============================================================================
class SchoolTypeResponse(BaseModel):
"""School type response model."""
id: str
name: str
name_short: Optional[str] = None
category: Optional[str] = None
description: Optional[str] = None
class SchoolBase(BaseModel):
"""Base school model for creation/update."""
name: str = Field(..., max_length=255)
school_number: Optional[str] = Field(None, max_length=20)
school_type_id: Optional[str] = None
school_type_raw: Optional[str] = None
state: str = Field(..., max_length=10)
district: Optional[str] = None
city: Optional[str] = None
postal_code: Optional[str] = None
street: Optional[str] = None
address_full: Optional[str] = None
latitude: Optional[float] = None
longitude: Optional[float] = None
website: Optional[str] = None
email: Optional[str] = None
phone: Optional[str] = None
fax: Optional[str] = None
principal_name: Optional[str] = None
principal_title: Optional[str] = None
principal_email: Optional[str] = None
principal_phone: Optional[str] = None
secretary_name: Optional[str] = None
secretary_email: Optional[str] = None
secretary_phone: Optional[str] = None
student_count: Optional[int] = None
teacher_count: Optional[int] = None
class_count: Optional[int] = None
founded_year: Optional[int] = None
is_public: bool = True
is_all_day: Optional[bool] = None
has_inclusion: Optional[bool] = None
languages: Optional[List[str]] = None
specializations: Optional[List[str]] = None
source: Optional[str] = None
source_url: Optional[str] = None
class SchoolCreate(SchoolBase):
"""School creation model."""
pass
class SchoolUpdate(BaseModel):
"""School update model (all fields optional)."""
name: Optional[str] = Field(None, max_length=255)
school_number: Optional[str] = None
school_type_id: Optional[str] = None
state: Optional[str] = None
district: Optional[str] = None
city: Optional[str] = None
postal_code: Optional[str] = None
street: Optional[str] = None
website: Optional[str] = None
email: Optional[str] = None
phone: Optional[str] = None
principal_name: Optional[str] = None
student_count: Optional[int] = None
teacher_count: Optional[int] = None
is_active: Optional[bool] = None
class SchoolResponse(BaseModel):
"""School response model."""
id: str
name: str
school_number: Optional[str] = None
school_type: Optional[str] = None
school_type_short: Optional[str] = None
school_category: Optional[str] = None
state: str
district: Optional[str] = None
city: Optional[str] = None
postal_code: Optional[str] = None
street: Optional[str] = None
address_full: Optional[str] = None
latitude: Optional[float] = None
longitude: Optional[float] = None
website: Optional[str] = None
email: Optional[str] = None
phone: Optional[str] = None
fax: Optional[str] = None
principal_name: Optional[str] = None
principal_email: Optional[str] = None
student_count: Optional[int] = None
teacher_count: Optional[int] = None
is_public: bool = True
is_all_day: Optional[bool] = None
staff_count: int = 0
source: Optional[str] = None
crawled_at: Optional[datetime] = None
is_active: bool = True
created_at: datetime
updated_at: datetime
class SchoolsListResponse(BaseModel):
"""List response with pagination info."""
schools: List[SchoolResponse]
total: int
page: int
page_size: int
class SchoolStaffBase(BaseModel):
"""Base school staff model."""
first_name: Optional[str] = None
last_name: str
full_name: Optional[str] = None
title: Optional[str] = None
position: Optional[str] = None
position_type: Optional[str] = None
subjects: Optional[List[str]] = None
email: Optional[str] = None
phone: Optional[str] = None
class SchoolStaffCreate(SchoolStaffBase):
"""School staff creation model."""
school_id: str
class SchoolStaffResponse(SchoolStaffBase):
"""School staff response model."""
id: str
school_id: str
school_name: Optional[str] = None
profile_url: Optional[str] = None
photo_url: Optional[str] = None
is_active: bool = True
created_at: datetime
class SchoolStaffListResponse(BaseModel):
"""Staff list response."""
staff: List[SchoolStaffResponse]
total: int
class SchoolStatsResponse(BaseModel):
"""School statistics response."""
total_schools: int
total_staff: int
schools_by_state: dict
schools_by_type: dict
schools_with_website: int
schools_with_email: int
schools_with_principal: int
total_students: int
total_teachers: int
last_crawl_time: Optional[datetime] = None
class BulkImportRequest(BaseModel):
"""Bulk import request."""
schools: List[SchoolCreate]
class BulkImportResponse(BaseModel):
"""Bulk import response."""
imported: int
updated: int
skipped: int
errors: List[str]
# =============================================================================
# School Type Endpoints
# =============================================================================
@router.get("/types", response_model=List[SchoolTypeResponse])
async def list_school_types():
"""List all school types."""
pool = await get_db_pool()
async with pool.acquire() as conn:
rows = await conn.fetch("""
SELECT id, name, name_short, category, description
FROM school_types
ORDER BY category, name
""")
return [
SchoolTypeResponse(
id=str(row["id"]),
name=row["name"],
name_short=row["name_short"],
category=row["category"],
description=row["description"],
)
for row in rows
]
# =============================================================================
# School Endpoints
# =============================================================================
@router.get("", response_model=SchoolsListResponse)
async def list_schools(
state: Optional[str] = Query(None, description="Filter by state code (BW, BY, etc.)"),
school_type: Optional[str] = Query(None, description="Filter by school type name"),
city: Optional[str] = Query(None, description="Filter by city"),
district: Optional[str] = Query(None, description="Filter by district"),
postal_code: Optional[str] = Query(None, description="Filter by postal code prefix"),
search: Optional[str] = Query(None, description="Search in name, city"),
has_email: Optional[bool] = Query(None, description="Filter schools with email"),
has_website: Optional[bool] = Query(None, description="Filter schools with website"),
is_public: Optional[bool] = Query(None, description="Filter public/private schools"),
page: int = Query(1, ge=1),
page_size: int = Query(50, ge=1, le=200),
):
"""List schools with optional filtering and pagination."""
pool = await get_db_pool()
async with pool.acquire() as conn:
# Build WHERE clause
conditions = ["s.is_active = TRUE"]
params = []
param_idx = 1
if state:
conditions.append(f"s.state = ${param_idx}")
params.append(state.upper())
param_idx += 1
if school_type:
conditions.append(f"st.name = ${param_idx}")
params.append(school_type)
param_idx += 1
if city:
conditions.append(f"LOWER(s.city) = LOWER(${param_idx})")
params.append(city)
param_idx += 1
if district:
conditions.append(f"LOWER(s.district) LIKE LOWER(${param_idx})")
params.append(f"%{district}%")
param_idx += 1
if postal_code:
conditions.append(f"s.postal_code LIKE ${param_idx}")
params.append(f"{postal_code}%")
param_idx += 1
if search:
conditions.append(f"""
(LOWER(s.name) LIKE LOWER(${param_idx})
OR LOWER(s.city) LIKE LOWER(${param_idx})
OR LOWER(s.district) LIKE LOWER(${param_idx}))
""")
params.append(f"%{search}%")
param_idx += 1
if has_email is not None:
if has_email:
conditions.append("s.email IS NOT NULL")
else:
conditions.append("s.email IS NULL")
if has_website is not None:
if has_website:
conditions.append("s.website IS NOT NULL")
else:
conditions.append("s.website IS NULL")
if is_public is not None:
conditions.append(f"s.is_public = ${param_idx}")
params.append(is_public)
param_idx += 1
where_clause = " AND ".join(conditions)
# Count total
count_query = f"""
SELECT COUNT(*) FROM schools s
LEFT JOIN school_types st ON s.school_type_id = st.id
WHERE {where_clause}
"""
total = await conn.fetchval(count_query, *params)
# Fetch schools
offset = (page - 1) * page_size
query = f"""
SELECT
s.id, s.name, s.school_number, s.state, s.district, s.city,
s.postal_code, s.street, s.address_full, s.latitude, s.longitude,
s.website, s.email, s.phone, s.fax,
s.principal_name, s.principal_email,
s.student_count, s.teacher_count,
s.is_public, s.is_all_day, s.source, s.crawled_at,
s.is_active, s.created_at, s.updated_at,
st.name as school_type, st.name_short as school_type_short, st.category as school_category,
(SELECT COUNT(*) FROM school_staff ss WHERE ss.school_id = s.id AND ss.is_active = TRUE) as staff_count
FROM schools s
LEFT JOIN school_types st ON s.school_type_id = st.id
WHERE {where_clause}
ORDER BY s.state, s.city, s.name
LIMIT ${param_idx} OFFSET ${param_idx + 1}
"""
params.extend([page_size, offset])
rows = await conn.fetch(query, *params)
schools = [
SchoolResponse(
id=str(row["id"]),
name=row["name"],
school_number=row["school_number"],
school_type=row["school_type"],
school_type_short=row["school_type_short"],
school_category=row["school_category"],
state=row["state"],
district=row["district"],
city=row["city"],
postal_code=row["postal_code"],
street=row["street"],
address_full=row["address_full"],
latitude=row["latitude"],
longitude=row["longitude"],
website=row["website"],
email=row["email"],
phone=row["phone"],
fax=row["fax"],
principal_name=row["principal_name"],
principal_email=row["principal_email"],
student_count=row["student_count"],
teacher_count=row["teacher_count"],
is_public=row["is_public"],
is_all_day=row["is_all_day"],
staff_count=row["staff_count"],
source=row["source"],
crawled_at=row["crawled_at"],
is_active=row["is_active"],
created_at=row["created_at"],
updated_at=row["updated_at"],
)
for row in rows
]
return SchoolsListResponse(
schools=schools,
total=total,
page=page,
page_size=page_size,
)
@router.get("/stats", response_model=SchoolStatsResponse)
async def get_school_stats():
"""Get school statistics."""
pool = await get_db_pool()
async with pool.acquire() as conn:
# Total schools and staff
totals = await conn.fetchrow("""
SELECT
(SELECT COUNT(*) FROM schools WHERE is_active = TRUE) as total_schools,
(SELECT COUNT(*) FROM school_staff WHERE is_active = TRUE) as total_staff,
(SELECT COUNT(*) FROM schools WHERE is_active = TRUE AND website IS NOT NULL) as with_website,
(SELECT COUNT(*) FROM schools WHERE is_active = TRUE AND email IS NOT NULL) as with_email,
(SELECT COUNT(*) FROM schools WHERE is_active = TRUE AND principal_name IS NOT NULL) as with_principal,
(SELECT COALESCE(SUM(student_count), 0) FROM schools WHERE is_active = TRUE) as total_students,
(SELECT COALESCE(SUM(teacher_count), 0) FROM schools WHERE is_active = TRUE) as total_teachers,
(SELECT MAX(crawled_at) FROM schools) as last_crawl
""")
# By state
state_rows = await conn.fetch("""
SELECT state, COUNT(*) as count
FROM schools
WHERE is_active = TRUE
GROUP BY state
ORDER BY state
""")
schools_by_state = {row["state"]: row["count"] for row in state_rows}
# By type
type_rows = await conn.fetch("""
SELECT COALESCE(st.name, 'Unbekannt') as type_name, COUNT(*) as count
FROM schools s
LEFT JOIN school_types st ON s.school_type_id = st.id
WHERE s.is_active = TRUE
GROUP BY st.name
ORDER BY count DESC
""")
schools_by_type = {row["type_name"]: row["count"] for row in type_rows}
return SchoolStatsResponse(
total_schools=totals["total_schools"],
total_staff=totals["total_staff"],
schools_by_state=schools_by_state,
schools_by_type=schools_by_type,
schools_with_website=totals["with_website"],
schools_with_email=totals["with_email"],
schools_with_principal=totals["with_principal"],
total_students=totals["total_students"],
total_teachers=totals["total_teachers"],
last_crawl_time=totals["last_crawl"],
)
@router.get("/{school_id}", response_model=SchoolResponse)
async def get_school(school_id: str):
"""Get a single school by ID."""
pool = await get_db_pool()
async with pool.acquire() as conn:
row = await conn.fetchrow("""
SELECT
s.id, s.name, s.school_number, s.state, s.district, s.city,
s.postal_code, s.street, s.address_full, s.latitude, s.longitude,
s.website, s.email, s.phone, s.fax,
s.principal_name, s.principal_email,
s.student_count, s.teacher_count,
s.is_public, s.is_all_day, s.source, s.crawled_at,
s.is_active, s.created_at, s.updated_at,
st.name as school_type, st.name_short as school_type_short, st.category as school_category,
(SELECT COUNT(*) FROM school_staff ss WHERE ss.school_id = s.id AND ss.is_active = TRUE) as staff_count
FROM schools s
LEFT JOIN school_types st ON s.school_type_id = st.id
WHERE s.id = $1
""", school_id)
if not row:
raise HTTPException(status_code=404, detail="School not found")
return SchoolResponse(
id=str(row["id"]),
name=row["name"],
school_number=row["school_number"],
school_type=row["school_type"],
school_type_short=row["school_type_short"],
school_category=row["school_category"],
state=row["state"],
district=row["district"],
city=row["city"],
postal_code=row["postal_code"],
street=row["street"],
address_full=row["address_full"],
latitude=row["latitude"],
longitude=row["longitude"],
website=row["website"],
email=row["email"],
phone=row["phone"],
fax=row["fax"],
principal_name=row["principal_name"],
principal_email=row["principal_email"],
student_count=row["student_count"],
teacher_count=row["teacher_count"],
is_public=row["is_public"],
is_all_day=row["is_all_day"],
staff_count=row["staff_count"],
source=row["source"],
crawled_at=row["crawled_at"],
is_active=row["is_active"],
created_at=row["created_at"],
updated_at=row["updated_at"],
)
@router.post("/bulk-import", response_model=BulkImportResponse)
async def bulk_import_schools(request: BulkImportRequest):
"""Bulk import schools. Updates existing schools based on school_number + state."""
pool = await get_db_pool()
imported = 0
updated = 0
skipped = 0
errors = []
async with pool.acquire() as conn:
# Get school type mapping
type_rows = await conn.fetch("SELECT id, name FROM school_types")
type_map = {row["name"].lower(): str(row["id"]) for row in type_rows}
for school in request.schools:
try:
# Find school type ID
school_type_id = None
if school.school_type_raw:
school_type_id = type_map.get(school.school_type_raw.lower())
# Check if school exists (by school_number + state, or by name + city + state)
existing = None
if school.school_number:
existing = await conn.fetchrow(
"SELECT id FROM schools WHERE school_number = $1 AND state = $2",
school.school_number, school.state
)
if not existing and school.city:
existing = await conn.fetchrow(
"SELECT id FROM schools WHERE LOWER(name) = LOWER($1) AND LOWER(city) = LOWER($2) AND state = $3",
school.name, school.city, school.state
)
if existing:
# Update existing school
await conn.execute("""
UPDATE schools SET
name = $2,
school_type_id = COALESCE($3, school_type_id),
school_type_raw = COALESCE($4, school_type_raw),
district = COALESCE($5, district),
city = COALESCE($6, city),
postal_code = COALESCE($7, postal_code),
street = COALESCE($8, street),
address_full = COALESCE($9, address_full),
latitude = COALESCE($10, latitude),
longitude = COALESCE($11, longitude),
website = COALESCE($12, website),
email = COALESCE($13, email),
phone = COALESCE($14, phone),
fax = COALESCE($15, fax),
principal_name = COALESCE($16, principal_name),
principal_title = COALESCE($17, principal_title),
principal_email = COALESCE($18, principal_email),
principal_phone = COALESCE($19, principal_phone),
student_count = COALESCE($20, student_count),
teacher_count = COALESCE($21, teacher_count),
is_public = $22,
source = COALESCE($23, source),
source_url = COALESCE($24, source_url),
updated_at = NOW()
WHERE id = $1
""",
existing["id"],
school.name,
school_type_id,
school.school_type_raw,
school.district,
school.city,
school.postal_code,
school.street,
school.address_full,
school.latitude,
school.longitude,
school.website,
school.email,
school.phone,
school.fax,
school.principal_name,
school.principal_title,
school.principal_email,
school.principal_phone,
school.student_count,
school.teacher_count,
school.is_public,
school.source,
school.source_url,
)
updated += 1
else:
# Insert new school
await conn.execute("""
INSERT INTO schools (
name, school_number, school_type_id, school_type_raw,
state, district, city, postal_code, street, address_full,
latitude, longitude, website, email, phone, fax,
principal_name, principal_title, principal_email, principal_phone,
student_count, teacher_count, is_public,
source, source_url, crawled_at
) VALUES (
$1, $2, $3, $4, $5, $6, $7, $8, $9, $10,
$11, $12, $13, $14, $15, $16, $17, $18, $19, $20,
$21, $22, $23, $24, $25, NOW()
)
""",
school.name,
school.school_number,
school_type_id,
school.school_type_raw,
school.state,
school.district,
school.city,
school.postal_code,
school.street,
school.address_full,
school.latitude,
school.longitude,
school.website,
school.email,
school.phone,
school.fax,
school.principal_name,
school.principal_title,
school.principal_email,
school.principal_phone,
school.student_count,
school.teacher_count,
school.is_public,
school.source,
school.source_url,
)
imported += 1
except Exception as e:
errors.append(f"Error importing {school.name}: {str(e)}")
if len(errors) > 100:
errors.append("... (more errors truncated)")
break
return BulkImportResponse(
imported=imported,
updated=updated,
skipped=skipped,
errors=errors[:100],
)
# =============================================================================
# School Staff Endpoints
# =============================================================================
@router.get("/{school_id}/staff", response_model=SchoolStaffListResponse)
async def get_school_staff(school_id: str):
"""Get staff members for a school."""
pool = await get_db_pool()
async with pool.acquire() as conn:
rows = await conn.fetch("""
SELECT
ss.id, ss.school_id, ss.first_name, ss.last_name, ss.full_name,
ss.title, ss.position, ss.position_type, ss.subjects,
ss.email, ss.phone, ss.profile_url, ss.photo_url,
ss.is_active, ss.created_at,
s.name as school_name
FROM school_staff ss
JOIN schools s ON ss.school_id = s.id
WHERE ss.school_id = $1 AND ss.is_active = TRUE
ORDER BY
CASE ss.position_type
WHEN 'principal' THEN 1
WHEN 'vice_principal' THEN 2
WHEN 'secretary' THEN 3
ELSE 4
END,
ss.last_name
""", school_id)
staff = [
SchoolStaffResponse(
id=str(row["id"]),
school_id=str(row["school_id"]),
school_name=row["school_name"],
first_name=row["first_name"],
last_name=row["last_name"],
full_name=row["full_name"],
title=row["title"],
position=row["position"],
position_type=row["position_type"],
subjects=row["subjects"],
email=row["email"],
phone=row["phone"],
profile_url=row["profile_url"],
photo_url=row["photo_url"],
is_active=row["is_active"],
created_at=row["created_at"],
)
for row in rows
]
return SchoolStaffListResponse(
staff=staff,
total=len(staff),
)
@router.post("/{school_id}/staff", response_model=SchoolStaffResponse)
async def create_school_staff(school_id: str, staff: SchoolStaffBase):
"""Add a staff member to a school."""
pool = await get_db_pool()
async with pool.acquire() as conn:
# Verify school exists
school = await conn.fetchrow("SELECT name FROM schools WHERE id = $1", school_id)
if not school:
raise HTTPException(status_code=404, detail="School not found")
# Create full name
full_name = staff.full_name
if not full_name:
parts = []
if staff.title:
parts.append(staff.title)
if staff.first_name:
parts.append(staff.first_name)
parts.append(staff.last_name)
full_name = " ".join(parts)
row = await conn.fetchrow("""
INSERT INTO school_staff (
school_id, first_name, last_name, full_name, title,
position, position_type, subjects, email, phone
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)
RETURNING id, created_at
""",
school_id,
staff.first_name,
staff.last_name,
full_name,
staff.title,
staff.position,
staff.position_type,
staff.subjects,
staff.email,
staff.phone,
)
return SchoolStaffResponse(
id=str(row["id"]),
school_id=school_id,
school_name=school["name"],
first_name=staff.first_name,
last_name=staff.last_name,
full_name=full_name,
title=staff.title,
position=staff.position,
position_type=staff.position_type,
subjects=staff.subjects,
email=staff.email,
phone=staff.phone,
is_active=True,
created_at=row["created_at"],
)
# =============================================================================
# Search Endpoints
# =============================================================================
@router.get("/search/staff", response_model=SchoolStaffListResponse)
async def search_school_staff(
q: Optional[str] = Query(None, description="Search query"),
state: Optional[str] = Query(None, description="Filter by state"),
position_type: Optional[str] = Query(None, description="Filter by position type"),
has_email: Optional[bool] = Query(None, description="Only staff with email"),
page: int = Query(1, ge=1),
page_size: int = Query(50, ge=1, le=200),
):
"""Search school staff across all schools."""
pool = await get_db_pool()
async with pool.acquire() as conn:
conditions = ["ss.is_active = TRUE", "s.is_active = TRUE"]
params = []
param_idx = 1
if q:
conditions.append(f"""
(LOWER(ss.full_name) LIKE LOWER(${param_idx})
OR LOWER(ss.last_name) LIKE LOWER(${param_idx})
OR LOWER(s.name) LIKE LOWER(${param_idx}))
""")
params.append(f"%{q}%")
param_idx += 1
if state:
conditions.append(f"s.state = ${param_idx}")
params.append(state.upper())
param_idx += 1
if position_type:
conditions.append(f"ss.position_type = ${param_idx}")
params.append(position_type)
param_idx += 1
if has_email is not None and has_email:
conditions.append("ss.email IS NOT NULL")
where_clause = " AND ".join(conditions)
# Count total
total = await conn.fetchval(f"""
SELECT COUNT(*) FROM school_staff ss
JOIN schools s ON ss.school_id = s.id
WHERE {where_clause}
""", *params)
# Fetch staff
offset = (page - 1) * page_size
rows = await conn.fetch(f"""
SELECT
ss.id, ss.school_id, ss.first_name, ss.last_name, ss.full_name,
ss.title, ss.position, ss.position_type, ss.subjects,
ss.email, ss.phone, ss.profile_url, ss.photo_url,
ss.is_active, ss.created_at,
s.name as school_name
FROM school_staff ss
JOIN schools s ON ss.school_id = s.id
WHERE {where_clause}
ORDER BY ss.last_name, ss.first_name
LIMIT ${param_idx} OFFSET ${param_idx + 1}
""", *params, page_size, offset)
staff = [
SchoolStaffResponse(
id=str(row["id"]),
school_id=str(row["school_id"]),
school_name=row["school_name"],
first_name=row["first_name"],
last_name=row["last_name"],
full_name=row["full_name"],
title=row["title"],
position=row["position"],
position_type=row["position_type"],
subjects=row["subjects"],
email=row["email"],
phone=row["phone"],
profile_url=row["profile_url"],
photo_url=row["photo_url"],
is_active=row["is_active"],
created_at=row["created_at"],
)
for row in rows
]
return SchoolStaffListResponse(
staff=staff,
total=total,
)
router.include_router(_crud_router)
router.include_router(_staff_router)
# Re-export models for any external consumers
from .schools_models import ( # noqa: E402, F401
SchoolTypeResponse,
SchoolBase,
SchoolCreate,
SchoolUpdate,
SchoolResponse,
SchoolsListResponse,
SchoolStaffBase,
SchoolStaffCreate,
SchoolStaffResponse,
SchoolStaffListResponse,
SchoolStatsResponse,
BulkImportRequest,
BulkImportResponse,
)
from .schools_db import get_db_pool # noqa: E402, F401

View File

@@ -0,0 +1,464 @@
"""
Schools API - School CRUD & Stats Routes.
List, get, stats, and bulk-import endpoints for schools.
"""
import logging
from typing import Optional
from fastapi import APIRouter, HTTPException, Query
from .schools_db import get_db_pool
from .schools_models import (
SchoolResponse,
SchoolsListResponse,
SchoolStatsResponse,
SchoolTypeResponse,
BulkImportRequest,
BulkImportResponse,
)
logger = logging.getLogger(__name__)
router = APIRouter(tags=["schools"])
# =============================================================================
# School Type Endpoints
# =============================================================================
@router.get("/types", response_model=list[SchoolTypeResponse])
async def list_school_types():
"""List all school types."""
pool = await get_db_pool()
async with pool.acquire() as conn:
rows = await conn.fetch("""
SELECT id, name, name_short, category, description
FROM school_types
ORDER BY category, name
""")
return [
SchoolTypeResponse(
id=str(row["id"]),
name=row["name"],
name_short=row["name_short"],
category=row["category"],
description=row["description"],
)
for row in rows
]
# =============================================================================
# School Endpoints
# =============================================================================
@router.get("", response_model=SchoolsListResponse)
async def list_schools(
state: Optional[str] = Query(None, description="Filter by state code (BW, BY, etc.)"),
school_type: Optional[str] = Query(None, description="Filter by school type name"),
city: Optional[str] = Query(None, description="Filter by city"),
district: Optional[str] = Query(None, description="Filter by district"),
postal_code: Optional[str] = Query(None, description="Filter by postal code prefix"),
search: Optional[str] = Query(None, description="Search in name, city"),
has_email: Optional[bool] = Query(None, description="Filter schools with email"),
has_website: Optional[bool] = Query(None, description="Filter schools with website"),
is_public: Optional[bool] = Query(None, description="Filter public/private schools"),
page: int = Query(1, ge=1),
page_size: int = Query(50, ge=1, le=200),
):
"""List schools with optional filtering and pagination."""
pool = await get_db_pool()
async with pool.acquire() as conn:
# Build WHERE clause
conditions = ["s.is_active = TRUE"]
params = []
param_idx = 1
if state:
conditions.append(f"s.state = ${param_idx}")
params.append(state.upper())
param_idx += 1
if school_type:
conditions.append(f"st.name = ${param_idx}")
params.append(school_type)
param_idx += 1
if city:
conditions.append(f"LOWER(s.city) = LOWER(${param_idx})")
params.append(city)
param_idx += 1
if district:
conditions.append(f"LOWER(s.district) LIKE LOWER(${param_idx})")
params.append(f"%{district}%")
param_idx += 1
if postal_code:
conditions.append(f"s.postal_code LIKE ${param_idx}")
params.append(f"{postal_code}%")
param_idx += 1
if search:
conditions.append(f"""
(LOWER(s.name) LIKE LOWER(${param_idx})
OR LOWER(s.city) LIKE LOWER(${param_idx})
OR LOWER(s.district) LIKE LOWER(${param_idx}))
""")
params.append(f"%{search}%")
param_idx += 1
if has_email is not None:
if has_email:
conditions.append("s.email IS NOT NULL")
else:
conditions.append("s.email IS NULL")
if has_website is not None:
if has_website:
conditions.append("s.website IS NOT NULL")
else:
conditions.append("s.website IS NULL")
if is_public is not None:
conditions.append(f"s.is_public = ${param_idx}")
params.append(is_public)
param_idx += 1
where_clause = " AND ".join(conditions)
# Count total
count_query = f"""
SELECT COUNT(*) FROM schools s
LEFT JOIN school_types st ON s.school_type_id = st.id
WHERE {where_clause}
"""
total = await conn.fetchval(count_query, *params)
# Fetch schools
offset = (page - 1) * page_size
query = f"""
SELECT
s.id, s.name, s.school_number, s.state, s.district, s.city,
s.postal_code, s.street, s.address_full, s.latitude, s.longitude,
s.website, s.email, s.phone, s.fax,
s.principal_name, s.principal_email,
s.student_count, s.teacher_count,
s.is_public, s.is_all_day, s.source, s.crawled_at,
s.is_active, s.created_at, s.updated_at,
st.name as school_type, st.name_short as school_type_short, st.category as school_category,
(SELECT COUNT(*) FROM school_staff ss WHERE ss.school_id = s.id AND ss.is_active = TRUE) as staff_count
FROM schools s
LEFT JOIN school_types st ON s.school_type_id = st.id
WHERE {where_clause}
ORDER BY s.state, s.city, s.name
LIMIT ${param_idx} OFFSET ${param_idx + 1}
"""
params.extend([page_size, offset])
rows = await conn.fetch(query, *params)
schools = [
SchoolResponse(
id=str(row["id"]),
name=row["name"],
school_number=row["school_number"],
school_type=row["school_type"],
school_type_short=row["school_type_short"],
school_category=row["school_category"],
state=row["state"],
district=row["district"],
city=row["city"],
postal_code=row["postal_code"],
street=row["street"],
address_full=row["address_full"],
latitude=row["latitude"],
longitude=row["longitude"],
website=row["website"],
email=row["email"],
phone=row["phone"],
fax=row["fax"],
principal_name=row["principal_name"],
principal_email=row["principal_email"],
student_count=row["student_count"],
teacher_count=row["teacher_count"],
is_public=row["is_public"],
is_all_day=row["is_all_day"],
staff_count=row["staff_count"],
source=row["source"],
crawled_at=row["crawled_at"],
is_active=row["is_active"],
created_at=row["created_at"],
updated_at=row["updated_at"],
)
for row in rows
]
return SchoolsListResponse(
schools=schools,
total=total,
page=page,
page_size=page_size,
)
@router.get("/stats", response_model=SchoolStatsResponse)
async def get_school_stats():
"""Get school statistics."""
pool = await get_db_pool()
async with pool.acquire() as conn:
# Total schools and staff
totals = await conn.fetchrow("""
SELECT
(SELECT COUNT(*) FROM schools WHERE is_active = TRUE) as total_schools,
(SELECT COUNT(*) FROM school_staff WHERE is_active = TRUE) as total_staff,
(SELECT COUNT(*) FROM schools WHERE is_active = TRUE AND website IS NOT NULL) as with_website,
(SELECT COUNT(*) FROM schools WHERE is_active = TRUE AND email IS NOT NULL) as with_email,
(SELECT COUNT(*) FROM schools WHERE is_active = TRUE AND principal_name IS NOT NULL) as with_principal,
(SELECT COALESCE(SUM(student_count), 0) FROM schools WHERE is_active = TRUE) as total_students,
(SELECT COALESCE(SUM(teacher_count), 0) FROM schools WHERE is_active = TRUE) as total_teachers,
(SELECT MAX(crawled_at) FROM schools) as last_crawl
""")
# By state
state_rows = await conn.fetch("""
SELECT state, COUNT(*) as count
FROM schools
WHERE is_active = TRUE
GROUP BY state
ORDER BY state
""")
schools_by_state = {row["state"]: row["count"] for row in state_rows}
# By type
type_rows = await conn.fetch("""
SELECT COALESCE(st.name, 'Unbekannt') as type_name, COUNT(*) as count
FROM schools s
LEFT JOIN school_types st ON s.school_type_id = st.id
WHERE s.is_active = TRUE
GROUP BY st.name
ORDER BY count DESC
""")
schools_by_type = {row["type_name"]: row["count"] for row in type_rows}
return SchoolStatsResponse(
total_schools=totals["total_schools"],
total_staff=totals["total_staff"],
schools_by_state=schools_by_state,
schools_by_type=schools_by_type,
schools_with_website=totals["with_website"],
schools_with_email=totals["with_email"],
schools_with_principal=totals["with_principal"],
total_students=totals["total_students"],
total_teachers=totals["total_teachers"],
last_crawl_time=totals["last_crawl"],
)
@router.get("/{school_id}", response_model=SchoolResponse)
async def get_school(school_id: str):
"""Get a single school by ID."""
pool = await get_db_pool()
async with pool.acquire() as conn:
row = await conn.fetchrow("""
SELECT
s.id, s.name, s.school_number, s.state, s.district, s.city,
s.postal_code, s.street, s.address_full, s.latitude, s.longitude,
s.website, s.email, s.phone, s.fax,
s.principal_name, s.principal_email,
s.student_count, s.teacher_count,
s.is_public, s.is_all_day, s.source, s.crawled_at,
s.is_active, s.created_at, s.updated_at,
st.name as school_type, st.name_short as school_type_short, st.category as school_category,
(SELECT COUNT(*) FROM school_staff ss WHERE ss.school_id = s.id AND ss.is_active = TRUE) as staff_count
FROM schools s
LEFT JOIN school_types st ON s.school_type_id = st.id
WHERE s.id = $1
""", school_id)
if not row:
raise HTTPException(status_code=404, detail="School not found")
return SchoolResponse(
id=str(row["id"]),
name=row["name"],
school_number=row["school_number"],
school_type=row["school_type"],
school_type_short=row["school_type_short"],
school_category=row["school_category"],
state=row["state"],
district=row["district"],
city=row["city"],
postal_code=row["postal_code"],
street=row["street"],
address_full=row["address_full"],
latitude=row["latitude"],
longitude=row["longitude"],
website=row["website"],
email=row["email"],
phone=row["phone"],
fax=row["fax"],
principal_name=row["principal_name"],
principal_email=row["principal_email"],
student_count=row["student_count"],
teacher_count=row["teacher_count"],
is_public=row["is_public"],
is_all_day=row["is_all_day"],
staff_count=row["staff_count"],
source=row["source"],
crawled_at=row["crawled_at"],
is_active=row["is_active"],
created_at=row["created_at"],
updated_at=row["updated_at"],
)
@router.post("/bulk-import", response_model=BulkImportResponse)
async def bulk_import_schools(request: BulkImportRequest):
"""Bulk import schools. Updates existing schools based on school_number + state."""
pool = await get_db_pool()
imported = 0
updated = 0
skipped = 0
errors = []
async with pool.acquire() as conn:
# Get school type mapping
type_rows = await conn.fetch("SELECT id, name FROM school_types")
type_map = {row["name"].lower(): str(row["id"]) for row in type_rows}
for school in request.schools:
try:
# Find school type ID
school_type_id = None
if school.school_type_raw:
school_type_id = type_map.get(school.school_type_raw.lower())
# Check if school exists (by school_number + state, or by name + city + state)
existing = None
if school.school_number:
existing = await conn.fetchrow(
"SELECT id FROM schools WHERE school_number = $1 AND state = $2",
school.school_number, school.state
)
if not existing and school.city:
existing = await conn.fetchrow(
"SELECT id FROM schools WHERE LOWER(name) = LOWER($1) AND LOWER(city) = LOWER($2) AND state = $3",
school.name, school.city, school.state
)
if existing:
# Update existing school
await conn.execute("""
UPDATE schools SET
name = $2,
school_type_id = COALESCE($3, school_type_id),
school_type_raw = COALESCE($4, school_type_raw),
district = COALESCE($5, district),
city = COALESCE($6, city),
postal_code = COALESCE($7, postal_code),
street = COALESCE($8, street),
address_full = COALESCE($9, address_full),
latitude = COALESCE($10, latitude),
longitude = COALESCE($11, longitude),
website = COALESCE($12, website),
email = COALESCE($13, email),
phone = COALESCE($14, phone),
fax = COALESCE($15, fax),
principal_name = COALESCE($16, principal_name),
principal_title = COALESCE($17, principal_title),
principal_email = COALESCE($18, principal_email),
principal_phone = COALESCE($19, principal_phone),
student_count = COALESCE($20, student_count),
teacher_count = COALESCE($21, teacher_count),
is_public = $22,
source = COALESCE($23, source),
source_url = COALESCE($24, source_url),
updated_at = NOW()
WHERE id = $1
""",
existing["id"],
school.name,
school_type_id,
school.school_type_raw,
school.district,
school.city,
school.postal_code,
school.street,
school.address_full,
school.latitude,
school.longitude,
school.website,
school.email,
school.phone,
school.fax,
school.principal_name,
school.principal_title,
school.principal_email,
school.principal_phone,
school.student_count,
school.teacher_count,
school.is_public,
school.source,
school.source_url,
)
updated += 1
else:
# Insert new school
await conn.execute("""
INSERT INTO schools (
name, school_number, school_type_id, school_type_raw,
state, district, city, postal_code, street, address_full,
latitude, longitude, website, email, phone, fax,
principal_name, principal_title, principal_email, principal_phone,
student_count, teacher_count, is_public,
source, source_url, crawled_at
) VALUES (
$1, $2, $3, $4, $5, $6, $7, $8, $9, $10,
$11, $12, $13, $14, $15, $16, $17, $18, $19, $20,
$21, $22, $23, $24, $25, NOW()
)
""",
school.name,
school.school_number,
school_type_id,
school.school_type_raw,
school.state,
school.district,
school.city,
school.postal_code,
school.street,
school.address_full,
school.latitude,
school.longitude,
school.website,
school.email,
school.phone,
school.fax,
school.principal_name,
school.principal_title,
school.principal_email,
school.principal_phone,
school.student_count,
school.teacher_count,
school.is_public,
school.source,
school.source_url,
)
imported += 1
except Exception as e:
errors.append(f"Error importing {school.name}: {str(e)}")
if len(errors) > 100:
errors.append("... (more errors truncated)")
break
return BulkImportResponse(
imported=imported,
updated=updated,
skipped=skipped,
errors=errors[:100],
)

View File

@@ -0,0 +1,25 @@
"""
Schools API - Database Connection.
Shared database pool for school endpoints.
"""
import os
from typing import Optional
import asyncpg
# Database connection pool
_pool: Optional[asyncpg.Pool] = None
async def get_db_pool() -> asyncpg.Pool:
"""Get or create database connection pool."""
global _pool
if _pool is None:
database_url = os.environ.get(
"DATABASE_URL",
"postgresql://breakpilot:breakpilot123@postgres:5432/breakpilot_db"
)
_pool = await asyncpg.create_pool(database_url, min_size=2, max_size=10)
return _pool

View File

@@ -0,0 +1,200 @@
"""
Schools API - Pydantic Models.
Data models for school and school staff endpoints.
"""
from typing import Optional, List
from datetime import datetime
from pydantic import BaseModel, Field
# =============================================================================
# School Type Models
# =============================================================================
class SchoolTypeResponse(BaseModel):
"""School type response model."""
id: str
name: str
name_short: Optional[str] = None
category: Optional[str] = None
description: Optional[str] = None
# =============================================================================
# School Models
# =============================================================================
class SchoolBase(BaseModel):
"""Base school model for creation/update."""
name: str = Field(..., max_length=255)
school_number: Optional[str] = Field(None, max_length=20)
school_type_id: Optional[str] = None
school_type_raw: Optional[str] = None
state: str = Field(..., max_length=10)
district: Optional[str] = None
city: Optional[str] = None
postal_code: Optional[str] = None
street: Optional[str] = None
address_full: Optional[str] = None
latitude: Optional[float] = None
longitude: Optional[float] = None
website: Optional[str] = None
email: Optional[str] = None
phone: Optional[str] = None
fax: Optional[str] = None
principal_name: Optional[str] = None
principal_title: Optional[str] = None
principal_email: Optional[str] = None
principal_phone: Optional[str] = None
secretary_name: Optional[str] = None
secretary_email: Optional[str] = None
secretary_phone: Optional[str] = None
student_count: Optional[int] = None
teacher_count: Optional[int] = None
class_count: Optional[int] = None
founded_year: Optional[int] = None
is_public: bool = True
is_all_day: Optional[bool] = None
has_inclusion: Optional[bool] = None
languages: Optional[List[str]] = None
specializations: Optional[List[str]] = None
source: Optional[str] = None
source_url: Optional[str] = None
class SchoolCreate(SchoolBase):
"""School creation model."""
pass
class SchoolUpdate(BaseModel):
"""School update model (all fields optional)."""
name: Optional[str] = Field(None, max_length=255)
school_number: Optional[str] = None
school_type_id: Optional[str] = None
state: Optional[str] = None
district: Optional[str] = None
city: Optional[str] = None
postal_code: Optional[str] = None
street: Optional[str] = None
website: Optional[str] = None
email: Optional[str] = None
phone: Optional[str] = None
principal_name: Optional[str] = None
student_count: Optional[int] = None
teacher_count: Optional[int] = None
is_active: Optional[bool] = None
class SchoolResponse(BaseModel):
"""School response model."""
id: str
name: str
school_number: Optional[str] = None
school_type: Optional[str] = None
school_type_short: Optional[str] = None
school_category: Optional[str] = None
state: str
district: Optional[str] = None
city: Optional[str] = None
postal_code: Optional[str] = None
street: Optional[str] = None
address_full: Optional[str] = None
latitude: Optional[float] = None
longitude: Optional[float] = None
website: Optional[str] = None
email: Optional[str] = None
phone: Optional[str] = None
fax: Optional[str] = None
principal_name: Optional[str] = None
principal_email: Optional[str] = None
student_count: Optional[int] = None
teacher_count: Optional[int] = None
is_public: bool = True
is_all_day: Optional[bool] = None
staff_count: int = 0
source: Optional[str] = None
crawled_at: Optional[datetime] = None
is_active: bool = True
created_at: datetime
updated_at: datetime
class SchoolsListResponse(BaseModel):
"""List response with pagination info."""
schools: List[SchoolResponse]
total: int
page: int
page_size: int
class SchoolStatsResponse(BaseModel):
"""School statistics response."""
total_schools: int
total_staff: int
schools_by_state: dict
schools_by_type: dict
schools_with_website: int
schools_with_email: int
schools_with_principal: int
total_students: int
total_teachers: int
last_crawl_time: Optional[datetime] = None
class BulkImportRequest(BaseModel):
"""Bulk import request."""
schools: List[SchoolCreate]
class BulkImportResponse(BaseModel):
"""Bulk import response."""
imported: int
updated: int
skipped: int
errors: List[str]
# =============================================================================
# School Staff Models
# =============================================================================
class SchoolStaffBase(BaseModel):
"""Base school staff model."""
first_name: Optional[str] = None
last_name: str
full_name: Optional[str] = None
title: Optional[str] = None
position: Optional[str] = None
position_type: Optional[str] = None
subjects: Optional[List[str]] = None
email: Optional[str] = None
phone: Optional[str] = None
class SchoolStaffCreate(SchoolStaffBase):
"""School staff creation model."""
school_id: str
class SchoolStaffResponse(SchoolStaffBase):
"""School staff response model."""
id: str
school_id: str
school_name: Optional[str] = None
profile_url: Optional[str] = None
photo_url: Optional[str] = None
is_active: bool = True
created_at: datetime
class SchoolStaffListResponse(BaseModel):
"""Staff list response."""
staff: List[SchoolStaffResponse]
total: int

View File

@@ -0,0 +1,233 @@
"""
Schools API - Staff Routes.
CRUD and search endpoints for school staff members.
"""
import logging
from typing import Optional
from fastapi import APIRouter, HTTPException, Query
from .schools_db import get_db_pool
from .schools_models import (
SchoolStaffBase,
SchoolStaffResponse,
SchoolStaffListResponse,
)
logger = logging.getLogger(__name__)
router = APIRouter(tags=["schools"])
# =============================================================================
# School Staff Endpoints
# =============================================================================
@router.get("/{school_id}/staff", response_model=SchoolStaffListResponse)
async def get_school_staff(school_id: str):
"""Get staff members for a school."""
pool = await get_db_pool()
async with pool.acquire() as conn:
rows = await conn.fetch("""
SELECT
ss.id, ss.school_id, ss.first_name, ss.last_name, ss.full_name,
ss.title, ss.position, ss.position_type, ss.subjects,
ss.email, ss.phone, ss.profile_url, ss.photo_url,
ss.is_active, ss.created_at,
s.name as school_name
FROM school_staff ss
JOIN schools s ON ss.school_id = s.id
WHERE ss.school_id = $1 AND ss.is_active = TRUE
ORDER BY
CASE ss.position_type
WHEN 'principal' THEN 1
WHEN 'vice_principal' THEN 2
WHEN 'secretary' THEN 3
ELSE 4
END,
ss.last_name
""", school_id)
staff = [
SchoolStaffResponse(
id=str(row["id"]),
school_id=str(row["school_id"]),
school_name=row["school_name"],
first_name=row["first_name"],
last_name=row["last_name"],
full_name=row["full_name"],
title=row["title"],
position=row["position"],
position_type=row["position_type"],
subjects=row["subjects"],
email=row["email"],
phone=row["phone"],
profile_url=row["profile_url"],
photo_url=row["photo_url"],
is_active=row["is_active"],
created_at=row["created_at"],
)
for row in rows
]
return SchoolStaffListResponse(
staff=staff,
total=len(staff),
)
@router.post("/{school_id}/staff", response_model=SchoolStaffResponse)
async def create_school_staff(school_id: str, staff: SchoolStaffBase):
"""Add a staff member to a school."""
pool = await get_db_pool()
async with pool.acquire() as conn:
# Verify school exists
school = await conn.fetchrow("SELECT name FROM schools WHERE id = $1", school_id)
if not school:
raise HTTPException(status_code=404, detail="School not found")
# Create full name
full_name = staff.full_name
if not full_name:
parts = []
if staff.title:
parts.append(staff.title)
if staff.first_name:
parts.append(staff.first_name)
parts.append(staff.last_name)
full_name = " ".join(parts)
row = await conn.fetchrow("""
INSERT INTO school_staff (
school_id, first_name, last_name, full_name, title,
position, position_type, subjects, email, phone
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)
RETURNING id, created_at
""",
school_id,
staff.first_name,
staff.last_name,
full_name,
staff.title,
staff.position,
staff.position_type,
staff.subjects,
staff.email,
staff.phone,
)
return SchoolStaffResponse(
id=str(row["id"]),
school_id=school_id,
school_name=school["name"],
first_name=staff.first_name,
last_name=staff.last_name,
full_name=full_name,
title=staff.title,
position=staff.position,
position_type=staff.position_type,
subjects=staff.subjects,
email=staff.email,
phone=staff.phone,
is_active=True,
created_at=row["created_at"],
)
# =============================================================================
# Search Endpoints
# =============================================================================
@router.get("/search/staff", response_model=SchoolStaffListResponse)
async def search_school_staff(
q: Optional[str] = Query(None, description="Search query"),
state: Optional[str] = Query(None, description="Filter by state"),
position_type: Optional[str] = Query(None, description="Filter by position type"),
has_email: Optional[bool] = Query(None, description="Only staff with email"),
page: int = Query(1, ge=1),
page_size: int = Query(50, ge=1, le=200),
):
"""Search school staff across all schools."""
pool = await get_db_pool()
async with pool.acquire() as conn:
conditions = ["ss.is_active = TRUE", "s.is_active = TRUE"]
params = []
param_idx = 1
if q:
conditions.append(f"""
(LOWER(ss.full_name) LIKE LOWER(${param_idx})
OR LOWER(ss.last_name) LIKE LOWER(${param_idx})
OR LOWER(s.name) LIKE LOWER(${param_idx}))
""")
params.append(f"%{q}%")
param_idx += 1
if state:
conditions.append(f"s.state = ${param_idx}")
params.append(state.upper())
param_idx += 1
if position_type:
conditions.append(f"ss.position_type = ${param_idx}")
params.append(position_type)
param_idx += 1
if has_email is not None and has_email:
conditions.append("ss.email IS NOT NULL")
where_clause = " AND ".join(conditions)
# Count total
total = await conn.fetchval(f"""
SELECT COUNT(*) FROM school_staff ss
JOIN schools s ON ss.school_id = s.id
WHERE {where_clause}
""", *params)
# Fetch staff
offset = (page - 1) * page_size
rows = await conn.fetch(f"""
SELECT
ss.id, ss.school_id, ss.first_name, ss.last_name, ss.full_name,
ss.title, ss.position, ss.position_type, ss.subjects,
ss.email, ss.phone, ss.profile_url, ss.photo_url,
ss.is_active, ss.created_at,
s.name as school_name
FROM school_staff ss
JOIN schools s ON ss.school_id = s.id
WHERE {where_clause}
ORDER BY ss.last_name, ss.first_name
LIMIT ${param_idx} OFFSET ${param_idx + 1}
""", *params, page_size, offset)
staff = [
SchoolStaffResponse(
id=str(row["id"]),
school_id=str(row["school_id"]),
school_name=row["school_name"],
first_name=row["first_name"],
last_name=row["last_name"],
full_name=row["full_name"],
title=row["title"],
position=row["position"],
position_type=row["position_type"],
subjects=row["subjects"],
email=row["email"],
phone=row["phone"],
profile_url=row["profile_url"],
photo_url=row["photo_url"],
is_active=row["is_active"],
created_at=row["created_at"],
)
for row in rows
]
return SchoolStaffListResponse(
staff=staff,
total=total,
)