"""
Schools API Routes.

CRUD operations for managing German schools (~40,000 schools).
Direct database access to PostgreSQL.

NOTE on route order: FastAPI matches path operations in declaration order.
Fixed paths such as ``/types``, ``/stats`` and ``/search/staff`` must be
registered BEFORE the parameterised ``/{school_id}...`` routes that would
otherwise capture them.
"""

import asyncio
import logging
import os
from datetime import datetime
from typing import List, Optional
from uuid import UUID

import asyncpg
from fastapi import APIRouter, HTTPException, Query
from pydantic import BaseModel, Field

logger = logging.getLogger(__name__)

router = APIRouter(prefix="/schools", tags=["schools"])

# Database connection pool (created lazily on first use).
_pool: Optional[asyncpg.Pool] = None
# Created lazily inside get_db_pool() so it is never bound to a foreign event loop.
_pool_lock: Optional[asyncio.Lock] = None

# SECURITY NOTE(review): this fallback embeds credentials in source. It is kept
# for backward compatibility with existing deployments; production setups
# should always provide DATABASE_URL via the environment.
_DEFAULT_DATABASE_URL = "postgresql://breakpilot:breakpilot123@postgres:5432/breakpilot_db"

# Maximum number of per-school error messages returned by the bulk import.
_MAX_IMPORT_ERRORS = 100


async def get_db_pool() -> asyncpg.Pool:
    """Get or create the shared database connection pool.

    Pool creation awaits, so concurrent first requests could each create a
    pool and leak all but the last one. A lock with a second check inside it
    guarantees that exactly one pool is created.

    Returns:
        The process-wide ``asyncpg.Pool``.
    """
    global _pool, _pool_lock

    if _pool is not None:
        return _pool

    # Creating the lock is synchronous, so this check-and-set cannot interleave.
    if _pool_lock is None:
        _pool_lock = asyncio.Lock()

    async with _pool_lock:
        # Re-check: another coroutine may have created the pool while we waited.
        if _pool is None:
            if "DATABASE_URL" not in os.environ:
                logger.warning(
                    "DATABASE_URL is not set; falling back to the built-in default connection string"
                )
            database_url = os.environ.get("DATABASE_URL", _DEFAULT_DATABASE_URL)
            _pool = await asyncpg.create_pool(database_url, min_size=2, max_size=10)
    return _pool


# =============================================================================
# Pydantic Models
# =============================================================================


class SchoolTypeResponse(BaseModel):
    """School type response model."""

    id: str
    name: str
    name_short: Optional[str] = None
    category: Optional[str] = None
    description: Optional[str] = None


class SchoolBase(BaseModel):
    """Base school model for creation/update."""

    name: str = Field(..., max_length=255)
    school_number: Optional[str] = Field(None, max_length=20)
    school_type_id: Optional[str] = None
    school_type_raw: Optional[str] = None
    state: str = Field(..., max_length=10)
    district: Optional[str] = None
    city: Optional[str] = None
    postal_code: Optional[str] = None
    street: Optional[str] = None
    address_full: Optional[str] = None
    latitude: Optional[float] = None
    longitude: Optional[float] = None
    website: Optional[str] = None
    email: Optional[str] = None
    phone: Optional[str] = None
    fax: Optional[str] = None
    principal_name: Optional[str] = None
    principal_title: Optional[str] = None
    principal_email: Optional[str] = None
    principal_phone: Optional[str] = None
    secretary_name: Optional[str] = None
    secretary_email: Optional[str] = None
    secretary_phone: Optional[str] = None
    student_count: Optional[int] = None
    teacher_count: Optional[int] = None
    class_count: Optional[int] = None
    founded_year: Optional[int] = None
    is_public: bool = True
    is_all_day: Optional[bool] = None
    has_inclusion: Optional[bool] = None
    languages: Optional[List[str]] = None
    specializations: Optional[List[str]] = None
    source: Optional[str] = None
    source_url: Optional[str] = None


class SchoolCreate(SchoolBase):
    """School creation model."""

    pass


class SchoolUpdate(BaseModel):
    """School update model (all fields optional)."""

    name: Optional[str] = Field(None, max_length=255)
    school_number: Optional[str] = None
    school_type_id: Optional[str] = None
    state: Optional[str] = None
    district: Optional[str] = None
    city: Optional[str] = None
    postal_code: Optional[str] = None
    street: Optional[str] = None
    website: Optional[str] = None
    email: Optional[str] = None
    phone: Optional[str] = None
    principal_name: Optional[str] = None
    student_count: Optional[int] = None
    teacher_count: Optional[int] = None
    is_active: Optional[bool] = None


class SchoolResponse(BaseModel):
    """School response model."""

    id: str
    name: str
    school_number: Optional[str] = None
    school_type: Optional[str] = None
    school_type_short: Optional[str] = None
    school_category: Optional[str] = None
    state: str
    district: Optional[str] = None
    city: Optional[str] = None
    postal_code: Optional[str] = None
    street: Optional[str] = None
    address_full: Optional[str] = None
    latitude: Optional[float] = None
    longitude: Optional[float] = None
    website: Optional[str] = None
    email: Optional[str] = None
    phone: Optional[str] = None
    fax: Optional[str] = None
    principal_name: Optional[str] = None
    principal_email: Optional[str] = None
    student_count: Optional[int] = None
    teacher_count: Optional[int] = None
    is_public: bool = True
    is_all_day: Optional[bool] = None
    staff_count: int = 0
    source: Optional[str] = None
    crawled_at: Optional[datetime] = None
    is_active: bool = True
    created_at: datetime
    updated_at: datetime


class SchoolsListResponse(BaseModel):
    """List response with pagination info."""

    schools: List[SchoolResponse]
    total: int
    page: int
    page_size: int


class SchoolStaffBase(BaseModel):
    """Base school staff model."""

    first_name: Optional[str] = None
    last_name: str
    full_name: Optional[str] = None
    title: Optional[str] = None
    position: Optional[str] = None
    position_type: Optional[str] = None
    subjects: Optional[List[str]] = None
    email: Optional[str] = None
    phone: Optional[str] = None


class SchoolStaffCreate(SchoolStaffBase):
    """School staff creation model."""

    school_id: str


class SchoolStaffResponse(SchoolStaffBase):
    """School staff response model."""

    id: str
    school_id: str
    school_name: Optional[str] = None
    profile_url: Optional[str] = None
    photo_url: Optional[str] = None
    is_active: bool = True
    created_at: datetime


class SchoolStaffListResponse(BaseModel):
    """Staff list response."""

    staff: List[SchoolStaffResponse]
    total: int


class SchoolStatsResponse(BaseModel):
    """School statistics response."""

    total_schools: int
    total_staff: int
    schools_by_state: dict
    schools_by_type: dict
    schools_with_website: int
    schools_with_email: int
    schools_with_principal: int
    total_students: int
    total_teachers: int
    last_crawl_time: Optional[datetime] = None


class BulkImportRequest(BaseModel):
    """Bulk import request."""

    schools: List[SchoolCreate]


class BulkImportResponse(BaseModel):
    """Bulk import response."""

    imported: int
    updated: int
    skipped: int
    errors: List[str]


# =============================================================================
# Shared SQL fragments and row mappers
# =============================================================================

# SELECT ... FROM shared by the school list and single-school endpoints;
# callers append their own WHERE / ORDER BY / LIMIT clauses.
_SCHOOL_SELECT = """
    SELECT
        s.id, s.name, s.school_number, s.state, s.district, s.city,
        s.postal_code, s.street, s.address_full, s.latitude, s.longitude,
        s.website, s.email, s.phone, s.fax,
        s.principal_name, s.principal_email,
        s.student_count, s.teacher_count,
        s.is_public, s.is_all_day, s.source, s.crawled_at,
        s.is_active, s.created_at, s.updated_at,
        st.name as school_type, st.name_short as school_type_short,
        st.category as school_category,
        (SELECT COUNT(*) FROM school_staff ss
          WHERE ss.school_id = s.id AND ss.is_active = TRUE) as staff_count
    FROM schools s
    LEFT JOIN school_types st ON s.school_type_id = st.id
"""

# SELECT ... FROM shared by the per-school staff list and the staff search.
_STAFF_SELECT = """
    SELECT
        ss.id, ss.school_id, ss.first_name, ss.last_name, ss.full_name,
        ss.title, ss.position, ss.position_type, ss.subjects,
        ss.email, ss.phone, ss.profile_url, ss.photo_url,
        ss.is_active, ss.created_at,
        s.name as school_name
    FROM school_staff ss
    JOIN schools s ON ss.school_id = s.id
"""


def _school_from_row(row: asyncpg.Record) -> SchoolResponse:
    """Map one row produced by ``_SCHOOL_SELECT`` to a ``SchoolResponse``."""
    return SchoolResponse(
        id=str(row["id"]),
        name=row["name"],
        school_number=row["school_number"],
        school_type=row["school_type"],
        school_type_short=row["school_type_short"],
        school_category=row["school_category"],
        state=row["state"],
        district=row["district"],
        city=row["city"],
        postal_code=row["postal_code"],
        street=row["street"],
        address_full=row["address_full"],
        latitude=row["latitude"],
        longitude=row["longitude"],
        website=row["website"],
        email=row["email"],
        phone=row["phone"],
        fax=row["fax"],
        principal_name=row["principal_name"],
        principal_email=row["principal_email"],
        student_count=row["student_count"],
        teacher_count=row["teacher_count"],
        is_public=row["is_public"],
        is_all_day=row["is_all_day"],
        staff_count=row["staff_count"],
        source=row["source"],
        crawled_at=row["crawled_at"],
        is_active=row["is_active"],
        created_at=row["created_at"],
        updated_at=row["updated_at"],
    )


def _staff_from_row(row: asyncpg.Record) -> SchoolStaffResponse:
    """Map one row produced by ``_STAFF_SELECT`` to a ``SchoolStaffResponse``."""
    return SchoolStaffResponse(
        id=str(row["id"]),
        school_id=str(row["school_id"]),
        school_name=row["school_name"],
        first_name=row["first_name"],
        last_name=row["last_name"],
        full_name=row["full_name"],
        title=row["title"],
        position=row["position"],
        position_type=row["position_type"],
        subjects=row["subjects"],
        email=row["email"],
        phone=row["phone"],
        profile_url=row["profile_url"],
        photo_url=row["photo_url"],
        is_active=row["is_active"],
        created_at=row["created_at"],
    )


# =============================================================================
# School Type Endpoints
# =============================================================================


@router.get("/types", response_model=List[SchoolTypeResponse])
async def list_school_types():
    """List all school types, ordered by category and then name."""
    pool = await get_db_pool()
    async with pool.acquire() as conn:
        rows = await conn.fetch("""
            SELECT id, name, name_short, category, description
            FROM school_types
            ORDER BY category, name
        """)
        return [
            SchoolTypeResponse(
                id=str(row["id"]),
                name=row["name"],
                name_short=row["name_short"],
                category=row["category"],
                description=row["description"],
            )
            for row in rows
        ]


# =============================================================================
# School Endpoints
# =============================================================================


@router.get("", response_model=SchoolsListResponse)
async def list_schools(
    state: Optional[str] = Query(None, description="Filter by state code (BW, BY, etc.)"),
    school_type: Optional[str] = Query(None, description="Filter by school type name"),
    city: Optional[str] = Query(None, description="Filter by city"),
    district: Optional[str] = Query(None, description="Filter by district"),
    postal_code: Optional[str] = Query(None, description="Filter by postal code prefix"),
    search: Optional[str] = Query(None, description="Search in name, city"),
    has_email: Optional[bool] = Query(None, description="Filter schools with email"),
    has_website: Optional[bool] = Query(None, description="Filter schools with website"),
    is_public: Optional[bool] = Query(None, description="Filter public/private schools"),
    page: int = Query(1, ge=1),
    page_size: int = Query(50, ge=1, le=200),
):
    """List active schools with optional filtering and pagination.

    All user-supplied values are passed as bound parameters (``$n``); only
    the placeholder numbers are interpolated into the SQL text.

    Returns:
        ``SchoolsListResponse`` with the requested page and the total number
        of schools matching the filters.
    """
    pool = await get_db_pool()
    async with pool.acquire() as conn:
        # Build WHERE clause; param_idx is the number of the next free placeholder.
        conditions = ["s.is_active = TRUE"]
        params = []
        param_idx = 1

        if state:
            conditions.append(f"s.state = ${param_idx}")
            params.append(state.upper())
            param_idx += 1

        if school_type:
            conditions.append(f"st.name = ${param_idx}")
            params.append(school_type)
            param_idx += 1

        if city:
            conditions.append(f"LOWER(s.city) = LOWER(${param_idx})")
            params.append(city)
            param_idx += 1

        if district:
            conditions.append(f"LOWER(s.district) LIKE LOWER(${param_idx})")
            params.append(f"%{district}%")
            param_idx += 1

        if postal_code:
            conditions.append(f"s.postal_code LIKE ${param_idx}")
            params.append(f"{postal_code}%")
            param_idx += 1

        if search:
            # One bound value is referenced three times (name, city, district).
            conditions.append(f"""
                (LOWER(s.name) LIKE LOWER(${param_idx})
                 OR LOWER(s.city) LIKE LOWER(${param_idx})
                 OR LOWER(s.district) LIKE LOWER(${param_idx}))
            """)
            params.append(f"%{search}%")
            param_idx += 1

        if has_email is not None:
            conditions.append("s.email IS NOT NULL" if has_email else "s.email IS NULL")

        if has_website is not None:
            conditions.append("s.website IS NOT NULL" if has_website else "s.website IS NULL")

        if is_public is not None:
            conditions.append(f"s.is_public = ${param_idx}")
            params.append(is_public)
            param_idx += 1

        where_clause = " AND ".join(conditions)

        # Count total (same filters, no pagination).
        count_query = f"""
            SELECT COUNT(*)
            FROM schools s
            LEFT JOIN school_types st ON s.school_type_id = st.id
            WHERE {where_clause}
        """
        total = await conn.fetchval(count_query, *params)

        # Fetch the requested page.
        offset = (page - 1) * page_size
        query = f"""
            {_SCHOOL_SELECT}
            WHERE {where_clause}
            ORDER BY s.state, s.city, s.name
            LIMIT ${param_idx} OFFSET ${param_idx + 1}
        """
        rows = await conn.fetch(query, *params, page_size, offset)

        return SchoolsListResponse(
            schools=[_school_from_row(row) for row in rows],
            total=total,
            page=page,
            page_size=page_size,
        )


@router.get("/stats", response_model=SchoolStatsResponse)
async def get_school_stats():
    """Get aggregate school statistics (totals, per-state and per-type counts)."""
    pool = await get_db_pool()
    async with pool.acquire() as conn:
        # Total schools and staff
        totals = await conn.fetchrow("""
            SELECT
                (SELECT COUNT(*) FROM schools WHERE is_active = TRUE) as total_schools,
                (SELECT COUNT(*) FROM school_staff WHERE is_active = TRUE) as total_staff,
                (SELECT COUNT(*) FROM schools WHERE is_active = TRUE AND website IS NOT NULL) as with_website,
                (SELECT COUNT(*) FROM schools WHERE is_active = TRUE AND email IS NOT NULL) as with_email,
                (SELECT COUNT(*) FROM schools WHERE is_active = TRUE AND principal_name IS NOT NULL) as with_principal,
                (SELECT COALESCE(SUM(student_count), 0) FROM schools WHERE is_active = TRUE) as total_students,
                (SELECT COALESCE(SUM(teacher_count), 0) FROM schools WHERE is_active = TRUE) as total_teachers,
                (SELECT MAX(crawled_at) FROM schools) as last_crawl
        """)

        # By state
        state_rows = await conn.fetch("""
            SELECT state, COUNT(*) as count
            FROM schools
            WHERE is_active = TRUE
            GROUP BY state
            ORDER BY state
        """)
        schools_by_state = {row["state"]: row["count"] for row in state_rows}

        # By type (schools without a type are reported as 'Unbekannt')
        type_rows = await conn.fetch("""
            SELECT COALESCE(st.name, 'Unbekannt') as type_name, COUNT(*) as count
            FROM schools s
            LEFT JOIN school_types st ON s.school_type_id = st.id
            WHERE s.is_active = TRUE
            GROUP BY st.name
            ORDER BY count DESC
        """)
        schools_by_type = {row["type_name"]: row["count"] for row in type_rows}

        return SchoolStatsResponse(
            total_schools=totals["total_schools"],
            total_staff=totals["total_staff"],
            schools_by_state=schools_by_state,
            schools_by_type=schools_by_type,
            schools_with_website=totals["with_website"],
            schools_with_email=totals["with_email"],
            schools_with_principal=totals["with_principal"],
            total_students=totals["total_students"],
            total_teachers=totals["total_teachers"],
            last_crawl_time=totals["last_crawl"],
        )


@router.get("/{school_id}", response_model=SchoolResponse)
async def get_school(school_id: str):
    """Get a single school by ID.

    Raises:
        HTTPException: 404 when no school with ``school_id`` exists.
    """
    pool = await get_db_pool()
    async with pool.acquire() as conn:
        row = await conn.fetchrow(f"{_SCHOOL_SELECT} WHERE s.id = $1", school_id)

        if not row:
            raise HTTPException(status_code=404, detail="School not found")

        return _school_from_row(row)


async def _find_existing_school(conn, school: SchoolCreate):
    """Return the row (``id`` only) of a school matching ``school``, or None.

    Matching is attempted by ``school_number + state`` first and then, if
    that yields nothing and a city is given, by case-insensitive
    ``name + city`` together with ``state``.
    """
    existing = None
    if school.school_number:
        existing = await conn.fetchrow(
            "SELECT id FROM schools WHERE school_number = $1 AND state = $2",
            school.school_number,
            school.state,
        )
    if not existing and school.city:
        existing = await conn.fetchrow(
            "SELECT id FROM schools WHERE LOWER(name) = LOWER($1) AND LOWER(city) = LOWER($2) AND state = $3",
            school.name,
            school.city,
            school.state,
        )
    return existing


async def _update_school(conn, existing_id, school: SchoolCreate, school_type_id) -> None:
    """Update an existing school; NULL inputs keep the stored value (COALESCE).

    ``name`` and ``is_public`` are always overwritten.
    """
    await conn.execute(
        """
        UPDATE schools SET
            name = $2,
            school_type_id = COALESCE($3, school_type_id),
            school_type_raw = COALESCE($4, school_type_raw),
            district = COALESCE($5, district),
            city = COALESCE($6, city),
            postal_code = COALESCE($7, postal_code),
            street = COALESCE($8, street),
            address_full = COALESCE($9, address_full),
            latitude = COALESCE($10, latitude),
            longitude = COALESCE($11, longitude),
            website = COALESCE($12, website),
            email = COALESCE($13, email),
            phone = COALESCE($14, phone),
            fax = COALESCE($15, fax),
            principal_name = COALESCE($16, principal_name),
            principal_title = COALESCE($17, principal_title),
            principal_email = COALESCE($18, principal_email),
            principal_phone = COALESCE($19, principal_phone),
            student_count = COALESCE($20, student_count),
            teacher_count = COALESCE($21, teacher_count),
            is_public = $22,
            source = COALESCE($23, source),
            source_url = COALESCE($24, source_url),
            updated_at = NOW()
        WHERE id = $1
        """,
        existing_id,
        school.name,
        school_type_id,
        school.school_type_raw,
        school.district,
        school.city,
        school.postal_code,
        school.street,
        school.address_full,
        school.latitude,
        school.longitude,
        school.website,
        school.email,
        school.phone,
        school.fax,
        school.principal_name,
        school.principal_title,
        school.principal_email,
        school.principal_phone,
        school.student_count,
        school.teacher_count,
        school.is_public,
        school.source,
        school.source_url,
    )


async def _insert_school(conn, school: SchoolCreate, school_type_id) -> None:
    """Insert a new school row; ``crawled_at`` is set to the current time."""
    await conn.execute(
        """
        INSERT INTO schools (
            name, school_number, school_type_id, school_type_raw,
            state, district, city, postal_code, street, address_full,
            latitude, longitude, website, email, phone, fax,
            principal_name, principal_title, principal_email, principal_phone,
            student_count, teacher_count, is_public,
            source, source_url, crawled_at
        ) VALUES (
            $1, $2, $3, $4, $5, $6, $7, $8, $9, $10,
            $11, $12, $13, $14, $15, $16, $17, $18, $19, $20,
            $21, $22, $23, $24, $25, NOW()
        )
        """,
        school.name,
        school.school_number,
        school_type_id,
        school.school_type_raw,
        school.state,
        school.district,
        school.city,
        school.postal_code,
        school.street,
        school.address_full,
        school.latitude,
        school.longitude,
        school.website,
        school.email,
        school.phone,
        school.fax,
        school.principal_name,
        school.principal_title,
        school.principal_email,
        school.principal_phone,
        school.student_count,
        school.teacher_count,
        school.is_public,
        school.source,
        school.source_url,
    )


@router.post("/bulk-import", response_model=BulkImportResponse)
async def bulk_import_schools(request: BulkImportRequest):
    """Bulk import schools.

    Existing schools (matched by school_number + state, or by
    name + city + state) are updated; all others are inserted. A failure on
    one school is recorded and the import continues with the next one. The
    import aborts once more than 100 schools have failed; in that case the
    first 100 messages are returned followed by a truncation notice.

    Returns:
        ``BulkImportResponse`` with counters and error messages. ``skipped``
        is currently always 0 because no code path skips a school.
    """
    pool = await get_db_pool()

    imported = 0
    updated = 0
    skipped = 0
    errors: List[str] = []
    truncated = False

    async with pool.acquire() as conn:
        # Get school type mapping (lower-cased type name -> type id)
        type_rows = await conn.fetch("SELECT id, name FROM school_types")
        type_map = {row["name"].lower(): str(row["id"]) for row in type_rows}

        for school in request.schools:
            try:
                # Find school type ID from the raw type name, if it is known
                school_type_id = None
                if school.school_type_raw:
                    school_type_id = type_map.get(school.school_type_raw.lower())

                existing = await _find_existing_school(conn, school)
                if existing:
                    await _update_school(conn, existing["id"], school, school_type_id)
                    updated += 1
                else:
                    await _insert_school(conn, school, school_type_id)
                    imported += 1
            except Exception as e:
                # Deliberate best-effort: one bad record must not abort the batch.
                logger.warning("Bulk import failed for school %r: %s", school.name, e)
                errors.append(f"Error importing {school.name}: {str(e)}")
                if len(errors) > _MAX_IMPORT_ERRORS:
                    truncated = True
                    break

    reported = errors[:_MAX_IMPORT_ERRORS]
    if truncated:
        # Append AFTER slicing so the notice is not cut off with the overflow.
        reported.append("... (more errors truncated)")

    return BulkImportResponse(
        imported=imported,
        updated=updated,
        skipped=skipped,
        errors=reported,
    )


# =============================================================================
# Search Endpoints
# =============================================================================
# Registered before the "/{school_id}/staff" routes below: otherwise
# GET /schools/search/staff would be captured with school_id="search".


@router.get("/search/staff", response_model=SchoolStaffListResponse)
async def search_school_staff(
    q: Optional[str] = Query(None, description="Search query"),
    state: Optional[str] = Query(None, description="Filter by state"),
    position_type: Optional[str] = Query(None, description="Filter by position type"),
    has_email: Optional[bool] = Query(None, description="Only staff with email"),
    page: int = Query(1, ge=1),
    page_size: int = Query(50, ge=1, le=200),
):
    """Search active staff across all active schools, with pagination.

    Returns:
        ``SchoolStaffListResponse`` where ``total`` is the number of matches
        across all pages, not only the returned page.
    """
    pool = await get_db_pool()
    async with pool.acquire() as conn:
        conditions = ["ss.is_active = TRUE", "s.is_active = TRUE"]
        params = []
        param_idx = 1

        if q:
            # One bound value is referenced three times.
            conditions.append(f"""
                (LOWER(ss.full_name) LIKE LOWER(${param_idx})
                 OR LOWER(ss.last_name) LIKE LOWER(${param_idx})
                 OR LOWER(s.name) LIKE LOWER(${param_idx}))
            """)
            params.append(f"%{q}%")
            param_idx += 1

        if state:
            conditions.append(f"s.state = ${param_idx}")
            params.append(state.upper())
            param_idx += 1

        if position_type:
            conditions.append(f"ss.position_type = ${param_idx}")
            params.append(position_type)
            param_idx += 1

        # has_email=False intentionally applies no filter (same as omitted).
        if has_email:
            conditions.append("ss.email IS NOT NULL")

        where_clause = " AND ".join(conditions)

        # Count total
        total = await conn.fetchval(
            f"""
            SELECT COUNT(*)
            FROM school_staff ss
            JOIN schools s ON ss.school_id = s.id
            WHERE {where_clause}
            """,
            *params,
        )

        # Fetch staff
        offset = (page - 1) * page_size
        rows = await conn.fetch(
            f"""
            {_STAFF_SELECT}
            WHERE {where_clause}
            ORDER BY ss.last_name, ss.first_name
            LIMIT ${param_idx} OFFSET ${param_idx + 1}
            """,
            *params,
            page_size,
            offset,
        )

        return SchoolStaffListResponse(
            staff=[_staff_from_row(row) for row in rows],
            total=total,
        )


# =============================================================================
# School Staff Endpoints
# =============================================================================


@router.get("/{school_id}/staff", response_model=SchoolStaffListResponse)
async def get_school_staff(school_id: str):
    """Get active staff members for a school.

    Results are ordered principal, vice principal, secretary, then everyone
    else, and by last name within each group.
    """
    pool = await get_db_pool()
    async with pool.acquire() as conn:
        rows = await conn.fetch(
            f"""
            {_STAFF_SELECT}
            WHERE ss.school_id = $1 AND ss.is_active = TRUE
            ORDER BY
                CASE ss.position_type
                    WHEN 'principal' THEN 1
                    WHEN 'vice_principal' THEN 2
                    WHEN 'secretary' THEN 3
                    ELSE 4
                END,
                ss.last_name
            """,
            school_id,
        )

        staff = [_staff_from_row(row) for row in rows]

        return SchoolStaffListResponse(
            staff=staff,
            total=len(staff),
        )


@router.post("/{school_id}/staff", response_model=SchoolStaffResponse)
async def create_school_staff(school_id: str, staff: SchoolStaffBase):
    """Add a staff member to a school.

    When ``full_name`` is not supplied it is built from title, first name
    and last name (in that order, skipping missing parts).

    Raises:
        HTTPException: 404 when the school does not exist.
    """
    pool = await get_db_pool()
    async with pool.acquire() as conn:
        # Verify school exists
        school = await conn.fetchrow("SELECT name FROM schools WHERE id = $1", school_id)
        if not school:
            raise HTTPException(status_code=404, detail="School not found")

        # Create full name
        full_name = staff.full_name
        if not full_name:
            parts = [part for part in (staff.title, staff.first_name) if part]
            parts.append(staff.last_name)
            full_name = " ".join(parts)

        row = await conn.fetchrow(
            """
            INSERT INTO school_staff (
                school_id, first_name, last_name, full_name, title,
                position, position_type, subjects, email, phone
            ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)
            RETURNING id, created_at
            """,
            school_id,
            staff.first_name,
            staff.last_name,
            full_name,
            staff.title,
            staff.position,
            staff.position_type,
            staff.subjects,
            staff.email,
            staff.phone,
        )

        return SchoolStaffResponse(
            id=str(row["id"]),
            school_id=school_id,
            school_name=school["name"],
            first_name=staff.first_name,
            last_name=staff.last_name,
            full_name=full_name,
            title=staff.title,
            position=staff.position,
            position_type=staff.position_type,
            subjects=staff.subjects,
            email=staff.email,
            phone=staff.phone,
            is_active=True,
            created_at=row["created_at"],
        )