backend-lehrer (11 files): - llm_gateway/routes/schools.py (867 → 5), recording_api.py (848 → 6) - messenger_api.py (840 → 5), print_generator.py (824 → 5) - unit_analytics_api.py (751 → 5), classroom/routes/context.py (726 → 4) - llm_gateway/routes/edu_search_seeds.py (710 → 4) klausur-service (12 files): - ocr_labeling_api.py (845 → 4), metrics_db.py (833 → 4) - legal_corpus_api.py (790 → 4), page_crop.py (758 → 3) - mail/ai_service.py (747 → 4), github_crawler.py (767 → 3) - trocr_service.py (730 → 4), full_compliance_pipeline.py (723 → 4) - dsfa_rag_api.py (715 → 4), ocr_pipeline_auto.py (705 → 4) website (6 pages): - audit-checklist (867 → 8), content (806 → 6) - screen-flow (790 → 4), scraper (789 → 5) - zeugnisse (776 → 5), modules (745 → 4) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
138 lines
3.9 KiB
Python
138 lines
3.9 KiB
Python
"""
|
|
EduSearch Seeds Pydantic Models.
|
|
|
|
Request/Response models for the education search seed URL API.
|
|
"""
|
|
|
|
from typing import Optional, List
|
|
from datetime import datetime
|
|
|
|
from pydantic import BaseModel, Field
|
|
|
|
|
|
class CategoryResponse(BaseModel):
    """Category response model."""

    # Identity and labels: all three are required strings.
    id: str
    name: str
    display_name: str

    # Optional text fields; each falls back to None when not supplied.
    description: Optional[str] = None
    icon: Optional[str] = None

    # Required; no defaults are declared for these two.
    sort_order: int
    is_active: bool
|
|
|
|
|
|
class SeedBase(BaseModel):
    """Base seed model for creation/update."""

    # Required strings, capped at 500 and 255 characters respectively.
    url: str = Field(..., max_length=500)
    name: str = Field(..., max_length=255)
    description: Optional[str] = None

    # Classification fields. These are plain strings: the values listed in
    # the descriptions are documentation only and are not enforced here.
    category_name: Optional[str] = Field(
        default=None,
        description="Category name (federal, states, etc.)",
    )
    source_type: str = Field(default="GOV", description="GOV, EDU, UNI, etc.")
    scope: str = Field(default="FEDERAL", description="FEDERAL, STATE, etc.")
    state: Optional[str] = Field(
        default=None,
        max_length=5,
        description="State code (BW, BY, etc.)",
    )

    # Crawl tuning: trust_boost is bounded to [0.0, 1.0] and crawl_depth
    # to [1, 5]; crawl_frequency is an unconstrained string.
    trust_boost: float = Field(default=0.50, ge=0.0, le=1.0)
    enabled: bool = True
    crawl_depth: int = Field(default=2, ge=1, le=5)
    crawl_frequency: str = Field(
        default="weekly",
        description="hourly, daily, weekly, monthly",
    )
|
|
|
|
|
|
class SeedCreate(SeedBase):
    """Seed creation model."""

    # Adds no fields of its own; everything is inherited from SeedBase.
|
|
|
|
|
|
class SeedUpdate(BaseModel):
    """Seed update model (all fields optional)."""

    # Every field defaults to None, so a payload may carry any subset.
    # Where a constraint is declared it applies only to supplied values.

    # Descriptive fields.
    url: Optional[str] = Field(default=None, max_length=500)
    name: Optional[str] = Field(default=None, max_length=255)
    description: Optional[str] = None

    # Classification fields.
    category_name: Optional[str] = None
    source_type: Optional[str] = None
    scope: Optional[str] = None
    state: Optional[str] = Field(default=None, max_length=5)

    # Crawl tuning: trust_boost within [0.0, 1.0], crawl_depth within [1, 5].
    trust_boost: Optional[float] = Field(default=None, ge=0.0, le=1.0)
    enabled: Optional[bool] = None
    crawl_depth: Optional[int] = Field(default=None, ge=1, le=5)
    crawl_frequency: Optional[str] = None
|
|
|
|
|
|
class SeedResponse(BaseModel):
    """Seed response model."""

    # Identity and descriptive fields.
    id: str
    url: str
    name: str
    description: Optional[str] = None

    # Category information; both parts are optional and default to None.
    category: Optional[str] = None
    category_display_name: Optional[str] = None

    # Classification; source_type and scope are required, state is optional.
    source_type: str
    scope: str
    state: Optional[str] = None

    # Crawl configuration; all four are required in a response.
    trust_boost: float
    enabled: bool
    crawl_depth: int
    crawl_frequency: str

    # Crawl bookkeeping: timestamp/status default to None, counters to 0.
    last_crawled_at: Optional[datetime] = None
    last_crawl_status: Optional[str] = None
    last_crawl_docs: int = 0
    total_documents: int = 0

    # Record timestamps; both required.
    created_at: datetime
    updated_at: datetime
|
|
|
|
|
|
class SeedsListResponse(BaseModel):
    """List response with pagination info."""

    # The seed entries carried by this response.
    seeds: List[SeedResponse]

    # Pagination counters; all required integers with no bounds declared here.
    total: int
    page: int
    page_size: int
|
|
|
|
|
|
class StatsResponse(BaseModel):
    """Crawl statistics response."""

    # Aggregate counters; all required.
    total_seeds: int
    enabled_seeds: int
    total_documents: int

    # Breakdowns are untyped dicts: key/value types are not constrained here.
    # NOTE(review): presumably name -> count mappings; confirm against the
    # code that builds this response before narrowing the annotation.
    seeds_by_category: dict
    seeds_by_state: dict

    # Optional timestamp; defaults to None when not supplied.
    last_crawl_time: Optional[datetime] = None
|
|
|
|
|
|
class BulkImportRequest(BaseModel):
    """Bulk import request."""

    # Required list; each element is validated as a SeedCreate.
    seeds: List[SeedCreate]
|
|
|
|
|
|
class BulkImportResponse(BaseModel):
    """Bulk import response."""

    # Outcome counters; both required.
    imported: int
    skipped: int

    # Required list of error strings (no default, so it must be supplied
    # even when empty).
    errors: List[str]
|
|
|
|
|
|
class CrawlStatusUpdate(BaseModel):
    """Crawl status update from edu-search-service."""

    # Required fields. `status` is a plain string: the values named in its
    # description are not enforced by this model.
    seed_url: str = Field(..., description="The seed URL that was crawled")
    status: str = Field(
        ...,
        description="Crawl status: success, error, partial",
    )

    # Optional details; the numeric fields must be non-negative.
    documents_crawled: int = Field(
        default=0,
        ge=0,
        description="Number of documents crawled",
    )
    error_message: Optional[str] = Field(
        default=None,
        description="Error message if status is error",
    )
    crawl_duration_seconds: float = Field(
        default=0.0,
        ge=0.0,
        description="Duration of the crawl in seconds",
    )
|
|
|
|
|
|
class CrawlStatusResponse(BaseModel):
    """Response for crawl status update."""

    # All three fields are required; none declares a default.
    success: bool
    seed_url: str
    message: str
|
|
|
|
|
|
class BulkCrawlStatusUpdate(BaseModel):
    """Bulk crawl status update."""

    # Required list; each element is validated as a CrawlStatusUpdate.
    updates: List[CrawlStatusUpdate]
|
|
|
|
|
|
class BulkCrawlStatusResponse(BaseModel):
    """Response for bulk crawl status update."""

    # Outcome counters; both required.
    updated: int
    failed: int

    # Required list of error strings (no default, so it must be supplied
    # even when empty).
    errors: List[str]
|