[split-required] Split 700-870 LOC files across all services
backend-lehrer (11 files): - llm_gateway/routes/schools.py (867 → 5), recording_api.py (848 → 6) - messenger_api.py (840 → 5), print_generator.py (824 → 5) - unit_analytics_api.py (751 → 5), classroom/routes/context.py (726 → 4) - llm_gateway/routes/edu_search_seeds.py (710 → 4) klausur-service (12 files): - ocr_labeling_api.py (845 → 4), metrics_db.py (833 → 4) - legal_corpus_api.py (790 → 4), page_crop.py (758 → 3) - mail/ai_service.py (747 → 4), github_crawler.py (767 → 3) - trocr_service.py (730 → 4), full_compliance_pipeline.py (723 → 4) - dsfa_rag_api.py (715 → 4), ocr_pipeline_auto.py (705 → 4) website (6 pages): - audit-checklist (867 → 8), content (806 → 6) - screen-flow (790 → 4), scraper (789 → 5) - zeugnisse (776 → 5), modules (745 → 4) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
137
klausur-service/backend/dsfa_rag_models.py
Normal file
137
klausur-service/backend/dsfa_rag_models.py
Normal file
@@ -0,0 +1,137 @@
|
||||
"""
|
||||
DSFA RAG Pydantic Models.
|
||||
|
||||
Request/Response models for the DSFA RAG API.
|
||||
"""
|
||||
|
||||
from typing import List, Optional
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
# =============================================================================
# Request / Response Models
# =============================================================================
|
||||
|
||||
class DSFASourceResponse(BaseModel):
    """Response model for DSFA source."""

    # Fields declared without a default are required; every Optional field
    # falls back to None when omitted.

    # --- Identification of the source ---
    id: str
    source_code: str
    name: str
    full_name: Optional[str] = None
    organization: Optional[str] = None
    source_url: Optional[str] = None

    # --- License and attribution ---
    license_code: str
    license_name: str
    license_url: Optional[str] = None
    attribution_required: bool
    attribution_text: str

    # --- Classification ---
    document_type: Optional[str] = None
    language: str = "de"  # the only non-None default in this model
|
||||
|
||||
|
||||
class DSFAChunkResponse(BaseModel):
    """Response model for a single chunk with attribution."""

    # --- The chunk itself ---
    chunk_id: str
    content: str
    section_title: Optional[str] = None
    page_number: Optional[int] = None
    category: Optional[str] = None

    # --- Document the chunk belongs to ---
    document_id: str
    document_title: Optional[str] = None

    # --- Source attribution and licensing ---
    # The source/license identifiers, attribution_text and
    # attribution_required have no default, so every response must supply
    # them; only the URLs and document_type may be left out.
    source_id: str
    source_code: str
    source_name: str
    attribution_text: str
    license_code: str
    license_name: str
    license_url: Optional[str] = None
    attribution_required: bool
    source_url: Optional[str] = None
    document_type: Optional[str] = None
|
||||
|
||||
|
||||
class DSFASearchResultResponse(BaseModel):
    """Response model for search result."""

    # --- Matched chunk and its score ---
    chunk_id: str
    content: str
    score: float  # scale/range is not defined in this module

    # --- Source attribution and licensing ---
    source_code: str
    source_name: str
    attribution_text: str
    license_code: str
    license_name: str
    license_url: Optional[str] = None
    attribution_required: bool
    source_url: Optional[str] = None

    # --- Optional metadata (each defaults to None) ---
    document_type: Optional[str] = None
    category: Optional[str] = None
    section_title: Optional[str] = None
    page_number: Optional[int] = None
|
||||
|
||||
|
||||
class DSFASearchResponse(BaseModel):
    """Response model for search endpoint."""

    # All fields are required.

    # --- Query and the results returned for it ---
    query: str
    results: List[DSFASearchResultResponse]
    total_results: int

    # --- License summary aggregated over the results, meant for a footer ---
    licenses_used: List[str]
    attribution_notice: str
|
||||
|
||||
|
||||
class DSFASourceStatsResponse(BaseModel):
    """Response model for source statistics."""

    # --- Which source these statistics describe ---
    source_id: str
    source_code: str
    name: str
    organization: Optional[str] = None
    license_code: str
    document_type: Optional[str] = None

    # --- Counters ---
    document_count: int
    chunk_count: int

    # Carried as a plain string; presumably a timestamp, but the format is
    # not defined in this module. None when omitted.
    last_indexed_at: Optional[str] = None
|
||||
|
||||
|
||||
class DSFACorpusStatsResponse(BaseModel):
    """Response model for corpus statistics."""

    # All fields are required.

    # --- Per-source breakdown followed by corpus-wide totals ---
    sources: List[DSFASourceStatsResponse]
    total_sources: int
    total_documents: int
    total_chunks: int

    # --- Qdrant collection details ---
    qdrant_collection: str
    qdrant_points_count: int
    qdrant_status: str
|
||||
|
||||
|
||||
class IngestRequest(BaseModel):
    """Request model for ingestion."""

    # Every field is optional and defaults to None. This model does not
    # itself require that document_url or document_text be present.
    document_url: Optional[str] = None
    document_text: Optional[str] = None
    title: Optional[str] = None
|
||||
|
||||
|
||||
class IngestResponse(BaseModel):
    """Response model for ingestion."""

    source_code: str
    document_id: Optional[str] = None  # the only optional field; None when omitted
    chunks_created: int
    message: str
|
||||
|
||||
|
||||
class LicenseInfo(BaseModel):
    """License information."""

    # --- Identification ---
    code: str
    name: str
    url: Optional[str] = None

    # --- License terms as boolean flags (all required) ---
    attribution_required: bool
    modification_allowed: bool
    commercial_use: bool
|
||||
Reference in New Issue
Block a user