backend-lehrer (11 files): - llm_gateway/routes/schools.py (867 → 5), recording_api.py (848 → 6) - messenger_api.py (840 → 5), print_generator.py (824 → 5) - unit_analytics_api.py (751 → 5), classroom/routes/context.py (726 → 4) - llm_gateway/routes/edu_search_seeds.py (710 → 4) klausur-service (12 files): - ocr_labeling_api.py (845 → 4), metrics_db.py (833 → 4) - legal_corpus_api.py (790 → 4), page_crop.py (758 → 3) - mail/ai_service.py (747 → 4), github_crawler.py (767 → 3) - trocr_service.py (730 → 4), full_compliance_pipeline.py (723 → 4) - dsfa_rag_api.py (715 → 4), ocr_pipeline_auto.py (705 → 4) website (6 pages): - audit-checklist (867 → 8), content (806 → 6) - screen-flow (790 → 4), scraper (789 → 5) - zeugnisse (776 → 5), modules (745 → 4) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
138 lines
3.3 KiB
Python
138 lines
3.3 KiB
Python
"""
DSFA RAG Pydantic Models.

Request/Response models for the DSFA RAG API.
"""
|
|
|
|
from typing import List, Optional
|
|
from pydantic import BaseModel, Field
|
|
|
|
|
|
# =============================================================================
# Request/Response Models
# =============================================================================
|
|
|
|
class DSFASourceResponse(BaseModel):
    """Response payload describing one DSFA source.

    Carries the source's identifying fields together with its license and
    attribution fields. ``language`` falls back to ``"de"`` when omitted;
    every ``Optional`` field falls back to ``None``.
    """

    # Identification
    id: str
    source_code: str
    name: str
    full_name: Optional[str] = None
    organization: Optional[str] = None
    source_url: Optional[str] = None

    # License and attribution
    license_code: str
    license_name: str
    license_url: Optional[str] = None
    attribution_required: bool
    attribution_text: str

    # Classification
    document_type: Optional[str] = None
    language: str = "de"
|
|
|
|
|
|
class DSFAChunkResponse(BaseModel):
    """Response payload for one chunk, including its attribution fields.

    The model is laid out in three groups: the chunk itself, the document it
    is linked to, and the source/license attribution fields. All ``Optional``
    fields default to ``None``.
    """

    # Chunk
    chunk_id: str
    content: str
    section_title: Optional[str] = None
    page_number: Optional[int] = None
    category: Optional[str] = None

    # Owning document
    document_id: str
    document_title: Optional[str] = None

    # Attribution (always part of the model; the Optional ones may be None)
    source_id: str
    source_code: str
    source_name: str
    attribution_text: str
    license_code: str
    license_name: str
    license_url: Optional[str] = None
    attribution_required: bool
    source_url: Optional[str] = None
    document_type: Optional[str] = None
|
|
|
|
|
|
class DSFASearchResultResponse(BaseModel):
    """Response payload for a single search hit.

    Holds the matched chunk's id and content with its ``score``, followed by
    the attribution fields and optional metadata. All ``Optional`` fields
    default to ``None``.
    """

    # Hit
    chunk_id: str
    content: str
    score: float

    # Source / license attribution
    source_code: str
    source_name: str
    attribution_text: str
    license_code: str
    license_name: str
    license_url: Optional[str] = None
    attribution_required: bool
    source_url: Optional[str] = None

    # Optional metadata
    document_type: Optional[str] = None
    category: Optional[str] = None
    section_title: Optional[str] = None
    page_number: Optional[int] = None
|
|
|
|
|
|
class DSFASearchResponse(BaseModel):
    """Response payload returned by the search endpoint.

    Bundles the query string, the list of ``DSFASearchResultResponse`` hits
    and a result count, plus license information aggregated for a footer.
    """

    # Query and hits
    query: str
    results: List[DSFASearchResultResponse]
    total_results: int

    # License summary aggregated for the footer
    licenses_used: List[str]
    attribution_notice: str
|
|
|
|
|
|
class DSFASourceStatsResponse(BaseModel):
    """Response payload with statistics for one source.

    Combines the source's identifying fields with its document and chunk
    counts. ``last_indexed_at`` is typed as an optional string and defaults
    to ``None``.
    """

    # Source identification
    source_id: str
    source_code: str
    name: str
    organization: Optional[str] = None
    license_code: str
    document_type: Optional[str] = None

    # Counters
    document_count: int
    chunk_count: int
    last_indexed_at: Optional[str] = None
|
|
|
|
|
|
class DSFACorpusStatsResponse(BaseModel):
    """Response payload with statistics for the whole corpus.

    Contains the per-source entries (``DSFASourceStatsResponse``), overall
    totals, and the Qdrant collection name, point count and status.
    """

    # Per-source breakdown and totals
    sources: List[DSFASourceStatsResponse]
    total_sources: int
    total_documents: int
    total_chunks: int

    # Qdrant collection details
    qdrant_collection: str
    qdrant_points_count: int
    qdrant_status: str
|
|
|
|
|
|
class IngestRequest(BaseModel):
    """Request payload for the ingestion endpoint.

    Every field is optional and defaults to ``None``; this model itself does
    not enforce that a URL or text is supplied.
    """

    document_url: Optional[str] = None
    document_text: Optional[str] = None
    title: Optional[str] = None
|
|
|
|
|
|
class IngestResponse(BaseModel):
    """Response payload for the ingestion endpoint.

    Reports the source code, the number of chunks created and a message.
    ``document_id`` is optional and defaults to ``None``.
    """

    source_code: str
    document_id: Optional[str] = None
    chunks_created: int
    message: str
|
|
|
|
|
|
class LicenseInfo(BaseModel):
    """Description of a license.

    Identifies the license by code and name (with an optional ``url``) and
    carries three boolean flags: attribution, modification, commercial use.
    """

    # Identification
    code: str
    name: str
    url: Optional[str] = None

    # Flags
    attribution_required: bool
    modification_allowed: bool
    commercial_use: bool
|