""" BSI / PDF Extraction Pydantic schemas — extracted from compliance/api/schemas.py. Phase 1 Step 3: the monolithic ``compliance.api.schemas`` module is being split per domain under ``compliance.schemas``. This module is re-exported from ``compliance.api.schemas`` for backwards compatibility. """ from datetime import datetime, date from typing import Optional, List, Any, Dict from pydantic import BaseModel, ConfigDict, Field from compliance.schemas.common import ( PaginationMeta, RegulationType, ControlType, ControlDomain, ControlStatus, RiskLevel, EvidenceStatus, ) # ============================================================================ # PDF Extraction Schemas # ============================================================================ class BSIAspectResponse(BaseModel): """A single extracted BSI-TR Pruefaspekt (test aspect).""" aspect_id: str title: str full_text: str category: str page_number: int section: str requirement_level: str source_document: str keywords: Optional[List[str]] = None related_aspects: Optional[List[str]] = None class PDFExtractionRequest(BaseModel): """Request for PDF extraction.""" document_code: str = Field(..., description="BSI-TR document code, e.g. BSI-TR-03161-2") save_to_db: bool = Field(True, description="Whether to save extracted requirements to database") force: bool = Field(False, description="Force re-extraction even if requirements exist") class PDFExtractionResponse(BaseModel): """Response from PDF extraction endpoint.""" # Simple endpoint format (new /pdf/extract/{doc_code}) doc_code: Optional[str] = None total_extracted: Optional[int] = None saved_to_db: Optional[int] = None aspects: Optional[List[BSIAspectResponse]] = None # Legacy scraper endpoint format (/scraper/extract-pdf) success: Optional[bool] = None source_document: Optional[str] = None total_aspects: Optional[int] = None statistics: Optional[Dict[str, Any]] = None requirements_created: Optional[int] = None