# breakpilot-compliance/backend-compliance/compliance/api/agent_scan_models.py

"""Pydantic models for the Agent Website Scan API."""

from pydantic import BaseModel


class ScanRequest(BaseModel):
    """Request payload for starting a website scan.

    Only ``url`` is required; ``mode`` and ``recipient`` fall back to the
    defaults declared below when omitted.
    """

    # Declared as a plain ``str``: this model performs no URL-format validation.
    url: str
    # Free-form string; the set of accepted modes is not visible in this file
    # — TODO confirm against the scan implementation.
    mode: str = "post_launch"
    # NOTE(review): presumably the e-mail address the scan report is sent to
    # — verify against the caller.
    recipient: str = "dsb@breakpilot.local"


class ServiceInfo(BaseModel):
    """One service entry, as listed in ``ScanResponse.services``.

    Every field is required; none declares a default.
    """

    name: str
    category: str
    provider: str
    # Plain string; the expected format (ISO code vs. full name) is not
    # fixed here — TODO confirm.
    country: str
    # NOTE(review): presumably whether the provider's country is covered by
    # an EU adequacy decision — confirm with the producer of this model.
    eu_adequate: bool
    requires_consent: bool
    legal_ref: str
    # NOTE(review): "dse" presumably abbreviates "Datenschutzerklaerung"
    # (privacy policy), i.e. whether the service is mentioned there — confirm.
    in_dse: bool
    # Expected values: "ok", "undocumented", "outdated". The annotation is a
    # plain ``str``, so these values are not enforced by the model.
    status: str


class TextReferenceModel(BaseModel):
    """Text location and correction details attachable to a ``ScanFinding``.

    Every field has a default, so the model can be constructed without
    arguments; in that state ``found`` is ``False`` and all strings are empty
    except ``document_type``.
    """

    found: bool = False
    source_url: str = ""
    # Default is the ASCII spelling of the German word for "privacy policy".
    document_type: str = "Datenschutzerklaerung"
    section_heading: str = ""
    section_number: str = ""
    parent_section: str = ""
    # Whether this index is 0- or 1-based is not visible here — TODO confirm.
    paragraph_index: int = 0
    original_text: str = ""
    issue: str = ""
    # Free-form string; the allowed correction types are not defined in
    # this file — TODO confirm.
    correction_type: str = ""
    correction_text: str = ""
    # NOTE(review): presumably an anchor text after which ``correction_text``
    # should be inserted — verify against the consumer.
    insert_after: str = ""


class ScanFinding(BaseModel):
    """A single finding, as listed in ``ScanResponse.findings``.

    ``code``, ``severity`` and ``text`` are required; ``correction`` defaults
    to an empty string and ``text_reference`` to ``None``.
    """

    code: str
    # Plain string; the severity vocabulary is not defined in this file
    # — TODO confirm.
    severity: str
    text: str
    correction: str = ""
    # Optional structured pointer into the source text; ``None`` when absent.
    text_reference: TextReferenceModel | None = None


class DiscoveredDocument(BaseModel):
    """One document entry, as listed in ``ScanResponse.discovered_documents``.

    ``title``, ``url`` and ``doc_type`` are required; the remaining fields
    default to an empty string or zero.
    """

    title: str
    url: str
    # Free-form string; the set of document types is not defined here.
    doc_type: str
    language: str = ""
    word_count: int = 0
    # NOTE(review): presumably a 0-100 percentage; the range is not enforced
    # by this model — confirm.
    completeness_pct: int = 0
    findings_count: int = 0


class ScanResponse(BaseModel):
    """Full result of a website scan.

    Also embedded as ``ScanStatusResponse.result``. All fields are required
    except ``pages_list`` and ``discovered_documents``, which default to
    empty lists.
    """

    url: str
    pages_scanned: int
    # NOTE(review): the ``[]`` default relies on Pydantic copying field
    # defaults per instance rather than sharing one list — confirm for the
    # Pydantic version in use.
    pages_list: list[str] = []
    services: list[ServiceInfo]
    findings: list[ScanFinding]
    # Same per-instance-default assumption as ``pages_list`` above.
    discovered_documents: list[DiscoveredDocument] = []
    ai_detected: bool
    chatbot_detected: bool
    # Required even when no chatbot is detected; the value used in that case
    # is not visible here — TODO confirm.
    chatbot_provider: str
    # Unparameterised ``dict``: key and value types are not specified in
    # this file — TODO confirm the schema with the producer.
    missing_pages: dict
    summary: str
    email_status: str
    # Timestamp carried as a string; its format is not fixed here
    # — TODO confirm.
    scanned_at: str


class ScanStartResponse(BaseModel):
    """Response carrying the identifier of a scan and its initial status.

    Only ``scan_id`` is required; ``status`` defaults to ``"running"`` and
    ``message`` to an empty string.
    """

    scan_id: str
    status: str = "running"
    message: str = ""


class ScanStatusResponse(BaseModel):
    """Status report for a scan identified by ``scan_id``.

    ``scan_id`` and ``status`` are required. ``result`` defaults to ``None``,
    while ``progress`` and ``error`` default to empty strings.
    """

    scan_id: str
    # Expected values: "running", "completed", "failed". The annotation is a
    # plain ``str``, so these values are not enforced by the model.
    status: str
    progress: str = ""
    # NOTE(review): presumably populated only once ``status`` is "completed"
    # — verify against the code that builds this response.
    result: ScanResponse | None = None
    # NOTE(review): presumably populated only when ``status`` is "failed"
    # — verify against the code that builds this response.
    error: str = ""