Files
breakpilot-compliance/backend-compliance/compliance/api/ai_routes.py
Benjamin Admin d48ebc5211 feat: 7 Analyse-Module auf 100% — Backend-Endpoints, DB-Model, Frontend-Persistenz
Alle 7 Analyse-Module (Requirements → Report) von ~80% auf 100% gebracht:
- Modul 1 (Requirements): POST/DELETE Endpoints + Frontend-Anbindung + Rollback
- Modul 2 (Controls): Evidence-Linking UI mit Validity-Badge
- Modul 3 (Evidence): Pagination (Frontend + Backend)
- Modul 4 (Risk Matrix): Mitigation-UI, Residual Risk, Status-Workflow
- Modul 5 (AI Act): AISystemDB Model, 6 CRUD-Endpoints, Backend-Persistenz
- Modul 6 (Audit Checklist): PDF-Download + Session-History
- Modul 7 (Audit Report): Detail-Seite mit Checklist Sign-Off + Navigation

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-02 15:52:23 +01:00

1267 lines
47 KiB
Python

"""
FastAPI routes for AI Compliance Assistant.
Endpoints:
- /ai/status: Get AI provider status
- /ai/interpret: Interpret a requirement
- /ai/suggest-controls: Get AI-suggested controls
- /ai/assess-risk: Assess module risk
- /ai/gap-analysis: Analyze coverage gaps
- /ai/batch-interpret: Batch interpret requirements
- /ai/auto-map-controls: Auto-map controls to requirements
- /ai/batch-map-controls: Batch map controls
- /ai/switch-provider: Switch LLM provider
- /ai/providers: List available providers
- /pdf/*: PDF extraction endpoints
"""
import logging
import os
from typing import Optional, List
from pydantic import BaseModel
from fastapi import APIRouter, Depends, HTTPException, Query, BackgroundTasks
from sqlalchemy.orm import Session
from classroom_engine.database import get_db
from ..db import (
RegulationRepository,
RequirementRepository,
ControlRepository,
)
from ..db.models import RegulationDB, RequirementDB, AISystemDB, AIClassificationEnum, AISystemStatusEnum
from .schemas import (
# AI Assistant schemas
AIInterpretationRequest, AIInterpretationResponse,
AIBatchInterpretationRequest, AIBatchInterpretationResponse,
AIControlSuggestionRequest, AIControlSuggestionResponse, AIControlSuggestionItem,
AIRiskAssessmentRequest, AIRiskAssessmentResponse, AIRiskFactor,
AIGapAnalysisRequest, AIGapAnalysisResponse,
AIStatusResponse,
# AI System schemas
AISystemCreate, AISystemUpdate, AISystemResponse, AISystemListResponse,
# PDF extraction schemas
BSIAspectResponse, PDFExtractionResponse,
)
logger = logging.getLogger(__name__)
router = APIRouter(tags=["compliance-ai"])
# ============================================================================
# AI System CRUD Endpoints (AI Act Compliance)
# ============================================================================
@router.get("/ai/systems", response_model=AISystemListResponse)
async def list_ai_systems(
    classification: Optional[str] = Query(None, description="Filter by classification"),
    status: Optional[str] = Query(None, description="Filter by status"),
    sector: Optional[str] = Query(None, description="Filter by sector"),
    db: Session = Depends(get_db),
):
    """List all registered AI systems, newest first.

    Query filters:
        classification / status: must match an enum value; unknown values
            are silently ignored (no filter applied) rather than rejected.
        sector: case-insensitive substring match.
    """
    # Fix: removed unused `import uuid as _uuid` (dead import).
    query = db.query(AISystemDB)
    if classification:
        try:
            cls_enum = AIClassificationEnum(classification)
            query = query.filter(AISystemDB.classification == cls_enum)
        except ValueError:
            pass  # unknown value -> ignore this filter
    if status:
        try:
            status_enum = AISystemStatusEnum(status)
            query = query.filter(AISystemDB.status == status_enum)
        except ValueError:
            pass  # unknown value -> ignore this filter
    if sector:
        query = query.filter(AISystemDB.sector.ilike(f"%{sector}%"))
    systems = query.order_by(AISystemDB.created_at.desc()).all()
    results = [
        AISystemResponse(
            id=s.id,
            name=s.name,
            description=s.description,
            purpose=s.purpose,
            sector=s.sector,
            classification=s.classification.value if s.classification else "unclassified",
            status=s.status.value if s.status else "draft",
            obligations=s.obligations or [],
            assessment_date=s.assessment_date,
            assessment_result=s.assessment_result,
            risk_factors=s.risk_factors,
            recommendations=s.recommendations,
            created_at=s.created_at,
            updated_at=s.updated_at,
        )
        for s in systems
    ]
    return AISystemListResponse(systems=results, total=len(results))
@router.post("/ai/systems", response_model=AISystemResponse)
async def create_ai_system(
    data: AISystemCreate,
    db: Session = Depends(get_db),
):
    """Register a new AI system.

    Unknown classification/status values are coerced to the safe defaults
    (UNCLASSIFIED / DRAFT) instead of failing the request. Returns the
    persisted record.
    """
    import uuid as _uuid
    # Fix: removed unused `from datetime import datetime` (dead import —
    # nothing in this handler sets timestamps explicitly).
    try:
        cls_enum = AIClassificationEnum(data.classification) if data.classification else AIClassificationEnum.UNCLASSIFIED
    except ValueError:
        cls_enum = AIClassificationEnum.UNCLASSIFIED
    try:
        status_enum = AISystemStatusEnum(data.status) if data.status else AISystemStatusEnum.DRAFT
    except ValueError:
        status_enum = AISystemStatusEnum.DRAFT
    system = AISystemDB(
        id=str(_uuid.uuid4()),
        name=data.name,
        description=data.description,
        purpose=data.purpose,
        sector=data.sector,
        classification=cls_enum,
        status=status_enum,
        obligations=data.obligations or [],
    )
    db.add(system)
    db.commit()
    db.refresh(system)  # reload server-generated fields (e.g. timestamps)
    return AISystemResponse(
        id=system.id,
        name=system.name,
        description=system.description,
        purpose=system.purpose,
        sector=system.sector,
        classification=system.classification.value if system.classification else "unclassified",
        status=system.status.value if system.status else "draft",
        obligations=system.obligations or [],
        assessment_date=system.assessment_date,
        assessment_result=system.assessment_result,
        risk_factors=system.risk_factors,
        recommendations=system.recommendations,
        created_at=system.created_at,
        updated_at=system.updated_at,
    )
@router.get("/ai/systems/{system_id}", response_model=AISystemResponse)
async def get_ai_system(system_id: str, db: Session = Depends(get_db)):
    """Fetch a single AI system record; 404 if it does not exist."""
    record = db.query(AISystemDB).filter(AISystemDB.id == system_id).first()
    if record is None:
        raise HTTPException(status_code=404, detail=f"AI System {system_id} not found")
    # Enum columns may be NULL; fall back to the schema defaults.
    cls_label = record.classification.value if record.classification else "unclassified"
    status_label = record.status.value if record.status else "draft"
    return AISystemResponse(
        id=record.id,
        name=record.name,
        description=record.description,
        purpose=record.purpose,
        sector=record.sector,
        classification=cls_label,
        status=status_label,
        obligations=record.obligations or [],
        assessment_date=record.assessment_date,
        assessment_result=record.assessment_result,
        risk_factors=record.risk_factors,
        recommendations=record.recommendations,
        created_at=record.created_at,
        updated_at=record.updated_at,
    )
@router.put("/ai/systems/{system_id}", response_model=AISystemResponse)
async def update_ai_system(
    system_id: str,
    data: AISystemUpdate,
    db: Session = Depends(get_db),
):
    """Apply a partial update to an AI system.

    Only fields explicitly set in the request body are written. Invalid
    classification/status values yield 400; an unknown id yields 404.
    """
    from datetime import datetime
    record = db.query(AISystemDB).filter(AISystemDB.id == system_id).first()
    if record is None:
        raise HTTPException(status_code=404, detail=f"AI System {system_id} not found")
    changes = data.model_dump(exclude_unset=True)
    # Validate enum-typed fields up front so bad input fails the whole request.
    if "classification" in changes:
        try:
            changes["classification"] = AIClassificationEnum(changes["classification"])
        except ValueError:
            raise HTTPException(status_code=400, detail=f"Invalid classification: {changes['classification']}")
    if "status" in changes:
        try:
            changes["status"] = AISystemStatusEnum(changes["status"])
        except ValueError:
            raise HTTPException(status_code=400, detail=f"Invalid status: {changes['status']}")
    for field, new_value in changes.items():
        # Silently skip keys the ORM model does not know about.
        if hasattr(record, field):
            setattr(record, field, new_value)
    record.updated_at = datetime.utcnow()
    db.commit()
    db.refresh(record)
    return AISystemResponse(
        id=record.id,
        name=record.name,
        description=record.description,
        purpose=record.purpose,
        sector=record.sector,
        classification=record.classification.value if record.classification else "unclassified",
        status=record.status.value if record.status else "draft",
        obligations=record.obligations or [],
        assessment_date=record.assessment_date,
        assessment_result=record.assessment_result,
        risk_factors=record.risk_factors,
        recommendations=record.recommendations,
        created_at=record.created_at,
        updated_at=record.updated_at,
    )
@router.delete("/ai/systems/{system_id}")
async def delete_ai_system(system_id: str, db: Session = Depends(get_db)):
    """Permanently remove an AI system record; 404 if it does not exist."""
    record = db.query(AISystemDB).filter(AISystemDB.id == system_id).first()
    if record is None:
        raise HTTPException(status_code=404, detail=f"AI System {system_id} not found")
    db.delete(record)
    db.commit()
    return {"success": True, "message": "AI System deleted"}
@router.post("/ai/systems/{system_id}/assess", response_model=AISystemResponse)
async def assess_ai_system(
    system_id: str,
    db: Session = Depends(get_db),
):
    """Run an AI Act risk assessment for a registered AI system.

    Tries the LLM-backed assistant first; on any failure it falls back to
    the keyword/rule-based classifier (_rule_based_assessment). The system
    row is then updated in place (classification, obligations, raw
    assessment payload, status -> CLASSIFIED) and the refreshed record is
    returned.

    Raises:
        HTTPException 404: no system with ``system_id`` exists.
    """
    from datetime import datetime
    system = db.query(AISystemDB).filter(AISystemDB.id == system_id).first()
    if not system:
        raise HTTPException(status_code=404, detail=f"AI System {system_id} not found")
    # Try AI-based assessment; any exception triggers the rule-based path.
    assessment_result = None
    try:
        from ..services.ai_compliance_assistant import get_ai_assistant
        assistant = get_ai_assistant()
        # NOTE(review): processes_pii/ai_components/criticality are hard-coded
        # worst-case defaults rather than read from the system record — confirm
        # this is intentional.
        result = await assistant.assess_module_risk(
            module_name=system.name,
            service_type="ai_system",
            description=system.description or "",
            processes_pii=True,
            ai_components=True,
            criticality="high",
            data_categories=[],
            regulations=[{"code": "AI-ACT", "relevance": "high"}],
        )
        assessment_result = {
            "overall_risk": result.overall_risk,
            "risk_factors": result.risk_factors,
            "recommendations": result.recommendations,
            "compliance_gaps": result.compliance_gaps,
            "confidence_score": result.confidence_score,
        }
    except Exception as e:
        logger.warning(f"AI assessment failed for {system_id}, using rule-based: {e}")
        # Rule-based fallback
        assessment_result = _rule_based_assessment(system)
    # Update system with assessment results
    classification = _derive_classification(assessment_result)
    try:
        system.classification = AIClassificationEnum(classification)
    except ValueError:
        # Derived label is not a known enum value -> leave unclassified.
        system.classification = AIClassificationEnum.UNCLASSIFIED
    system.assessment_date = datetime.utcnow()
    system.assessment_result = assessment_result
    system.obligations = _derive_obligations(classification)
    system.risk_factors = assessment_result.get("risk_factors", [])
    system.recommendations = assessment_result.get("recommendations", [])
    system.status = AISystemStatusEnum.CLASSIFIED
    db.commit()
    db.refresh(system)
    return AISystemResponse(
        id=system.id,
        name=system.name,
        description=system.description,
        purpose=system.purpose,
        sector=system.sector,
        classification=system.classification.value if system.classification else "unclassified",
        status=system.status.value if system.status else "draft",
        obligations=system.obligations or [],
        assessment_date=system.assessment_date,
        assessment_result=system.assessment_result,
        risk_factors=system.risk_factors,
        recommendations=system.recommendations,
        created_at=system.created_at,
        updated_at=system.updated_at,
    )
def _rule_based_assessment(system: AISystemDB) -> dict:
    """Simple rule-based AI Act classification when AI service is unavailable.

    Scans description+purpose (and, for the high-risk list, the sector) for
    keyword hits. Prohibited hits weigh 10 points, high-risk hits 5, and
    transparency-related hits 3; the summed score drives the overall label.
    """
    haystack = (system.description or "").lower() + " " + (system.purpose or "").lower()
    sector_text = (system.sector or "").lower()
    factors = []
    score = 0
    # (keywords, severity, likelihood, weight, label prefix, also check sector?)
    rules = [
        (["social scoring", "biometric surveillance", "emotion recognition", "subliminal manipulation"],
         "critical", "high", 10, "Prohibited use case", False),
        (["education", "employment", "credit scoring", "law enforcement", "migration", "critical infrastructure", "medical", "bildung", "gesundheit"],
         "high", "medium", 5, "High-risk sector", True),
        (["chatbot", "deepfake", "emotion", "biometric"],
         "medium", "high", 3, "Transparency requirement", False),
    ]
    for keywords, severity, likelihood, weight, label, check_sector in rules:
        for kw in keywords:
            if kw in haystack or (check_sector and kw in sector_text):
                factors.append({"factor": f"{label}: {kw}", "severity": severity, "likelihood": likelihood})
                score += weight
    if score >= 10:
        overall = "critical"
    elif score >= 5:
        overall = "high"
    elif score >= 3:
        overall = "medium"
    else:
        overall = "low"
    return {
        "overall_risk": overall,
        "risk_factors": factors,
        "recommendations": [
            "Dokumentation des AI-Systems vervollstaendigen",
            "Risikomanagement-Framework implementieren",
            "Transparenzpflichten pruefen",
        ],
        "compliance_gaps": [],
        "confidence_score": 0.6,
        "risk_score": score,
    }
def _derive_classification(assessment: dict) -> str:
"""Derive AI Act classification from assessment result."""
risk = assessment.get("overall_risk", "medium")
score = assessment.get("risk_score", 0)
if score >= 10:
return "prohibited"
elif risk in ("critical", "high") or score >= 5:
return "high-risk"
elif risk == "medium" or score >= 3:
return "limited-risk"
else:
return "minimal-risk"
def _derive_obligations(classification: str) -> list:
"""Derive AI Act obligations based on classification."""
obligations_map = {
"prohibited": ["Einsatz verboten (Art. 5 AI Act)"],
"high-risk": [
"Risikomanagementsystem (Art. 9)",
"Daten-Governance (Art. 10)",
"Technische Dokumentation (Art. 11)",
"Aufzeichnungspflicht (Art. 12)",
"Transparenz (Art. 13)",
"Menschliche Aufsicht (Art. 14)",
"Genauigkeit & Robustheit (Art. 15)",
"Konformitaetsbewertung (Art. 43)",
],
"limited-risk": [
"Transparenzpflicht (Art. 52)",
"Kennzeichnung als KI-System",
],
"minimal-risk": [
"Freiwillige Verhaltenskodizes (Art. 69)",
],
}
return obligations_map.get(classification, [])
# ============================================================================
# AI Assistant Endpoints (Sprint 4)
# ============================================================================
@router.get("/ai/status", response_model=AIStatusResponse)
async def get_ai_status():
    """Report which LLM provider backs the assistant and whether it works.

    Never raises: if the shared provider cannot be obtained, a degraded
    status (is_available=False, is_mock=True) is returned with the error.
    """
    # Fix: removed unused `LLMProviderType` from the import (dead name).
    from ..services.llm_provider import get_shared_provider
    try:
        provider = get_shared_provider()
        return AIStatusResponse(
            provider=provider.provider_name,
            model=provider.config.model,
            is_available=True,
            is_mock=provider.provider_name == "mock",
            error=None,
        )
    except Exception as e:
        return AIStatusResponse(
            provider="unknown",
            model="unknown",
            is_available=False,
            is_mock=True,
            error=str(e),
        )
@router.post("/ai/interpret", response_model=AIInterpretationResponse)
async def interpret_requirement(
    request: AIInterpretationRequest,
    db: Session = Depends(get_db),
):
    """Produce an AI-generated interpretation for a single requirement.

    Looks up the requirement (404 if missing) and its parent regulation,
    then delegates to the compliance assistant. Assistant failures surface
    as HTTP 500.
    """
    from ..services.ai_compliance_assistant import get_ai_assistant
    requirement = RequirementRepository(db).get_by_id(request.requirement_id)
    if requirement is None:
        raise HTTPException(status_code=404, detail=f"Requirement {request.requirement_id} not found")
    # Parent regulation may be missing; placeholders are used in that case.
    regulation = RegulationRepository(db).get_by_id(requirement.regulation_id)
    try:
        assistant = get_ai_assistant()
        result = await assistant.interpret_requirement(
            requirement_id=requirement.id,
            article=requirement.article,
            title=requirement.title,
            requirement_text=requirement.requirement_text or requirement.description or "",
            regulation_code=regulation.code if regulation else "UNKNOWN",
            regulation_name=regulation.name if regulation else "Unknown Regulation",
        )
        return AIInterpretationResponse(
            requirement_id=result.requirement_id,
            summary=result.summary,
            applicability=result.applicability,
            technical_measures=result.technical_measures,
            affected_modules=result.affected_modules,
            risk_level=result.risk_level,
            implementation_hints=result.implementation_hints,
            confidence_score=result.confidence_score,
            error=result.error,
        )
    except Exception as e:
        logger.error(f"AI interpretation failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@router.post("/ai/suggest-controls", response_model=AIControlSuggestionResponse)
async def suggest_controls(
    request: AIControlSuggestionRequest,
    db: Session = Depends(get_db),
):
    """Ask the AI assistant for control suggestions covering a requirement."""
    from ..services.ai_compliance_assistant import get_ai_assistant
    requirement = RequirementRepository(db).get_by_id(request.requirement_id)
    if requirement is None:
        raise HTTPException(status_code=404, detail=f"Requirement {request.requirement_id} not found")
    regulation = RegulationRepository(db).get_by_id(requirement.regulation_id)
    try:
        assistant = get_ai_assistant()
        suggestions = await assistant.suggest_controls(
            requirement_title=requirement.title,
            requirement_text=requirement.requirement_text or requirement.description or "",
            regulation_name=regulation.name if regulation else "Unknown",
            affected_modules=[],  # could be populated from a prior interpretation
        )
        items = []
        for s in suggestions:
            items.append(
                AIControlSuggestionItem(
                    control_id=s.control_id,
                    domain=s.domain,
                    title=s.title,
                    description=s.description,
                    pass_criteria=s.pass_criteria,
                    implementation_guidance=s.implementation_guidance,
                    is_automated=s.is_automated,
                    automation_tool=s.automation_tool,
                    priority=s.priority,
                    confidence_score=s.confidence_score,
                )
            )
        return AIControlSuggestionResponse(
            requirement_id=request.requirement_id,
            suggestions=items,
        )
    except Exception as e:
        logger.error(f"AI control suggestion failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@router.post("/ai/assess-risk", response_model=AIRiskAssessmentResponse)
async def assess_module_risk(
    request: AIRiskAssessmentRequest,
    db: Session = Depends(get_db),
):
    """Get an AI risk assessment for a service module.

    Resolves the module by id first and falls back to a name lookup, so
    ``request.module_id`` may carry either. The module's regulation mappings
    are forwarded to the assistant as context.

    Raises:
        HTTPException 404: module not found by id or name.
        HTTPException 500: the AI assistant raised.
    """
    from ..services.ai_compliance_assistant import get_ai_assistant
    from ..db.repository import ServiceModuleRepository
    # Get module from DB; the id field may actually hold a module name.
    module_repo = ServiceModuleRepository(db)
    module = module_repo.get_by_id(request.module_id)
    if not module:
        module = module_repo.get_by_name(request.module_id)
    if not module:
        raise HTTPException(status_code=404, detail=f"Module {request.module_id} not found")
    # Collect the module's regulation mappings as lightweight context dicts.
    module_detail = module_repo.get_with_regulations(module.id)
    regulations = []
    if module_detail and module_detail.get("regulation_mappings"):
        for mapping in module_detail["regulation_mappings"]:
            regulations.append({
                "code": mapping.get("regulation_code", ""),
                "relevance": mapping.get("relevance_level", "medium"),
            })
    try:
        assistant = get_ai_assistant()
        result = await assistant.assess_module_risk(
            module_name=module.name,
            service_type=module.service_type.value if module.service_type else "unknown",
            description=module.description or "",
            processes_pii=module.processes_pii,
            ai_components=module.ai_components,
            criticality=module.criticality or "medium",
            data_categories=module.data_categories or [],
            regulations=regulations,
        )
        # Re-shape the assistant result into the API schema; risk-factor
        # dicts may be sparse, so missing keys default to "medium".
        return AIRiskAssessmentResponse(
            module_name=result.module_name,
            overall_risk=result.overall_risk,
            risk_factors=[
                AIRiskFactor(
                    factor=f.get("factor", ""),
                    severity=f.get("severity", "medium"),
                    likelihood=f.get("likelihood", "medium"),
                )
                for f in result.risk_factors
            ],
            recommendations=result.recommendations,
            compliance_gaps=result.compliance_gaps,
            confidence_score=result.confidence_score,
        )
    except Exception as e:
        logger.error(f"AI risk assessment failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@router.post("/ai/gap-analysis", response_model=AIGapAnalysisResponse)
async def analyze_gap(
    request: AIGapAnalysisRequest,
    db: Session = Depends(get_db),
):
    """Analyze coverage gaps between a requirement and existing controls.

    Collects the controls already mapped to the requirement via the
    eager-loaded ``control_mappings`` relationship and asks the AI
    assistant what remains uncovered.

    Raises:
        HTTPException 404: requirement not found.
        HTTPException 500: the AI assistant raised.
    """
    from ..services.ai_compliance_assistant import get_ai_assistant
    # Get requirement from DB
    req_repo = RequirementRepository(db)
    requirement = req_repo.get_by_id(request.requirement_id)
    if not requirement:
        raise HTTPException(status_code=404, detail=f"Requirement {request.requirement_id} not found")
    # Get regulation info (may be None; placeholder code is used below)
    reg_repo = RegulationRepository(db)
    regulation = reg_repo.get_by_id(requirement.regulation_id)
    # Fix: removed unused `ctrl_repo = ControlRepository(db)` (dead local —
    # mappings come from the relationship, not the repository).
    existing_controls = []
    if requirement.control_mappings:
        for mapping in requirement.control_mappings:
            if mapping.control:
                existing_controls.append({
                    "control_id": mapping.control.control_id,
                    "title": mapping.control.title,
                    "status": mapping.control.status.value if mapping.control.status else "unknown",
                })
    try:
        assistant = get_ai_assistant()
        result = await assistant.analyze_gap(
            requirement_id=requirement.id,
            requirement_title=requirement.title,
            requirement_text=requirement.requirement_text or requirement.description or "",
            regulation_code=regulation.code if regulation else "UNKNOWN",
            existing_controls=existing_controls,
        )
        return AIGapAnalysisResponse(
            requirement_id=result.requirement_id,
            requirement_title=result.requirement_title,
            coverage_level=result.coverage_level,
            existing_controls=result.existing_controls,
            missing_coverage=result.missing_coverage,
            suggested_actions=result.suggested_actions,
        )
    except Exception as e:
        logger.error(f"AI gap analysis failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@router.post("/ai/batch-interpret", response_model=AIBatchInterpretationResponse)
async def batch_interpret_requirements(
    request: AIBatchInterpretationRequest,
    background_tasks: BackgroundTasks,
    db: Session = Depends(get_db),
):
    """
    Batch interpret multiple requirements.

    Requirements are selected either by explicit ``requirement_ids`` or by
    ``regulation_code`` (capped at 50). Batches of up to 5 are processed
    synchronously and returned; larger batches currently return immediately
    with ``processed=0`` — background processing is not implemented yet, so
    ``background_tasks`` is accepted but unused.

    Raises:
        HTTPException 400: no matching requirements found.
    """
    from ..services.ai_compliance_assistant import get_ai_assistant
    req_repo = RequirementRepository(db)
    reg_repo = RegulationRepository(db)
    # Build the work list as plain dicts so the assistant needs no ORM access.
    requirements_to_process = []
    if request.requirement_ids:
        for req_id in request.requirement_ids:
            req = req_repo.get_by_id(req_id)
            if req:  # unknown ids are silently skipped
                reg = reg_repo.get_by_id(req.regulation_id)
                requirements_to_process.append({
                    "id": req.id,
                    "article": req.article,
                    "title": req.title,
                    "requirement_text": req.requirement_text or req.description or "",
                    "regulation_code": reg.code if reg else "UNKNOWN",
                    "regulation_name": reg.name if reg else "Unknown",
                })
    elif request.regulation_code:
        # Get all requirements for a regulation
        reg = reg_repo.get_by_code(request.regulation_code)
        if reg:
            reqs = req_repo.get_by_regulation(reg.id)
            for req in reqs[:50]:  # Limit to 50 for batch processing
                requirements_to_process.append({
                    "id": req.id,
                    "article": req.article,
                    "title": req.title,
                    "requirement_text": req.requirement_text or req.description or "",
                    "regulation_code": reg.code,
                    "regulation_name": reg.name,
                })
    if not requirements_to_process:
        raise HTTPException(status_code=400, detail="No requirements found to process")
    # For small batches, process synchronously
    if len(requirements_to_process) <= 5:
        assistant = get_ai_assistant()
        results = await assistant.batch_interpret_requirements(
            requirements_to_process,
            rate_limit=request.rate_limit,
        )
        return AIBatchInterpretationResponse(
            total=len(requirements_to_process),
            processed=len(results),
            interpretations=[
                AIInterpretationResponse(
                    requirement_id=r.requirement_id,
                    summary=r.summary,
                    applicability=r.applicability,
                    technical_measures=r.technical_measures,
                    affected_modules=r.affected_modules,
                    risk_level=r.risk_level,
                    implementation_hints=r.implementation_hints,
                    confidence_score=r.confidence_score,
                    error=r.error,
                )
                for r in results
            ],
        )
    # For large batches, return immediately with info
    # (Background processing would be added in a production version)
    return AIBatchInterpretationResponse(
        total=len(requirements_to_process),
        processed=0,
        interpretations=[],
    )
# ============================================================================
# PDF Extraction (Sprint 2)
# ============================================================================
@router.get("/pdf/available")
async def list_available_pdfs():
    """List BSI-TR PDF documents present on disk and available for extraction."""
    from pathlib import Path
    # Prefer the container path; fall back to a relative docs/ directory.
    base = Path("/app/docs")
    if not base.exists():
        base = Path("docs")
    entries = [
        {
            "filename": pdf.name,
            "path": str(pdf),
            "size_bytes": pdf.stat().st_size,
            "type": "bsi_standard",
        }
        for pdf in base.glob("BSI-TR-*.pdf")
    ]
    return {
        "available_pdfs": entries,
        "total": len(entries),
    }
@router.post("/pdf/extract/{doc_code}", response_model=PDFExtractionResponse)
async def extract_pdf_requirements(
    doc_code: str,
    save_to_db: bool = Query(True, description="Save extracted requirements to database"),
    db: Session = Depends(get_db),
):
    """
    Extract requirements/aspects from a BSI-TR PDF document.

    The PDF is looked up under /app/docs (container) or ./docs (local).
    When saving, the matching regulation row is created on first use, and
    aspects whose ``aspect_id`` already exists for that regulation are
    skipped, so re-runs are idempotent.

    doc_code examples:
    - BSI-TR-03161-1: General security requirements
    - BSI-TR-03161-2: Web application security
    - BSI-TR-03161-3: Backend/server security

    Raises:
        HTTPException 404: PDF file not found.
        HTTPException 500: extraction failed.
    """
    from pathlib import Path
    from ..services.pdf_extractor import BSIPDFExtractor
    from ..db.models import RegulationTypeEnum
    # Find the PDF file (container path first, then relative docs/)
    docs_path = Path("/app/docs") if Path("/app/docs").exists() else Path("docs")
    pdf_path = docs_path / f"{doc_code}.pdf"
    if not pdf_path.exists():
        raise HTTPException(status_code=404, detail=f"PDF not found: {doc_code}.pdf")
    # Extract aspects
    extractor = BSIPDFExtractor()
    try:
        aspects = extractor.extract_from_file(str(pdf_path), source_name=doc_code)
    except Exception as e:
        logger.error(f"PDF extraction failed: {e}")
        raise HTTPException(status_code=500, detail=f"PDF extraction failed: {str(e)}")
    # Find or create the regulation this document belongs to
    reg_repo = RegulationRepository(db)
    regulation = reg_repo.get_by_code(doc_code)
    if not regulation:
        regulation = reg_repo.create(
            code=doc_code,
            name=f"BSI Technical Guideline {doc_code.split('-')[-1]}",
            full_name=f"BSI Technische Richtlinie {doc_code}",
            regulation_type=RegulationTypeEnum.BSI_STANDARD,
            local_pdf_path=str(pdf_path),
        )
    # Save to database if requested
    saved_count = 0
    if save_to_db and aspects:
        req_repo = RequirementRepository(db)
        for aspect in aspects:
            # Check if requirement already exists (the `article` column
            # stores the BSI aspect id)
            existing = db.query(RequirementDB).filter(
                RequirementDB.regulation_id == regulation.id,
                RequirementDB.article == aspect.aspect_id,
            ).first()
            if not existing:
                try:
                    # Title/text are truncated to fit column limits; priority
                    # maps MUSS=1, SOLL=2, anything else=3.
                    req_repo.create(
                        regulation_id=regulation.id,
                        article=aspect.aspect_id,
                        title=aspect.title[:300] if aspect.title else "",
                        description=f"Category: {aspect.category.value}",
                        requirement_text=aspect.full_text[:4000] if aspect.full_text else "",
                        priority=1 if aspect.requirement_level.value == "MUSS" else (
                            2 if aspect.requirement_level.value == "SOLL" else 3
                        ),
                    )
                    saved_count += 1
                except Exception as e:
                    # Best-effort persistence: log and continue with the next aspect.
                    logger.warning(f"Failed to save aspect {aspect.aspect_id}: {e}")
        db.commit()
    # Convert aspects to response format
    aspect_responses = [
        BSIAspectResponse(
            aspect_id=a.aspect_id,
            title=a.title,
            full_text=a.full_text,
            category=a.category.value,
            page_number=a.page_number,
            section=a.section,
            requirement_level=a.requirement_level.value,
            source_document=a.source_document,
        )
        for a in aspects
    ]
    return PDFExtractionResponse(
        doc_code=doc_code,
        total_extracted=len(aspects),
        saved_to_db=saved_count,
        aspects=aspect_responses,
    )
@router.get("/pdf/extraction-stats")
async def get_extraction_stats(db: Session = Depends(get_db)):
    """Summarize how many requirements each BSI regulation has in the DB."""
    from sqlalchemy import func
    # One grouped count per BSI-prefixed regulation code.
    rows = (
        db.query(
            RegulationDB.code,
            func.count(RequirementDB.id).label('count')
        )
        .join(RequirementDB, RequirementDB.regulation_id == RegulationDB.id)
        .filter(RegulationDB.code.like('BSI-%'))
        .group_by(RegulationDB.code)
        .all()
    )
    per_regulation = dict(rows)
    return {
        "bsi_requirements": per_regulation,
        "total_bsi_requirements": sum(per_regulation.values()),
    }
# ============================================================================
# Automatic Control Mapping
# ============================================================================
# Domain keyword mapping for automatic control assignment.
# Keys are control-domain short codes; values are lowercase German/English
# keywords matched as substrings against a requirement's text — each hit adds
# one point to the domain's relevance score (see auto_map_controls).
# NOTE(review): substring matching means very short tokens like "ai"/"ki"/"ml"
# also hit inside longer words — confirm this is acceptable noise.
DOMAIN_KEYWORDS = {
    # Privacy / data protection (GDPR)
    "priv": ["datenschutz", "dsgvo", "gdpr", "privacy", "personenbezogen", "einwilligung",
             "consent", "betroffenenrechte", "verarbeitungsverzeichnis", "pii", "auftragsverarbeitung"],
    # Identity & access management
    "iam": ["authentifizierung", "auth", "login", "passwort", "password", "zugang", "access",
            "berechtigung", "session", "token", "jwt", "oauth", "sso", "mfa", "2fa", "rbac"],
    # Cryptography
    "crypto": ["verschlüsselung", "encryption", "kryptograph", "crypto", "hash", "schlüssel",
               "key", "tls", "ssl", "zertifikat", "signatur", "aes", "rsa"],
    # Secure development lifecycle
    "sdlc": ["entwicklung", "code", "software", "sast", "dast", "dependency", "vulnerable",
             "cve", "security scan", "semgrep", "trivy", "sbom", "ci/cd", "build"],
    # Operations: monitoring, logging, availability
    "ops": ["monitoring", "logging", "log", "protokoll", "backup", "incident", "alert",
            "availability", "uptime", "patch", "update", "deployment"],
    # AI / machine learning (AI Act)
    "ai": ["künstliche intelligenz", "ki", "ai", "machine learning", "ml", "modell",
           "training", "inference", "bias", "ai act", "hochrisiko"],
    # Cyber Resilience Act / vulnerability handling
    "cra": ["vulnerability", "schwachstelle", "disclosure", "patch", "eol", "end-of-life",
            "supply chain", "sbom", "cve", "update"],
    # Governance: policies, roles, awareness
    "gov": ["richtlinie", "policy", "governance", "verantwortlich", "raci", "dokumentation",
            "prozess", "awareness", "schulung", "training"],
    # Audit & evidence / traceability
    "aud": ["audit", "prüfung", "nachweis", "evidence", "traceability", "nachvollzieh",
            "protokoll", "export", "report"],
}
@router.post("/ai/auto-map-controls")
async def auto_map_controls(
    requirement_id: str = Query(..., description="Requirement UUID"),
    save_to_db: bool = Query(True, description="Save mappings to database"),
    use_ai: bool = Query(False, description="Use AI for better matching (slower)"),
    db: Session = Depends(get_db),
):
    """
    Automatically map controls to a requirement.

    Uses keyword matching by default (fast) or AI for better accuracy
    (slower); if the AI path fails it silently falls back to keyword
    matching. With ``save_to_db`` enabled, new ControlMapping rows are
    created (existing pairs are skipped); otherwise the matches are only
    returned as a preview.

    Raises:
        HTTPException 404: requirement not found.
    """
    from ..db.models import ControlMappingDB
    # Get requirement
    req_repo = RequirementRepository(db)
    requirement = req_repo.get_by_id(requirement_id)
    if not requirement:
        raise HTTPException(status_code=404, detail=f"Requirement {requirement_id} not found")
    # Get all controls once; both matching strategies select from this list.
    ctrl_repo = ControlRepository(db)
    all_controls = ctrl_repo.get_all()
    # Text to analyze: title + requirement text + description, lowercased.
    text_to_analyze = f"{requirement.title} {requirement.requirement_text or ''} {requirement.description or ''}"
    text_lower = text_to_analyze.lower()
    matched_controls = []
    if use_ai:
        # Use AI for matching (slower but more accurate)
        from ..services.ai_compliance_assistant import get_ai_assistant
        assistant = get_ai_assistant()
        reg_repo = RegulationRepository(db)
        regulation = reg_repo.get_by_id(requirement.regulation_id)
        try:
            suggestions = await assistant.suggest_controls(
                requirement_title=requirement.title,
                requirement_text=requirement.requirement_text or "",
                regulation_name=regulation.name if regulation else "Unknown",
                affected_modules=[],
            )
            # Match suggestions to existing controls by domain
            for suggestion in suggestions:
                domain = suggestion.domain.lower()
                domain_controls = [c for c in all_controls if c.domain and c.domain.value.lower() == domain]
                if domain_controls:
                    # Take the first matching control from this domain
                    matched_controls.append({
                        "control": domain_controls[0],
                        "coverage": "partial",
                        "notes": f"AI suggested: {suggestion.title}",
                        "confidence": suggestion.confidence_score,
                    })
        except Exception as e:
            logger.warning(f"AI mapping failed, falling back to keyword matching: {e}")
            use_ai = False  # Fall back to keyword matching
    if not use_ai:
        # Keyword-based matching (fast): score each domain by keyword hits.
        domain_scores = {}
        for domain, keywords in DOMAIN_KEYWORDS.items():
            score = sum(1 for kw in keywords if kw.lower() in text_lower)
            if score > 0:
                domain_scores[domain] = score
        # Sort domains by score, best first
        sorted_domains = sorted(domain_scores.items(), key=lambda x: x[1], reverse=True)
        # Take top 3 domains
        for domain, score in sorted_domains[:3]:
            domain_controls = [c for c in all_controls if c.domain and c.domain.value.lower() == domain]
            for ctrl in domain_controls[:2]:  # Max 2 controls per domain
                matched_controls.append({
                    "control": ctrl,
                    # 3+ keyword hits is treated as full coverage
                    "coverage": "partial" if score < 3 else "full",
                    "notes": f"Keyword match (score: {score})",
                    # confidence grows with hits, capped at 0.9
                    "confidence": min(0.9, 0.5 + score * 0.1),
                })
    # Save mappings to database if requested
    created_mappings = []
    if save_to_db and matched_controls:
        for match in matched_controls:
            ctrl = match["control"]
            # Check if mapping already exists (skip duplicates)
            existing = db.query(ControlMappingDB).filter(
                ControlMappingDB.requirement_id == requirement_id,
                ControlMappingDB.control_id == ctrl.id,
            ).first()
            if not existing:
                mapping = ControlMappingDB(
                    requirement_id=requirement_id,
                    control_id=ctrl.id,
                    coverage_level=match["coverage"],
                    notes=match["notes"],
                )
                db.add(mapping)
                created_mappings.append({
                    "control_id": ctrl.control_id,
                    "domain": ctrl.domain.value if ctrl.domain else None,
                    "title": ctrl.title,
                    "coverage_level": match["coverage"],
                    "notes": match["notes"],
                })
        db.commit()
    return {
        "requirement_id": requirement_id,
        "requirement_title": requirement.title,
        "matched_controls": len(matched_controls),
        "created_mappings": len(created_mappings),
        # Preview mode (save_to_db=False) returns the raw matches instead.
        "mappings": created_mappings if save_to_db else [
            {
                "control_id": m["control"].control_id,
                "domain": m["control"].domain.value if m["control"].domain else None,
                "title": m["control"].title,
                "coverage_level": m["coverage"],
                "confidence": m.get("confidence", 0.7),
            }
            for m in matched_controls
        ],
    }
@router.post("/ai/batch-map-controls")
async def batch_map_controls(
    regulation_code: Optional[str] = Query(None, description="Filter by regulation code"),
    limit: int = Query(100, description="Max requirements to process"),
    use_ai: bool = Query(False, description="Use AI for matching (slower)"),
    background_tasks: BackgroundTasks = None,
    db: Session = Depends(get_db),
):
    """
    Batch map controls to multiple requirements.

    Only requirements without any existing control mapping are processed
    (up to `limit`). Each processed requirement receives at most one
    "partial" mapping: the first control of the best keyword-scoring domain.

    NOTE(review): `use_ai` and `background_tasks` are accepted but currently
    unused — matching is always keyword-based. Confirm whether AI-backed
    batch mapping is still planned before removing them.

    Returns:
        Summary dict with counts (processed/mapped/remaining) and the first
        20 created mappings.
    """
    from ..db.models import ControlMappingDB

    req_repo = RequirementRepository(db)
    if regulation_code:
        reg_repo = RegulationRepository(db)
        regulation = reg_repo.get_by_code(regulation_code)
        if not regulation:
            raise HTTPException(status_code=404, detail=f"Regulation {regulation_code} not found")
        all_requirements = req_repo.get_by_regulation(regulation.id)
    else:
        all_requirements = req_repo.get_all()
    # Fetch all already-mapped requirement ids in ONE query instead of one
    # existence query per requirement (previously an N+1 pattern).
    mapped_ids = {
        row[0] for row in db.query(ControlMappingDB.requirement_id).distinct().all()
    }
    requirements_without_mappings = [r for r in all_requirements if r.id not in mapped_ids]
    # Cap the amount of work per call
    to_process = requirements_without_mappings[:limit]
    # Load the control catalogue once; reused for every requirement below
    ctrl_repo = ControlRepository(db)
    all_controls = ctrl_repo.get_all()
    results = []
    for req in to_process:
        try:
            text_lower = f"{req.title} {req.requirement_text or ''} {req.description or ''}".lower()
            # Score each domain by how many of its keywords occur in the text
            domain_scores = {}
            for domain, keywords in DOMAIN_KEYWORDS.items():
                score = sum(1 for kw in keywords if kw.lower() in text_lower)
                if score > 0:
                    domain_scores[domain] = score
            if domain_scores:
                # Only the single best-scoring domain contributes a mapping
                top_domain = max(domain_scores.items(), key=lambda x: x[1])[0]
                domain_controls = [
                    c for c in all_controls
                    if c.domain and c.domain.value.lower() == top_domain
                ]
                if domain_controls:
                    ctrl = domain_controls[0]
                    db.add(ControlMappingDB(
                        requirement_id=req.id,
                        control_id=ctrl.id,
                        coverage_level="partial",
                        notes=f"Auto-mapped (domain: {top_domain})",
                    ))
                    results.append({
                        "requirement_id": req.id,
                        "requirement_title": req.title[:50],
                        "control_id": ctrl.control_id,
                        "domain": top_domain,
                    })
        except Exception as e:
            # Best-effort batch: log and skip failing requirements, keep going
            logger.warning(f"Failed to map requirement {req.id}: {e}")
    db.commit()
    return {
        "processed": len(to_process),
        "mapped": len(results),
        "remaining": len(requirements_without_mappings) - len(to_process),
        "mappings": results[:20],  # Only return first 20 for readability
    }
# ============================================================================
# LLM Provider Switch Endpoints (Runtime Configuration)
# ============================================================================
class ProviderSwitchRequest(BaseModel):
    """Request body for POST /ai/switch-provider."""
    provider: str  # "anthropic" or "self_hosted" (the endpoint also accepts "mock")
    model: Optional[str] = None  # Optional: override the provider's default model
    url: Optional[str] = None  # Optional: override base URL (self-hosted provider only)
class ProviderSwitchResponse(BaseModel):
    """Response body for POST /ai/switch-provider."""
    success: bool  # True when the switch completed (failures raise HTTPException instead)
    previous_provider: str  # Provider name before the switch
    new_provider: str  # Provider name after the switch
    model: str  # Model the new provider is configured with
    url: Optional[str] = None  # Provider base URL, when the config exposes one
    message: str  # Human-readable summary of the transition
@router.post("/ai/switch-provider", response_model=ProviderSwitchResponse)
async def switch_llm_provider(request: ProviderSwitchRequest):
    """
    Switch the LLM provider at runtime between Anthropic API and Self-Hosted (Ollama).

    This allows developers to toggle between:
    - **anthropic**: Cloud-based Claude API (kostenpflichtig, Daten gehen zu Anthropic)
    - **self_hosted**: Self-hosted Ollama on Mac Mini (kostenlos, DSGVO-konform, Daten bleiben intern)

    Note: This change is temporary for the current container session.
    For permanent changes, modify the docker-compose.yml environment variables.

    Raises:
        HTTPException 400: if the requested provider name is unknown.
        HTTPException 500: if reconfiguring the shared provider fails.
    """
    from ..services.llm_provider import (
        reset_shared_provider,
        get_shared_provider,
        LLMProviderType,
    )
    provider_key = request.provider.lower()
    # Validate BEFORE the try block: previously the 400 raised here was
    # caught by the blanket `except Exception` below and surfaced as a 500.
    provider_map = {
        "anthropic": LLMProviderType.ANTHROPIC,
        "self_hosted": LLMProviderType.SELF_HOSTED,
        "mock": LLMProviderType.MOCK,
    }
    if provider_key not in provider_map:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid provider: {request.provider}. Use 'anthropic' or 'self_hosted'"
        )
    try:
        # Remember the current provider so the response can report the transition
        old_provider_name = get_shared_provider().provider_name
        # The provider factory reads its configuration from environment
        # variables, so switching = updating env + resetting the singleton.
        os.environ["COMPLIANCE_LLM_PROVIDER"] = provider_key
        if provider_key == "self_hosted":
            if request.url:
                os.environ["SELF_HOSTED_LLM_URL"] = request.url
            if request.model:
                os.environ["SELF_HOSTED_LLM_MODEL"] = request.model
            else:
                # Default to llama3.1:70b for compliance tasks (keep any existing value)
                os.environ.setdefault("SELF_HOSTED_LLM_MODEL", "llama3.1:70b")
        elif provider_key == "anthropic":
            if request.model:
                os.environ["ANTHROPIC_MODEL"] = request.model
        # Recreate the shared provider so it picks up the new configuration
        reset_shared_provider()
        new_provider = get_shared_provider()
        return ProviderSwitchResponse(
            success=True,
            previous_provider=old_provider_name,
            new_provider=new_provider.provider_name,
            model=new_provider.config.model,
            url=getattr(new_provider.config, 'base_url', None),
            message=f"Successfully switched from {old_provider_name} to {new_provider.provider_name}",
        )
    except HTTPException:
        # Defensive passthrough: never convert an intentional HTTP error into a 500
        raise
    except Exception as e:
        logger.error(f"Failed to switch LLM provider: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@router.get("/ai/providers")
async def list_available_providers():
    """
    Return the catalogue of selectable LLM providers.

    Each entry carries bilingual descriptions plus GDPR/cost metadata so
    developers can pick the right provider for their scenario.
    """
    anthropic_entry = {
        "id": "anthropic",
        "name": "Anthropic Claude API",
        "description_de": "Cloud-basierte KI von Anthropic. Kostenpflichtig (API-Credits). Daten werden zur Verarbeitung an Anthropic gesendet.",
        "description_en": "Cloud-based AI from Anthropic. Paid service (API credits). Data is sent to Anthropic for processing.",
        "gdpr_compliant": False,
        "data_location": "Anthropic Cloud (USA)",
        "cost": "Kostenpflichtig pro Token",
        "use_case": "Produktiv, wenn hohe Qualitaet benoetigt wird",
        "models": ["claude-sonnet-4-20250514", "claude-3-5-sonnet-20241022"],
    }
    self_hosted_entry = {
        "id": "self_hosted",
        "name": "Self-Hosted Ollama",
        "description_de": "Lokales LLM auf dem Mac Mini M4 Pro (64GB RAM). Kostenlos. Alle Daten bleiben intern - DSGVO-konform!",
        "description_en": "Local LLM on Mac Mini M4 Pro (64GB RAM). Free. All data stays internal - GDPR compliant!",
        "gdpr_compliant": True,
        "data_location": "Lokal auf Mac Mini",
        "cost": "Kostenlos (Hardware bereits vorhanden)",
        "use_case": "Entwicklung, Testing, DSGVO-sensitive Dokumente",
        "models": ["llama3.1:70b", "llama3.2-vision", "mixtral:8x7b"],
    }
    return {
        "providers": [anthropic_entry, self_hosted_entry],
        "current_provider": None,  # Will be filled by get_ai_status
        "note_de": "Umschaltung erfolgt sofort, aber nur fuer diese Container-Session. Fuer permanente Aenderung docker-compose.yml anpassen.",
        "note_en": "Switch takes effect immediately but only for this container session. For permanent change, modify docker-compose.yml.",
    }