# breakpilot-lehrer/backend-lehrer/recording_routes.py

"""
Recording API - Core Recording Routes.

Webhook, CRUD, health, audit, and download endpoints.
"""

import uuid
from datetime import datetime, timedelta
from typing import Optional

from fastapi import APIRouter, HTTPException, Query, Request
from fastapi.responses import JSONResponse

from recording_models import (
    JibriWebhookPayload,
    RecordingResponse,
    RecordingListResponse,
    MINIO_ENDPOINT,
    MINIO_BUCKET,
    DEFAULT_RETENTION_DAYS,
)
from recording_helpers import (
    _recordings_store,
    _transcriptions_store,
    _audit_log,
    log_audit,
)

router = APIRouter(tags=["Recordings"])


# ==========================================
# WEBHOOK ENDPOINT (Jibri)
# ==========================================

@router.post("/webhook")
async def jibri_webhook(payload: JibriWebhookPayload, request: Request):
    """
    Register a recording announced by the Jibri finalize webhook.

    Only ``recording_completed`` events are accepted. For those, a new entry
    with status ``uploaded`` is added to the recordings store and a
    ``created`` audit entry is written. No transcription is started here.

    Args:
        payload: Webhook body; ``event``, ``recording_name``,
            ``storage_path``, ``audio_path`` and ``file_size_bytes`` are read.
        request: The incoming request (not used by this handler).

    Returns:
        A dict containing the new ``recording_id``, the derived
        ``meeting_id`` and the status, or a 400 ``JSONResponse`` when the
        event type is anything other than ``recording_completed``.
    """
    if payload.event != "recording_completed":
        return JSONResponse(
            status_code=400,
            content={"error": f"Unknown event type: {payload.event}"}
        )

    # recording_name is expected as "<meetingId>_<timestamp>": everything
    # before the first underscore is the meeting ID. partition() yields the
    # whole name when there is no underscore, so no fallback is needed.
    meeting_id = payload.recording_name.partition("_")[0]

    recording_id = str(uuid.uuid4())

    # Read the clock once so recorded_at, created_at and updated_at agree
    # for this single registration event.
    now_iso = datetime.utcnow().isoformat()

    recording = {
        "id": recording_id,
        "meeting_id": meeting_id,
        "jibri_session_id": payload.recording_name,
        "title": f"Recording {meeting_id}",
        "storage_path": payload.storage_path,
        "audio_path": payload.audio_path,
        "file_size_bytes": payload.file_size_bytes,
        "duration_seconds": None,  # Will be updated after analysis
        "participant_count": 0,
        "status": "uploaded",
        "recorded_at": now_iso,
        "retention_days": DEFAULT_RETENTION_DAYS,
        "created_at": now_iso,
        "updated_at": now_iso
    }

    _recordings_store[recording_id] = recording

    # Audit trail entry for the newly registered recording
    log_audit(
        action="created",
        recording_id=recording_id,
        metadata={
            "source": "jibri_webhook",
            "storage_path": payload.storage_path,
            "file_size_bytes": payload.file_size_bytes
        }
    )

    return {
        "success": True,
        "recording_id": recording_id,
        "meeting_id": meeting_id,
        "status": "uploaded",
        "message": "Recording registered successfully"
    }


# ==========================================
# HEALTH & AUDIT ENDPOINTS (must be before parameterized routes)
# ==========================================

@router.get("/health")
async def recordings_health():
    """Report a static healthy status plus store sizes and MinIO settings."""
    recording_total = len(_recordings_store)
    transcription_total = len(_transcriptions_store)

    report = {"status": "healthy"}
    report["recordings_count"] = recording_total
    report["transcriptions_count"] = transcription_total
    report["minio_endpoint"] = MINIO_ENDPOINT
    report["bucket"] = MINIO_BUCKET
    return report


@router.get("/audit/log")
async def get_audit_log(
    recording_id: Optional[str] = Query(None),
    action: Optional[str] = Query(None),
    limit: int = Query(100, ge=1, le=1000)
):
    """
    Return audit log entries, newest first (DSGVO compliance).

    Entries can be narrowed to one recording and/or one action. ``total``
    counts every entry that matched the filters, while ``entries`` holds at
    most ``limit`` of them.

    NOTE(review): described as admin-only, but this handler performs no
    access check itself - confirm it is enforced elsewhere.
    """
    def _is_selected(entry):
        # A filter only applies when the caller supplied a non-empty value.
        if recording_id and entry.get("recording_id") != recording_id:
            return False
        if action and entry.get("action") != action:
            return False
        return True

    selected = [entry for entry in _audit_log if _is_selected(entry)]

    # Most recent entries first
    newest_first = sorted(
        selected,
        key=lambda entry: entry["created_at"],
        reverse=True,
    )

    return {
        "entries": newest_first[:limit],
        "total": len(newest_first)
    }


# ==========================================
# RECORDING MANAGEMENT ENDPOINTS
# ==========================================

@router.get("/", response_model=RecordingListResponse)
async def list_recordings(
    status: Optional[str] = Query(None, description="Filter by status"),
    meeting_id: Optional[str] = Query(None, description="Filter by meeting ID"),
    page: int = Query(1, ge=1, description="Page number"),
    page_size: int = Query(20, ge=1, le=100, description="Items per page")
):
    """
    List recordings, newest first, with optional filtering and pagination.

    Args:
        status: When given, keep only recordings with this status.
        meeting_id: When given, keep only recordings of this meeting.
        page: 1-based page number.
        page_size: Number of recordings per page.

    Returns:
        A ``RecordingListResponse`` holding the requested page. ``total`` is
        the number of recordings matching the filters across all pages.
    """
    recordings = list(_recordings_store.values())

    if status:
        recordings = [r for r in recordings if r["status"] == status]
    if meeting_id:
        recordings = [r for r in recordings if r["meeting_id"] == meeting_id]

    # Sort by recorded_at descending
    recordings.sort(key=lambda x: x["recorded_at"], reverse=True)

    # Paginate
    total = len(recordings)
    start = (page - 1) * page_size
    page_recordings = recordings[start:start + page_size]

    # Index the first transcription per recording on this page in a single
    # pass, instead of rescanning the whole transcription store for every
    # recording. Skipped entirely when the page is empty.
    first_transcription = {}
    if page_recordings:
        page_ids = {rec["id"] for rec in page_recordings}
        for trans in _transcriptions_store.values():
            rec_id = trans.get("recording_id")
            if rec_id in page_ids and rec_id not in first_transcription:
                first_transcription[rec_id] = trans

    # Convert to response format
    result = []
    for rec in page_recordings:
        recorded_at = datetime.fromisoformat(rec["recorded_at"])
        # Retention window is counted from the recording time
        retention_expires = recorded_at + timedelta(days=rec["retention_days"])

        trans = first_transcription.get(rec["id"])

        result.append(RecordingResponse(
            id=rec["id"],
            meeting_id=rec["meeting_id"],
            title=rec.get("title"),
            storage_path=rec["storage_path"],
            audio_path=rec.get("audio_path"),
            file_size_bytes=rec.get("file_size_bytes"),
            duration_seconds=rec.get("duration_seconds"),
            participant_count=rec.get("participant_count", 0),
            status=rec["status"],
            recorded_at=recorded_at,
            retention_days=rec["retention_days"],
            retention_expires_at=retention_expires,
            transcription_status=trans["status"] if trans else None,
            transcription_id=trans["id"] if trans else None
        ))

    return RecordingListResponse(
        recordings=result,
        total=total,
        page=page,
        page_size=page_size
    )


@router.get("/{recording_id}", response_model=RecordingResponse)
async def get_recording(recording_id: str):
    """
    Return one recording by ID and write a "viewed" audit entry.

    Responds with 404 when the ID is not in the recordings store. The
    response also carries the status and ID of the first transcription found
    for the recording, if there is one.
    """
    entry = _recordings_store.get(recording_id)
    if not entry:
        raise HTTPException(status_code=404, detail="Recording not found")

    # Every successful lookup is recorded in the audit trail
    log_audit(action="viewed", recording_id=recording_id)

    recorded_when = datetime.fromisoformat(entry["recorded_at"])
    expires_when = recorded_when + timedelta(days=entry["retention_days"])

    # First transcription that belongs to this recording, if any
    transcription = None
    for candidate in _transcriptions_store.values():
        if candidate["recording_id"] == recording_id:
            transcription = candidate
            break

    if transcription:
        transcription_status = transcription["status"]
        transcription_id = transcription["id"]
    else:
        transcription_status = None
        transcription_id = None

    return RecordingResponse(
        id=entry["id"],
        meeting_id=entry["meeting_id"],
        title=entry.get("title"),
        storage_path=entry["storage_path"],
        audio_path=entry.get("audio_path"),
        file_size_bytes=entry.get("file_size_bytes"),
        duration_seconds=entry.get("duration_seconds"),
        participant_count=entry.get("participant_count", 0),
        status=entry["status"],
        recorded_at=recorded_when,
        retention_days=entry["retention_days"],
        retention_expires_at=expires_when,
        transcription_status=transcription_status,
        transcription_id=transcription_id,
    )


@router.delete("/{recording_id}")
async def delete_recording(
    recording_id: str,
    reason: str = Query(..., description="Reason for deletion (DSGVO audit)")
):
    """
    Soft-delete a recording (DSGVO compliance).

    The stored entry is kept and only flagged: its status becomes
    ``deleted`` and ``deleted_at`` / ``updated_at`` are set. No file is
    removed by this handler.

    Args:
        recording_id: ID of the recording to mark as deleted.
        reason: Mandatory deletion reason, stored in the audit entry.

    Returns:
        A confirmation dict with the recording ID and its new status.

    Raises:
        HTTPException: 404 when the recording ID is unknown.
    """
    recording = _recordings_store.get(recording_id)
    if not recording:
        raise HTTPException(status_code=404, detail="Recording not found")

    # Read the clock once so deleted_at and updated_at are identical.
    # NOTE(review): a repeated DELETE overwrites the earlier deleted_at -
    # confirm that this is intended.
    now_iso = datetime.utcnow().isoformat()

    # Soft delete
    recording["status"] = "deleted"
    recording["deleted_at"] = now_iso
    recording["updated_at"] = now_iso

    # Log deletion with reason
    log_audit(
        action="deleted",
        recording_id=recording_id,
        metadata={"reason": reason}
    )

    return {
        "success": True,
        "recording_id": recording_id,
        "status": "deleted",
        "message": "Recording marked for deletion"
    }


@router.get("/{recording_id}/download")
async def download_recording(recording_id: str):
    """
    Describe where a recording's file is stored and audit the download.

    Placeholder implementation: it returns the storage location instead of
    file content. Unknown IDs yield 404; soft-deleted recordings yield 410.
    """
    entry = _recordings_store.get(recording_id)

    if not entry:
        raise HTTPException(status_code=404, detail="Recording not found")
    if entry["status"] == "deleted":
        raise HTTPException(status_code=410, detail="Recording has been deleted")

    # Only downloads that pass both checks are written to the audit trail
    log_audit(action="downloaded", recording_id=recording_id)

    # A production version would hand out a presigned MinIO URL here
    location_info = {
        "recording_id": recording_id,
        "storage_path": entry["storage_path"],
        "file_size_bytes": entry.get("file_size_bytes"),
        "message": "In production, this would redirect to a presigned MinIO URL",
    }
    return location_info