[split-required] Split 700-870 LOC files across all services

backend-lehrer (11 files):
- llm_gateway/routes/schools.py (867 → 5), recording_api.py (848 → 6)
- messenger_api.py (840 → 5), print_generator.py (824 → 5)
- unit_analytics_api.py (751 → 5), classroom/routes/context.py (726 → 4)
- llm_gateway/routes/edu_search_seeds.py (710 → 4)

klausur-service (12 files):
- ocr_labeling_api.py (845 → 4), metrics_db.py (833 → 4)
- legal_corpus_api.py (790 → 4), page_crop.py (758 → 3)
- mail/ai_service.py (747 → 4), github_crawler.py (767 → 3)
- trocr_service.py (730 → 4), full_compliance_pipeline.py (723 → 4)
- dsfa_rag_api.py (715 → 4), ocr_pipeline_auto.py (705 → 4)

website (6 pages):
- audit-checklist (867 → 8), content (806 → 6)
- screen-flow (790 → 4), scraper (789 → 5)
- zeugnisse (776 → 5), modules (745 → 4)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-04-25 08:01:18 +02:00
parent b6983ab1dc
commit 34da9f4cda
106 changed files with 16500 additions and 16947 deletions

View File

@@ -0,0 +1,307 @@
"""
Recording API - Core Recording Routes.
Webhook, CRUD, health, audit, and download endpoints.
"""
import uuid
from datetime import datetime, timedelta
from typing import Optional
from fastapi import APIRouter, HTTPException, Query, Request
from fastapi.responses import JSONResponse
from recording_models import (
JibriWebhookPayload,
RecordingResponse,
RecordingListResponse,
MINIO_ENDPOINT,
MINIO_BUCKET,
DEFAULT_RETENTION_DAYS,
)
from recording_helpers import (
_recordings_store,
_transcriptions_store,
_audit_log,
log_audit,
)
# Router holding all recording endpoints; presumably mounted by the app with a
# path prefix (note the empty-path list route below) — TODO confirm at include site.
router = APIRouter(tags=["Recordings"])
# ==========================================
# WEBHOOK ENDPOINT (Jibri)
# ==========================================
@router.post("/webhook")
async def jibri_webhook(payload: JibriWebhookPayload, request: Request):
    """
    Webhook endpoint called by Jibri finalize.sh after upload.

    Registers a new recording entry in the in-memory store and writes a
    DSGVO audit-log entry. Returns HTTP 400 for any event other than
    ``recording_completed``.
    """
    if payload.event != "recording_completed":
        return JSONResponse(
            status_code=400,
            content={"error": f"Unknown event type: {payload.event}"}
        )
    # recording_name is formatted "meetingId_timestamp". str.split() always
    # returns at least one element, so the first part is the meeting id
    # (the previous "if parts" fallback was unreachable).
    meeting_id = payload.recording_name.split("_")[0]
    recording_id = str(uuid.uuid4())
    # Capture a single timestamp so recorded_at, created_at and updated_at
    # agree exactly (separate utcnow() calls differ by microseconds).
    recorded_at = datetime.utcnow()
    now_iso = recorded_at.isoformat()
    recording = {
        "id": recording_id,
        "meeting_id": meeting_id,
        "jibri_session_id": payload.recording_name,
        "title": f"Recording {meeting_id}",
        "storage_path": payload.storage_path,
        "audio_path": payload.audio_path,
        "file_size_bytes": payload.file_size_bytes,
        "duration_seconds": None,  # Will be updated after analysis
        "participant_count": 0,
        "status": "uploaded",
        "recorded_at": now_iso,
        "retention_days": DEFAULT_RETENTION_DAYS,
        "created_at": now_iso,
        "updated_at": now_iso
    }
    _recordings_store[recording_id] = recording
    # Audit trail: record who/what created the entry (DSGVO).
    log_audit(
        action="created",
        recording_id=recording_id,
        metadata={
            "source": "jibri_webhook",
            "storage_path": payload.storage_path,
            "file_size_bytes": payload.file_size_bytes
        }
    )
    return {
        "success": True,
        "recording_id": recording_id,
        "meeting_id": meeting_id,
        "status": "uploaded",
        "message": "Recording registered successfully"
    }
# ==========================================
# HEALTH & AUDIT ENDPOINTS (must be before parameterized routes)
# ==========================================
@router.get("/health")
async def recordings_health():
    """Liveness probe: reports store sizes and the configured MinIO target."""
    report = {"status": "healthy"}
    report["recordings_count"] = len(_recordings_store)
    report["transcriptions_count"] = len(_transcriptions_store)
    report["minio_endpoint"] = MINIO_ENDPOINT
    report["bucket"] = MINIO_BUCKET
    return report
@router.get("/audit/log")
async def get_audit_log(
    recording_id: Optional[str] = Query(None),
    action: Optional[str] = Query(None),
    limit: int = Query(100, ge=1, le=1000)
):
    """
    Return audit-log entries (DSGVO compliance).

    Admin-only endpoint for reviewing recording access history; supports
    optional filtering by recording id and/or action, newest first.
    """
    entries = _audit_log.copy()
    if recording_id:
        entries = [e for e in entries if e.get("recording_id") == recording_id]
    if action:
        entries = [e for e in entries if e.get("action") == action]
    # Newest first, then truncate to the requested page size.
    entries = sorted(entries, key=lambda e: e["created_at"], reverse=True)
    return {
        "entries": entries[:limit],
        "total": len(entries)
    }
# ==========================================
# RECORDING MANAGEMENT ENDPOINTS
# ==========================================
@router.get("", response_model=RecordingListResponse)
async def list_recordings(
    status: Optional[str] = Query(None, description="Filter by status"),
    meeting_id: Optional[str] = Query(None, description="Filter by meeting ID"),
    page: int = Query(1, ge=1, description="Page number"),
    page_size: int = Query(20, ge=1, le=100, description="Items per page")
):
    """
    List all recordings with optional filtering.

    Supports pagination plus filtering by status and/or meeting ID;
    results are ordered newest first.
    """
    def _matches(rec):
        # Empty-string filters are treated as "no filter" (truthiness check).
        if status and rec["status"] != status:
            return False
        if meeting_id and rec["meeting_id"] != meeting_id:
            return False
        return True

    # Filter then order newest first.
    filtered = sorted(
        (r for r in _recordings_store.values() if _matches(r)),
        key=lambda r: r["recorded_at"],
        reverse=True,
    )
    total = len(filtered)
    offset = (page - 1) * page_size
    items = []
    for rec in filtered[offset:offset + page_size]:
        when = datetime.fromisoformat(rec["recorded_at"])
        expires = when + timedelta(days=rec["retention_days"])
        # Look up an associated transcription, if any.
        trans = None
        for t in _transcriptions_store.values():
            if t["recording_id"] == rec["id"]:
                trans = t
                break
        items.append(RecordingResponse(
            id=rec["id"],
            meeting_id=rec["meeting_id"],
            title=rec.get("title"),
            storage_path=rec["storage_path"],
            audio_path=rec.get("audio_path"),
            file_size_bytes=rec.get("file_size_bytes"),
            duration_seconds=rec.get("duration_seconds"),
            participant_count=rec.get("participant_count", 0),
            status=rec["status"],
            recorded_at=when,
            retention_days=rec["retention_days"],
            retention_expires_at=expires,
            transcription_status=trans["status"] if trans else None,
            transcription_id=trans["id"] if trans else None
        ))
    return RecordingListResponse(
        recordings=items,
        total=total,
        page=page,
        page_size=page_size
    )
@router.get("/{recording_id}", response_model=RecordingResponse)
async def get_recording(recording_id: str):
    """
    Return the full details of a single recording.

    Raises 404 when the id is unknown; each successful read is written
    to the audit log as a "viewed" action.
    """
    rec = _recordings_store.get(recording_id)
    if not rec:
        raise HTTPException(status_code=404, detail="Recording not found")
    # DSGVO audit trail: record that this entry was viewed.
    log_audit(action="viewed", recording_id=recording_id)
    when = datetime.fromisoformat(rec["recorded_at"])
    expires = when + timedelta(days=rec["retention_days"])
    # Look up an associated transcription, if any.
    trans = None
    for t in _transcriptions_store.values():
        if t["recording_id"] == recording_id:
            trans = t
            break
    return RecordingResponse(
        id=rec["id"],
        meeting_id=rec["meeting_id"],
        title=rec.get("title"),
        storage_path=rec["storage_path"],
        audio_path=rec.get("audio_path"),
        file_size_bytes=rec.get("file_size_bytes"),
        duration_seconds=rec.get("duration_seconds"),
        participant_count=rec.get("participant_count", 0),
        status=rec["status"],
        recorded_at=when,
        retention_days=rec["retention_days"],
        retention_expires_at=expires,
        transcription_status=trans["status"] if trans else None,
        transcription_id=trans["id"] if trans else None
    )
@router.delete("/{recording_id}")
async def delete_recording(
    recording_id: str,
    reason: str = Query(..., description="Reason for deletion (DSGVO audit)")
):
    """
    Soft-delete a recording (DSGVO compliance).

    The recording is marked as deleted but retained for audit purposes;
    actual file deletion happens after the audit retention period.
    Raises 404 when the id is unknown.
    """
    recording = _recordings_store.get(recording_id)
    if not recording:
        raise HTTPException(status_code=404, detail="Recording not found")
    # Soft delete: capture ONE timestamp so deleted_at and updated_at are
    # identical (two utcnow() calls would differ by microseconds).
    now_iso = datetime.utcnow().isoformat()
    recording["status"] = "deleted"
    recording["deleted_at"] = now_iso
    recording["updated_at"] = now_iso
    # Audit trail records the operator-supplied reason.
    log_audit(
        action="deleted",
        recording_id=recording_id,
        metadata={"reason": reason}
    )
    return {
        "success": True,
        "recording_id": recording_id,
        "status": "deleted",
        "message": "Recording marked for deletion"
    }
@router.get("/{recording_id}/download")
async def download_recording(recording_id: str):
    """
    Download the recording file.

    In production this would generate a presigned MinIO URL; for now it
    returns the storage location. Raises 404 for unknown ids and 410 for
    soft-deleted recordings.
    """
    rec = _recordings_store.get(recording_id)
    if not rec:
        raise HTTPException(status_code=404, detail="Recording not found")
    if rec["status"] == "deleted":
        raise HTTPException(status_code=410, detail="Recording has been deleted")
    # DSGVO audit trail: record the download.
    log_audit(action="downloaded", recording_id=recording_id)
    # Placeholder response until presigned-URL generation is wired up.
    return {
        "recording_id": recording_id,
        "storage_path": rec["storage_path"],
        "file_size_bytes": rec.get("file_size_bytes"),
        "message": "In production, this would redirect to a presigned MinIO URL"
    }