[split-required] Split 700-870 LOC files across all services

backend-lehrer (11 files):
- llm_gateway/routes/schools.py (867 → 5), recording_api.py (848 → 6)
- messenger_api.py (840 → 5), print_generator.py (824 → 5)
- unit_analytics_api.py (751 → 5), classroom/routes/context.py (726 → 4)
- llm_gateway/routes/edu_search_seeds.py (710 → 4)

klausur-service (12 files):
- ocr_labeling_api.py (845 → 4), metrics_db.py (833 → 4)
- legal_corpus_api.py (790 → 4), page_crop.py (758 → 3)
- mail/ai_service.py (747 → 4), github_crawler.py (767 → 3)
- trocr_service.py (730 → 4), full_compliance_pipeline.py (723 → 4)
- dsfa_rag_api.py (715 → 4), ocr_pipeline_auto.py (705 → 4)

website (6 pages):
- audit-checklist (867 → 8), content (806 → 6)
- screen-flow (790 → 4), scraper (789 → 5)
- zeugnisse (776 → 5), modules (745 → 4)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-04-25 08:01:18 +02:00
parent b6983ab1dc
commit 34da9f4cda
106 changed files with 16500 additions and 16947 deletions

View File

@@ -0,0 +1,307 @@
"""
Recording API - Core Recording Routes.
Webhook, CRUD, health, audit, and download endpoints.
"""
import uuid
from datetime import datetime, timedelta
from typing import Optional
from fastapi import APIRouter, HTTPException, Query, Request
from fastapi.responses import JSONResponse
from recording_models import (
JibriWebhookPayload,
RecordingResponse,
RecordingListResponse,
MINIO_ENDPOINT,
MINIO_BUCKET,
DEFAULT_RETENTION_DAYS,
)
from recording_helpers import (
_recordings_store,
_transcriptions_store,
_audit_log,
log_audit,
)
# Router holding all recording endpoints; presumably mounted by the app with a
# path prefix (note the empty-path list route below) — TODO confirm at include site.
router = APIRouter(tags=["Recordings"])
# ==========================================
# WEBHOOK ENDPOINT (Jibri)
# ==========================================
@router.post("/webhook")
async def jibri_webhook(payload: JibriWebhookPayload, request: Request):
    """
    Webhook endpoint called by Jibri finalize.sh after upload.

    Registers a new recording entry in the in-memory store and writes a
    DSGVO audit-log entry. Returns HTTP 400 for any event other than
    ``recording_completed``.
    """
    if payload.event != "recording_completed":
        return JSONResponse(
            status_code=400,
            content={"error": f"Unknown event type: {payload.event}"}
        )
    # recording_name is formatted "meetingId_timestamp". str.split() always
    # returns at least one element, so the first part is the meeting id
    # (the previous "if parts" fallback was unreachable).
    meeting_id = payload.recording_name.split("_")[0]
    recording_id = str(uuid.uuid4())
    # Capture a single timestamp so recorded_at, created_at and updated_at
    # agree exactly (separate utcnow() calls differ by microseconds).
    recorded_at = datetime.utcnow()
    now_iso = recorded_at.isoformat()
    recording = {
        "id": recording_id,
        "meeting_id": meeting_id,
        "jibri_session_id": payload.recording_name,
        "title": f"Recording {meeting_id}",
        "storage_path": payload.storage_path,
        "audio_path": payload.audio_path,
        "file_size_bytes": payload.file_size_bytes,
        "duration_seconds": None,  # Will be updated after analysis
        "participant_count": 0,
        "status": "uploaded",
        "recorded_at": now_iso,
        "retention_days": DEFAULT_RETENTION_DAYS,
        "created_at": now_iso,
        "updated_at": now_iso
    }
    _recordings_store[recording_id] = recording
    # Audit trail: record who/what created the entry (DSGVO).
    log_audit(
        action="created",
        recording_id=recording_id,
        metadata={
            "source": "jibri_webhook",
            "storage_path": payload.storage_path,
            "file_size_bytes": payload.file_size_bytes
        }
    )
    return {
        "success": True,
        "recording_id": recording_id,
        "meeting_id": meeting_id,
        "status": "uploaded",
        "message": "Recording registered successfully"
    }
# ==========================================
# HEALTH & AUDIT ENDPOINTS (must be before parameterized routes)
# ==========================================
@router.get("/health")
async def recordings_health():
    """Liveness probe: reports store sizes and the configured MinIO target."""
    report = {"status": "healthy"}
    report["recordings_count"] = len(_recordings_store)
    report["transcriptions_count"] = len(_transcriptions_store)
    report["minio_endpoint"] = MINIO_ENDPOINT
    report["bucket"] = MINIO_BUCKET
    return report
@router.get("/audit/log")
async def get_audit_log(
    recording_id: Optional[str] = Query(None),
    action: Optional[str] = Query(None),
    limit: int = Query(100, ge=1, le=1000)
):
    """
    Return audit-log entries (DSGVO compliance).

    Admin-only endpoint for reviewing recording access history; supports
    optional filtering by recording id and/or action, newest first.
    """
    entries = _audit_log.copy()
    if recording_id:
        entries = [e for e in entries if e.get("recording_id") == recording_id]
    if action:
        entries = [e for e in entries if e.get("action") == action]
    # Newest first, then truncate to the requested page size.
    entries = sorted(entries, key=lambda e: e["created_at"], reverse=True)
    return {
        "entries": entries[:limit],
        "total": len(entries)
    }
# ==========================================
# RECORDING MANAGEMENT ENDPOINTS
# ==========================================
@router.get("", response_model=RecordingListResponse)
async def list_recordings(
    status: Optional[str] = Query(None, description="Filter by status"),
    meeting_id: Optional[str] = Query(None, description="Filter by meeting ID"),
    page: int = Query(1, ge=1, description="Page number"),
    page_size: int = Query(20, ge=1, le=100, description="Items per page")
):
    """
    List all recordings with optional filtering.

    Supports pagination plus filtering by status and/or meeting ID;
    results are ordered newest first.
    """
    def _matches(rec):
        # Empty-string filters are treated as "no filter" (truthiness check).
        if status and rec["status"] != status:
            return False
        if meeting_id and rec["meeting_id"] != meeting_id:
            return False
        return True

    # Filter then order newest first.
    filtered = sorted(
        (r for r in _recordings_store.values() if _matches(r)),
        key=lambda r: r["recorded_at"],
        reverse=True,
    )
    total = len(filtered)
    offset = (page - 1) * page_size
    items = []
    for rec in filtered[offset:offset + page_size]:
        when = datetime.fromisoformat(rec["recorded_at"])
        expires = when + timedelta(days=rec["retention_days"])
        # Look up an associated transcription, if any.
        trans = None
        for t in _transcriptions_store.values():
            if t["recording_id"] == rec["id"]:
                trans = t
                break
        items.append(RecordingResponse(
            id=rec["id"],
            meeting_id=rec["meeting_id"],
            title=rec.get("title"),
            storage_path=rec["storage_path"],
            audio_path=rec.get("audio_path"),
            file_size_bytes=rec.get("file_size_bytes"),
            duration_seconds=rec.get("duration_seconds"),
            participant_count=rec.get("participant_count", 0),
            status=rec["status"],
            recorded_at=when,
            retention_days=rec["retention_days"],
            retention_expires_at=expires,
            transcription_status=trans["status"] if trans else None,
            transcription_id=trans["id"] if trans else None
        ))
    return RecordingListResponse(
        recordings=items,
        total=total,
        page=page,
        page_size=page_size
    )
@router.get("/{recording_id}", response_model=RecordingResponse)
async def get_recording(recording_id: str):
    """
    Return the full details of a single recording.

    Raises 404 when the id is unknown; each successful read is written
    to the audit log as a "viewed" action.
    """
    rec = _recordings_store.get(recording_id)
    if not rec:
        raise HTTPException(status_code=404, detail="Recording not found")
    # DSGVO audit trail: record that this entry was viewed.
    log_audit(action="viewed", recording_id=recording_id)
    when = datetime.fromisoformat(rec["recorded_at"])
    expires = when + timedelta(days=rec["retention_days"])
    # Look up an associated transcription, if any.
    trans = None
    for t in _transcriptions_store.values():
        if t["recording_id"] == recording_id:
            trans = t
            break
    return RecordingResponse(
        id=rec["id"],
        meeting_id=rec["meeting_id"],
        title=rec.get("title"),
        storage_path=rec["storage_path"],
        audio_path=rec.get("audio_path"),
        file_size_bytes=rec.get("file_size_bytes"),
        duration_seconds=rec.get("duration_seconds"),
        participant_count=rec.get("participant_count", 0),
        status=rec["status"],
        recorded_at=when,
        retention_days=rec["retention_days"],
        retention_expires_at=expires,
        transcription_status=trans["status"] if trans else None,
        transcription_id=trans["id"] if trans else None
    )
@router.delete("/{recording_id}")
async def delete_recording(
    recording_id: str,
    reason: str = Query(..., description="Reason for deletion (DSGVO audit)")
):
    """
    Soft-delete a recording (DSGVO compliance).

    The recording is marked as deleted but retained for audit purposes;
    actual file deletion happens after the audit retention period.
    Raises 404 when the id is unknown.
    """
    recording = _recordings_store.get(recording_id)
    if not recording:
        raise HTTPException(status_code=404, detail="Recording not found")
    # Soft delete: capture ONE timestamp so deleted_at and updated_at are
    # identical (two utcnow() calls would differ by microseconds).
    now_iso = datetime.utcnow().isoformat()
    recording["status"] = "deleted"
    recording["deleted_at"] = now_iso
    recording["updated_at"] = now_iso
    # Audit trail records the operator-supplied reason.
    log_audit(
        action="deleted",
        recording_id=recording_id,
        metadata={"reason": reason}
    )
    return {
        "success": True,
        "recording_id": recording_id,
        "status": "deleted",
        "message": "Recording marked for deletion"
    }
@router.get("/{recording_id}/download")
async def download_recording(recording_id: str):
    """
    Download the recording file.

    In production this would generate a presigned MinIO URL; for now it
    returns the storage location. Raises 404 for unknown ids and 410 for
    soft-deleted recordings.
    """
    rec = _recordings_store.get(recording_id)
    if not rec:
        raise HTTPException(status_code=404, detail="Recording not found")
    if rec["status"] == "deleted":
        raise HTTPException(status_code=410, detail="Recording has been deleted")
    # DSGVO audit trail: record the download.
    log_audit(action="downloaded", recording_id=recording_id)
    # Placeholder response until presigned-URL generation is wired up.
    return {
        "recording_id": recording_id,
        "storage_path": rec["storage_path"],
        "file_size_bytes": rec.get("file_size_bytes"),
        "message": "In production, this would redirect to a presigned MinIO URL"
    }