[split-required] Split 700-870 LOC files across all services

backend-lehrer (11 files):
- llm_gateway/routes/schools.py (867 → 5), recording_api.py (848 → 6)
- messenger_api.py (840 → 5), print_generator.py (824 → 5)
- unit_analytics_api.py (751 → 5), classroom/routes/context.py (726 → 4)
- llm_gateway/routes/edu_search_seeds.py (710 → 4)

klausur-service (12 files):
- ocr_labeling_api.py (845 → 4), metrics_db.py (833 → 4)
- legal_corpus_api.py (790 → 4), page_crop.py (758 → 3)
- mail/ai_service.py (747 → 4), github_crawler.py (767 → 3)
- trocr_service.py (730 → 4), full_compliance_pipeline.py (723 → 4)
- dsfa_rag_api.py (715 → 4), ocr_pipeline_auto.py (705 → 4)

website (6 pages):
- audit-checklist (867 → 8), content (806 → 6)
- screen-flow (790 → 4), scraper (789 → 5)
- zeugnisse (776 → 5), modules (745 → 4)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-04-25 08:01:18 +02:00
parent b6983ab1dc
commit 34da9f4cda
106 changed files with 16500 additions and 16947 deletions

View File

@@ -0,0 +1,250 @@
"""
Recording API - Transcription Routes.
Start transcription, get status, download VTT/SRT subtitle files.
"""
import uuid
from datetime import datetime
from typing import Optional
from fastapi import APIRouter, HTTPException
from fastapi.responses import PlainTextResponse
from recording_models import (
TranscriptionRequest,
TranscriptionStatusResponse,
)
from recording_helpers import (
_recordings_store,
_transcriptions_store,
log_audit,
format_vtt_time,
format_srt_time,
)
# Router that collects the transcription endpoints declared in this module.
# Every route registered on it is tagged "Recordings". No prefix is set here,
# so the "/{recording_id}/..." paths are relative to wherever the router is
# included -- NOTE(review): presumably mounted under a recordings prefix; confirm.
router = APIRouter(tags=["Recordings"])
# ==========================================
# TRANSCRIPTION ENDPOINTS
# ==========================================
@router.post("/{recording_id}/transcribe", response_model=TranscriptionStatusResponse)
async def start_transcription(recording_id: str, request: TranscriptionRequest):
    """
    Register a new pending transcription for a recording.

    Creates an entry in the transcription store, flips the recording to
    "processing" and writes an audit record. Handing the job to a worker is
    not implemented yet (see the TODO below), so the entry stays "pending"
    until something else updates it.

    Args:
        recording_id: ID of the recording taken from the URL path.
        request: Request body carrying the ``language`` and ``model`` to use.

    Returns:
        TranscriptionStatusResponse describing the freshly created entry.

    Raises:
        HTTPException: 404 if the recording is unknown, 400 if it is marked
            deleted, 409 if a transcription that has not failed already exists.
    """
    recording = _recordings_store.get(recording_id)
    if not recording:
        raise HTTPException(status_code=404, detail="Recording not found")
    if recording["status"] == "deleted":
        raise HTTPException(status_code=400, detail="Cannot transcribe deleted recording")

    # Only a failed attempt may be retried; any other entry blocks a new one.
    for entry in _transcriptions_store.values():
        if entry["recording_id"] == recording_id and entry["status"] != "failed":
            raise HTTPException(
                status_code=409,
                detail=f"Transcription already exists with status: {entry['status']}"
            )

    language = request.language
    model = request.model
    transcription_id = str(uuid.uuid4())
    now = datetime.utcnow()
    stamp = now.isoformat()

    # Result and bookkeeping fields start out empty until processing fills them.
    unset_fields = (
        "full_text",
        "word_count",
        "confidence_score",
        "vtt_path",
        "srt_path",
        "json_path",
        "error_message",
        "processing_started_at",
        "processing_completed_at",
        "processing_duration_seconds",
    )
    _transcriptions_store[transcription_id] = {
        "id": transcription_id,
        "recording_id": recording_id,
        "language": language,
        "model": model,
        "status": "pending",
        **dict.fromkeys(unset_fields),
        "created_at": stamp,
        "updated_at": stamp,
    }

    # Reflect the pending work on the recording itself.
    recording["status"] = "processing"
    recording["updated_at"] = stamp

    log_audit(
        action="transcription_started",
        recording_id=recording_id,
        transcription_id=transcription_id,
        metadata={"language": language, "model": model}
    )

    # TODO: Queue job to Redis/Valkey for transcription worker
    return TranscriptionStatusResponse(
        id=transcription_id,
        recording_id=recording_id,
        status="pending",
        language=language,
        model=model,
        word_count=None,
        confidence_score=None,
        processing_duration_seconds=None,
        error_message=None,
        created_at=now,
        completed_at=None
    )
@router.get("/{recording_id}/transcription", response_model=TranscriptionStatusResponse)
async def get_transcription_status(recording_id: str):
    """
    Get transcription status for a recording.

    A recording can own several transcription entries, because
    ``start_transcription`` accepts a new attempt once the previous one has
    failed. The entry reported here is the one that has not failed when such
    an entry exists; otherwise it is the last failed attempt found in the
    store.

    Args:
        recording_id: ID of the recording taken from the URL path.

    Returns:
        TranscriptionStatusResponse built from the selected store entry.

    Raises:
        HTTPException: 404 when no transcription references the recording.
    """
    candidates = [
        t for t in _transcriptions_store.values() if t["recording_id"] == recording_id
    ]
    if not candidates:
        raise HTTPException(status_code=404, detail="No transcription found for this recording")

    # Taking the first match would keep reporting an old failed attempt even
    # after a retry was started. Prefer the non-failed entry; fall back to the
    # last match (assumes the store iterates in insertion order, as a plain
    # dict does -- confirm against recording_helpers).
    transcription = next(
        (t for t in candidates if t["status"] != "failed"),
        candidates[-1],
    )

    completed_raw = transcription.get("processing_completed_at")
    return TranscriptionStatusResponse(
        id=transcription["id"],
        recording_id=transcription["recording_id"],
        status=transcription["status"],
        language=transcription["language"],
        model=transcription["model"],
        word_count=transcription.get("word_count"),
        confidence_score=transcription.get("confidence_score"),
        processing_duration_seconds=transcription.get("processing_duration_seconds"),
        error_message=transcription.get("error_message"),
        # Timestamps are stored as ISO-8601 strings; the response model takes datetimes.
        created_at=datetime.fromisoformat(transcription["created_at"]),
        completed_at=datetime.fromisoformat(completed_raw) if completed_raw else None,
    )
@router.get("/{recording_id}/transcription/text")
async def get_transcription_text(recording_id: str):
    """
    Get the full transcription text.

    When a recording has several transcription entries (a retry after a
    failed attempt), the entry that has not failed is used, so a completed
    retry is not hidden behind the earlier failure.

    Args:
        recording_id: ID of the recording taken from the URL path.

    Returns:
        dict with ``transcription_id``, ``recording_id``, ``language``,
        ``text`` and ``word_count``.

    Raises:
        HTTPException: 404 when no transcription references the recording,
            400 when the selected transcription is not "completed".
    """
    candidates = [
        t for t in _transcriptions_store.values() if t["recording_id"] == recording_id
    ]
    if not candidates:
        raise HTTPException(status_code=404, detail="No transcription found for this recording")

    # Prefer the non-failed attempt; fall back to the last match (assumes the
    # store iterates in insertion order, as a plain dict does).
    transcription = next(
        (t for t in candidates if t["status"] != "failed"),
        candidates[-1],
    )
    if transcription["status"] != "completed":
        raise HTTPException(
            status_code=400,
            detail=f"Transcription not ready. Status: {transcription['status']}"
        )

    return {
        "transcription_id": transcription["id"],
        "recording_id": recording_id,
        "language": transcription["language"],
        # Entries are created with these keys already present and set to None,
        # so a .get() default would never apply; use an explicit fallback.
        "text": transcription.get("full_text") or "",
        "word_count": transcription.get("word_count") or 0,
    }
@router.get("/{recording_id}/transcription/vtt")
async def get_transcription_vtt(recording_id: str):
    """
    Download transcription as WebVTT subtitle file.

    The cues are synthetic: the full text is split after every "." and each
    non-empty piece gets a consecutive fixed-length slot, so the timings are
    not aligned to the audio.

    When a recording has several transcription entries (a retry after a
    failed attempt), the entry that has not failed is used.

    Args:
        recording_id: ID of the recording taken from the URL path.

    Returns:
        PlainTextResponse with media type ``text/vtt`` served as an attachment.

    Raises:
        HTTPException: 404 when no transcription references the recording,
            400 when the selected transcription is not "completed".
    """
    candidates = [
        t for t in _transcriptions_store.values() if t["recording_id"] == recording_id
    ]
    if not candidates:
        raise HTTPException(status_code=404, detail="No transcription found for this recording")

    # Prefer the non-failed attempt; fall back to the last match (assumes the
    # store iterates in insertion order, as a plain dict does).
    transcription = next(
        (t for t in candidates if t["status"] != "failed"),
        candidates[-1],
    )
    if transcription["status"] != "completed":
        raise HTTPException(
            status_code=400,
            detail=f"Transcription not ready. Status: {transcription['status']}"
        )

    # Generate VTT content
    text = transcription.get("full_text") or ""
    cues = []
    time_offset = 0
    for sentence in text.replace(".", ".\n").split("\n"):
        sentence = sentence.strip()
        if not sentence:
            continue
        start = format_vtt_time(time_offset)
        # Fixed slot of 3000 units per sentence (presumably milliseconds --
        # confirm against format_vtt_time).
        time_offset += 3000
        end = format_vtt_time(time_offset)
        cues.append(f"{start} --> {end}\n{sentence}\n\n")
    vtt_content = "WEBVTT\n\n" + "".join(cues)

    return PlainTextResponse(
        content=vtt_content,
        media_type="text/vtt",
        headers={"Content-Disposition": f"attachment; filename=transcript_{recording_id}.vtt"}
    )
@router.get("/{recording_id}/transcription/srt")
async def get_transcription_srt(recording_id: str):
    """
    Download transcription as SRT subtitle file.

    The entries are synthetic: the full text is split after every "." and
    each non-empty piece gets a consecutive fixed-length slot, numbered from
    1, so the timings are not aligned to the audio.

    When a recording has several transcription entries (a retry after a
    failed attempt), the entry that has not failed is used.

    Args:
        recording_id: ID of the recording taken from the URL path.

    Returns:
        PlainTextResponse with media type ``text/plain`` served as an attachment.

    Raises:
        HTTPException: 404 when no transcription references the recording,
            400 when the selected transcription is not "completed".
    """
    candidates = [
        t for t in _transcriptions_store.values() if t["recording_id"] == recording_id
    ]
    if not candidates:
        raise HTTPException(status_code=404, detail="No transcription found for this recording")

    # Prefer the non-failed attempt; fall back to the last match (assumes the
    # store iterates in insertion order, as a plain dict does).
    transcription = next(
        (t for t in candidates if t["status"] != "failed"),
        candidates[-1],
    )
    if transcription["status"] != "completed":
        raise HTTPException(
            status_code=400,
            detail=f"Transcription not ready. Status: {transcription['status']}"
        )

    # Generate SRT content
    text = transcription.get("full_text") or ""
    stripped = (piece.strip() for piece in text.replace(".", ".\n").split("\n"))
    sentences = [piece for piece in stripped if piece]

    # Fixed slot of 3000 units per sentence (presumably milliseconds --
    # confirm against format_srt_time).
    slot = 3000
    entries = []
    for index, sentence in enumerate(sentences, start=1):
        start = format_srt_time((index - 1) * slot)
        end = format_srt_time(index * slot)
        entries.append(f"{index}\n{start} --> {end}\n{sentence}\n\n")
    srt_content = "".join(entries)

    return PlainTextResponse(
        content=srt_content,
        media_type="text/plain",
        headers={"Content-Disposition": f"attachment; filename=transcript_{recording_id}.srt"}
    )