fix: Restore all files lost during destructive rebase
A previous `git pull --rebase origin main` dropped 177 local commits,
losing 3400+ files across admin-v2, backend, studio-v2, website,
klausur-service, and many other services. The partial restore attempt
(660295e2) only recovered some files.
This commit restores all missing files from pre-rebase ref 98933f5e
while preserving post-rebase additions (night-scheduler, night-mode UI,
NightModeWidget dashboard integration).
Restored features include:
- AI Module Sidebar (FAB), OCR Labeling, OCR Compare
- GPU Dashboard, RAG Pipeline, Magic Help
- Klausur-Korrektur (8 files), Abitur-Archiv (5+ files)
- Companion, Zeugnisse-Crawler, Screen Flow
- Full backend, studio-v2, website, klausur-service
- All compliance SDKs, agent-core, voice-service
- CI/CD configs, documentation, scripts
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
24
backend/transcription_worker/__init__.py
Normal file
24
backend/transcription_worker/__init__.py
Normal file
@@ -0,0 +1,24 @@
|
||||
"""
BreakPilot Transcription Worker

Asynchronous processing of meeting recordings using:
- faster-whisper for transcription (MIT License)
- pyannote.audio for speaker diarization (MIT License)

All components are open source and commercially usable.
"""

__version__ = "1.0.0"
__author__ = "BreakPilot Team"

# Re-export the pipeline building blocks so callers can import them
# directly from the package root, e.g.
# `from transcription_worker import WhisperTranscriber`.
from .transcriber import WhisperTranscriber
from .diarizer import SpeakerDiarizer
from .aligner import TranscriptAligner
from .storage import MinIOStorage

# Explicit public API of this package.
__all__ = [
    "WhisperTranscriber",
    "SpeakerDiarizer",
    "TranscriptAligner",
    "MinIOStorage"
]
|
||||
202
backend/transcription_worker/aligner.py
Normal file
202
backend/transcription_worker/aligner.py
Normal file
@@ -0,0 +1,202 @@
|
||||
"""
|
||||
BreakPilot Transcript Aligner
|
||||
|
||||
Aligns Whisper transcription segments with pyannote speaker diarization.
|
||||
Assigns speaker IDs to each transcribed segment.
|
||||
"""
|
||||
|
||||
import structlog
|
||||
from typing import List, Dict, Optional
|
||||
from collections import defaultdict
|
||||
|
||||
log = structlog.get_logger(__name__)
|
||||
|
||||
|
||||
class TranscriptAligner:
    """
    Aligns transcription segments with speaker diarization results.

    Uses overlap-based matching to assign a speaker ID to each
    transcribed segment; the speaker with the largest absolute overlap
    wins, which also handles speakers changing mid-sentence.

    Segments are plain dicts carrying at least ``start_time_ms`` and
    ``end_time_ms``; diarization segments additionally carry
    ``speaker_id``.
    """

    def __init__(self):
        """Initialize the aligner with empty speaker bookkeeping."""
        # Number of unique speakers seen in the most recent align() call.
        self._speaker_count = 0
        # Maps raw pyannote speaker IDs to friendly "SPEAKER_NN" names.
        self._speaker_map = {}

    def align(
        self,
        transcription_segments: List[Dict],
        diarization_segments: List[Dict],
        min_overlap_ratio: float = 0.3
    ) -> List[Dict]:
        """
        Align transcription with speaker diarization.

        Args:
            transcription_segments: List of segments from Whisper
            diarization_segments: List of segments from pyannote
            min_overlap_ratio: Minimum overlap ratio to assign speaker

        Returns:
            Copies of the transcription segments with ``speaker_id``
            added (``None`` when no speaker matched). When there are no
            diarization segments, the input list is returned unchanged.
        """
        if not diarization_segments:
            log.warning("no_diarization_segments", message="Returning transcription without speakers")
            return transcription_segments

        log.info(
            "aligning_transcription",
            transcription_count=len(transcription_segments),
            diarization_count=len(diarization_segments)
        )

        # Reset per-call state so repeated align() calls on the same
        # instance do not leak stale speaker mappings from earlier runs.
        self._speaker_map = {}

        # Build speaker mapping; sorting makes the numbering deterministic.
        unique_speakers = set(s["speaker_id"] for s in diarization_segments)
        self._speaker_count = len(unique_speakers)

        for i, speaker in enumerate(sorted(unique_speakers)):
            self._speaker_map[speaker] = f"SPEAKER_{i:02d}"

        # Align each transcription segment (inputs are not mutated).
        aligned_segments = []
        for trans_seg in transcription_segments:
            speaker_id = self._find_speaker_for_segment(
                trans_seg,
                diarization_segments,
                min_overlap_ratio
            )

            aligned_seg = trans_seg.copy()
            aligned_seg["speaker_id"] = speaker_id

            aligned_segments.append(aligned_seg)

        # Log per-speaker segment counts for observability.
        speaker_counts = defaultdict(int)
        for seg in aligned_segments:
            speaker_counts[seg.get("speaker_id", "UNKNOWN")] += 1

        log.info(
            "alignment_complete",
            speakers=dict(speaker_counts),
            total_speakers=self._speaker_count
        )

        return aligned_segments

    def _find_speaker_for_segment(
        self,
        trans_seg: Dict,
        diarization_segments: List[Dict],
        min_overlap_ratio: float
    ) -> Optional[str]:
        """
        Find the best matching speaker for a transcription segment.

        Picks the speaker whose diarization turns overlap the segment for
        the longest absolute duration; returns ``None`` when no turn
        covers at least ``min_overlap_ratio`` of the segment, or when the
        segment has zero/negative duration.
        """
        trans_start = trans_seg["start_time_ms"]
        trans_end = trans_seg["end_time_ms"]
        trans_duration = trans_end - trans_start

        # Degenerate segments cannot be meaningfully matched (and would
        # divide by zero below).
        if trans_duration <= 0:
            return None

        # Collect every diarization turn that overlaps this segment.
        overlaps = []
        for diar_seg in diarization_segments:
            diar_start = diar_seg["start_time_ms"]
            diar_end = diar_seg["end_time_ms"]

            # Intersection of the two [start, end) intervals.
            overlap_start = max(trans_start, diar_start)
            overlap_end = min(trans_end, diar_end)
            overlap_duration = max(0, overlap_end - overlap_start)

            if overlap_duration > 0:
                overlaps.append({
                    "speaker_id": diar_seg["speaker_id"],
                    "overlap_duration": overlap_duration,
                    "overlap_ratio": overlap_duration / trans_duration
                })

        if not overlaps:
            return None

        # Longest absolute overlap wins; the ratio threshold then filters
        # out segments that lie mostly outside any speaker turn.
        best_match = max(overlaps, key=lambda x: x["overlap_duration"])

        if best_match["overlap_ratio"] >= min_overlap_ratio:
            original_id = best_match["speaker_id"]
            # Fall back to the raw ID if no friendly mapping exists
            # (e.g. this method was called before align() built the map).
            return self._speaker_map.get(original_id, original_id)

        return None

    def get_speaker_count(self) -> int:
        """Get the number of unique speakers detected in the last align()."""
        return self._speaker_count

    def get_speaker_mapping(self) -> Dict[str, str]:
        """Get a copy of the mapping from pyannote IDs to friendly names."""
        return self._speaker_map.copy()

    def merge_consecutive_segments(
        self,
        segments: List[Dict],
        max_gap_ms: int = 1000,
        same_speaker_only: bool = True
    ) -> List[Dict]:
        """
        Merge consecutive segments that are close together.

        Useful for creating cleaner subtitle output.

        Args:
            segments: List of aligned segments
            max_gap_ms: Maximum gap between segments to merge
            same_speaker_only: Only merge if same speaker

        Returns:
            List of merged segments. The input segments (including their
            nested ``words`` lists) are never mutated.
        """
        if not segments:
            return []

        merged = []
        current = segments[0].copy()
        # dict.copy() is shallow: detach the nested word list as well so
        # that extending it below cannot mutate the caller's segment.
        if "words" in current:
            current["words"] = list(current["words"])

        for next_seg in segments[1:]:
            gap = next_seg["start_time_ms"] - current["end_time_ms"]
            same_speaker = (
                not same_speaker_only or
                current.get("speaker_id") == next_seg.get("speaker_id")
            )

            if gap <= max_gap_ms and same_speaker:
                # Merge into the running segment.
                current["end_time_ms"] = next_seg["end_time_ms"]
                current["text"] = current["text"] + " " + next_seg["text"]

                # Merge word timestamps if both sides carry them.
                if "words" in current and "words" in next_seg:
                    current["words"].extend(next_seg["words"])
            else:
                # Close the running segment and start a new one.
                merged.append(current)
                current = next_seg.copy()
                if "words" in current:
                    current["words"] = list(current["words"])

        # Don't forget the last segment
        merged.append(current)

        log.info(
            "segments_merged",
            original_count=len(segments),
            merged_count=len(merged)
        )

        return merged
|
||||
197
backend/transcription_worker/diarizer.py
Normal file
197
backend/transcription_worker/diarizer.py
Normal file
@@ -0,0 +1,197 @@
|
||||
"""
|
||||
BreakPilot Speaker Diarizer
|
||||
|
||||
Uses pyannote.audio (MIT License) for speaker diarization.
|
||||
Identifies who spoke when in an audio recording.
|
||||
"""
|
||||
|
||||
import os
|
||||
import structlog
|
||||
from typing import List, Dict, Optional
|
||||
|
||||
log = structlog.get_logger(__name__)
|
||||
|
||||
|
||||
class SpeakerDiarizer:
    """
    Speaker diarization using pyannote.audio.

    Identifies distinct speakers in an audio recording and provides
    timestamp information for when each speaker is talking.

    License: MIT
    Source: https://github.com/pyannote/pyannote-audio

    Note: Requires a HuggingFace token with access to pyannote models.
    Accept the conditions at: https://huggingface.co/pyannote/speaker-diarization
    """

    def __init__(
        self,
        auth_token: Optional[str] = None,
        device: str = "auto"
    ):
        """
        Initialize the diarizer.

        Args:
            auth_token: HuggingFace token with pyannote access
            device: Device to run on ("cpu", "cuda", "auto")
        """
        # Fall back to the environment when no token is passed explicitly.
        self.auth_token = auth_token or os.getenv("PYANNOTE_AUTH_TOKEN")
        self.device = device
        # The heavy pyannote pipeline is loaded lazily on first use.
        self._pipeline = None

        if not self.auth_token:
            # Non-fatal here: _load_pipeline() raises if diarization is
            # actually attempted without a token.
            log.warning(
                "pyannote_token_missing",
                message="Speaker diarization requires a HuggingFace token"
            )

    def _load_pipeline(self):
        """Lazy load the diarization pipeline.

        Raises:
            ValueError: If no HuggingFace token is configured.
            ImportError: If pyannote.audio is not installed.
        """
        # Idempotent: a second call is a no-op once the pipeline exists.
        if self._pipeline is not None:
            return

        if not self.auth_token:
            raise ValueError(
                "HuggingFace token required for pyannote.audio. "
                "Set PYANNOTE_AUTH_TOKEN environment variable."
            )

        try:
            # Imported lazily so the module can be imported without the
            # (large) pyannote/torch dependencies installed.
            from pyannote.audio import Pipeline
            import torch

            log.info("loading_pyannote_pipeline", device=self.device)

            # Load pre-trained speaker diarization pipeline
            self._pipeline = Pipeline.from_pretrained(
                "pyannote/speaker-diarization-3.1",
                use_auth_token=self.auth_token
            )

            # Move to appropriate device; "auto" prefers CUDA if present.
            if self.device == "auto":
                device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
            else:
                device = torch.device(self.device)

            self._pipeline.to(device)

            log.info("pyannote_pipeline_loaded", device=str(device))

        except ImportError:
            log.error("pyannote_not_installed")
            raise ImportError(
                "pyannote.audio is not installed. "
                "Install with: pip install pyannote.audio"
            )

    def diarize(
        self,
        audio_path: str,
        num_speakers: Optional[int] = None,
        min_speakers: Optional[int] = None,
        max_speakers: Optional[int] = None
    ) -> List[Dict]:
        """
        Perform speaker diarization on an audio file.

        Args:
            audio_path: Path to audio file (WAV recommended)
            num_speakers: Exact number of speakers (if known)
            min_speakers: Minimum number of speakers
            max_speakers: Maximum number of speakers

        Returns:
            List of speaker segments with speaker ID and timestamps
            (``speaker_id``, ``start_time_ms``, ``end_time_ms``,
            ``duration_ms``).

        Raises:
            FileNotFoundError: If ``audio_path`` does not exist.
            ValueError / ImportError: Propagated from _load_pipeline().
        """
        self._load_pipeline()

        if not os.path.exists(audio_path):
            raise FileNotFoundError(f"Audio file not found: {audio_path}")

        log.info(
            "starting_diarization",
            audio_path=audio_path,
            num_speakers=num_speakers
        )

        # Run diarization
        diarization = self._pipeline(
            audio_path,
            num_speakers=num_speakers,
            min_speakers=min_speakers,
            max_speakers=max_speakers
        )

        # Convert pyannote's annotation object to a list of plain dicts;
        # turn.start / turn.end are seconds, converted here to int ms.
        segments = []
        for turn, _, speaker in diarization.itertracks(yield_label=True):
            segments.append({
                "speaker_id": speaker,
                "start_time_ms": int(turn.start * 1000),
                "end_time_ms": int(turn.end * 1000),
                "duration_ms": int((turn.end - turn.start) * 1000)
            })

        # Get unique speakers
        unique_speakers = set(s["speaker_id"] for s in segments)

        log.info(
            "diarization_complete",
            segments_count=len(segments),
            speakers_count=len(unique_speakers),
            speakers=list(unique_speakers)
        )

        return segments

    def get_speaker_stats(self, segments: List[Dict]) -> Dict:
        """
        Calculate speaking statistics per speaker.

        Args:
            segments: List of speaker segments from diarize()

        Returns:
            dict with speaking time and percentage per speaker
        """
        # Accumulate total speaking time (ms) per speaker.
        speaker_times = {}

        for seg in segments:
            speaker = seg["speaker_id"]
            duration = seg["duration_ms"]

            if speaker not in speaker_times:
                speaker_times[speaker] = 0
            speaker_times[speaker] += duration

        total_time = sum(speaker_times.values())

        stats = {}
        for speaker, time_ms in speaker_times.items():
            stats[speaker] = {
                "total_time_ms": time_ms,
                "total_time_seconds": round(time_ms / 1000, 1),
                # Guard against division by zero for empty input.
                "percentage": round((time_ms / total_time) * 100, 1) if total_time > 0 else 0
            }

        return {
            "speakers": stats,
            "total_speakers": len(stats),
            "total_duration_ms": total_time
        }

    def is_available(self) -> bool:
        """Check if diarization is available (token configured)."""
        return bool(self.auth_token)

    def get_pipeline_info(self) -> dict:
        """Get information about the pipeline (availability, device, load state)."""
        return {
            "available": self.is_available(),
            "device": self.device,
            "loaded": self._pipeline is not None
        }
|
||||
291
backend/transcription_worker/export.py
Normal file
291
backend/transcription_worker/export.py
Normal file
@@ -0,0 +1,291 @@
|
||||
"""
|
||||
BreakPilot Transcript Export
|
||||
|
||||
Functions to export transcription segments to various formats:
|
||||
- WebVTT (for HTML5 video captions)
|
||||
- SRT (universal subtitle format)
|
||||
- JSON (full data with speakers and timestamps)
|
||||
"""
|
||||
|
||||
import json
|
||||
from typing import List, Dict, Any
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
def ms_to_vtt_timestamp(ms: int) -> str:
    """
    Convert milliseconds to WebVTT timestamp format.

    Args:
        ms: Milliseconds (non-negative)

    Returns:
        Timestamp string (HH:MM:SS.mmm)
    """
    # Peel off each unit with divmod instead of repeated modulo math.
    total_seconds, millis = divmod(ms, 1000)
    total_minutes, seconds = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)

    # WebVTT uses a dot before the millisecond field.
    return f"{hours:02d}:{minutes:02d}:{seconds:02d}.{millis:03d}"
|
||||
|
||||
|
||||
def ms_to_srt_timestamp(ms: int) -> str:
    """
    Convert milliseconds to SRT timestamp format.

    Args:
        ms: Milliseconds (non-negative)

    Returns:
        Timestamp string (HH:MM:SS,mmm)
    """
    # Peel off each unit with divmod instead of repeated modulo math.
    total_seconds, millis = divmod(ms, 1000)
    total_minutes, seconds = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)

    # SRT uses comma as decimal separator
    return f"{hours:02d}:{minutes:02d}:{seconds:02d},{millis:03d}"
|
||||
|
||||
|
||||
def export_to_vtt(
    segments: List[Dict],
    include_speakers: bool = True,
    header: str = "WEBVTT\nKind: captions\nLanguage: de\n"
) -> str:
    """
    Export segments to WebVTT format.

    Args:
        segments: List of transcription segments
        include_speakers: Include speaker labels
        header: VTT header text

    Returns:
        WebVTT formatted string
    """
    parts = [header]

    for cue_number, seg in enumerate(segments, start=1):
        # Cue identifier (preceded by a blank line separating cues).
        parts.append(f"\n{cue_number}")

        # Timestamp range for this cue.
        parts.append(
            f"{ms_to_vtt_timestamp(seg['start_time_ms'])} --> "
            f"{ms_to_vtt_timestamp(seg['end_time_ms'])}"
        )

        # Cue payload, optionally tagged with the WebVTT voice span.
        cue_text = seg["text"]
        if include_speakers and seg.get("speaker_id"):
            cue_text = f"<v {seg['speaker_id']}>{cue_text}"
        parts.append(cue_text)

    return "\n".join(parts) + "\n"
|
||||
|
||||
|
||||
def export_to_srt(
    segments: List[Dict],
    include_speakers: bool = True
) -> str:
    """
    Export segments to SRT format.

    Args:
        segments: List of transcription segments
        include_speakers: Include speaker labels in text

    Returns:
        SRT formatted string
    """
    entries = []

    for seq, seg in enumerate(segments, start=1):
        # Sequence number line.
        entries.append(str(seq))

        # Timestamp range line.
        entries.append(
            f"{ms_to_srt_timestamp(seg['start_time_ms'])} --> "
            f"{ms_to_srt_timestamp(seg['end_time_ms'])}"
        )

        # Subtitle text, optionally prefixed with the speaker label.
        subtitle = seg["text"]
        if include_speakers and seg.get("speaker_id"):
            subtitle = f"[{seg['speaker_id']}] {subtitle}"
        entries.append(subtitle)

        # Blank separator line between entries.
        entries.append("")

    return "\n".join(entries)
|
||||
|
||||
|
||||
def export_to_json(
    segments: List[Dict],
    metadata: "Dict[str, Any] | None" = None
) -> str:
    """
    Export segments to JSON format with full metadata.

    Args:
        segments: List of transcription segments
        metadata: Additional metadata to include (optional)

    Returns:
        JSON formatted string with ``segments`` (normalized key names)
        and a ``statistics`` summary.
    """
    # Local import keeps the module-level import block untouched.
    from datetime import timezone

    # datetime.utcnow() is deprecated (Python 3.12+); an aware UTC
    # timestamp with "+00:00" rewritten to "Z" yields the same string.
    generated_at = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")

    # Prepare export data
    export_data = {
        "version": "1.0",
        "format": "breakpilot-transcript",
        "generated_at": generated_at,
        "metadata": metadata or {},
        "segments": []
    }

    # Add segments (normalized key names: *_ms instead of *_time_ms).
    for seg in segments:
        export_seg = {
            "index": seg.get("index", 0),
            "start_ms": seg["start_time_ms"],
            "end_ms": seg["end_time_ms"],
            "duration_ms": seg["end_time_ms"] - seg["start_time_ms"],
            "text": seg["text"],
            "speaker_id": seg.get("speaker_id"),
            "confidence": seg.get("confidence")
        }

        # Include word-level timestamps if available
        if "words" in seg:
            export_seg["words"] = seg["words"]

        export_data["segments"].append(export_seg)

    # Calculate statistics over the exported (normalized) segments.
    total_duration_ms = sum(s["duration_ms"] for s in export_data["segments"])
    total_words = sum(len(s["text"].split()) for s in export_data["segments"])
    unique_speakers = set(s["speaker_id"] for s in export_data["segments"] if s["speaker_id"])

    export_data["statistics"] = {
        "total_segments": len(export_data["segments"]),
        "total_duration_ms": total_duration_ms,
        "total_duration_seconds": round(total_duration_ms / 1000, 1),
        "total_words": total_words,
        "unique_speakers": len(unique_speakers),
        "speakers": list(unique_speakers)
    }

    # ensure_ascii=False keeps non-ASCII (e.g. German umlauts) readable.
    return json.dumps(export_data, indent=2, ensure_ascii=False)
|
||||
|
||||
|
||||
def export_to_txt(
    segments: List[Dict],
    include_timestamps: bool = False,
    include_speakers: bool = True,
    paragraph_gap_ms: int = 3000
) -> str:
    """
    Export segments to plain text format.

    Args:
        segments: List of transcription segments
        include_timestamps: Add timestamps
        include_speakers: Add speaker labels
        paragraph_gap_ms: Gap threshold for new paragraph

    Returns:
        Plain text formatted string
    """
    out_lines = []
    prev_end_ms = 0
    active_speaker = None

    for seg in segments:
        # A long silence starts a new paragraph (blank line).
        if seg["start_time_ms"] - prev_end_ms > paragraph_gap_ms and out_lines:
            out_lines.append("")

        pieces = []

        if include_timestamps:
            pieces.append(f"[{ms_to_vtt_timestamp(seg['start_time_ms'])}]")

        # Emit the speaker label only when the speaker changes.
        speaker = seg.get("speaker_id")
        if include_speakers and speaker and speaker != active_speaker:
            pieces.append(f"\n{speaker}:")
            active_speaker = speaker

        pieces.append(seg["text"])

        out_lines.append(" ".join(pieces))
        prev_end_ms = seg["end_time_ms"]

    return "\n".join(out_lines)
|
||||
|
||||
|
||||
def create_chapters(
    segments: List[Dict],
    min_chapter_duration_ms: int = 60000,
    speaker_change_as_chapter: bool = True
) -> List[Dict]:
    """
    Create chapter markers from segments.

    Useful for video navigation and table of contents.

    Args:
        segments: List of transcription segments
        min_chapter_duration_ms: Minimum chapter duration
        speaker_change_as_chapter: Create chapter on speaker change

    Returns:
        List of chapter markers (``start_ms``, ``title``, ``speaker``);
        titles are the chapter's first five words plus an ellipsis.
    """
    if not segments:
        return []

    chapters = []
    chunk_start = segments[0]["start_time_ms"]
    chunk_words = []
    chunk_speaker = segments[0].get("speaker_id")

    for seg in segments:
        seg_speaker = seg.get("speaker_id")

        # A chapter boundary opens when the running chapter is long
        # enough, or (optionally) when a new labelled speaker takes over.
        boundary = (
            seg["start_time_ms"] - chunk_start >= min_chapter_duration_ms
            or (
                speaker_change_as_chapter
                and bool(seg_speaker)
                and seg_speaker != chunk_speaker
            )
        )

        if boundary:
            # Flush the running chapter (if it accumulated any words).
            if chunk_words:
                chapters.append({
                    "start_ms": chunk_start,
                    "title": " ".join(chunk_words[:5]) + "...",
                    "speaker": chunk_speaker
                })

            chunk_start = seg["start_time_ms"]
            chunk_words = []
            chunk_speaker = seg_speaker

        chunk_words.extend(seg["text"].split())

    # Flush the trailing chapter.
    if chunk_words:
        chapters.append({
            "start_ms": chunk_start,
            "title": " ".join(chunk_words[:5]) + "...",
            "speaker": chunk_speaker
        })

    return chapters
|
||||
359
backend/transcription_worker/storage.py
Normal file
359
backend/transcription_worker/storage.py
Normal file
@@ -0,0 +1,359 @@
|
||||
"""
|
||||
BreakPilot MinIO Storage Helper
|
||||
|
||||
Provides file upload/download operations for MinIO object storage.
|
||||
"""
|
||||
|
||||
import os
|
||||
import io
|
||||
import structlog
|
||||
from typing import Optional, BinaryIO
|
||||
|
||||
log = structlog.get_logger(__name__)
|
||||
|
||||
|
||||
class MinIOStorage:
    """
    MinIO storage client for recordings and transcriptions.

    Provides methods to upload, download, and manage files
    in MinIO object storage (S3-compatible). The underlying client is
    created lazily on first use.
    """

    def __init__(
        self,
        endpoint: str = "minio:9000",
        access_key: str = "breakpilot",
        secret_key: str = "breakpilot123",
        bucket: str = "breakpilot-recordings",
        secure: bool = False
    ):
        """
        Initialize MinIO client.

        Args:
            endpoint: MinIO server endpoint (host:port)
            access_key: Access key (username)
            secret_key: Secret key (password)
            bucket: Default bucket name
            secure: Use HTTPS

        NOTE(review): the hardcoded default credentials are only suitable
        for local development; production deployments must pass real
        credentials (e.g. from environment/config), never rely on these.
        """
        self.endpoint = endpoint
        self.access_key = access_key
        self.secret_key = secret_key
        self.bucket = bucket
        self.secure = secure
        # Client is created lazily by _get_client().
        self._client = None

    def _get_client(self):
        """Lazy initialize MinIO client.

        Raises:
            ImportError: If the `minio` package is not installed.
        """
        if self._client is not None:
            return self._client

        try:
            # Imported lazily so the module can be imported without the
            # minio dependency installed.
            from minio import Minio

            self._client = Minio(
                self.endpoint,
                access_key=self.access_key,
                secret_key=self.secret_key,
                secure=self.secure
            )

            log.info(
                "minio_client_initialized",
                endpoint=self.endpoint,
                bucket=self.bucket
            )

            return self._client

        except ImportError:
            log.error("minio_not_installed")
            raise ImportError(
                "minio is not installed. "
                "Install with: pip install minio"
            )

    def ensure_bucket(self) -> bool:
        """
        Ensure the bucket exists, create if needed.

        Returns:
            True if bucket exists or was created
        """
        client = self._get_client()

        if not client.bucket_exists(self.bucket):
            client.make_bucket(self.bucket)
            log.info("bucket_created", bucket=self.bucket)
            return True

        return True

    def download_file(
        self,
        object_name: str,
        local_path: str,
        bucket: Optional[str] = None
    ) -> str:
        """
        Download a file from MinIO.

        Args:
            object_name: Path in MinIO bucket
            local_path: Local destination path
            bucket: Optional bucket override

        Returns:
            Local file path
        """
        client = self._get_client()
        bucket = bucket or self.bucket

        log.info(
            "downloading_file",
            bucket=bucket,
            object_name=object_name,
            local_path=local_path
        )

        # Ensure directory exists
        os.makedirs(os.path.dirname(local_path), exist_ok=True)

        # Download
        client.fget_object(bucket, object_name, local_path)

        log.info(
            "file_downloaded",
            object_name=object_name,
            local_path=local_path,
            size=os.path.getsize(local_path)
        )

        return local_path

    def upload_file(
        self,
        local_path: str,
        object_name: str,
        content_type: Optional[str] = None,
        bucket: Optional[str] = None
    ) -> str:
        """
        Upload a file to MinIO.

        Args:
            local_path: Local file path
            object_name: Destination path in MinIO
            content_type: MIME type
            bucket: Optional bucket override

        Returns:
            Object name in MinIO
        """
        client = self._get_client()
        bucket = bucket or self.bucket

        # Ensure bucket exists.
        # NOTE(review): ensure_bucket() always targets self.bucket, even
        # when a different `bucket` override is passed here — confirm
        # whether overridden buckets are pre-provisioned elsewhere.
        self.ensure_bucket()

        log.info(
            "uploading_file",
            local_path=local_path,
            bucket=bucket,
            object_name=object_name
        )

        # Upload
        result = client.fput_object(
            bucket,
            object_name,
            local_path,
            content_type=content_type
        )

        log.info(
            "file_uploaded",
            object_name=object_name,
            etag=result.etag
        )

        return object_name

    def upload_content(
        self,
        content: str,
        object_name: str,
        content_type: str = "text/plain",
        bucket: Optional[str] = None
    ) -> str:
        """
        Upload string content directly to MinIO.

        Args:
            content: String content to upload
            object_name: Destination path in MinIO
            content_type: MIME type
            bucket: Optional bucket override

        Returns:
            Object name in MinIO
        """
        client = self._get_client()
        bucket = bucket or self.bucket

        # Ensure bucket exists (same self.bucket caveat as upload_file).
        self.ensure_bucket()

        # Convert to bytes; put_object requires a stream plus its length.
        data = content.encode("utf-8")
        data_stream = io.BytesIO(data)

        log.info(
            "uploading_content",
            bucket=bucket,
            object_name=object_name,
            size=len(data)
        )

        # Upload
        result = client.put_object(
            bucket,
            object_name,
            data_stream,
            length=len(data),
            content_type=content_type
        )

        log.info(
            "content_uploaded",
            object_name=object_name,
            etag=result.etag
        )

        return object_name

    def get_content(
        self,
        object_name: str,
        bucket: Optional[str] = None
    ) -> str:
        """
        Get string content from MinIO.

        Args:
            object_name: Path in MinIO bucket
            bucket: Optional bucket override

        Returns:
            File content as string (decoded as UTF-8)
        """
        client = self._get_client()
        bucket = bucket or self.bucket

        response = client.get_object(bucket, object_name)
        content = response.read().decode("utf-8")
        # Release the HTTP connection back to the pool.
        response.close()
        response.release_conn()

        return content

    def delete_file(
        self,
        object_name: str,
        bucket: Optional[str] = None
    ) -> bool:
        """
        Delete a file from MinIO.

        Args:
            object_name: Path in MinIO bucket
            bucket: Optional bucket override

        Returns:
            True if deleted
        """
        client = self._get_client()
        bucket = bucket or self.bucket

        client.remove_object(bucket, object_name)

        log.info("file_deleted", object_name=object_name)
        return True

    def file_exists(
        self,
        object_name: str,
        bucket: Optional[str] = None
    ) -> bool:
        """
        Check if a file exists in MinIO.

        Args:
            object_name: Path in MinIO bucket
            bucket: Optional bucket override

        Returns:
            True if file exists
        """
        client = self._get_client()
        bucket = bucket or self.bucket

        try:
            client.stat_object(bucket, object_name)
            return True
        # NOTE(review): the broad catch also treats network/auth errors
        # as "file missing" — consider narrowing to minio's S3Error.
        except Exception:
            return False

    def get_presigned_url(
        self,
        object_name: str,
        expires_hours: int = 24,
        bucket: Optional[str] = None
    ) -> str:
        """
        Get a presigned URL for temporary file access.

        Args:
            object_name: Path in MinIO bucket
            expires_hours: URL validity in hours
            bucket: Optional bucket override

        Returns:
            Presigned URL
        """
        from datetime import timedelta

        client = self._get_client()
        bucket = bucket or self.bucket

        url = client.presigned_get_object(
            bucket,
            object_name,
            expires=timedelta(hours=expires_hours)
        )

        return url

    def list_files(
        self,
        prefix: str = "",
        bucket: Optional[str] = None
    ) -> list:
        """
        List files with a given prefix.

        Args:
            prefix: Path prefix to filter
            bucket: Optional bucket override

        Returns:
            List of object names (recursive listing)
        """
        client = self._get_client()
        bucket = bucket or self.bucket

        objects = client.list_objects(bucket, prefix=prefix, recursive=True)

        return [obj.object_name for obj in objects]
|
||||
230
backend/transcription_worker/tasks.py
Normal file
230
backend/transcription_worker/tasks.py
Normal file
@@ -0,0 +1,230 @@
|
||||
"""
|
||||
BreakPilot Transcription Tasks
|
||||
|
||||
RQ task definitions for transcription processing.
|
||||
"""
|
||||
|
||||
import os
|
||||
import time
|
||||
import tempfile
|
||||
import structlog
|
||||
from typing import Optional
|
||||
from datetime import datetime
|
||||
|
||||
from .transcriber import WhisperTranscriber
|
||||
from .diarizer import SpeakerDiarizer
|
||||
from .aligner import TranscriptAligner
|
||||
from .storage import MinIOStorage
|
||||
from .export import export_to_vtt, export_to_srt, export_to_json
|
||||
|
||||
log = structlog.get_logger(__name__)
|
||||
|
||||
# Configuration
|
||||
WHISPER_MODEL = os.getenv("WHISPER_MODEL", "large-v3")
|
||||
WHISPER_DEVICE = os.getenv("WHISPER_DEVICE", "cpu")
|
||||
WHISPER_COMPUTE_TYPE = os.getenv("WHISPER_COMPUTE_TYPE", "int8")
|
||||
PYANNOTE_AUTH_TOKEN = os.getenv("PYANNOTE_AUTH_TOKEN")
|
||||
TEMP_DIR = os.getenv("TEMP_DIR", "/tmp/transcriptions")
|
||||
|
||||
# MinIO Configuration
|
||||
MINIO_ENDPOINT = os.getenv("MINIO_ENDPOINT", "minio:9000")
|
||||
MINIO_ACCESS_KEY = os.getenv("MINIO_ACCESS_KEY", "breakpilot")
|
||||
MINIO_SECRET_KEY = os.getenv("MINIO_SECRET_KEY", "breakpilot123")
|
||||
MINIO_BUCKET = os.getenv("MINIO_BUCKET", "breakpilot-recordings")
|
||||
MINIO_SECURE = os.getenv("MINIO_SECURE", "false").lower() == "true"
|
||||
|
||||
# Database URL for status updates
|
||||
DATABASE_URL = os.getenv("DATABASE_URL")
|
||||
|
||||
|
||||
def update_transcription_status(
    transcription_id: str,
    status: str,
    error_message: Optional[str] = None,
    **kwargs
):
    """Record a transcription status transition.

    Currently this only emits a structured log event; persisting the
    status change to the database is still outstanding.

    Args:
        transcription_id: UUID of the transcription record
        status: New status value (e.g. "processing", "completed", "failed")
        error_message: Optional failure description
        **kwargs: Additional fields to attach to the status event
    """
    # TODO: Implement database update
    log.info(
        "status_update",
        transcription_id=transcription_id,
        status=status,
        error=error_message,
        **kwargs
    )
def transcribe_recording(
    transcription_id: str,
    recording_id: str,
    audio_path: str,
    language: str = "de",
    enable_diarization: bool = True
) -> dict:
    """
    Main transcription task.

    Downloads audio from MinIO, transcribes with Whisper,
    optionally performs speaker diarization, and uploads
    VTT/SRT/JSON artifacts back to MinIO.

    Args:
        transcription_id: UUID of the transcription record
        recording_id: UUID of the source recording
        audio_path: Path to audio file in MinIO bucket
        language: Language code (de, en, etc.)
        enable_diarization: Whether to perform speaker diarization

    Returns:
        dict with transcription results and artifact paths

    Raises:
        Exception: Any processing error is re-raised after the
            transcription has been marked as failed.
    """
    from datetime import timezone  # datetime.utcnow() is deprecated; use aware UTC

    start_time = time.time()

    log.info(
        "transcription_started",
        transcription_id=transcription_id,
        recording_id=recording_id,
        audio_path=audio_path,
        language=language
    )

    # Update status to processing
    update_transcription_status(
        transcription_id,
        status="processing",
        processing_started_at=datetime.now(timezone.utc).isoformat()
    )

    # Computed up front so the finally-block can always clean it up,
    # even when processing fails part-way through.
    local_audio_path = os.path.join(TEMP_DIR, f"{transcription_id}_audio.wav")

    try:
        # Initialize storage
        storage = MinIOStorage(
            endpoint=MINIO_ENDPOINT,
            access_key=MINIO_ACCESS_KEY,
            secret_key=MINIO_SECRET_KEY,
            bucket=MINIO_BUCKET,
            secure=MINIO_SECURE
        )

        # Create temp directory and download the audio file
        os.makedirs(TEMP_DIR, exist_ok=True)
        storage.download_file(audio_path, local_audio_path)
        log.info("audio_downloaded", path=local_audio_path)

        # Initialize transcriber
        transcriber = WhisperTranscriber(
            model_name=WHISPER_MODEL,
            device=WHISPER_DEVICE,
            compute_type=WHISPER_COMPUTE_TYPE
        )

        # Transcribe audio
        log.info("transcription_starting", model=WHISPER_MODEL)
        segments = transcriber.transcribe(
            audio_path=local_audio_path,
            language=language
        )
        log.info("transcription_complete", segments_count=len(segments))

        # Speaker diarization (if enabled and token available)
        if enable_diarization and PYANNOTE_AUTH_TOKEN:
            log.info("diarization_starting")
            diarizer = SpeakerDiarizer(auth_token=PYANNOTE_AUTH_TOKEN)
            speaker_segments = diarizer.diarize(local_audio_path)

            # Align transcription with speakers
            aligner = TranscriptAligner()
            segments = aligner.align(segments, speaker_segments)
            log.info("diarization_complete", speakers=aligner.get_speaker_count())
        else:
            log.info("diarization_skipped", reason="disabled or no token")

        # Calculate statistics. A segment's "confidence" may be None
        # (WhisperTranscriber stores None when no avg_logprob is reported),
        # so coerce missing values to 0 instead of summing None.
        full_text = " ".join(s["text"] for s in segments)
        word_count = len(full_text.split())
        if segments:
            avg_confidence = sum((s.get("confidence") or 0) for s in segments) / len(segments)
        else:
            avg_confidence = 0

        # Export to different formats
        base_path = audio_path.rsplit("/", 1)[0]  # recordings/{recording_name}

        # WebVTT
        vtt_content = export_to_vtt(segments)
        vtt_path = f"{base_path}/transcript.vtt"
        storage.upload_content(vtt_content, vtt_path, content_type="text/vtt")

        # SRT
        srt_content = export_to_srt(segments)
        srt_path = f"{base_path}/transcript.srt"
        storage.upload_content(srt_content, srt_path, content_type="text/plain")

        # JSON (full data with speakers)
        json_content = export_to_json(segments, {
            "transcription_id": transcription_id,
            "recording_id": recording_id,
            "language": language,
            "model": WHISPER_MODEL,
            "word_count": word_count,
            "confidence": avg_confidence
        })
        json_path = f"{base_path}/transcript.json"
        storage.upload_content(json_content, json_path, content_type="application/json")

        # Calculate processing time
        processing_duration = int(time.time() - start_time)

        # Update status to completed
        result = {
            "transcription_id": transcription_id,
            "recording_id": recording_id,
            "status": "completed",
            "full_text": full_text,
            "word_count": word_count,
            "confidence_score": round(avg_confidence, 3),
            "segments_count": len(segments),
            "vtt_path": vtt_path,
            "srt_path": srt_path,
            "json_path": json_path,
            "processing_duration_seconds": processing_duration
        }

        update_transcription_status(
            transcription_id,
            status="completed",
            full_text=full_text,
            word_count=word_count,
            confidence_score=avg_confidence,
            vtt_path=vtt_path,
            srt_path=srt_path,
            json_path=json_path,
            processing_duration_seconds=processing_duration,
            processing_completed_at=datetime.now(timezone.utc).isoformat()
        )

        log.info(
            "transcription_completed",
            transcription_id=transcription_id,
            word_count=word_count,
            duration_seconds=processing_duration
        )

        return result

    except Exception as e:
        log.error(
            "transcription_failed",
            transcription_id=transcription_id,
            error=str(e)
        )

        update_transcription_status(
            transcription_id,
            status="failed",
            error_message=str(e)
        )

        raise

    finally:
        # Always remove the downloaded temp audio file. Previously this
        # only ran on success, leaking files on every failed job.
        if os.path.exists(local_audio_path):
            os.remove(local_audio_path)
211
backend/transcription_worker/transcriber.py
Normal file
211
backend/transcription_worker/transcriber.py
Normal file
@@ -0,0 +1,211 @@
|
||||
"""
|
||||
BreakPilot Whisper Transcriber
|
||||
|
||||
Uses faster-whisper (MIT License) for GPU-optimized transcription.
|
||||
Based on CTranslate2 for fast inference.
|
||||
"""
|
||||
|
||||
import os
|
||||
import structlog
|
||||
from typing import List, Dict, Optional
|
||||
|
||||
log = structlog.get_logger(__name__)
|
||||
|
||||
|
||||
class WhisperTranscriber:
    """
    Whisper-based audio transcription using faster-whisper.

    faster-whisper is a reimplementation of OpenAI Whisper using CTranslate2,
    which is significantly faster than the original implementation.

    License: MIT
    Source: https://github.com/SYSTRAN/faster-whisper
    """

    def __init__(
        self,
        model_name: str = "large-v3",
        device: str = "cpu",
        compute_type: str = "int8"
    ):
        """
        Initialize the transcriber.

        The model itself is loaded lazily on first use (see _load_model),
        so construction is cheap and performs no downloads.

        Args:
            model_name: Whisper model to use (tiny, base, small, medium, large-v3)
            device: Device to run on ("cpu", "cuda", "auto")
            compute_type: Quantization type ("int8", "float16", "float32")
        """
        self.model_name = model_name
        self.device = device
        self.compute_type = compute_type
        self._model = None  # populated by _load_model() on first use

    def _load_model(self):
        """Lazy load the model on first use."""
        if self._model is not None:
            return

        try:
            from faster_whisper import WhisperModel

            log.info(
                "loading_whisper_model",
                model=self.model_name,
                device=self.device,
                compute_type=self.compute_type
            )

            self._model = WhisperModel(
                self.model_name,
                device=self.device,
                compute_type=self.compute_type
            )

            log.info("whisper_model_loaded")

        except ImportError:
            log.error("faster_whisper_not_installed")
            raise ImportError(
                "faster-whisper is not installed. "
                "Install with: pip install faster-whisper"
            )

    def transcribe(
        self,
        audio_path: str,
        language: str = "de",
        beam_size: int = 5,
        word_timestamps: bool = True,
        vad_filter: bool = True,
        vad_parameters: Optional[dict] = None
    ) -> List[Dict]:
        """
        Transcribe an audio file.

        Args:
            audio_path: Path to audio file (WAV, MP3, etc.)
            language: Language code (de, en, fr, etc.) or None for auto-detection
            beam_size: Beam size for decoding (higher = better but slower)
            word_timestamps: Include word-level timestamps
            vad_filter: Enable Voice Activity Detection to filter silence
            vad_parameters: Custom VAD parameters

        Returns:
            List of segments with text, timestamps (milliseconds), and
            confidence scores. "confidence" is the segment avg_logprob
            rounded to 3 decimals, or None when unavailable.

        Raises:
            FileNotFoundError: If audio_path does not exist.
        """
        self._load_model()

        if not os.path.exists(audio_path):
            raise FileNotFoundError(f"Audio file not found: {audio_path}")

        log.info(
            "transcribing_audio",
            audio_path=audio_path,
            language=language,
            beam_size=beam_size
        )

        # Default VAD parameters for better speech detection
        if vad_parameters is None:
            vad_parameters = {
                "min_silence_duration_ms": 500,
                "speech_pad_ms": 400
            }

        # Run transcription
        segments_gen, info = self._model.transcribe(
            audio_path,
            language=language,
            beam_size=beam_size,
            word_timestamps=word_timestamps,
            vad_filter=vad_filter,
            vad_parameters=vad_parameters
        )

        log.info(
            "transcription_info",
            detected_language=info.language,
            language_probability=info.language_probability,
            duration=info.duration
        )

        # Convert generator to list of segments
        segments = []
        for i, segment in enumerate(segments_gen):
            seg_dict = {
                "index": i,
                "start_time_ms": int(segment.start * 1000),
                "end_time_ms": int(segment.end * 1000),
                "text": segment.text.strip(),
                # Compare against None explicitly: avg_logprob is a log
                # probability and a value of exactly 0.0 is legitimate,
                # but the previous truthiness check mapped it to None.
                "confidence": round(segment.avg_logprob, 3) if segment.avg_logprob is not None else None,
                "no_speech_prob": segment.no_speech_prob
            }

            # Add word-level timestamps if available
            if word_timestamps and segment.words:
                seg_dict["words"] = [
                    {
                        "word": word.word,
                        "start": int(word.start * 1000),
                        "end": int(word.end * 1000),
                        "probability": round(word.probability, 3)
                    }
                    for word in segment.words
                ]

            segments.append(seg_dict)

        log.info(
            "transcription_complete",
            segments_count=len(segments),
            duration_seconds=info.duration
        )

        return segments

    def detect_language(self, audio_path: str) -> dict:
        """
        Detect the language of an audio file.

        The segment generator returned by faster-whisper is never
        consumed here, so only the language-detection pass runs (which
        inspects the beginning of the audio), not a full transcription.

        Args:
            audio_path: Path to audio file

        Returns:
            dict with language code and probability

        Raises:
            FileNotFoundError: If audio_path does not exist.
        """
        self._load_model()

        if not os.path.exists(audio_path):
            raise FileNotFoundError(f"Audio file not found: {audio_path}")

        _, info = self._model.transcribe(
            audio_path,
            language=None,  # Auto-detect
            beam_size=1,
            without_timestamps=True
        )

        return {
            "language": info.language,
            "probability": info.language_probability
        }

    @property
    def available_languages(self) -> List[str]:
        """List of supported languages."""
        return [
            "de", "en", "fr", "es", "it", "pt", "nl", "pl", "ru",
            "zh", "ja", "ko", "ar", "tr", "hi", "vi", "th", "id"
        ]

    def get_model_info(self) -> dict:
        """Get information about the loaded model (without loading it)."""
        return {
            "model_name": self.model_name,
            "device": self.device,
            "compute_type": self.compute_type,
            "loaded": self._model is not None
        }
129
backend/transcription_worker/worker.py
Normal file
129
backend/transcription_worker/worker.py
Normal file
@@ -0,0 +1,129 @@
|
||||
"""
|
||||
BreakPilot Transcription Worker - Main Entry Point
|
||||
|
||||
Runs as an RQ worker, processing transcription jobs from the queue.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import signal
|
||||
import structlog
|
||||
from redis import Redis
|
||||
from rq import Worker, Queue, Connection
|
||||
|
||||
# Configure logging
|
||||
structlog.configure(
|
||||
processors=[
|
||||
structlog.stdlib.filter_by_level,
|
||||
structlog.stdlib.add_logger_name,
|
||||
structlog.stdlib.add_log_level,
|
||||
structlog.stdlib.PositionalArgumentsFormatter(),
|
||||
structlog.processors.TimeStamper(fmt="iso"),
|
||||
structlog.processors.StackInfoRenderer(),
|
||||
structlog.processors.format_exc_info,
|
||||
structlog.processors.UnicodeDecoder(),
|
||||
structlog.processors.JSONRenderer()
|
||||
],
|
||||
wrapper_class=structlog.stdlib.BoundLogger,
|
||||
context_class=dict,
|
||||
logger_factory=structlog.stdlib.LoggerFactory(),
|
||||
cache_logger_on_first_use=True,
|
||||
)
|
||||
|
||||
log = structlog.get_logger(__name__)
|
||||
|
||||
# Configuration
|
||||
REDIS_URL = os.getenv("REDIS_URL", "redis://localhost:6379/1")
|
||||
QUEUE_NAME = os.getenv("QUEUE_NAME", "transcription")
|
||||
WORKER_NAME = os.getenv("WORKER_NAME", f"transcription-worker-{os.getpid()}")
|
||||
|
||||
|
||||
def setup_signal_handlers(worker: Worker):
    """Register SIGINT/SIGTERM handlers that stop the worker gracefully.

    Args:
        worker: The RQ worker instance to stop when a shutdown
            signal is received.
    """

    def _on_shutdown(signum, frame):
        log.info("shutdown_signal_received", signal=signum)
        worker.request_stop(signum, frame)

    for sig in (signal.SIGINT, signal.SIGTERM):
        signal.signal(sig, _on_shutdown)
def preload_models():
    """Preload ML models to reduce first-job latency.

    Eagerly loads the Whisper model (WhisperTranscriber defers loading
    until first use, so instantiation alone does not preload anything)
    and instantiates the diarizer when a pyannote token is configured.
    Failures are logged but non-fatal: models simply fall back to lazy
    loading on the first job.
    """
    log.info("preloading_models")

    try:
        from .transcriber import WhisperTranscriber
        from .diarizer import SpeakerDiarizer

        # Initialize transcriber (downloads model if needed)
        whisper_model = os.getenv("WHISPER_MODEL", "large-v3")
        device = os.getenv("WHISPER_DEVICE", "cpu")
        compute_type = os.getenv("WHISPER_COMPUTE_TYPE", "int8")

        transcriber = WhisperTranscriber(
            model_name=whisper_model,
            device=device,
            compute_type=compute_type
        )
        # WhisperTranscriber loads lazily; trigger the load now so the
        # model is actually downloaded/initialized before the first job.
        transcriber._load_model()
        log.info("whisper_model_loaded", model=whisper_model, device=device)

        # Initialize diarizer (downloads model if needed)
        pyannote_token = os.getenv("PYANNOTE_AUTH_TOKEN")
        if pyannote_token:
            # NOTE(review): assumes SpeakerDiarizer loads its pipeline in
            # __init__ -- confirm, or trigger its lazy load here as well.
            SpeakerDiarizer(auth_token=pyannote_token)
            log.info("pyannote_model_loaded")
        else:
            log.warning("pyannote_token_missing", message="Speaker diarization disabled")

    except Exception as e:
        log.error("model_preload_failed", error=str(e))
        # Don't fail startup, models will be loaded on first job
def main():
    """Main entry point for the worker.

    Connects to Redis (exiting with status 1 if unreachable), preloads
    ML models, then processes jobs from the transcription queue until a
    shutdown signal arrives.
    """
    log.info(
        "worker_starting",
        redis_url=REDIS_URL,
        queue=QUEUE_NAME,
        worker_name=WORKER_NAME
    )

    # Connect to Redis
    redis_conn = Redis.from_url(REDIS_URL)

    # Fail fast: the worker is useless without its queue backend.
    try:
        redis_conn.ping()
        log.info("redis_connected")
    except Exception as e:
        log.error("redis_connection_failed", error=str(e))
        sys.exit(1)

    # Preload models (non-fatal on failure)
    preload_models()

    # Create queue
    queue = Queue(QUEUE_NAME, connection=redis_conn)

    # Create worker
    worker = Worker(
        queues=[queue],
        connection=redis_conn,
        name=WORKER_NAME
    )

    # Setup signal handlers for graceful shutdown
    setup_signal_handlers(worker)

    log.info("worker_ready", queues=[QUEUE_NAME])

    # Start processing. The worker already carries its connection
    # explicitly, so the deprecated rq.Connection context manager
    # (removed in RQ 2.0) is unnecessary and has been dropped.
    worker.work(with_scheduler=True)


if __name__ == "__main__":
    main()
Reference in New Issue
Block a user