A previous `git pull --rebase origin main` dropped 177 local commits,
losing 3400+ files across admin-v2, backend, studio-v2, website,
klausur-service, and many other services. The partial restore attempt
(660295e2) only recovered some files.
This commit restores all missing files from pre-rebase ref 98933f5e
while preserving post-rebase additions (night-scheduler, night-mode UI,
NightModeWidget dashboard integration).
Restored features include:
- AI Module Sidebar (FAB), OCR Labeling, OCR Compare
- GPU Dashboard, RAG Pipeline, Magic Help
- Klausur-Korrektur (8 files), Abitur-Archiv (5+ files)
- Companion, Zeugnisse-Crawler, Screen Flow
- Full backend, studio-v2, website, klausur-service
- All compliance SDKs, agent-core, voice-service
- CI/CD configs, documentation, scripts
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
212 lines
6.2 KiB
Python
"""
|
|
BreakPilot Whisper Transcriber
|
|
|
|
Uses faster-whisper (MIT License) for GPU-optimized transcription.
|
|
Based on CTranslate2 for fast inference.
|
|
"""
|
|
|
|
import os
|
|
import structlog
|
|
from typing import List, Dict, Optional
|
|
|
|
log = structlog.get_logger(__name__)
|
|
|
|
|
|
class WhisperTranscriber:
    """
    Whisper-based audio transcription using faster-whisper.

    faster-whisper is a reimplementation of OpenAI Whisper using CTranslate2,
    which is significantly faster than the original implementation.

    License: MIT
    Source: https://github.com/SYSTRAN/faster-whisper
    """

    def __init__(
        self,
        model_name: str = "large-v3",
        device: str = "cpu",
        compute_type: str = "int8"
    ):
        """
        Initialize the transcriber.

        The model is NOT loaded here: it is lazily loaded on first use
        (see _load_model), so constructing this object is cheap and does
        not require faster-whisper to be installed.

        Args:
            model_name: Whisper model to use (tiny, base, small, medium, large-v3)
            device: Device to run on ("cpu", "cuda", "auto")
            compute_type: Quantization type ("int8", "float16", "float32")
        """
        self.model_name = model_name
        self.device = device
        self.compute_type = compute_type
        # Populated by _load_model() on the first transcription call.
        self._model = None

    def _load_model(self):
        """Lazy load the model on first use.

        Raises:
            ImportError: If faster-whisper is not installed.
        """
        if self._model is not None:
            return

        try:
            from faster_whisper import WhisperModel

            log.info(
                "loading_whisper_model",
                model=self.model_name,
                device=self.device,
                compute_type=self.compute_type
            )

            self._model = WhisperModel(
                self.model_name,
                device=self.device,
                compute_type=self.compute_type
            )

            log.info("whisper_model_loaded")

        except ImportError:
            log.error("faster_whisper_not_installed")
            raise ImportError(
                "faster-whisper is not installed. "
                "Install with: pip install faster-whisper"
            )

    def transcribe(
        self,
        audio_path: str,
        language: Optional[str] = "de",
        beam_size: int = 5,
        word_timestamps: bool = True,
        vad_filter: bool = True,
        vad_parameters: Optional[dict] = None
    ) -> List[Dict]:
        """
        Transcribe an audio file.

        Args:
            audio_path: Path to audio file (WAV, MP3, etc.)
            language: Language code (de, en, fr, etc.) or None for auto-detection
            beam_size: Beam size for decoding (higher = better but slower)
            word_timestamps: Include word-level timestamps
            vad_filter: Enable Voice Activity Detection to filter silence
            vad_parameters: Custom VAD parameters

        Returns:
            List of segments with text, timestamps, and confidence scores.
            Each segment dict has: index, start_time_ms, end_time_ms, text,
            confidence (avg log-probability, may be None), no_speech_prob,
            and optionally "words" with per-word millisecond timestamps.

        Raises:
            FileNotFoundError: If audio_path does not exist.
            ImportError: If faster-whisper is not installed.
        """
        self._load_model()

        if not os.path.exists(audio_path):
            raise FileNotFoundError(f"Audio file not found: {audio_path}")

        log.info(
            "transcribing_audio",
            audio_path=audio_path,
            language=language,
            beam_size=beam_size
        )

        # Default VAD parameters for better speech detection
        if vad_parameters is None:
            vad_parameters = {
                "min_silence_duration_ms": 500,
                "speech_pad_ms": 400
            }

        # Run transcription. faster-whisper returns a lazy segment
        # generator plus a TranscriptionInfo with language/duration data.
        segments_gen, info = self._model.transcribe(
            audio_path,
            language=language,
            beam_size=beam_size,
            word_timestamps=word_timestamps,
            vad_filter=vad_filter,
            vad_parameters=vad_parameters
        )

        log.info(
            "transcription_info",
            detected_language=info.language,
            language_probability=info.language_probability,
            duration=info.duration
        )

        # Convert generator to list of segments (this drives the actual
        # decoding, since the generator is lazy).
        segments = []
        for i, segment in enumerate(segments_gen):
            seg_dict = {
                "index": i,
                "start_time_ms": int(segment.start * 1000),
                "end_time_ms": int(segment.end * 1000),
                "text": segment.text.strip(),
                # Explicit None-check: avg_logprob of exactly 0.0 is a
                # valid (maximal) score and must not be dropped.
                "confidence": round(segment.avg_logprob, 3) if segment.avg_logprob is not None else None,
                "no_speech_prob": segment.no_speech_prob
            }

            # Add word-level timestamps if available
            if word_timestamps and segment.words:
                seg_dict["words"] = [
                    {
                        "word": word.word,
                        "start": int(word.start * 1000),
                        "end": int(word.end * 1000),
                        "probability": round(word.probability, 3)
                    }
                    for word in segment.words
                ]

            segments.append(seg_dict)

        log.info(
            "transcription_complete",
            segments_count=len(segments),
            duration_seconds=info.duration
        )

        return segments

    def detect_language(self, audio_path: str) -> dict:
        """
        Detect the language of an audio file.

        Args:
            audio_path: Path to audio file

        Returns:
            dict with language code and probability

        Raises:
            FileNotFoundError: If audio_path does not exist.
            ImportError: If faster-whisper is not installed.
        """
        self._load_model()

        if not os.path.exists(audio_path):
            raise FileNotFoundError(f"Audio file not found: {audio_path}")

        # Only the returned info is used; the (lazy) segment generator is
        # discarded, so no full transcription is performed. faster-whisper
        # detects language from the leading audio window internally.
        _, info = self._model.transcribe(
            audio_path,
            language=None,  # Auto-detect
            beam_size=1,
            without_timestamps=True
        )

        return {
            "language": info.language,
            "probability": info.language_probability
        }

    @property
    def available_languages(self) -> List[str]:
        """List of supported languages."""
        return [
            "de", "en", "fr", "es", "it", "pt", "nl", "pl", "ru",
            "zh", "ja", "ko", "ar", "tr", "hi", "vi", "th", "id"
        ]

    def get_model_info(self) -> dict:
        """Get information about the loaded model.

        Returns:
            dict with model_name, device, compute_type, and whether the
            underlying model has been loaded yet.
        """
        return {
            "model_name": self.model_name,
            "device": self.device,
            "compute_type": self.compute_type,
            "loaded": self._model is not None
        }
|