This repository has been archived on 2026-02-15. You can view and clone its files, but you cannot open issues or pull requests, or push commits.
Files
breakpilot-pwa/backend/transcription_worker/worker.py
Benjamin Admin 21a844cb8a fix: Restore all files lost during destructive rebase
A previous `git pull --rebase origin main` dropped 177 local commits,
losing 3400+ files across admin-v2, backend, studio-v2, website,
klausur-service, and many other services. The partial restore attempt
(660295e2) only recovered some files.

This commit restores all missing files from pre-rebase ref 98933f5e
while preserving post-rebase additions (night-scheduler, night-mode UI,
NightModeWidget dashboard integration).

Restored features include:
- AI Module Sidebar (FAB), OCR Labeling, OCR Compare
- GPU Dashboard, RAG Pipeline, Magic Help
- Klausur-Korrektur (8 files), Abitur-Archiv (5+ files)
- Companion, Zeugnisse-Crawler, Screen Flow
- Full backend, studio-v2, website, klausur-service
- All compliance SDKs, agent-core, voice-service
- CI/CD configs, documentation, scripts

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-09 09:51:32 +01:00

130 lines
3.6 KiB
Python

"""
BreakPilot Transcription Worker - Main Entry Point
Runs as an RQ worker, processing transcription jobs from the queue.
"""
import os
import sys
import signal
import structlog
from redis import Redis
from rq import Worker, Queue, Connection
# Configure logging: structured JSON output via structlog, layered on the
# stdlib logging machinery so standard handlers/levels still apply.
structlog.configure(
processors=[
structlog.stdlib.filter_by_level,
structlog.stdlib.add_logger_name,
structlog.stdlib.add_log_level,
structlog.stdlib.PositionalArgumentsFormatter(),
structlog.processors.TimeStamper(fmt="iso"),  # ISO-8601 timestamps
structlog.processors.StackInfoRenderer(),
structlog.processors.format_exc_info,
structlog.processors.UnicodeDecoder(),
structlog.processors.JSONRenderer()  # final output is one JSON object per line
],
wrapper_class=structlog.stdlib.BoundLogger,
context_class=dict,
logger_factory=structlog.stdlib.LoggerFactory(),
cache_logger_on_first_use=True,
)
log = structlog.get_logger(__name__)
# Configuration (environment-driven, with local-dev defaults)
REDIS_URL = os.getenv("REDIS_URL", "redis://localhost:6379/1")
QUEUE_NAME = os.getenv("QUEUE_NAME", "transcription")
# PID suffix keeps worker names unique when several run on one host.
WORKER_NAME = os.getenv("WORKER_NAME", f"transcription-worker-{os.getpid()}")
def setup_signal_handlers(worker: Worker):
    """Install SIGINT/SIGTERM handlers that stop *worker* gracefully.

    The handler logs the received signal and delegates to the worker's
    own ``request_stop``, which lets an in-flight job finish.
    """
    def _on_shutdown(signum, frame):
        log.info("shutdown_signal_received", signal=signum)
        worker.request_stop(signum, frame)

    for sig in (signal.SIGINT, signal.SIGTERM):
        signal.signal(sig, _on_shutdown)
def preload_models():
    """Preload ML models so the first queued job does not pay load latency.

    Best-effort: any failure is logged and swallowed, the worker still
    starts, and the models are loaded lazily on the first job instead.
    """
    log.info("preloading_models")
    try:
        # Function-scope imports keep the heavy ML dependencies off the
        # module import path (the worker can start without them present).
        from .transcriber import WhisperTranscriber
        from .diarizer import SpeakerDiarizer

        whisper_model = os.getenv("WHISPER_MODEL", "large-v3")
        device = os.getenv("WHISPER_DEVICE", "cpu")
        compute_type = os.getenv("WHISPER_COMPUTE_TYPE", "int8")
        # Instantiation triggers the model download/warm-up; the object
        # itself is deliberately discarded — only the side effect (cached
        # model files) matters here, so no local binding is kept.
        WhisperTranscriber(
            model_name=whisper_model,
            device=device,
            compute_type=compute_type,
        )
        log.info("whisper_model_loaded", model=whisper_model, device=device)

        # Diarization is optional: it requires a HuggingFace auth token.
        pyannote_token = os.getenv("PYANNOTE_AUTH_TOKEN")
        if pyannote_token:
            SpeakerDiarizer(auth_token=pyannote_token)
            log.info("pyannote_model_loaded")
        else:
            log.warning("pyannote_token_missing", message="Speaker diarization disabled")
    except Exception as e:
        # Don't fail startup — models will be loaded on the first job.
        log.error("model_preload_failed", error=str(e))
def main():
    """Worker entry point: connect to Redis, warm models, run the RQ worker.

    Exits with status 1 if Redis is unreachable at startup.
    """
    log.info(
        "worker_starting",
        redis_url=REDIS_URL,
        queue=QUEUE_NAME,
        worker_name=WORKER_NAME,
    )
    redis_conn = Redis.from_url(REDIS_URL)
    # Fail fast: the worker is useless without a reachable Redis.
    try:
        redis_conn.ping()
        log.info("redis_connected")
    except Exception as e:
        log.error("redis_connection_failed", error=str(e))
        sys.exit(1)
    # Warm the ML models (best-effort; see preload_models).
    preload_models()
    queue = Queue(QUEUE_NAME, connection=redis_conn)
    worker = Worker(
        queues=[queue],
        connection=redis_conn,
        name=WORKER_NAME,
    )
    setup_signal_handlers(worker)
    log.info("worker_ready", queues=[QUEUE_NAME])
    # NOTE: the former `with Connection(redis_conn):` wrapper was removed —
    # the connection is already passed explicitly to Queue and Worker above,
    # and the Connection context manager was deprecated in RQ 1.x and
    # removed entirely in RQ 2.0.
    worker.work(with_scheduler=True)


if __name__ == "__main__":
    main()