Compare commits
3 Commits
95c371e9a5
...
df5b6d69ef
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
df5b6d69ef | ||
|
|
4f6ac9b23a | ||
|
|
5ea31a3236 |
@@ -15,8 +15,16 @@ WORKDIR /app
|
|||||||
COPY requirements.txt .
|
COPY requirements.txt .
|
||||||
RUN pip install --no-cache-dir -r requirements.txt
|
RUN pip install --no-cache-dir -r requirements.txt
|
||||||
|
|
||||||
# Download Piper voice model (German, thorsten, high quality)
|
# Download Piper voice models
|
||||||
RUN mkdir -p /app/models && wget -q -O /app/models/de_DE-thorsten-high.onnx "https://huggingface.co/rhasspy/piper-voices/resolve/main/de/de_DE/thorsten/high/de_DE-thorsten-high.onnx" && wget -q -O /app/models/de_DE-thorsten-high.onnx.json "https://huggingface.co/rhasspy/piper-voices/resolve/main/de/de_DE/thorsten/high/de_DE-thorsten-high.onnx.json"
|
RUN mkdir -p /app/models && \
|
||||||
|
wget -q -O /app/models/de_DE-thorsten-high.onnx \
|
||||||
|
"https://huggingface.co/rhasspy/piper-voices/resolve/main/de/de_DE/thorsten/high/de_DE-thorsten-high.onnx" && \
|
||||||
|
wget -q -O /app/models/de_DE-thorsten-high.onnx.json \
|
||||||
|
"https://huggingface.co/rhasspy/piper-voices/resolve/main/de/de_DE/thorsten/high/de_DE-thorsten-high.onnx.json" && \
|
||||||
|
wget -q -O /app/models/en_US-lessac-high.onnx \
|
||||||
|
"https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/lessac/high/en_US-lessac-high.onnx" && \
|
||||||
|
wget -q -O /app/models/en_US-lessac-high.onnx.json \
|
||||||
|
"https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/lessac/high/en_US-lessac-high.onnx.json"
|
||||||
|
|
||||||
# Copy application
|
# Copy application
|
||||||
COPY . .
|
COPY . .
|
||||||
|
|||||||
@@ -1,10 +1,12 @@
|
|||||||
"""Compliance TTS Service — Piper TTS + FFmpeg Audio/Video Pipeline."""
|
"""Compliance TTS Service — Piper TTS + FFmpeg Audio/Video Pipeline."""
|
||||||
|
import hashlib
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import tempfile
|
import tempfile
|
||||||
import uuid
|
import uuid
|
||||||
|
|
||||||
from fastapi import FastAPI, HTTPException
|
from fastapi import FastAPI, HTTPException
|
||||||
|
from fastapi.responses import FileResponse, Response
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
from storage import StorageClient
|
from storage import StorageClient
|
||||||
@@ -21,6 +23,7 @@ MINIO_ACCESS_KEY = os.getenv("MINIO_ACCESS_KEY", "breakpilot")
|
|||||||
MINIO_SECRET_KEY = os.getenv("MINIO_SECRET_KEY", "breakpilot123")
|
MINIO_SECRET_KEY = os.getenv("MINIO_SECRET_KEY", "breakpilot123")
|
||||||
MINIO_SECURE = os.getenv("MINIO_SECURE", "false").lower() == "true"
|
MINIO_SECURE = os.getenv("MINIO_SECURE", "false").lower() == "true"
|
||||||
PIPER_MODEL_PATH = os.getenv("PIPER_MODEL_PATH", "/app/models/de_DE-thorsten-high.onnx")
|
PIPER_MODEL_PATH = os.getenv("PIPER_MODEL_PATH", "/app/models/de_DE-thorsten-high.onnx")
|
||||||
|
PIPER_MODEL_EN_PATH = os.getenv("PIPER_MODEL_EN_PATH", "/app/models/en_US-lessac-high.onnx")
|
||||||
|
|
||||||
AUDIO_BUCKET = "compliance-training-audio"
|
AUDIO_BUCKET = "compliance-training-audio"
|
||||||
VIDEO_BUCKET = "compliance-training-video"
|
VIDEO_BUCKET = "compliance-training-video"
|
||||||
@@ -28,6 +31,7 @@ VIDEO_BUCKET = "compliance-training-video"
|
|||||||
# Initialize services
|
# Initialize services
|
||||||
storage = StorageClient(MINIO_ENDPOINT, MINIO_ACCESS_KEY, MINIO_SECRET_KEY, secure=MINIO_SECURE)
|
storage = StorageClient(MINIO_ENDPOINT, MINIO_ACCESS_KEY, MINIO_SECRET_KEY, secure=MINIO_SECURE)
|
||||||
tts = PiperTTS(PIPER_MODEL_PATH)
|
tts = PiperTTS(PIPER_MODEL_PATH)
|
||||||
|
tts_en = PiperTTS(PIPER_MODEL_EN_PATH) if os.path.exists(PIPER_MODEL_EN_PATH) else None
|
||||||
|
|
||||||
|
|
||||||
@app.on_event("startup")
|
@app.on_event("startup")
|
||||||
@@ -104,16 +108,108 @@ async def health():
|
|||||||
@app.get("/voices")
|
@app.get("/voices")
|
||||||
async def list_voices():
|
async def list_voices():
|
||||||
"""List available TTS voices."""
|
"""List available TTS voices."""
|
||||||
return {
|
voices = [
|
||||||
"voices": [
|
VoiceInfo(
|
||||||
VoiceInfo(
|
id="de_DE-thorsten-high",
|
||||||
id="de_DE-thorsten-high",
|
language="de",
|
||||||
language="de",
|
name="Thorsten (High Quality)",
|
||||||
name="Thorsten (High Quality)",
|
quality="high",
|
||||||
quality="high",
|
),
|
||||||
),
|
]
|
||||||
],
|
if tts_en is not None:
|
||||||
}
|
voices.append(VoiceInfo(
|
||||||
|
id="en_US-lessac-high",
|
||||||
|
language="en",
|
||||||
|
name="Lessac (High Quality)",
|
||||||
|
quality="high",
|
||||||
|
))
|
||||||
|
return {"voices": voices}
|
||||||
|
|
||||||
|
|
||||||
|
class SynthesizeDirectRequest(BaseModel):
    """Request body accepted by the ``/synthesize-direct`` endpoint."""

    # Text that should be turned into speech.
    text: str
    # Language code used to pick the voice; "de" when the client omits it.
    language: str = "de"
|
||||||
|
|
||||||
|
|
||||||
|
# Simple disk cache for synthesized audio (avoids re-synthesis of same text).
# The location can be overridden through the TTS_CACHE_DIR environment
# variable, in line with how the other service settings are read; the default
# path is unchanged.
TTS_CACHE_DIR = os.getenv("TTS_CACHE_DIR", "/tmp/tts-cache")
os.makedirs(TTS_CACHE_DIR, exist_ok=True)


# Edge TTS voice name used for each supported language code.
EDGE_TTS_VOICES = {
    "de": "de-DE-ConradNeural",
    "en": "en-US-GuyNeural",
}
|
||||||
|
|
||||||
|
|
||||||
|
async def _edge_tts_synthesize(text: str, language: str, output_path: str) -> bool:
    """Synthesize using Edge TTS (Microsoft Neural Voices).

    The audio is first written to a temporary sibling of ``output_path`` and
    only moved into place once it is complete and non-empty, so a failed or
    interrupted synthesis never leaves a truncated file at ``output_path``.

    Args:
        text: Text to synthesize.
        language: Key into ``EDGE_TTS_VOICES``; unknown values use the "de" voice.
        output_path: Destination file for the synthesized audio.

    Returns:
        True when ``output_path`` was written, False on any failure. Failures
        are logged as warnings and never raised, so the caller can fall back.
    """
    # Unique suffix so concurrent calls for the same destination do not collide.
    partial_path = f"{output_path}.{uuid.uuid4().hex}.part"
    try:
        # Imported inside the try so a missing package is reported as a
        # regular failure (False) instead of an import-time crash.
        import edge_tts

        voice = EDGE_TTS_VOICES.get(language, EDGE_TTS_VOICES["de"])
        communicate = edge_tts.Communicate(text, voice)
        await communicate.save(partial_path)

        if os.path.getsize(partial_path) == 0:
            raise RuntimeError("Edge TTS produced no audio data")

        # Atomic on the same filesystem: readers see the old state or the full file.
        os.replace(partial_path, output_path)
        return True
    except Exception as e:
        logger.warning(f"Edge TTS failed, falling back to Piper: {e}")
        return False
    finally:
        # Remove leftovers from a failed run; after a successful replace the
        # partial file no longer exists and the resulting error is ignored.
        try:
            os.remove(partial_path)
        except OSError:
            pass
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/synthesize-direct")
async def synthesize_direct(req: SynthesizeDirectRequest):
    """Synthesize text and return MP3 audio directly (no MinIO upload).

    Uses Edge TTS (Microsoft Neural Voices) for high-quality speech.
    Falls back to Piper TTS if Edge TTS is unavailable (e.g. no internet).
    Includes disk caching so identical text is only synthesized once.

    Audio is produced in a private staging file and published to the cache
    with an atomic rename, so a failed or still-running synthesis can never be
    served as a cache hit.

    Args:
        req: Request carrying the text and the language code.

    Returns:
        A ``FileResponse`` streaming the cached MP3. The ``X-TTS-Cache`` header
        is "hit" or "miss"; on a miss ``X-TTS-Engine`` names the engine used.

    Raises:
        HTTPException: 400 when the text is blank, 500 when Piper synthesis fails.
    """
    import shutil

    if not req.text.strip():
        raise HTTPException(status_code=400, detail="Text is empty")

    # Cache key based on text + language hash
    text_hash = hashlib.sha256(f"{req.language}:{req.text}".encode()).hexdigest()[:16]
    cache_path = os.path.join(TTS_CACHE_DIR, f"{text_hash}.mp3")

    # Ignore empty files so a previously poisoned entry gets regenerated.
    if os.path.isfile(cache_path) and os.path.getsize(cache_path) > 0:
        logger.info(f"TTS cache hit: {text_hash}")
        return FileResponse(
            cache_path,
            media_type="audio/mpeg",
            headers={"X-TTS-Cache": "hit"},
        )

    # Per-request staging file in the cache directory (same filesystem, so
    # os.replace below is an atomic rename).
    staging_path = os.path.join(TTS_CACHE_DIR, f"{text_hash}.{uuid.uuid4().hex}.tmp")
    try:
        # Try Edge TTS first (high quality neural voices)
        edge_ok = await _edge_tts_synthesize(req.text, req.language, staging_path)

        if edge_ok and os.path.isfile(staging_path) and os.path.getsize(staging_path) > 0:
            size = os.path.getsize(staging_path)
            os.replace(staging_path, cache_path)
            logger.info(f"Edge TTS ({req.language}): {len(req.text)} chars, {size} bytes, cached as {text_hash}")
            return FileResponse(
                cache_path,
                media_type="audio/mpeg",
                headers={"X-TTS-Cache": "miss", "X-TTS-Engine": "edge"},
            )

        # Fallback: Piper TTS (English model only when it was loaded)
        engine = tts
        if req.language == "en" and tts_en is not None:
            engine = tts_en

        # NOTE(review): synthesize_to_mp3 looks like a blocking call inside an
        # async handler; confirm whether it should run in a worker thread.
        with tempfile.TemporaryDirectory() as tmpdir:
            try:
                mp3_path, duration = engine.synthesize_to_mp3(req.text, tmpdir)
                shutil.copy2(mp3_path, staging_path)
                os.replace(staging_path, cache_path)
                logger.info(f"Piper TTS ({req.language}): {len(req.text)} chars, {duration:.1f}s, cached as {text_hash}")
            except Exception as e:
                logger.error(f"TTS synthesis failed: {e}")
                raise HTTPException(status_code=500, detail=str(e)) from e
    finally:
        # Drop the staging file if it was not published; after a successful
        # replace it no longer exists and the resulting error is ignored.
        try:
            os.remove(staging_path)
        except OSError:
            pass

    return FileResponse(
        cache_path,
        media_type="audio/mpeg",
        headers={"X-TTS-Cache": "miss", "X-TTS-Engine": "piper"},
    )
|
||||||
|
|
||||||
|
|
||||||
@app.post("/presigned-url", response_model=PresignedURLResponse)
|
@app.post("/presigned-url", response_model=PresignedURLResponse)
|
||||||
|
|||||||
@@ -3,3 +3,4 @@ uvicorn[standard]==0.27.1
|
|||||||
boto3==1.34.25
|
boto3==1.34.25
|
||||||
python-multipart==0.0.6
|
python-multipart==0.0.6
|
||||||
pydantic==2.6.1
|
pydantic==2.6.1
|
||||||
|
edge-tts==6.1.12
|
||||||
|
|||||||
Reference in New Issue
Block a user