diff --git a/compliance-tts-service/main.py b/compliance-tts-service/main.py
index f01cde2..1b4ba7c 100644
--- a/compliance-tts-service/main.py
+++ b/compliance-tts-service/main.py
@@ -141,11 +141,16 @@ EDGE_TTS_VOICES = {
"en": "en-US-GuyNeural",
}
+# Matches <en>…</en> markers used to force English pronunciation in German text.
+# Group 1 is the marked text. The match is non-greedy so that several markers in
+# one text are matched separately, and DOTALL lets a marked span contain newlines.
+_EN_TAG_RE = re.compile(r'<en>(.*?)</en>', re.DOTALL)
+
async def _edge_tts_synthesize(text: str, language: str, output_path: str) -> bool:
- """Synthesize using Edge TTS (Microsoft Neural Voices). Returns True on success."""
+ """Synthesize using Edge TTS. Handles … mixed-language markers."""
try:
import edge_tts
+ if '' in text:
+ return await _edge_tts_mixed(text, language, output_path)
voice = EDGE_TTS_VOICES.get(language, EDGE_TTS_VOICES["de"])
communicate = edge_tts.Communicate(text, voice)
await communicate.save(output_path)
@@ -155,6 +160,45 @@ async def _edge_tts_synthesize(text: str, language: str, output_path: str) -> bo
return False
+async def _edge_tts_mixed(text: str, base_lang: str, output_path: str) -> bool:
+    """Synthesize text containing English-marked spans into a single MP3.
+
+    The text is split on ``_EN_TAG_RE`` matches: group 1 of every match is
+    spoken with the ``"en"`` voice, the text between matches with the voice
+    for *base_lang* (falling back to the ``"de"`` voice for unknown keys).
+    Each segment is rendered to its own MP3 in a temporary directory and the
+    pieces are joined with ffmpeg's concat demuxer using stream copy.
+
+    Args:
+        text: Input text, possibly containing marked English spans.
+        base_lang: Key into ``EDGE_TTS_VOICES`` used for unmarked text.
+        output_path: Destination path of the resulting MP3.
+
+    Returns:
+        True if *output_path* was written; False if there was nothing to
+        speak or synthesis/concatenation failed.
+    """
+    import asyncio
+    import tempfile
+
+    import edge_tts
+
+    # Build the ordered list of (language, text) pieces.
+    segments: list[tuple[str, str]] = []
+    last = 0
+    for m in _EN_TAG_RE.finditer(text):
+        if m.start() > last:
+            segments.append((base_lang, text[last:m.start()].strip()))
+        segments.append(("en", m.group(1).strip()))
+        last = m.end()
+    if last < len(text):
+        segments.append((base_lang, text[last:].strip()))
+    # Whitespace-only pieces would produce empty audio requests; drop them.
+    segments = [(lang, seg) for lang, seg in segments if seg]
+
+    if not segments:
+        # Nothing to speak: do not run ffmpeg on an empty concat list.
+        logger.warning("Mixed TTS skipped: no speakable text")
+        return False
+
+    try:
+        if len(segments) == 1:
+            # A single segment needs no concatenation; write it directly.
+            lang, seg_text = segments[0]
+            voice = EDGE_TTS_VOICES.get(lang, EDGE_TTS_VOICES["de"])
+            await edge_tts.Communicate(seg_text, voice).save(output_path)
+            return True
+
+        # The context manager removes the directory even when synthesis fails.
+        with tempfile.TemporaryDirectory() as tmpdir:
+            seg_files = []
+            for i, (lang, seg_text) in enumerate(segments):
+                voice = EDGE_TTS_VOICES.get(lang, EDGE_TTS_VOICES["de"])
+                seg_path = os.path.join(tmpdir, f"seg_{i:04d}.mp3")
+                await edge_tts.Communicate(seg_text, voice).save(seg_path)
+                seg_files.append(seg_path)
+
+            list_file = os.path.join(tmpdir, "list.txt")
+            with open(list_file, "w", encoding="utf-8") as f:
+                # The concat list quotes paths with single quotes; a literal
+                # quote inside a path has to be written as '\''.
+                f.writelines(
+                    "file '{}'\n".format(sf.replace("'", "'\\''"))
+                    for sf in seg_files
+                )
+
+            # subprocess.run blocks; run it in a worker thread so the event
+            # loop keeps serving other requests while ffmpeg works.
+            proc = await asyncio.to_thread(
+                subprocess.run,
+                ["ffmpeg", "-y", "-f", "concat", "-safe", "0", "-i", list_file, "-c", "copy", output_path],
+                capture_output=True, text=True, timeout=60,
+            )
+            if proc.returncode != 0:
+                # Keep only the tail of stderr, where ffmpeg reports the error.
+                logger.warning(f"Mixed TTS concat failed: {proc.stderr.strip()[-500:]}")
+                return False
+            return True
+    except Exception as e:
+        logger.warning(f"Mixed TTS failed: {e}")
+        return False
+
+
@app.post("/synthesize-direct")
async def synthesize_direct(req: SynthesizeDirectRequest):
"""Synthesize text and return MP3 audio directly (no MinIO upload).