"""PPTX text extraction using python-pptx.""" from pptx import Presentation def extract_pptx(file_path: str) -> str: """Extract text from a PPTX file.""" prs = Presentation(file_path) slides = [] for i, slide in enumerate(prs.slides, 1): texts = [] for shape in slide.shapes: if shape.has_text_frame: for para in shape.text_frame.paragraphs: text = para.text.strip() if text: texts.append(text) if texts: slides.append(f"[Folie {i}]\n" + "\n".join(texts)) return "\n\n".join(slides)