"""Routes files to the appropriate extractor by extension.""" from .pdf_extractor import extract_pdf from .docx_extractor import extract_docx from .xlsx_extractor import extract_xlsx from .pptx_extractor import extract_pptx EXTRACTORS = { ".pdf": extract_pdf, ".docx": extract_docx, ".xlsx": extract_xlsx, ".pptx": extract_pptx, } def extract_text(file_path: str, extension: str) -> str: """Extract text from a file based on its extension. Returns extracted text or raises ValueError for unsupported types. """ ext = extension.lower() extractor = EXTRACTORS.get(ext) if extractor is None: raise ValueError(f"Unsupported file extension: {ext}") return extractor(file_path)