"""DOCX text extraction using python-docx.""" from docx import Document def extract_docx(file_path: str) -> str: """Extract text from a DOCX file.""" doc = Document(file_path) paragraphs = [p.text for p in doc.paragraphs if p.text.strip()] # Also extract from tables for table in doc.tables: for row in table.rows: cells = [cell.text.strip() for cell in row.cells if cell.text.strip()] if cells: paragraphs.append(" | ".join(cells)) return "\n\n".join(paragraphs)