# breakpilot-compliance/document-crawler/tests/test_extractors.py

"""Tests for document text extractors."""

import pytest
from extractors.dispatcher import extract_text, EXTRACTORS


def test_supported_extensions():
    """Check that each expected file extension is present in EXTRACTORS.

    The extensions are checked in a fixed order, so the first missing
    one is the one reported by the failing assertion.
    """
    expected_extensions = (".pdf", ".docx", ".xlsx", ".pptx")
    for extension in expected_extensions:
        assert extension in EXTRACTORS


def test_unsupported_extension():
    """Check that extract_text rejects the ``.txt`` extension.

    The call is expected to raise ``ValueError`` whose message matches
    the pattern "Unsupported".
    """
    file_path = "/tmp/test.txt"
    extension = ".txt"
    expect_unsupported = pytest.raises(ValueError, match="Unsupported")
    with expect_unsupported:
        extract_text(file_path, extension)