New standalone Python/FastAPI service for automatic compliance document scanning, LLM-based classification, IPFS archival, and gap analysis. Includes extractors (PDF, DOCX, XLSX, PPTX), keyword fallback classifier, compliance matrix, and full REST API on port 8098. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
17 lines
421 B
Python
17 lines
421 B
Python
"""Tests for document text extractors."""
|
|
|
|
import pytest
|
|
from extractors.dispatcher import extract_text, EXTRACTORS
|
|
|
|
|
|
def test_supported_extensions():
    """Check that EXTRACTORS has an entry for each expected file extension."""
    # Same extensions, same order as the individual assertions they replace,
    # so the first missing entry is still the one that fails the test.
    for suffix in (".pdf", ".docx", ".xlsx", ".pptx"):
        assert suffix in EXTRACTORS
|
|
|
|
|
|
def test_unsupported_extension():
    """Check that extract_text raises ValueError matching 'Unsupported' for .txt."""
    expect_rejection = pytest.raises(ValueError, match="Unsupported")
    # NOTE(review): presumably the extension is rejected before the path is
    # opened, so the file need not exist — confirm against extract_text.
    with expect_rejection:
        extract_text("/tmp/test.txt", ".txt")
|