New standalone Python/FastAPI service for automatic compliance document scanning, LLM-based classification, IPFS archival, and gap analysis. Includes extractors (PDF, DOCX, XLSX, PPTX), keyword fallback classifier, compliance matrix, and full REST API on port 8098. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
37 lines
1.2 KiB
Python
37 lines
1.2 KiB
Python
"""Tests for keyword fallback classifier."""
|
|
|
|
import pytest
|
|
from classifiers.keyword_fallback import keyword_classify
|
|
|
|
|
|
def test_vvt_detection():
    """Check that an Art. 30 DSGVO processing-record text is labelled "VVT".

    The reported confidence must additionally be no greater than 0.3.
    """
    outcome = keyword_classify(
        "Verzeichnis von Verarbeitungstaetigkeiten gemaess Art. 30 DSGVO",
        "vvt.pdf",
    )

    assert outcome["classification"] == "VVT"
    assert outcome["confidence"] <= 0.3
|
|
|
|
|
|
def test_tom_detection():
    """Check that a technical/organisational measures text is labelled "TOM"."""
    sample = (
        "Technisch-organisatorische Massnahmen: Zutrittskontrolle, "
        "Zugangskontrolle, Verschluesselungskonzept"
    )
    outcome = keyword_classify(sample, "toms.docx")

    assert outcome["classification"] == "TOM"
|
|
|
|
|
|
def test_dse_detection():
    """Check that a privacy-notice text citing Art. 13 DSGVO is labelled "DSE"."""
    outcome = keyword_classify(
        "Datenschutzerklaerung: Informationspflichten nach Art. 13 DSGVO",
        "datenschutz.pdf",
    )

    assert outcome["classification"] == "DSE"
|
|
|
|
|
|
def test_unknown_document():
    """Check the fallback result for text with no compliance wording.

    Placeholder "Lorem ipsum" text is expected to be classified as
    "Sonstiges" with a confidence of 0.1.
    """
    result = keyword_classify("Lorem ipsum dolor sit amet", "random.pdf")

    assert result["classification"] == "Sonstiges"
    # Compare the float with a tolerance instead of exact equality so the
    # test does not depend on how the classifier arrives at 0.1.
    assert result["confidence"] == pytest.approx(0.1)
|
|
|
|
|
|
def test_confidence_capped():
    """Check that confidence stays at or below 0.3 for a phrase-rich text.

    The sample strings together several processing-record phrases; only the
    upper bound on the confidence is asserted, not the classification.
    """
    sample = (
        "Verarbeitungsverzeichnis Art. 30 Kategorie betroffener Personen "
        "Datenkategorien Zweck der Verarbeitung"
    )
    outcome = keyword_classify(sample, "vvt_complete.pdf")

    assert outcome["confidence"] <= 0.3
|