All checks were successful
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-ai-compliance (push) Successful in 37s
CI / test-python-backend-compliance (push) Successful in 32s
CI / test-python-document-crawler (push) Successful in 22s
CI / test-python-dsms-gateway (push) Successful in 18s
Phase A: PostgreSQL State Store (sdk_states Tabelle, InMemory-Fallback)
Phase B: Modules dynamisch vom Backend, Scope DB-Persistenz, Source Policy State
Phase C: UCCA Frontend (3 Seiten, Wizard, RiskScoreGauge), Obligations Live-Daten
Phase D: Document Import (PDF/LLM/Gap-Analyse), System Screening (SBOM/OSV.dev)
Phase E: Company Profile CRUD mit Audit-Logging
Phase F: Tests (Python + TypeScript), flow-data.ts DB-Tabellen aktualisiert

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
124 lines
4.5 KiB
Python
124 lines
4.5 KiB
Python
"""Tests for Document Import routes (import_routes.py)."""
|
|
|
|
import pytest
|
|
from unittest.mock import MagicMock, patch, AsyncMock
|
|
|
|
from compliance.api.import_routes import (
|
|
detect_document_type,
|
|
analyze_gaps,
|
|
extract_text_from_pdf,
|
|
)
|
|
|
|
|
|
class TestDetectDocumentType:
    """Checks for the keyword-based ``detect_document_type`` classifier.

    Every test unpacks the result as a ``(document type, confidence)`` pair.
    """

    def test_dsfa_detection(self):
        """DSFA wording is labelled ``DSFA`` with confidence of at least 0.5."""
        label, score = detect_document_type(
            "Dies ist eine Datenschutz-Folgenabschaetzung (DSFA) nach Art. 35 DSGVO"
        )
        assert label == "DSFA"
        assert score >= 0.5

    def test_tom_detection(self):
        """TOM wording is labelled ``TOM`` with confidence of at least 0.5."""
        label, score = detect_document_type(
            "Technisch-organisatorische Massnahmen (TOM) zum Schutz personenbezogener Daten"
        )
        assert label == "TOM"
        assert score >= 0.5

    def test_vvt_detection(self):
        """VVT wording is labelled ``VVT`` with confidence of at least 0.5."""
        label, score = detect_document_type(
            "Verarbeitungsverzeichnis nach Art. 30 DSGVO - VVT processing activities"
        )
        assert label == "VVT"
        assert score >= 0.5

    def test_privacy_policy_detection(self):
        """Privacy-policy wording is labelled ``PRIVACY_POLICY`` (confidence >= 0.5)."""
        label, score = detect_document_type(
            "Datenschutzerklaerung - Privacy Policy fuer unsere Nutzer"
        )
        assert label == "PRIVACY_POLICY"
        assert score >= 0.5

    def test_unknown_document(self):
        """Filler text is expected to come back as ``OTHER`` with confidence 0.3."""
        label, score = detect_document_type("Lorem ipsum dolor sit amet")
        assert label == "OTHER"
        assert score == 0.3

    def test_empty_text(self):
        """An empty string is expected to come back as ``OTHER`` with confidence 0.3."""
        label, score = detect_document_type("")
        assert label == "OTHER"
        assert score == 0.3

    def test_confidence_increases_with_more_keywords(self):
        """A text with several DSFA terms must score higher than one with a single term."""
        one_keyword = "dsfa"
        many_keywords = "dsfa dpia datenschutz-folgenabschaetzung privacy impact"
        _, score_one = detect_document_type(one_keyword)
        _, score_many = detect_document_type(many_keywords)
        assert score_many > score_one

    def test_confidence_capped_at_095(self):
        """Confidence for a keyword-dense text must not exceed 0.95."""
        _, score = detect_document_type(
            "dsfa dpia datenschutz-folgenabschaetzung privacy impact assessment report analysis"
        )
        assert score <= 0.95
|
|
|
|
|
|
class TestAnalyzeGaps:
    """Checks for the gap-analysis rules applied by ``analyze_gaps``.

    Each test passes a text plus the document type ``"OTHER"`` and inspects
    the returned gap entries, which are indexed like dictionaries.
    """

    def test_ai_gap_detected(self):
        """AI wording without a risk classification yields a CRITICAL AI Act gap."""
        findings = analyze_gaps("Wir setzen KI und AI in unserer Anwendung ein", "OTHER")
        # The text mentions AI but no risk classification, so an AI Act gap
        # is expected.
        ai_findings = [
            entry for entry in findings if entry["category"] == "AI Act Compliance"
        ]
        assert len(ai_findings) > 0
        assert ai_findings[0]["severity"] == "CRITICAL"

    def test_no_gap_when_requirement_present(self):
        """No AI Act gap is reported when the text names a risk classification."""
        findings = analyze_gaps(
            "KI-System mit Risikoklassifizierung nach EU AI Act", "OTHER"
        )
        ai_findings = [
            entry for entry in findings if entry["category"] == "AI Act Compliance"
        ]
        assert len(ai_findings) == 0

    def test_tom_gap_detected(self):
        """Cloud/SaaS wording yields at least one gap in the ``TOMs`` category."""
        findings = analyze_gaps(
            "Cloud-basiertes SaaS-System mit KI-Funktionen", "OTHER"
        )
        tom_findings = [entry for entry in findings if entry["category"] == "TOMs"]
        assert len(tom_findings) > 0

    def test_no_gaps_for_irrelevant_text(self):
        """Text without any trigger wording must produce no gaps at all."""
        findings = analyze_gaps("Ein einfacher Flyer ohne Datenbezug", "OTHER")
        assert len(findings) == 0

    def test_gap_has_required_fields(self):
        """Every reported gap carries the five keys checked below."""
        expected_keys = ("id", "category", "severity", "regulation", "required_action")
        findings = analyze_gaps(
            "KI-System mit automatisierten Entscheidungen", "OTHER"
        )
        assert len(findings) > 0
        for entry in findings:
            # Keys are checked in the same order for every entry.
            for key in expected_keys:
                assert key in entry
|
|
|
|
|
|
class TestExtractTextFromPdf:
    """Checks for ``extract_text_from_pdf`` on degenerate or failing input."""

    def test_empty_bytes_returns_empty(self):
        """Zero-length input must produce an empty string."""
        extracted = extract_text_from_pdf(b"")
        assert extracted == ""

    def test_invalid_pdf_returns_empty(self):
        """Bytes that are not a PDF must produce an empty string."""
        extracted = extract_text_from_pdf(b"not a pdf")
        assert extracted == ""

    @patch("compliance.api.import_routes.fitz")
    def test_fitz_import_error(self, mock_fitz):
        """With ``fitz.open`` raising ImportError, extraction still returns a ``str``."""
        # NOTE(review): patch() needs ``import_routes`` to expose a
        # module-level ``fitz`` attribute; if fitz is only imported inside the
        # function, this decorator raises AttributeError -- confirm.
        mock_fitz.open.side_effect = ImportError("No module")
        # The function under test is expected to catch ImportError itself.
        extracted = extract_text_from_pdf(b"test")
        # Mocking at module level may surface the error differently than a
        # genuinely missing dependency would, so only the return type is
        # checked here, not the exact value.
        assert isinstance(extracted, str)
|