"""Tests for Document Import routes (import_routes.py).""" import pytest from unittest.mock import MagicMock, patch, AsyncMock from compliance.api.import_routes import ( detect_document_type, analyze_gaps, extract_text_from_pdf, ) class TestDetectDocumentType: """Tests for keyword-based document type detection.""" def test_dsfa_detection(self): text = "Dies ist eine Datenschutz-Folgenabschaetzung (DSFA) nach Art. 35 DSGVO" doc_type, confidence = detect_document_type(text) assert doc_type == "DSFA" assert confidence >= 0.5 def test_tom_detection(self): text = "Technisch-organisatorische Massnahmen (TOM) zum Schutz personenbezogener Daten" doc_type, confidence = detect_document_type(text) assert doc_type == "TOM" assert confidence >= 0.5 def test_vvt_detection(self): text = "Verarbeitungsverzeichnis nach Art. 30 DSGVO - VVT processing activities" doc_type, confidence = detect_document_type(text) assert doc_type == "VVT" assert confidence >= 0.5 def test_privacy_policy_detection(self): text = "Datenschutzerklaerung - Privacy Policy fuer unsere Nutzer" doc_type, confidence = detect_document_type(text) assert doc_type == "PRIVACY_POLICY" assert confidence >= 0.5 def test_unknown_document(self): text = "Lorem ipsum dolor sit amet" doc_type, confidence = detect_document_type(text) assert doc_type == "OTHER" assert confidence == 0.3 def test_empty_text(self): doc_type, confidence = detect_document_type("") assert doc_type == "OTHER" assert confidence == 0.3 def test_confidence_increases_with_more_keywords(self): text_single = "dsfa" text_multi = "dsfa dpia datenschutz-folgenabschaetzung privacy impact" _, conf_single = detect_document_type(text_single) _, conf_multi = detect_document_type(text_multi) assert conf_multi > conf_single def test_confidence_capped_at_095(self): text = "dsfa dpia datenschutz-folgenabschaetzung privacy impact assessment report analysis" _, confidence = detect_document_type(text) assert confidence <= 0.95 class TestAnalyzeGaps: """Tests for gap analysis rules.""" def test_ai_gap_detected(self): text = "Wir setzen KI und AI in unserer Anwendung ein" gaps = analyze_gaps(text, "OTHER") # Should detect AI Act gap (missing risk classification) ai_gaps = [g for g in gaps if g["category"] == "AI Act Compliance"] assert len(ai_gaps) > 0 assert ai_gaps[0]["severity"] == "CRITICAL" def test_no_gap_when_requirement_present(self): text = "KI-System mit Risikoklassifizierung nach EU AI Act" gaps = analyze_gaps(text, "OTHER") ai_gaps = [g for g in gaps if g["category"] == "AI Act Compliance"] assert len(ai_gaps) == 0 def test_tom_gap_detected(self): text = "Cloud-basiertes SaaS-System mit KI-Funktionen" gaps = analyze_gaps(text, "OTHER") tom_gaps = [g for g in gaps if g["category"] == "TOMs"] assert len(tom_gaps) > 0 def test_no_gaps_for_irrelevant_text(self): text = "Ein einfacher Flyer ohne Datenbezug" gaps = analyze_gaps(text, "OTHER") assert len(gaps) == 0 def test_gap_has_required_fields(self): text = "KI-System mit automatisierten Entscheidungen" gaps = analyze_gaps(text, "OTHER") assert len(gaps) > 0 for gap in gaps: assert "id" in gap assert "category" in gap assert "severity" in gap assert "regulation" in gap assert "required_action" in gap class TestExtractTextFromPdf: """Tests for PDF text extraction.""" def test_empty_bytes_returns_empty(self): result = extract_text_from_pdf(b"") assert result == "" def test_invalid_pdf_returns_empty(self): result = extract_text_from_pdf(b"not a pdf") assert result == "" @patch("compliance.api.import_routes.fitz") def test_fitz_import_error(self, mock_fitz): """When fitz is not available, returns empty string.""" mock_fitz.open.side_effect = ImportError("No module") # The actual function catches ImportError internally result = extract_text_from_pdf(b"test") # Since we mocked fitz at module level it will raise differently, # but the function should handle it gracefully assert isinstance(result, str)