Initial commit: breakpilot-lehrer - Lehrer KI Platform
Services: Admin-Lehrer, Backend-Lehrer, Studio v2, Website, Klausur-Service, School-Service, Voice-Service, Geo-Service, BreakPilot Drive, Agent-Core

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
623
klausur-service/backend/tests/test_legal_templates.py
Normal file
623
klausur-service/backend/tests/test_legal_templates.py
Normal file
@@ -0,0 +1,623 @@
|
||||
"""
|
||||
Tests for Legal Templates RAG System.
|
||||
|
||||
Tests template_sources.py, github_crawler.py, legal_templates_ingestion.py,
|
||||
and the admin API endpoints for legal templates.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
from datetime import datetime
|
||||
import json
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Template Sources Tests
|
||||
# =============================================================================
|
||||
|
||||
class TestLicenseType:
    """Checks the string values carried by the ``LicenseType`` enum members."""

    def test_license_types_exist(self):
        """Each expected member exists and exposes its lowercase identifier."""
        from template_sources import LicenseType

        # Member name -> expected ``.value``
        expected_values = {
            "PUBLIC_DOMAIN": "public_domain",
            "CC0": "cc0",
            "UNLICENSE": "unlicense",
            "MIT": "mit",
            "CC_BY_4": "cc_by_4",
            "REUSE_NOTICE": "reuse_notice",
        }

        for member_name, raw_value in expected_values.items():
            assert getattr(LicenseType, member_name).value == raw_value
|
||||
|
||||
|
||||
class TestLicenseInfo:
    """Exercises construction and attribution rendering of ``LicenseInfo``."""

    def test_license_info_creation(self):
        """A CC0 record keeps its fields; training/output flags are True when not passed."""
        from template_sources import LicenseInfo, LicenseType

        fields = {
            "id": LicenseType.CC0,
            "name": "CC0 1.0 Universal",
            "url": "https://creativecommons.org/publicdomain/zero/1.0/",
            "attribution_required": False,
        }
        cc0 = LicenseInfo(**fields)

        assert cc0.id == LicenseType.CC0
        assert cc0.attribution_required is False
        # Neither flag was supplied above, so these check the values the
        # dataclass fills in on its own.
        assert cc0.training_allowed is True
        assert cc0.output_allowed is True

    def test_get_attribution_text_no_attribution(self):
        """No attribution text is produced when attribution is not required."""
        from template_sources import LicenseInfo, LicenseType

        no_attribution = LicenseInfo(
            id=LicenseType.CC0,
            name="CC0",
            url="https://example.com",
            attribution_required=False,
        )

        rendered = no_attribution.get_attribution_text("Test Source", "https://test.com")
        assert rendered == ""

    def test_get_attribution_text_with_template(self):
        """Source name and URL both appear in the rendered attribution text."""
        from template_sources import LicenseInfo, LicenseType

        mit = LicenseInfo(
            id=LicenseType.MIT,
            name="MIT License",
            url="https://opensource.org/licenses/MIT",
            attribution_required=True,
            attribution_template="Based on [{source_name}]({source_url}) - MIT License",
        )

        rendered = mit.get_attribution_text("Test Source", "https://test.com")
        for fragment in ("Test Source", "https://test.com"):
            assert fragment in rendered
|
||||
|
||||
|
||||
class TestSourceConfig:
    """Exercises construction of ``SourceConfig`` and its ``license_info`` attribute."""

    def test_source_config_creation(self):
        """Constructor arguments are stored; ``enabled`` is True when not passed."""
        from template_sources import SourceConfig, LicenseType

        settings = dict(
            name="test-source",
            license_type=LicenseType.CC0,
            template_types=["privacy_policy", "terms_of_service"],
            languages=["de", "en"],
            jurisdiction="DE",
            description="Test description",
            repo_url="https://github.com/test/repo",
        )
        config = SourceConfig(**settings)

        assert config.name == "test-source"
        assert config.license_type == LicenseType.CC0
        assert "privacy_policy" in config.template_types
        assert config.enabled is True

    def test_source_config_license_info(self):
        """``license_info`` of an MIT source reports MIT and requires attribution."""
        # LICENSES is imported but not otherwise referenced by this test.
        from template_sources import SourceConfig, LicenseType, LICENSES

        config = SourceConfig(
            name="test-source",
            license_type=LicenseType.MIT,
            template_types=["privacy_policy"],
            languages=["en"],
            jurisdiction="US",
            description="Test",
        )

        resolved = config.license_info
        assert resolved.id == LicenseType.MIT
        assert resolved.attribution_required is True
|
||||
|
||||
|
||||
class TestTemplateSources:
    """Checks ``TEMPLATE_SOURCES`` and the source lookup helpers."""

    def test_template_sources_not_empty(self):
        """At least one template source is configured."""
        from template_sources import TEMPLATE_SOURCES

        assert len(TEMPLATE_SOURCES) > 0

    def test_github_site_policy_exists(self):
        """The ``github-site-policy`` entry is present with the expected repo URL."""
        from template_sources import TEMPLATE_SOURCES

        matches = [cfg for cfg in TEMPLATE_SOURCES if cfg.name == "github-site-policy"]
        policy_source = matches[0] if matches else None

        assert policy_source is not None
        assert policy_source.repo_url == "https://github.com/github/site-policy"

    def test_enabled_sources(self):
        """Everything returned by ``get_enabled_sources`` is flagged enabled."""
        from template_sources import get_enabled_sources

        for cfg in get_enabled_sources():
            assert cfg.enabled

    def test_sources_by_priority(self):
        """``get_sources_by_priority`` respects the requested priority bound."""
        from template_sources import get_sources_by_priority

        # Asking for 1 must yield priority-1 entries only
        for cfg in get_sources_by_priority(1):
            assert cfg.priority == 1

        # Asking for 2 may yield priorities 1 and 2
        for cfg in get_sources_by_priority(2):
            assert cfg.priority <= 2

    def test_sources_by_license(self):
        """``get_sources_by_license`` returns only sources of the given license."""
        from template_sources import get_sources_by_license, LicenseType

        for cfg in get_sources_by_license(LicenseType.CC0):
            assert cfg.license_type == LicenseType.CC0
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# GitHub Crawler Tests
|
||||
# =============================================================================
|
||||
|
||||
class TestMarkdownParser:
    """Exercises ``MarkdownParser``: parsing, titles, language and placeholders."""

    def test_parse_simple_markdown(self):
        """A small document yields its h1 as title, markdown type and body text."""
        from github_crawler import MarkdownParser

        markdown_source = (
            "# Test Title\n"
            "\n"
            "This is some content.\n"
            "\n"
            "## Section 1\n"
            "\n"
            "More content here.\n"
        )
        parsed = MarkdownParser.parse(markdown_source, "test.md")

        assert parsed.title == "Test Title"
        assert parsed.file_type == "markdown"
        assert "content" in parsed.text

    def test_extract_title_from_heading(self):
        """The h1 heading wins over the filename."""
        from github_crawler import MarkdownParser

        extracted = MarkdownParser._extract_title("# My Document\n\nContent", "fallback.md")
        assert extracted == "My Document"

    def test_extract_title_fallback(self):
        """Without a heading the title is derived from the filename."""
        from github_crawler import MarkdownParser

        extracted = MarkdownParser._extract_title("No heading here", "my-document.md")
        assert extracted == "My Document"

    def test_detect_german_language(self):
        """A German sentence is classified as ``de``."""
        from github_crawler import MarkdownParser

        sample = "Dies ist eine Datenschutzerklaerung fuer die Verarbeitung personenbezogener Daten."
        assert MarkdownParser._detect_language(sample) == "de"

    def test_detect_english_language(self):
        """An English sentence is classified as ``en``."""
        from github_crawler import MarkdownParser

        sample = "This is a privacy policy for processing personal data in our application."
        assert MarkdownParser._detect_language(sample) == "en"

    def test_find_placeholders(self):
        """Bracket, brace and double-underscore placeholders are all reported."""
        from github_crawler import MarkdownParser

        template_line = "Company: [COMPANY_NAME], Contact: {email}, Address: __ADDRESS__"
        found = MarkdownParser._find_placeholders(template_line)

        for token in ("[COMPANY_NAME]", "{email}", "__ADDRESS__"):
            assert token in found
|
||||
|
||||
|
||||
class TestHTMLParser:
    """Exercises ``HTMLParser``: document parsing and text extraction."""

    def test_parse_simple_html(self):
        """Title comes from ``<title>``; heading and paragraph text are kept."""
        from github_crawler import HTMLParser

        page = (
            "<!DOCTYPE html>\n"
            "<html>\n"
            "<head><title>Test Page</title></head>\n"
            "<body>\n"
            "<h1>Welcome</h1>\n"
            "<p>This is content.</p>\n"
            "</body>\n"
            "</html>"
        )
        parsed = HTMLParser.parse(page, "test.html")

        assert parsed.title == "Test Page"
        assert parsed.file_type == "html"
        for snippet in ("Welcome", "content"):
            assert snippet in parsed.text

    def test_html_to_text_removes_scripts(self):
        """Script bodies are dropped while surrounding paragraph text survives."""
        from github_crawler import HTMLParser

        markup = "<p>Text</p><script>alert('bad');</script><p>More</p>"
        plain = HTMLParser._html_to_text(markup)

        assert "alert" not in plain
        for snippet in ("Text", "More"):
            assert snippet in plain
|
||||
|
||||
|
||||
class TestJSONParser:
    """Exercises ``JSONParser.parse`` on flat and nested JSON payloads."""

    def test_parse_simple_json(self):
        """A flat object with title/text yields exactly one document."""
        from github_crawler import JSONParser

        payload = {
            "title": "Privacy Policy",
            "text": "This is the privacy policy content.",
            "language": "en",
        }

        parsed_docs = JSONParser.parse(json.dumps(payload), "policy.json")

        assert len(parsed_docs) == 1
        first = parsed_docs[0]
        assert first.title == "Privacy Policy"
        assert "privacy policy content" in first.text

    def test_parse_nested_json(self):
        """Objects nested under ``sections`` are each picked up as documents."""
        from github_crawler import JSONParser

        payload = {
            "sections": {
                "intro": {"title": "Introduction", "text": "Welcome text"},
                "data": {"title": "Data Collection", "text": "Collection info"},
            }
        }

        parsed_docs = JSONParser.parse(json.dumps(payload), "nested.json")

        # Both nested entries must be extracted; more are tolerated
        assert len(parsed_docs) >= 2
|
||||
|
||||
|
||||
class TestExtractedDocument:
    """Exercises the ``ExtractedDocument`` dataclass."""

    def test_extracted_document_hash(self):
        """``source_hash`` is populated without being passed to the constructor."""
        from github_crawler import ExtractedDocument

        fields = {
            "text": "Some content",
            "title": "Test",
            "file_path": "test.md",
            "file_type": "markdown",
            "source_url": "https://example.com",
        }
        document = ExtractedDocument(**fields)

        digest = document.source_hash
        assert digest != ""
        assert len(digest) == 64  # length of a SHA256 hex digest
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Legal Templates Ingestion Tests
|
||||
# =============================================================================
|
||||
|
||||
class TestLegalTemplatesIngestion:
    """Unit tests for helper methods of ``LegalTemplatesIngestion``.

    Every test receives its instance from the ``ingestion`` fixture, which
    constructs it while ``legal_templates_ingestion.QdrantClient`` is patched.
    The ``mock_qdrant`` and ``mock_http_client`` fixtures are kept available
    although no test in this class requests them.
    """

    @pytest.fixture
    def mock_qdrant(self):
        """Patch ``QdrantClient`` and yield a fake client reporting no collections."""
        with patch('legal_templates_ingestion.QdrantClient') as mock:
            client = MagicMock()
            client.get_collections.return_value.collections = []
            mock.return_value = client
            yield client

    @pytest.fixture
    def mock_http_client(self):
        """Patch ``httpx.AsyncClient`` and yield the ``AsyncMock`` standing in for it."""
        with patch('legal_templates_ingestion.httpx.AsyncClient') as mock:
            client = AsyncMock()
            mock.return_value = client
            yield client

    @pytest.fixture
    def ingestion(self):
        """Yield a ``LegalTemplatesIngestion`` built with ``QdrantClient`` patched.

        The patch stays active until the requesting test has finished, so both
        construction and every call made by the test run against the mock.
        """
        from legal_templates_ingestion import LegalTemplatesIngestion

        with patch('legal_templates_ingestion.QdrantClient'):
            yield LegalTemplatesIngestion()

    def test_chunk_text_short(self, ingestion):
        """Text shorter than ``chunk_size`` comes back as one unchanged chunk."""
        chunks = ingestion._chunk_text("Short text", chunk_size=1000)

        assert len(chunks) == 1
        assert chunks[0] == "Short text"

    def test_chunk_text_long(self, ingestion):
        """Text longer than ``chunk_size`` is split into several bounded chunks."""
        # 100 repetitions give a text far longer than the chunk size
        long_text = "This is a sentence. " * 100
        chunks = ingestion._chunk_text(long_text, chunk_size=200, overlap=50)

        assert len(chunks) > 1
        # Chunks should be roughly chunk_size; 250 leaves some buffer
        for chunk in chunks:
            assert len(chunk) <= 250

    def test_split_sentences(self, ingestion):
        """Three German sentences are split into three items."""
        text = "Dies ist Satz eins. Dies ist Satz zwei. Und Satz drei."
        sentences = ingestion._split_sentences(text)

        assert len(sentences) == 3

    def test_split_sentences_preserves_abbreviations(self, ingestion):
        """The abbreviation ``z.B.`` does not introduce a sentence break."""
        text = "Das ist z.B. ein Beispiel. Und noch ein Satz."
        sentences = ingestion._split_sentences(text)

        assert len(sentences) == 2
        assert "z.B." in sentences[0] or "z.b." in sentences[0].lower()

    def test_infer_template_type_privacy(self, ingestion):
        """A German privacy text from a privacy-policy source is typed ``privacy_policy``."""
        from github_crawler import ExtractedDocument
        from template_sources import SourceConfig, LicenseType

        doc = ExtractedDocument(
            text="Diese Datenschutzerklaerung informiert Sie ueber die Verarbeitung personenbezogener Daten.",
            title="Datenschutz",
            file_path="privacy.md",
            file_type="markdown",
            source_url="https://example.com",
        )
        source = SourceConfig(
            name="test",
            license_type=LicenseType.CC0,
            template_types=["privacy_policy"],
            languages=["de"],
            jurisdiction="DE",
            description="Test",
        )

        assert ingestion._infer_template_type(doc, source) == "privacy_policy"

    def test_infer_clause_category(self, ingestion):
        """Liability and privacy clauses map to ``haftung`` and ``datenschutz``."""
        cases = (
            # Liability clause
            ("Die Haftung des Anbieters ist auf grobe Fahrlässigkeit beschränkt.", "haftung"),
            # Privacy clause
            ("Wir verarbeiten personenbezogene Daten gemäß der DSGVO.", "datenschutz"),
        )

        for text, expected_category in cases:
            assert ingestion._infer_clause_category(text) == expected_category
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Admin API Templates Tests
|
||||
# =============================================================================
|
||||
|
||||
class TestTemplatesAdminAPI:
    """Tests for the ``_templates_ingestion_status`` dict of ``admin_api``.

    The original heading describes these as tests for the
    /api/v1/admin/templates/* endpoints; what they exercise is the
    module-level status dict. Because that dict is shared state, every test
    applies its values through ``patch.dict`` so the previous contents are
    restored when the test ends and nothing leaks into other tests.
    """

    def test_templates_status_structure(self):
        """An idle status reports ``running`` False and empty results."""
        from admin_api import _templates_ingestion_status

        idle_state = {
            "running": False,
            "last_run": None,
            "current_source": None,
            "results": {},
        }

        with patch.dict(_templates_ingestion_status, idle_state):
            assert _templates_ingestion_status["running"] is False
            assert _templates_ingestion_status["results"] == {}

    def test_templates_status_running(self):
        """A running status exposes the flag and the source being processed."""
        from admin_api import _templates_ingestion_status

        running_state = {
            "running": True,
            "current_source": "github-site-policy",
            "last_run": datetime.now().isoformat(),
        }

        with patch.dict(_templates_ingestion_status, running_state):
            assert _templates_ingestion_status["running"] is True
            assert _templates_ingestion_status["current_source"] == "github-site-policy"

    def test_templates_results_tracking(self):
        """Per-source results keep status, counters and error lists."""
        from admin_api import _templates_ingestion_status

        tracked_results = {
            "github-site-policy": {
                "status": "completed",
                "documents_found": 15,
                "chunks_indexed": 42,
                "errors": [],
            },
            "opr-vc": {
                "status": "failed",
                "documents_found": 0,
                "chunks_indexed": 0,
                "errors": ["Connection timeout"],
            },
        }

        with patch.dict(_templates_ingestion_status, {"results": tracked_results}):
            results = _templates_ingestion_status["results"]
            assert results["github-site-policy"]["status"] == "completed"
            assert results["github-site-policy"]["chunks_indexed"] == 42
            assert results["opr-vc"]["status"] == "failed"
            assert len(results["opr-vc"]["errors"]) > 0
|
||||
|
||||
|
||||
class TestTemplateTypeLabels:
    """Checks membership of expected keys in the template constants."""

    def test_template_types_defined(self):
        """All expected template type identifiers are in ``TEMPLATE_TYPES``."""
        from template_sources import TEMPLATE_TYPES

        required_types = (
            "privacy_policy",
            "terms_of_service",
            "cookie_banner",
            "impressum",
            "widerruf",
            "dpa",
        )
        for type_key in required_types:
            assert type_key in TEMPLATE_TYPES

    def test_jurisdictions_defined(self):
        """All expected jurisdiction codes are in ``JURISDICTIONS``."""
        from template_sources import JURISDICTIONS

        for code in ("DE", "AT", "CH", "EU", "US"):
            assert code in JURISDICTIONS
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Qdrant Service Templates Tests
|
||||
# =============================================================================
|
||||
|
||||
class TestQdrantServiceTemplates:
    """Checks the legal-templates constants exported by ``qdrant_service``."""

    @pytest.fixture
    def mock_qdrant_client(self):
        """Patch ``qdrant_service.get_qdrant_client`` and yield the fake client."""
        with patch('qdrant_service.get_qdrant_client') as client_factory:
            fake_client = MagicMock()
            # The fake reports that no collections exist
            fake_client.get_collections.return_value.collections = []
            client_factory.return_value = fake_client
            yield fake_client

    def test_legal_templates_collection_name(self):
        """The collection constant is ``bp_legal_templates``."""
        from qdrant_service import LEGAL_TEMPLATES_COLLECTION

        expected_name = "bp_legal_templates"
        assert LEGAL_TEMPLATES_COLLECTION == expected_name

    def test_legal_templates_vector_size(self):
        """The vector size constant is 1024 (stated in the original as the BGE-M3 size)."""
        from qdrant_service import LEGAL_TEMPLATES_VECTOR_SIZE

        expected_dimensions = 1024
        assert LEGAL_TEMPLATES_VECTOR_SIZE == expected_dimensions
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Integration Tests (require mocking external services)
|
||||
# =============================================================================
|
||||
|
||||
class TestTemplatesIntegration:
    """Integration-style tests for the templates pipeline with external services mocked."""

    @pytest.fixture
    def mock_all_services(self):
        """Patch the Qdrant and HTTP clients used by ``legal_templates_ingestion``.

        Yields:
            dict with the fake Qdrant client under ``"qdrant"`` and the fake
            async HTTP client under ``"http"``.
        """
        with patch('legal_templates_ingestion.QdrantClient') as qdrant_mock, \
                patch('legal_templates_ingestion.httpx.AsyncClient') as http_mock:

            qdrant = MagicMock()
            qdrant.get_collections.return_value.collections = []
            qdrant_mock.return_value = qdrant

            # The awaited result of ``post`` is modelled as a plain MagicMock so
            # that ``json()`` and ``raise_for_status()`` are ordinary synchronous
            # calls. Left as children of an AsyncMock they would themselves be
            # AsyncMock instances, and ``json()`` would hand back a coroutine
            # instead of the payload.
            response = MagicMock()
            response.json.return_value = {"embeddings": [[0.1] * 1024]}

            http = AsyncMock()
            http.post.return_value = response
            http_mock.return_value.__aenter__.return_value = http

            yield {"qdrant": qdrant, "http": http}

    def test_full_chunk_creation_pipeline(self, mock_all_services):
        """``_create_chunks`` carries document and source metadata onto the chunks."""
        from legal_templates_ingestion import LegalTemplatesIngestion
        from github_crawler import ExtractedDocument
        from template_sources import SourceConfig, LicenseType

        ingestion = LegalTemplatesIngestion()

        doc = ExtractedDocument(
            text=(
                "# Datenschutzerklaerung\n\n"
                "Wir nehmen den Schutz Ihrer personenbezogenen Daten sehr ernst. "
                "Diese Datenschutzerklaerung informiert Sie ueber die Verarbeitung "
                "Ihrer Daten gemaess der DSGVO."
            ),
            title="Datenschutzerklaerung",
            file_path="privacy.md",
            file_type="markdown",
            source_url="https://example.com/privacy.md",
            source_commit="abc123",
            placeholders=["[FIRMENNAME]"],
            language="de",  # Explicitly set language
        )

        source = SourceConfig(
            name="test-source",
            license_type=LicenseType.CC0,
            template_types=["privacy_policy"],
            languages=["de"],
            jurisdiction="DE",
            description="Test source",
            repo_url="https://github.com/test/repo",
        )

        chunks = ingestion._create_chunks(doc, source)

        assert len(chunks) >= 1
        first_chunk = chunks[0]
        assert first_chunk.template_type == "privacy_policy"
        assert first_chunk.language == "de"
        assert first_chunk.jurisdiction == "DE"
        assert first_chunk.license_id == "cc0"
        assert first_chunk.attribution_required is False
        assert "[FIRMENNAME]" in first_chunk.placeholders
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Test Runner Configuration
|
||||
# =============================================================================
|
||||
|
||||
if __name__ == "__main__":
    # Run this file's tests verbosely and hand pytest's exit code back to the
    # shell, so a failing run ends with a non-zero status.
    raise SystemExit(pytest.main([__file__, "-v"]))
|
||||
Reference in New Issue
Block a user