"""Tests for D3: Structural metadata flow (section priority, page in citation).""" import json from typing import Optional from services.rag_client import RAGSearchResult def _make_chunk( article: str = "", paragraph: str = "", page: Optional[int] = None, ) -> RAGSearchResult: return RAGSearchResult( text="Test chunk text", regulation_code="DSGVO", regulation_name="Datenschutz-Grundverordnung", regulation_short="DSGVO", category="data_protection", article=article, paragraph=paragraph, source_url="https://example.com", score=0.95, collection="bp_compliance_de", page=page, ) class TestRAGSearchResultPage: """RAGSearchResult now carries a page field.""" def test_page_default_none(self): chunk = _make_chunk() assert chunk.page is None def test_page_set(self): chunk = _make_chunk(page=42) assert chunk.page == 42 def test_page_zero(self): chunk = _make_chunk(page=0) assert chunk.page == 0 class TestQdrantPayloadPriority: """section (D2) should take priority over article (legacy).""" def test_section_preferred_over_article(self): payload = {"section": "§ 312k", "article": "Art. 312", "section_title": "Kuendigungsbutton"} article = payload.get("section", "") or payload.get("article", "") or payload.get("section_title", "") assert article == "§ 312k" def test_article_fallback_when_no_section(self): payload = {"section": "", "article": "Art. 35", "section_title": ""} article = payload.get("section", "") or payload.get("article", "") or payload.get("section_title", "") assert article == "Art. 35" def test_section_title_last_resort(self): payload = {"section": "", "article": "", "section_title": "Informationspflichten"} article = payload.get("section", "") or payload.get("article", "") or payload.get("section_title", "") assert article == "Informationspflichten" def test_all_empty(self): payload = {"section": "", "article": "", "section_title": ""} article = payload.get("section", "") or payload.get("article", "") or payload.get("section_title", "") assert article == "" def test_page_from_payload(self): payload = {"page": 847} assert payload.get("page") == 847 def test_page_none_from_payload(self): payload = {} assert payload.get("page") is None class TestSourceCitationPage: """source_citation dict should include page when available.""" def _build_citation(self, chunk: RAGSearchResult) -> dict: """Mirrors the citation-building logic from control_generator.py.""" return { "source": chunk.regulation_name, "article": chunk.article, "paragraph": chunk.paragraph, "page": chunk.page, "license": "free_use", "source_type": "law", "url": chunk.source_url or "", } def test_citation_with_page(self): chunk = _make_chunk(article="§ 312k", paragraph="Abs. 1", page=847) citation = self._build_citation(chunk) assert citation["page"] == 847 def test_citation_without_page(self): chunk = _make_chunk(article="§ 312k", paragraph="Abs. 1") citation = self._build_citation(chunk) assert citation["page"] is None def test_citation_serializable(self): chunk = _make_chunk(article="Art. 35", page=12) citation = self._build_citation(chunk) serialized = json.dumps(citation) restored = json.loads(serialized) assert restored["page"] == 12 class TestFormatCitation: """_format_citation should include page number.""" def _format_citation(self, citation) -> str: """Mirrors _format_citation from decomposition_pass.py.""" if not citation: return "" if isinstance(citation, str): try: c = json.loads(citation) if isinstance(c, dict): parts = [] if c.get("source"): parts.append(c["source"]) if c.get("article"): parts.append(c["article"]) if c.get("paragraph"): parts.append(c["paragraph"]) if c.get("page") is not None: parts.append(f"S. {c['page']}") return " ".join(parts) if parts else citation except (json.JSONDecodeError, TypeError): return citation return str(citation) def test_format_with_page(self): citation = json.dumps({ "source": "DSGVO", "article": "Art. 35", "paragraph": "Abs. 1", "page": 42, }) result = self._format_citation(citation) assert result == "DSGVO Art. 35 Abs. 1 S. 42" def test_format_without_page(self): citation = json.dumps({ "source": "BGB", "article": "§ 312k", "paragraph": "", }) result = self._format_citation(citation) assert result == "BGB § 312k" def test_format_page_zero(self): citation = json.dumps({ "source": "BGB", "article": "§ 1", "paragraph": "", "page": 0, }) result = self._format_citation(citation) assert result == "BGB § 1 S. 0" def test_format_empty_citation(self): assert self._format_citation("") == "" assert self._format_citation(None) == ""