""" Tests for the version-chain diff endpoint added in DSMS Stufe 3. Mocks ipfs_cat so the test does not require a running IPFS node. """ import base64 import json from unittest.mock import AsyncMock, patch import pytest from fastapi.testclient import TestClient from main import app client = TestClient(app) def _wrap(metadata: dict, content_text: str) -> str: """Mimic the JSON envelope that routers.documents.ipfs_cat returns.""" return json.dumps( { "metadata": metadata, "content_base64": base64.b64encode(content_text.encode("utf-8")).decode("ascii"), } ) @pytest.mark.asyncio async def test_diff_text_documents_returns_unified_diff(): pkg_a = _wrap({"version": "1", "document_type": "ce_techfile"}, "alpha\nbeta\ngamma\n") pkg_b = _wrap({"version": "2", "document_type": "ce_techfile"}, "alpha\nDELTA\ngamma\n") async def fake_cat(cid: str): return pkg_a if cid == "cidA" else pkg_b with patch("routers.documents.ipfs_cat", new=AsyncMock(side_effect=fake_cat)): resp = client.get("/api/v1/documents/cidA/diff/cidB") assert resp.status_code == 200 body = resp.json() assert body["kind"] == "text" assert body["cid_a"] == "cidA" assert body["cid_b"] == "cidB" assert body["added_lines"] >= 1 assert body["removed_lines"] >= 1 assert "DELTA" in body["diff"] assert body["metadata_diff"] == {"version": {"old": "1", "new": "2"}} @pytest.mark.asyncio async def test_diff_binary_documents_returns_metadata_only(): # Use raw bytes that are not utf-8 decodable invalid_utf8 = b"\xff\xfe\xfd\xfc" pkg_a = json.dumps( {"metadata": {"version": "1"}, "content_base64": base64.b64encode(invalid_utf8).decode()} ) pkg_b = json.dumps( {"metadata": {"version": "2"}, "content_base64": base64.b64encode(invalid_utf8 + b"\x00").decode()} ) async def fake_cat(cid: str): return pkg_a if cid == "cidA" else pkg_b with patch("routers.documents.ipfs_cat", new=AsyncMock(side_effect=fake_cat)): resp = client.get("/api/v1/documents/cidA/diff/cidB") assert resp.status_code == 200 body = resp.json() assert body["kind"] == "binary" assert body["metadata_diff"] == {"version": {"old": "1", "new": "2"}} @pytest.mark.asyncio async def test_diff_handles_fetch_error(): async def fake_cat(cid: str): raise RuntimeError("not pinned") with patch("routers.documents.ipfs_cat", new=AsyncMock(side_effect=fake_cat)): resp = client.get("/api/v1/documents/cidA/diff/cidB") assert resp.status_code == 200 body = resp.json() assert "error" in body assert body["cid_a"] == "cidA" assert body["cid_b"] == "cidB" @pytest.mark.asyncio async def test_history_endpoint_follows_parent_chain(): """Sanity check that the existing history endpoint still works after route alias.""" chain = { "v3": _wrap({"version": "3", "parent_cid": "v2"}, "x"), "v2": _wrap({"version": "2", "parent_cid": "v1"}, "x"), "v1": _wrap({"version": "1", "parent_cid": None}, "x"), } async def fake_cat(cid: str): return chain[cid] with patch("routers.documents.ipfs_cat", new=AsyncMock(side_effect=fake_cat)): resp = client.get("/api/v1/documents/v3/history") assert resp.status_code == 200 body = resp.json() assert body["depth"] == 3 assert [h["version"] for h in body["history"]] == ["3", "2", "1"] @pytest.mark.asyncio async def test_bulk_diff_aggregates_text_chain(): """Bulk-diff walks the chain V1→V_latest and sums per-step additions/removals.""" chain = { "v3": _wrap({"version": "3", "parent_cid": "v2"}, "alpha\nbeta\nGAMMA-CHANGED\n"), "v2": _wrap({"version": "2", "parent_cid": "v1"}, "alpha\nbeta\ngamma\n"), "v1": _wrap({"version": "1", "parent_cid": None}, "alpha\ngamma\n"), } async def fake_cat(cid: str): return chain[cid] with patch("routers.documents.ipfs_cat", new=AsyncMock(side_effect=fake_cat)): resp = client.get("/api/v1/documents/v3/bulk-diff") assert resp.status_code == 200 body = resp.json() assert body["cid_latest"] == "v3" assert body["cid_baseline"] == "v1" assert body["versions"] == 3 assert len(body["steps"]) == 2 # Step ordering must be chronological (oldest pair first). assert body["steps"][0]["from_version"] == "1" assert body["steps"][0]["to_version"] == "2" assert body["steps"][1]["from_version"] == "2" assert body["steps"][1]["to_version"] == "3" # All steps are text — totals must include real added/removed counts. assert body["totals"]["added_lines"] > 0 assert body["totals"]["binary_steps"] == 0 # Each step bumped the "version" metadata field — collected globally. assert body["totals"]["metadata_fields_changed"] >= 1 @pytest.mark.asyncio async def test_bulk_diff_single_version_returns_empty_steps(): """A CID without a parent_cid is a baseline — no steps to aggregate.""" chain = {"only": _wrap({"version": "1", "parent_cid": None}, "alpha\n")} async def fake_cat(cid: str): return chain[cid] with patch("routers.documents.ipfs_cat", new=AsyncMock(side_effect=fake_cat)): resp = client.get("/api/v1/documents/only/bulk-diff") assert resp.status_code == 200 body = resp.json() assert body["versions"] == 1 assert body["steps"] == [] assert body["totals"]["added_lines"] == 0 assert body["totals"]["binary_steps"] == 0 assert "note" in body @pytest.mark.asyncio async def test_bulk_diff_handles_binary_step(): """Binary diffs cannot be line-counted — bump binary_steps, do not crash.""" binary = b"\xff\xfe\xfd" chain = { "v2": json.dumps({ "metadata": {"version": "2", "parent_cid": "v1"}, "content_base64": base64.b64encode(binary + b"\x00").decode(), }), "v1": json.dumps({ "metadata": {"version": "1", "parent_cid": None}, "content_base64": base64.b64encode(binary).decode(), }), } async def fake_cat(cid: str): return chain[cid] with patch("routers.documents.ipfs_cat", new=AsyncMock(side_effect=fake_cat)): resp = client.get("/api/v1/documents/v2/bulk-diff") assert resp.status_code == 200 body = resp.json() assert body["versions"] == 2 assert body["totals"]["binary_steps"] == 1 assert body["steps"][0]["kind"] == "binary" assert body["steps"][0]["added_lines"] == 0 @pytest.mark.asyncio async def test_bulk_diff_stops_on_fetch_error(): """If one CID in the chain is unreachable, return what we have — do not raise.""" chain = { "v2": _wrap({"version": "2", "parent_cid": "v1-missing"}, "alpha\n"), } async def fake_cat(cid: str): if cid not in chain: raise RuntimeError("not pinned") return chain[cid] with patch("routers.documents.ipfs_cat", new=AsyncMock(side_effect=fake_cat)): resp = client.get("/api/v1/documents/v2/bulk-diff") assert resp.status_code == 200 body = resp.json() # Only v2 was readable — chain is effectively length 1. assert body["versions"] == 1 assert body["steps"] == []