feat(dsms): version chain history + diff endpoint + Audit Timeline UI
DSMS Stufe 3 — making the parent_cid chain useful end-to-end.
Gateway (dsms-gateway):
- /api/v1/documents/{cid}/history alias added next to the legacy
/documents/{cid}/history (history endpoint itself was already there,
just under an inconsistent prefix).
- NEW /api/v1/documents/{cid_a}/diff/{cid_b}: fetches both packages from
IPFS, computes a metadata diff (per-field old/new), and renders a
unified text diff for UTF-8 payloads. Binary payloads return only the
metadata diff, plus a "binary — compare via rendered export" note.
- 4 new pytest cases (mocking ipfs_cat): text diff, binary fallback,
fetch error, history chain depth — all green.
Frontend (admin-compliance):
- CIDHistoryModal: lazy-loads /dsms/documents/:cid/history, renders the
version chain as a vertical timeline, marks the AKTUELL entry, and
per-step exposes a "Diff zu V<n>" button that loads + renders the diff
inline (metadata table + unified text diff in a monospace panel).
- AuditTimelinePage: existing CID badge now sits next to a "Verlauf
anzeigen" link that opens the modal. Handles both Python's plain-CID
audit values and the Go techfile flow's JSON envelope {cid, filename,
size} via extractCID() helper.
This makes "show me how this CE-Akte changed between V2 and V3"
self-service in the UI instead of a curl-against-IPFS workflow.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -256,7 +256,8 @@ async def archive_legal_document(
|
||||
}
|
||||
|
||||
|
||||
@router.get("/documents/{cid}/history")
|
||||
@router.get("/api/v1/documents/{cid}/history")
|
||||
@router.get("/documents/{cid}/history") # legacy path, kept for backwards compatibility
|
||||
async def get_document_history(cid: str):
|
||||
"""Follow the parent_cid chain to reconstruct version history."""
|
||||
history = []
|
||||
@@ -285,3 +286,99 @@ async def get_document_history(cid: str):
|
||||
break
|
||||
|
||||
return {"cid": cid, "history": history, "depth": len(history)}
|
||||
|
||||
|
||||
@router.get("/api/v1/documents/{cid_a}/diff/{cid_b}")
async def diff_documents(cid_a: str, cid_b: str):
    """
    Compare two DSMS document versions by their CIDs.

    Both payloads are fetched with ``ipfs_cat`` and parsed as JSON packages.
    When both contents are text-decodable (UTF-8) the response carries a
    unified diff plus added/removed line counts. When either content is
    binary, the response has ``kind == "binary"`` and only the metadata
    differences. Used by the Audit Timeline UI to render "what changed
    between V2 and V3 of CE-Akte X".

    Args:
        cid_a: CID of the "old" side of the comparison.
        cid_b: CID of the "new" side of the comparison.

    Returns:
        A dict that always contains ``cid_a`` and ``cid_b``, plus either:
        - ``error`` when one of the fetches failed, or
        - ``kind``, ``metadata_diff`` and, for text, ``diff``,
          ``added_lines`` and ``removed_lines``.
    """
    try:
        raw_a = await ipfs_cat(cid_a)
        raw_b = await ipfs_cat(cid_b)
    except Exception as exc:
        # Deliberately broad: any fetch failure is reported to the caller as
        # an error payload in the response body instead of being raised.
        return {"error": f"could not fetch one of the CIDs: {exc}", "cid_a": cid_a, "cid_b": cid_b}

    try:
        pkg_a = json.loads(raw_a)
        pkg_b = json.loads(raw_b)
    except (TypeError, ValueError, RecursionError):
        # Not parseable as JSON (JSONDecodeError and UnicodeDecodeError are
        # both ValueError subclasses) — handled by the shape check below.
        pkg_a = pkg_b = None

    if not (isinstance(pkg_a, dict) and isinstance(pkg_b, dict)):
        # Documents are not the wrapped-package JSON shape — treat as raw.
        # This also covers valid JSON that is not an object (list, string,
        # number), which would otherwise crash on `.get` below.
        pkg_a = {"metadata": {}, "content_base64": ""}
        pkg_b = {"metadata": {}, "content_base64": ""}

    meta_a = pkg_a.get("metadata") or {}
    meta_b = pkg_b.get("metadata") or {}
    # A non-dict metadata value cannot be compared field by field; fall back
    # to "no metadata" rather than failing the whole request.
    if not isinstance(meta_a, dict):
        meta_a = {}
    if not isinstance(meta_b, dict):
        meta_b = {}
    meta_diff = _diff_metadata(meta_a, meta_b)

    # Try to decode the content. The Archive flow stores files as base64 in
    # `content_base64`; older payloads may use `content` (utf-8 text).
    text_a, text_b, is_binary = _extract_texts(pkg_a, pkg_b)

    if is_binary:
        return {
            "cid_a": cid_a,
            "cid_b": cid_b,
            "kind": "binary",
            "metadata_diff": meta_diff,
            "note": "Binary payload — text diff omitted. Compare via the rendered tech-file export instead.",
        }

    diff_lines = list(
        _unified_diff(text_a.splitlines(), text_b.splitlines(), fromfile=cid_a, tofile=cid_b, lineterm="")
    )
    return {
        "cid_a": cid_a,
        "cid_b": cid_b,
        "kind": "text",
        "metadata_diff": meta_diff,
        "diff": "\n".join(diff_lines),
        # The "+++" / "---" file-header lines are not content changes.
        "added_lines": sum(1 for ln in diff_lines if ln.startswith("+") and not ln.startswith("+++")),
        "removed_lines": sum(1 for ln in diff_lines if ln.startswith("-") and not ln.startswith("---")),
    }
|
||||
|
||||
|
||||
def _diff_metadata(a: dict, b: dict) -> dict:
|
||||
"""Return per-field change list: {field: {"old": ..., "new": ...}}."""
|
||||
keys = set(a.keys()) | set(b.keys())
|
||||
changes = {}
|
||||
for k in sorted(keys):
|
||||
if a.get(k) != b.get(k):
|
||||
changes[k] = {"old": a.get(k), "new": b.get(k)}
|
||||
return changes
|
||||
|
||||
|
||||
def _extract_texts(pkg_a: dict, pkg_b: dict) -> tuple[str, str, bool]:
|
||||
"""Return (text_a, text_b, is_binary). Falls back to base64-decode."""
|
||||
import base64
|
||||
|
||||
def to_text(pkg: dict) -> tuple[str, bool]:
|
||||
if isinstance(pkg.get("content"), str):
|
||||
return pkg["content"], False
|
||||
b64 = pkg.get("content_base64")
|
||||
if not b64:
|
||||
return "", False
|
||||
try:
|
||||
raw = base64.b64decode(b64)
|
||||
except Exception:
|
||||
return "", True
|
||||
try:
|
||||
return raw.decode("utf-8"), False
|
||||
except UnicodeDecodeError:
|
||||
return "", True
|
||||
|
||||
text_a, bin_a = to_text(pkg_a)
|
||||
text_b, bin_b = to_text(pkg_b)
|
||||
return text_a, text_b, (bin_a or bin_b)
|
||||
|
||||
|
||||
def _unified_diff(a, b, fromfile, tofile, lineterm):
|
||||
"""Tiny shim around difflib.unified_diff so the function reads cleanly."""
|
||||
import difflib
|
||||
|
||||
return difflib.unified_diff(a, b, fromfile=fromfile, tofile=tofile, lineterm=lineterm, n=2)
|
||||
|
||||
@@ -0,0 +1,108 @@
|
||||
"""
|
||||
Tests for the version-chain diff endpoint added in DSMS Stufe 3.
|
||||
|
||||
Mocks ipfs_cat so the test does not require a running IPFS node.
|
||||
"""
|
||||
|
||||
import base64
|
||||
import json
|
||||
from unittest.mock import AsyncMock, patch
|
||||
|
||||
import pytest
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
from main import app
|
||||
|
||||
client = TestClient(app)
|
||||
|
||||
|
||||
def _wrap(metadata: dict, content_text: str) -> str:
|
||||
"""Mimic the JSON envelope that routers.documents.ipfs_cat returns."""
|
||||
return json.dumps(
|
||||
{
|
||||
"metadata": metadata,
|
||||
"content_base64": base64.b64encode(content_text.encode("utf-8")).decode("ascii"),
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_diff_text_documents_returns_unified_diff():
    """Two UTF-8 packages yield a text diff plus the changed metadata field."""
    older = _wrap({"version": "1", "document_type": "ce_techfile"}, "alpha\nbeta\ngamma\n")
    newer = _wrap({"version": "2", "document_type": "ce_techfile"}, "alpha\nDELTA\ngamma\n")

    async def fake_cat(cid: str):
        # "cidA" is the old version; any other CID resolves to the new one.
        if cid == "cidA":
            return older
        return newer

    with patch("routers.documents.ipfs_cat", new=AsyncMock(side_effect=fake_cat)):
        resp = client.get("/api/v1/documents/cidA/diff/cidB")

    assert resp.status_code == 200
    body = resp.json()

    # Identity and kind of the comparison.
    assert body["kind"] == "text"
    assert body["cid_a"] == "cidA"
    assert body["cid_b"] == "cidB"

    # One line was swapped, so the diff has at least one "+" and one "-".
    assert body["added_lines"] >= 1
    assert body["removed_lines"] >= 1
    assert "DELTA" in body["diff"]

    # Only "version" changed; "document_type" is identical on both sides.
    assert body["metadata_diff"] == {"version": {"old": "1", "new": "2"}}
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_diff_binary_documents_returns_metadata_only():
    """Payloads that are not valid UTF-8 produce a metadata-only "binary" response."""

    def binary_package(version: str, payload: bytes) -> str:
        encoded = base64.b64encode(payload).decode()
        return json.dumps({"metadata": {"version": version}, "content_base64": encoded})

    # Use raw bytes that are not utf-8 decodable
    invalid_utf8 = b"\xff\xfe\xfd\xfc"
    older = binary_package("1", invalid_utf8)
    newer = binary_package("2", invalid_utf8 + b"\x00")

    async def fake_cat(cid: str):
        if cid == "cidA":
            return older
        return newer

    with patch("routers.documents.ipfs_cat", new=AsyncMock(side_effect=fake_cat)):
        resp = client.get("/api/v1/documents/cidA/diff/cidB")

    assert resp.status_code == 200
    body = resp.json()
    assert body["kind"] == "binary"
    assert body["metadata_diff"] == {"version": {"old": "1", "new": "2"}}
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_diff_handles_fetch_error():
    """A failing ipfs_cat is reported as an error payload with status 200."""

    async def fake_cat(cid: str):
        raise RuntimeError("not pinned")

    failing_cat = AsyncMock(side_effect=fake_cat)
    with patch("routers.documents.ipfs_cat", new=failing_cat):
        resp = client.get("/api/v1/documents/cidA/diff/cidB")

    assert resp.status_code == 200
    body = resp.json()
    assert "error" in body
    # Both requested CIDs are echoed back alongside the error.
    assert body["cid_a"] == "cidA"
    assert body["cid_b"] == "cidB"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_history_endpoint_follows_parent_chain():
    """Sanity check that the existing history endpoint still works after route alias."""
    # (cid, version, parent_cid), newest first; v1 is the root of the chain.
    links = [("v3", "3", "v2"), ("v2", "2", "v1"), ("v1", "1", None)]
    chain = {}
    for cid, version, parent in links:
        chain[cid] = _wrap({"version": version, "parent_cid": parent}, "x")

    async def fake_cat(cid: str):
        return chain[cid]

    with patch("routers.documents.ipfs_cat", new=AsyncMock(side_effect=fake_cat)):
        resp = client.get("/api/v1/documents/v3/history")

    assert resp.status_code == 200
    body = resp.json()
    assert body["depth"] == 3
    versions = [entry["version"] for entry in body["history"]]
    assert versions == ["3", "2", "1"]
|
||||
Reference in New Issue
Block a user