feat(dsms): version chain history + diff endpoint + Audit Timeline UI

DSMS Stufe 3 — making the parent_cid chain useful end-to-end.

Gateway (dsms-gateway):
- /api/v1/documents/{cid}/history alias added next to the legacy /documents/{cid}/history (history endpoint itself was already there, just under an inconsistent prefix).
- NEW /api/v1/documents/{cid_a}/diff/{cid_b}: fetches both packages from IPFS, computes a metadata diff (per-field old/new), and renders a unified text diff for utf-8 payloads. Binary payloads return only the metadata diff with a "binary — compare via rendered export" note.
- 4 new pytest cases (mocking ipfs_cat): text diff, binary fallback, fetch error, history chain depth — all green.

Frontend (admin-compliance):
- CIDHistoryModal: lazy-loads /dsms/documents/:cid/history, renders the version chain as a vertical timeline, marks the AKTUELL entry, and per-step exposes a "Diff zu V<n>" button that loads + renders the diff inline (metadata table + unified text diff in a monospace panel).
- AuditTimelinePage: existing CID badge now sits next to a "Verlauf anzeigen" link that opens the modal. Handles both Python's plain-CID audit values and the Go techfile flow's JSON envelope {cid, filename, size} via the extractCID() helper.

This makes "show me how this CE-Akte changed between V2 and V3" self-service in the UI instead of a curl-against-IPFS workflow.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-22 10:10:07 +02:00
parent e2be51b0aa
commit 299375e486
4 changed files with 446 additions and 4 deletions
@@ -256,7 +256,8 @@ async def archive_legal_document(
    }


-@router.get("/documents/{cid}/history")
+@router.get("/api/v1/documents/{cid}/history")
+@router.get("/documents/{cid}/history")  # legacy path, kept for backwards compatibility
 async def get_document_history(cid: str):
    """Follow the parent_cid chain to reconstruct version history."""
    history = []
@@ -285,3 +286,99 @@ async def get_document_history(cid: str):
            break

    return {"cid": cid, "history": history, "depth": len(history)}
+
+
+@router.get("/api/v1/documents/{cid_a}/diff/{cid_b}")
+async def diff_documents(cid_a: str, cid_b: str):
+    """
+    Compare two DSMS document versions by their CIDs.
+
+    Both payloads are fetched with ``ipfs_cat`` and parsed as JSON packages.
+    The response always carries a per-field ``metadata_diff``. When both
+    payloads decode to text, it also carries a unified ``diff`` plus
+    ``added_lines`` / ``removed_lines`` counts (``kind == "text"``). When
+    either payload is binary, the text diff is omitted and a ``note`` is
+    returned instead (``kind == "binary"``).
+
+    Args:
+        cid_a: CID of the "old" side of the comparison.
+        cid_b: CID of the "new" side of the comparison.
+
+    Returns:
+        A JSON-serialisable dict. If either fetch fails, the dict contains
+        an ``error`` message together with both CIDs instead of a diff.
+    """
+    try:
+        raw_a = await ipfs_cat(cid_a)
+        raw_b = await ipfs_cat(cid_b)
+    except Exception as exc:
+        # Fetch failures are reported in the response body rather than raised.
+        return {"error": f"could not fetch one of the CIDs: {exc}", "cid_a": cid_a, "cid_b": cid_b}
+
+    # Parse each side on its own: one malformed payload must not blank out
+    # the other, otherwise the result would look like "no differences".
+    pkg_a = _load_package(raw_a)
+    pkg_b = _load_package(raw_b)
+
+    meta_diff = _diff_metadata(_package_metadata(pkg_a), _package_metadata(pkg_b))
+
+    # Try to decode the content. The Archive flow stores files as base64 in
+    # `content_base64`; older payloads may use `content` (utf-8 text).
+    text_a, text_b, is_binary = _extract_texts(pkg_a, pkg_b)
+
+    if is_binary:
+        return {
+            "cid_a": cid_a,
+            "cid_b": cid_b,
+            "kind": "binary",
+            "metadata_diff": meta_diff,
+            "note": "Binary payload — text diff omitted. Compare via the rendered tech-file export instead.",
+        }
+
+    diff_lines = list(
+        _unified_diff(text_a.splitlines(), text_b.splitlines(), fromfile=cid_a, tofile=cid_b, lineterm="")
+    )
+    # A non-empty unified diff starts with exactly two file-header lines
+    # ("--- a", "+++ b"). Skip them by position: matching on the "---"/"+++"
+    # prefix would also drop real changes such as a removed "---" line
+    # (rendered "----") or an added "++i" line (rendered "+++i").
+    change_lines = diff_lines[2:]
+    return {
+        "cid_a": cid_a,
+        "cid_b": cid_b,
+        "kind": "text",
+        "metadata_diff": meta_diff,
+        "diff": "\n".join(diff_lines),
+        "added_lines": sum(1 for ln in change_lines if ln.startswith("+")),
+        "removed_lines": sum(1 for ln in change_lines if ln.startswith("-")),
+    }
+
+
+def _load_package(raw) -> dict:
+    """Parse a fetched payload as a JSON package; fall back to an empty package."""
+    try:
+        pkg = json.loads(raw)
+    except (ValueError, TypeError, RecursionError):
+        pkg = None
+    if isinstance(pkg, dict):
+        return pkg
+    # Not JSON, or JSON that is not an object (list, string, number, ...):
+    # there is no package structure to read, so compare it as empty.
+    return {"metadata": {}, "content_base64": ""}
+
+
+def _package_metadata(pkg: dict) -> dict:
+    """Return the package's metadata mapping, or {} when missing or not a dict."""
+    meta = pkg.get("metadata")
+    return meta if isinstance(meta, dict) else {}
+
+
+def _diff_metadata(a: dict, b: dict) -> dict:
+    """Map every field whose value differs between *a* and *b* to its old/new pair.
+
+    Fields are visited in sorted order. A field missing on one side is read
+    as ``None``, so a missing field and an explicit ``None`` compare equal.
+    """
+    fields = sorted(set(a) | set(b))
+    return {
+        field: {"old": a.get(field), "new": b.get(field)}
+        for field in fields
+        if a.get(field) != b.get(field)
+    }
+
+
+def _extract_texts(pkg_a: dict, pkg_b: dict) -> tuple[str, str, bool]:
+    """Return (text_a, text_b, is_binary) for the two packages.
+
+    For each package a string ``content`` field is used as-is. Otherwise
+    ``content_base64`` is base64-decoded and read as UTF-8. A package with
+    neither field yields an empty text. ``is_binary`` is True when either
+    payload fails base64 or UTF-8 decoding; that side's text is then "".
+    """
+    import base64
+
+    def decode(package: dict) -> tuple[str, bool]:
+        inline = package.get("content")
+        if isinstance(inline, str):
+            return inline, False
+
+        encoded = package.get("content_base64")
+        if not encoded:
+            return "", False
+
+        try:
+            text = base64.b64decode(encoded).decode("utf-8")
+        except Exception:
+            # Either the base64 was invalid or the bytes are not UTF-8 text.
+            return "", True
+        return text, False
+
+    (text_a, a_is_binary), (text_b, b_is_binary) = decode(pkg_a), decode(pkg_b)
+    return text_a, text_b, a_is_binary or b_is_binary
+
+
+def _unified_diff(a, b, fromfile, tofile, lineterm):
+    """Return difflib's unified-diff generator for *a* vs *b* with two context lines."""
+    from difflib import unified_diff
+
+    return unified_diff(a, b, fromfile, tofile, n=2, lineterm=lineterm)