Files
breakpilot-compliance/dsms-gateway/test_diff.py
T
Benjamin Admin 216c7b8eca
CI / detect-changes (push) Successful in 8s
CI / branch-name (push) Has been skipped
CI / build-sha-integrity (push) Failing after 4s
CI / validate-canonical-controls (push) Successful in 10s
CI / loc-budget (push) Successful in 14s
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Successful in 2m21s
CI / test-go (push) Failing after 37s
CI / iace-gt-coverage (push) Successful in 23s
CI / test-python-backend (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / secret-scan (push) Has been skipped
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / test-python-document-crawler (push) Has been skipped
CI / test-python-dsms-gateway (push) Successful in 17s
feat(iace): DSMS-CID-Badge im Tech-File-Export + aggregierter Bulk-Diff
Punkt 1 — UI-CID-Badge nach erfolgreichem Tech-File-Export:
- archiveTechFile setzt X-DSMS-CID / X-DSMS-Filename / X-DSMS-Size response
  headers + Access-Control-Expose-Headers, sobald DSMS-Archive durchlief
- Split iace_handler_techfile.go (war ueber 500 LOC) → archiveTechFile lebt
  jetzt in iace_handler_techfile_archive.go, setDSMSResponseHeaders als
  pure Helper mit 3 unit tests
- Next.js IACE-Proxy forwarded die X-DSMS-* Header und erkennt jetzt auch
  XLSX/DOCX/MD als Binary-Response (vorher nur PDF/ZIP/octet-stream)
- ExportCIDBadge.tsx zeigt CID, Filename, Groesse + Kopieren-Button +
  "Verlauf anzeigen" (oeffnet CIDHistoryModal)

Punkt 2 — Bulk-Diff Report V1 → V_latest:
- Neuer Endpoint GET /api/v1/documents/{cid}/bulk-diff im dsms-gateway:
  laeuft parent_cid-Kette ab, berechnet chronologische Step-Diffs,
  aggregiert Totals (added/removed lines, metadata_fields_changed,
  binary_steps). Edge-Cases: einzelne Version, binaere Steps, abgebrochene
  Kette
- BulkDiffPanel.tsx zeigt 4-Stat-Header + Step-Tabelle
- CIDHistoryModal bekommt Toggle-Button "Bulk-Diff V1 → V_latest anzeigen"
  neben dem Versions-Counter; damit auch vom IACE-Export-Badge erreichbar

Tests: 3 neue Go-Tests, 4 neue pytest-Tests, alle gruen

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-06-09 09:07:20 +02:00

219 lines
7.1 KiB
Python

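"""
Tests for the version-chain endpoints of the DSMS gateway: the pairwise diff
endpoint added in DSMS Stufe 3, the existing history endpoint, and the
aggregated bulk-diff endpoint.
Mocks ipfs_cat so the tests do not require a running IPFS node.
"""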
import base64
import json
from unittest.mock import AsyncMock, patch
import pytest
from fastapi.testclient import TestClient
from main import app
# Shared TestClient bound to the app imported from main; the tests in this
# module issue their HTTP requests through it with plain (non-awaited) calls.
client = TestClient(app)
def _wrap(metadata: dict, content_text: str) -> str:
    """Build the JSON envelope string used as the mocked routers.documents.ipfs_cat result.

    ``metadata`` is embedded unchanged under ``"metadata"``; ``content_text`` is
    UTF-8 encoded and stored as an ASCII base64 string under ``"content_base64"``.
    """
    encoded_content = base64.b64encode(content_text.encode("utf-8")).decode("ascii")
    envelope = {"metadata": metadata, "content_base64": encoded_content}
    return json.dumps(envelope)
@pytest.mark.asyncio
async def test_diff_text_documents_returns_unified_diff():
    """Two UTF-8 text versions yield a "text" diff plus the changed metadata fields."""
    older_pkg = _wrap(
        {"version": "1", "document_type": "ce_techfile"},
        "alpha\nbeta\ngamma\n",
    )
    newer_pkg = _wrap(
        {"version": "2", "document_type": "ce_techfile"},
        "alpha\nDELTA\ngamma\n",
    )

    async def fake_cat(cid: str):
        # Only "cidA" maps to the older package; every other CID gets the newer one.
        if cid == "cidA":
            return older_pkg
        return newer_pkg

    ipfs_stub = AsyncMock(side_effect=fake_cat)
    with patch("routers.documents.ipfs_cat", new=ipfs_stub):
        resp = client.get("/api/v1/documents/cidA/diff/cidB")

    assert resp.status_code == 200
    payload = resp.json()
    assert payload["kind"] == "text"
    assert payload["cid_a"] == "cidA"
    assert payload["cid_b"] == "cidB"
    # One line was replaced, so the diff must report both an addition and a removal.
    assert payload["added_lines"] >= 1
    assert payload["removed_lines"] >= 1
    assert "DELTA" in payload["diff"]
    # document_type is identical in both versions, so only "version" may show up.
    expected_metadata_diff = {"version": {"old": "1", "new": "2"}}
    assert payload["metadata_diff"] == expected_metadata_diff
@pytest.mark.asyncio
async def test_diff_binary_documents_returns_metadata_only():
    """Content that is not valid UTF-8 is reported as "binary" together with the metadata diff."""
    # These raw bytes cannot be decoded as UTF-8.
    undecodable = b"\xff\xfe\xfd\xfc"

    def _envelope(version: str, raw: bytes) -> str:
        # Same envelope shape as the text tests, but built from raw bytes.
        return json.dumps(
            {
                "metadata": {"version": version},
                "content_base64": base64.b64encode(raw).decode(),
            }
        )

    first_pkg = _envelope("1", undecodable)
    second_pkg = _envelope("2", undecodable + b"\x00")

    async def fake_cat(cid: str):
        if cid == "cidA":
            return first_pkg
        return second_pkg

    ipfs_stub = AsyncMock(side_effect=fake_cat)
    with patch("routers.documents.ipfs_cat", new=ipfs_stub):
        resp = client.get("/api/v1/documents/cidA/diff/cidB")

    assert resp.status_code == 200
    payload = resp.json()
    assert payload["kind"] == "binary"
    expected_metadata_diff = {"version": {"old": "1", "new": "2"}}
    assert payload["metadata_diff"] == expected_metadata_diff
@pytest.mark.asyncio
async def test_diff_handles_fetch_error():
    """A failing ipfs_cat still gives HTTP 200 with an "error" key and both CIDs echoed back."""

    async def failing_cat(cid: str):
        # Every fetch fails, regardless of which CID is requested.
        raise RuntimeError("not pinned")

    ipfs_stub = AsyncMock(side_effect=failing_cat)
    with patch("routers.documents.ipfs_cat", new=ipfs_stub):
        resp = client.get("/api/v1/documents/cidA/diff/cidB")

    assert resp.status_code == 200
    payload = resp.json()
    assert "error" in payload
    assert payload["cid_a"] == "cidA"
    assert payload["cid_b"] == "cidB"
@pytest.mark.asyncio
async def test_history_endpoint_follows_parent_chain():
    """Regression check: after the route alias, history still lists all three chained versions."""
    # (cid, version, parent_cid) for a three-link chain v3 -> v2 -> v1.
    lineage = (
        ("v3", "3", "v2"),
        ("v2", "2", "v1"),
        ("v1", "1", None),
    )
    chain = {
        cid: _wrap({"version": version, "parent_cid": parent}, "x")
        for cid, version, parent in lineage
    }

    async def fake_cat(cid: str):
        return chain[cid]

    ipfs_stub = AsyncMock(side_effect=fake_cat)
    with patch("routers.documents.ipfs_cat", new=ipfs_stub):
        resp = client.get("/api/v1/documents/v3/history")

    assert resp.status_code == 200
    payload = resp.json()
    assert payload["depth"] == 3
    # Entries are expected starting at the requested CID and ending at the root.
    reported_versions = [entry["version"] for entry in payload["history"]]
    assert reported_versions == ["3", "2", "1"]
@pytest.mark.asyncio
async def test_bulk_diff_aggregates_text_chain():
    """Bulk-diff over a three-version text chain reports two ordered steps and summed totals."""
    oldest = _wrap({"version": "1", "parent_cid": None}, "alpha\ngamma\n")
    middle = _wrap({"version": "2", "parent_cid": "v1"}, "alpha\nbeta\ngamma\n")
    newest = _wrap({"version": "3", "parent_cid": "v2"}, "alpha\nbeta\nGAMMA-CHANGED\n")
    chain = {"v3": newest, "v2": middle, "v1": oldest}

    async def fake_cat(cid: str):
        return chain[cid]

    ipfs_stub = AsyncMock(side_effect=fake_cat)
    with patch("routers.documents.ipfs_cat", new=ipfs_stub):
        resp = client.get("/api/v1/documents/v3/bulk-diff")

    assert resp.status_code == 200
    payload = resp.json()
    assert payload["cid_latest"] == "v3"
    assert payload["cid_baseline"] == "v1"
    assert payload["versions"] == 3

    steps = payload["steps"]
    assert len(steps) == 2
    # Steps must come back chronologically: the oldest pair first.
    first_step, second_step = steps[0], steps[1]
    assert first_step["from_version"] == "1"
    assert first_step["to_version"] == "2"
    assert second_step["from_version"] == "2"
    assert second_step["to_version"] == "3"

    totals = payload["totals"]
    # Every step is text, so real line counts must be included in the totals.
    assert totals["added_lines"] > 0
    assert totals["binary_steps"] == 0
    # The "version" metadata field changes in each step and is collected globally.
    assert totals["metadata_fields_changed"] >= 1
@pytest.mark.asyncio
async def test_bulk_diff_single_version_returns_empty_steps():
    """A CID with no parent_cid is its own baseline, so bulk-diff has no steps to report."""
    sole_pkg = _wrap({"version": "1", "parent_cid": None}, "alpha\n")
    chain = {"only": sole_pkg}

    async def fake_cat(cid: str):
        return chain[cid]

    ipfs_stub = AsyncMock(side_effect=fake_cat)
    with patch("routers.documents.ipfs_cat", new=ipfs_stub):
        resp = client.get("/api/v1/documents/only/bulk-diff")

    assert resp.status_code == 200
    payload = resp.json()
    assert payload["versions"] == 1
    assert payload["steps"] == []
    totals = payload["totals"]
    assert totals["added_lines"] == 0
    assert totals["binary_steps"] == 0
    # The response is expected to carry an explanatory "note" key in this case.
    assert "note" in payload
@pytest.mark.asyncio
async def test_bulk_diff_handles_binary_step():
    """A binary step is counted in binary_steps with zero added lines instead of failing."""
    raw_bytes = b"\xff\xfe\xfd"

    def _envelope(version: str, parent, raw: bytes) -> str:
        # Envelope built from raw bytes, which the text-only _wrap helper cannot express.
        return json.dumps({
            "metadata": {"version": version, "parent_cid": parent},
            "content_base64": base64.b64encode(raw).decode(),
        })

    chain = {
        "v2": _envelope("2", "v1", raw_bytes + b"\x00"),
        "v1": _envelope("1", None, raw_bytes),
    }

    async def fake_cat(cid: str):
        return chain[cid]

    ipfs_stub = AsyncMock(side_effect=fake_cat)
    with patch("routers.documents.ipfs_cat", new=ipfs_stub):
        resp = client.get("/api/v1/documents/v2/bulk-diff")

    assert resp.status_code == 200
    payload = resp.json()
    assert payload["versions"] == 2
    assert payload["totals"]["binary_steps"] == 1
    only_step = payload["steps"][0]
    assert only_step["kind"] == "binary"
    assert only_step["added_lines"] == 0
@pytest.mark.asyncio
async def test_bulk_diff_stops_on_fetch_error():
    """An unreachable parent CID ends the walk; the readable part is returned without raising."""
    chain = {
        "v2": _wrap({"version": "2", "parent_cid": "v1-missing"}, "alpha\n"),
    }

    async def fake_cat(cid: str):
        package = chain.get(cid)
        if package is None:
            # Any CID outside the chain behaves like an unpinned document.
            raise RuntimeError("not pinned")
        return package

    ipfs_stub = AsyncMock(side_effect=fake_cat)
    with patch("routers.documents.ipfs_cat", new=ipfs_stub):
        resp = client.get("/api/v1/documents/v2/bulk-diff")

    assert resp.status_code == 200
    payload = resp.json()
    # Only v2 could be read, so the chain effectively has length 1.
    assert payload["versions"] == 1
    assert payload["steps"] == []