From 216c7b8eca0d65974e098c3b1c05cc959e15dd9a Mon Sep 17 00:00:00 2001
From: Benjamin Admin
Date: Tue, 9 Jun 2026 09:07:20 +0200
Subject: [PATCH] feat(iace): DSMS-CID-Badge im Tech-File-Export + aggregierter Bulk-Diff
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Punkt 1 — UI-CID-Badge nach erfolgreichem Tech-File-Export:
- archiveTechFile setzt X-DSMS-CID / X-DSMS-Filename / X-DSMS-Size response
  headers + Access-Control-Expose-Headers, sobald DSMS-Archive durchlief
- Split iace_handler_techfile.go (war ueber 500 LOC) → archiveTechFile lebt
  jetzt in iace_handler_techfile_archive.go, setDSMSResponseHeaders als pure
  Helper mit 3 unit tests
- Next.js IACE-Proxy forwarded die X-DSMS-* Header und erkennt jetzt auch
  XLSX/DOCX/MD als Binary-Response (vorher nur PDF/ZIP/octet-stream)
- ExportCIDBadge.tsx zeigt CID, Filename, Groesse + Kopieren-Button +
  "Verlauf anzeigen" (oeffnet CIDHistoryModal)

Punkt 2 — Bulk-Diff Report V1 → V_latest:
- Neuer Endpoint GET /api/v1/documents/{cid}/bulk-diff im dsms-gateway:
  laeuft parent_cid-Kette ab, berechnet chronologische Step-Diffs,
  aggregiert Totals (added/removed lines, metadata_fields_changed, binary_steps).
Edge-Cases: einzelne Version, binaere Steps, abgebrochene Kette - BulkDiffPanel.tsx zeigt 4-Stat-Header + Step-Tabelle - CIDHistoryModal bekommt Toggle-Button "Bulk-Diff V1 → V_latest anzeigen" neben dem Versions-Counter; damit auch vom IACE-Export-Badge erreichbar Tests: 3 neue Go-Tests, 4 neue pytest-Tests, alle gruen Co-Authored-By: Claude Opus 4.7 (1M context) --- .../app/api/sdk/v1/iace/[[...path]]/route.ts | 31 +++- .../_components/BulkDiffPanel.tsx | 175 ++++++++++++++++++ .../_components/CIDHistoryModal.tsx | 19 +- .../tech-file/_components/ExportCIDBadge.tsx | 95 ++++++++++ .../sdk/iace/[projectId]/tech-file/page.tsx | 18 ++ .../api/handlers/iace_handler_techfile.go | 35 +--- .../handlers/iace_handler_techfile_archive.go | 65 +++++++ .../iace_handler_techfile_archive_test.go | 76 ++++++++ dsms-gateway/routers/documents.py | 102 ++++++++++ dsms-gateway/test_diff.py | 110 +++++++++++ 10 files changed, 684 insertions(+), 42 deletions(-) create mode 100644 admin-compliance/app/sdk/audit-timeline/_components/BulkDiffPanel.tsx create mode 100644 admin-compliance/app/sdk/iace/[projectId]/tech-file/_components/ExportCIDBadge.tsx create mode 100644 ai-compliance-sdk/internal/api/handlers/iace_handler_techfile_archive.go create mode 100644 ai-compliance-sdk/internal/api/handlers/iace_handler_techfile_archive_test.go diff --git a/admin-compliance/app/api/sdk/v1/iace/[[...path]]/route.ts b/admin-compliance/app/api/sdk/v1/iace/[[...path]]/route.ts index 6c620bc1..f2f2a518 100644 --- a/admin-compliance/app/api/sdk/v1/iace/[[...path]]/route.ts +++ b/admin-compliance/app/api/sdk/v1/iace/[[...path]]/route.ts @@ -66,18 +66,31 @@ async function proxyRequest( const response = await fetch(url, fetchOptions) - // Handle non-JSON responses (PDF exports, ZIP CE technical file) - const responseContentType = response.headers.get('content-type') - if (responseContentType?.includes('application/pdf') || - responseContentType?.includes('application/zip') || - 
responseContentType?.includes('application/octet-stream')) { + // Handle non-JSON responses (PDF/ZIP CE technical file, XLSX/DOCX/MD exports). + const responseContentType = response.headers.get('content-type') || '' + const isBinary = + responseContentType.includes('application/pdf') || + responseContentType.includes('application/zip') || + responseContentType.includes('application/octet-stream') || + responseContentType.includes('application/vnd.openxmlformats-officedocument') || + responseContentType.includes('application/vnd.ms-excel') || + responseContentType.includes('application/msword') || + responseContentType.includes('text/markdown') + if (isBinary) { const blob = await response.blob() + const forwardedHeaders: Record = { + 'Content-Type': responseContentType, + 'Content-Disposition': response.headers.get('content-disposition') || '', + } + // Forward DSMS archive metadata so the frontend can render the CID badge + // (set by archiveTechFile when the backend persisted the export to DSMS). 
+ for (const h of ['x-dsms-cid', 'x-dsms-filename', 'x-dsms-size']) { + const v = response.headers.get(h) + if (v) forwardedHeaders[h] = v + } return new NextResponse(blob, { status: response.status, - headers: { - 'Content-Type': responseContentType, - 'Content-Disposition': response.headers.get('content-disposition') || '', - }, + headers: forwardedHeaders, }) } diff --git a/admin-compliance/app/sdk/audit-timeline/_components/BulkDiffPanel.tsx b/admin-compliance/app/sdk/audit-timeline/_components/BulkDiffPanel.tsx new file mode 100644 index 00000000..6823289f --- /dev/null +++ b/admin-compliance/app/sdk/audit-timeline/_components/BulkDiffPanel.tsx @@ -0,0 +1,175 @@ +'use client' + +import { useEffect, useState } from 'react' + +interface BulkDiffStep { + from: string + from_version: string | null + to: string + to_version: string | null + created_at: string | null + kind: 'text' | 'binary' + added_lines: number + removed_lines: number + metadata_diff_fields: string[] +} + +interface BulkDiffResponse { + cid_latest: string + cid_baseline: string + versions: number + steps: BulkDiffStep[] + totals: { + added_lines: number + removed_lines: number + metadata_fields_changed: number + binary_steps: number + } + note?: string +} + +interface Props { + cid: string + onClose: () => void +} + +function shorten(cid: string): string { + if (cid.length <= 14) return cid + return cid.slice(0, 8) + '…' + cid.slice(-6) +} + +export default function BulkDiffPanel({ cid, onClose }: Props) { + const [data, setData] = useState(null) + const [loading, setLoading] = useState(true) + const [error, setError] = useState(null) + + useEffect(() => { + let cancel = false + setLoading(true) + setError(null) + fetch(`/api/sdk/v1/dsms/documents/${encodeURIComponent(cid)}/bulk-diff`) + .then(async (r) => { + if (!r.ok) throw new Error(`HTTP ${r.status}`) + const json = (await r.json()) as BulkDiffResponse + if (!cancel) setData(json) + }) + .catch((e) => { + if (!cancel) setError(e?.message || 
'Fehler beim Laden') + }) + .finally(() => { + if (!cancel) setLoading(false) + }) + return () => { + cancel = true + } + }, [cid]) + + return ( +
+
+

+ Aggregierter Diff: V1 → V_latest +

+ +
+ + {loading &&
Bulk-Diff wird berechnet…
} + {error &&
{error}
} + + {!loading && !error && data && ( + <> +
+ + + + +
+ + {data.totals.binary_steps > 0 && ( +
+ {data.totals.binary_steps} von {data.steps.length} Schritten binaer — Text-Diff nicht moeglich. +
+ )} + + {data.steps.length === 0 ? ( +
{data.note || 'Keine Vorgaengerversion vorhanden.'}
+ ) : ( +
+ + + + + + + + + + + + + {data.steps.map((step, i) => ( + + + + + + + + + ))} + +
SchrittDatumTyp+Metadaten-Felder
+ V{step.from_version || '?'} → V{step.to_version || '?'} +
+ {shorten(step.from)} → {shorten(step.to)} +
+
+ {step.created_at ? new Date(step.created_at).toLocaleDateString('de-DE') : '—'} + + + {step.kind === 'binary' ? 'binaer' : 'text'} + + + {step.kind === 'binary' ? '—' : step.added_lines} + + {step.kind === 'binary' ? '—' : step.removed_lines} + + {step.metadata_diff_fields.length === 0 + ? '—' + : step.metadata_diff_fields.slice(0, 3).join(', ') + + (step.metadata_diff_fields.length > 3 ? ` (+${step.metadata_diff_fields.length - 3})` : '')} +
+
+ )} + + )} +
+ ) +} + +function Stat({ label, value, tone }: { label: string; value: number; tone: 'positive' | 'negative' | 'neutral' }) { + const color = + tone === 'positive' + ? 'text-emerald-700 dark:text-emerald-400' + : tone === 'negative' + ? 'text-red-700 dark:text-red-400' + : 'text-gray-800 dark:text-gray-200' + return ( +
+
{value.toLocaleString('de-DE')}
+
{label}
+
+ ) +} diff --git a/admin-compliance/app/sdk/audit-timeline/_components/CIDHistoryModal.tsx b/admin-compliance/app/sdk/audit-timeline/_components/CIDHistoryModal.tsx index 95aaac20..a92edc46 100644 --- a/admin-compliance/app/sdk/audit-timeline/_components/CIDHistoryModal.tsx +++ b/admin-compliance/app/sdk/audit-timeline/_components/CIDHistoryModal.tsx @@ -1,6 +1,7 @@ 'use client' import { useEffect, useState } from 'react' +import BulkDiffPanel from './BulkDiffPanel' interface HistoryEntry { cid: string @@ -40,6 +41,7 @@ export default function CIDHistoryModal({ cid, onClose }: Props) { const [diffPair, setDiffPair] = useState<{ a: string; b: string } | null>(null) const [diff, setDiff] = useState(null) const [diffLoading, setDiffLoading] = useState(false) + const [showBulkDiff, setShowBulkDiff] = useState(false) useEffect(() => { let cancel = false @@ -109,9 +111,22 @@ export default function CIDHistoryModal({ cid, onClose }: Props) { {!loading && !error && history.length > 0 && ( <> -
- {history.length} Version{history.length > 1 ? 'en' : ''} in der Kette (neueste oben). +
+
+ {history.length} Version{history.length > 1 ? 'en' : ''} in der Kette (neueste oben). +
+ {history.length > 1 && ( + + )}
+ + {showBulkDiff && setShowBulkDiff(false)} />}
    {history.map((entry, idx) => { const next = history[idx + 1] diff --git a/admin-compliance/app/sdk/iace/[projectId]/tech-file/_components/ExportCIDBadge.tsx b/admin-compliance/app/sdk/iace/[projectId]/tech-file/_components/ExportCIDBadge.tsx new file mode 100644 index 00000000..012f8d03 --- /dev/null +++ b/admin-compliance/app/sdk/iace/[projectId]/tech-file/_components/ExportCIDBadge.tsx @@ -0,0 +1,95 @@ +'use client' + +import { useState } from 'react' +import CIDHistoryModal from '@/app/sdk/audit-timeline/_components/CIDHistoryModal' + +export interface LastExport { + cid: string + filename: string + size: number + format: string +} + +interface Props { + lastExport: LastExport | null + onDismiss: () => void +} + +function formatBytes(n: number): string { + if (n < 1024) return `${n} B` + if (n < 1024 * 1024) return `${(n / 1024).toFixed(1)} KB` + return `${(n / 1024 / 1024).toFixed(2)} MB` +} + +export function ExportCIDBadge({ lastExport, onDismiss }: Props) { + const [showHistory, setShowHistory] = useState(false) + const [copied, setCopied] = useState(false) + + if (!lastExport) return null + + async function copyToClipboard() { + if (!lastExport) return + try { + await navigator.clipboard.writeText(lastExport.cid) + setCopied(true) + setTimeout(() => setCopied(false), 1500) + } catch { + // clipboard not available — silent + } + } + + return ( + <> +
    +
    +
    + + + +
    +
    +
    + CE-Akte exportiert und in DSMS archiviert +
    +
    + {lastExport.filename} · {formatBytes(lastExport.size)} · Format {lastExport.format.toUpperCase()} +
    +
    + + CID + + + {lastExport.cid} + + + +
    +
    + +
    +
    + {showHistory && setShowHistory(false)} />} + + ) +} diff --git a/admin-compliance/app/sdk/iace/[projectId]/tech-file/page.tsx b/admin-compliance/app/sdk/iace/[projectId]/tech-file/page.tsx index 7a4102a1..0220bb9d 100644 --- a/admin-compliance/app/sdk/iace/[projectId]/tech-file/page.tsx +++ b/admin-compliance/app/sdk/iace/[projectId]/tech-file/page.tsx @@ -4,6 +4,7 @@ import React, { useState, useEffect, useRef } from 'react' import { useParams } from 'next/navigation' import { TechFileEditor } from '@/components/sdk/iace/TechFileEditor' import { ReportGenerator } from './_components/ReportGenerator' +import { ExportCIDBadge, type LastExport } from './_components/ExportCIDBadge' import { SECTION_TYPES, STATUS_CONFIG, EXPORT_FORMATS } from './_constants' interface TechFileSection { @@ -116,6 +117,7 @@ export default function TechFilePage() { const [viewingSection, setViewingSection] = useState(null) const [exporting, setExporting] = useState(false) const [showExportMenu, setShowExportMenu] = useState(false) + const [lastExport, setLastExport] = useState(null) const exportMenuRef = useRef(null) // Close export menu when clicking outside @@ -224,6 +226,19 @@ export default function TechFilePage() { a.click() document.body.removeChild(a) window.URL.revokeObjectURL(url) + + // DSMS archive metadata is forwarded by the backend in X-DSMS-* headers + // when archiving succeeded. If headers are absent (DSMS gateway down) + // the export still works but no badge is shown. + const cid = res.headers.get('x-dsms-cid') + if (cid) { + setLastExport({ + cid, + filename: res.headers.get('x-dsms-filename') || `CE-Akte-${projectId}${extension}`, + size: parseInt(res.headers.get('x-dsms-size') || '0', 10) || blob.size, + format, + }) + } } } catch (err) { console.error('Failed to export:', err) @@ -305,6 +320,9 @@ export default function TechFilePage() {
+ {/* DSMS-CID badge nach erfolgreichem Export */} + setLastExport(null)} /> + {/* Progress */}
diff --git a/ai-compliance-sdk/internal/api/handlers/iace_handler_techfile.go b/ai-compliance-sdk/internal/api/handlers/iace_handler_techfile.go index 6df3f194..af83d258 100644 --- a/ai-compliance-sdk/internal/api/handlers/iace_handler_techfile.go +++ b/ai-compliance-sdk/internal/api/handlers/iace_handler_techfile.go @@ -1,12 +1,10 @@ package handlers import ( - "encoding/json" "fmt" "net/http" "strings" - "github.com/breakpilot/ai-compliance-sdk/internal/dsms" "github.com/breakpilot/ai-compliance-sdk/internal/iace" "github.com/breakpilot/ai-compliance-sdk/internal/rbac" "github.com/gin-gonic/gin" @@ -367,7 +365,10 @@ func (h *IACEHandler) ApproveTechFileSection(c *gin.Context) { } // ExportTechFile handles GET /projects/:id/tech-file/export?format=pdf|xlsx|docx|md|json -// Exports all tech file sections in the requested format. +// Exports all tech file sections in the requested format. When the archive +// succeeds, archiveTechFile (in iace_handler_techfile_archive.go) attaches +// X-DSMS-* response headers carrying the resulting CID so the frontend can +// render an inline CID-badge in the export-success path. func (h *IACEHandler) ExportTechFile(c *gin.Context) { projectID, err := uuid.Parse(c.Param("id")) if err != nil { @@ -468,31 +469,3 @@ func (h *IACEHandler) ExportTechFile(c *gin.Context) { }) } } - -// archiveTechFile stores a tech-file export to DSMS (best-effort, non-blocking) -// AND records the resulting CID in the IACE audit trail so the export is -// traceable. The "new_values" JSON carries the CID + filename so the audit -// timeline can later resolve the CID against the DSMS gateway for verify. 
-func (h *IACEHandler) archiveTechFile(c *gin.Context, data []byte, filename string, projectID uuid.UUID) { - result := dsms.Archive(data, filename, "ce_techfile", projectID.String(), "1") - if result == nil || result.CID == "" { - return - } - payload := map[string]string{ - "cid": result.CID, - "filename": filename, - "size": fmt.Sprintf("%d", result.Size), - } - newValues, _ := json.Marshal(payload) - userID := rbac.GetUserID(c) - _ = h.store.AddAuditEntry( - c.Request.Context(), - projectID, - "tech_file_export", - projectID, - iace.AuditActionCreate, - userID.String(), - nil, - newValues, - ) -} diff --git a/ai-compliance-sdk/internal/api/handlers/iace_handler_techfile_archive.go b/ai-compliance-sdk/internal/api/handlers/iace_handler_techfile_archive.go new file mode 100644 index 00000000..1adfef55 --- /dev/null +++ b/ai-compliance-sdk/internal/api/handlers/iace_handler_techfile_archive.go @@ -0,0 +1,65 @@ +package handlers + +import ( + "encoding/json" + "fmt" + + "github.com/breakpilot/ai-compliance-sdk/internal/dsms" + "github.com/breakpilot/ai-compliance-sdk/internal/iace" + "github.com/breakpilot/ai-compliance-sdk/internal/rbac" + "github.com/gin-gonic/gin" + "github.com/google/uuid" +) + +// archiveTechFile stores a tech-file export to DSMS (best-effort, non-blocking) +// AND records the resulting CID in the IACE audit trail so the export is +// traceable. The "new_values" JSON carries the CID + filename so the audit +// timeline can later resolve the CID against the DSMS gateway for verify. +// +// Side-effect: when the archive succeeds, X-DSMS-CID / X-DSMS-Filename / +// X-DSMS-Size response headers are attached so the frontend can render an +// inline CID-badge directly in the export-success path (no separate audit +// query needed). Headers are written before c.Data() and survive the binary +// blob response. 
+func (h *IACEHandler) archiveTechFile(c *gin.Context, data []byte, filename string, projectID uuid.UUID) { + result := dsms.Archive(data, filename, "ce_techfile", projectID.String(), "1") + if result == nil || result.CID == "" { + return + } + + setDSMSResponseHeaders(c, result.CID, filename, result.Size) + + if h.store == nil { + return + } + payload := map[string]string{ + "cid": result.CID, + "filename": filename, + "size": fmt.Sprintf("%d", result.Size), + } + newValues, _ := json.Marshal(payload) + userID := rbac.GetUserID(c) + _ = h.store.AddAuditEntry( + c.Request.Context(), + projectID, + "tech_file_export", + projectID, + iace.AuditActionCreate, + userID.String(), + nil, + newValues, + ) +} + +// setDSMSResponseHeaders attaches the X-DSMS-* headers so the frontend can +// surface the archived CID inline (export-success badge) without re-querying +// the audit trail. Pure helper — no store, no side effects beyond headers. +func setDSMSResponseHeaders(c *gin.Context, cid, filename string, size int) { + if cid == "" { + return + } + c.Header("X-DSMS-CID", cid) + c.Header("X-DSMS-Filename", filename) + c.Header("X-DSMS-Size", fmt.Sprintf("%d", size)) + c.Header("Access-Control-Expose-Headers", "X-DSMS-CID, X-DSMS-Filename, X-DSMS-Size") +} diff --git a/ai-compliance-sdk/internal/api/handlers/iace_handler_techfile_archive_test.go b/ai-compliance-sdk/internal/api/handlers/iace_handler_techfile_archive_test.go new file mode 100644 index 00000000..ca8c5af3 --- /dev/null +++ b/ai-compliance-sdk/internal/api/handlers/iace_handler_techfile_archive_test.go @@ -0,0 +1,76 @@ +package handlers + +import ( + "net/http/httptest" + "testing" + + "github.com/gin-gonic/gin" +) + +func TestSetDSMSResponseHeaders_NonEmptyCID_WritesAllHeaders(t *testing.T) { + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + + setDSMSResponseHeaders(c, "bafytest123", "CE-Akte-FOO.pdf", 42) + + if got := w.Header().Get("X-DSMS-CID"); got != "bafytest123" { + t.Errorf("X-DSMS-CID: 
want bafytest123, got %q", got) + } + if got := w.Header().Get("X-DSMS-Filename"); got != "CE-Akte-FOO.pdf" { + t.Errorf("X-DSMS-Filename: want CE-Akte-FOO.pdf, got %q", got) + } + if got := w.Header().Get("X-DSMS-Size"); got != "42" { + t.Errorf("X-DSMS-Size: want 42, got %q", got) + } + expose := w.Header().Get("Access-Control-Expose-Headers") + if expose == "" { + t.Error("Access-Control-Expose-Headers should be set so the browser surfaces the X-DSMS-* headers across same-origin proxies and CORS") + } + for _, h := range []string{"X-DSMS-CID", "X-DSMS-Filename", "X-DSMS-Size"} { + if !contains(expose, h) { + t.Errorf("Access-Control-Expose-Headers missing %s, got %q", h, expose) + } + } +} + +func TestSetDSMSResponseHeaders_EmptyCID_WritesNothing(t *testing.T) { + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + + setDSMSResponseHeaders(c, "", "irrelevant.pdf", 100) + + if got := w.Header().Get("X-DSMS-CID"); got != "" { + t.Errorf("X-DSMS-CID should be absent for empty CID, got %q", got) + } + if got := w.Header().Get("X-DSMS-Filename"); got != "" { + t.Errorf("X-DSMS-Filename should be absent for empty CID, got %q", got) + } + if got := w.Header().Get("X-DSMS-Size"); got != "" { + t.Errorf("X-DSMS-Size should be absent for empty CID, got %q", got) + } +} + +func TestSetDSMSResponseHeaders_ZeroSize_StillWritesHeader(t *testing.T) { + // A 0-byte archive is degenerate but valid — the frontend still needs the + // CID badge to expose the chain to the user. Don't suppress the header. 
+ w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + + setDSMSResponseHeaders(c, "bafyzero", "empty.pdf", 0) + + if got := w.Header().Get("X-DSMS-CID"); got != "bafyzero" { + t.Errorf("X-DSMS-CID: want bafyzero, got %q", got) + } + if got := w.Header().Get("X-DSMS-Size"); got != "0" { + t.Errorf("X-DSMS-Size: want 0, got %q", got) + } +} + +func contains(s, sub string) bool { + for i := 0; i+len(sub) <= len(s); i++ { + if s[i:i+len(sub)] == sub { + return true + } + } + return false +} diff --git a/dsms-gateway/routers/documents.py b/dsms-gateway/routers/documents.py index 1882c721..2cbe651e 100644 --- a/dsms-gateway/routers/documents.py +++ b/dsms-gateway/routers/documents.py @@ -343,6 +343,108 @@ async def diff_documents(cid_a: str, cid_b: str): } +@router.get("/api/v1/documents/{cid}/bulk-diff") +async def bulk_diff_chain(cid: str): + """ + Aggregate diff across the entire parent_cid chain (V1 → V_latest). + + Walks the history chain once, then computes per-step diffs between every + chronological pair plus running totals. Designed for the "Bulk-Diff + Report" panel in the IACE audit timeline so the user can see how a + tech-file evolved across all versions without clicking each pair. 
+ """ + history: list[dict] = [] + current_cid: Optional[str] = cid + max_depth = 50 + + for _ in range(max_depth): + if current_cid is None: + break + try: + raw = await ipfs_cat(current_cid) + package = json.loads(raw) + except Exception: + break + metadata = package.get("metadata", {}) or {} + history.append({ + "cid": current_cid, + "version": metadata.get("version"), + "created_at": metadata.get("created_at"), + "metadata": metadata, + "package": package, + }) + parent = metadata.get("parent_cid") + if not parent or parent == current_cid: + break + current_cid = parent + + if len(history) < 2: + return { + "cid_latest": cid, + "cid_baseline": cid, + "versions": len(history), + "steps": [], + "totals": {"added_lines": 0, "removed_lines": 0, "metadata_fields_changed": 0, "binary_steps": 0}, + "note": "No predecessor versions found." if history else "CID not found.", + } + + # history is newest→oldest; reverse to walk chronologically. + chronological = list(reversed(history)) + steps: list[dict] = [] + total_added = 0 + total_removed = 0 + binary_steps = 0 + fields_changed: set[str] = set() + + for i in range(len(chronological) - 1): + older = chronological[i] + newer = chronological[i + 1] + meta_diff = _diff_metadata(older["metadata"], newer["metadata"]) + text_a, text_b, is_binary = _extract_texts(older["package"], newer["package"]) + + step: dict = { + "from": older["cid"], + "from_version": older["version"], + "to": newer["cid"], + "to_version": newer["version"], + "created_at": newer["created_at"], + "metadata_diff_fields": sorted(meta_diff.keys()), + } + + if is_binary: + step["kind"] = "binary" + step["added_lines"] = 0 + step["removed_lines"] = 0 + binary_steps += 1 + else: + diff_lines = list( + _unified_diff(text_a.splitlines(), text_b.splitlines(), fromfile=older["cid"], tofile=newer["cid"], lineterm="") + ) + added = sum(1 for ln in diff_lines if ln.startswith("+") and not ln.startswith("+++")) + removed = sum(1 for ln in diff_lines if 
ln.startswith("-") and not ln.startswith("---")) + step["kind"] = "text" + step["added_lines"] = added + step["removed_lines"] = removed + total_added += added + total_removed += removed + + fields_changed.update(meta_diff.keys()) + steps.append(step) + + return { + "cid_latest": cid, + "cid_baseline": chronological[0]["cid"], + "versions": len(history), + "steps": steps, + "totals": { + "added_lines": total_added, + "removed_lines": total_removed, + "metadata_fields_changed": len(fields_changed), + "binary_steps": binary_steps, + }, + } + + def _diff_metadata(a: dict, b: dict) -> dict: """Return per-field change list: {field: {"old": ..., "new": ...}}.""" keys = set(a.keys()) | set(b.keys()) diff --git a/dsms-gateway/test_diff.py b/dsms-gateway/test_diff.py index 2ae8f47d..c4b83c25 100644 --- a/dsms-gateway/test_diff.py +++ b/dsms-gateway/test_diff.py @@ -106,3 +106,113 @@ async def test_history_endpoint_follows_parent_chain(): body = resp.json() assert body["depth"] == 3 assert [h["version"] for h in body["history"]] == ["3", "2", "1"] + + +@pytest.mark.asyncio +async def test_bulk_diff_aggregates_text_chain(): + """Bulk-diff walks the chain V1→V_latest and sums per-step additions/removals.""" + + chain = { + "v3": _wrap({"version": "3", "parent_cid": "v2"}, "alpha\nbeta\nGAMMA-CHANGED\n"), + "v2": _wrap({"version": "2", "parent_cid": "v1"}, "alpha\nbeta\ngamma\n"), + "v1": _wrap({"version": "1", "parent_cid": None}, "alpha\ngamma\n"), + } + + async def fake_cat(cid: str): + return chain[cid] + + with patch("routers.documents.ipfs_cat", new=AsyncMock(side_effect=fake_cat)): + resp = client.get("/api/v1/documents/v3/bulk-diff") + + assert resp.status_code == 200 + body = resp.json() + assert body["cid_latest"] == "v3" + assert body["cid_baseline"] == "v1" + assert body["versions"] == 3 + assert len(body["steps"]) == 2 + + # Step ordering must be chronological (oldest pair first). 
+ assert body["steps"][0]["from_version"] == "1" + assert body["steps"][0]["to_version"] == "2" + assert body["steps"][1]["from_version"] == "2" + assert body["steps"][1]["to_version"] == "3" + + # All steps are text — totals must include real added/removed counts. + assert body["totals"]["added_lines"] > 0 + assert body["totals"]["binary_steps"] == 0 + # Each step bumped the "version" metadata field — collected globally. + assert body["totals"]["metadata_fields_changed"] >= 1 + + +@pytest.mark.asyncio +async def test_bulk_diff_single_version_returns_empty_steps(): + """A CID without a parent_cid is a baseline — no steps to aggregate.""" + + chain = {"only": _wrap({"version": "1", "parent_cid": None}, "alpha\n")} + + async def fake_cat(cid: str): + return chain[cid] + + with patch("routers.documents.ipfs_cat", new=AsyncMock(side_effect=fake_cat)): + resp = client.get("/api/v1/documents/only/bulk-diff") + + assert resp.status_code == 200 + body = resp.json() + assert body["versions"] == 1 + assert body["steps"] == [] + assert body["totals"]["added_lines"] == 0 + assert body["totals"]["binary_steps"] == 0 + assert "note" in body + + +@pytest.mark.asyncio +async def test_bulk_diff_handles_binary_step(): + """Binary diffs cannot be line-counted — bump binary_steps, do not crash.""" + + binary = b"\xff\xfe\xfd" + chain = { + "v2": json.dumps({ + "metadata": {"version": "2", "parent_cid": "v1"}, + "content_base64": base64.b64encode(binary + b"\x00").decode(), + }), + "v1": json.dumps({ + "metadata": {"version": "1", "parent_cid": None}, + "content_base64": base64.b64encode(binary).decode(), + }), + } + + async def fake_cat(cid: str): + return chain[cid] + + with patch("routers.documents.ipfs_cat", new=AsyncMock(side_effect=fake_cat)): + resp = client.get("/api/v1/documents/v2/bulk-diff") + + assert resp.status_code == 200 + body = resp.json() + assert body["versions"] == 2 + assert body["totals"]["binary_steps"] == 1 + assert body["steps"][0]["kind"] == "binary" + assert 
body["steps"][0]["added_lines"] == 0 + + +@pytest.mark.asyncio +async def test_bulk_diff_stops_on_fetch_error(): + """If one CID in the chain is unreachable, return what we have — do not raise.""" + + chain = { + "v2": _wrap({"version": "2", "parent_cid": "v1-missing"}, "alpha\n"), + } + + async def fake_cat(cid: str): + if cid not in chain: + raise RuntimeError("not pinned") + return chain[cid] + + with patch("routers.documents.ipfs_cat", new=AsyncMock(side_effect=fake_cat)): + resp = client.get("/api/v1/documents/v2/bulk-diff") + + assert resp.status_code == 200 + body = resp.json() + # Only v2 was readable — chain is effectively length 1. + assert body["versions"] == 1 + assert body["steps"] == []