Files
breakpilot-compliance/dsms-gateway/routers/documents.py
Benjamin Admin edbf6d2be5
Build + Deploy / build-admin-compliance (push) Successful in 1m58s
Build + Deploy / build-backend-compliance (push) Successful in 12s
Build + Deploy / build-ai-sdk (push) Successful in 11s
Build + Deploy / build-developer-portal (push) Successful in 11s
Build + Deploy / build-tts (push) Successful in 21s
Build + Deploy / build-document-crawler (push) Successful in 11s
Build + Deploy / build-dsms-gateway (push) Successful in 14s
Build + Deploy / build-dsms-node (push) Successful in 14s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / loc-budget (push) Failing after 15s
CI / secret-scan (push) Has been skipped
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Successful in 2m40s
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / test-go (push) Successful in 40s
CI / test-python-backend (push) Successful in 37s
CI / test-python-document-crawler (push) Successful in 26s
CI / test-python-dsms-gateway (push) Successful in 22s
CI / validate-canonical-controls (push) Successful in 14s
Build + Deploy / trigger-orca (push) Successful in 2m26s
feat(dsms): Stage 2+3 - Evidence/TechFile → DSMS + Version Chains + Audit Timeline
Stage 2A: Evidence upload → automatic DSMS archiving
- After SHA-256 hashing → archive_to_dsms(); the CID is recorded in the audit trail (sketch below)
- Evidence carrying a CID is automatically upgraded to E2 (hash-verified)
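
A minimal sketch of the upload hook; archive_to_dsms() is the helper named above, while record_audit_event() and upgrade_evidence_level() are hypothetical stand-ins for the audit-trail write and the E2 upgrade:

    import hashlib

    async def on_evidence_uploaded(evidence_id: str, content: bytes) -> None:
        # Hash first, then archive; digest and CID both land in the audit trail.
        checksum = hashlib.sha256(content).hexdigest()
        cid = await archive_to_dsms(content, checksum=checksum)  # signature assumed
        await record_audit_event(evidence_id, action="dsms_archive", cid=cid)  # hypothetical
        # Evidence carrying a CID counts as hash-verified, i.e. level E2.
        await upgrade_evidence_level(evidence_id, level="E2")  # hypothetical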

Stage 2B: IACE tech-file export → DSMS
- PDF/Excel/DOCX/Markdown exports are archived to the DSMS
- archiveTechFile() helper covers all 4 formats (upload sketch below)
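
A minimal client-side sketch against the /api/v1/documents endpoint shipped in this commit (gateway URL, bearer auth and the "tech_file" type label are assumptions; archiveTechFile() itself lives in the IACE frontend):

    import httpx

    async def archive_tech_file(data: bytes, filename: str, token: str) -> str:
        # One call per exported format; works identically for PDF, Excel,
        # DOCX and Markdown. The gateway computes the checksum and wraps
        # the payload before adding it to IPFS.
        async with httpx.AsyncClient(base_url="http://dsms-gateway:8000") as client:
            resp = await client.post(
                "/api/v1/documents",
                files={"file": (filename, data)},
                params={"document_type": "tech_file"},  # assumed type label
                headers={"Authorization": f"Bearer {token}"},  # auth scheme assumed
            )
            resp.raise_for_status()
            return resp.json()["cid"]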

Stage 3A: DSMS gateway - parent_cid + history endpoint
- parent_cid + tenant_id fields in DocumentMetadata
- GET /documents/{cid}/history - follows the parent_cid chain (max depth 50); client example below
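
A sketch of a client reading the chain (base URL assumed). The traversal happens server-side, so a single GET returns the full lineage starting at the requested CID:

    import httpx

    async def fetch_history(cid: str) -> list[dict]:
        # Response shape: {"cid": ..., "history": [...], "depth": <n>},
        # with one history entry per version, capped at 50.
        async with httpx.AsyncClient(base_url="http://dsms-gateway:8000") as client:
            resp = await client.get(f"/documents/{cid}/history")
            resp.raise_for_status()
            return resp.json()["history"]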

Stage 3C: Audit timeline UI
- New page /sdk/audit-timeline
- Vertical timeline with colored action dots
- Filters: All, Evidence, DSMS Archive, Control, Document, DSFA (DPIA), VVT (RoPA), TOM
- CID badges for DSMS-archived entries

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-12 13:55:07 +02:00

288 lines
8.0 KiB
Python

"""
Documents router — handles /api/v1/documents and /api/v1/legal-documents endpoints.
"""
import hashlib
import json
import io
from datetime import datetime
from typing import Optional
import httpx
from fastapi import APIRouter, Depends, File, HTTPException, UploadFile
from fastapi.responses import StreamingResponse
from models import DocumentList, DocumentMetadata, StoredDocument
from dependencies import verify_token, ipfs_add, ipfs_cat, ipfs_pin_ls
from config import IPFS_API_URL, IPFS_GATEWAY_URL
router = APIRouter()


@router.post("/api/v1/documents", response_model=StoredDocument)
async def store_document(
    file: UploadFile = File(...),
    document_type: str = "legal_document",
    document_id: Optional[str] = None,
    version: Optional[str] = None,
    language: str = "de",
    _auth: dict = Depends(verify_token)
):
    """
    Stores a document in the DSMS.

    - **file**: The document to store
    - **document_type**: Document type (legal_document, consent_record, audit_log)
    - **document_id**: Optional document ID
    - **version**: Optional version number
    - **language**: Language (default: de)
    """
    content = await file.read()

    # Compute the checksum
    checksum = hashlib.sha256(content).hexdigest()

    # Build the metadata
    metadata = DocumentMetadata(
        document_type=document_type,
        document_id=document_id,
        version=version,
        language=language,
        created_at=datetime.utcnow().isoformat(),
        checksum=checksum,
        encrypted=False
    )

    # Wrap document and metadata into a JSON package. Note: the payload is
    # hex-encoded despite the key name "content_base64"; the readers below
    # decode it with bytes.fromhex().
    package = {
        "metadata": metadata.model_dump(),
        "content_base64": content.hex(),  # hex-encoded for JSON transport
        "filename": file.filename
    }
    package_bytes = json.dumps(package).encode()

    # Add to IPFS
    result = await ipfs_add(package_bytes)
    cid = result.get("Hash")
    size = int(result.get("Size", 0))

    return StoredDocument(
        cid=cid,
        size=size,
        metadata=metadata,
        gateway_url=f"{IPFS_GATEWAY_URL}/ipfs/{cid}",
        timestamp=datetime.utcnow().isoformat()
    )


@router.get("/api/v1/documents/{cid}")
async def get_document(
    cid: str,
    _auth: dict = Depends(verify_token)
):
    """
    Retrieves a document from the DSMS.

    - **cid**: Content identifier (IPFS hash)
    """
    content = await ipfs_cat(cid)
    try:
        package = json.loads(content)
        metadata = package.get("metadata", {})
        original_content = bytes.fromhex(package.get("content_base64", ""))
        filename = package.get("filename", "document")
        return StreamingResponse(
            io.BytesIO(original_content),
            media_type="application/octet-stream",
            headers={
                "Content-Disposition": f'attachment; filename="{filename}"',
                "X-DSMS-Document-Type": metadata.get("document_type", "unknown"),
                "X-DSMS-Checksum": metadata.get("checksum", ""),
                "X-DSMS-Created-At": metadata.get("created_at", "")
            }
        )
    except ValueError:
        # JSONDecodeError, UnicodeDecodeError and malformed hex all subclass
        # ValueError; anything that is not a DSMS package is returned raw.
        return StreamingResponse(
            io.BytesIO(content),
            media_type="application/octet-stream"
        )


@router.get("/api/v1/documents/{cid}/metadata")
async def get_document_metadata(
    cid: str,
    _auth: dict = Depends(verify_token)
):
    """
    Retrieves only the metadata of a document.

    - **cid**: Content identifier (IPFS hash)
    """
    content = await ipfs_cat(cid)
    try:
        package = json.loads(content)
        return {
            "cid": cid,
            "metadata": package.get("metadata", {}),
            "filename": package.get("filename"),
            "size": len(bytes.fromhex(package.get("content_base64", "")))
        }
    except ValueError:
        # Not a DSMS package (or malformed payload): report the raw size only
        return {
            "cid": cid,
            "metadata": {},
            "raw_size": len(content)
        }


@router.get("/api/v1/documents", response_model=DocumentList)
async def list_documents(
    _auth: dict = Depends(verify_token)
):
    """
    Lists all stored documents.
    """
    cids = await ipfs_pin_ls()
    documents = []
    for cid in cids[:100]:  # cap at 100 for performance
        try:
            content = await ipfs_cat(cid)
            package = json.loads(content)
            documents.append({
                "cid": cid,
                "metadata": package.get("metadata", {}),
                "filename": package.get("filename")
            })
        except Exception:
            # Skip non-DSMS objects
            continue
    return DocumentList(
        documents=documents,
        total=len(documents)
    )


@router.delete("/api/v1/documents/{cid}")
async def unpin_document(
    cid: str,
    _auth: dict = Depends(verify_token)
):
    """
    Removes a document from the local pin set.
    The document stays in the network but will be dropped at the next GC run.

    - **cid**: Content identifier (IPFS hash)
    """
    async with httpx.AsyncClient(timeout=30.0) as client:
        response = await client.post(
            f"{IPFS_API_URL}/api/v0/pin/rm",
            params={"arg": cid}
        )
        if response.status_code != 200:
            raise HTTPException(
                status_code=404,
                detail=f"Could not remove pin: {cid}"
            )
    return {
        "status": "unpinned",
        "cid": cid,
        "message": "Document will be removed at the next garbage collection"
    }


@router.post("/api/v1/legal-documents/archive")
async def archive_legal_document(
    document_id: str,
    version: str,
    content: str,
    language: str = "de",
    _auth: dict = Depends(verify_token)
):
    """
    Permanently archives a legal document version.
    Specifically for terms of service (AGB), privacy policies, etc.

    - **document_id**: ID of the legal document
    - **version**: Version number
    - **content**: HTML/Markdown content
    - **language**: Language
    """
    # Compute the checksum
    content_bytes = content.encode('utf-8')
    checksum = hashlib.sha256(content_bytes).hexdigest()

    # Metadata
    metadata = {
        "document_type": "legal_document",
        "document_id": document_id,
        "version": version,
        "language": language,
        "created_at": datetime.utcnow().isoformat(),
        "checksum": checksum,
        "content_type": "text/html"
    }

    # Build the package
    package = {
        "metadata": metadata,
        "content": content,
        "archived_at": datetime.utcnow().isoformat()
    }
    package_bytes = json.dumps(package, ensure_ascii=False).encode('utf-8')

    # Add to IPFS
    result = await ipfs_add(package_bytes)
    cid = result.get("Hash")

    return {
        "cid": cid,
        "document_id": document_id,
        "version": version,
        "checksum": checksum,
        "archived_at": datetime.utcnow().isoformat(),
        "verification_url": f"{IPFS_GATEWAY_URL}/ipfs/{cid}"
    }


@router.get("/documents/{cid}/history")
async def get_document_history(cid: str):
    """Follow the parent_cid chain to reconstruct version history."""
    history = []
    current_cid = cid
    max_depth = 50  # prevent infinite loops

    for _ in range(max_depth):
        try:
            raw = await ipfs_cat(current_cid)
            package = json.loads(raw)
            metadata = package.get("metadata", {})
            history.append({
                "cid": current_cid,
                "version": metadata.get("version"),
                "document_type": metadata.get("document_type"),
                "document_id": metadata.get("document_id"),
                "parent_cid": metadata.get("parent_cid"),
                "created_at": metadata.get("created_at"),
                "checksum": metadata.get("checksum"),
            })
            parent = metadata.get("parent_cid")
            if not parent:
                break
            current_cid = parent
        except Exception:
            # Unresolvable or non-DSMS CID: stop walking the chain
            break

    return {"cid": cid, "history": history, "depth": len(history)}