Files
breakpilot-compliance/document-crawler/archiver/dsms_client.py
Benjamin Boenisch 364d2c69ff feat: Add Document Crawler & Auto-Onboarding service (Phase 1.4)
New standalone Python/FastAPI service for automatic compliance document
scanning, LLM-based classification, IPFS archival, and gap analysis.
Includes extractors (PDF, DOCX, XLSX, PPTX), keyword fallback classifier,
compliance matrix, and full REST API on port 8098.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-13 20:35:15 +01:00

38 lines
1.1 KiB
Python

"""Client for dsms-gateway (IPFS) document archival."""
import httpx
from config import settings
async def archive_document(
    file_path: str,
    file_name: str,
    document_type: str,
    document_id: str,
    auth_token: str = "Bearer system-crawler",
) -> dict:
    """Upload a local file to the DSMS gateway for IPFS archival.

    The file is sent as a multipart upload to
    ``{settings.DSMS_GATEWAY_URL}/api/v1/documents`` together with a fixed
    set of form fields (``version="1"``, ``language="de"``).

    NOTE(review): ``document_type`` is accepted but never read; the form
    field of the same name is always sent as ``"compliance_document"``.
    Confirm with the gateway contract whether that is intentional.

    Args:
        file_path: Path of the file on disk; opened in binary mode.
        file_name: File name reported to the gateway in the multipart part.
        document_type: Currently unused (see note above).
        document_id: Identifier forwarded as the ``document_id`` form field.
        auth_token: Full value of the ``Authorization`` request header.

    Returns:
        The decoded JSON body of the gateway response. It is expected to
        contain ``cid``, ``size`` and ``gateway_url``; that shape is not
        validated here.

    Raises:
        RuntimeError: If the gateway answers with any status other than 200.
        Errors from opening the file, from the HTTP transport, or from JSON
        decoding are not caught and propagate to the caller.
    """
    form_fields = {
        "document_type": "compliance_document",
        "document_id": document_id,
        "version": "1",
        "language": "de",
    }
    request_headers = {"Authorization": auth_token}

    async with httpx.AsyncClient(timeout=120.0) as http:
        # Keep the file handle open only for the duration of the upload.
        with open(file_path, "rb") as upload:
            endpoint = f"{settings.DSMS_GATEWAY_URL}/api/v1/documents"
            response = await http.post(
                endpoint,
                files={"file": (file_name, upload)},
                data=form_fields,
                headers=request_headers,
            )

        # Only an exact 200 counts as success; everything else is an error.
        if response.status_code == 200:
            return response.json()

        raise RuntimeError(f"DSMS archive failed ({response.status_code}): {response.text}")