New standalone Python/FastAPI service for automatic compliance document scanning, LLM-based classification, IPFS archival, and gap analysis. Includes extractors (PDF, DOCX, XLSX, PPTX), keyword fallback classifier, compliance matrix, and full REST API on port 8098. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
38 lines
1.1 KiB
Python
38 lines
1.1 KiB
Python
"""Client for dsms-gateway (IPFS) document archival."""
|
|
|
|
import httpx
|
|
|
|
from config import settings
|
|
|
|
|
|
async def archive_document(
    file_path: str,
    file_name: str,
    document_type: str,
    document_id: str,
    auth_token: str = "Bearer system-crawler",
) -> dict:
    """Upload a local file to the DSMS gateway for IPFS archival.

    The file is sent as a multipart upload to ``/api/v1/documents`` under
    ``settings.DSMS_GATEWAY_URL``, together with a fixed set of form fields.

    Args:
        file_path: Path of the file on disk; opened in binary mode.
        file_name: File name reported to the gateway in the multipart part.
        document_type: Currently not forwarded; the ``document_type`` form
            field is always sent as ``"compliance_document"``.
            NOTE(review): confirm whether this argument is meant to be sent.
        document_id: Identifier sent as the ``document_id`` form field.
        auth_token: Value placed verbatim in the ``Authorization`` header.

    Returns:
        The decoded JSON body of the gateway response (expected to carry
        ``cid``, ``size`` and ``gateway_url``).

    Raises:
        RuntimeError: If the gateway answers with any status other than 200.
        OSError: If ``file_path`` cannot be opened.
    """
    # Metadata that accompanies every upload; version and language are fixed.
    form_fields = {
        "document_type": "compliance_document",
        "document_id": document_id,
        "version": "1",
        "language": "de",
    }
    request_headers = {"Authorization": auth_token}

    async with httpx.AsyncClient(timeout=120.0) as client:
        # Keep the file handle open only for the duration of the upload.
        with open(file_path, "rb") as upload:
            endpoint = f"{settings.DSMS_GATEWAY_URL}/api/v1/documents"
            response = await client.post(
                endpoint,
                files={"file": (file_name, upload)},
                data=form_fields,
                headers=request_headers,
            )

        if response.status_code == 200:
            return response.json()

        raise RuntimeError(f"DSMS archive failed ({response.status_code}): {response.text}")
|