New standalone Python/FastAPI service for automatic compliance document scanning, LLM-based classification, IPFS archival, and gap analysis. Includes extractors (PDF, DOCX, XLSX, PPTX), keyword fallback classifier, compliance matrix, and full REST API on port 8098. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
64 lines
1.7 KiB
Python
64 lines
1.7 KiB
Python
"""
Document Crawler & Auto-Onboarding Service
Scans local filesystems for compliance documents, classifies them via LLM,
archives to IPFS, and generates compliance gap analysis reports.
"""
|
|
|
|
from contextlib import asynccontextmanager
|
|
from fastapi import FastAPI
|
|
from fastapi.middleware.cors import CORSMiddleware
|
|
|
|
from db import get_pool, close_pool, run_migration
|
|
from api.sources import router as sources_router
|
|
from api.jobs import router as jobs_router
|
|
from api.documents import router as documents_router
|
|
from api.reports import router as reports_router
|
|
|
|
|
|
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Manage startup and shutdown work for the FastAPI application.

    Startup: awaits ``get_pool()`` and then ``run_migration()``.
    Shutdown: awaits ``close_pool()``.

    ``close_pool()`` is placed in a ``finally`` clause so that it also runs
    when ``run_migration()`` fails or when an exception is propagated into
    this generator while the application is being served. Previously the
    pool was only closed after a clean shutdown.

    Args:
        app: The FastAPI application this lifespan is attached to (unused).

    Yields:
        None. Control returns to the framework while the app is serving.
    """
    # Startup. get_pool() stays outside the try block: if it fails, there is
    # nothing to close (presumably it creates the shared pool; verify in db).
    await get_pool()
    try:
        await run_migration()
        yield
    finally:
        # Shutdown, or cleanup after a failed startup step.
        await close_pool()
|
|
|
|
|
|
# Application object; startup/shutdown handling is delegated to `lifespan`.
app = FastAPI(
    title="Document Crawler",
    description="Auto-Onboarding: Filesystem scanning, LLM classification, IPFS archival, gap analysis",
    version="1.0.0",
    lifespan=lifespan,
)

# CORS policy: every origin, method and header is accepted.
# NOTE(review): wildcard origins combined with allow_credentials=True is a
# very permissive policy -- confirm this is intended outside development.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# All crawler routers share one URL prefix; registration order is preserved.
_API_PREFIX = "/api/v1/crawler"
for _router in (sources_router, jobs_router, documents_router, reports_router):
    app.include_router(_router, prefix=_API_PREFIX)
|
|
|
|
|
|
@app.get("/health")
async def health():
    """Report service health by running ``SELECT 1`` on a pooled connection.

    Returns:
        ``{"status": "healthy", "service": "document-crawler"}`` when the
        query succeeds, otherwise ``{"status": "degraded", "error": <text>}``
        where ``<text>`` is ``str()`` of the caught exception.

    NOTE(review): both outcomes are returned as a normal response body, so
    the failure case is not signalled through the HTTP status code, and the
    raw exception text is exposed to the caller -- confirm this is intended.
    """
    try:
        pool = await get_pool()
        async with pool.acquire() as connection:
            await connection.fetchval("SELECT 1")
    except Exception as exc:
        # Any failure while obtaining the pool or querying counts as degraded.
        return {"status": "degraded", "error": str(exc)}
    return {"status": "healthy", "service": "document-crawler"}
|
|
|
|
|
|
# Script entry point: serve the app directly with uvicorn.
if __name__ == "__main__":
    import uvicorn

    # Listen on all interfaces, port 8098.
    bind_host, bind_port = "0.0.0.0", 8098
    uvicorn.run(app, host=bind_host, port=bind_port)
|