"""
Document Crawler & Auto-Onboarding Service

Scans local filesystems for compliance documents, classifies them via LLM,
archives to IPFS, and generates compliance gap analysis reports.
"""

import logging
from contextlib import asynccontextmanager

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware

from db import get_pool, close_pool, run_migration
from api.sources import router as sources_router
from api.jobs import router as jobs_router
from api.documents import router as documents_router
from api.reports import router as reports_router

logger = logging.getLogger(__name__)

# Common URL prefix under which every crawler router is mounted.
API_PREFIX = "/api/v1/crawler"


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Run startup and shutdown work around the application's lifetime.

    On startup the database pool is obtained and ``run_migration()`` is
    awaited; on shutdown ``close_pool()`` is awaited.

    Args:
        app: The FastAPI application this lifespan is attached to (unused).
    """
    # Startup
    await get_pool()
    try:
        await run_migration()
        yield
    finally:
        # Shutdown. Also reached when run_migration() raises, so a failed
        # startup does not leave the already-obtained pool open.
        await close_pool()


app = FastAPI(
    title="Document Crawler",
    description="Auto-Onboarding: Filesystem scanning, LLM classification, IPFS archival, gap analysis",
    version="1.0.0",
    lifespan=lifespan,
)

# NOTE(review): a wildcard origin combined with allow_credentials=True lets
# any site issue credentialed cross-origin requests. Left unchanged to keep
# existing clients working -- confirm whether an explicit origin list is
# intended for production.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Register routers
for _router in (sources_router, jobs_router, documents_router, reports_router):
    app.include_router(_router, prefix=API_PREFIX)


@app.get("/health")
async def health():
    """Report service health by running ``SELECT 1`` on a pooled connection.

    Returns:
        ``{"status": "healthy", "service": "document-crawler"}`` when the
        query succeeds; otherwise a ``"degraded"`` payload with the same
        ``"service"`` key plus an ``"error"`` string.
    """
    try:
        pool = await get_pool()
        async with pool.acquire() as conn:
            await conn.fetchval("SELECT 1")
    except Exception as e:
        # Top-level boundary: the probe must answer rather than raise, but
        # the failure is logged so it is not silently swallowed.
        # NOTE(review): the response status stays 200 as before -- confirm
        # whether probes should receive a 503 instead.
        logger.exception("Health check failed")
        return {
            "status": "degraded",
            "service": "document-crawler",
            "error": str(e),
        }
    return {"status": "healthy", "service": "document-crawler"}


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8098)