New standalone Python/FastAPI service for automatic compliance document scanning, LLM-based classification, IPFS archival, and gap analysis. Includes extractors (PDF, DOCX, XLSX, PPTX), keyword fallback classifier, compliance matrix, and full REST API on port 8098. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
100 lines
3.4 KiB
Python
100 lines
3.4 KiB
Python
"""Onboarding report + gap analysis endpoints."""
|
|
|
|
import json
|
|
import uuid
|
|
from fastapi import APIRouter, HTTPException, Header
|
|
from pydantic import BaseModel
|
|
|
|
from db import get_pool
|
|
from gap_analysis.analyzer import generate_gap_analysis
|
|
|
|
# Router for onboarding-report endpoints; mounted by the app with no extra prefix.
router = APIRouter(tags=["reports"])
|
|
|
|
|
|
class ReportGenerate(BaseModel):
    """Request body for POST /reports/generate."""

    # Optional crawl job to associate the report with; parsed with
    # uuid.UUID() downstream, so it must be a valid UUID string when set.
    job_id: str | None = None
    # Compliance profiles to evaluate in the gap analysis. NOTE: Pydantic
    # deep-copies mutable field defaults per instance, so sharing this list
    # across requests is not a concern here.
    company_profiles: list[str] = ["universal", "data_processor", "ai_user"]
|
|
|
|
|
|
@router.post("/reports/generate", status_code=201)
async def generate_report(body: ReportGenerate, x_tenant_id: str = Header(...)):
    """Run a gap analysis over the tenant's classified documents and persist it.

    Counts crawler documents per classification for the tenant, feeds the
    counts to ``generate_gap_analysis`` for the requested company profiles,
    inserts a report row, and returns it enriched with the analysis summary.

    Args:
        body: Generation options (optional job id, company profiles).
        x_tenant_id: Tenant UUID from the ``X-Tenant-ID`` header.

    Raises:
        HTTPException: 400 if the tenant id or job id is not a valid UUID.
    """
    # Validate UUIDs up front: a malformed header/body value would otherwise
    # escape as ValueError and surface to the client as a 500.
    try:
        tid = uuid.UUID(x_tenant_id)
    except ValueError:
        raise HTTPException(400, "Invalid X-Tenant-ID header")
    try:
        jid = uuid.UUID(body.job_id) if body.job_id else None
    except ValueError:
        raise HTTPException(400, "Invalid job_id")

    pool = await get_pool()

    async with pool.acquire() as conn:
        # Count documents by classification for this tenant
        rows = await conn.fetch(
            """SELECT classification, COUNT(*) as cnt
            FROM crawler_documents
            WHERE tenant_id = $1 AND classification IS NOT NULL
            GROUP BY classification""",
            tid,
        )
        classification_counts = {r["classification"]: r["cnt"] for r in rows}

        total_docs = await conn.fetchval(
            "SELECT COUNT(*) FROM crawler_documents WHERE tenant_id = $1", tid
        )

    # Run gap analysis (pure computation — no need to hold a connection).
    analysis = generate_gap_analysis(classification_counts, body.company_profiles)

    # Store report
    async with pool.acquire() as conn:
        row = await conn.fetchrow(
            """INSERT INTO crawler_onboarding_reports
            (tenant_id, job_id, total_documents_found, classification_breakdown, gaps, compliance_score)
            VALUES ($1, $2, $3, $4, $5, $6)
            RETURNING *""",
            tid, jid, total_docs,
            json.dumps(classification_counts),
            json.dumps(analysis["gaps"]),
            analysis["compliance_score"],
        )

    result = dict(row)
    # Return parsed structures rather than the stored JSON strings so the
    # POST response shape matches GET /reports/{report_id}.
    result["gaps"] = analysis["gaps"]
    result["classification_breakdown"] = classification_counts
    result["gap_summary"] = analysis["gap_summary"]
    result["covered"] = analysis["covered"]
    result["total_required"] = analysis["total_required"]
    return result
|
|
|
|
|
|
@router.get("/reports")
async def list_reports(x_tenant_id: str = Header(...)):
    """Return the 20 most recent onboarding reports for the tenant.

    Raises:
        HTTPException: 400 if the ``X-Tenant-ID`` header is not a valid UUID.
    """
    # Parse the header before touching the pool: a malformed value would
    # otherwise raise ValueError and turn into a 500.
    try:
        tid = uuid.UUID(x_tenant_id)
    except ValueError:
        raise HTTPException(400, "Invalid X-Tenant-ID header")

    pool = await get_pool()
    async with pool.acquire() as conn:
        rows = await conn.fetch(
            "SELECT * FROM crawler_onboarding_reports WHERE tenant_id = $1 ORDER BY created_at DESC LIMIT 20",
            tid,
        )
    return [dict(r) for r in rows]
|
|
|
|
|
|
@router.get("/reports/{report_id}")
async def get_report(report_id: str, x_tenant_id: str = Header(...)):
    """Fetch one report, parsing stored JSON and adding a severity summary.

    Raises:
        HTTPException: 400 for a malformed report id or tenant header,
            404 if no matching report exists for this tenant.
    """
    # Validate both UUIDs first so a malformed path segment or header yields
    # a 400 instead of an unhandled ValueError (500).
    try:
        rid = uuid.UUID(report_id)
        tid = uuid.UUID(x_tenant_id)
    except ValueError:
        raise HTTPException(400, "Invalid UUID")

    pool = await get_pool()
    async with pool.acquire() as conn:
        row = await conn.fetchrow(
            "SELECT * FROM crawler_onboarding_reports WHERE id = $1 AND tenant_id = $2",
            rid, tid,
        )
    if not row:
        raise HTTPException(404, "Report not found")

    result = dict(row)
    # Parse stored JSON (the driver may hand JSON columns back as strings).
    if isinstance(result.get("gaps"), str):
        result["gaps"] = json.loads(result["gaps"])
    if isinstance(result.get("classification_breakdown"), str):
        result["classification_breakdown"] = json.loads(result["classification_breakdown"])

    # Add computed summary. Use `or []` so a NULL gaps column (key present,
    # value None) does not crash the generator expressions below.
    gaps = result.get("gaps") or []
    result["gap_summary"] = {
        "critical": sum(1 for g in gaps if g.get("severity") == "CRITICAL"),
        "high": sum(1 for g in gaps if g.get("severity") == "HIGH"),
        "medium": sum(1 for g in gaps if g.get("severity") == "MEDIUM"),
    }
    return result
|