"""Onboarding report + gap analysis endpoints.""" import json import uuid from fastapi import APIRouter, HTTPException, Header from pydantic import BaseModel from db import get_pool from gap_analysis.analyzer import generate_gap_analysis router = APIRouter(tags=["reports"]) class ReportGenerate(BaseModel): job_id: str | None = None company_profiles: list[str] = ["universal", "data_processor", "ai_user"] @router.post("/reports/generate", status_code=201) async def generate_report(body: ReportGenerate, x_tenant_id: str = Header(...)): pool = await get_pool() tid = uuid.UUID(x_tenant_id) async with pool.acquire() as conn: # Count documents by classification for this tenant rows = await conn.fetch( """SELECT classification, COUNT(*) as cnt FROM crawler_documents WHERE tenant_id = $1 AND classification IS NOT NULL GROUP BY classification""", tid, ) classification_counts = {r["classification"]: r["cnt"] for r in rows} total_docs = await conn.fetchval( "SELECT COUNT(*) FROM crawler_documents WHERE tenant_id = $1", tid ) # Run gap analysis analysis = generate_gap_analysis(classification_counts, body.company_profiles) # Store report async with pool.acquire() as conn: jid = uuid.UUID(body.job_id) if body.job_id else None row = await conn.fetchrow( """INSERT INTO crawler_onboarding_reports (tenant_id, job_id, total_documents_found, classification_breakdown, gaps, compliance_score) VALUES ($1, $2, $3, $4, $5, $6) RETURNING *""", tid, jid, total_docs, json.dumps(classification_counts), json.dumps(analysis["gaps"]), analysis["compliance_score"], ) result = dict(row) result["gap_summary"] = analysis["gap_summary"] result["covered"] = analysis["covered"] result["total_required"] = analysis["total_required"] return result @router.get("/reports") async def list_reports(x_tenant_id: str = Header(...)): pool = await get_pool() async with pool.acquire() as conn: rows = await conn.fetch( "SELECT * FROM crawler_onboarding_reports WHERE tenant_id = $1 ORDER BY created_at DESC LIMIT 20", uuid.UUID(x_tenant_id), ) return [dict(r) for r in rows] @router.get("/reports/{report_id}") async def get_report(report_id: str, x_tenant_id: str = Header(...)): pool = await get_pool() async with pool.acquire() as conn: row = await conn.fetchrow( "SELECT * FROM crawler_onboarding_reports WHERE id = $1 AND tenant_id = $2", uuid.UUID(report_id), uuid.UUID(x_tenant_id), ) if not row: raise HTTPException(404, "Report not found") result = dict(row) # Parse stored JSON if isinstance(result.get("gaps"), str): result["gaps"] = json.loads(result["gaps"]) if isinstance(result.get("classification_breakdown"), str): result["classification_breakdown"] = json.loads(result["classification_breakdown"]) # Add computed summary gaps = result.get("gaps", []) result["gap_summary"] = { "critical": sum(1 for g in gaps if g.get("severity") == "CRITICAL"), "high": sum(1 for g in gaps if g.get("severity") == "HIGH"), "medium": sum(1 for g in gaps if g.get("severity") == "MEDIUM"), } return result