Files
Benjamin Boenisch 364d2c69ff feat: Add Document Crawler & Auto-Onboarding service (Phase 1.4)
New standalone Python/FastAPI service for automatic compliance document
scanning, LLM-based classification, IPFS archival, and gap analysis.
Includes extractors (PDF, DOCX, XLSX, PPTX), keyword fallback classifier,
compliance matrix, and full REST API on port 8098.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-13 20:35:15 +01:00

100 lines
3.4 KiB
Python

"""Onboarding report + gap analysis endpoints."""
import json
import uuid

from fastapi import APIRouter, Header, HTTPException
from pydantic import BaseModel, Field

from db import get_pool
from gap_analysis.analyzer import generate_gap_analysis
# Shared router for all report/gap-analysis endpoints; mounted by the app.
router = APIRouter(tags=["reports"])
class ReportGenerate(BaseModel):
    """Request body for POST /reports/generate."""

    # Optional crawl job to associate the report with (UUID string).
    job_id: str | None = None
    # Company profiles to evaluate during gap analysis. Use a
    # default_factory so each model instance gets its own fresh list
    # rather than sharing one mutable class-level default.
    company_profiles: list[str] = Field(
        default_factory=lambda: ["universal", "data_processor", "ai_user"]
    )
@router.post("/reports/generate", status_code=201)
async def generate_report(body: ReportGenerate, x_tenant_id: str = Header(...)):
    """Generate and persist an onboarding report for a tenant.

    Counts the tenant's classified crawler documents, runs gap analysis
    against the requested company profiles, stores the report row, and
    returns it enriched with computed summary fields.

    Raises:
        HTTPException 400: if the tenant id or job id is not a valid UUID
            (previously an uncaught ValueError surfaced as a 500).
    """
    # Validate external identifiers up front so malformed input yields a
    # clean 400 instead of an internal server error.
    try:
        tid = uuid.UUID(x_tenant_id)
        jid = uuid.UUID(body.job_id) if body.job_id else None
    except ValueError:
        raise HTTPException(400, "Invalid UUID")

    pool = await get_pool()
    # One connection for the whole request: the original acquired the pool
    # twice (once for the counts, once for the insert) for no benefit.
    async with pool.acquire() as conn:
        # Count documents by classification for this tenant.
        rows = await conn.fetch(
            """SELECT classification, COUNT(*) as cnt
               FROM crawler_documents
               WHERE tenant_id = $1 AND classification IS NOT NULL
               GROUP BY classification""",
            tid,
        )
        classification_counts = {r["classification"]: r["cnt"] for r in rows}
        total_docs = await conn.fetchval(
            "SELECT COUNT(*) FROM crawler_documents WHERE tenant_id = $1", tid
        )
        # Gap analysis is pure computation on the counts; its output is
        # persisted (gaps, score) and partially echoed in the response.
        analysis = generate_gap_analysis(classification_counts, body.company_profiles)
        row = await conn.fetchrow(
            """INSERT INTO crawler_onboarding_reports
               (tenant_id, job_id, total_documents_found, classification_breakdown, gaps, compliance_score)
               VALUES ($1, $2, $3, $4, $5, $6)
               RETURNING *""",
            tid, jid, total_docs,
            json.dumps(classification_counts),
            json.dumps(analysis["gaps"]),
            analysis["compliance_score"],
        )

    result = dict(row)
    # Attach computed fields that are not persisted on the report row.
    result["gap_summary"] = analysis["gap_summary"]
    result["covered"] = analysis["covered"]
    result["total_required"] = analysis["total_required"]
    return result
@router.get("/reports")
async def list_reports(x_tenant_id: str = Header(...)):
    """Return the 20 most recent onboarding reports for the tenant.

    Raises:
        HTTPException 400: if the tenant id header is not a valid UUID
            (previously an uncaught ValueError surfaced as a 500).
    """
    try:
        tid = uuid.UUID(x_tenant_id)
    except ValueError:
        raise HTTPException(400, "Invalid UUID")

    pool = await get_pool()
    async with pool.acquire() as conn:
        rows = await conn.fetch(
            "SELECT * FROM crawler_onboarding_reports WHERE tenant_id = $1 ORDER BY created_at DESC LIMIT 20",
            tid,
        )
    return [dict(r) for r in rows]
@router.get("/reports/{report_id}")
async def get_report(report_id: str, x_tenant_id: str = Header(...)):
    """Fetch one report, normalizing stored JSON and adding a gap summary.

    Raises:
        HTTPException 400: if the report id or tenant id is not a valid
            UUID (previously an uncaught ValueError surfaced as a 500).
        HTTPException 404: if no matching report exists for this tenant.
    """
    try:
        rid = uuid.UUID(report_id)
        tid = uuid.UUID(x_tenant_id)
    except ValueError:
        raise HTTPException(400, "Invalid UUID")

    pool = await get_pool()
    async with pool.acquire() as conn:
        row = await conn.fetchrow(
            "SELECT * FROM crawler_onboarding_reports WHERE id = $1 AND tenant_id = $2",
            rid, tid,
        )
    # Release the connection before post-processing / raising.
    if not row:
        raise HTTPException(404, "Report not found")

    result = dict(row)
    # The driver may hand these columns back as raw JSON strings;
    # decode them so the response carries structured data.
    for key in ("gaps", "classification_breakdown"):
        if isinstance(result.get(key), str):
            result[key] = json.loads(result[key])

    # Computed severity roll-up; not stored on the row.
    gaps = result.get("gaps", [])
    result["gap_summary"] = {
        "critical": sum(1 for g in gaps if g.get("severity") == "CRITICAL"),
        "high": sum(1 for g in gaps if g.get("severity") == "HIGH"),
        "medium": sum(1 for g in gaps if g.get("severity") == "MEDIUM"),
    }
    return result