compliance-scanner-agent/compliance-agent/src/api/handlers/pentest_handlers/export.rs

use std::sync::Arc;

use axum::extract::{Extension, Path};
use axum::http::StatusCode;
use axum::response::IntoResponse;
use axum::Json;
use mongodb::bson::doc;
use serde::Deserialize;

use futures_util::StreamExt;

use compliance_core::models::dast::DastFinding;
use compliance_core::models::finding::Finding;
use compliance_core::models::pentest::*;
use compliance_core::models::sbom::SbomEntry;
use compliance_core::tenant_ctx::TenantCtx;

use crate::agent::ComplianceAgent;

use super::super::dto::{collect_cursor_async, tenant_db};

/// Axum `Extension` extractor type wrapping the shared, reference-counted
/// [`ComplianceAgent`] that handlers in this module receive.
type AgentExt = Extension<Arc<ComplianceAgent>>;

/// JSON request body accepted by [`export_session_report`].
///
/// `Debug` is intentionally not derived here as written, which keeps the
/// password out of accidental `{:?}` log output.
#[derive(Deserialize)]
pub struct ExportBody {
    /// Password handed to the encrypted report generator. The export handler
    /// rejects values whose length is below 8.
    pub password: String,
    /// Requester display name. Defaults to an empty string when the field is
    /// absent; the export handler substitutes `"Unknown"` for an empty value.
    ///
    /// NOTE(review): originally annotated "(from auth)", but the value is
    /// deserialized from the request body — presumably the client fills it in
    /// from its authenticated session; confirm before treating it as trusted.
    #[serde(default)]
    pub requester_name: String,
    /// Requester email. Defaults to an empty string when the field is absent
    /// and is forwarded to the report context unchanged.
    ///
    /// NOTE(review): same caveat as `requester_name` — taken from the request
    /// body, not verified against the auth context in this module.
    #[serde(default)]
    pub requester_email: String,
}

/// POST /api/v1/pentest/sessions/:id/export — Export an encrypted pentest report archive
#[tracing::instrument(skip_all, fields(session_id = %id))]
pub async fn export_session_report(
    Extension(agent): AgentExt,
    tenant: TenantCtx,
    Path(id): Path<String>,
    Json(body): Json<ExportBody>,
) -> Result<axum::response::Response, (StatusCode, String)> {
    let oid = mongodb::bson::oid::ObjectId::parse_str(&id)
        .map_err(|_| (StatusCode::BAD_REQUEST, "Invalid session ID".to_string()))?;
    let db = tenant_db(&agent, &tenant)
        .await
        .map_err(|s| (s, "failed to acquire tenant database".to_string()))?;

    if body.password.len() < 8 {
        return Err((
            StatusCode::BAD_REQUEST,
            "Password must be at least 8 characters".to_string(),
        ));
    }

    // Fetch session
    let session = db
        .pentest_sessions()
        .find_one(doc! { "_id": oid })
        .await
        .map_err(|e| {
            (
                StatusCode::INTERNAL_SERVER_ERROR,
                format!("Database error: {e}"),
            )
        })?
        .ok_or_else(|| (StatusCode::NOT_FOUND, "Session not found".to_string()))?;

    // Resolve target name
    let target = if let Ok(tid) = mongodb::bson::oid::ObjectId::parse_str(&session.target_id) {
        db.dast_targets()
            .find_one(doc! { "_id": tid })
            .await
            .ok()
            .flatten()
    } else {
        None
    };
    let target_name = target
        .as_ref()
        .map(|t| t.name.clone())
        .unwrap_or_else(|| "Unknown Target".to_string());
    let target_url = target
        .as_ref()
        .map(|t| t.base_url.clone())
        .unwrap_or_default();

    // Fetch attack chain nodes
    let nodes: Vec<AttackChainNode> = match db
        .attack_chain_nodes()
        .find(doc! { "session_id": &id })
        .sort(doc! { "started_at": 1 })
        .await
    {
        Ok(cursor) => collect_cursor_async(cursor).await,
        Err(_) => Vec::new(),
    };

    // Fetch DAST findings for this session, then deduplicate
    let raw_findings: Vec<DastFinding> = match db
        .dast_findings()
        .find(doc! { "session_id": &id })
        .sort(doc! { "severity": -1, "created_at": -1 })
        .await
    {
        Ok(cursor) => collect_cursor_async(cursor).await,
        Err(_) => Vec::new(),
    };
    let raw_count = raw_findings.len();
    let findings = crate::pipeline::dedup::dedup_dast_findings(raw_findings);
    if findings.len() < raw_count {
        tracing::info!(
            "Deduped DAST findings for session {id}: {raw_count} → {}",
            findings.len()
        );
    }

    // Fetch SAST findings, SBOM, and code context for the linked repository
    let repo_id = session
        .repo_id
        .clone()
        .or_else(|| target.as_ref().and_then(|t| t.repo_id.clone()));

    let (sast_findings, sbom_entries, code_context) = if let Some(ref rid) = repo_id {
        let sast: Vec<Finding> = match db
            .findings()
            .find(doc! {
                "repo_id": rid,
                "status": { "$in": ["open", "triaged"] },
            })
            .sort(doc! { "severity": -1 })
            .limit(100)
            .await
        {
            Ok(mut cursor) => {
                let mut results = Vec::new();
                while let Some(Ok(f)) = cursor.next().await {
                    results.push(f);
                }
                results
            }
            Err(_) => Vec::new(),
        };

        let sbom: Vec<SbomEntry> = match db
            .sbom_entries()
            .find(doc! {
                "repo_id": rid,
                "known_vulnerabilities": { "$exists": true, "$ne": [] },
            })
            .limit(50)
            .await
        {
            Ok(mut cursor) => {
                let mut results = Vec::new();
                while let Some(Ok(e)) = cursor.next().await {
                    results.push(e);
                }
                results
            }
            Err(_) => Vec::new(),
        };

        // Build code context from graph nodes
        let code_ctx: Vec<CodeContextHint> = match db
            .graph_nodes()
            .find(doc! { "repo_id": rid, "is_entry_point": true })
            .limit(50)
            .await
        {
            Ok(mut cursor) => {
                let mut nodes_vec = Vec::new();
                while let Some(Ok(n)) = cursor.next().await {
                    let linked_vulns: Vec<String> = sast
                        .iter()
                        .filter(|f| f.file_path.as_deref() == Some(&n.file_path))
                        .map(|f| {
                            format!(
                                "[{}] {}: {} (line {})",
                                f.severity,
                                f.scanner,
                                f.title,
                                f.line_number.unwrap_or(0)
                            )
                        })
                        .collect();
                    nodes_vec.push(CodeContextHint {
                        endpoint_pattern: n.qualified_name.clone(),
                        handler_function: n.name.clone(),
                        file_path: n.file_path.clone(),
                        code_snippet: String::new(),
                        known_vulnerabilities: linked_vulns,
                    });
                }
                nodes_vec
            }
            Err(_) => Vec::new(),
        };

        (sast, sbom, code_ctx)
    } else {
        (Vec::new(), Vec::new(), Vec::new())
    };

    let config = session.config.clone();
    let ctx = crate::pentest::report::ReportContext {
        session,
        target_name,
        target_url,
        findings,
        attack_chain: nodes,
        requester_name: if body.requester_name.is_empty() {
            "Unknown".to_string()
        } else {
            body.requester_name
        },
        requester_email: body.requester_email,
        config,
        sast_findings,
        sbom_entries,
        code_context,
    };

    let report = crate::pentest::generate_encrypted_report(&ctx, &body.password)
        .await
        .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e))?;

    let response = serde_json::json!({
        "archive_base64": base64::Engine::encode(&base64::engine::general_purpose::STANDARD, &report.archive),
        "sha256": report.sha256,
        "filename": format!("pentest-report-{id}.zip"),
    });

    Ok(Json(response).into_response())
}