// compliance-scanner-agent/compliance-core/src/models/pentest.rs

use std::collections::HashMap;

use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};

/// Status of a pentest session.
///
/// Because of `rename_all = "snake_case"`, the variants are (de)serialized as
/// the strings `running`, `paused`, `completed` and `failed`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum PentestStatus {
    /// Initial state assigned by `PentestSession::new`.
    Running,
    /// Session is paused.
    Paused,
    /// Session has completed.
    Completed,
    /// Session has failed.
    Failed,
}

impl std::fmt::Display for PentestStatus {
    /// Writes the lowercase label for the status (`running`, `paused`,
    /// `completed` or `failed`).
    ///
    /// The label is written verbatim: width, fill and alignment flags on the
    /// formatter are not applied.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Resolve the label first so there is a single write call.
        let label = match self {
            Self::Running => "running",
            Self::Paused => "paused",
            Self::Completed => "completed",
            Self::Failed => "failed",
        };
        f.write_str(label)
    }
}

/// Strategy for the AI pentest orchestrator.
///
/// Because of `rename_all = "snake_case"`, the variants are (de)serialized as
/// `quick`, `comprehensive`, `targeted`, `aggressive` and `stealth`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum PentestStrategy {
    /// Quick scan focusing on common vulnerabilities
    Quick,
    /// Standard comprehensive scan
    Comprehensive,
    /// Focus on specific vulnerability types guided by SAST/SBOM
    Targeted,
    /// Aggressive testing with more payloads and deeper exploitation
    Aggressive,
    /// Stealth mode with slower rate and fewer noisy payloads
    Stealth,
}

impl std::fmt::Display for PentestStrategy {
    /// Writes the lowercase strategy name (`quick`, `comprehensive`,
    /// `targeted`, `aggressive` or `stealth`).
    ///
    /// The name is written verbatim: width, fill and alignment flags on the
    /// formatter are not applied.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let name = match self {
            Self::Quick => "quick",
            Self::Comprehensive => "comprehensive",
            Self::Targeted => "targeted",
            Self::Aggressive => "aggressive",
            Self::Stealth => "stealth",
        };
        f.write_str(name)
    }
}

/// Authentication mode for the pentest target.
///
/// Because of `rename_all = "snake_case"`, the variants are (de)serialized as
/// `none`, `manual` and `auto_register`. `Default` yields [`AuthMode::None`].
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum AuthMode {
    /// Default mode; no authentication is configured.
    #[default]
    None,
    /// NOTE(review): presumably uses the credentials supplied in
    /// `PentestAuthConfig` (`username` / `password`) — confirm with the orchestrator.
    Manual,
    /// NOTE(review): presumably the orchestrator registers its own test
    /// account (see the registration / IMAP fields of `PentestAuthConfig`) — confirm.
    AutoRegister,
}

/// Target environment classification.
///
/// Because of `rename_all = "snake_case"`, the variants are (de)serialized as
/// `development`, `staging` and `production`. `Default` yields
/// [`Environment::Development`].
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum Environment {
    /// Default classification.
    #[default]
    Development,
    /// Staging environment.
    Staging,
    /// Production environment.
    Production,
}

impl std::fmt::Display for Environment {
    /// Writes the capitalised environment name (`Development`, `Staging` or
    /// `Production`).
    ///
    /// This differs from the serde representation, which is lowercase. The
    /// name is written verbatim: width, fill and alignment flags on the
    /// formatter are not applied.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let title = match self {
            Self::Development => "Development",
            Self::Staging => "Staging",
            Self::Production => "Production",
        };
        f.write_str(title)
    }
}

/// Tester identity for the engagement record.
///
/// `Default` yields empty strings for both fields.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct TesterInfo {
    /// Tester's name.
    pub name: String,
    /// Tester's email address (stored as a plain string; not validated here).
    pub email: String,
}

/// Authentication configuration for the pentest session.
///
/// `Debug` is implemented by hand rather than derived so that the secret
/// fields (`password`, `imap_password`) are never written to logs or panic
/// messages when this struct — or a `PentestConfig` containing it — is
/// formatted with `{:?}`. Serialization is unaffected: serde still reads and
/// writes the real values.
#[derive(Clone, Serialize, Deserialize, Default)]
pub struct PentestAuthConfig {
    /// How the orchestrator should authenticate against the target.
    pub mode: AuthMode,
    /// Login name, if one is configured.
    pub username: Option<String>,
    /// Login password, if one is configured. Redacted in `Debug` output.
    pub password: Option<String>,
    /// Optional — if omitted the orchestrator uses Playwright to discover it.
    pub registration_url: Option<String>,
    /// Base email for plus-addressing (e.g. `pentest@scanner.example.com`).
    /// The orchestrator generates `base+{session_id}@domain` per session.
    pub verification_email: Option<String>,
    /// IMAP server to poll for verification emails (e.g. `imap.example.com`).
    pub imap_host: Option<String>,
    /// IMAP port (default 993 for TLS).
    pub imap_port: Option<u16>,
    /// IMAP username (defaults to `verification_email` if omitted).
    pub imap_username: Option<String>,
    /// IMAP password / app-specific password. Redacted in `Debug` output.
    pub imap_password: Option<String>,
    /// Falls back to `false` when the key is absent from the input.
    #[serde(default)]
    pub cleanup_test_user: bool,
}

impl std::fmt::Debug for PentestAuthConfig {
    /// Formats every field like the derived implementation would, except that
    /// the two password fields print `Some("<redacted>")` / `None` so only the
    /// presence of a secret is revealed, never its value.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        const REDACTED: &str = "<redacted>";
        let password = self.password.as_ref().map(|_| REDACTED);
        let imap_password = self.imap_password.as_ref().map(|_| REDACTED);

        f.debug_struct("PentestAuthConfig")
            .field("mode", &self.mode)
            .field("username", &self.username)
            .field("password", &password)
            .field("registration_url", &self.registration_url)
            .field("verification_email", &self.verification_email)
            .field("imap_host", &self.imap_host)
            .field("imap_port", &self.imap_port)
            .field("imap_username", &self.imap_username)
            .field("imap_password", &imap_password)
            .field("cleanup_test_user", &self.cleanup_test_user)
            .finish()
    }
}

/// Full wizard configuration for a pentest session.
///
/// Fields are grouped by wizard step. Fields marked `#[serde(default)]` fall
/// back to their type's `Default` value when the key is absent from the input;
/// `app_url` is the only field without either an `Option` or a default.
///
/// NOTE(review): `custom_headers` values may carry credentials (e.g. auth
/// tokens) and are printed by the derived `Debug` — confirm this is acceptable
/// wherever the config is logged.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PentestConfig {
    // Step 1: Target & Scope
    /// URL of the application under test (required).
    pub app_url: String,
    /// Git repository URL, if one was provided.
    pub git_repo_url: Option<String>,
    /// Branch name, if one was provided.
    pub branch: Option<String>,
    /// Commit hash, if one was provided.
    pub commit_hash: Option<String>,
    /// Free-form application type label.
    pub app_type: Option<String>,
    /// Rate limit for the scan. NOTE(review): unit (e.g. requests per second)
    /// is not visible here — confirm against the orchestrator.
    pub rate_limit: Option<u32>,

    // Step 2: Authentication
    /// Authentication settings; defaults to `PentestAuthConfig::default()`.
    #[serde(default)]
    pub auth: PentestAuthConfig,
    /// Extra header name/value pairs; defaults to an empty map.
    #[serde(default)]
    pub custom_headers: HashMap<String, String>,

    // Step 3: Strategy & Instructions
    /// Strategy as a free-form string. NOTE(review): presumably one of the
    /// `PentestStrategy` names — confirm where this is parsed.
    pub strategy: Option<String>,
    /// Whether destructive tests are allowed; defaults to `false`.
    #[serde(default)]
    pub allow_destructive: bool,
    /// Optional free-text instructions supplied at session start.
    pub initial_instructions: Option<String>,
    /// Scope exclusions; defaults to an empty list.
    #[serde(default)]
    pub scope_exclusions: Vec<String>,

    // Step 4: Disclaimer & Confirm
    /// Whether the disclaimer was accepted; defaults to `false`.
    #[serde(default)]
    pub disclaimer_accepted: bool,
    /// Timestamp of disclaimer acceptance, if recorded.
    pub disclaimer_accepted_at: Option<DateTime<Utc>>,
    /// Target environment; defaults to `Environment::Development`.
    #[serde(default)]
    pub environment: Environment,
    /// Tester identity; defaults to empty name and email.
    #[serde(default)]
    pub tester: TesterInfo,
    /// Maximum session duration in minutes, if limited.
    pub max_duration_minutes: Option<u32>,
    /// Defaults to `false`. NOTE(review): what is skipped in this mode is not
    /// visible in this file — confirm with the wizard/orchestrator code.
    #[serde(default)]
    pub skip_mode: bool,
}

/// Identity provider type for cleanup routing.
///
/// Because of `rename_all = "snake_case"`, the variants are (de)serialized as
/// `keycloak`, `auth0`, `okta`, `firebase` and `custom`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum IdentityProvider {
    /// Keycloak.
    Keycloak,
    /// Auth0.
    Auth0,
    /// Okta.
    Okta,
    /// Firebase.
    Firebase,
    /// Any provider not covered by the named variants.
    Custom,
}

/// Details of a test user created during a pentest session.
/// Stored so the cleanup step knows exactly what to delete and where.
///
/// `Default` yields a record with every optional field unset and
/// `cleaned_up == false`.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct TestUserRecord {
    /// Username used to register
    pub username: Option<String>,
    /// Email used to register
    pub email: Option<String>,
    /// User ID returned by the identity provider (if known)
    pub provider_user_id: Option<String>,
    /// Which identity provider holds this user
    pub provider: Option<IdentityProvider>,
    /// Whether cleanup has been completed.
    /// Falls back to `false` when the key is absent from the input.
    #[serde(default)]
    pub cleaned_up: bool,
}

/// A pentest session initiated via the chat interface.
///
/// The `id` field maps to the `_id` key and is left out of the serialized
/// form while it is `None`. Timestamps go through the `serde_helpers` modules
/// named in their `with` attributes.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PentestSession {
    /// Document id (`_id`); omitted from the serialized form while `None`.
    #[serde(rename = "_id", skip_serializing_if = "Option::is_none")]
    pub id: Option<bson::oid::ObjectId>,
    /// Identifier of the target under test.
    pub target_id: String,
    /// Linked repository for code-aware testing
    pub repo_id: Option<String>,
    /// Current lifecycle status.
    pub status: PentestStatus,
    /// Strategy selected for this session.
    pub strategy: PentestStrategy,
    /// Wizard configuration (None for legacy sessions)
    pub config: Option<PentestConfig>,
    /// Identifier of the session's creator, if recorded.
    pub created_by: Option<String>,
    /// Test user created during auto-register (for cleanup)
    #[serde(default)]
    pub test_user: Option<TestUserRecord>,
    /// Total number of tool invocations in this session
    pub tool_invocations: u32,
    /// Total successful tool invocations
    pub tool_successes: u32,
    /// Number of findings discovered
    pub findings_count: u32,
    /// Number of confirmed exploitable findings
    pub exploitable_count: u32,
    /// Session start time, (de)serialized via `serde_helpers::bson_datetime`.
    #[serde(with = "super::serde_helpers::bson_datetime")]
    pub started_at: DateTime<Utc>,
    /// Completion time, if set; (de)serialized via
    /// `serde_helpers::opt_bson_datetime` and defaulted when the key is absent.
    #[serde(default, with = "super::serde_helpers::opt_bson_datetime")]
    pub completed_at: Option<DateTime<Utc>>,
}

impl PentestSession {
    /// Creates a fresh session for `target_id` using `strategy`.
    ///
    /// The session starts in `PentestStatus::Running` with `started_at` set to
    /// the current UTC time, all counters at zero and every optional field
    /// unset.
    pub fn new(target_id: String, strategy: PentestStrategy) -> Self {
        Self {
            // Caller-supplied values.
            target_id,
            strategy,
            // Lifecycle state.
            status: PentestStatus::Running,
            started_at: Utc::now(),
            completed_at: None,
            // Nothing is linked or persisted yet.
            id: None,
            repo_id: None,
            config: None,
            created_by: None,
            test_user: None,
            // Counters start from zero.
            tool_invocations: 0,
            tool_successes: 0,
            findings_count: 0,
            exploitable_count: 0,
        }
    }

    /// Percentage (0–100 scale) of tool invocations that succeeded.
    ///
    /// Returns `100.0` when no tool has been invoked yet, so an idle session
    /// never divides by zero.
    pub fn success_rate(&self) -> f64 {
        match self.tool_invocations {
            0 => 100.0,
            // u32 -> f64 is lossless, so `f64::from` is exact.
            total => f64::from(self.tool_successes) / f64::from(total) * 100.0,
        }
    }
}

/// Status of a node in the attack chain.
///
/// Because of `rename_all = "snake_case"`, the variants are (de)serialized as
/// `pending`, `running`, `completed`, `failed` and `skipped`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum AttackNodeStatus {
    /// Initial state assigned by `AttackChainNode::new`.
    Pending,
    /// The step is running.
    Running,
    /// The step has completed.
    Completed,
    /// The step has failed.
    Failed,
    /// The step was skipped.
    Skipped,
}

/// A single step in the LLM-driven attack chain DAG.
///
/// The `id` field maps to the `_id` key and is left out of the serialized
/// form while it is `None`. Both timestamps are optional and go through
/// `serde_helpers::opt_bson_datetime`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AttackChainNode {
    /// Document id (`_id`); omitted from the serialized form while `None`.
    #[serde(rename = "_id", skip_serializing_if = "Option::is_none")]
    pub id: Option<bson::oid::ObjectId>,
    /// Identifier of the owning pentest session.
    pub session_id: String,
    /// Unique ID for DAG references
    pub node_id: String,
    /// Parent node IDs (multiple for merge nodes)
    pub parent_node_ids: Vec<String>,
    /// Tool that was invoked
    pub tool_name: String,
    /// Input parameters passed to the tool
    pub tool_input: serde_json::Value,
    /// Output from the tool
    pub tool_output: Option<serde_json::Value>,
    /// Current status of this step.
    pub status: AttackNodeStatus,
    /// LLM's reasoning for choosing this action
    pub llm_reasoning: String,
    /// IDs of DastFindings produced by this step
    pub findings_produced: Vec<String>,
    /// Risk score (0-100) assigned by the LLM.
    /// The type is `u8`, so the 0-100 range is not enforced here.
    pub risk_score: Option<u8>,
    /// Start time, if set; defaulted when the key is absent.
    #[serde(default, with = "super::serde_helpers::opt_bson_datetime")]
    pub started_at: Option<DateTime<Utc>>,
    /// Completion time, if set; defaulted when the key is absent.
    #[serde(default, with = "super::serde_helpers::opt_bson_datetime")]
    pub completed_at: Option<DateTime<Utc>>,
}

impl AttackChainNode {
    /// Builds a not-yet-executed node for the given session.
    ///
    /// The node starts in `AttackNodeStatus::Pending` with no parents, no
    /// output, no findings, no risk score and no timestamps; callers fill
    /// those in later.
    pub fn new(
        session_id: String,
        node_id: String,
        tool_name: String,
        tool_input: serde_json::Value,
        llm_reasoning: String,
    ) -> Self {
        Self {
            // Caller-supplied identity and request data.
            session_id,
            node_id,
            tool_name,
            tool_input,
            llm_reasoning,
            // Execution state: nothing has run yet.
            status: AttackNodeStatus::Pending,
            tool_output: None,
            risk_score: None,
            started_at: None,
            completed_at: None,
            // Graph links and results start empty.
            parent_node_ids: Vec::new(),
            findings_produced: Vec::new(),
            // Not persisted yet.
            id: None,
        }
    }
}

/// Chat message within a pentest session.
///
/// The `id` field maps to the `_id` key and is left out of the serialized
/// form while it is `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PentestMessage {
    /// Document id (`_id`); omitted from the serialized form while `None`.
    #[serde(rename = "_id", skip_serializing_if = "Option::is_none")]
    pub id: Option<bson::oid::ObjectId>,
    /// Identifier of the owning pentest session.
    pub session_id: String,
    /// "user", "assistant", "tool_result", "system"
    pub role: String,
    /// Message text.
    pub content: String,
    /// Tool calls made by the assistant in this message
    pub tool_calls: Option<Vec<ToolCallRecord>>,
    /// Link to the attack chain node (for tool_result messages)
    pub attack_node_id: Option<String>,
    /// Creation time, (de)serialized via `serde_helpers::bson_datetime`.
    #[serde(with = "super::serde_helpers::bson_datetime")]
    pub created_at: DateTime<Utc>,
}

impl PentestMessage {
    /// Shared constructor behind the role-specific helpers.
    ///
    /// Produces an unsaved message (`id == None`) without tool calls,
    /// timestamped with the current UTC time.
    fn with_role(
        session_id: String,
        role: &str,
        content: String,
        attack_node_id: Option<String>,
    ) -> Self {
        Self {
            id: None,
            session_id,
            role: role.to_string(),
            content,
            tool_calls: None,
            attack_node_id,
            created_at: Utc::now(),
        }
    }

    /// Builds a message with role `"user"`.
    pub fn user(session_id: String, content: String) -> Self {
        Self::with_role(session_id, "user", content, None)
    }

    /// Builds a message with role `"assistant"`.
    pub fn assistant(session_id: String, content: String) -> Self {
        Self::with_role(session_id, "assistant", content, None)
    }

    /// Builds a message with role `"tool_result"`, linked to the attack chain
    /// node identified by `node_id`.
    pub fn tool_result(session_id: String, content: String, node_id: String) -> Self {
        Self::with_role(session_id, "tool_result", content, Some(node_id))
    }
}

/// Record of a tool call made by the LLM.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolCallRecord {
    /// Identifier of this call. NOTE(review): presumably the call id issued
    /// by the LLM provider — confirm where records are created.
    pub call_id: String,
    /// Name of the tool that was called.
    pub tool_name: String,
    /// Arguments of the call as arbitrary JSON.
    pub arguments: serde_json::Value,
    /// Result of the call as arbitrary JSON, if one is recorded.
    pub result: Option<serde_json::Value>,
}

/// SSE event types for real-time pentest streaming.
///
/// The enum is internally tagged: each event serializes as an object whose
/// `type` key holds the snake_case variant name (e.g. `tool_start`,
/// `tool_complete`, `paused`), followed by the variant's own fields.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum PentestEvent {
    /// LLM is thinking/reasoning
    Thinking { reasoning: String },
    /// A tool execution has started
    ToolStart {
        /// Identifier of the node this execution belongs to.
        node_id: String,
        /// Name of the tool being run.
        tool_name: String,
        /// Tool input as arbitrary JSON.
        input: serde_json::Value,
    },
    /// A tool execution completed
    ToolComplete {
        /// Identifier of the node this execution belongs to.
        node_id: String,
        /// Text summary of the outcome.
        summary: String,
        /// Number of findings reported with this completion.
        findings_count: u32,
    },
    /// A new finding was discovered
    Finding {
        /// Identifier of the finding.
        finding_id: String,
        /// Title of the finding.
        title: String,
        /// Severity as a free-form string (not an enum at this level).
        severity: String,
    },
    /// Assistant message (streaming text)
    Message { content: String },
    /// Session completed
    Complete { summary: String },
    /// Error occurred
    Error { message: String },
    /// Session paused
    Paused,
    /// Session resumed
    Resumed,
}

/// Aggregated stats for the pentest dashboard.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PentestStats {
    /// Number of running sessions.
    pub running_sessions: u32,
    /// Total number of vulnerabilities.
    pub total_vulnerabilities: u32,
    /// Total number of tool invocations.
    pub total_tool_invocations: u32,
    /// Tool success rate. NOTE(review): presumably on the same 0–100 scale
    /// as `PentestSession::success_rate` — confirm in the aggregation code.
    pub tool_success_rate: f64,
    /// Breakdown by severity level.
    pub severity_distribution: SeverityDistribution,
}

/// Per-severity tallies embedded in `PentestStats`.
///
/// NOTE(review): each field is presumably a count of findings at that
/// severity — confirm against the code that fills this struct.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SeverityDistribution {
    /// Tally for critical severity.
    pub critical: u32,
    /// Tally for high severity.
    pub high: u32,
    /// Tally for medium severity.
    pub medium: u32,
    /// Tally for low severity.
    pub low: u32,
    /// Tally for informational severity.
    pub info: u32,
}

/// Code context hint linking a discovered endpoint to source code.
///
/// All fields are required (none is optional or defaulted) when deserializing.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CodeContextHint {
    /// HTTP route pattern (e.g., "GET /api/users/:id")
    pub endpoint_pattern: String,
    /// Handler function name
    pub handler_function: String,
    /// Source file path
    pub file_path: String,
    /// Relevant code snippet
    pub code_snippet: String,
    /// SAST findings associated with this code
    pub known_vulnerabilities: Vec<String>,
}