feat: AI-driven automated penetration testing system

Add a complete AI pentest system where Claude autonomously drives security testing via tool-calling. The LLM selects from 16 tools, chains results, and builds an attack chain DAG.

Core:
- PentestTool trait (dyn-compatible) with PentestToolContext/PentestToolResult
- PentestSession, AttackChainNode, PentestMessage, PentestEvent models
- 10 new DastVulnType variants (DNS, DMARC, TLS, cookies, CSP, CORS, etc.)
- LLM client chat_with_tools() for OpenAI-compatible tool calling

Tools (16 total):
- 5 agent wrappers: SQL injection, XSS, auth bypass, SSRF, API fuzzer
- 11 new infra tools: DNS checker, DMARC checker, TLS analyzer, security headers, cookie analyzer, CSP analyzer, rate limit tester, console log detector, CORS checker, OpenAPI parser, recon
- ToolRegistry for tool lookup and LLM definition generation

Orchestrator:
- PentestOrchestrator with iterative tool-calling loop (max 50 rounds)
- Attack chain node recording per tool invocation
- SSE event broadcasting for real-time progress
- Strategy-aware system prompts (quick/comprehensive/targeted/aggressive/stealth)

API (9 endpoints):
- POST/GET /pentest/sessions, GET /pentest/sessions/:id
- POST /pentest/sessions/:id/chat, GET /pentest/sessions/:id/stream
- GET /pentest/sessions/:id/{attack-chain,messages,findings}
- GET /pentest/stats

Dashboard:
- Pentest dashboard with stat cards, severity distribution, session list
- Chat-based session page with split layout (chat + findings/attack chain)
- Inline tool execution indicators, auto-polling, new session modal
- Sidebar navigation item

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-11 19:23:21 +01:00
parent 76260acc76
commit 71d8741e10
40 changed files with 7546 additions and 90 deletions
--- a/compliance-core/src/models/dast.rs
+++ b/compliance-core/src/models/dast.rs
@@ -176,6 +176,16 @@ pub enum DastVulnType {
    InformationDisclosure,
    SecurityMisconfiguration,
    BrokenAuth,
+    DnsMisconfiguration,
+    EmailSecurity,
+    TlsMisconfiguration,
+    CookieSecurity,
+    CspIssue,
+    CorsMisconfiguration,
+    RateLimitAbsent,
+    ConsoleLogLeakage,
+    SecurityHeaderMissing,
+    KnownCveExploit,
    Other,
 }

@@ -192,6 +202,16 @@ impl std::fmt::Display for DastVulnType {
            Self::InformationDisclosure => write!(f, "information_disclosure"),
            Self::SecurityMisconfiguration => write!(f, "security_misconfiguration"),
            Self::BrokenAuth => write!(f, "broken_auth"),
+            Self::DnsMisconfiguration => write!(f, "dns_misconfiguration"),
+            Self::EmailSecurity => write!(f, "email_security"),
+            Self::TlsMisconfiguration => write!(f, "tls_misconfiguration"),
+            Self::CookieSecurity => write!(f, "cookie_security"),
+            Self::CspIssue => write!(f, "csp_issue"),
+            Self::CorsMisconfiguration => write!(f, "cors_misconfiguration"),
+            Self::RateLimitAbsent => write!(f, "rate_limit_absent"),
+            Self::ConsoleLogLeakage => write!(f, "console_log_leakage"),
+            Self::SecurityHeaderMissing => write!(f, "security_header_missing"),
+            Self::KnownCveExploit => write!(f, "known_cve_exploit"),
            Self::Other => write!(f, "other"),
        }
    }
@@ -244,6 +264,8 @@ pub struct DastFinding {
    pub remediation: Option<String>,
    /// Linked SAST finding ID (if correlated)
    pub linked_sast_finding_id: Option<String>,
+    /// Pentest session that produced this finding (if AI-driven)
+    pub session_id: Option<String>,
    #[serde(with = "super::serde_helpers::bson_datetime")]
    pub created_at: DateTime<Utc>,
 }
@@ -276,6 +298,7 @@ impl DastFinding {
            evidence: Vec::new(),
            remediation: None,
            linked_sast_finding_id: None,
+            session_id: None,
            created_at: Utc::now(),
        }
    }
--- a/compliance-core/src/models/mod.rs
+++ b/compliance-core/src/models/mod.rs
@@ -7,6 +7,7 @@ pub mod finding;
 pub mod graph;
 pub mod issue;
 pub mod mcp;
+pub mod pentest;
 pub mod repository;
 pub mod sbom;
 pub mod scan;
@@ -26,6 +27,11 @@ pub use graph::{
 };
 pub use issue::{IssueStatus, TrackerIssue, TrackerType};
 pub use mcp::{McpServerConfig, McpServerStatus, McpTransport};
+pub use pentest::{
+    AttackChainNode, AttackNodeStatus, CodeContextHint, PentestEvent, PentestMessage,
+    PentestSession, PentestStats, PentestStatus, PentestStrategy, SeverityDistribution,
+    ToolCallRecord,
+};
 pub use repository::{ScanTrigger, TrackedRepository};
 pub use sbom::{SbomEntry, VulnRef};
 pub use scan::{ScanPhase, ScanRun, ScanRunStatus, ScanType};
--- a/compliance-core/src/models/pentest.rs
+++ b/compliance-core/src/models/pentest.rs
@@ -0,0 +1,294 @@
+use chrono::{DateTime, Utc};
+use serde::{Deserialize, Serialize};
+
+/// Lifecycle status of a pentest session; serde (de)serializes each variant as a snake_case string.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
+#[serde(rename_all = "snake_case")]
+pub enum PentestStatus {
+    Running, // status given to every session built by `PentestSession::new`
+    Paused,
+    Completed,
+    Failed,
+}
+
+impl std::fmt::Display for PentestStatus {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_str(match self {
+            Self::Running => "running",
+            Self::Paused => "paused",
+            Self::Completed => "completed",
+            Self::Failed => "failed",
+        }) // one label per variant, in lowercase snake_case
+    }
+}
+
+/// Strategy for the AI pentest orchestrator; serde (de)serializes each variant as a snake_case string.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
+#[serde(rename_all = "snake_case")]
+pub enum PentestStrategy {
+    /// Quick scan focusing on common vulnerabilities.
+    Quick,
+    /// Standard comprehensive scan.
+    Comprehensive,
+    /// Targeted testing of specific vulnerability types, guided by SAST/SBOM data.
+    Targeted,
+    /// Aggressive testing with more payloads and deeper exploitation.
+    Aggressive,
+    /// Stealth mode with a slower request rate and fewer noisy payloads.
+    Stealth,
+}
+
+impl std::fmt::Display for PentestStrategy {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_str(match self {
+            Self::Quick => "quick",
+            Self::Comprehensive => "comprehensive",
+            Self::Targeted => "targeted",
+            Self::Aggressive => "aggressive",
+            Self::Stealth => "stealth",
+        }) // one label per variant, in lowercase snake_case
+    }
+}
+
+/// A pentest session initiated via the chat interface, with its status, strategy and running counters.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct PentestSession {
+    #[serde(rename = "_id", skip_serializing_if = "Option::is_none")] // stored as `_id`; omitted while `None`
+    pub id: Option<bson::oid::ObjectId>,
+    pub target_id: String, // NOTE(review): presumably the ID of the DAST target under test - confirm
+    /// Linked repository for code-aware testing (`None` when no repository is linked).
+    pub repo_id: Option<String>,
+    pub status: PentestStatus, // `Running` for sessions built by `PentestSession::new`
+    pub strategy: PentestStrategy,
+    pub created_by: Option<String>, // NOTE(review): presumably identifies the initiating user - confirm
+    /// Total number of tool invocations in this session; denominator of `success_rate`.
+    pub tool_invocations: u32,
+    /// Total successful tool invocations; numerator of `success_rate`.
+    pub tool_successes: u32,
+    /// Number of findings discovered.
+    pub findings_count: u32,
+    /// Number of confirmed exploitable findings.
+    pub exploitable_count: u32,
+    #[serde(with = "super::serde_helpers::bson_datetime")]
+    pub started_at: DateTime<Utc>,
+    #[serde(default, with = "super::serde_helpers::opt_bson_datetime")] // a missing field deserializes to `None`
+    pub completed_at: Option<DateTime<Utc>>,
+}
+
+impl PentestSession {
+    pub fn new(target_id: String, strategy: PentestStrategy) -> Self {
+        Self {
+            target_id,
+            strategy,
+            status: PentestStatus::Running, // every new session starts out running
+            started_at: Utc::now(), // stamped at construction time
+            id: None,
+            repo_id: None,
+            created_by: None,
+            completed_at: None,
+            tool_invocations: 0,
+            tool_successes: 0,
+            findings_count: 0,
+            exploitable_count: 0,
+        }
+    }
+
+    /// Percentage of tool invocations that succeeded; reports 100.0 when no tool has been invoked.
+    pub fn success_rate(&self) -> f64 {
+        let ok = f64::from(self.tool_successes);
+        let n = f64::from(self.tool_invocations);
+        if n == 0.0 { 100.0 } else { ok / n * 100.0 } // exact compare: n is converted from a u32
+    }
+}
+
+/// Status of a node in the attack chain; serde (de)serializes each variant as a snake_case string.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
+#[serde(rename_all = "snake_case")]
+pub enum AttackNodeStatus {
+    Pending, // status given to every node built by `AttackChainNode::new`
+    Running,
+    Completed,
+    Failed,
+    Skipped,
+}
+
+/// A single step in the LLM-driven attack chain DAG: one tool invocation plus its inputs and outcome.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct AttackChainNode {
+    #[serde(rename = "_id", skip_serializing_if = "Option::is_none")] // stored as `_id`; omitted while `None`
+    pub id: Option<bson::oid::ObjectId>,
+    pub session_id: String, // NOTE(review): presumably the owning pentest session's ID - confirm
+    /// Unique ID for DAG references (the value other nodes list in `parent_node_ids`).
+    pub node_id: String,
+    /// Parent node IDs (multiple for merge nodes); empty on nodes built by `AttackChainNode::new`.
+    pub parent_node_ids: Vec<String>,
+    /// Name of the tool that was invoked.
+    pub tool_name: String,
+    /// Input parameters passed to the tool, as arbitrary JSON.
+    pub tool_input: serde_json::Value,
+    /// Output from the tool, as arbitrary JSON; `None` on nodes built by `AttackChainNode::new`.
+    pub tool_output: Option<serde_json::Value>,
+    pub status: AttackNodeStatus,
+    /// LLM's reasoning for choosing this action.
+    pub llm_reasoning: String,
+    /// IDs of DastFindings produced by this step.
+    pub findings_produced: Vec<String>,
+    /// Risk score assigned by the LLM, intended range 0-100 (the `u8` type does not enforce the upper bound).
+    pub risk_score: Option<u8>,
+    #[serde(default, with = "super::serde_helpers::opt_bson_datetime")] // a missing field deserializes to `None`
+    pub started_at: Option<DateTime<Utc>>,
+    #[serde(default, with = "super::serde_helpers::opt_bson_datetime")] // a missing field deserializes to `None`
+    pub completed_at: Option<DateTime<Utc>>,
+}
+
+impl AttackChainNode {
+    pub fn new(
+        session_id: String,
+        node_id: String,
+        tool_name: String,
+        tool_input: serde_json::Value,
+        llm_reasoning: String,
+    ) -> Self {
+        Self {
+            session_id, // caller-supplied values come first ...
+            node_id,
+            tool_name,
+            tool_input,
+            llm_reasoning,
+            status: AttackNodeStatus::Pending, // ... then the defaults of a node that has not run yet
+            id: None,
+            parent_node_ids: Vec::new(),
+            findings_produced: Vec::new(),
+            tool_output: None,
+            risk_score: None,
+            started_at: None,
+            completed_at: None,
+        }
+    }
+}
+
+/// Chat message within a pentest session (user input, assistant output, or a tool result).
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct PentestMessage {
+    #[serde(rename = "_id", skip_serializing_if = "Option::is_none")] // stored as `_id`; omitted while `None`
+    pub id: Option<bson::oid::ObjectId>,
+    pub session_id: String, // NOTE(review): presumably the owning pentest session's ID - confirm
+    /// Message role: "user", "assistant", "tool_result" or "system" (the constructors here emit only the first three).
+    pub role: String,
+    pub content: String,
+    /// Tool calls made by the assistant in this message; the constructors here always set `None`.
+    pub tool_calls: Option<Vec<ToolCallRecord>>,
+    /// Link to the attack chain node (for tool_result messages); `None` for user/assistant messages.
+    pub attack_node_id: Option<String>,
+    #[serde(with = "super::serde_helpers::bson_datetime")]
+    pub created_at: DateTime<Utc>,
+}
+
+impl PentestMessage {
+    /// Common constructor behind the public helpers below.
+    /// Produces an unsaved message (`id: None`) with no recorded tool calls,
+    /// timestamped at the moment of the call.
+    fn with_role(
+        session_id: String,
+        role: &str,
+        content: String,
+        attack_node_id: Option<String>,
+    ) -> Self {
+        Self {
+            id: None,
+            session_id,
+            role: role.to_string(),
+            content,
+            tool_calls: None,
+            attack_node_id,
+            created_at: Utc::now(),
+        }
+    }
+
+    /// Builds a message with role `"user"` and no attack chain node link.
+    pub fn user(session_id: String, content: String) -> Self {
+        Self::with_role(session_id, "user", content, None)
+    }
+
+    /// Builds a message with role `"assistant"` and no attack chain node link.
+    pub fn assistant(session_id: String, content: String) -> Self {
+        Self::with_role(session_id, "assistant", content, None)
+    }
+
+    /// Builds a message with role `"tool_result"`,
+    /// linked to the attack chain node identified by `node_id`.
+    pub fn tool_result(session_id: String, content: String, node_id: String) -> Self {
+        Self::with_role(session_id, "tool_result", content, Some(node_id))
+    }
+}
+
+/// Record of a tool call made by the LLM, stored on a message via `PentestMessage::tool_calls`.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ToolCallRecord {
+    pub call_id: String, // NOTE(review): presumably the call ID issued by the LLM API - confirm
+    pub tool_name: String,
+    pub arguments: serde_json::Value, // arbitrary JSON
+    pub result: Option<serde_json::Value>, // arbitrary JSON; NOTE(review): presumably `None` until the tool returns - confirm
+}
+
+/// SSE event types for real-time pentest streaming; serialized with a `type` field holding the snake_case variant name.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(tag = "type", rename_all = "snake_case")]
+pub enum PentestEvent {
+    /// The LLM is thinking/reasoning; carries the reasoning text.
+    Thinking { reasoning: String },
+    /// A tool execution has started; carries the node ID, tool name and JSON input.
+    ToolStart {
+        node_id: String,
+        tool_name: String,
+        input: serde_json::Value,
+    },
+    /// A tool execution completed; carries the node ID, a summary and a findings count.
+    ToolComplete {
+        node_id: String,
+        summary: String,
+        findings_count: u32,
+    },
+    /// A new finding was discovered; `severity` is carried as a plain string.
+    Finding { finding_id: String, title: String, severity: String },
+    /// Assistant message (streaming text).
+    Message { content: String },
+    /// The session completed; carries a summary.
+    Complete { summary: String },
+    /// An error occurred; carries the error message.
+    Error { message: String },
+}
+
+/// Aggregated stats for the pentest dashboard.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct PentestStats {
+    pub running_sessions: u32,
+    pub total_vulnerabilities: u32,
+    pub total_tool_invocations: u32,
+    pub tool_success_rate: f64, // NOTE(review): presumably a percentage like `PentestSession::success_rate` - confirm
+    pub severity_distribution: SeverityDistribution,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)] // one counter per severity level; embedded in `PentestStats`
+pub struct SeverityDistribution {
+    pub critical: u32,
+    pub high: u32,
+    pub medium: u32,
+    pub low: u32,
+    pub info: u32,
+}
+
+/// Code context hint linking a discovered endpoint to the source code that handles it.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct CodeContextHint {
+    /// HTTP route pattern, method included (e.g., "GET /api/users/:id").
+    pub endpoint_pattern: String,
+    /// Name of the handler function.
+    pub handler_function: String,
+    /// Path of the source file.
+    pub file_path: String,
+    /// Relevant code snippet.
+    pub code_snippet: String,
+    /// SAST findings associated with this code, stored as strings.
+    pub known_vulnerabilities: Vec<String>,
+}
--- a/compliance-core/src/traits/mod.rs
+++ b/compliance-core/src/traits/mod.rs
@@ -1,9 +1,11 @@
 pub mod dast_agent;
 pub mod graph_builder;
 pub mod issue_tracker;
+pub mod pentest_tool;
 pub mod scanner;

 pub use dast_agent::{DastAgent, DastContext, DiscoveredEndpoint, EndpointParameter};
 pub use graph_builder::{LanguageParser, ParseOutput};
 pub use issue_tracker::IssueTracker;
+pub use pentest_tool::{PentestTool, PentestToolContext, PentestToolResult};
 pub use scanner::{ScanOutput, Scanner};
--- a/compliance-core/src/traits/pentest_tool.rs
+++ b/compliance-core/src/traits/pentest_tool.rs
@@ -0,0 +1,63 @@
+use std::future::Future;
+use std::pin::Pin;
+
+use crate::error::CoreError;
+use crate::models::dast::{DastFinding, DastTarget};
+use crate::models::finding::Finding;
+use crate::models::pentest::CodeContextHint;
+use crate::models::sbom::SbomEntry;
+
+/// Context passed by shared reference to pentest tools during execution (see `PentestTool::execute`).
+///
+/// The HTTP client is not included here because `compliance-core` does not
+/// depend on `reqwest`. Tools that need HTTP should hold their own client
+/// or receive one via the `compliance-dast` orchestrator.
+pub struct PentestToolContext {
+    /// The DAST target being tested.
+    pub target: DastTarget,
+    /// Session ID for this pentest run.
+    pub session_id: String,
+    /// SAST findings for the linked repo (empty if there are none).
+    pub sast_findings: Vec<Finding>,
+    /// SBOM entries with known CVEs (empty if there are none).
+    pub sbom_entries: Vec<SbomEntry>,
+    /// Code knowledge graph hints mapping endpoints to source code.
+    pub code_context: Vec<CodeContextHint>,
+    /// Rate limit, in requests per second.
+    pub rate_limit: u32,
+    /// Whether destructive operations are allowed.
+    pub allow_destructive: bool,
+}
+
+/// Result from a pentest tool execution; the success value of `PentestTool::execute`.
+pub struct PentestToolResult {
+    /// Human-readable summary of what the tool found.
+    pub summary: String,
+    /// DAST findings produced by this tool (may be empty).
+    pub findings: Vec<DastFinding>,
+    /// Tool-specific structured output data, as arbitrary JSON.
+    pub data: serde_json::Value,
+}
+
+/// A tool that the LLM pentest orchestrator can invoke.
+///
+/// Each tool represents a specific security testing capability
+/// (e.g., SQL injection scanner, DNS checker, TLS analyzer).
+/// `execute` returns a boxed future instead of being an `async fn`, for dyn-compatibility.
+pub trait PentestTool: Send + Sync {
+    /// Tool name for LLM tool_use (e.g., "sql_injection_scanner").
+    fn name(&self) -> &str;
+
+    /// Human-readable description for the LLM system prompt.
+    fn description(&self) -> &str;
+
+    /// JSON Schema for the tool's input parameters, returned as a JSON value.
+    fn input_schema(&self) -> serde_json::Value;
+
+    /// Execute the tool with the given input; the returned `Send` future may borrow `self` and `context` for `'a`.
+    fn execute<'a>(
+        &'a self,
+        input: serde_json::Value, // NOTE(review): presumably expected to match `input_schema()` - confirm
+        context: &'a PentestToolContext,
+    ) -> Pin<Box<dyn Future<Output = Result<PentestToolResult, CoreError>> + Send + 'a>>;
+}