feat: AI-driven automated penetration testing system

Add a complete AI pentest system where Claude autonomously drives security
testing via tool-calling. The LLM selects from 16 tools, chains results,
and builds an attack chain DAG.

Core:
- PentestTool trait (dyn-compatible) with PentestToolContext/Result
- PentestSession, AttackChainNode, PentestMessage, PentestEvent models
- 10 new DastVulnType variants (DNS, DMARC, TLS, cookies, CSP, CORS, etc.)
- LLM client chat_with_tools() for OpenAI-compatible tool calling

Tools (16 total):
- 5 agent wrappers: SQL injection, XSS, auth bypass, SSRF, API fuzzer
- 11 new infra tools: DNS checker, DMARC checker, TLS analyzer,
  security headers, cookie analyzer, CSP analyzer, rate limit tester,
  console log detector, CORS checker, OpenAPI parser, recon
- ToolRegistry for tool lookup and LLM definition generation

Orchestrator:
- PentestOrchestrator with iterative tool-calling loop (max 50 rounds)
- Attack chain node recording per tool invocation
- SSE event broadcasting for real-time progress
- Strategy-aware system prompts (quick/comprehensive/targeted/aggressive/stealth)

API (9 endpoints):
- POST/GET /pentest/sessions, GET /pentest/sessions/:id
- POST /pentest/sessions/:id/chat, GET /pentest/sessions/:id/stream
- GET /pentest/sessions/:id/attack-chain, GET /pentest/sessions/:id/messages,
  GET /pentest/sessions/:id/findings
- GET /pentest/stats

Dashboard:
- Pentest dashboard with stat cards, severity distribution, session list
- Chat-based session page with split layout (chat + findings/attack chain)
- Inline tool execution indicators, auto-polling, new session modal
- Sidebar navigation item

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Sharang Parnerkar
2026-03-11 19:23:21 +01:00
parent 76260acc76
commit 71d8741e10
40 changed files with 7546 additions and 90 deletions

View File

@@ -176,6 +176,16 @@ pub enum DastVulnType {
InformationDisclosure,
SecurityMisconfiguration,
BrokenAuth,
DnsMisconfiguration,
EmailSecurity,
TlsMisconfiguration,
CookieSecurity,
CspIssue,
CorsMisconfiguration,
RateLimitAbsent,
ConsoleLogLeakage,
SecurityHeaderMissing,
KnownCveExploit,
Other,
}
@@ -192,6 +202,16 @@ impl std::fmt::Display for DastVulnType {
Self::InformationDisclosure => write!(f, "information_disclosure"),
Self::SecurityMisconfiguration => write!(f, "security_misconfiguration"),
Self::BrokenAuth => write!(f, "broken_auth"),
Self::DnsMisconfiguration => write!(f, "dns_misconfiguration"),
Self::EmailSecurity => write!(f, "email_security"),
Self::TlsMisconfiguration => write!(f, "tls_misconfiguration"),
Self::CookieSecurity => write!(f, "cookie_security"),
Self::CspIssue => write!(f, "csp_issue"),
Self::CorsMisconfiguration => write!(f, "cors_misconfiguration"),
Self::RateLimitAbsent => write!(f, "rate_limit_absent"),
Self::ConsoleLogLeakage => write!(f, "console_log_leakage"),
Self::SecurityHeaderMissing => write!(f, "security_header_missing"),
Self::KnownCveExploit => write!(f, "known_cve_exploit"),
Self::Other => write!(f, "other"),
}
}
@@ -244,6 +264,8 @@ pub struct DastFinding {
pub remediation: Option<String>,
/// Linked SAST finding ID (if correlated)
pub linked_sast_finding_id: Option<String>,
/// Pentest session that produced this finding (if AI-driven)
pub session_id: Option<String>,
#[serde(with = "super::serde_helpers::bson_datetime")]
pub created_at: DateTime<Utc>,
}
@@ -276,6 +298,7 @@ impl DastFinding {
evidence: Vec::new(),
remediation: None,
linked_sast_finding_id: None,
session_id: None,
created_at: Utc::now(),
}
}

View File

@@ -7,6 +7,7 @@ pub mod finding;
pub mod graph;
pub mod issue;
pub mod mcp;
pub mod pentest;
pub mod repository;
pub mod sbom;
pub mod scan;
@@ -26,6 +27,11 @@ pub use graph::{
};
pub use issue::{IssueStatus, TrackerIssue, TrackerType};
pub use mcp::{McpServerConfig, McpServerStatus, McpTransport};
pub use pentest::{
AttackChainNode, AttackNodeStatus, CodeContextHint, PentestEvent, PentestMessage,
PentestSession, PentestStats, PentestStatus, PentestStrategy, SeverityDistribution,
ToolCallRecord,
};
pub use repository::{ScanTrigger, TrackedRepository};
pub use sbom::{SbomEntry, VulnRef};
pub use scan::{ScanPhase, ScanRun, ScanRunStatus, ScanType};

View File

@@ -0,0 +1,294 @@
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
/// Lifecycle state of a [`PentestSession`].
///
/// Serde (de)serializes the variants under their snake_case names
/// (`"running"`, `"paused"`, `"completed"`, `"failed"`).
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum PentestStatus {
    /// State assigned to every session built by `PentestSession::new`.
    Running,
    Paused,
    Completed,
    Failed,
}
/// Formats the status as its lowercase name (`running`, `paused`,
/// `completed`, `failed`), i.e. the same spelling serde uses on the wire.
impl std::fmt::Display for PentestStatus {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Pick the static label first, then emit it with a single write.
        let label = match self {
            Self::Running => "running",
            Self::Paused => "paused",
            Self::Completed => "completed",
            Self::Failed => "failed",
        };
        f.write_str(label)
    }
}
/// Testing strategy selected for the AI pentest orchestrator.
///
/// Serde (de)serializes the variants under their snake_case names.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum PentestStrategy {
    /// Fast pass that concentrates on common vulnerabilities.
    Quick,
    /// The standard, comprehensive scan.
    Comprehensive,
    /// Concentrates on specific vulnerability types, guided by SAST/SBOM data.
    Targeted,
    /// Heavier testing: more payloads and deeper exploitation.
    Aggressive,
    /// Low-noise mode: slower request rate and fewer noisy payloads.
    Stealth,
}
/// Formats the strategy as its lowercase name (`quick`, `comprehensive`,
/// `targeted`, `aggressive`, `stealth`).
impl std::fmt::Display for PentestStrategy {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Pick the static label first, then emit it with a single write.
        let label = match self {
            Self::Quick => "quick",
            Self::Comprehensive => "comprehensive",
            Self::Targeted => "targeted",
            Self::Aggressive => "aggressive",
            Self::Stealth => "stealth",
        };
        f.write_str(label)
    }
}
/// One pentest session started from the chat interface.
///
/// Holds the session's target, status, strategy and running counters.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct PentestSession {
    /// BSON object id; stored under the `_id` key and left out of the
    /// serialized form while it is `None`.
    #[serde(rename = "_id", skip_serializing_if = "Option::is_none")]
    pub id: Option<bson::oid::ObjectId>,
    /// Identifier of the target under test.
    pub target_id: String,
    /// Repository linked to the session for code-aware testing, if any.
    pub repo_id: Option<String>,
    /// Current lifecycle state.
    pub status: PentestStatus,
    /// Strategy chosen for this session.
    pub strategy: PentestStrategy,
    /// Who created the session, when known.
    pub created_by: Option<String>,
    /// Count of all tool invocations made during the session.
    pub tool_invocations: u32,
    /// Count of tool invocations that succeeded.
    pub tool_successes: u32,
    /// Count of findings discovered so far.
    pub findings_count: u32,
    /// Count of findings confirmed to be exploitable.
    pub exploitable_count: u32,
    /// When the session started.
    #[serde(with = "super::serde_helpers::bson_datetime")]
    pub started_at: DateTime<Utc>,
    /// When the session finished; `None` until then and when absent from
    /// the stored document.
    #[serde(default, with = "super::serde_helpers::opt_bson_datetime")]
    pub completed_at: Option<DateTime<Utc>>,
}
impl PentestSession {
    /// Builds a fresh session for `target_id` using `strategy`.
    ///
    /// The session starts in [`PentestStatus::Running`] with all counters at
    /// zero, no id, no linked repository, no creator, `started_at` set to
    /// the current UTC time and `completed_at` unset.
    pub fn new(target_id: String, strategy: PentestStrategy) -> Self {
        let started_at = Utc::now();
        Self {
            id: None,
            target_id,
            repo_id: None,
            status: PentestStatus::Running,
            strategy,
            created_by: None,
            tool_invocations: 0,
            tool_successes: 0,
            findings_count: 0,
            exploitable_count: 0,
            started_at,
            completed_at: None,
        }
    }

    /// Percentage (0.0–100.0) of tool invocations that succeeded.
    ///
    /// Returns `100.0` when no tool has been invoked yet, which also avoids
    /// dividing by zero.
    pub fn success_rate(&self) -> f64 {
        match self.tool_invocations {
            0 => 100.0,
            // u32 -> f64 is lossless, so `f64::from` is used rather than `as`.
            total => f64::from(self.tool_successes) / f64::from(total) * 100.0,
        }
    }
}
/// Execution state of a single attack chain node.
///
/// Serde (de)serializes the variants under their snake_case names.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum AttackNodeStatus {
    /// State assigned to every node built by `AttackChainNode::new`.
    Pending,
    Running,
    Completed,
    Failed,
    Skipped,
}
/// One step (a tool invocation) in the LLM-driven attack chain DAG.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct AttackChainNode {
    /// BSON object id; stored under the `_id` key and left out of the
    /// serialized form while it is `None`.
    #[serde(rename = "_id", skip_serializing_if = "Option::is_none")]
    pub id: Option<bson::oid::ObjectId>,
    /// Session this node belongs to.
    pub session_id: String,
    /// Unique identifier used when other nodes reference this one in the DAG.
    pub node_id: String,
    /// `node_id`s of the parents; a merge node has more than one.
    pub parent_node_ids: Vec<String>,
    /// Name of the invoked tool.
    pub tool_name: String,
    /// Parameters that were handed to the tool.
    pub tool_input: serde_json::Value,
    /// What the tool returned, once available.
    pub tool_output: Option<serde_json::Value>,
    /// Current execution state of this step.
    pub status: AttackNodeStatus,
    /// Why the LLM picked this action.
    pub llm_reasoning: String,
    /// Ids of the `DastFinding`s this step produced.
    pub findings_produced: Vec<String>,
    /// LLM-assigned risk score on a 0-100 scale.
    pub risk_score: Option<u8>,
    /// When execution began; `None` when absent from the stored document.
    #[serde(default, with = "super::serde_helpers::opt_bson_datetime")]
    pub started_at: Option<DateTime<Utc>>,
    /// When execution ended; `None` when absent from the stored document.
    #[serde(default, with = "super::serde_helpers::opt_bson_datetime")]
    pub completed_at: Option<DateTime<Utc>>,
}
impl AttackChainNode {
    /// Builds a not-yet-executed node for one tool invocation.
    ///
    /// The caller supplies the identifying data (`session_id`, `node_id`),
    /// the tool call (`tool_name`, `tool_input`) and the LLM's rationale.
    /// Everything else starts empty: status is
    /// [`AttackNodeStatus::Pending`], there are no parents, no output, no
    /// findings, no risk score and no timestamps.
    pub fn new(
        session_id: String,
        node_id: String,
        tool_name: String,
        tool_input: serde_json::Value,
        llm_reasoning: String,
    ) -> Self {
        Self {
            // Caller-provided values.
            session_id,
            node_id,
            tool_name,
            tool_input,
            llm_reasoning,
            // Defaults for a node that has not run yet.
            id: None,
            status: AttackNodeStatus::Pending,
            parent_node_ids: Vec::new(),
            findings_produced: Vec::new(),
            tool_output: None,
            risk_score: None,
            started_at: None,
            completed_at: None,
        }
    }
}
/// A single chat message exchanged inside a pentest session.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct PentestMessage {
    /// BSON object id; stored under the `_id` key and left out of the
    /// serialized form while it is `None`.
    #[serde(rename = "_id", skip_serializing_if = "Option::is_none")]
    pub id: Option<bson::oid::ObjectId>,
    /// Session this message belongs to.
    pub session_id: String,
    /// Author role: "user", "assistant", "tool_result" or "system".
    pub role: String,
    /// Message text.
    pub content: String,
    /// Tool calls the assistant issued in this message, if any.
    pub tool_calls: Option<Vec<ToolCallRecord>>,
    /// Attack chain node this message refers to (set for tool_result messages).
    pub attack_node_id: Option<String>,
    /// When the message was created.
    #[serde(with = "super::serde_helpers::bson_datetime")]
    pub created_at: DateTime<Utc>,
}
impl PentestMessage {
    /// Shared constructor: a message with the given role, no id, no tool
    /// calls and `created_at` set to the current UTC time.
    fn with_role(
        session_id: String,
        role: &str,
        content: String,
        attack_node_id: Option<String>,
    ) -> Self {
        Self {
            id: None,
            session_id,
            role: role.to_string(),
            content,
            tool_calls: None,
            attack_node_id,
            created_at: Utc::now(),
        }
    }

    /// Creates a message with role `"user"`.
    pub fn user(session_id: String, content: String) -> Self {
        Self::with_role(session_id, "user", content, None)
    }

    /// Creates a message with role `"assistant"` and no recorded tool calls.
    pub fn assistant(session_id: String, content: String) -> Self {
        Self::with_role(session_id, "assistant", content, None)
    }

    /// Creates a message with role `"tool_result"` linked to the attack
    /// chain node identified by `node_id`.
    pub fn tool_result(session_id: String, content: String, node_id: String) -> Self {
        Self::with_role(session_id, "tool_result", content, Some(node_id))
    }
}
/// One tool call issued by the LLM, together with its result when recorded.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ToolCallRecord {
    /// Identifier of the call.
    pub call_id: String,
    /// Name of the tool that was called.
    pub tool_name: String,
    /// Arguments supplied to the tool, as JSON.
    pub arguments: serde_json::Value,
    /// JSON result of the call; `None` when no result is recorded.
    pub result: Option<serde_json::Value>,
}
/// Events streamed over SSE to report pentest progress in real time.
///
/// Serialized as an internally tagged object: the variant name, in
/// snake_case, is stored under the `type` key next to the variant's fields.
#[derive(Clone, Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum PentestEvent {
    /// The LLM is thinking; carries its reasoning text.
    Thinking {
        reasoning: String,
    },
    /// Execution of a tool has begun.
    ToolStart {
        node_id: String,
        tool_name: String,
        input: serde_json::Value,
    },
    /// Execution of a tool has finished.
    ToolComplete {
        node_id: String,
        summary: String,
        findings_count: u32,
    },
    /// A new finding has been discovered.
    Finding {
        finding_id: String,
        title: String,
        severity: String,
    },
    /// Assistant message text (streamed).
    Message {
        content: String,
    },
    /// The session has completed.
    Complete {
        summary: String,
    },
    /// An error occurred.
    Error {
        message: String,
    },
}
/// Aggregate numbers shown on the pentest dashboard.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct PentestStats {
    /// Number of sessions currently running.
    pub running_sessions: u32,
    /// Total number of vulnerabilities.
    pub total_vulnerabilities: u32,
    /// Total number of tool invocations.
    pub total_tool_invocations: u32,
    /// Tool success rate.
    // NOTE(review): presumably a percentage like `PentestSession::success_rate` — confirm.
    pub tool_success_rate: f64,
    /// Vulnerability counts broken down by severity.
    pub severity_distribution: SeverityDistribution,
}
/// Per-severity counters, one field for each severity level.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct SeverityDistribution {
    /// Count at critical severity.
    pub critical: u32,
    /// Count at high severity.
    pub high: u32,
    /// Count at medium severity.
    pub medium: u32,
    /// Count at low severity.
    pub low: u32,
    /// Count at informational severity.
    pub info: u32,
}
/// Hint that ties a discovered endpoint back to the source code behind it.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct CodeContextHint {
    /// Route pattern including the HTTP method, e.g. "GET /api/users/:id".
    pub endpoint_pattern: String,
    /// Name of the handler function.
    pub handler_function: String,
    /// Path of the source file.
    pub file_path: String,
    /// The relevant piece of code.
    pub code_snippet: String,
    /// SAST findings associated with this code.
    pub known_vulnerabilities: Vec<String>,
}

View File

@@ -1,9 +1,11 @@
pub mod dast_agent;
pub mod graph_builder;
pub mod issue_tracker;
pub mod pentest_tool;
pub mod scanner;
pub use dast_agent::{DastAgent, DastContext, DiscoveredEndpoint, EndpointParameter};
pub use graph_builder::{LanguageParser, ParseOutput};
pub use issue_tracker::IssueTracker;
pub use pentest_tool::{PentestTool, PentestToolContext, PentestToolResult};
pub use scanner::{ScanOutput, Scanner};

View File

@@ -0,0 +1,63 @@
use std::future::Future;
use std::pin::Pin;
use crate::error::CoreError;
use crate::models::dast::{DastFinding, DastTarget};
use crate::models::finding::Finding;
use crate::models::pentest::CodeContextHint;
use crate::models::sbom::SbomEntry;
/// Context handed (by shared reference) to a pentest tool's `execute` call.
///
/// Bundles the target under test with the session id, optional SAST/SBOM
/// background data, code-graph hints and execution limits.
///
/// The HTTP client is not included here because `compliance-core` does not
/// depend on `reqwest`. Tools that need HTTP should hold their own client
/// or receive one via the `compliance-dast` orchestrator.
// NOTE(review): no `Debug`/`Clone` derives on this type — confirm that is intentional.
pub struct PentestToolContext {
/// The DAST target being tested
pub target: DastTarget,
/// Session ID for this pentest run
pub session_id: String,
/// SAST findings for the linked repo; empty when there are none
pub sast_findings: Vec<Finding>,
/// SBOM entries with known CVEs; empty when there are none
pub sbom_entries: Vec<SbomEntry>,
/// Code knowledge graph hints mapping endpoints to source code
pub code_context: Vec<CodeContextHint>,
/// Rate limit (requests per second)
// NOTE(review): nothing in this file enforces the limit — presumably each tool must honor it; confirm.
pub rate_limit: u32,
/// Whether destructive operations are allowed
// NOTE(review): presumably tools must check this flag themselves before destructive actions — confirm.
pub allow_destructive: bool,
}
/// Result from a pentest tool execution.
///
/// This is the success value of the future returned by `PentestTool::execute`.
// NOTE(review): no `Debug`/`Clone` derives on this type — confirm that is intentional.
pub struct PentestToolResult {
/// Human-readable summary of what the tool found
pub summary: String,
/// DAST findings produced by this tool (may be empty)
pub findings: Vec<DastFinding>,
/// Tool-specific structured output data, as arbitrary JSON
pub data: serde_json::Value,
}
/// A tool that the LLM pentest orchestrator can invoke.
///
/// Each tool represents a specific security testing capability
/// (e.g., SQL injection scanner, DNS checker, TLS analyzer).
///
/// `execute` returns a boxed future rather than being an `async fn`, which
/// keeps the trait dyn-compatible. Implementors must be `Send + Sync`.
pub trait PentestTool: Send + Sync {
/// Tool name for LLM tool_use (e.g., "sql_injection_scanner")
fn name(&self) -> &str;
/// Human-readable description for the LLM system prompt
fn description(&self) -> &str;
/// JSON Schema for the tool's input parameters
fn input_schema(&self) -> serde_json::Value;
/// Execute the tool with the given input.
///
/// `input` is the JSON argument object for this invocation and is taken
/// by value; `context` carries the target and session data.
/// NOTE(review): presumably `input` is expected to conform to
/// `input_schema()` — no validation is visible here; confirm at call site.
///
/// The returned future is `Send` and borrows both `self` and `context`
/// for `'a`, so both must outlive it. It resolves to a
/// `PentestToolResult` on success or a `CoreError` on failure.
fn execute<'a>(
&'a self,
input: serde_json::Value,
context: &'a PentestToolContext,
) -> Pin<Box<dyn Future<Output = Result<PentestToolResult, CoreError>> + Send + 'a>>;
}