Files
compliance-scanner-agent/compliance-agent/src/pentest/orchestrator.rs
Sharang Parnerkar c461faa2fb
All checks were successful
CI / Check (push) Has been skipped
CI / Detect Changes (push) Successful in 7s
CI / Deploy Agent (push) Successful in 2s
CI / Deploy Dashboard (push) Successful in 2s
CI / Deploy Docs (push) Successful in 2s
CI / Deploy MCP (push) Successful in 2s
feat: pentest onboarding — streaming, browser automation, reports, user cleanup (#16)
Complete pentest feature overhaul: SSE streaming, session-persistent browser tool (CDP), AES-256 credential encryption, auto-screenshots in reports, code-level remediation correlation, SAST triage chunking, context window optimization, test user cleanup (Keycloak/Auth0/Okta), wizard dropdowns, attack chain improvements, architecture docs with Mermaid diagrams.

Co-authored-by: Sharang Parnerkar <parnerkarsharang@gmail.com>
Reviewed-on: #16
2026-03-17 20:32:20 +00:00

707 lines
28 KiB
Rust

use std::sync::Arc;
use std::time::Duration;
use mongodb::bson::doc;
use tokio::sync::{broadcast, watch};
use compliance_core::models::dast::DastTarget;
use compliance_core::models::pentest::*;
use compliance_core::traits::pentest_tool::PentestToolContext;
use compliance_dast::ToolRegistry;
use crate::database::Database;
use crate::llm::{
ChatMessage, LlmClient, LlmResponse, ToolCallRequest, ToolCallRequestFunction, ToolDefinition,
};
/// Default maximum duration for a single pentest session before it is timed out.
///
/// Used only as a fallback: `run_session_guarded` prefers the session config's
/// `max_duration_minutes` when that value is present.
const SESSION_TIMEOUT: Duration = Duration::from_secs(30 * 60); // 30 minutes
/// Drives an LLM-guided pentest session: asks the LLM for the next step, executes the
/// requested tools, persists messages / attack-chain nodes / findings, and broadcasts
/// progress events.
pub struct PentestOrchestrator {
/// Registry used to list tool definitions for the LLM and to look tools up by name.
pub(crate) tool_registry: ToolRegistry,
/// Shared LLM client used for the tool-calling chat loop.
pub(crate) llm: Arc<LlmClient>,
/// Database handle used for sessions, messages, attack-chain nodes and findings.
pub(crate) db: Database,
/// Broadcast channel on which `PentestEvent`s are published; send errors are ignored.
pub(crate) event_tx: broadcast::Sender<PentestEvent>,
/// Optional pause signal; a value of `true` means the session is paused.
/// When `None`, pause checks are skipped entirely.
pub(crate) pause_rx: Option<watch::Receiver<bool>>,
}
impl PentestOrchestrator {
/// Create a new orchestrator with an externally-provided broadcast sender
/// and an optional pause receiver.
pub fn new(
llm: Arc<LlmClient>,
db: Database,
event_tx: broadcast::Sender<PentestEvent>,
pause_rx: Option<watch::Receiver<bool>>,
) -> Self {
Self {
tool_registry: ToolRegistry::new(),
llm,
db,
event_tx,
pause_rx,
}
}
/// Run a pentest session with timeout and automatic failure marking on errors.
pub async fn run_session_guarded(
&self,
session: &PentestSession,
target: &DastTarget,
initial_message: &str,
) {
let session_id = session.id;
// Use config-specified timeout or default
let timeout_duration = session
.config
.as_ref()
.and_then(|c| c.max_duration_minutes)
.map(|m| Duration::from_secs(m as u64 * 60))
.unwrap_or(SESSION_TIMEOUT);
let timeout_minutes = timeout_duration.as_secs() / 60;
match tokio::time::timeout(
timeout_duration,
self.run_session(session, target, initial_message),
)
.await
{
Ok(Ok(())) => {
tracing::info!(?session_id, "Pentest session completed successfully");
}
Ok(Err(e)) => {
tracing::error!(?session_id, error = %e, "Pentest session failed");
self.mark_session_failed(session_id, &format!("Error: {e}"))
.await;
let _ = self.event_tx.send(PentestEvent::Error {
message: format!("Session failed: {e}"),
});
}
Err(_) => {
let msg = format!("Session timed out after {timeout_minutes} minutes");
tracing::warn!(?session_id, "{msg}");
self.mark_session_failed(session_id, &msg).await;
let _ = self.event_tx.send(PentestEvent::Error { message: msg });
}
}
}
async fn mark_session_failed(
&self,
session_id: Option<mongodb::bson::oid::ObjectId>,
reason: &str,
) {
if let Some(sid) = session_id {
let _ = self
.db
.pentest_sessions()
.update_one(
doc! { "_id": sid },
doc! { "$set": {
"status": "failed",
"completed_at": mongodb::bson::DateTime::now(),
"error_message": reason,
}},
)
.await;
}
}
/// Block while the pause signal is set, mirroring the pause/resume in the DB and events.
///
/// Returns immediately when no pause receiver is configured or the signal is currently
/// `false`. Otherwise the session status is set to `paused`, `PentestEvent::Paused` is
/// sent, and the call waits until the signal turns `false` (or the sender side is
/// dropped), after which the status is set back to `running` and
/// `PentestEvent::Resumed` is sent. DB and event-send errors are ignored.
async fn wait_if_paused(&self, session: &PentestSession) {
    let mut signal = match self.pause_rx {
        Some(ref pause_rx) => pause_rx.clone(),
        None => return,
    };

    let paused_now = *signal.borrow();
    if !paused_now {
        return;
    }

    // Paused: record it on the session document and notify listeners.
    if let Some(sid) = session.id {
        let filter = doc! { "_id": sid };
        let update = doc! { "$set": { "status": "paused" }};
        let _ = self.db.pentest_sessions().update_one(filter, update).await;
    }
    let _ = self.event_tx.send(PentestEvent::Paused);

    // Sleep until the flag flips back; a closed channel also ends the wait.
    loop {
        let still_paused = *signal.borrow();
        if !still_paused {
            break;
        }
        if signal.changed().await.is_err() {
            break;
        }
    }

    // Resumed: flip the stored status back and notify listeners.
    if let Some(sid) = session.id {
        let filter = doc! { "_id": sid };
        let update = doc! { "$set": { "status": "running" }};
        let _ = self.db.pentest_sessions().update_one(filter, update).await;
    }
    let _ = self.event_tx.send(PentestEvent::Resumed);
}
async fn run_session(
&self,
session: &PentestSession,
target: &DastTarget,
initial_message: &str,
) -> Result<(), crate::error::AgentError> {
let session_id = session.id.map(|oid| oid.to_hex()).unwrap_or_default();
// Gather code-awareness context from linked repo
let (sast_findings, sbom_entries, code_context) = self.gather_repo_context(target).await;
// Build system prompt with code context
let system_prompt = self
.build_system_prompt(
session,
target,
&sast_findings,
&sbom_entries,
&code_context,
)
.await;
// Build tool definitions for LLM
let tool_defs: Vec<ToolDefinition> = self
.tool_registry
.all_definitions()
.into_iter()
.map(|td| ToolDefinition {
name: td.name,
description: td.description,
parameters: td.input_schema,
})
.collect();
// Initialize messages
let mut messages = vec![
ChatMessage {
role: "system".to_string(),
content: Some(system_prompt),
tool_calls: None,
tool_call_id: None,
},
ChatMessage {
role: "user".to_string(),
content: Some(initial_message.to_string()),
tool_calls: None,
tool_call_id: None,
},
];
// Store user message
let user_msg = PentestMessage::user(session_id.clone(), initial_message.to_string());
let _ = self.db.pentest_messages().insert_one(&user_msg).await;
// Build tool context with real data
let tool_context = PentestToolContext {
target: target.clone(),
session_id: session_id.clone(),
sast_findings,
sbom_entries,
code_context,
rate_limit: target.rate_limit,
allow_destructive: target.allow_destructive,
};
let max_iterations = 50;
let mut total_findings = 0u32;
let mut total_tool_calls = 0u32;
let mut total_successes = 0u32;
let mut prev_node_ids: Vec<String> = Vec::new();
for _iteration in 0..max_iterations {
// Check pause state at top of each iteration
self.wait_if_paused(session).await;
let response = self
.llm
.chat_with_tools(messages.clone(), &tool_defs, Some(0.2), Some(8192))
.await?;
match response {
LlmResponse::Content(content) => {
let msg = PentestMessage::assistant(session_id.clone(), content.clone());
let _ = self.db.pentest_messages().insert_one(&msg).await;
let _ = self.event_tx.send(PentestEvent::Message {
content: content.clone(),
});
messages.push(ChatMessage {
role: "assistant".to_string(),
content: Some(content.clone()),
tool_calls: None,
tool_call_id: None,
});
let done_indicators = [
"pentest complete",
"testing complete",
"scan complete",
"analysis complete",
"finished",
"that concludes",
];
let content_lower = content.to_lowercase();
if done_indicators
.iter()
.any(|ind| content_lower.contains(ind))
{
break;
}
break;
}
LlmResponse::ToolCalls {
calls: tool_calls,
reasoning,
} => {
let tc_requests: Vec<ToolCallRequest> = tool_calls
.iter()
.map(|tc| ToolCallRequest {
id: tc.id.clone(),
r#type: "function".to_string(),
function: ToolCallRequestFunction {
name: tc.name.clone(),
arguments: serde_json::to_string(&tc.arguments).unwrap_or_default(),
},
})
.collect();
messages.push(ChatMessage {
role: "assistant".to_string(),
content: if reasoning.is_empty() {
None
} else {
Some(reasoning.clone())
},
tool_calls: Some(tc_requests),
tool_call_id: None,
});
let mut current_batch_node_ids: Vec<String> = Vec::new();
for tc in &tool_calls {
total_tool_calls += 1;
let node_id = uuid::Uuid::new_v4().to_string();
let mut node = AttackChainNode::new(
session_id.clone(),
node_id.clone(),
tc.name.clone(),
tc.arguments.clone(),
reasoning.clone(),
);
// Link to previous iteration's nodes
node.parent_node_ids = prev_node_ids.clone();
node.status = AttackNodeStatus::Running;
node.started_at = Some(chrono::Utc::now());
let _ = self.db.attack_chain_nodes().insert_one(&node).await;
current_batch_node_ids.push(node_id.clone());
let _ = self.event_tx.send(PentestEvent::ToolStart {
node_id: node_id.clone(),
tool_name: tc.name.clone(),
input: tc.arguments.clone(),
});
let result = if let Some(tool) = self.tool_registry.get(&tc.name) {
match tool.execute(tc.arguments.clone(), &tool_context).await {
Ok(result) => {
total_successes += 1;
let findings_count = result.findings.len() as u32;
total_findings += findings_count;
let mut finding_ids: Vec<String> = Vec::new();
for mut finding in result.findings {
finding.scan_run_id = session_id.clone();
finding.session_id = Some(session_id.clone());
let insert_result =
self.db.dast_findings().insert_one(&finding).await;
if let Ok(res) = &insert_result {
finding_ids.push(
res.inserted_id
.as_object_id()
.map(|oid| oid.to_hex())
.unwrap_or_default(),
);
}
let _ = self.event_tx.send(PentestEvent::Finding {
finding_id: finding
.id
.map(|oid| oid.to_hex())
.unwrap_or_default(),
title: finding.title.clone(),
severity: finding.severity.to_string(),
});
}
// Compute risk score based on findings severity
let risk_score: Option<u8> = if findings_count > 0 {
Some(std::cmp::min(
100,
(findings_count as u8)
.saturating_mul(15)
.saturating_add(20),
))
} else {
None
};
let _ = self.event_tx.send(PentestEvent::ToolComplete {
node_id: node_id.clone(),
summary: result.summary.clone(),
findings_count,
});
let finding_ids_bson: Vec<mongodb::bson::Bson> = finding_ids
.iter()
.map(|id| mongodb::bson::Bson::String(id.clone()))
.collect();
let mut update_doc = doc! {
"status": "completed",
"tool_output": mongodb::bson::to_bson(&result.data)
.unwrap_or(mongodb::bson::Bson::Null),
"completed_at": mongodb::bson::DateTime::now(),
"findings_produced": finding_ids_bson,
};
if let Some(rs) = risk_score {
update_doc.insert("risk_score", rs as i32);
}
let _ = self
.db
.attack_chain_nodes()
.update_one(
doc! {
"session_id": &session_id,
"node_id": &node_id,
},
doc! { "$set": update_doc },
)
.await;
// Build LLM-facing summary: strip large fields
// (screenshots, raw HTML) to save context window
let llm_data = summarize_tool_output(&result.data);
serde_json::json!({
"summary": result.summary,
"findings_count": findings_count,
"data": llm_data,
})
.to_string()
}
Err(e) => {
let _ = self
.db
.attack_chain_nodes()
.update_one(
doc! {
"session_id": &session_id,
"node_id": &node_id,
},
doc! { "$set": {
"status": "failed",
"completed_at": mongodb::bson::DateTime::now(),
}},
)
.await;
format!("Tool execution failed: {e}")
}
}
} else {
format!("Unknown tool: {}", tc.name)
};
messages.push(ChatMessage {
role: "tool".to_string(),
content: Some(result),
tool_calls: None,
tool_call_id: Some(tc.id.clone()),
});
}
// Advance parent links so next iteration's nodes connect to this batch
prev_node_ids = current_batch_node_ids;
if let Some(sid) = session.id {
let _ = self
.db
.pentest_sessions()
.update_one(
doc! { "_id": sid },
doc! { "$set": {
"tool_invocations": total_tool_calls as i64,
"tool_successes": total_successes as i64,
"findings_count": total_findings as i64,
}},
)
.await;
}
}
}
}
if let Some(sid) = session.id {
let _ = self
.db
.pentest_sessions()
.update_one(
doc! { "_id": sid },
doc! { "$set": {
"status": "completed",
"completed_at": mongodb::bson::DateTime::now(),
"tool_invocations": total_tool_calls as i64,
"tool_successes": total_successes as i64,
"findings_count": total_findings as i64,
}},
)
.await;
}
// Clean up test user via identity provider API if requested
if session
.config
.as_ref()
.is_some_and(|c| c.auth.cleanup_test_user)
{
if let Some(ref test_user) = session.test_user {
let http = reqwest::Client::new();
// We need the AgentConfig — read from env since orchestrator doesn't hold it
let config = crate::config::load_config();
match config {
Ok(cfg) => {
match crate::pentest::cleanup::cleanup_test_user(test_user, &cfg, &http)
.await
{
Ok(true) => {
tracing::info!(
username = test_user.username.as_deref(),
"Test user cleaned up via provider API"
);
// Mark as cleaned up in DB
if let Some(sid) = session.id {
let _ = self
.db
.pentest_sessions()
.update_one(
doc! { "_id": sid },
doc! { "$set": { "test_user.cleaned_up": true } },
)
.await;
}
}
Ok(false) => {
tracing::info!(
"Test user cleanup skipped (no provider configured)"
);
}
Err(e) => {
tracing::warn!(error = %e, "Test user cleanup failed");
let _ = self.event_tx.send(PentestEvent::Error {
message: format!("Test user cleanup failed: {e}"),
});
}
}
}
Err(e) => {
tracing::warn!(error = %e, "Could not load config for cleanup");
}
}
}
}
// Clean up the persistent browser session for this pentest
compliance_dast::tools::browser::cleanup_browser_session(&session_id).await;
let _ = self.event_tx.send(PentestEvent::Complete {
summary: format!(
"Pentest complete. {} findings from {} tool invocations.",
total_findings, total_tool_calls
),
});
Ok(())
}
}
/// Strip large fields from tool output before sending to the LLM.
/// Screenshots, raw HTML, and other bulky data are replaced with short summaries.
/// The full data is still stored in the DB for the report.
fn summarize_tool_output(data: &serde_json::Value) -> serde_json::Value {
let Some(obj) = data.as_object() else {
return data.clone();
};
let mut summarized = serde_json::Map::new();
for (key, value) in obj {
match key.as_str() {
// Replace screenshot base64 with a placeholder
"screenshot_base64" => {
if let Some(s) = value.as_str() {
if !s.is_empty() {
summarized.insert(
key.clone(),
serde_json::Value::String(
"[screenshot captured and saved to report]".to_string(),
),
);
continue;
}
}
summarized.insert(key.clone(), value.clone());
}
// Truncate raw HTML content
"html" => {
if let Some(s) = value.as_str() {
if s.len() > 2000 {
summarized.insert(
key.clone(),
serde_json::Value::String(format!(
"{}... [truncated, {} chars total]",
&s[..2000],
s.len()
)),
);
continue;
}
}
summarized.insert(key.clone(), value.clone());
}
// Truncate page text
"text" if value.as_str().is_some_and(|s| s.len() > 1500) => {
let s = value.as_str().unwrap_or_default();
summarized.insert(
key.clone(),
serde_json::Value::String(format!("{}... [truncated]", &s[..1500])),
);
}
// Trim large arrays (e.g., "elements", "links", "inputs")
"elements" | "links" | "inputs" => {
if let Some(arr) = value.as_array() {
if arr.len() > 15 {
let mut trimmed: Vec<serde_json::Value> = arr[..15].to_vec();
trimmed.push(serde_json::json!(format!(
"... and {} more",
arr.len() - 15
)));
summarized.insert(key.clone(), serde_json::Value::Array(trimmed));
continue;
}
}
summarized.insert(key.clone(), value.clone());
}
// Recursively summarize nested objects (e.g., "page" in get_content)
_ if value.is_object() => {
summarized.insert(key.clone(), summarize_tool_output(value));
}
// Keep everything else as-is
_ => {
summarized.insert(key.clone(), value.clone());
}
}
}
serde_json::Value::Object(summarized)
}
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Text the summarizer substitutes for a non-empty screenshot payload.
    const PLACEHOLDER: &str = "[screenshot captured and saved to report]";

    /// A top-level screenshot is replaced while sibling fields are untouched.
    #[test]
    fn test_summarize_strips_screenshot() {
        let summary = summarize_tool_output(&json!({
            "screenshot_base64": "iVBOR...",
            "url": "https://example.com"
        }));
        assert_eq!(summary["screenshot_base64"], PLACEHOLDER);
        assert_eq!(summary["url"], "https://example.com");
    }

    /// HTML above the limit keeps its first 2000 bytes and reports the original size.
    #[test]
    fn test_summarize_truncates_html() {
        let summary = summarize_tool_output(&json!({ "html": "x".repeat(3000) }));
        let html = summary["html"].as_str().unwrap_or_default();
        assert!(html.contains("[truncated, 3000 chars total]"));
        assert!(html.starts_with(&"x".repeat(2000)));
        assert!(html.len() < 3000);
    }

    /// Page text above the limit keeps its first 1500 bytes and gets a marker.
    #[test]
    fn test_summarize_truncates_text() {
        let summary = summarize_tool_output(&json!({ "text": "a".repeat(2000) }));
        let text = summary["text"].as_str().unwrap_or_default();
        assert!(text.contains("[truncated]"));
        assert!(text.starts_with(&"a".repeat(1500)));
        assert!(text.len() < 2000);
    }

    /// Arrays above the limit keep 15 entries plus one "... and N more" entry.
    #[test]
    fn test_summarize_trims_large_arrays() {
        let mut elements: Vec<serde_json::Value> = Vec::with_capacity(20);
        for i in 0..20 {
            elements.push(json!(format!("el-{i}")));
        }
        let summary = summarize_tool_output(&json!({ "elements": elements }));
        let kept = summary["elements"].as_array();
        // 15 kept + 1 summary entry
        assert_eq!(kept.map(Vec::len), Some(16));
        assert_eq!(
            kept.and_then(|entries| entries.get(15)),
            Some(&json!("... and 5 more"))
        );
    }

    /// Small payloads pass through unchanged.
    #[test]
    fn test_summarize_preserves_small_data() {
        let original = json!({
            "url": "https://example.com",
            "status": 200,
            "title": "Example"
        });
        assert_eq!(summarize_tool_output(&original), original);
    }

    /// Nested objects are summarized too.
    #[test]
    fn test_summarize_recursive() {
        let summary = summarize_tool_output(&json!({
            "page": {
                "screenshot_base64": "iVBORw0KGgoAAAA...",
                "url": "https://example.com"
            }
        }));
        let page = &summary["page"];
        assert_eq!(page["screenshot_base64"], PLACEHOLDER);
        assert_eq!(page["url"], "https://example.com");
    }

    /// Non-object values come back as-is.
    #[test]
    fn test_summarize_non_object() {
        for value in [json!("just a string"), json!(42)] {
            assert_eq!(summarize_tool_output(&value), value);
        }
    }
}