feat: pentest onboarding — streaming, browser automation, reports, user cleanup (#16)

Complete pentest feature overhaul: SSE streaming, session-persistent browser tool (CDP), AES-256 credential encryption, auto-screenshots in reports, code-level remediation correlation, SAST triage chunking, context window optimization, test user cleanup (Keycloak/Auth0/Okta), wizard dropdowns, attack chain improvements, architecture docs with Mermaid diagrams. Co-authored-by: Sharang Parnerkar <parnerkarsharang@gmail.com> Reviewed-on: #16
2026-03-17 20:32:20 +00:00
parent 11e1c5f438
commit c461faa2fb
57 changed files with 8844 additions and 2423 deletions
@@ -2,7 +2,7 @@ use std::sync::Arc;
 use std::time::Duration;

 use mongodb::bson::doc;
-use tokio::sync::broadcast;
+use tokio::sync::{broadcast, watch};

 use compliance_core::models::dast::DastTarget;
 use compliance_core::models::pentest::*;
@@ -22,29 +22,27 @@ pub struct PentestOrchestrator {
    pub(crate) llm: Arc<LlmClient>,
    pub(crate) db: Database,
    pub(crate) event_tx: broadcast::Sender<PentestEvent>,
+    pub(crate) pause_rx: Option<watch::Receiver<bool>>,
 }

 impl PentestOrchestrator {
-    pub fn new(llm: Arc<LlmClient>, db: Database) -> Self {
-        let (event_tx, _) = broadcast::channel(256);
+    /// Create a new orchestrator with an externally-provided broadcast sender
+    /// and an optional pause receiver.
+    ///
+    /// # Arguments
+    ///
+    /// * `llm` - shared LLM client, stored as-is on the orchestrator.
+    /// * `db` - database handle, stored as-is on the orchestrator.
+    /// * `event_tx` - sender on which `PentestEvent`s are published. The
+    ///   channel is created by the caller rather than by the orchestrator.
+    /// * `pause_rx` - optional watch receiver carrying the pause flag. When
+    ///   `Some`, `wait_if_paused` treats a value of `true` as "paused"; when
+    ///   `None`, `wait_if_paused` returns immediately.
+    ///
+    /// A fresh `ToolRegistry` is built via `ToolRegistry::new()` for every
+    /// orchestrator.
+    pub fn new(
+        llm: Arc<LlmClient>,
+        db: Database,
+        event_tx: broadcast::Sender<PentestEvent>,
+        pause_rx: Option<watch::Receiver<bool>>,
+    ) -> Self {
        Self {
            tool_registry: ToolRegistry::new(),
            llm,
            db,
            event_tx,
+            pause_rx,
        }
    }

-    #[allow(dead_code)]
-    pub fn subscribe(&self) -> broadcast::Receiver<PentestEvent> {
-        self.event_tx.subscribe()
-    }
-
-    #[allow(dead_code)]
-    pub fn event_sender(&self) -> broadcast::Sender<PentestEvent> {
-        self.event_tx.clone()
-    }
-
    /// Run a pentest session with timeout and automatic failure marking on errors.
    pub async fn run_session_guarded(
        &self,
@@ -54,8 +52,18 @@ impl PentestOrchestrator {
    ) {
        let session_id = session.id;

+        // Use config-specified timeout or default
+        let timeout_duration = session
+            .config
+            .as_ref()
+            .and_then(|c| c.max_duration_minutes)
+            .map(|m| Duration::from_secs(m as u64 * 60))
+            .unwrap_or(SESSION_TIMEOUT);
+
+        let timeout_minutes = timeout_duration.as_secs() / 60;
+
        match tokio::time::timeout(
-            SESSION_TIMEOUT,
+            timeout_duration,
            self.run_session(session, target, initial_message),
        )
        .await
@@ -72,12 +80,10 @@ impl PentestOrchestrator {
                });
            }
            Err(_) => {
-                tracing::warn!(?session_id, "Pentest session timed out after 30 minutes");
-                self.mark_session_failed(session_id, "Session timed out after 30 minutes")
-                    .await;
-                let _ = self.event_tx.send(PentestEvent::Error {
-                    message: "Session timed out after 30 minutes".to_string(),
-                });
+                let msg = format!("Session timed out after {timeout_minutes} minutes");
+                tracing::warn!(?session_id, "{msg}");
+                self.mark_session_failed(session_id, &msg).await;
+                let _ = self.event_tx.send(PentestEvent::Error { message: msg });
            }
        }
    }
@@ -103,6 +109,45 @@ impl PentestOrchestrator {
        }
    }

+    /// Block the caller while the session's pause flag is set.
+    ///
+    /// Returns immediately when no pause receiver was supplied or when the
+    /// flag is currently `false`. Otherwise the session status is written as
+    /// `"paused"`, a `PentestEvent::Paused` is broadcast, and the method waits
+    /// until the flag becomes `false` or the watch sender is dropped. On the
+    /// way out the status is written back to `"running"` and a
+    /// `PentestEvent::Resumed` is broadcast.
+    ///
+    /// DB update failures and broadcast send failures are deliberately
+    /// ignored: pausing is best-effort bookkeeping.
+    async fn wait_if_paused(&self, session: &PentestSession) {
+        // Without a pause channel there is nothing to wait on.
+        let mut pause_state = match self.pause_rx.as_ref() {
+            Some(receiver) => receiver.clone(),
+            None => return,
+        };
+
+        let currently_paused = *pause_state.borrow();
+        if !currently_paused {
+            return;
+        }
+
+        // Entering the paused state: persist it and notify subscribers.
+        if let Some(sid) = session.id {
+            let filter = doc! { "_id": sid };
+            let update = doc! { "$set": { "status": "paused" }};
+            let _ = self
+                .db
+                .pentest_sessions()
+                .update_one(filter, update)
+                .await;
+        }
+        let _ = self.event_tx.send(PentestEvent::Paused);
+
+        // Sleep until the flag flips to `false`; a closed channel (sender
+        // dropped) also ends the wait so the session cannot hang forever.
+        loop {
+            if !*pause_state.borrow() {
+                break;
+            }
+            if pause_state.changed().await.is_err() {
+                break;
+            }
+        }
+
+        // Leaving the paused state: persist "running" again and notify.
+        if let Some(sid) = session.id {
+            let filter = doc! { "_id": sid };
+            let update = doc! { "$set": { "status": "running" }};
+            let _ = self
+                .db
+                .pentest_sessions()
+                .update_one(filter, update)
+                .await;
+        }
+        let _ = self.event_tx.send(PentestEvent::Resumed);
+    }
+
    async fn run_session(
        &self,
        session: &PentestSession,
@@ -175,6 +220,9 @@ impl PentestOrchestrator {
        let mut prev_node_ids: Vec<String> = Vec::new();

        for _iteration in 0..max_iterations {
+            // Check pause state at top of each iteration
+            self.wait_if_paused(session).await;
+
            let response = self
                .llm
                .chat_with_tools(messages.clone(), &tool_defs, Some(0.2), Some(8192))
@@ -342,10 +390,13 @@ impl PentestOrchestrator {
                                        )
                                        .await;

+                                    // Build LLM-facing summary: strip large fields
+                                    // (screenshots, raw HTML) to save context window
+                                    let llm_data = summarize_tool_output(&result.data);
                                    serde_json::json!({
                                        "summary": result.summary,
                                        "findings_count": findings_count,
-                                        "data": result.data,
+                                        "data": llm_data,
                                    })
                                    .to_string()
                                }
@@ -417,6 +468,61 @@ impl PentestOrchestrator {
                .await;
        }

+        // Clean up test user via identity provider API if requested
+        if session
+            .config
+            .as_ref()
+            .is_some_and(|c| c.auth.cleanup_test_user)
+        {
+            if let Some(ref test_user) = session.test_user {
+                let http = reqwest::Client::new();
+                // We need the AgentConfig — read from env since orchestrator doesn't hold it
+                let config = crate::config::load_config();
+                match config {
+                    Ok(cfg) => {
+                        match crate::pentest::cleanup::cleanup_test_user(test_user, &cfg, &http)
+                            .await
+                        {
+                            Ok(true) => {
+                                tracing::info!(
+                                    username = test_user.username.as_deref(),
+                                    "Test user cleaned up via provider API"
+                                );
+                                // Mark as cleaned up in DB
+                                if let Some(sid) = session.id {
+                                    let _ = self
+                                        .db
+                                        .pentest_sessions()
+                                        .update_one(
+                                            doc! { "_id": sid },
+                                            doc! { "$set": { "test_user.cleaned_up": true } },
+                                        )
+                                        .await;
+                                }
+                            }
+                            Ok(false) => {
+                                tracing::info!(
+                                    "Test user cleanup skipped (no provider configured)"
+                                );
+                            }
+                            Err(e) => {
+                                tracing::warn!(error = %e, "Test user cleanup failed");
+                                let _ = self.event_tx.send(PentestEvent::Error {
+                                    message: format!("Test user cleanup failed: {e}"),
+                                });
+                            }
+                        }
+                    }
+                    Err(e) => {
+                        tracing::warn!(error = %e, "Could not load config for cleanup");
+                    }
+                }
+            }
+        }
+
+        // Clean up the persistent browser session for this pentest
+        compliance_dast::tools::browser::cleanup_browser_session(&session_id).await;
+
        let _ = self.event_tx.send(PentestEvent::Complete {
            summary: format!(
                "Pentest complete. {} findings from {} tool invocations.",
@@ -427,3 +533,174 @@ impl PentestOrchestrator {
        Ok(())
    }
 }
+
+/// Strip large fields from tool output before sending to the LLM.
+///
+/// Screenshots, raw HTML, and other bulky data are replaced with short
+/// summaries. The full data is still stored in the DB for the report.
+///
+/// Rules, applied per key of a JSON object:
+/// * `screenshot_base64` — a non-empty string is replaced by a placeholder.
+/// * `html` — strings longer than 2000 bytes are cut to at most 2000 bytes.
+/// * `text` — strings longer than 1500 bytes are cut to at most 1500 bytes.
+/// * `elements` / `links` / `inputs` — arrays longer than 15 items keep the
+///   first 15 plus a trailing `"... and N more"` string entry.
+/// * any other object value is summarized recursively.
+/// * everything else is copied unchanged.
+///
+/// Non-object input is returned as a clone. Truncation always lands on a
+/// UTF-8 character boundary, so multi-byte content cannot cause a panic.
+fn summarize_tool_output(data: &serde_json::Value) -> serde_json::Value {
+    // Byte budgets for string fields and the item budget for list fields.
+    const MAX_HTML_BYTES: usize = 2000;
+    const MAX_TEXT_BYTES: usize = 1500;
+    const MAX_ARRAY_ITEMS: usize = 15;
+
+    let Some(obj) = data.as_object() else {
+        return data.clone();
+    };
+
+    let mut summarized = serde_json::Map::new();
+    for (key, value) in obj {
+        let replacement = match key.as_str() {
+            // Replace screenshot base64 with a placeholder
+            "screenshot_base64" => match value.as_str() {
+                Some(s) if !s.is_empty() => serde_json::Value::String(
+                    "[screenshot captured and saved to report]".to_string(),
+                ),
+                _ => value.clone(),
+            },
+            // Truncate raw HTML content. The reported total is `str::len`,
+            // i.e. a byte count, kept under its historical "chars" label.
+            "html" => match value.as_str() {
+                Some(s) if s.len() > MAX_HTML_BYTES => serde_json::Value::String(format!(
+                    "{}... [truncated, {} chars total]",
+                    truncate_at_char_boundary(s, MAX_HTML_BYTES),
+                    s.len()
+                )),
+                _ => value.clone(),
+            },
+            // Truncate page text. The guard keeps short or non-string `text`
+            // values flowing into the generic arms below.
+            "text" if value.as_str().is_some_and(|s| s.len() > MAX_TEXT_BYTES) => {
+                let s = value.as_str().unwrap_or_default();
+                serde_json::Value::String(format!(
+                    "{}... [truncated]",
+                    truncate_at_char_boundary(s, MAX_TEXT_BYTES)
+                ))
+            }
+            // Trim large arrays (e.g., "elements", "links", "inputs")
+            "elements" | "links" | "inputs" => match value.as_array() {
+                Some(arr) if arr.len() > MAX_ARRAY_ITEMS => {
+                    let mut trimmed: Vec<serde_json::Value> = arr[..MAX_ARRAY_ITEMS].to_vec();
+                    trimmed.push(serde_json::Value::String(format!(
+                        "... and {} more",
+                        arr.len() - MAX_ARRAY_ITEMS
+                    )));
+                    serde_json::Value::Array(trimmed)
+                }
+                _ => value.clone(),
+            },
+            // Recursively summarize nested objects (e.g., "page" in get_content)
+            _ if value.is_object() => summarize_tool_output(value),
+            // Keep everything else as-is
+            _ => value.clone(),
+        };
+        summarized.insert(key.clone(), replacement);
+    }
+    serde_json::Value::Object(summarized)
+}
+
+/// Return the longest prefix of `s` that is at most `max_bytes` long and ends
+/// on a UTF-8 character boundary.
+///
+/// Slicing a `&str` at an arbitrary byte offset panics when the offset falls
+/// inside a multi-byte character, so the cut point is moved backwards until
+/// it is valid. Offset 0 is always a boundary, so the loop terminates.
+fn truncate_at_char_boundary(s: &str, max_bytes: usize) -> &str {
+    if s.len() <= max_bytes {
+        return s;
+    }
+    let mut end = max_bytes;
+    while !s.is_char_boundary(end) {
+        end -= 1;
+    }
+    &s[..end]
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use serde_json::json;
+
+    /// A non-empty screenshot is swapped for the placeholder; siblings survive.
+    #[test]
+    fn test_summarize_strips_screenshot() {
+        let summarized = summarize_tool_output(&json!({
+            "screenshot_base64": "iVBOR...",
+            "url": "https://example.com"
+        }));
+        assert_eq!(
+            summarized["screenshot_base64"],
+            "[screenshot captured and saved to report]"
+        );
+        assert_eq!(summarized["url"], "https://example.com");
+    }
+
+    /// HTML above the limit keeps its first 2000 bytes plus a size marker.
+    #[test]
+    fn test_summarize_truncates_html() {
+        let payload = json!({ "html": "x".repeat(3000) });
+        let summarized = summarize_tool_output(&payload);
+        let html = summarized["html"].as_str().unwrap_or_default();
+        let expected_prefix = "x".repeat(2000);
+        assert!(html.contains("[truncated, 3000 chars total]"));
+        assert!(html.starts_with(&expected_prefix));
+        assert!(html.len() < 3000);
+    }
+
+    /// Page text above the limit keeps its first 1500 bytes plus a marker.
+    #[test]
+    fn test_summarize_truncates_text() {
+        let payload = json!({ "text": "a".repeat(2000) });
+        let summarized = summarize_tool_output(&payload);
+        let text = summarized["text"].as_str().unwrap_or_default();
+        let expected_prefix = "a".repeat(1500);
+        assert!(text.contains("[truncated]"));
+        assert!(text.starts_with(&expected_prefix));
+        assert!(text.len() < 2000);
+    }
+
+    /// Oversized lists keep 15 items and gain one "... and N more" entry.
+    #[test]
+    fn test_summarize_trims_large_arrays() {
+        let mut elements: Vec<serde_json::Value> = Vec::new();
+        for i in 0..20 {
+            elements.push(json!(format!("el-{i}")));
+        }
+        let summarized = summarize_tool_output(&json!({ "elements": elements }));
+        let items = summarized["elements"].as_array().map(Vec::as_slice);
+        assert!(items.is_some());
+        if let Some(items) = items {
+            // 15 kept + 1 summary entry
+            assert_eq!(items.len(), 16);
+            assert_eq!(items[15], json!("... and 5 more"));
+        }
+    }
+
+    /// Small payloads pass through untouched.
+    #[test]
+    fn test_summarize_preserves_small_data() {
+        let payload = json!({
+            "url": "https://example.com",
+            "status": 200,
+            "title": "Example"
+        });
+        assert_eq!(summarize_tool_output(&payload), payload);
+    }
+
+    /// Nested objects are summarized with the same rules as the top level.
+    #[test]
+    fn test_summarize_recursive() {
+        let summarized = summarize_tool_output(&json!({
+            "page": {
+                "screenshot_base64": "iVBORw0KGgoAAAA...",
+                "url": "https://example.com"
+            }
+        }));
+        let page = &summarized["page"];
+        assert_eq!(
+            page["screenshot_base64"],
+            "[screenshot captured and saved to report]"
+        );
+        assert_eq!(page["url"], "https://example.com");
+    }
+
+    /// Non-object values are returned as an equal clone.
+    #[test]
+    fn test_summarize_non_object() {
+        for original in [json!("just a string"), json!(42)] {
+            assert_eq!(summarize_tool_output(&original), original);
+        }
+    }
+}