feat: browser session persistence, auto-screenshots, context optimization, user cleanup

Browser tool:
- Session-persistent Chrome tab (same tab reused across all calls in a pentest)
- Auto-screenshot on every navigate and click (stored in attack chain for report)
- Fill uses CDP Input.insertText (fixes WebSocket corruption on special chars)
- Switched from browserless/chromium to chromedp/headless-shell (stable WS)

Context window optimization:
- Strip screenshot_base64 from LLM conversation (kept in DB for report)
- Truncate HTML to 2KB, page text to 1.5KB in LLM messages
- Cap element/link arrays at 15 items
- SAST triage: batch 30 findings per LLM call instead of all at once

Report improvements:
- Auto-embed screenshots in attack chain timeline (navigate + click nodes)
- Cover page shows best app screenshot
- Attack chain phases capped at 8 (no more 20x "Final")

User cleanup:
- TestUserRecord model tracks created test users per session
- cleanup.rs: Keycloak (Admin REST API), Auth0 (Management API), Okta (Users API)
- Auto-cleanup on session completion when cleanup_test_user is enabled
- Env vars: KEYCLOAK_ADMIN_USERNAME, KEYCLOAK_ADMIN_PASSWORD

System prompt:
- Explicit browser usage instructions (navigate → get_content → click → fill)
- SPA auth bypass guidance (check page content, not HTTP status)
- Screenshot instructions for evidence collection

Other:
- Pin mongo:7 in docker-compose (mongo:latest/8 segfaults on kernel 6.19)
- Add deploy/docker-compose.mailserver.yml for Postfix + Dovecot

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-17 19:53:55 +01:00
parent a737c36bc9
commit 37690ce734
18 changed files with 1122 additions and 215 deletions
@@ -139,6 +139,61 @@ pub async fn create_session(
        let event_tx = agent.register_session_stream(&session_id_str);
        let pause_rx = agent.register_pause_control(&session_id_str);

+        // Merge server-default IMAP/email settings where wizard left blanks
+        if let Some(ref mut cfg) = session.config {
+            if cfg.auth.mode == AuthMode::AutoRegister {
+                if cfg.auth.verification_email.is_none() {
+                    cfg.auth.verification_email = agent.config.pentest_verification_email.clone();
+                }
+                if cfg.auth.imap_host.is_none() {
+                    cfg.auth.imap_host = agent.config.pentest_imap_host.clone();
+                }
+                if cfg.auth.imap_port.is_none() {
+                    cfg.auth.imap_port = agent.config.pentest_imap_port;
+                }
+                if cfg.auth.imap_username.is_none() {
+                    cfg.auth.imap_username = agent.config.pentest_imap_username.clone();
+                }
+                if cfg.auth.imap_password.is_none() {
+                    cfg.auth.imap_password = agent.config.pentest_imap_password.as_ref().map(|s| {
+                        use secrecy::ExposeSecret;
+                        s.expose_secret().to_string()
+                    });
+                }
+            }
+        }
+
+        // Pre-populate test user record for auto-register sessions
+        if let Some(ref cfg) = session.config {
+            if cfg.auth.mode == AuthMode::AutoRegister {
+                let verification_email = cfg.auth.verification_email.clone();
+                // Build plus-addressed email for this session
+                let test_email = verification_email.as_deref().map(|email| {
+                    let parts: Vec<&str> = email.splitn(2, '@').collect();
+                    if parts.len() == 2 {
+                        format!("{}+{}@{}", parts[0], session_id_str, parts[1])
+                    } else {
+                        email.to_string()
+                    }
+                });
+
+                // Detect identity provider from keycloak config
+                let provider = if agent.config.keycloak_url.is_some() {
+                    Some(compliance_core::models::pentest::IdentityProvider::Keycloak)
+                } else {
+                    None
+                };
+
+                session.test_user = Some(compliance_core::models::pentest::TestUserRecord {
+                    username: None, // LLM will choose; updated after registration
+                    email: test_email,
+                    provider_user_id: None,
+                    provider,
+                    cleaned_up: false,
+                });
+            }
+        }
+
        // Encrypt credentials before they linger in memory
        let mut session_for_task = session.clone();
        if let Some(ref mut cfg) = session_for_task.config {
@@ -49,5 +49,12 @@ pub fn load_config() -> Result<AgentConfig, AgentError> {
            .unwrap_or_else(|| "/data/compliance-scanner/ssh/id_ed25519".to_string()),
        keycloak_url: env_var_opt("KEYCLOAK_URL"),
        keycloak_realm: env_var_opt("KEYCLOAK_REALM"),
+        keycloak_admin_username: env_var_opt("KEYCLOAK_ADMIN_USERNAME"),
+        keycloak_admin_password: env_secret_opt("KEYCLOAK_ADMIN_PASSWORD"),
+        pentest_verification_email: env_var_opt("PENTEST_VERIFICATION_EMAIL"),
+        pentest_imap_host: env_var_opt("PENTEST_IMAP_HOST"),
+        pentest_imap_port: env_var_opt("PENTEST_IMAP_PORT").and_then(|p| p.parse().ok()),
+        pentest_imap_username: env_var_opt("PENTEST_IMAP_USERNAME"),
+        pentest_imap_password: env_secret_opt("PENTEST_IMAP_PASSWORD"),
    })
 }
@@ -5,7 +5,10 @@ use compliance_core::models::{Finding, FindingStatus};
 use crate::llm::LlmClient;
 use crate::pipeline::orchestrator::GraphContext;

-const TRIAGE_SYSTEM_PROMPT: &str = r#"You are a security finding triage expert. Analyze the following security finding with its code context and determine the appropriate action.
+/// Maximum number of findings to include in a single LLM triage call.
+const TRIAGE_CHUNK_SIZE: usize = 30;
+
+const TRIAGE_SYSTEM_PROMPT: &str = r#"You are a security finding triage expert. Analyze each of the following security findings with its code context and determine the appropriate action.

 Actions:
 - "confirm": The finding is a true positive at the reported severity. Keep as-is.
@@ -19,8 +22,8 @@ Consider:
 - Is the finding actionable by a developer?
 - Would a real attacker be able to exploit this?

-Respond in JSON format:
-{"action": "confirm|downgrade|upgrade|dismiss", "confidence": 0-10, "rationale": "brief explanation", "remediation": "optional fix suggestion"}"#;
+Respond with a JSON array, one entry per finding in the same order they were presented:
+[{"id": "<fingerprint>", "action": "confirm|downgrade|upgrade|dismiss", "confidence": 0-10, "rationale": "brief explanation", "remediation": "optional fix suggestion"}, ...]"#;

 pub async fn triage_findings(
    llm: &Arc<LlmClient>,
@@ -29,60 +32,76 @@ pub async fn triage_findings(
 ) -> usize {
    let mut passed = 0;

-    for finding in findings.iter_mut() {
-        let file_classification = classify_file_path(finding.file_path.as_deref());
+    // Process findings in chunks to avoid overflowing the LLM context window.
+    for chunk_start in (0..findings.len()).step_by(TRIAGE_CHUNK_SIZE) {
+        let chunk_end = (chunk_start + TRIAGE_CHUNK_SIZE).min(findings.len());
+        let chunk = &mut findings[chunk_start..chunk_end];

-        let mut user_prompt = format!(
-            "Scanner: {}\nRule: {}\nSeverity: {}\nTitle: {}\nDescription: {}\nFile: {}\nLine: {}\nCode: {}\nFile classification: {}",
-            finding.scanner,
-            finding.rule_id.as_deref().unwrap_or("N/A"),
-            finding.severity,
-            finding.title,
-            finding.description,
-            finding.file_path.as_deref().unwrap_or("N/A"),
-            finding.line_number.map(|n| n.to_string()).unwrap_or_else(|| "N/A".to_string()),
-            finding.code_snippet.as_deref().unwrap_or("N/A"),
-            file_classification,
-        );
+        // Build a combined prompt for the entire chunk.
+        let mut user_prompt = String::new();
+        let mut file_classifications: Vec<String> = Vec::new();
+
+        for (i, finding) in chunk.iter().enumerate() {
+            let file_classification = classify_file_path(finding.file_path.as_deref());

-        // Enrich with surrounding code context if possible
-        if let Some(context) = read_surrounding_context(finding) {
            user_prompt.push_str(&format!(
-                "\n\n--- Surrounding Code (50 lines) ---\n{context}"
+                "\n--- Finding {} (id: {}) ---\nScanner: {}\nRule: {}\nSeverity: {}\nTitle: {}\nDescription: {}\nFile: {}\nLine: {}\nCode: {}\nFile classification: {}",
+                i + 1,
+                finding.fingerprint,
+                finding.scanner,
+                finding.rule_id.as_deref().unwrap_or("N/A"),
+                finding.severity,
+                finding.title,
+                finding.description,
+                finding.file_path.as_deref().unwrap_or("N/A"),
+                finding.line_number.map(|n| n.to_string()).unwrap_or_else(|| "N/A".to_string()),
+                finding.code_snippet.as_deref().unwrap_or("N/A"),
+                file_classification,
            ));
-        }

-        // Enrich with graph context if available
-        if let Some(ctx) = graph_context {
-            if let Some(impact) = ctx
-                .impacts
-                .iter()
-                .find(|i| i.finding_id == finding.fingerprint)
-            {
+            // Enrich with surrounding code context if possible
+            if let Some(context) = read_surrounding_context(finding) {
                user_prompt.push_str(&format!(
-                    "\n\n--- Code Graph Context ---\n\
-                     Blast radius: {} nodes affected\n\
-                     Entry points affected: {}\n\
-                     Direct callers: {}\n\
-                     Communities affected: {}\n\
-                     Call chains: {}",
-                    impact.blast_radius,
-                    if impact.affected_entry_points.is_empty() {
-                        "none".to_string()
-                    } else {
-                        impact.affected_entry_points.join(", ")
-                    },
-                    if impact.direct_callers.is_empty() {
-                        "none".to_string()
-                    } else {
-                        impact.direct_callers.join(", ")
-                    },
-                    impact.affected_communities.len(),
-                    impact.call_chains.len(),
+                    "\n\n--- Surrounding Code (50 lines) ---\n{context}"
                ));
            }
+
+            // Enrich with graph context if available
+            if let Some(ctx) = graph_context {
+                if let Some(impact) = ctx
+                    .impacts
+                    .iter()
+                    .find(|im| im.finding_id == finding.fingerprint)
+                {
+                    user_prompt.push_str(&format!(
+                        "\n\n--- Code Graph Context ---\n\
+                         Blast radius: {} nodes affected\n\
+                         Entry points affected: {}\n\
+                         Direct callers: {}\n\
+                         Communities affected: {}\n\
+                         Call chains: {}",
+                        impact.blast_radius,
+                        if impact.affected_entry_points.is_empty() {
+                            "none".to_string()
+                        } else {
+                            impact.affected_entry_points.join(", ")
+                        },
+                        if impact.direct_callers.is_empty() {
+                            "none".to_string()
+                        } else {
+                            impact.direct_callers.join(", ")
+                        },
+                        impact.affected_communities.len(),
+                        impact.call_chains.len(),
+                    ));
+                }
+            }
+
+            user_prompt.push('\n');
+            file_classifications.push(file_classification);
        }

+        // Send the batch to the LLM.
        match llm
            .chat(TRIAGE_SYSTEM_PROMPT, &user_prompt, Some(0.1))
            .await
@@ -98,58 +117,77 @@ pub async fn triage_findings(
                } else {
                    cleaned
                };
-                if let Ok(result) = serde_json::from_str::<TriageResult>(cleaned) {
-                    // Apply file-path confidence adjustment
-                    let adjusted_confidence =
-                        adjust_confidence(result.confidence, &file_classification);
-                    finding.confidence = Some(adjusted_confidence);
-                    finding.triage_action = Some(result.action.clone());
-                    finding.triage_rationale = Some(result.rationale);

-                    if let Some(remediation) = result.remediation {
-                        finding.remediation = Some(remediation);
-                    }
-
-                    match result.action.as_str() {
-                        "dismiss" => {
-                            finding.status = FindingStatus::FalsePositive;
-                        }
-                        "downgrade" => {
-                            // Downgrade severity by one level
-                            finding.severity = downgrade_severity(&finding.severity);
-                            finding.status = FindingStatus::Triaged;
-                            passed += 1;
-                        }
-                        "upgrade" => {
-                            finding.severity = upgrade_severity(&finding.severity);
-                            finding.status = FindingStatus::Triaged;
-                            passed += 1;
-                        }
-                        _ => {
-                            // "confirm" or unknown — keep as-is
-                            if adjusted_confidence >= 3.0 {
+                match serde_json::from_str::<Vec<TriageResult>>(cleaned) {
+                    Ok(results) => {
+                        for (idx, finding) in chunk.iter_mut().enumerate() {
+                            // Match result by position; fall back to keeping the finding.
+                            let Some(result) = results.get(idx) else {
                                finding.status = FindingStatus::Triaged;
                                passed += 1;
-                            } else {
-                                finding.status = FindingStatus::FalsePositive;
+                                continue;
+                            };
+
+                            let file_classification = file_classifications
+                                .get(idx)
+                                .map(|s| s.as_str())
+                                .unwrap_or("unknown");
+
+                            let adjusted_confidence =
+                                adjust_confidence(result.confidence, file_classification);
+                            finding.confidence = Some(adjusted_confidence);
+                            finding.triage_action = Some(result.action.clone());
+                            finding.triage_rationale = Some(result.rationale.clone());
+
+                            if let Some(ref remediation) = result.remediation {
+                                finding.remediation = Some(remediation.clone());
+                            }
+
+                            match result.action.as_str() {
+                                "dismiss" => {
+                                    finding.status = FindingStatus::FalsePositive;
+                                }
+                                "downgrade" => {
+                                    finding.severity = downgrade_severity(&finding.severity);
+                                    finding.status = FindingStatus::Triaged;
+                                    passed += 1;
+                                }
+                                "upgrade" => {
+                                    finding.severity = upgrade_severity(&finding.severity);
+                                    finding.status = FindingStatus::Triaged;
+                                    passed += 1;
+                                }
+                                _ => {
+                                    // "confirm" or unknown — keep as-is
+                                    if adjusted_confidence >= 3.0 {
+                                        finding.status = FindingStatus::Triaged;
+                                        passed += 1;
+                                    } else {
+                                        finding.status = FindingStatus::FalsePositive;
+                                    }
+                                }
                            }
                        }
                    }
-                } else {
-                    // Parse failure — keep the finding
-                    finding.status = FindingStatus::Triaged;
-                    passed += 1;
-                    tracing::warn!(
-                        "Failed to parse triage response for {}: {response}",
-                        finding.fingerprint
-                    );
+                    Err(_) => {
+                        // Batch parse failure — keep all findings in the chunk.
+                        tracing::warn!(
+                            "Failed to parse batch triage response for chunk starting at {chunk_start}: {cleaned}"
+                        );
+                        for finding in chunk.iter_mut() {
+                            finding.status = FindingStatus::Triaged;
+                            passed += 1;
+                        }
+                    }
                }
            }
            Err(e) => {
-                // On LLM error, keep the finding
-                tracing::warn!("LLM triage failed for {}: {e}", finding.fingerprint);
-                finding.status = FindingStatus::Triaged;
-                passed += 1;
+                // On LLM error, keep all findings in the chunk.
+                tracing::warn!("LLM batch triage failed for chunk starting at {chunk_start}: {e}");
+                for finding in chunk.iter_mut() {
+                    finding.status = FindingStatus::Triaged;
+                    passed += 1;
+                }
            }
        }
    }
@@ -266,6 +304,10 @@ fn upgrade_severity(

 #[derive(serde::Deserialize)]
 struct TriageResult {
+    /// Finding fingerprint echoed back by the LLM (optional).
+    #[serde(default)]
+    #[allow(dead_code)]
+    id: String,
    #[serde(default = "default_action")]
    action: String,
    #[serde(default)]
@@ -1,6 +1,6 @@
 mod agent;
 mod api;
-mod config;
+pub(crate) mod config;
 mod database;
 mod error;
 mod llm;
@@ -15,11 +15,20 @@ mod webhooks;

 #[tokio::main]
 async fn main() -> Result<(), Box<dyn std::error::Error>> {
-    dotenvy::dotenv().ok();
+    match dotenvy::dotenv() {
+        Ok(path) => eprintln!("[dotenv] Loaded from: {}", path.display()),
+        Err(e) => eprintln!("[dotenv] FAILED: {e}"),
+    }

    let _telemetry_guard = compliance_core::telemetry::init_telemetry("compliance-agent");

-    tracing::info!("Loading configuration...");
+    // Log critical env vars at startup
+    tracing::info!(
+        chrome_ws_url = std::env::var("CHROME_WS_URL").ok().as_deref(),
+        pentest_email = std::env::var("PENTEST_VERIFICATION_EMAIL").ok().as_deref(),
+        encryption_key_set = std::env::var("PENTEST_ENCRYPTION_KEY").is_ok(),
+        "Loading configuration..."
+    );
    let config = config::load_config()?;

    // Ensure SSH key pair exists for cloning private repos
@@ -0,0 +1,300 @@
+use compliance_core::models::pentest::{IdentityProvider, TestUserRecord};
+use compliance_core::AgentConfig;
+use secrecy::ExposeSecret;
+use tracing::{info, warn};
+
+/// Attempt to delete a test user created during a pentest session.
+///
+/// Routes to the appropriate identity provider based on `TestUserRecord.provider`:
+/// Keycloak, Auth0 and Okta are handled through their management APIs, while
+/// Firebase is not implemented yet and is skipped with a warning. For `Custom` or
+/// missing providers, Keycloak is tried when both `keycloak_url` and
+/// `keycloak_admin_username` are configured; otherwise cleanup is skipped with a
+/// warning. This function performs no browser-based cleanup.
+///
+/// Returns `Ok(true)` if the user was deleted, `Ok(false)` if cleanup was skipped
+/// (already cleaned up, unsupported provider, or nothing configured), and `Err`
+/// with a human-readable message when the provider-specific cleanup fails.
+pub async fn cleanup_test_user(
+    user: &TestUserRecord,
+    config: &AgentConfig,
+    http: &reqwest::Client,
+) -> Result<bool, String> {
+    // Nothing to do when the record is already marked as cleaned up.
+    if user.cleaned_up {
+        return Ok(false);
+    }
+
+    match user.provider.as_ref() {
+        Some(IdentityProvider::Keycloak) => cleanup_keycloak(user, config, http).await,
+        Some(IdentityProvider::Auth0) => cleanup_auth0(user, config, http).await,
+        Some(IdentityProvider::Okta) => cleanup_okta(user, config, http).await,
+        Some(IdentityProvider::Firebase) => {
+            warn!("Firebase user cleanup not yet implemented");
+            Ok(false)
+        }
+        Some(IdentityProvider::Custom) | None => {
+            // For custom/unknown providers, try Keycloak if configured, else skip
+            if config.keycloak_url.is_some() && config.keycloak_admin_username.is_some() {
+                cleanup_keycloak(user, config, http).await
+            } else {
+                warn!(
+                    username = user.username.as_deref(),
+                    "No identity provider configured for cleanup — skipping"
+                );
+                Ok(false)
+            }
+        }
+    }
+}
+
+/// Delete a user from Keycloak via the Admin REST API.
+///
+/// Flow: get admin token → search user by username → delete by ID.
+///
+/// The admin token is requested from the `master` realm using the `admin-cli`
+/// client and the configured admin credentials (password grant). The user is then
+/// looked up by exact username in the configured `keycloak_realm` and deleted.
+///
+/// # Errors
+///
+/// Returns `Err` when a required Keycloak setting or the test user's username is
+/// missing, when a request fails or returns a non-success status, or when no user
+/// with that username exists in the realm.
+async fn cleanup_keycloak(
+    user: &TestUserRecord,
+    config: &AgentConfig,
+    http: &reqwest::Client,
+) -> Result<bool, String> {
+    let base_url = config
+        .keycloak_url
+        .as_deref()
+        .ok_or("KEYCLOAK_URL not configured")?;
+    let realm = config
+        .keycloak_realm
+        .as_deref()
+        .ok_or("KEYCLOAK_REALM not configured")?;
+    let admin_user = config
+        .keycloak_admin_username
+        .as_deref()
+        .ok_or("KEYCLOAK_ADMIN_USERNAME not configured")?;
+    let admin_pass = config
+        .keycloak_admin_password
+        .as_ref()
+        .ok_or("KEYCLOAK_ADMIN_PASSWORD not configured")?;
+
+    let username = user
+        .username
+        .as_deref()
+        .ok_or("No username in test user record")?;
+
+    info!(username, realm, "Cleaning up Keycloak test user");
+
+    // Step 1: Get admin access token
+    let token_url = format!("{base_url}/realms/master/protocol/openid-connect/token");
+    let token_resp = http
+        .post(&token_url)
+        .form(&[
+            ("grant_type", "password"),
+            ("client_id", "admin-cli"),
+            ("username", admin_user),
+            ("password", admin_pass.expose_secret()),
+        ])
+        .send()
+        .await
+        .map_err(|e| format!("Keycloak token request failed: {e}"))?;
+
+    if !token_resp.status().is_success() {
+        let status = token_resp.status();
+        let body = token_resp.text().await.unwrap_or_default();
+        return Err(format!("Keycloak admin auth failed ({status}): {body}"));
+    }
+
+    let token_body: serde_json::Value = token_resp
+        .json()
+        .await
+        .map_err(|e| format!("Failed to parse Keycloak token: {e}"))?;
+    let access_token = token_body
+        .get("access_token")
+        .and_then(|v| v.as_str())
+        .ok_or("No access_token in Keycloak response")?;
+
+    // Step 2: Search for user by username.
+    // Percent-encode the username: it is chosen at registration time and may contain
+    // characters such as `+`, `@` or `&` (e.g. a plus-addressed email used as login)
+    // that would otherwise be misread as a space or as a query-string separator.
+    let encoded_username = urlencoding::encode(username);
+    let search_url = format!(
+        "{base_url}/admin/realms/{realm}/users?username={encoded_username}&exact=true"
+    );
+    let search_resp = http
+        .get(&search_url)
+        .bearer_auth(access_token)
+        .send()
+        .await
+        .map_err(|e| format!("Keycloak user search failed: {e}"))?;
+
+    if !search_resp.status().is_success() {
+        let status = search_resp.status();
+        let body = search_resp.text().await.unwrap_or_default();
+        return Err(format!("Keycloak user search failed ({status}): {body}"));
+    }
+
+    let users: Vec<serde_json::Value> = search_resp
+        .json()
+        .await
+        .map_err(|e| format!("Failed to parse Keycloak users: {e}"))?;
+
+    // `exact=true` is requested above, so the first hit is taken as the account.
+    let user_id = users
+        .first()
+        .and_then(|u| u.get("id"))
+        .and_then(|v| v.as_str())
+        .ok_or_else(|| format!("User '{username}' not found in Keycloak realm '{realm}'"))?;
+
+    // Step 3: Delete the user
+    let delete_url = format!("{base_url}/admin/realms/{realm}/users/{user_id}");
+    let delete_resp = http
+        .delete(&delete_url)
+        .bearer_auth(access_token)
+        .send()
+        .await
+        .map_err(|e| format!("Keycloak user delete failed: {e}"))?;
+
+    // `is_success()` covers every 2xx status, including 204 No Content.
+    if delete_resp.status().is_success() {
+        info!(username, user_id, "Keycloak test user deleted");
+        Ok(true)
+    } else {
+        let status = delete_resp.status();
+        let body = delete_resp.text().await.unwrap_or_default();
+        Err(format!("Keycloak delete failed ({status}): {body}"))
+    }
+}
+
+/// Delete a user from Auth0 via the Management API.
+///
+/// Requires `AUTH0_DOMAIN`, `AUTH0_CLIENT_ID`, `AUTH0_CLIENT_SECRET` env vars.
+///
+/// Flow: obtain a Management API token (client-credentials grant) → look the user
+/// up by the email stored in the test user record → delete by `user_id`.
+///
+/// # Errors
+///
+/// Returns `Err` when an env var or the record's email is missing, when a request
+/// fails or returns a non-success status, or when no user with that email exists.
+async fn cleanup_auth0(
+    user: &TestUserRecord,
+    _config: &AgentConfig,
+    http: &reqwest::Client,
+) -> Result<bool, String> {
+    let domain = std::env::var("AUTH0_DOMAIN").map_err(|_| "AUTH0_DOMAIN not set")?;
+    let client_id = std::env::var("AUTH0_CLIENT_ID").map_err(|_| "AUTH0_CLIENT_ID not set")?;
+    let client_secret =
+        std::env::var("AUTH0_CLIENT_SECRET").map_err(|_| "AUTH0_CLIENT_SECRET not set")?;
+
+    let email = user
+        .email
+        .as_deref()
+        .ok_or("No email in test user record for Auth0 lookup")?;
+
+    info!(email, "Cleaning up Auth0 test user");
+
+    // Get management API token
+    let token_resp = http
+        .post(format!("https://{domain}/oauth/token"))
+        .json(&serde_json::json!({
+            "grant_type": "client_credentials",
+            "client_id": client_id,
+            "client_secret": client_secret,
+            "audience": format!("https://{domain}/api/v2/"),
+        }))
+        .send()
+        .await
+        .map_err(|e| format!("Auth0 token request failed: {e}"))?;
+
+    // Surface the real HTTP failure instead of a misleading "No access_token" error.
+    if !token_resp.status().is_success() {
+        let status = token_resp.status();
+        let body = token_resp.text().await.unwrap_or_default();
+        return Err(format!("Auth0 token request failed ({status}): {body}"));
+    }
+
+    let token_body: serde_json::Value = token_resp
+        .json()
+        .await
+        .map_err(|e| format!("Failed to parse Auth0 token: {e}"))?;
+    let access_token = token_body
+        .get("access_token")
+        .and_then(|v| v.as_str())
+        .ok_or("No access_token in Auth0 response")?;
+
+    // Search for user by email
+    let encoded_email = urlencoding::encode(email);
+    let search_url = format!("https://{domain}/api/v2/users-by-email?email={encoded_email}");
+    let search_resp = http
+        .get(&search_url)
+        .bearer_auth(access_token)
+        .send()
+        .await
+        .map_err(|e| format!("Auth0 user search failed: {e}"))?;
+
+    // An error response is not a JSON array; report it before trying to parse one.
+    if !search_resp.status().is_success() {
+        let status = search_resp.status();
+        let body = search_resp.text().await.unwrap_or_default();
+        return Err(format!("Auth0 user search failed ({status}): {body}"));
+    }
+
+    let users: Vec<serde_json::Value> = search_resp
+        .json()
+        .await
+        .map_err(|e| format!("Failed to parse Auth0 users: {e}"))?;
+
+    let user_id = users
+        .first()
+        .and_then(|u| u.get("user_id"))
+        .and_then(|v| v.as_str())
+        .ok_or_else(|| format!("User with email '{email}' not found in Auth0"))?;
+
+    // Delete (the id is percent-encoded before being placed in the URL path)
+    let encoded_id = urlencoding::encode(user_id);
+    let delete_resp = http
+        .delete(format!("https://{domain}/api/v2/users/{encoded_id}"))
+        .bearer_auth(access_token)
+        .send()
+        .await
+        .map_err(|e| format!("Auth0 user delete failed: {e}"))?;
+
+    // `is_success()` covers every 2xx status, including 204 No Content.
+    if delete_resp.status().is_success() {
+        info!(email, user_id, "Auth0 test user deleted");
+        Ok(true)
+    } else {
+        let status = delete_resp.status();
+        let body = delete_resp.text().await.unwrap_or_default();
+        Err(format!("Auth0 delete failed ({status}): {body}"))
+    }
+}
+
+/// Delete a user from Okta via the Users API.
+///
+/// Requires `OKTA_DOMAIN`, `OKTA_API_TOKEN` env vars.
+///
+/// Flow: search for the user by `profile.login` (the record's username, falling
+/// back to its email) → deactivate (best effort) → delete by ID.
+///
+/// # Errors
+///
+/// Returns `Err` when an env var or both username and email are missing, when the
+/// search or delete request fails or returns a non-success status, or when no
+/// matching user is found. A failed deactivate call is only logged.
+async fn cleanup_okta(
+    user: &TestUserRecord,
+    _config: &AgentConfig,
+    http: &reqwest::Client,
+) -> Result<bool, String> {
+    let domain = std::env::var("OKTA_DOMAIN").map_err(|_| "OKTA_DOMAIN not set")?;
+    let api_token = std::env::var("OKTA_API_TOKEN").map_err(|_| "OKTA_API_TOKEN not set")?;
+
+    let username = user
+        .username
+        .as_deref()
+        .or(user.email.as_deref())
+        .ok_or("No username/email in test user record for Okta lookup")?;
+
+    info!(username, "Cleaning up Okta test user");
+
+    // Search user
+    let encoded = urlencoding::encode(username);
+    let search_url = format!("https://{domain}/api/v1/users?search=profile.login+eq+\"{encoded}\"");
+    let search_resp = http
+        .get(&search_url)
+        .header("Authorization", format!("SSWS {api_token}"))
+        .send()
+        .await
+        .map_err(|e| format!("Okta user search failed: {e}"))?;
+
+    // An error response is not a JSON array; report it before trying to parse one.
+    if !search_resp.status().is_success() {
+        let status = search_resp.status();
+        let body = search_resp.text().await.unwrap_or_default();
+        return Err(format!("Okta user search failed ({status}): {body}"));
+    }
+
+    let users: Vec<serde_json::Value> = search_resp
+        .json()
+        .await
+        .map_err(|e| format!("Failed to parse Okta users: {e}"))?;
+
+    let user_id = users
+        .first()
+        .and_then(|u| u.get("id"))
+        .and_then(|v| v.as_str())
+        .ok_or_else(|| format!("User '{username}' not found in Okta"))?;
+
+    // Deactivate first (required by Okta before delete). This stays best-effort:
+    // a failure is logged rather than returned, and the delete is still attempted.
+    let deactivate_result = http
+        .post(format!(
+            "https://{domain}/api/v1/users/{user_id}/lifecycle/deactivate"
+        ))
+        .header("Authorization", format!("SSWS {api_token}"))
+        .send()
+        .await;
+    match deactivate_result {
+        Ok(resp) if !resp.status().is_success() => {
+            warn!(
+                username,
+                status = %resp.status(),
+                "Okta deactivate returned a non-success status; attempting delete anyway"
+            );
+        }
+        Ok(_) => {}
+        Err(e) => {
+            warn!(
+                username,
+                error = %e,
+                "Okta deactivate request failed; attempting delete anyway"
+            );
+        }
+    }
+
+    // Delete
+    let delete_resp = http
+        .delete(format!("https://{domain}/api/v1/users/{user_id}"))
+        .header("Authorization", format!("SSWS {api_token}"))
+        .send()
+        .await
+        .map_err(|e| format!("Okta user delete failed: {e}"))?;
+
+    // `is_success()` covers every 2xx status, including 204 No Content.
+    if delete_resp.status().is_success() {
+        info!(username, user_id, "Okta test user deleted");
+        Ok(true)
+    } else {
+        let status = delete_resp.status();
+        let body = delete_resp.text().await.unwrap_or_default();
+        Err(format!("Okta delete failed ({status}): {body}"))
+    }
+}
@@ -1,3 +1,4 @@
+pub mod cleanup;
 mod context;
 pub mod crypto;
 pub mod orchestrator;
@@ -390,10 +390,13 @@ impl PentestOrchestrator {
                                        )
                                        .await;

+                                    // Build LLM-facing summary: strip large fields
+                                    // (screenshots, raw HTML) to save context window
+                                    let llm_data = summarize_tool_output(&result.data);
                                    serde_json::json!({
                                        "summary": result.summary,
                                        "findings_count": findings_count,
-                                        "data": result.data,
+                                        "data": llm_data,
                                    })
                                    .to_string()
                                }
@@ -465,21 +468,61 @@ impl PentestOrchestrator {
                .await;
        }

-        // If cleanup_test_user is requested, append a cleanup instruction
+        // Clean up test user via identity provider API if requested
        if session
            .config
            .as_ref()
            .is_some_and(|c| c.auth.cleanup_test_user)
        {
-            let cleanup_msg = PentestMessage::user(
-                session_id.clone(),
-                "Testing is complete. Now please clean up: navigate to the application and delete \
-                 the test user account that was created during this session. Confirm once done."
-                    .to_string(),
-            );
-            let _ = self.db.pentest_messages().insert_one(&cleanup_msg).await;
+            if let Some(ref test_user) = session.test_user {
+                let http = reqwest::Client::new();
+                // We need the AgentConfig — read from env since orchestrator doesn't hold it
+                let config = crate::config::load_config();
+                match config {
+                    Ok(cfg) => {
+                        match crate::pentest::cleanup::cleanup_test_user(test_user, &cfg, &http)
+                            .await
+                        {
+                            Ok(true) => {
+                                tracing::info!(
+                                    username = test_user.username.as_deref(),
+                                    "Test user cleaned up via provider API"
+                                );
+                                // Mark as cleaned up in DB
+                                if let Some(sid) = session.id {
+                                    let _ = self
+                                        .db
+                                        .pentest_sessions()
+                                        .update_one(
+                                            doc! { "_id": sid },
+                                            doc! { "$set": { "test_user.cleaned_up": true } },
+                                        )
+                                        .await;
+                                }
+                            }
+                            Ok(false) => {
+                                tracing::info!(
+                                    "Test user cleanup skipped (no provider configured)"
+                                );
+                            }
+                            Err(e) => {
+                                tracing::warn!(error = %e, "Test user cleanup failed");
+                                let _ = self.event_tx.send(PentestEvent::Error {
+                                    message: format!("Test user cleanup failed: {e}"),
+                                });
+                            }
+                        }
+                    }
+                    Err(e) => {
+                        tracing::warn!(error = %e, "Could not load config for cleanup");
+                    }
+                }
+            }
        }

+        // Clean up the persistent browser session for this pentest
+        compliance_dast::tools::browser::cleanup_browser_session(&session_id).await;
+
        let _ = self.event_tx.send(PentestEvent::Complete {
            summary: format!(
                "Pentest complete. {} findings from {} tool invocations.",
@@ -490,3 +533,82 @@ impl PentestOrchestrator {
        Ok(())
    }
 }
+
+/// Return the longest prefix of `s` that is at most `max_bytes` bytes long and
+/// ends on a UTF-8 character boundary.
+///
+/// Slicing a `str` at an arbitrary byte offset panics when the offset falls
+/// inside a multi-byte character, so the cut point is moved back to the
+/// nearest boundary instead.
+fn truncate_on_char_boundary(s: &str, max_bytes: usize) -> &str {
+    if s.len() <= max_bytes {
+        return s;
+    }
+    let mut end = max_bytes;
+    // Offset 0 is always a char boundary, so this loop terminates.
+    while !s.is_char_boundary(end) {
+        end -= 1;
+    }
+    &s[..end]
+}
+
+/// Strip large fields from tool output before sending it to the LLM.
+///
+/// Only JSON objects are rewritten; any other value is returned as a clone.
+/// For each key of the object:
+/// - `screenshot_base64`: a non-empty string is replaced by a short placeholder.
+/// - `html`: strings longer than 2000 bytes are cut to (at most) 2000 bytes.
+/// - `text`: strings longer than 1500 bytes are cut to (at most) 1500 bytes.
+/// - `elements` / `links` / `inputs`: arrays longer than 15 items keep the first
+///   15 plus a trailing `"... and N more"` marker.
+/// - any other key holding an object is summarized recursively.
+///
+/// The input is never modified, so the caller keeps the full data for storage.
+fn summarize_tool_output(data: &serde_json::Value) -> serde_json::Value {
+    use serde_json::Value;
+
+    const MAX_HTML_BYTES: usize = 2000;
+    const MAX_TEXT_BYTES: usize = 1500;
+    const MAX_ARRAY_ITEMS: usize = 15;
+
+    let Some(obj) = data.as_object() else {
+        return data.clone();
+    };
+
+    let summarized: serde_json::Map<String, Value> = obj
+        .iter()
+        .map(|(key, value)| {
+            let replacement = match (key.as_str(), value) {
+                // Replace screenshot base64 with a placeholder
+                ("screenshot_base64", Value::String(s)) if !s.is_empty() => {
+                    Value::String("[screenshot captured and saved to report]".to_string())
+                }
+                // Truncate raw HTML content. The limit and the reported total are
+                // byte lengths; the cut is moved back to a char boundary so that
+                // non-ASCII content cannot cause a slicing panic.
+                ("html", Value::String(s)) if s.len() > MAX_HTML_BYTES => Value::String(format!(
+                    "{}... [truncated, {} chars total]",
+                    truncate_on_char_boundary(s, MAX_HTML_BYTES),
+                    s.len()
+                )),
+                // Truncate page text (same boundary-safe cut as for HTML)
+                ("text", Value::String(s)) if s.len() > MAX_TEXT_BYTES => Value::String(format!(
+                    "{}... [truncated]",
+                    truncate_on_char_boundary(s, MAX_TEXT_BYTES)
+                )),
+                // Trim large arrays.
+                // NOTE(review): `navigate` lists links before form controls in
+                // `elements`, so keeping only the first 15 entries can hide every
+                // input on link-heavy pages — confirm this is acceptable.
+                ("elements" | "links" | "inputs", Value::Array(arr))
+                    if arr.len() > MAX_ARRAY_ITEMS =>
+                {
+                    let mut trimmed = arr[..MAX_ARRAY_ITEMS].to_vec();
+                    trimmed.push(Value::String(format!(
+                        "... and {} more",
+                        arr.len() - MAX_ARRAY_ITEMS
+                    )));
+                    Value::Array(trimmed)
+                }
+                // These keys are never recursed into, whatever their value type
+                ("screenshot_base64" | "html" | "elements" | "links" | "inputs", _) => {
+                    value.clone()
+                }
+                // Recursively summarize nested objects (e.g., "page" in get_content)
+                (_, Value::Object(_)) => summarize_tool_output(value),
+                // Keep everything else as-is
+                _ => value.clone(),
+            };
+            (key.clone(), replacement)
+        })
+        .collect();
+
+    Value::Object(summarized)
+}
@@ -285,15 +285,34 @@ impl PentestOrchestrator {
 1. Start by running reconnaissance (recon tool) to fingerprint the target and discover technologies.
 2. Run the OpenAPI parser to discover API endpoints from specs.
 3. Check infrastructure: DNS, DMARC, TLS, security headers, cookies, CSP, CORS.
-4. Based on SAST findings, prioritize testing endpoints where vulnerabilities were found in code.
-5. For each vulnerability type found in SAST, use the corresponding DAST tool to verify exploitability.
-6. If vulnerable dependencies are listed, try to trigger known CVE conditions against the running application.
-7. Test rate limiting on critical endpoints (login, API).
-8. Check for console.log leakage in frontend JavaScript.
-9. Analyze tool results and chain findings — if one vulnerability enables others, explore the chain.
-10. When testing is complete, provide a structured summary with severity and remediation.
-11. Always explain your reasoning before invoking each tool.
-12. When done, say "Testing complete" followed by a final summary.
+4. If the target requires authentication (auto-register mode), use the browser tool to:
+   a. Navigate to the target — it will redirect to the login page.
+   b. Click the "Register" link to reach the registration form.
+   c. Fill all form fields (username, email with plus-addressing, password, name) one by one.
+   d. Click submit. If a Terms & Conditions page appears, accept it.
+   e. After registration, use the browser to navigate through the application pages.
+   f. **Take a screenshot after each major page** for evidence in the report.
+5. Use the browser tool to explore the authenticated application — navigate to each section,
+   use get_content to understand the page structure, and take screenshots.
+6. Based on SAST findings, prioritize testing endpoints where vulnerabilities were found in code.
+7. For each vulnerability type found in SAST, use the corresponding DAST tool to verify exploitability.
+8. If vulnerable dependencies are listed, try to trigger known CVE conditions against the running application.
+9. Test rate limiting on critical endpoints (login, API).
+10. Check for console.log leakage in frontend JavaScript.
+11. Analyze tool results and chain findings — if one vulnerability enables others, explore the chain.
+12. When testing is complete, provide a structured summary with severity and remediation.
+13. Always explain your reasoning before invoking each tool.
+14. When done, say "Testing complete" followed by a final summary.
+
+## Browser Tool Usage
+- The browser tab **persists** between calls — cookies and login state are preserved.
+- After navigate, the response includes `elements` (links, inputs, buttons on the page).
+- Use `get_content` to see forms, links, buttons, headings, and page text.
+- Use `click` with CSS selectors to interact (e.g., `a[href*='register']`, `input[type='submit']`).
+- Use `fill` with selector + value to fill form fields (e.g., `input[name='email']`).
+- **Take screenshots** (`action: screenshot`) after important actions for evidence.
+- For SPA apps: a 200 HTTP status does NOT mean the page is accessible — check the actual
+  page content with the browser tool to verify if it shows real data or a login redirect.

 ## Important
 - This is an authorized penetration test. All testing is permitted within the target scope.
@@ -149,6 +149,23 @@ fn build_chain_html(chain: &[AttackChainNode]) -> String {
                )
            };

+            // Render an inline screenshot for any browser step whose output carries one
+            let screenshot_html = if node.tool_name == "browser" {
+                node.tool_output
+                    .as_ref()
+                    .and_then(|out| out.get("screenshot_base64"))
+                    .and_then(|v| v.as_str())
+                    .filter(|s| !s.is_empty())
+                    .map(|b64| {
+                        format!(
+                            r#"<div class="step-screenshot"><img src="data:image/png;base64,{b64}" alt="Browser screenshot" style="max-width:100%;border:1px solid #e2e8f0;border-radius:6px;margin-top:8px;"/></div>"#
+                        )
+                    })
+                    .unwrap_or_default()
+            } else {
+                String::new()
+            };
+
            chain_html.push_str(&format!(
                r#"<div class="step-row">
                        <div class="step-num">{num}</div>
@@ -161,6 +178,7 @@ fn build_chain_html(chain: &[AttackChainNode]) -> String {
                                {risk_badge}
                            </div>
                            {reasoning_html}
+                            {screenshot_html}
                        </div>
                    </div>"#,
                num = i + 1,
@@ -7,7 +7,18 @@ pub(super) fn cover(
    target_url: &str,
    requester_name: &str,
    requester_email: &str,
+    app_screenshot_b64: Option<&str>,
 ) -> String {
+    let screenshot_html = app_screenshot_b64
+        .filter(|s| !s.is_empty())
+        .map(|b64| {
+            format!(
+                r#"<div style="margin: 20px auto; max-width: 560px; border: 1px solid #cbd5e1; border-radius: 8px; overflow: hidden; box-shadow: 0 4px 12px rgba(0,0,0,0.08);">
+    <img src="data:image/png;base64,{b64}" alt="Application screenshot" style="width:100%;display:block;"/>
+</div>"#
+            )
+        })
+        .unwrap_or_default();
    format!(
        r##"<!-- ═══════════════ COVER PAGE ═══════════════ -->
 <div class="cover">
@@ -42,6 +53,8 @@ pub(super) fn cover(
        <strong>Prepared for:</strong> {requester_name} ({requester_email})
    </div>

+    {screenshot_html}
+
    <div class="cover-footer">
        Compliance Scanner &mdash; AI-Powered Security Assessment Platform
    </div>
@@ -37,6 +37,50 @@ pub(super) fn build_html_report(ctx: &ReportContext) -> String {
        names
    };

+    // Pick the cover-page screenshot from the browser steps of the attack chain.
+    // Prefer a screenshot whose own page title looks like the application itself
+    // ("Compliance" / "Dashboard") so that login-page screenshots are skipped;
+    // otherwise fall back to the first screenshot available.
+    //
+    // Each screenshot is paired with the tool output it came from, so the title
+    // check applies to that very step rather than to the chain as a whole.
+    let browser_screenshots = || {
+        ctx.attack_chain
+            .iter()
+            .filter(|n| n.tool_name == "browser")
+            .filter_map(|n| {
+                let output = n.tool_output.as_ref()?;
+                let b64 = output
+                    .get("screenshot_base64")?
+                    .as_str()
+                    .filter(|s| !s.is_empty())?;
+                Some((output, b64))
+            })
+    };
+    let app_screenshot: Option<String> = browser_screenshots()
+        .find(|(output, _)| {
+            // `navigate` reports the page title as "title", `click` as "page_title"
+            ["title", "page_title"]
+                .iter()
+                .filter_map(|key| output.get(*key).and_then(|t| t.as_str()))
+                .any(|t| t.contains("Compliance") || t.contains("Dashboard"))
+        })
+        // Fallback: any screenshot
+        .or_else(|| browser_screenshots().next())
+        .map(|(_, b64)| b64.to_string());
+
+
    let styles_html = styles::styles();
    let cover_html = cover::cover(
        &ctx.target_name,
@@ -45,6 +89,7 @@ pub(super) fn build_html_report(ctx: &ReportContext) -> String {
        &ctx.target_url,
        &ctx.requester_name,
        &ctx.requester_email,
+        app_screenshot.as_deref(),
    );
    let exec_html = executive_summary::executive_summary(
        &ctx.findings,
@@ -27,6 +27,14 @@ pub struct AgentConfig {
    pub ssh_key_path: String,
    pub keycloak_url: Option<String>,
    pub keycloak_realm: Option<String>,
+    pub keycloak_admin_username: Option<String>,
+    pub keycloak_admin_password: Option<SecretString>,
+    // Pentest defaults
+    pub pentest_verification_email: Option<String>,
+    pub pentest_imap_host: Option<String>,
+    pub pentest_imap_port: Option<u16>,
+    pub pentest_imap_username: Option<String>,
+    pub pentest_imap_password: Option<SecretString>,
 }

 #[derive(Clone, Debug, Serialize, Deserialize)]
@@ -28,9 +28,10 @@ pub use graph::{
 pub use issue::{IssueStatus, TrackerIssue, TrackerType};
 pub use mcp::{McpServerConfig, McpServerStatus, McpTransport};
 pub use pentest::{
-    AttackChainNode, AttackNodeStatus, AuthMode, CodeContextHint, Environment, PentestAuthConfig,
-    PentestConfig, PentestEvent, PentestMessage, PentestSession, PentestStats, PentestStatus,
-    PentestStrategy, SeverityDistribution, TesterInfo, ToolCallRecord,
+    AttackChainNode, AttackNodeStatus, AuthMode, CodeContextHint, Environment, IdentityProvider,
+    PentestAuthConfig, PentestConfig, PentestEvent, PentestMessage, PentestSession, PentestStats,
+    PentestStatus, PentestStrategy, SeverityDistribution, TestUserRecord, TesterInfo,
+    ToolCallRecord,
 };
 pub use repository::{ScanTrigger, TrackedRepository};
 pub use sbom::{SbomEntry, VulnRef};
@@ -150,6 +150,34 @@ pub struct PentestConfig {
    pub skip_mode: bool,
 }

+/// Identity provider type for cleanup routing.
+///
+/// Recorded on a [`TestUserRecord`] to say which provider holds the test user.
+/// Serialized in `snake_case` (e.g. `"keycloak"`, `"auth0"`).
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
+#[serde(rename_all = "snake_case")]
+pub enum IdentityProvider {
+    /// Keycloak
+    Keycloak,
+    /// Auth0
+    Auth0,
+    /// Okta
+    Okta,
+    /// Firebase.
+    /// NOTE(review): no automated cleanup for this provider is visible here — confirm.
+    Firebase,
+    /// Any other provider.
+    /// NOTE(review): presumably not cleaned up automatically — confirm.
+    Custom,
+}
+
+/// Details of a test user created during a pentest session.
+/// Stored so the cleanup step knows exactly what to delete and where.
+///
+/// Every identifying field is optional; `Default` yields a record with all of
+/// them unset and `cleaned_up == false`.
+#[derive(Debug, Clone, Serialize, Deserialize, Default)]
+pub struct TestUserRecord {
+    /// Username used to register
+    pub username: Option<String>,
+    /// Email used to register
+    pub email: Option<String>,
+    /// User ID returned by the identity provider (if known)
+    pub provider_user_id: Option<String>,
+    /// Which identity provider holds this user
+    pub provider: Option<IdentityProvider>,
+    /// Whether cleanup has been completed.
+    /// Defaults to `false` when the field is missing from the serialized record.
+    #[serde(default)]
+    pub cleaned_up: bool,
+}
+
 /// A pentest session initiated via the chat interface
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct PentestSession {
@@ -163,6 +191,9 @@ pub struct PentestSession {
    /// Wizard configuration (None for legacy sessions)
    pub config: Option<PentestConfig>,
    pub created_by: Option<String>,
+    /// Test user created during auto-register (for cleanup)
+    #[serde(default)]
+    pub test_user: Option<TestUserRecord>,
    /// Total number of tool invocations in this session
    pub tool_invocations: u32,
    /// Total successful tool invocations
@@ -187,6 +218,7 @@ impl PentestSession {
            strategy,
            config: None,
            created_by: None,
+            test_user: None,
            tool_invocations: 0,
            tool_successes: 0,
            findings_count: 0,
@@ -118,9 +118,12 @@ pub(crate) fn cat_label(cat: &str) -> &'static str {
    }
 }

-/// Phase name heuristic based on depth
-pub(crate) fn phase_name(depth: usize) -> &'static str {
-    match depth {
+/// Maximum number of display phases — deeper iterations are merged into the last.
+/// Depths of `MAX_PHASES` or more are clamped to `MAX_PHASES - 1` before grouping.
+const MAX_PHASES: usize = 8;
+
+/// Phase name heuristic based on phase index (not raw BFS depth)
+pub(crate) fn phase_name(phase_idx: usize) -> &'static str {
+    match phase_idx {
        0 => "Reconnaissance",
        1 => "Analysis",
        2 => "Boundary Testing",
@@ -133,8 +136,8 @@ pub(crate) fn phase_name(depth: usize) -> &'static str {
 }

 /// Short label for phase rail
-pub(crate) fn phase_short_name(depth: usize) -> &'static str {
-    match depth {
+pub(crate) fn phase_short_name(phase_idx: usize) -> &'static str {
+    match phase_idx {
        0 => "Recon",
        1 => "Analysis",
        2 => "Boundary",
@@ -214,7 +217,14 @@ pub(crate) fn compute_phases(steps: &[serde_json::Value]) -> Vec<Vec<usize>> {
        }
    }

-    // Group by depth
+    // Cap depths at MAX_PHASES - 1 so deeper iterations merge into the last phase
+    for d in depths.iter_mut() {
+        if *d >= MAX_PHASES {
+            *d = MAX_PHASES - 1;
+        }
+    }
+
+    // Group by (capped) depth
    let max_depth = depths.iter().copied().max().unwrap_or(0);
    let mut phases: Vec<Vec<usize>> = Vec::new();
    for d in 0..=max_depth {
@@ -1,4 +1,6 @@
+use std::collections::HashMap;
 use std::pin::Pin;
+use std::sync::Arc;
 use std::time::Duration;

 use base64::Engine;
@@ -6,17 +8,26 @@ use compliance_core::error::CoreError;
 use compliance_core::traits::pentest_tool::{PentestTool, PentestToolContext, PentestToolResult};
 use futures_util::{SinkExt, StreamExt};
 use serde_json::json;
+use tokio::sync::Mutex;
 use tokio_tungstenite::tungstenite::Message;
 use tracing::info;

 type WsStream =
    tokio_tungstenite::WebSocketStream<tokio_tungstenite::MaybeTlsStream<tokio::net::TcpStream>>;

+/// Global pool of persistent browser sessions keyed by pentest session ID.
+/// Each pentest session gets one Chrome tab that stays alive across tool calls.
+///
+/// Initialized lazily on first access. The map is guarded by an async
+/// (`tokio`) mutex, so the guard may be held across `.await` points.
+/// NOTE(review): `BrowserTool::execute` keeps this lock for the whole browser
+/// action, so browser calls from different pentest sessions run one at a time —
+/// confirm this serialization is intended.
+static BROWSER_SESSIONS: std::sync::LazyLock<Arc<Mutex<HashMap<String, BrowserSession>>>> =
+    std::sync::LazyLock::new(|| Arc::new(Mutex::new(HashMap::new())));
+
 /// A browser automation tool that exposes headless Chrome actions to the LLM
-/// via the Chrome DevTools Protocol. Reuses the same `CHROME_WS_URL` used for
-/// PDF generation.
+/// via the Chrome DevTools Protocol.
 ///
-/// Supported actions: navigate, screenshot, click, fill, get_content, evaluate.
+/// **Session-persistent**: the same Chrome tab is reused across all invocations
+/// within a pentest session, so cookies, auth state, and page context are
+/// preserved between navigate → click → fill → screenshot calls.
+///
+/// Supported actions: navigate, screenshot, click, fill, get_content, evaluate, close.
 pub struct BrowserTool;

 impl Default for BrowserTool {
@@ -31,11 +42,13 @@ impl PentestTool for BrowserTool {
    }

    fn description(&self) -> &str {
-        "Headless browser automation via Chrome DevTools Protocol. \
-         Supports navigating to URLs, taking screenshots, clicking elements, \
+        "Headless browser automation via Chrome DevTools Protocol. The browser tab persists \
+         across calls within the same pentest session — cookies, login state, and page context \
+         are preserved. Supports navigating to URLs, taking screenshots, clicking elements, \
         filling form fields, reading page content, and evaluating JavaScript. \
-         Use CSS selectors to target elements. Useful for discovering registration pages, \
-         filling out forms, extracting verification links, and visual inspection."
+         Use CSS selectors to target elements. After navigating, use get_content to read the \
+         page HTML and find elements to click or fill. Use this to discover registration pages, \
+         fill out signup forms, complete email verification, and test authenticated flows."
    }

    fn input_schema(&self) -> serde_json::Value {
@@ -44,8 +57,8 @@ impl PentestTool for BrowserTool {
            "properties": {
                "action": {
                    "type": "string",
-                    "enum": ["navigate", "screenshot", "click", "fill", "get_content", "evaluate"],
-                    "description": "Action to perform"
+                    "enum": ["navigate", "screenshot", "click", "fill", "get_content", "evaluate", "close"],
+                    "description": "Action to perform. The browser tab persists between calls — use navigate first, then get_content to see the page, then click/fill to interact."
                },
                "url": {
                    "type": "string",
@@ -53,7 +66,7 @@ impl PentestTool for BrowserTool {
                },
                "selector": {
                    "type": "string",
-                    "description": "CSS selector for click/fill actions"
+                    "description": "CSS selector for click/fill actions (e.g. '#username', 'a[href*=register]', 'button[type=submit]')"
                },
                "value": {
                    "type": "string",
@@ -61,7 +74,7 @@ impl PentestTool for BrowserTool {
                },
                "wait_ms": {
                    "type": "integer",
-                    "description": "Milliseconds to wait after action (default: 500)"
+                    "description": "Milliseconds to wait after action (default: 1000)"
                }
            },
            "required": ["action"]
@@ -71,7 +84,7 @@ impl PentestTool for BrowserTool {
    fn execute<'a>(
        &'a self,
        input: serde_json::Value,
-        _context: &'a PentestToolContext,
+        context: &'a PentestToolContext,
    ) -> Pin<Box<dyn std::future::Future<Output = Result<PentestToolResult, CoreError>> + Send + 'a>>
    {
        Box::pin(async move {
@@ -79,11 +92,42 @@ impl PentestTool for BrowserTool {
            let url = input.get("url").and_then(|v| v.as_str()).unwrap_or("");
            let selector = input.get("selector").and_then(|v| v.as_str()).unwrap_or("");
            let value = input.get("value").and_then(|v| v.as_str()).unwrap_or("");
-            let wait_ms = input.get("wait_ms").and_then(|v| v.as_u64()).unwrap_or(500);
+            let wait_ms = input
+                .get("wait_ms")
+                .and_then(|v| v.as_u64())
+                .unwrap_or(1000);
+            let session_key = context.session_id.clone();

-            let mut session = BrowserSession::connect()
-                .await
-                .map_err(|e| CoreError::Other(format!("Browser connect failed: {e}")))?;
+            // Handle close action — tear down the persistent session
+            if action == "close" {
+                let mut pool = BROWSER_SESSIONS.lock().await;
+                if let Some(mut sess) = pool.remove(&session_key) {
+                    let _ = sess.close().await;
+                }
+                return Ok(PentestToolResult {
+                    summary: "Browser session closed".to_string(),
+                    findings: Vec::new(),
+                    data: json!({ "closed": true }),
+                });
+            }
+
+            // Get or create persistent session for this pentest
+            let mut pool = BROWSER_SESSIONS.lock().await;
+            if !pool.contains_key(&session_key) {
+                match BrowserSession::connect().await {
+                    Ok(sess) => {
+                        pool.insert(session_key.clone(), sess);
+                    }
+                    Err(e) => {
+                        return Err(CoreError::Other(format!("Browser connect failed: {e}")));
+                    }
+                }
+            }
+
+            let session = pool.get_mut(&session_key);
+            let Some(session) = session else {
+                return Err(CoreError::Other("Browser session not found".to_string()));
+            };

            let result = match action {
                "navigate" => session.navigate(url, wait_ms).await,
@@ -95,8 +139,15 @@ impl PentestTool for BrowserTool {
                _ => Err(format!("Unknown browser action: {action}")),
            };

-            // Always try to clean up
-            let _ = session.close().await;
+            // If the session errored, remove it so the next call creates a fresh one
+            if result.is_err() {
+                if let Some(mut dead) = pool.remove(&session_key) {
+                    let _ = dead.close().await;
+                }
+            }
+
+            // Release the lock before building the response
+            drop(pool);

            match result {
                Ok(data) => {
@@ -214,7 +265,7 @@ impl BrowserSession {
    }

    async fn navigate(&mut self, url: &str, wait_ms: u64) -> Result<serde_json::Value, String> {
-        let resp = cdp_send_session(
+        cdp_send_session(
            &mut self.ws,
            self.next_id,
            &self.session_id,
@@ -226,19 +277,44 @@ impl BrowserSession {

        tokio::time::sleep(Duration::from_millis(wait_ms)).await;

-        // Get page title
-        let title_resp = self.evaluate_raw("document.title").await?;
-        let page_url_resp = self.evaluate_raw("window.location.href").await?;
+        // Get page title and current URL (may have redirected)
+        let title = self
+            .evaluate_raw("document.title")
+            .await
+            .unwrap_or_default();
+        let page_url = self
+            .evaluate_raw("window.location.href")
+            .await
+            .unwrap_or_default();
+
+        // Auto-get a summary of interactive elements on the page
+        let links_js = r#"(function(){
+            var items = [];
+            document.querySelectorAll('a[href]').forEach(function(a, i) {
+                if (i < 20) items.push({tag:'a', text:a.textContent.trim().substring(0,60), href:a.href});
+            });
+            document.querySelectorAll('input,select,textarea,button[type=submit]').forEach(function(el, i) {
+                if (i < 20) items.push({tag:el.tagName.toLowerCase(), type:el.type||'', name:el.name||'', id:el.id||'', placeholder:el.placeholder||''});
+            });
+            return JSON.stringify(items);
+        })()"#;
+        let elements_json = self.evaluate_raw(links_js).await.unwrap_or_default();
+        let elements: serde_json::Value = serde_json::from_str(&elements_json).unwrap_or(json!([]));
+
+        // Auto-capture screenshot after every navigation
+        let screenshot_b64 = self.capture_screenshot_b64().await.unwrap_or_default();

        Ok(json!({
            "navigated": true,
-            "url": page_url_resp,
-            "title": title_resp,
-            "frame_id": resp.get("result").and_then(|r| r.get("frameId")),
+            "url": page_url,
+            "title": title,
+            "elements": elements,
+            "screenshot_base64": screenshot_b64,
        }))
    }

-    async fn screenshot(&mut self) -> Result<serde_json::Value, String> {
+    /// Capture a screenshot and return it as a base64 string (empty if the response carries no data; CDP errors are returned as `Err`).
+    async fn capture_screenshot_b64(&mut self) -> Result<String, String> {
        let resp = cdp_send_session(
            &mut self.ws,
            self.next_id,
@@ -249,14 +325,19 @@ impl BrowserSession {
        .await?;
        self.next_id += 1;

-        let b64 = resp
+        Ok(resp
            .get("result")
            .and_then(|r| r.get("data"))
            .and_then(|d| d.as_str())
-            .unwrap_or("");
+            .unwrap_or("")
+            .to_string())
+    }
+
+    async fn screenshot(&mut self) -> Result<serde_json::Value, String> {
+        let b64 = self.capture_screenshot_b64().await?;

        let size_kb = base64::engine::general_purpose::STANDARD
-            .decode(b64)
+            .decode(&b64)
            .map(|b| b.len() / 1024)
            .unwrap_or(0);

@@ -267,7 +348,6 @@ impl BrowserSession {
    }

    async fn click(&mut self, selector: &str, wait_ms: u64) -> Result<serde_json::Value, String> {
-        // Use JS to find element and get its bounding box, then click
        let js = format!(
            r#"(function() {{
                var el = document.querySelector({sel});
@@ -289,9 +369,29 @@ impl BrowserSession {
        let result = self.evaluate_raw(&js).await?;
        tokio::time::sleep(Duration::from_millis(wait_ms)).await;

-        serde_json::from_str::<serde_json::Value>(&result)
-            .unwrap_or_else(|_| json!({ "result": result }));
-        Ok(serde_json::from_str(&result).unwrap_or(json!({ "result": result })))
+        // After click, get current URL (may have navigated)
+        let current_url = self
+            .evaluate_raw("window.location.href")
+            .await
+            .unwrap_or_default();
+        let title = self
+            .evaluate_raw("document.title")
+            .await
+            .unwrap_or_default();
+
+        // Auto-capture screenshot after click
+        let screenshot_b64 = self.capture_screenshot_b64().await.unwrap_or_default();
+
+        let mut click_result: serde_json::Value =
+            serde_json::from_str(&result).unwrap_or(json!({ "result": result }));
+        if let Some(obj) = click_result.as_object_mut() {
+            obj.insert("current_url".to_string(), json!(current_url));
+            obj.insert("page_title".to_string(), json!(title));
+            if !screenshot_b64.is_empty() {
+                obj.insert("screenshot_base64".to_string(), json!(screenshot_b64));
+            }
+        }
+        Ok(click_result)
    }

    async fn fill(
@@ -300,62 +400,83 @@ impl BrowserSession {
        value: &str,
        wait_ms: u64,
    ) -> Result<serde_json::Value, String> {
-        let js = format!(
-            r#"(function() {{
-                var el = document.querySelector({sel});
-                if (!el) return JSON.stringify({{error: "Element not found: {raw}"}});
-                el.focus();
-                el.value = {val};
-                el.dispatchEvent(new Event('input', {{bubbles: true}}));
-                el.dispatchEvent(new Event('change', {{bubbles: true}}));
-                return JSON.stringify({{filled: true, tag: el.tagName, selector: {sel}}});
-            }})()"#,
+        // Step 1: Focus the element via JS
+        let focus_js = format!(
+            "(function(){{var e=document.querySelector({sel});\
+             if(!e)return 'notfound';e.focus();e.select();return 'ok'}})()",
            sel = serde_json::to_string(selector).unwrap_or_default(),
-            raw = selector.replace('"', r#"\""#),
-            val = serde_json::to_string(value).unwrap_or_default(),
        );
+        let found = self.evaluate_raw(&focus_js).await?;
+        if found == "notfound" {
+            return Ok(json!({ "error": format!("Element not found: {selector}") }));
+        }
+
+        // Step 2: Clear existing content with Select All + Delete
+        cdp_send_session(
+            &mut self.ws,
+            self.next_id,
+            &self.session_id,
+            "Input.dispatchKeyEvent",
+            json!({"type": "keyDown", "key": "a", "code": "KeyA", "modifiers": 2}),
+        )
+        .await?;
+        self.next_id += 1;
+        cdp_send_session(
+            &mut self.ws,
+            self.next_id,
+            &self.session_id,
+            "Input.dispatchKeyEvent",
+            json!({"type": "keyUp", "key": "a", "code": "KeyA", "modifiers": 2}),
+        )
+        .await?;
+        self.next_id += 1;
+        cdp_send_session(
+            &mut self.ws,
+            self.next_id,
+            &self.session_id,
+            "Input.dispatchKeyEvent",
+            json!({"type": "keyDown", "key": "Backspace", "code": "Backspace"}),
+        )
+        .await?;
+        self.next_id += 1;
+        cdp_send_session(
+            &mut self.ws,
+            self.next_id,
+            &self.session_id,
+            "Input.dispatchKeyEvent",
+            json!({"type": "keyUp", "key": "Backspace", "code": "Backspace"}),
+        )
+        .await?;
+        self.next_id += 1;
+
+        // Step 3: Insert the text using Input.insertText (single CDP command, no JS eval)
+        cdp_send_session(
+            &mut self.ws,
+            self.next_id,
+            &self.session_id,
+            "Input.insertText",
+            json!({"text": value}),
+        )
+        .await?;
+        self.next_id += 1;
+
+        // Step 4: Verify the value was set
+        let verify_js = format!(
+            "(function(){{var e=document.querySelector({sel});return e?e.value:''}})()",
+            sel = serde_json::to_string(selector).unwrap_or_default(),
+        );
+        let final_value = self.evaluate_raw(&verify_js).await.unwrap_or_default();

-        let result = self.evaluate_raw(&js).await?;
        tokio::time::sleep(Duration::from_millis(wait_ms)).await;

-        Ok(serde_json::from_str(&result).unwrap_or(json!({ "result": result })))
+        Ok(json!({
+            "filled": true,
+            "selector": selector,
+            "value": final_value,
+        }))
    }

    async fn get_content(&mut self) -> Result<serde_json::Value, String> {
-        let resp = cdp_send_session(
-            &mut self.ws,
-            self.next_id,
-            &self.session_id,
-            "DOM.getDocument",
-            json!({ "depth": 0 }),
-        )
-        .await?;
-        self.next_id += 1;
-
-        let root_id = resp
-            .get("result")
-            .and_then(|r| r.get("root"))
-            .and_then(|n| n.get("nodeId"))
-            .and_then(|n| n.as_i64())
-            .unwrap_or(1);
-
-        let html_resp = cdp_send_session(
-            &mut self.ws,
-            self.next_id,
-            &self.session_id,
-            "DOM.getOuterHTML",
-            json!({ "nodeId": root_id }),
-        )
-        .await?;
-        self.next_id += 1;
-
-        let html = html_resp
-            .get("result")
-            .and_then(|r| r.get("outerHTML"))
-            .and_then(|h| h.as_str())
-            .unwrap_or("");
-
-        // Also get page title and URL for context
        let title = self
            .evaluate_raw("document.title")
            .await
@@ -365,22 +486,55 @@ impl BrowserSession {
            .await
            .unwrap_or_default();

-        // Truncate HTML to avoid massive payloads to the LLM
-        let truncated = if html.len() > 50_000 {
-            format!(
-                "{}... [truncated, {} total chars]",
-                &html[..50_000],
-                html.len()
-            )
-        } else {
-            html.to_string()
-        };
+        // Get a structured summary instead of raw HTML (more useful for LLM)
+        let summary_js = r#"(function(){
+            var result = {forms:[], links:[], inputs:[], buttons:[], headings:[], text:''};
+
+            // Forms
+            document.querySelectorAll('form').forEach(function(f,i){
+                if(i<10) result.forms.push({action:f.action, method:f.method, id:f.id});
+            });
+
+            // Links
+            document.querySelectorAll('a[href]').forEach(function(a,i){
+                if(i<30) result.links.push({text:a.textContent.trim().substring(0,80), href:a.href});
+            });
+
+            // Inputs
+            document.querySelectorAll('input,select,textarea').forEach(function(el,i){
+                if(i<30) result.inputs.push({
+                    tag:el.tagName.toLowerCase(),
+                    type:el.type||'',
+                    name:el.name||'',
+                    id:el.id||'',
+                    placeholder:el.placeholder||'',
+                    value:el.type==='password'?'***':el.value.substring(0,50)
+                });
+            });
+
+            // Buttons
+            document.querySelectorAll('button,[type=submit],[role=button]').forEach(function(b,i){
+                if(i<20) result.buttons.push({text:b.textContent.trim().substring(0,60), type:b.type||'', id:b.id||''});
+            });
+
+            // Headings
+            document.querySelectorAll('h1,h2,h3').forEach(function(h,i){
+                if(i<10) result.headings.push(h.textContent.trim().substring(0,100));
+            });
+
+            // Page text (truncated)
+            result.text = document.body ? document.body.innerText.substring(0, 3000) : '';
+
+            return JSON.stringify(result);
+        })()"#;
+
+        let summary = self.evaluate_raw(summary_js).await.unwrap_or_default();
+        let page_data: serde_json::Value = serde_json::from_str(&summary).unwrap_or(json!({}));

        Ok(json!({
            "url": url,
            "title": title,
-            "html": truncated,
-            "html_length": html.len(),
+            "page": page_data,
        }))
    }

@@ -431,7 +585,15 @@ impl BrowserSession {
    }
 }

-// ── CDP helpers (same pattern as compliance-agent/src/pentest/report/pdf.rs) ──
+/// Remove and close the browser session for a pentest session, if any (call when it ends).
+pub async fn cleanup_browser_session(session_id: &str) {
+    let removed = BROWSER_SESSIONS.lock().await.remove(session_id); // guard dropped here
+    if let Some(mut sess) = removed {
+        let _ = sess.close().await; // best-effort; the pool lock is no longer held
+    }
+}
+
+// ── CDP helpers ──

 async fn cdp_send(
    ws: &mut WsStream,
@@ -0,0 +1,63 @@
+version: "3.8"
+
+services:
+  mailserver:
+    image: ghcr.io/docker-mailserver/docker-mailserver:14
+    hostname: mail.scanner.meghsakha.com
+    domainname: scanner.meghsakha.com
+    container_name: mailserver
+    ports:
+      - "25:25"      # SMTP (inbound mail)
+      - "143:143"    # IMAP (orchestrator reads mail)
+      - "993:993"    # IMAPS (TLS)
+      - "587:587"    # Submission (outbound, if needed)
+    volumes:
+      - maildata:/var/mail
+      - mailstate:/var/mail-state
+      - maillogs:/var/log/mail
+      - /etc/localtime:/etc/localtime:ro
+    environment:
+      # Hostname
+      - OVERRIDE_HOSTNAME=mail.scanner.meghsakha.com
+
+      # Disable features we don't need
+      - ENABLE_SPAMASSASSIN=0
+      - ENABLE_CLAMAV=0
+      - ENABLE_FAIL2BAN=0
+      - ENABLE_POSTGREY=0
+      - ENABLE_AMAVIS=0
+
+      # Enable what we need
+      - ENABLE_IMAP=1
+      - ENABLE_POP3=0
+
+      # Plus-addressing (critical for pentest)
+      - POSTFIX_RECIPIENT_DELIMITER=+
+
+      # SSL (start with no TLS, add Let's Encrypt later)
+      - SSL_TYPE=
+
+      # Do not add Docker networks to Postfix mynetworks (no unauthenticated relay)
+      - PERMIT_DOCKER=none
+
+      # Disable inbound SPF/DKIM/DMARC checks and spoof protection — we need to
+      # accept verification emails from Keycloak and other external senders
+      - ENABLE_OPENDKIM=0
+      - ENABLE_OPENDMARC=0
+      - ENABLE_POLICYD_SPF=0
+      - SPOOF_PROTECTION=0
+
+      # One domain
+      - POSTFIX_MYDESTINATION=scanner.meghsakha.com, localhost
+
+    restart: unless-stopped
+    healthcheck:
+      test: ["CMD-SHELL", "ss -tln | grep -q ':25 '"]  # pipe requires a shell, so CMD-SHELL
+      interval: 30s
+      timeout: 10s
+      retries: 3
+
+volumes:
+  maildata:
+  mailstate:
+  maillogs:
@@ -1,6 +1,6 @@
 services:
  mongo:
-    image: mongo:latest
+    image: mongo:7
    ports:
      - "27017:27017"
    environment: