feat: browser session persistence, auto-screenshots, context optimization, user cleanup

Browser tool: - Session-persistent Chrome tab (same tab reused across all calls in a pentest) - Auto-screenshot on every navigate and click (stored in attack chain for report) - Fill uses CDP Input.insertText (fixes WebSocket corruption on special chars) - Switched from browserless/chromium to chromedp/headless-shell (stable WS) Context window optimization: - Strip screenshot_base64 from LLM conversation (kept in DB for report) - Truncate HTML to 2KB, page text to 1.5KB in LLM messages - Cap element/link arrays at 15 items - SAST triage: batch 30 findings per LLM call instead of all at once Report improvements: - Auto-embed screenshots in attack chain timeline (navigate + click nodes) - Cover page shows best app screenshot - Attack chain phases capped at 8 (no more 20x "Final") User cleanup: - TestUserRecord model tracks created test users per session - cleanup.rs: Keycloak (Admin REST API), Auth0 (Management API), Okta (Users API) - Auto-cleanup on session completion when cleanup_test_user is enabled - Env vars: KEYCLOAK_ADMIN_USERNAME, KEYCLOAK_ADMIN_PASSWORD System prompt: - Explicit browser usage instructions (navigate → get_content → click → fill) - SPA auth bypass guidance (check page content, not HTTP status) - Screenshot instructions for evidence collection Other: - Pin mongo:7 in docker-compose (mongo:latest/8 segfaults on kernel 6.19) - Add deploy/docker-compose.mailserver.yml for Postfix + Dovecot Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-17 19:53:55 +01:00
parent a737c36bc9
commit 37690ce734
18 changed files with 1122 additions and 215 deletions
@@ -139,6 +139,61 @@ pub async fn create_session(
        let event_tx = agent.register_session_stream(&session_id_str);
        let pause_rx = agent.register_pause_control(&session_id_str);

+        // Merge server-default IMAP/email settings where wizard left blanks
+        if let Some(ref mut cfg) = session.config {
+            if cfg.auth.mode == AuthMode::AutoRegister {
+                if cfg.auth.verification_email.is_none() {
+                    cfg.auth.verification_email = agent.config.pentest_verification_email.clone();
+                }
+                if cfg.auth.imap_host.is_none() {
+                    cfg.auth.imap_host = agent.config.pentest_imap_host.clone();
+                }
+                if cfg.auth.imap_port.is_none() {
+                    cfg.auth.imap_port = agent.config.pentest_imap_port;
+                }
+                if cfg.auth.imap_username.is_none() {
+                    cfg.auth.imap_username = agent.config.pentest_imap_username.clone();
+                }
+                if cfg.auth.imap_password.is_none() {
+                    cfg.auth.imap_password = agent.config.pentest_imap_password.as_ref().map(|s| {
+                        use secrecy::ExposeSecret;
+                        s.expose_secret().to_string()
+                    });
+                }
+            }
+        }
+
+        // Pre-populate test user record for auto-register sessions
+        if let Some(ref cfg) = session.config {
+            if cfg.auth.mode == AuthMode::AutoRegister {
+                let verification_email = cfg.auth.verification_email.clone();
+                // Build plus-addressed email for this session
+                let test_email = verification_email.as_deref().map(|email| {
+                    let parts: Vec<&str> = email.splitn(2, '@').collect();
+                    if parts.len() == 2 {
+                        format!("{}+{}@{}", parts[0], session_id_str, parts[1])
+                    } else {
+                        email.to_string()
+                    }
+                });
+
+                // Detect identity provider from keycloak config
+                let provider = if agent.config.keycloak_url.is_some() {
+                    Some(compliance_core::models::pentest::IdentityProvider::Keycloak)
+                } else {
+                    None
+                };
+
+                session.test_user = Some(compliance_core::models::pentest::TestUserRecord {
+                    username: None, // LLM will choose; updated after registration
+                    email: test_email,
+                    provider_user_id: None,
+                    provider,
+                    cleaned_up: false,
+                });
+            }
+        }
+
        // Encrypt credentials before they linger in memory
        let mut session_for_task = session.clone();
        if let Some(ref mut cfg) = session_for_task.config {
@@ -49,5 +49,12 @@ pub fn load_config() -> Result<AgentConfig, AgentError> {
            .unwrap_or_else(|| "/data/compliance-scanner/ssh/id_ed25519".to_string()),
        keycloak_url: env_var_opt("KEYCLOAK_URL"),
        keycloak_realm: env_var_opt("KEYCLOAK_REALM"),
+        keycloak_admin_username: env_var_opt("KEYCLOAK_ADMIN_USERNAME"),
+        keycloak_admin_password: env_secret_opt("KEYCLOAK_ADMIN_PASSWORD"),
+        pentest_verification_email: env_var_opt("PENTEST_VERIFICATION_EMAIL"),
+        pentest_imap_host: env_var_opt("PENTEST_IMAP_HOST"),
+        pentest_imap_port: env_var_opt("PENTEST_IMAP_PORT").and_then(|p| p.parse().ok()),
+        pentest_imap_username: env_var_opt("PENTEST_IMAP_USERNAME"),
+        pentest_imap_password: env_secret_opt("PENTEST_IMAP_PASSWORD"),
    })
 }
@@ -5,7 +5,10 @@ use compliance_core::models::{Finding, FindingStatus};
 use crate::llm::LlmClient;
 use crate::pipeline::orchestrator::GraphContext;

-const TRIAGE_SYSTEM_PROMPT: &str = r#"You are a security finding triage expert. Analyze the following security finding with its code context and determine the appropriate action.
+/// Maximum number of findings to include in a single LLM triage call.
+const TRIAGE_CHUNK_SIZE: usize = 30;
+
+const TRIAGE_SYSTEM_PROMPT: &str = r#"You are a security finding triage expert. Analyze each of the following security findings with its code context and determine the appropriate action.

 Actions:
 - "confirm": The finding is a true positive at the reported severity. Keep as-is.
@@ -19,8 +22,8 @@ Consider:
 - Is the finding actionable by a developer?
 - Would a real attacker be able to exploit this?

-Respond in JSON format:
-{"action": "confirm|downgrade|upgrade|dismiss", "confidence": 0-10, "rationale": "brief explanation", "remediation": "optional fix suggestion"}"#;
+Respond with a JSON array, one entry per finding in the same order they were presented:
+[{"id": "<fingerprint>", "action": "confirm|downgrade|upgrade|dismiss", "confidence": 0-10, "rationale": "brief explanation", "remediation": "optional fix suggestion"}, ...]"#;

 pub async fn triage_findings(
    llm: &Arc<LlmClient>,
@@ -29,60 +32,76 @@ pub async fn triage_findings(
 ) -> usize {
    let mut passed = 0;

-    for finding in findings.iter_mut() {
-        let file_classification = classify_file_path(finding.file_path.as_deref());
+    // Process findings in chunks to avoid overflowing the LLM context window.
+    for chunk_start in (0..findings.len()).step_by(TRIAGE_CHUNK_SIZE) {
+        let chunk_end = (chunk_start + TRIAGE_CHUNK_SIZE).min(findings.len());
+        let chunk = &mut findings[chunk_start..chunk_end];

-        let mut user_prompt = format!(
-            "Scanner: {}\nRule: {}\nSeverity: {}\nTitle: {}\nDescription: {}\nFile: {}\nLine: {}\nCode: {}\nFile classification: {}",
-            finding.scanner,
-            finding.rule_id.as_deref().unwrap_or("N/A"),
-            finding.severity,
-            finding.title,
-            finding.description,
-            finding.file_path.as_deref().unwrap_or("N/A"),
-            finding.line_number.map(|n| n.to_string()).unwrap_or_else(|| "N/A".to_string()),
-            finding.code_snippet.as_deref().unwrap_or("N/A"),
-            file_classification,
-        );
+        // Build a combined prompt for the entire chunk.
+        let mut user_prompt = String::new();
+        let mut file_classifications: Vec<String> = Vec::new();
+
+        for (i, finding) in chunk.iter().enumerate() {
+            let file_classification = classify_file_path(finding.file_path.as_deref());

-        // Enrich with surrounding code context if possible
-        if let Some(context) = read_surrounding_context(finding) {
            user_prompt.push_str(&format!(
-                "\n\n--- Surrounding Code (50 lines) ---\n{context}"
+                "\n--- Finding {} (id: {}) ---\nScanner: {}\nRule: {}\nSeverity: {}\nTitle: {}\nDescription: {}\nFile: {}\nLine: {}\nCode: {}\nFile classification: {}",
+                i + 1,
+                finding.fingerprint,
+                finding.scanner,
+                finding.rule_id.as_deref().unwrap_or("N/A"),
+                finding.severity,
+                finding.title,
+                finding.description,
+                finding.file_path.as_deref().unwrap_or("N/A"),
+                finding.line_number.map(|n| n.to_string()).unwrap_or_else(|| "N/A".to_string()),
+                finding.code_snippet.as_deref().unwrap_or("N/A"),
+                file_classification,
            ));
-        }

-        // Enrich with graph context if available
-        if let Some(ctx) = graph_context {
-            if let Some(impact) = ctx
-                .impacts
-                .iter()
-                .find(|i| i.finding_id == finding.fingerprint)
-            {
+            // Enrich with surrounding code context if possible
+            if let Some(context) = read_surrounding_context(finding) {
                user_prompt.push_str(&format!(
-                    "\n\n--- Code Graph Context ---\n\
-                     Blast radius: {} nodes affected\n\
-                     Entry points affected: {}\n\
-                     Direct callers: {}\n\
-                     Communities affected: {}\n\
-                     Call chains: {}",
-                    impact.blast_radius,
-                    if impact.affected_entry_points.is_empty() {
-                        "none".to_string()
-                    } else {
-                        impact.affected_entry_points.join(", ")
-                    },
-                    if impact.direct_callers.is_empty() {
-                        "none".to_string()
-                    } else {
-                        impact.direct_callers.join(", ")
-                    },
-                    impact.affected_communities.len(),
-                    impact.call_chains.len(),
+                    "\n\n--- Surrounding Code (50 lines) ---\n{context}"
                ));
            }
+
+            // Enrich with graph context if available
+            if let Some(ctx) = graph_context {
+                if let Some(impact) = ctx
+                    .impacts
+                    .iter()
+                    .find(|im| im.finding_id == finding.fingerprint)
+                {
+                    user_prompt.push_str(&format!(
+                        "\n\n--- Code Graph Context ---\n\
+                         Blast radius: {} nodes affected\n\
+                         Entry points affected: {}\n\
+                         Direct callers: {}\n\
+                         Communities affected: {}\n\
+                         Call chains: {}",
+                        impact.blast_radius,
+                        if impact.affected_entry_points.is_empty() {
+                            "none".to_string()
+                        } else {
+                            impact.affected_entry_points.join(", ")
+                        },
+                        if impact.direct_callers.is_empty() {
+                            "none".to_string()
+                        } else {
+                            impact.direct_callers.join(", ")
+                        },
+                        impact.affected_communities.len(),
+                        impact.call_chains.len(),
+                    ));
+                }
+            }
+
+            user_prompt.push('\n');
+            file_classifications.push(file_classification);
        }

+        // Send the batch to the LLM.
        match llm
            .chat(TRIAGE_SYSTEM_PROMPT, &user_prompt, Some(0.1))
            .await
@@ -98,58 +117,77 @@ pub async fn triage_findings(
                } else {
                    cleaned
                };
-                if let Ok(result) = serde_json::from_str::<TriageResult>(cleaned) {
-                    // Apply file-path confidence adjustment
-                    let adjusted_confidence =
-                        adjust_confidence(result.confidence, &file_classification);
-                    finding.confidence = Some(adjusted_confidence);
-                    finding.triage_action = Some(result.action.clone());
-                    finding.triage_rationale = Some(result.rationale);

-                    if let Some(remediation) = result.remediation {
-                        finding.remediation = Some(remediation);
-                    }
-
-                    match result.action.as_str() {
-                        "dismiss" => {
-                            finding.status = FindingStatus::FalsePositive;
-                        }
-                        "downgrade" => {
-                            // Downgrade severity by one level
-                            finding.severity = downgrade_severity(&finding.severity);
-                            finding.status = FindingStatus::Triaged;
-                            passed += 1;
-                        }
-                        "upgrade" => {
-                            finding.severity = upgrade_severity(&finding.severity);
-                            finding.status = FindingStatus::Triaged;
-                            passed += 1;
-                        }
-                        _ => {
-                            // "confirm" or unknown — keep as-is
-                            if adjusted_confidence >= 3.0 {
+                match serde_json::from_str::<Vec<TriageResult>>(cleaned) {
+                    Ok(results) => {
+                        for (idx, finding) in chunk.iter_mut().enumerate() {
+                            // Match result by position; fall back to keeping the finding.
+                            let Some(result) = results.get(idx) else {
                                finding.status = FindingStatus::Triaged;
                                passed += 1;
-                            } else {
-                                finding.status = FindingStatus::FalsePositive;
+                                continue;
+                            };
+
+                            let file_classification = file_classifications
+                                .get(idx)
+                                .map(|s| s.as_str())
+                                .unwrap_or("unknown");
+
+                            let adjusted_confidence =
+                                adjust_confidence(result.confidence, file_classification);
+                            finding.confidence = Some(adjusted_confidence);
+                            finding.triage_action = Some(result.action.clone());
+                            finding.triage_rationale = Some(result.rationale.clone());
+
+                            if let Some(ref remediation) = result.remediation {
+                                finding.remediation = Some(remediation.clone());
+                            }
+
+                            match result.action.as_str() {
+                                "dismiss" => {
+                                    finding.status = FindingStatus::FalsePositive;
+                                }
+                                "downgrade" => {
+                                    finding.severity = downgrade_severity(&finding.severity);
+                                    finding.status = FindingStatus::Triaged;
+                                    passed += 1;
+                                }
+                                "upgrade" => {
+                                    finding.severity = upgrade_severity(&finding.severity);
+                                    finding.status = FindingStatus::Triaged;
+                                    passed += 1;
+                                }
+                                _ => {
+                                    // "confirm" or unknown — keep as-is
+                                    if adjusted_confidence >= 3.0 {
+                                        finding.status = FindingStatus::Triaged;
+                                        passed += 1;
+                                    } else {
+                                        finding.status = FindingStatus::FalsePositive;
+                                    }
+                                }
                            }
                        }
                    }
-                } else {
-                    // Parse failure — keep the finding
-                    finding.status = FindingStatus::Triaged;
-                    passed += 1;
-                    tracing::warn!(
-                        "Failed to parse triage response for {}: {response}",
-                        finding.fingerprint
-                    );
+                    Err(_) => {
+                        // Batch parse failure — keep all findings in the chunk.
+                        tracing::warn!(
+                            "Failed to parse batch triage response for chunk starting at {chunk_start}: {cleaned}"
+                        );
+                        for finding in chunk.iter_mut() {
+                            finding.status = FindingStatus::Triaged;
+                            passed += 1;
+                        }
+                    }
                }
            }
            Err(e) => {
-                // On LLM error, keep the finding
-                tracing::warn!("LLM triage failed for {}: {e}", finding.fingerprint);
-                finding.status = FindingStatus::Triaged;
-                passed += 1;
+                // On LLM error, keep all findings in the chunk.
+                tracing::warn!("LLM batch triage failed for chunk starting at {chunk_start}: {e}");
+                for finding in chunk.iter_mut() {
+                    finding.status = FindingStatus::Triaged;
+                    passed += 1;
+                }
            }
        }
    }
@@ -266,6 +304,10 @@ fn upgrade_severity(

 #[derive(serde::Deserialize)]
 struct TriageResult {
+    /// Finding fingerprint echoed back by the LLM (optional).
+    #[serde(default)]
+    #[allow(dead_code)]
+    id: String,
    #[serde(default = "default_action")]
    action: String,
    #[serde(default)]
@@ -1,6 +1,6 @@
 mod agent;
 mod api;
-mod config;
+pub(crate) mod config;
 mod database;
 mod error;
 mod llm;
@@ -15,11 +15,20 @@ mod webhooks;

 #[tokio::main]
 async fn main() -> Result<(), Box<dyn std::error::Error>> {
-    dotenvy::dotenv().ok();
+    match dotenvy::dotenv() {
+        Ok(path) => eprintln!("[dotenv] Loaded from: {}", path.display()),
+        Err(e) => eprintln!("[dotenv] FAILED: {e}"),
+    }

    let _telemetry_guard = compliance_core::telemetry::init_telemetry("compliance-agent");

-    tracing::info!("Loading configuration...");
+    // Log critical env vars at startup
+    tracing::info!(
+        chrome_ws_url = std::env::var("CHROME_WS_URL").ok().as_deref(),
+        pentest_email = std::env::var("PENTEST_VERIFICATION_EMAIL").ok().as_deref(),
+        encryption_key_set = std::env::var("PENTEST_ENCRYPTION_KEY").is_ok(),
+        "Loading configuration..."
+    );
    let config = config::load_config()?;

    // Ensure SSH key pair exists for cloning private repos
@@ -0,0 +1,300 @@
+use compliance_core::models::pentest::{IdentityProvider, TestUserRecord};
+use compliance_core::AgentConfig;
+use secrecy::ExposeSecret;
+use tracing::{info, warn};
+
+/// Attempt to delete a test user created during a pentest session.
+///
+/// Routes to the appropriate identity provider based on `TestUserRecord.provider`.
+/// Falls back to browser-based cleanup if no API credentials are available.
+///
+/// Returns `Ok(true)` if the user was deleted, `Ok(false)` if skipped, `Err` on failure.
+pub async fn cleanup_test_user(
+    user: &TestUserRecord,
+    config: &AgentConfig,
+    http: &reqwest::Client,
+) -> Result<bool, String> {
+    if user.cleaned_up {
+        return Ok(false);
+    }
+
+    let provider = user.provider.as_ref();
+
+    match provider {
+        Some(IdentityProvider::Keycloak) => cleanup_keycloak(user, config, http).await,
+        Some(IdentityProvider::Auth0) => cleanup_auth0(user, config, http).await,
+        Some(IdentityProvider::Okta) => cleanup_okta(user, config, http).await,
+        Some(IdentityProvider::Firebase) => {
+            warn!("Firebase user cleanup not yet implemented");
+            Ok(false)
+        }
+        Some(IdentityProvider::Custom) | None => {
+            // For custom/unknown providers, try Keycloak if configured, else skip
+            if config.keycloak_url.is_some() && config.keycloak_admin_username.is_some() {
+                cleanup_keycloak(user, config, http).await
+            } else {
+                warn!(
+                    username = user.username.as_deref(),
+                    "No identity provider configured for cleanup — skipping"
+                );
+                Ok(false)
+            }
+        }
+    }
+}
+
+/// Delete a user from Keycloak via the Admin REST API.
+///
+/// Flow: get admin token → search user by username → delete by ID.
+async fn cleanup_keycloak(
+    user: &TestUserRecord,
+    config: &AgentConfig,
+    http: &reqwest::Client,
+) -> Result<bool, String> {
+    let base_url = config
+        .keycloak_url
+        .as_deref()
+        .ok_or("KEYCLOAK_URL not configured")?;
+    let realm = config
+        .keycloak_realm
+        .as_deref()
+        .ok_or("KEYCLOAK_REALM not configured")?;
+    let admin_user = config
+        .keycloak_admin_username
+        .as_deref()
+        .ok_or("KEYCLOAK_ADMIN_USERNAME not configured")?;
+    let admin_pass = config
+        .keycloak_admin_password
+        .as_ref()
+        .ok_or("KEYCLOAK_ADMIN_PASSWORD not configured")?;
+
+    let username = user
+        .username
+        .as_deref()
+        .ok_or("No username in test user record")?;
+
+    info!(username, realm, "Cleaning up Keycloak test user");
+
+    // Step 1: Get admin access token
+    let token_url = format!("{base_url}/realms/master/protocol/openid-connect/token");
+    let token_resp = http
+        .post(&token_url)
+        .form(&[
+            ("grant_type", "password"),
+            ("client_id", "admin-cli"),
+            ("username", admin_user),
+            ("password", admin_pass.expose_secret()),
+        ])
+        .send()
+        .await
+        .map_err(|e| format!("Keycloak token request failed: {e}"))?;
+
+    if !token_resp.status().is_success() {
+        let status = token_resp.status();
+        let body = token_resp.text().await.unwrap_or_default();
+        return Err(format!("Keycloak admin auth failed ({status}): {body}"));
+    }
+
+    let token_body: serde_json::Value = token_resp
+        .json()
+        .await
+        .map_err(|e| format!("Failed to parse Keycloak token: {e}"))?;
+    let access_token = token_body
+        .get("access_token")
+        .and_then(|v| v.as_str())
+        .ok_or("No access_token in Keycloak response")?;
+
+    // Step 2: Search for user by username
+    let search_url =
+        format!("{base_url}/admin/realms/{realm}/users?username={username}&exact=true");
+    let search_resp = http
+        .get(&search_url)
+        .bearer_auth(access_token)
+        .send()
+        .await
+        .map_err(|e| format!("Keycloak user search failed: {e}"))?;
+
+    if !search_resp.status().is_success() {
+        let status = search_resp.status();
+        let body = search_resp.text().await.unwrap_or_default();
+        return Err(format!("Keycloak user search failed ({status}): {body}"));
+    }
+
+    let users: Vec<serde_json::Value> = search_resp
+        .json()
+        .await
+        .map_err(|e| format!("Failed to parse Keycloak users: {e}"))?;
+
+    let user_id = users
+        .first()
+        .and_then(|u| u.get("id"))
+        .and_then(|v| v.as_str())
+        .ok_or_else(|| format!("User '{username}' not found in Keycloak realm '{realm}'"))?;
+
+    // Step 3: Delete the user
+    let delete_url = format!("{base_url}/admin/realms/{realm}/users/{user_id}");
+    let delete_resp = http
+        .delete(&delete_url)
+        .bearer_auth(access_token)
+        .send()
+        .await
+        .map_err(|e| format!("Keycloak user delete failed: {e}"))?;
+
+    if delete_resp.status().is_success() || delete_resp.status().as_u16() == 204 {
+        info!(username, user_id, "Keycloak test user deleted");
+        Ok(true)
+    } else {
+        let status = delete_resp.status();
+        let body = delete_resp.text().await.unwrap_or_default();
+        Err(format!("Keycloak delete failed ({status}): {body}"))
+    }
+}
+
+/// Delete a user from Auth0 via the Management API.
+///
+/// Requires `AUTH0_DOMAIN`, `AUTH0_CLIENT_ID`, `AUTH0_CLIENT_SECRET` env vars.
+async fn cleanup_auth0(
+    user: &TestUserRecord,
+    _config: &AgentConfig,
+    http: &reqwest::Client,
+) -> Result<bool, String> {
+    let domain = std::env::var("AUTH0_DOMAIN").map_err(|_| "AUTH0_DOMAIN not set")?;
+    let client_id = std::env::var("AUTH0_CLIENT_ID").map_err(|_| "AUTH0_CLIENT_ID not set")?;
+    let client_secret =
+        std::env::var("AUTH0_CLIENT_SECRET").map_err(|_| "AUTH0_CLIENT_SECRET not set")?;
+
+    let email = user
+        .email
+        .as_deref()
+        .ok_or("No email in test user record for Auth0 lookup")?;
+
+    info!(email, "Cleaning up Auth0 test user");
+
+    // Get management API token
+    let token_resp = http
+        .post(format!("https://{domain}/oauth/token"))
+        .json(&serde_json::json!({
+            "grant_type": "client_credentials",
+            "client_id": client_id,
+            "client_secret": client_secret,
+            "audience": format!("https://{domain}/api/v2/"),
+        }))
+        .send()
+        .await
+        .map_err(|e| format!("Auth0 token request failed: {e}"))?;
+
+    let token_body: serde_json::Value = token_resp
+        .json()
+        .await
+        .map_err(|e| format!("Failed to parse Auth0 token: {e}"))?;
+    let access_token = token_body
+        .get("access_token")
+        .and_then(|v| v.as_str())
+        .ok_or("No access_token in Auth0 response")?;
+
+    // Search for user by email
+    let encoded_email = urlencoding::encode(email);
+    let search_url = format!("https://{domain}/api/v2/users-by-email?email={encoded_email}");
+    let search_resp = http
+        .get(&search_url)
+        .bearer_auth(access_token)
+        .send()
+        .await
+        .map_err(|e| format!("Auth0 user search failed: {e}"))?;
+
+    let users: Vec<serde_json::Value> = search_resp
+        .json()
+        .await
+        .map_err(|e| format!("Failed to parse Auth0 users: {e}"))?;
+
+    let user_id = users
+        .first()
+        .and_then(|u| u.get("user_id"))
+        .and_then(|v| v.as_str())
+        .ok_or_else(|| format!("User with email '{email}' not found in Auth0"))?;
+
+    // Delete
+    let encoded_id = urlencoding::encode(user_id);
+    let delete_resp = http
+        .delete(format!("https://{domain}/api/v2/users/{encoded_id}"))
+        .bearer_auth(access_token)
+        .send()
+        .await
+        .map_err(|e| format!("Auth0 user delete failed: {e}"))?;
+
+    if delete_resp.status().is_success() || delete_resp.status().as_u16() == 204 {
+        info!(email, user_id, "Auth0 test user deleted");
+        Ok(true)
+    } else {
+        let status = delete_resp.status();
+        let body = delete_resp.text().await.unwrap_or_default();
+        Err(format!("Auth0 delete failed ({status}): {body}"))
+    }
+}
+
+/// Delete a user from Okta via the Users API.
+///
+/// Requires `OKTA_DOMAIN`, `OKTA_API_TOKEN` env vars.
+async fn cleanup_okta(
+    user: &TestUserRecord,
+    _config: &AgentConfig,
+    http: &reqwest::Client,
+) -> Result<bool, String> {
+    let domain = std::env::var("OKTA_DOMAIN").map_err(|_| "OKTA_DOMAIN not set")?;
+    let api_token = std::env::var("OKTA_API_TOKEN").map_err(|_| "OKTA_API_TOKEN not set")?;
+
+    let username = user
+        .username
+        .as_deref()
+        .or(user.email.as_deref())
+        .ok_or("No username/email in test user record for Okta lookup")?;
+
+    info!(username, "Cleaning up Okta test user");
+
+    // Search user
+    let encoded = urlencoding::encode(username);
+    let search_url = format!("https://{domain}/api/v1/users?search=profile.login+eq+\"{encoded}\"");
+    let search_resp = http
+        .get(&search_url)
+        .header("Authorization", format!("SSWS {api_token}"))
+        .send()
+        .await
+        .map_err(|e| format!("Okta user search failed: {e}"))?;
+
+    let users: Vec<serde_json::Value> = search_resp
+        .json()
+        .await
+        .map_err(|e| format!("Failed to parse Okta users: {e}"))?;
+
+    let user_id = users
+        .first()
+        .and_then(|u| u.get("id"))
+        .and_then(|v| v.as_str())
+        .ok_or_else(|| format!("User '{username}' not found in Okta"))?;
+
+    // Deactivate first (required by Okta before delete)
+    let _ = http
+        .post(format!(
+            "https://{domain}/api/v1/users/{user_id}/lifecycle/deactivate"
+        ))
+        .header("Authorization", format!("SSWS {api_token}"))
+        .send()
+        .await;
+
+    // Delete
+    let delete_resp = http
+        .delete(format!("https://{domain}/api/v1/users/{user_id}"))
+        .header("Authorization", format!("SSWS {api_token}"))
+        .send()
+        .await
+        .map_err(|e| format!("Okta user delete failed: {e}"))?;
+
+    if delete_resp.status().is_success() || delete_resp.status().as_u16() == 204 {
+        info!(username, user_id, "Okta test user deleted");
+        Ok(true)
+    } else {
+        let status = delete_resp.status();
+        let body = delete_resp.text().await.unwrap_or_default();
+        Err(format!("Okta delete failed ({status}): {body}"))
+    }
+}
@@ -1,3 +1,4 @@
+pub mod cleanup;
 mod context;
 pub mod crypto;
 pub mod orchestrator;
@@ -390,10 +390,13 @@ impl PentestOrchestrator {
                                        )
                                        .await;

+                                    // Build LLM-facing summary: strip large fields
+                                    // (screenshots, raw HTML) to save context window
+                                    let llm_data = summarize_tool_output(&result.data);
                                    serde_json::json!({
                                        "summary": result.summary,
                                        "findings_count": findings_count,
-                                        "data": result.data,
+                                        "data": llm_data,
                                    })
                                    .to_string()
                                }
@@ -465,21 +468,61 @@ impl PentestOrchestrator {
                .await;
        }

-        // If cleanup_test_user is requested, append a cleanup instruction
+        // Clean up test user via identity provider API if requested
        if session
            .config
            .as_ref()
            .is_some_and(|c| c.auth.cleanup_test_user)
        {
-            let cleanup_msg = PentestMessage::user(
-                session_id.clone(),
-                "Testing is complete. Now please clean up: navigate to the application and delete \
-                 the test user account that was created during this session. Confirm once done."
-                    .to_string(),
-            );
-            let _ = self.db.pentest_messages().insert_one(&cleanup_msg).await;
+            if let Some(ref test_user) = session.test_user {
+                let http = reqwest::Client::new();
+                // We need the AgentConfig — read from env since orchestrator doesn't hold it
+                let config = crate::config::load_config();
+                match config {
+                    Ok(cfg) => {
+                        match crate::pentest::cleanup::cleanup_test_user(test_user, &cfg, &http)
+                            .await
+                        {
+                            Ok(true) => {
+                                tracing::info!(
+                                    username = test_user.username.as_deref(),
+                                    "Test user cleaned up via provider API"
+                                );
+                                // Mark as cleaned up in DB
+                                if let Some(sid) = session.id {
+                                    let _ = self
+                                        .db
+                                        .pentest_sessions()
+                                        .update_one(
+                                            doc! { "_id": sid },
+                                            doc! { "$set": { "test_user.cleaned_up": true } },
+                                        )
+                                        .await;
+                                }
+                            }
+                            Ok(false) => {
+                                tracing::info!(
+                                    "Test user cleanup skipped (no provider configured)"
+                                );
+                            }
+                            Err(e) => {
+                                tracing::warn!(error = %e, "Test user cleanup failed");
+                                let _ = self.event_tx.send(PentestEvent::Error {
+                                    message: format!("Test user cleanup failed: {e}"),
+                                });
+                            }
+                        }
+                    }
+                    Err(e) => {
+                        tracing::warn!(error = %e, "Could not load config for cleanup");
+                    }
+                }
+            }
        }

+        // Clean up the persistent browser session for this pentest
+        compliance_dast::tools::browser::cleanup_browser_session(&session_id).await;
+
        let _ = self.event_tx.send(PentestEvent::Complete {
            summary: format!(
                "Pentest complete. {} findings from {} tool invocations.",
@@ -490,3 +533,82 @@ impl PentestOrchestrator {
        Ok(())
    }
 }
+
+/// Strip large fields from tool output before sending to the LLM.
+/// Screenshots, raw HTML, and other bulky data are replaced with short summaries.
+/// The full data is still stored in the DB for the report.
+fn summarize_tool_output(data: &serde_json::Value) -> serde_json::Value {
+    let Some(obj) = data.as_object() else {
+        return data.clone();
+    };
+
+    let mut summarized = serde_json::Map::new();
+    for (key, value) in obj {
+        match key.as_str() {
+            // Replace screenshot base64 with a placeholder
+            "screenshot_base64" => {
+                if let Some(s) = value.as_str() {
+                    if !s.is_empty() {
+                        summarized.insert(
+                            key.clone(),
+                            serde_json::Value::String(
+                                "[screenshot captured and saved to report]".to_string(),
+                            ),
+                        );
+                        continue;
+                    }
+                }
+                summarized.insert(key.clone(), value.clone());
+            }
+            // Truncate raw HTML content
+            "html" => {
+                if let Some(s) = value.as_str() {
+                    if s.len() > 2000 {
+                        summarized.insert(
+                            key.clone(),
+                            serde_json::Value::String(format!(
+                                "{}... [truncated, {} chars total]",
+                                &s[..2000],
+                                s.len()
+                            )),
+                        );
+                        continue;
+                    }
+                }
+                summarized.insert(key.clone(), value.clone());
+            }
+            // Truncate page text
+            "text" if value.as_str().is_some_and(|s| s.len() > 1500) => {
+                let s = value.as_str().unwrap_or_default();
+                summarized.insert(
+                    key.clone(),
+                    serde_json::Value::String(format!("{}... [truncated]", &s[..1500])),
+                );
+            }
+            // Trim large arrays (e.g., "elements", "links", "inputs")
+            "elements" | "links" | "inputs" => {
+                if let Some(arr) = value.as_array() {
+                    if arr.len() > 15 {
+                        let mut trimmed: Vec<serde_json::Value> = arr[..15].to_vec();
+                        trimmed.push(serde_json::json!(format!(
+                            "... and {} more",
+                            arr.len() - 15
+                        )));
+                        summarized.insert(key.clone(), serde_json::Value::Array(trimmed));
+                        continue;
+                    }
+                }
+                summarized.insert(key.clone(), value.clone());
+            }
+            // Recursively summarize nested objects (e.g., "page" in get_content)
+            _ if value.is_object() => {
+                summarized.insert(key.clone(), summarize_tool_output(value));
+            }
+            // Keep everything else as-is
+            _ => {
+                summarized.insert(key.clone(), value.clone());
+            }
+        }
+    }
+    serde_json::Value::Object(summarized)
+}
@@ -285,15 +285,34 @@ impl PentestOrchestrator {
 1. Start by running reconnaissance (recon tool) to fingerprint the target and discover technologies.
 2. Run the OpenAPI parser to discover API endpoints from specs.
 3. Check infrastructure: DNS, DMARC, TLS, security headers, cookies, CSP, CORS.
-4. Based on SAST findings, prioritize testing endpoints where vulnerabilities were found in code.
-5. For each vulnerability type found in SAST, use the corresponding DAST tool to verify exploitability.
-6. If vulnerable dependencies are listed, try to trigger known CVE conditions against the running application.
-7. Test rate limiting on critical endpoints (login, API).
-8. Check for console.log leakage in frontend JavaScript.
-9. Analyze tool results and chain findings — if one vulnerability enables others, explore the chain.
-10. When testing is complete, provide a structured summary with severity and remediation.
-11. Always explain your reasoning before invoking each tool.
-12. When done, say "Testing complete" followed by a final summary.
+4. If the target requires authentication (auto-register mode), use the browser tool to:
+   a. Navigate to the target — it will redirect to the login page.
+   b. Click the "Register" link to reach the registration form.
+   c. Fill all form fields (username, email with plus-addressing, password, name) one by one.
+   d. Click submit. If a Terms & Conditions page appears, accept it.
+   e. After registration, use the browser to navigate through the application pages.
+   f. **Take a screenshot after each major page** for evidence in the report.
+5. Use the browser tool to explore the authenticated application — navigate to each section,
+   use get_content to understand the page structure, and take screenshots.
+6. Based on SAST findings, prioritize testing endpoints where vulnerabilities were found in code.
+7. For each vulnerability type found in SAST, use the corresponding DAST tool to verify exploitability.
+8. If vulnerable dependencies are listed, try to trigger known CVE conditions against the running application.
+9. Test rate limiting on critical endpoints (login, API).
+10. Check for console.log leakage in frontend JavaScript.
+11. Analyze tool results and chain findings — if one vulnerability enables others, explore the chain.
+12. When testing is complete, provide a structured summary with severity and remediation.
+13. Always explain your reasoning before invoking each tool.
+14. When done, say "Testing complete" followed by a final summary.
+
+## Browser Tool Usage
+- The browser tab **persists** between calls — cookies and login state are preserved.
+- After navigate, the response includes `elements` (links, inputs, buttons on the page).
+- Use `get_content` to see forms, links, buttons, headings, and page text.
+- Use `click` with CSS selectors to interact (e.g., `a:text('Register')`, `input[type='submit']`).
+- Use `fill` with selector + value to fill form fields (e.g., `input[name='email']`).
+- **Take screenshots** (`action: screenshot`) after important actions for evidence.
+- For SPA apps: a 200 HTTP status does NOT mean the page is accessible — check the actual
+  page content with the browser tool to verify if it shows real data or a login redirect.

 ## Important
 - This is an authorized penetration test. All testing is permitted within the target scope.
@@ -149,6 +149,23 @@ fn build_chain_html(chain: &[AttackChainNode]) -> String {
                )
            };

+            // Render inline screenshot if this is a browser screenshot action
+            let screenshot_html = if node.tool_name == "browser" {
+                node.tool_output
+                    .as_ref()
+                    .and_then(|out| out.get("screenshot_base64"))
+                    .and_then(|v| v.as_str())
+                    .filter(|s| !s.is_empty())
+                    .map(|b64| {
+                        format!(
+                            r#"<div class="step-screenshot"><img src="data:image/png;base64,{b64}" alt="Browser screenshot" style="max-width:100%;border:1px solid #e2e8f0;border-radius:6px;margin-top:8px;"/></div>"#
+                        )
+                    })
+                    .unwrap_or_default()
+            } else {
+                String::new()
+            };
+
            chain_html.push_str(&format!(
                r#"<div class="step-row">
                        <div class="step-num">{num}</div>
@@ -161,6 +178,7 @@ fn build_chain_html(chain: &[AttackChainNode]) -> String {
                                {risk_badge}
                            </div>
                            {reasoning_html}
+                            {screenshot_html}
                        </div>
                    </div>"#,
                num = i + 1,
@@ -7,7 +7,18 @@ pub(super) fn cover(
    target_url: &str,
    requester_name: &str,
    requester_email: &str,
+    app_screenshot_b64: Option<&str>,
 ) -> String {
+    let screenshot_html = app_screenshot_b64
+        .filter(|s| !s.is_empty())
+        .map(|b64| {
+            format!(
+                r#"<div style="margin: 20px auto; max-width: 560px; border: 1px solid #cbd5e1; border-radius: 8px; overflow: hidden; box-shadow: 0 4px 12px rgba(0,0,0,0.08);">
+    <img src="data:image/png;base64,{b64}" alt="Application screenshot" style="width:100%;display:block;"/>
+</div>"#
+            )
+        })
+        .unwrap_or_default();
    format!(
        r##"<!-- ═══════════════ COVER PAGE ═══════════════ -->
 <div class="cover">
@@ -42,6 +53,8 @@ pub(super) fn cover(
        <strong>Prepared for:</strong> {requester_name} ({requester_email})
    </div>

+    {screenshot_html}
+
    <div class="cover-footer">
        Compliance Scanner &mdash; AI-Powered Security Assessment Platform
    </div>
@@ -37,6 +37,50 @@ pub(super) fn build_html_report(ctx: &ReportContext) -> String {
        names
    };

+    // Find the best app screenshot for the cover page:
+    // prefer the first navigate to the target URL that has a screenshot,
+    // falling back to any navigate with a screenshot
+    let app_screenshot: Option<String> = ctx
+        .attack_chain
+        .iter()
+        .filter(|n| n.tool_name == "browser")
+        .filter_map(|n| {
+            n.tool_output
+                .as_ref()?
+                .get("screenshot_base64")?
+                .as_str()
+                .filter(|s| !s.is_empty())
+                .map(|s| s.to_string())
+        })
+        // Skip the Keycloak login page screenshots — prefer one that shows the actual app
+        .find(|_| {
+            ctx.attack_chain
+                .iter()
+                .filter(|n| n.tool_name == "browser")
+                .any(|n| {
+                    n.tool_output
+                        .as_ref()
+                        .and_then(|o| o.get("title"))
+                        .and_then(|t| t.as_str())
+                        .is_some_and(|t| t.contains("Compliance") || t.contains("Dashboard"))
+                })
+        })
+        .or_else(|| {
+            // Fallback: any screenshot
+            ctx.attack_chain
+                .iter()
+                .filter(|n| n.tool_name == "browser")
+                .filter_map(|n| {
+                    n.tool_output
+                        .as_ref()?
+                        .get("screenshot_base64")?
+                        .as_str()
+                        .filter(|s| !s.is_empty())
+                        .map(|s| s.to_string())
+                })
+                .next()
+        });
+
    let styles_html = styles::styles();
    let cover_html = cover::cover(
        &ctx.target_name,
@@ -45,6 +89,7 @@ pub(super) fn build_html_report(ctx: &ReportContext) -> String {
        &ctx.target_url,
        &ctx.requester_name,
        &ctx.requester_email,
+        app_screenshot.as_deref(),
    );
    let exec_html = executive_summary::executive_summary(
        &ctx.findings,