feat: browser session persistence, auto-screenshots, context optimization, user cleanup
Some checks failed
CI / Check (pull_request) Failing after 5m55s
CI / Detect Changes (pull_request) Has been skipped
CI / Deploy Agent (pull_request) Has been skipped
CI / Deploy Dashboard (pull_request) Has been skipped
CI / Deploy Docs (pull_request) Has been skipped
CI / Deploy MCP (pull_request) Has been skipped

Browser tool:
- Session-persistent Chrome tab (same tab reused across all calls in a pentest)
- Auto-screenshot on every navigate and click (stored in attack chain for report)
- Fill uses CDP Input.insertText (fixes WebSocket corruption on special chars)
- Switched from browserless/chromium to chromedp/headless-shell (stable WS)

Context window optimization:
- Strip screenshot_base64 from LLM conversation (kept in DB for report)
- Truncate HTML to 2KB, page text to 1.5KB in LLM messages
- Cap element/link arrays at 15 items
- SAST triage: batch 30 findings per LLM call instead of all at once

Report improvements:
- Auto-embed screenshots in attack chain timeline (navigate + click nodes)
- Cover page shows best app screenshot
- Attack chain phases capped at 8 (no more 20x "Final")

User cleanup:
- TestUserRecord model tracks created test users per session
- cleanup.rs: Keycloak (Admin REST API), Auth0 (Management API), Okta (Users API)
- Auto-cleanup on session completion when cleanup_test_user is enabled
- Env vars: KEYCLOAK_ADMIN_USERNAME, KEYCLOAK_ADMIN_PASSWORD

System prompt:
- Explicit browser usage instructions (navigate → get_content → click → fill)
- SPA auth bypass guidance (check page content, not HTTP status)
- Screenshot instructions for evidence collection

Other:
- Pin mongo:7 in docker-compose (mongo:latest/8 segfaults on kernel 6.19)
- Add deploy/docker-compose.mailserver.yml for Postfix + Dovecot

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Sharang Parnerkar
2026-03-17 19:53:55 +01:00
parent a737c36bc9
commit 37690ce734
18 changed files with 1122 additions and 215 deletions

View File

@@ -139,6 +139,61 @@ pub async fn create_session(
let event_tx = agent.register_session_stream(&session_id_str);
let pause_rx = agent.register_pause_control(&session_id_str);
// Merge server-default IMAP/email settings where wizard left blanks
if let Some(ref mut cfg) = session.config {
if cfg.auth.mode == AuthMode::AutoRegister {
if cfg.auth.verification_email.is_none() {
cfg.auth.verification_email = agent.config.pentest_verification_email.clone();
}
if cfg.auth.imap_host.is_none() {
cfg.auth.imap_host = agent.config.pentest_imap_host.clone();
}
if cfg.auth.imap_port.is_none() {
cfg.auth.imap_port = agent.config.pentest_imap_port;
}
if cfg.auth.imap_username.is_none() {
cfg.auth.imap_username = agent.config.pentest_imap_username.clone();
}
if cfg.auth.imap_password.is_none() {
cfg.auth.imap_password = agent.config.pentest_imap_password.as_ref().map(|s| {
use secrecy::ExposeSecret;
s.expose_secret().to_string()
});
}
}
}
// Pre-populate test user record for auto-register sessions
if let Some(ref cfg) = session.config {
if cfg.auth.mode == AuthMode::AutoRegister {
let verification_email = cfg.auth.verification_email.clone();
// Build plus-addressed email for this session
let test_email = verification_email.as_deref().map(|email| {
let parts: Vec<&str> = email.splitn(2, '@').collect();
if parts.len() == 2 {
format!("{}+{}@{}", parts[0], session_id_str, parts[1])
} else {
email.to_string()
}
});
// Detect identity provider from keycloak config
let provider = if agent.config.keycloak_url.is_some() {
Some(compliance_core::models::pentest::IdentityProvider::Keycloak)
} else {
None
};
session.test_user = Some(compliance_core::models::pentest::TestUserRecord {
username: None, // LLM will choose; updated after registration
email: test_email,
provider_user_id: None,
provider,
cleaned_up: false,
});
}
}
// Encrypt credentials before they linger in memory
let mut session_for_task = session.clone();
if let Some(ref mut cfg) = session_for_task.config {

View File

@@ -49,5 +49,12 @@ pub fn load_config() -> Result<AgentConfig, AgentError> {
.unwrap_or_else(|| "/data/compliance-scanner/ssh/id_ed25519".to_string()),
keycloak_url: env_var_opt("KEYCLOAK_URL"),
keycloak_realm: env_var_opt("KEYCLOAK_REALM"),
keycloak_admin_username: env_var_opt("KEYCLOAK_ADMIN_USERNAME"),
keycloak_admin_password: env_secret_opt("KEYCLOAK_ADMIN_PASSWORD"),
pentest_verification_email: env_var_opt("PENTEST_VERIFICATION_EMAIL"),
pentest_imap_host: env_var_opt("PENTEST_IMAP_HOST"),
pentest_imap_port: env_var_opt("PENTEST_IMAP_PORT").and_then(|p| p.parse().ok()),
pentest_imap_username: env_var_opt("PENTEST_IMAP_USERNAME"),
pentest_imap_password: env_secret_opt("PENTEST_IMAP_PASSWORD"),
})
}

View File

@@ -5,7 +5,10 @@ use compliance_core::models::{Finding, FindingStatus};
use crate::llm::LlmClient;
use crate::pipeline::orchestrator::GraphContext;
const TRIAGE_SYSTEM_PROMPT: &str = r#"You are a security finding triage expert. Analyze the following security finding with its code context and determine the appropriate action.
/// Maximum number of findings to include in a single LLM triage call.
const TRIAGE_CHUNK_SIZE: usize = 30;
const TRIAGE_SYSTEM_PROMPT: &str = r#"You are a security finding triage expert. Analyze each of the following security findings with its code context and determine the appropriate action.
Actions:
- "confirm": The finding is a true positive at the reported severity. Keep as-is.
@@ -19,8 +22,8 @@ Consider:
- Is the finding actionable by a developer?
- Would a real attacker be able to exploit this?
Respond in JSON format:
{"action": "confirm|downgrade|upgrade|dismiss", "confidence": 0-10, "rationale": "brief explanation", "remediation": "optional fix suggestion"}"#;
Respond with a JSON array, one entry per finding in the same order they were presented:
[{"id": "<fingerprint>", "action": "confirm|downgrade|upgrade|dismiss", "confidence": 0-10, "rationale": "brief explanation", "remediation": "optional fix suggestion"}, ...]"#;
pub async fn triage_findings(
llm: &Arc<LlmClient>,
@@ -29,60 +32,76 @@ pub async fn triage_findings(
) -> usize {
let mut passed = 0;
for finding in findings.iter_mut() {
let file_classification = classify_file_path(finding.file_path.as_deref());
// Process findings in chunks to avoid overflowing the LLM context window.
for chunk_start in (0..findings.len()).step_by(TRIAGE_CHUNK_SIZE) {
let chunk_end = (chunk_start + TRIAGE_CHUNK_SIZE).min(findings.len());
let chunk = &mut findings[chunk_start..chunk_end];
let mut user_prompt = format!(
"Scanner: {}\nRule: {}\nSeverity: {}\nTitle: {}\nDescription: {}\nFile: {}\nLine: {}\nCode: {}\nFile classification: {}",
finding.scanner,
finding.rule_id.as_deref().unwrap_or("N/A"),
finding.severity,
finding.title,
finding.description,
finding.file_path.as_deref().unwrap_or("N/A"),
finding.line_number.map(|n| n.to_string()).unwrap_or_else(|| "N/A".to_string()),
finding.code_snippet.as_deref().unwrap_or("N/A"),
file_classification,
);
// Build a combined prompt for the entire chunk.
let mut user_prompt = String::new();
let mut file_classifications: Vec<String> = Vec::new();
for (i, finding) in chunk.iter().enumerate() {
let file_classification = classify_file_path(finding.file_path.as_deref());
// Enrich with surrounding code context if possible
if let Some(context) = read_surrounding_context(finding) {
user_prompt.push_str(&format!(
"\n\n--- Surrounding Code (50 lines) ---\n{context}"
"\n--- Finding {} (id: {}) ---\nScanner: {}\nRule: {}\nSeverity: {}\nTitle: {}\nDescription: {}\nFile: {}\nLine: {}\nCode: {}\nFile classification: {}",
i + 1,
finding.fingerprint,
finding.scanner,
finding.rule_id.as_deref().unwrap_or("N/A"),
finding.severity,
finding.title,
finding.description,
finding.file_path.as_deref().unwrap_or("N/A"),
finding.line_number.map(|n| n.to_string()).unwrap_or_else(|| "N/A".to_string()),
finding.code_snippet.as_deref().unwrap_or("N/A"),
file_classification,
));
}
// Enrich with graph context if available
if let Some(ctx) = graph_context {
if let Some(impact) = ctx
.impacts
.iter()
.find(|i| i.finding_id == finding.fingerprint)
{
// Enrich with surrounding code context if possible
if let Some(context) = read_surrounding_context(finding) {
user_prompt.push_str(&format!(
"\n\n--- Code Graph Context ---\n\
Blast radius: {} nodes affected\n\
Entry points affected: {}\n\
Direct callers: {}\n\
Communities affected: {}\n\
Call chains: {}",
impact.blast_radius,
if impact.affected_entry_points.is_empty() {
"none".to_string()
} else {
impact.affected_entry_points.join(", ")
},
if impact.direct_callers.is_empty() {
"none".to_string()
} else {
impact.direct_callers.join(", ")
},
impact.affected_communities.len(),
impact.call_chains.len(),
"\n\n--- Surrounding Code (50 lines) ---\n{context}"
));
}
// Enrich with graph context if available
if let Some(ctx) = graph_context {
if let Some(impact) = ctx
.impacts
.iter()
.find(|im| im.finding_id == finding.fingerprint)
{
user_prompt.push_str(&format!(
"\n\n--- Code Graph Context ---\n\
Blast radius: {} nodes affected\n\
Entry points affected: {}\n\
Direct callers: {}\n\
Communities affected: {}\n\
Call chains: {}",
impact.blast_radius,
if impact.affected_entry_points.is_empty() {
"none".to_string()
} else {
impact.affected_entry_points.join(", ")
},
if impact.direct_callers.is_empty() {
"none".to_string()
} else {
impact.direct_callers.join(", ")
},
impact.affected_communities.len(),
impact.call_chains.len(),
));
}
}
user_prompt.push('\n');
file_classifications.push(file_classification);
}
// Send the batch to the LLM.
match llm
.chat(TRIAGE_SYSTEM_PROMPT, &user_prompt, Some(0.1))
.await
@@ -98,58 +117,77 @@ pub async fn triage_findings(
} else {
cleaned
};
if let Ok(result) = serde_json::from_str::<TriageResult>(cleaned) {
// Apply file-path confidence adjustment
let adjusted_confidence =
adjust_confidence(result.confidence, &file_classification);
finding.confidence = Some(adjusted_confidence);
finding.triage_action = Some(result.action.clone());
finding.triage_rationale = Some(result.rationale);
if let Some(remediation) = result.remediation {
finding.remediation = Some(remediation);
}
match result.action.as_str() {
"dismiss" => {
finding.status = FindingStatus::FalsePositive;
}
"downgrade" => {
// Downgrade severity by one level
finding.severity = downgrade_severity(&finding.severity);
finding.status = FindingStatus::Triaged;
passed += 1;
}
"upgrade" => {
finding.severity = upgrade_severity(&finding.severity);
finding.status = FindingStatus::Triaged;
passed += 1;
}
_ => {
// "confirm" or unknown — keep as-is
if adjusted_confidence >= 3.0 {
match serde_json::from_str::<Vec<TriageResult>>(cleaned) {
Ok(results) => {
for (idx, finding) in chunk.iter_mut().enumerate() {
// Match result by position; fall back to keeping the finding.
let Some(result) = results.get(idx) else {
finding.status = FindingStatus::Triaged;
passed += 1;
} else {
finding.status = FindingStatus::FalsePositive;
continue;
};
let file_classification = file_classifications
.get(idx)
.map(|s| s.as_str())
.unwrap_or("unknown");
let adjusted_confidence =
adjust_confidence(result.confidence, file_classification);
finding.confidence = Some(adjusted_confidence);
finding.triage_action = Some(result.action.clone());
finding.triage_rationale = Some(result.rationale.clone());
if let Some(ref remediation) = result.remediation {
finding.remediation = Some(remediation.clone());
}
match result.action.as_str() {
"dismiss" => {
finding.status = FindingStatus::FalsePositive;
}
"downgrade" => {
finding.severity = downgrade_severity(&finding.severity);
finding.status = FindingStatus::Triaged;
passed += 1;
}
"upgrade" => {
finding.severity = upgrade_severity(&finding.severity);
finding.status = FindingStatus::Triaged;
passed += 1;
}
_ => {
// "confirm" or unknown — keep as-is
if adjusted_confidence >= 3.0 {
finding.status = FindingStatus::Triaged;
passed += 1;
} else {
finding.status = FindingStatus::FalsePositive;
}
}
}
}
}
} else {
// Parse failure — keep the finding
finding.status = FindingStatus::Triaged;
passed += 1;
tracing::warn!(
"Failed to parse triage response for {}: {response}",
finding.fingerprint
);
Err(_) => {
// Batch parse failure — keep all findings in the chunk.
tracing::warn!(
"Failed to parse batch triage response for chunk starting at {chunk_start}: {cleaned}"
);
for finding in chunk.iter_mut() {
finding.status = FindingStatus::Triaged;
passed += 1;
}
}
}
}
Err(e) => {
// On LLM error, keep the finding
tracing::warn!("LLM triage failed for {}: {e}", finding.fingerprint);
finding.status = FindingStatus::Triaged;
passed += 1;
// On LLM error, keep all findings in the chunk.
tracing::warn!("LLM batch triage failed for chunk starting at {chunk_start}: {e}");
for finding in chunk.iter_mut() {
finding.status = FindingStatus::Triaged;
passed += 1;
}
}
}
}
@@ -266,6 +304,10 @@ fn upgrade_severity(
#[derive(serde::Deserialize)]
struct TriageResult {
/// Finding fingerprint echoed back by the LLM (optional).
#[serde(default)]
#[allow(dead_code)]
id: String,
#[serde(default = "default_action")]
action: String,
#[serde(default)]

View File

@@ -1,6 +1,6 @@
mod agent;
mod api;
mod config;
pub(crate) mod config;
mod database;
mod error;
mod llm;
@@ -15,11 +15,20 @@ mod webhooks;
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
dotenvy::dotenv().ok();
match dotenvy::dotenv() {
Ok(path) => eprintln!("[dotenv] Loaded from: {}", path.display()),
Err(e) => eprintln!("[dotenv] FAILED: {e}"),
}
let _telemetry_guard = compliance_core::telemetry::init_telemetry("compliance-agent");
tracing::info!("Loading configuration...");
// Log critical env vars at startup
tracing::info!(
chrome_ws_url = std::env::var("CHROME_WS_URL").ok().as_deref(),
pentest_email = std::env::var("PENTEST_VERIFICATION_EMAIL").ok().as_deref(),
encryption_key_set = std::env::var("PENTEST_ENCRYPTION_KEY").is_ok(),
"Loading configuration..."
);
let config = config::load_config()?;
// Ensure SSH key pair exists for cloning private repos

View File

@@ -0,0 +1,300 @@
use compliance_core::models::pentest::{IdentityProvider, TestUserRecord};
use compliance_core::AgentConfig;
use secrecy::ExposeSecret;
use tracing::{info, warn};
/// Delete the test user recorded for a pentest session, if it has not been removed yet.
///
/// Dispatches on `user.provider` to the matching identity-provider routine.
/// A record whose provider is `Custom` or unset is attempted against Keycloak
/// when both `keycloak_url` and `keycloak_admin_username` are configured;
/// otherwise the cleanup is skipped with a warning.
///
/// # Returns
/// * `Ok(true)` — the provider routine reported the user as deleted.
/// * `Ok(false)` — nothing was done (already cleaned up, Firebase, or no
///   usable provider configuration).
///
/// # Errors
/// Propagates the `Err(String)` produced by the provider-specific routine.
pub async fn cleanup_test_user(
    user: &TestUserRecord,
    config: &AgentConfig,
    http: &reqwest::Client,
) -> Result<bool, String> {
    // A record already flagged as cleaned up is never processed again.
    if user.cleaned_up {
        return Ok(false);
    }

    match user.provider.as_ref() {
        Some(IdentityProvider::Keycloak) => cleanup_keycloak(user, config, http).await,
        Some(IdentityProvider::Auth0) => cleanup_auth0(user, config, http).await,
        Some(IdentityProvider::Okta) => cleanup_okta(user, config, http).await,
        Some(IdentityProvider::Firebase) => {
            warn!("Firebase user cleanup not yet implemented");
            Ok(false)
        }
        Some(IdentityProvider::Custom) | None => {
            // Custom/unknown provider: Keycloak is the only candidate, and only
            // when its URL and admin username are both present.
            let keycloak_usable =
                config.keycloak_url.is_some() && config.keycloak_admin_username.is_some();
            if !keycloak_usable {
                warn!(
                    username = user.username.as_deref(),
                    "No identity provider configured for cleanup — skipping"
                );
                return Ok(false);
            }
            cleanup_keycloak(user, config, http).await
        }
    }
}
/// Delete a user from Keycloak via the Admin REST API.
///
/// Flow: get admin token → search user by username → delete by ID.
async fn cleanup_keycloak(
user: &TestUserRecord,
config: &AgentConfig,
http: &reqwest::Client,
) -> Result<bool, String> {
let base_url = config
.keycloak_url
.as_deref()
.ok_or("KEYCLOAK_URL not configured")?;
let realm = config
.keycloak_realm
.as_deref()
.ok_or("KEYCLOAK_REALM not configured")?;
let admin_user = config
.keycloak_admin_username
.as_deref()
.ok_or("KEYCLOAK_ADMIN_USERNAME not configured")?;
let admin_pass = config
.keycloak_admin_password
.as_ref()
.ok_or("KEYCLOAK_ADMIN_PASSWORD not configured")?;
let username = user
.username
.as_deref()
.ok_or("No username in test user record")?;
info!(username, realm, "Cleaning up Keycloak test user");
// Step 1: Get admin access token
let token_url = format!("{base_url}/realms/master/protocol/openid-connect/token");
let token_resp = http
.post(&token_url)
.form(&[
("grant_type", "password"),
("client_id", "admin-cli"),
("username", admin_user),
("password", admin_pass.expose_secret()),
])
.send()
.await
.map_err(|e| format!("Keycloak token request failed: {e}"))?;
if !token_resp.status().is_success() {
let status = token_resp.status();
let body = token_resp.text().await.unwrap_or_default();
return Err(format!("Keycloak admin auth failed ({status}): {body}"));
}
let token_body: serde_json::Value = token_resp
.json()
.await
.map_err(|e| format!("Failed to parse Keycloak token: {e}"))?;
let access_token = token_body
.get("access_token")
.and_then(|v| v.as_str())
.ok_or("No access_token in Keycloak response")?;
// Step 2: Search for user by username
let search_url =
format!("{base_url}/admin/realms/{realm}/users?username={username}&exact=true");
let search_resp = http
.get(&search_url)
.bearer_auth(access_token)
.send()
.await
.map_err(|e| format!("Keycloak user search failed: {e}"))?;
if !search_resp.status().is_success() {
let status = search_resp.status();
let body = search_resp.text().await.unwrap_or_default();
return Err(format!("Keycloak user search failed ({status}): {body}"));
}
let users: Vec<serde_json::Value> = search_resp
.json()
.await
.map_err(|e| format!("Failed to parse Keycloak users: {e}"))?;
let user_id = users
.first()
.and_then(|u| u.get("id"))
.and_then(|v| v.as_str())
.ok_or_else(|| format!("User '{username}' not found in Keycloak realm '{realm}'"))?;
// Step 3: Delete the user
let delete_url = format!("{base_url}/admin/realms/{realm}/users/{user_id}");
let delete_resp = http
.delete(&delete_url)
.bearer_auth(access_token)
.send()
.await
.map_err(|e| format!("Keycloak user delete failed: {e}"))?;
if delete_resp.status().is_success() || delete_resp.status().as_u16() == 204 {
info!(username, user_id, "Keycloak test user deleted");
Ok(true)
} else {
let status = delete_resp.status();
let body = delete_resp.text().await.unwrap_or_default();
Err(format!("Keycloak delete failed ({status}): {body}"))
}
}
/// Delete a user from Auth0 via the Management API.
///
/// Requires `AUTH0_DOMAIN`, `AUTH0_CLIENT_ID`, `AUTH0_CLIENT_SECRET` env vars.
///
/// Flow: client-credentials token → look the user up by `user.email` →
/// delete by the returned `user_id`.
///
/// # Returns
/// `Ok(true)` when Auth0 answers the DELETE with a success status.
///
/// # Errors
/// Returns `Err(String)` when an env var or the record's email is missing,
/// any HTTP request fails or returns a non-success status, a response cannot
/// be parsed, or no user matches the email.
async fn cleanup_auth0(
    user: &TestUserRecord,
    _config: &AgentConfig,
    http: &reqwest::Client,
) -> Result<bool, String> {
    let domain = std::env::var("AUTH0_DOMAIN").map_err(|_| "AUTH0_DOMAIN not set")?;
    let client_id = std::env::var("AUTH0_CLIENT_ID").map_err(|_| "AUTH0_CLIENT_ID not set")?;
    let client_secret =
        std::env::var("AUTH0_CLIENT_SECRET").map_err(|_| "AUTH0_CLIENT_SECRET not set")?;
    let email = user
        .email
        .as_deref()
        .ok_or("No email in test user record for Auth0 lookup")?;

    info!(email, "Cleaning up Auth0 test user");

    // Get management API token
    let token_resp = http
        .post(format!("https://{domain}/oauth/token"))
        .json(&serde_json::json!({
            "grant_type": "client_credentials",
            "client_id": client_id,
            "client_secret": client_secret,
            "audience": format!("https://{domain}/api/v2/"),
        }))
        .send()
        .await
        .map_err(|e| format!("Auth0 token request failed: {e}"))?;
    // Report the HTTP failure itself rather than a later, misleading
    // "No access_token" error.
    if !token_resp.status().is_success() {
        let status = token_resp.status();
        let body = token_resp.text().await.unwrap_or_default();
        return Err(format!("Auth0 token request failed ({status}): {body}"));
    }
    let token_body: serde_json::Value = token_resp
        .json()
        .await
        .map_err(|e| format!("Failed to parse Auth0 token: {e}"))?;
    let access_token = token_body
        .get("access_token")
        .and_then(|v| v.as_str())
        .ok_or("No access_token in Auth0 response")?;

    // Search for user by email
    let encoded_email = urlencoding::encode(email);
    let search_url = format!("https://{domain}/api/v2/users-by-email?email={encoded_email}");
    let search_resp = http
        .get(&search_url)
        .bearer_auth(access_token)
        .send()
        .await
        .map_err(|e| format!("Auth0 user search failed: {e}"))?;
    // An error body is a JSON object, not an array; check the status first so
    // the caller does not get a confusing parse error.
    if !search_resp.status().is_success() {
        let status = search_resp.status();
        let body = search_resp.text().await.unwrap_or_default();
        return Err(format!("Auth0 user search failed ({status}): {body}"));
    }
    let users: Vec<serde_json::Value> = search_resp
        .json()
        .await
        .map_err(|e| format!("Failed to parse Auth0 users: {e}"))?;
    let user_id = users
        .first()
        .and_then(|u| u.get("user_id"))
        .and_then(|v| v.as_str())
        .ok_or_else(|| format!("User with email '{email}' not found in Auth0"))?;

    // Delete. The id is percent-encoded before being placed in the URL path.
    let encoded_id = urlencoding::encode(user_id);
    let delete_resp = http
        .delete(format!("https://{domain}/api/v2/users/{encoded_id}"))
        .bearer_auth(access_token)
        .send()
        .await
        .map_err(|e| format!("Auth0 user delete failed: {e}"))?;

    // `is_success()` covers every 2xx status, including 204 No Content.
    if delete_resp.status().is_success() {
        info!(email, user_id, "Auth0 test user deleted");
        Ok(true)
    } else {
        let status = delete_resp.status();
        let body = delete_resp.text().await.unwrap_or_default();
        Err(format!("Auth0 delete failed ({status}): {body}"))
    }
}
/// Delete a user from Okta via the Users API.
///
/// Requires `OKTA_DOMAIN`, `OKTA_API_TOKEN` env vars.
async fn cleanup_okta(
user: &TestUserRecord,
_config: &AgentConfig,
http: &reqwest::Client,
) -> Result<bool, String> {
let domain = std::env::var("OKTA_DOMAIN").map_err(|_| "OKTA_DOMAIN not set")?;
let api_token = std::env::var("OKTA_API_TOKEN").map_err(|_| "OKTA_API_TOKEN not set")?;
let username = user
.username
.as_deref()
.or(user.email.as_deref())
.ok_or("No username/email in test user record for Okta lookup")?;
info!(username, "Cleaning up Okta test user");
// Search user
let encoded = urlencoding::encode(username);
let search_url = format!("https://{domain}/api/v1/users?search=profile.login+eq+\"{encoded}\"");
let search_resp = http
.get(&search_url)
.header("Authorization", format!("SSWS {api_token}"))
.send()
.await
.map_err(|e| format!("Okta user search failed: {e}"))?;
let users: Vec<serde_json::Value> = search_resp
.json()
.await
.map_err(|e| format!("Failed to parse Okta users: {e}"))?;
let user_id = users
.first()
.and_then(|u| u.get("id"))
.and_then(|v| v.as_str())
.ok_or_else(|| format!("User '{username}' not found in Okta"))?;
// Deactivate first (required by Okta before delete)
let _ = http
.post(format!(
"https://{domain}/api/v1/users/{user_id}/lifecycle/deactivate"
))
.header("Authorization", format!("SSWS {api_token}"))
.send()
.await;
// Delete
let delete_resp = http
.delete(format!("https://{domain}/api/v1/users/{user_id}"))
.header("Authorization", format!("SSWS {api_token}"))
.send()
.await
.map_err(|e| format!("Okta user delete failed: {e}"))?;
if delete_resp.status().is_success() || delete_resp.status().as_u16() == 204 {
info!(username, user_id, "Okta test user deleted");
Ok(true)
} else {
let status = delete_resp.status();
let body = delete_resp.text().await.unwrap_or_default();
Err(format!("Okta delete failed ({status}): {body}"))
}
}

View File

@@ -1,3 +1,4 @@
pub mod cleanup;
mod context;
pub mod crypto;
pub mod orchestrator;

View File

@@ -390,10 +390,13 @@ impl PentestOrchestrator {
)
.await;
// Build LLM-facing summary: strip large fields
// (screenshots, raw HTML) to save context window
let llm_data = summarize_tool_output(&result.data);
serde_json::json!({
"summary": result.summary,
"findings_count": findings_count,
"data": result.data,
"data": llm_data,
})
.to_string()
}
@@ -465,21 +468,61 @@ impl PentestOrchestrator {
.await;
}
// If cleanup_test_user is requested, append a cleanup instruction
// Clean up test user via identity provider API if requested
if session
.config
.as_ref()
.is_some_and(|c| c.auth.cleanup_test_user)
{
let cleanup_msg = PentestMessage::user(
session_id.clone(),
"Testing is complete. Now please clean up: navigate to the application and delete \
the test user account that was created during this session. Confirm once done."
.to_string(),
);
let _ = self.db.pentest_messages().insert_one(&cleanup_msg).await;
if let Some(ref test_user) = session.test_user {
let http = reqwest::Client::new();
// We need the AgentConfig — read from env since orchestrator doesn't hold it
let config = crate::config::load_config();
match config {
Ok(cfg) => {
match crate::pentest::cleanup::cleanup_test_user(test_user, &cfg, &http)
.await
{
Ok(true) => {
tracing::info!(
username = test_user.username.as_deref(),
"Test user cleaned up via provider API"
);
// Mark as cleaned up in DB
if let Some(sid) = session.id {
let _ = self
.db
.pentest_sessions()
.update_one(
doc! { "_id": sid },
doc! { "$set": { "test_user.cleaned_up": true } },
)
.await;
}
}
Ok(false) => {
tracing::info!(
"Test user cleanup skipped (no provider configured)"
);
}
Err(e) => {
tracing::warn!(error = %e, "Test user cleanup failed");
let _ = self.event_tx.send(PentestEvent::Error {
message: format!("Test user cleanup failed: {e}"),
});
}
}
}
Err(e) => {
tracing::warn!(error = %e, "Could not load config for cleanup");
}
}
}
}
// Clean up the persistent browser session for this pentest
compliance_dast::tools::browser::cleanup_browser_session(&session_id).await;
let _ = self.event_tx.send(PentestEvent::Complete {
summary: format!(
"Pentest complete. {} findings from {} tool invocations.",
@@ -490,3 +533,82 @@ impl PentestOrchestrator {
Ok(())
}
}
/// Return the longest prefix of `s` that is at most `max_bytes` bytes long and
/// ends on a UTF-8 character boundary.
///
/// Slicing a `str` at an arbitrary byte offset panics when the offset falls
/// inside a multi-byte character; this backs off to the previous boundary.
fn truncate_utf8_prefix(s: &str, max_bytes: usize) -> &str {
    if s.len() <= max_bytes {
        return s;
    }
    let mut end = max_bytes;
    // Offset 0 is always a boundary, so the loop terminates.
    while !s.is_char_boundary(end) {
        end -= 1;
    }
    &s[..end]
}

/// Strip large fields from tool output before sending to the LLM.
///
/// Builds a reduced copy of `data`; the input itself is not modified.
/// Non-object values are returned as a plain clone. For objects, per key:
///
/// * `screenshot_base64` — a non-empty string is replaced by a short placeholder.
/// * `html` — strings longer than 2000 bytes are cut to a 2000-byte prefix
///   plus a truncation note that includes the original length.
/// * `text` — strings longer than 1500 bytes are cut to a 1500-byte prefix
///   plus a truncation note.
/// * `elements` / `links` / `inputs` — arrays longer than 15 items keep the
///   first 15 and gain a trailing `"... and N more"` string entry.
/// * any other key holding an object is summarized recursively.
/// * everything else is copied unchanged.
///
/// Prefixes are cut on a UTF-8 character boundary, so they may be a few bytes
/// shorter than the limit when the limit falls inside a multi-byte character.
fn summarize_tool_output(data: &serde_json::Value) -> serde_json::Value {
    const MAX_HTML_BYTES: usize = 2000;
    const MAX_TEXT_BYTES: usize = 1500;
    const MAX_ARRAY_ITEMS: usize = 15;

    let Some(obj) = data.as_object() else {
        return data.clone();
    };

    let mut summarized = serde_json::Map::new();
    for (key, value) in obj {
        match key.as_str() {
            // Replace screenshot base64 with a placeholder
            "screenshot_base64" => {
                let placeholder = value.as_str().filter(|s| !s.is_empty()).map(|_| {
                    serde_json::Value::String(
                        "[screenshot captured and saved to report]".to_string(),
                    )
                });
                summarized.insert(key.clone(), placeholder.unwrap_or_else(|| value.clone()));
            }
            // Truncate raw HTML content
            "html" => {
                let shortened = value
                    .as_str()
                    .filter(|s| s.len() > MAX_HTML_BYTES)
                    .map(|s| {
                        serde_json::Value::String(format!(
                            "{}... [truncated, {} chars total]",
                            truncate_utf8_prefix(s, MAX_HTML_BYTES),
                            s.len()
                        ))
                    });
                summarized.insert(key.clone(), shortened.unwrap_or_else(|| value.clone()));
            }
            // Truncate page text. Shorter or non-string `text` values fall
            // through to the generic arms below.
            "text" if value.as_str().is_some_and(|s| s.len() > MAX_TEXT_BYTES) => {
                let s = value.as_str().unwrap_or_default();
                summarized.insert(
                    key.clone(),
                    serde_json::Value::String(format!(
                        "{}... [truncated]",
                        truncate_utf8_prefix(s, MAX_TEXT_BYTES)
                    )),
                );
            }
            // Trim large arrays (e.g., "elements", "links", "inputs")
            "elements" | "links" | "inputs" => {
                let trimmed = value
                    .as_array()
                    .filter(|arr| arr.len() > MAX_ARRAY_ITEMS)
                    .map(|arr| {
                        let mut head: Vec<serde_json::Value> = arr[..MAX_ARRAY_ITEMS].to_vec();
                        head.push(serde_json::Value::String(format!(
                            "... and {} more",
                            arr.len() - MAX_ARRAY_ITEMS
                        )));
                        serde_json::Value::Array(head)
                    });
                summarized.insert(key.clone(), trimmed.unwrap_or_else(|| value.clone()));
            }
            // Recursively summarize nested objects (e.g., "page" in get_content)
            _ if value.is_object() => {
                summarized.insert(key.clone(), summarize_tool_output(value));
            }
            // Keep everything else as-is
            _ => {
                summarized.insert(key.clone(), value.clone());
            }
        }
    }
    serde_json::Value::Object(summarized)
}

View File

@@ -285,15 +285,34 @@ impl PentestOrchestrator {
1. Start by running reconnaissance (recon tool) to fingerprint the target and discover technologies.
2. Run the OpenAPI parser to discover API endpoints from specs.
3. Check infrastructure: DNS, DMARC, TLS, security headers, cookies, CSP, CORS.
4. Based on SAST findings, prioritize testing endpoints where vulnerabilities were found in code.
5. For each vulnerability type found in SAST, use the corresponding DAST tool to verify exploitability.
6. If vulnerable dependencies are listed, try to trigger known CVE conditions against the running application.
7. Test rate limiting on critical endpoints (login, API).
8. Check for console.log leakage in frontend JavaScript.
9. Analyze tool results and chain findings — if one vulnerability enables others, explore the chain.
10. When testing is complete, provide a structured summary with severity and remediation.
11. Always explain your reasoning before invoking each tool.
12. When done, say "Testing complete" followed by a final summary.
4. If the target requires authentication (auto-register mode), use the browser tool to:
a. Navigate to the target — it will redirect to the login page.
b. Click the "Register" link to reach the registration form.
c. Fill all form fields (username, email with plus-addressing, password, name) one by one.
d. Click submit. If a Terms & Conditions page appears, accept it.
e. After registration, use the browser to navigate through the application pages.
f. **Take a screenshot after each major page** for evidence in the report.
5. Use the browser tool to explore the authenticated application — navigate to each section,
use get_content to understand the page structure, and take screenshots.
6. Based on SAST findings, prioritize testing endpoints where vulnerabilities were found in code.
7. For each vulnerability type found in SAST, use the corresponding DAST tool to verify exploitability.
8. If vulnerable dependencies are listed, try to trigger known CVE conditions against the running application.
9. Test rate limiting on critical endpoints (login, API).
10. Check for console.log leakage in frontend JavaScript.
11. Analyze tool results and chain findings — if one vulnerability enables others, explore the chain.
12. When testing is complete, provide a structured summary with severity and remediation.
13. Always explain your reasoning before invoking each tool.
14. When done, say "Testing complete" followed by a final summary.
## Browser Tool Usage
- The browser tab **persists** between calls — cookies and login state are preserved.
- After navigate, the response includes `elements` (links, inputs, buttons on the page).
- Use `get_content` to see forms, links, buttons, headings, and page text.
- Use `click` with CSS selectors to interact (e.g., `a:text('Register')`, `input[type='submit']`).
- Use `fill` with selector + value to fill form fields (e.g., `input[name='email']`).
- **Take screenshots** (`action: screenshot`) after important actions for evidence.
- For SPA apps: a 200 HTTP status does NOT mean the page is accessible — check the actual
page content with the browser tool to verify if it shows real data or a login redirect.
## Important
- This is an authorized penetration test. All testing is permitted within the target scope.

View File

@@ -149,6 +149,23 @@ fn build_chain_html(chain: &[AttackChainNode]) -> String {
)
};
// Render inline screenshot if this is a browser screenshot action
let screenshot_html = if node.tool_name == "browser" {
node.tool_output
.as_ref()
.and_then(|out| out.get("screenshot_base64"))
.and_then(|v| v.as_str())
.filter(|s| !s.is_empty())
.map(|b64| {
format!(
r#"<div class="step-screenshot"><img src="data:image/png;base64,{b64}" alt="Browser screenshot" style="max-width:100%;border:1px solid #e2e8f0;border-radius:6px;margin-top:8px;"/></div>"#
)
})
.unwrap_or_default()
} else {
String::new()
};
chain_html.push_str(&format!(
r#"<div class="step-row">
<div class="step-num">{num}</div>
@@ -161,6 +178,7 @@ fn build_chain_html(chain: &[AttackChainNode]) -> String {
{risk_badge}
</div>
{reasoning_html}
{screenshot_html}
</div>
</div>"#,
num = i + 1,

View File

@@ -7,7 +7,18 @@ pub(super) fn cover(
target_url: &str,
requester_name: &str,
requester_email: &str,
app_screenshot_b64: Option<&str>,
) -> String {
let screenshot_html = app_screenshot_b64
.filter(|s| !s.is_empty())
.map(|b64| {
format!(
r#"<div style="margin: 20px auto; max-width: 560px; border: 1px solid #cbd5e1; border-radius: 8px; overflow: hidden; box-shadow: 0 4px 12px rgba(0,0,0,0.08);">
<img src="data:image/png;base64,{b64}" alt="Application screenshot" style="width:100%;display:block;"/>
</div>"#
)
})
.unwrap_or_default();
format!(
r##"<!-- ═══════════════ COVER PAGE ═══════════════ -->
<div class="cover">
@@ -42,6 +53,8 @@ pub(super) fn cover(
<strong>Prepared for:</strong> {requester_name} ({requester_email})
</div>
{screenshot_html}
<div class="cover-footer">
Compliance Scanner &mdash; AI-Powered Security Assessment Platform
</div>

View File

@@ -37,6 +37,50 @@ pub(super) fn build_html_report(ctx: &ReportContext) -> String {
names
};
// Find the best app screenshot for the cover page.
//
// Every browser step that carries a non-empty screenshot is collected together
// with the page title recorded on that *same* step. Navigate results store the
// title under `title`, click results under `page_title`, so both keys are read.
// The first step whose own title looks like the application (rather than e.g. an
// identity-provider login page) wins; if no title matches, the first available
// screenshot is used as a fallback.
let browser_shots: Vec<(Option<&str>, &str)> = ctx
    .attack_chain
    .iter()
    .filter(|n| n.tool_name == "browser")
    .filter_map(|n| {
        let out = n.tool_output.as_ref()?;
        let shot = out
            .get("screenshot_base64")?
            .as_str()
            .filter(|s| !s.is_empty())?;
        let title = out
            .get("title")
            .or_else(|| out.get("page_title"))
            .and_then(|t| t.as_str());
        Some((title, shot))
    })
    .collect();
let app_screenshot: Option<String> = browser_shots
    .iter()
    // The predicate must inspect the candidate itself; a chain-wide check would
    // simply return the first screenshot regardless of what it shows.
    .find(|(title, _)| {
        title.is_some_and(|t| t.contains("Compliance") || t.contains("Dashboard"))
    })
    // Fallback: any screenshot
    .or_else(|| browser_shots.first())
    .map(|(_, shot)| (*shot).to_owned());
let styles_html = styles::styles();
let cover_html = cover::cover(
&ctx.target_name,
@@ -45,6 +89,7 @@ pub(super) fn build_html_report(ctx: &ReportContext) -> String {
&ctx.target_url,
&ctx.requester_name,
&ctx.requester_email,
app_screenshot.as_deref(),
);
let exec_html = executive_summary::executive_summary(
&ctx.findings,

View File

@@ -27,6 +27,14 @@ pub struct AgentConfig {
pub ssh_key_path: String,
pub keycloak_url: Option<String>,
pub keycloak_realm: Option<String>,
pub keycloak_admin_username: Option<String>,
pub keycloak_admin_password: Option<SecretString>,
// Pentest defaults
pub pentest_verification_email: Option<String>,
pub pentest_imap_host: Option<String>,
pub pentest_imap_port: Option<u16>,
pub pentest_imap_username: Option<String>,
pub pentest_imap_password: Option<SecretString>,
}
#[derive(Clone, Debug, Serialize, Deserialize)]

View File

@@ -28,9 +28,10 @@ pub use graph::{
pub use issue::{IssueStatus, TrackerIssue, TrackerType};
pub use mcp::{McpServerConfig, McpServerStatus, McpTransport};
pub use pentest::{
AttackChainNode, AttackNodeStatus, AuthMode, CodeContextHint, Environment, PentestAuthConfig,
PentestConfig, PentestEvent, PentestMessage, PentestSession, PentestStats, PentestStatus,
PentestStrategy, SeverityDistribution, TesterInfo, ToolCallRecord,
AttackChainNode, AttackNodeStatus, AuthMode, CodeContextHint, Environment, IdentityProvider,
PentestAuthConfig, PentestConfig, PentestEvent, PentestMessage, PentestSession, PentestStats,
PentestStatus, PentestStrategy, SeverityDistribution, TestUserRecord, TesterInfo,
ToolCallRecord,
};
pub use repository::{ScanTrigger, TrackedRepository};
pub use sbom::{SbomEntry, VulnRef};

View File

@@ -150,6 +150,34 @@ pub struct PentestConfig {
pub skip_mode: bool,
}
/// Identity provider type for cleanup routing.
///
/// Recorded on a [`TestUserRecord`] so the cleanup step can tell which provider
/// holds the test user. Serialized with snake_case variant names
/// (`rename_all = "snake_case"`), e.g. `Keycloak` is written as `"keycloak"`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum IdentityProvider {
/// Keycloak realm user.
Keycloak,
/// Auth0 tenant user.
Auth0,
/// Okta user.
Okta,
/// Firebase user.
// NOTE(review): no Firebase cleanup routine is visible from here — confirm how this variant is handled.
Firebase,
/// Any other provider.
// NOTE(review): presumably no automatic cleanup exists for custom providers — confirm.
Custom,
}
/// Details of a test user created during a pentest session.
/// Stored so the cleanup step knows exactly what to delete and where.
///
/// Every identifying field is optional; `Default` yields a record with all of
/// them `None` and `cleaned_up == false`.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct TestUserRecord {
/// Username used to register
pub username: Option<String>,
/// Email used to register
pub email: Option<String>,
/// User ID returned by the identity provider (if known)
pub provider_user_id: Option<String>,
/// Which identity provider holds this user
pub provider: Option<IdentityProvider>,
/// Whether cleanup has been completed.
/// `#[serde(default)]` makes a missing field deserialize as `false`.
#[serde(default)]
pub cleaned_up: bool,
}
/// A pentest session initiated via the chat interface
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PentestSession {
@@ -163,6 +191,9 @@ pub struct PentestSession {
/// Wizard configuration (None for legacy sessions)
pub config: Option<PentestConfig>,
pub created_by: Option<String>,
/// Test user created during auto-register (for cleanup)
#[serde(default)]
pub test_user: Option<TestUserRecord>,
/// Total number of tool invocations in this session
pub tool_invocations: u32,
/// Total successful tool invocations
@@ -187,6 +218,7 @@ impl PentestSession {
strategy,
config: None,
created_by: None,
test_user: None,
tool_invocations: 0,
tool_successes: 0,
findings_count: 0,

View File

@@ -118,9 +118,12 @@ pub(crate) fn cat_label(cat: &str) -> &'static str {
}
}
/// Phase name heuristic based on depth
pub(crate) fn phase_name(depth: usize) -> &'static str {
match depth {
/// Maximum number of display phases — deeper iterations are merged into the last.
const MAX_PHASES: usize = 8;
/// Phase name heuristic based on phase index (not raw BFS depth)
pub(crate) fn phase_name(phase_idx: usize) -> &'static str {
match phase_idx {
0 => "Reconnaissance",
1 => "Analysis",
2 => "Boundary Testing",
@@ -133,8 +136,8 @@ pub(crate) fn phase_name(depth: usize) -> &'static str {
}
/// Short label for phase rail
pub(crate) fn phase_short_name(depth: usize) -> &'static str {
match depth {
pub(crate) fn phase_short_name(phase_idx: usize) -> &'static str {
match phase_idx {
0 => "Recon",
1 => "Analysis",
2 => "Boundary",
@@ -214,7 +217,14 @@ pub(crate) fn compute_phases(steps: &[serde_json::Value]) -> Vec<Vec<usize>> {
}
}
// Group by depth
// Cap depths at MAX_PHASES - 1 so deeper iterations merge into the last phase
for d in depths.iter_mut() {
if *d >= MAX_PHASES {
*d = MAX_PHASES - 1;
}
}
// Group by (capped) depth
let max_depth = depths.iter().copied().max().unwrap_or(0);
let mut phases: Vec<Vec<usize>> = Vec::new();
for d in 0..=max_depth {

View File

@@ -1,4 +1,6 @@
use std::collections::HashMap;
use std::pin::Pin;
use std::sync::Arc;
use std::time::Duration;
use base64::Engine;
@@ -6,17 +8,26 @@ use compliance_core::error::CoreError;
use compliance_core::traits::pentest_tool::{PentestTool, PentestToolContext, PentestToolResult};
use futures_util::{SinkExt, StreamExt};
use serde_json::json;
use tokio::sync::Mutex;
use tokio_tungstenite::tungstenite::Message;
use tracing::info;
type WsStream =
tokio_tungstenite::WebSocketStream<tokio_tungstenite::MaybeTlsStream<tokio::net::TcpStream>>;
/// Global pool of persistent browser sessions keyed by pentest session ID.
/// Each pentest session gets one Chrome tab that stays alive across tool calls.
static BROWSER_SESSIONS: std::sync::LazyLock<Arc<Mutex<HashMap<String, BrowserSession>>>> =
std::sync::LazyLock::new(|| Arc::new(Mutex::new(HashMap::new())));
/// A browser automation tool that exposes headless Chrome actions to the LLM
/// via the Chrome DevTools Protocol. Reuses the same `CHROME_WS_URL` used for
/// PDF generation.
/// via the Chrome DevTools Protocol.
///
/// Supported actions: navigate, screenshot, click, fill, get_content, evaluate.
/// **Session-persistent**: the same Chrome tab is reused across all invocations
/// within a pentest session, so cookies, auth state, and page context are
/// preserved between navigate → click → fill → screenshot calls.
///
/// Supported actions: navigate, screenshot, click, fill, get_content, evaluate, close.
pub struct BrowserTool;
impl Default for BrowserTool {
@@ -31,11 +42,13 @@ impl PentestTool for BrowserTool {
}
fn description(&self) -> &str {
"Headless browser automation via Chrome DevTools Protocol. \
Supports navigating to URLs, taking screenshots, clicking elements, \
"Headless browser automation via Chrome DevTools Protocol. The browser tab persists \
across calls within the same pentest session — cookies, login state, and page context \
are preserved. Supports navigating to URLs, taking screenshots, clicking elements, \
filling form fields, reading page content, and evaluating JavaScript. \
Use CSS selectors to target elements. Useful for discovering registration pages, \
filling out forms, extracting verification links, and visual inspection."
Use CSS selectors to target elements. After navigating, use get_content to read the \
page HTML and find elements to click or fill. Use this to discover registration pages, \
fill out signup forms, complete email verification, and test authenticated flows."
}
fn input_schema(&self) -> serde_json::Value {
@@ -44,8 +57,8 @@ impl PentestTool for BrowserTool {
"properties": {
"action": {
"type": "string",
"enum": ["navigate", "screenshot", "click", "fill", "get_content", "evaluate"],
"description": "Action to perform"
"enum": ["navigate", "screenshot", "click", "fill", "get_content", "evaluate", "close"],
"description": "Action to perform. The browser tab persists between calls — use navigate first, then get_content to see the page, then click/fill to interact."
},
"url": {
"type": "string",
@@ -53,7 +66,7 @@ impl PentestTool for BrowserTool {
},
"selector": {
"type": "string",
"description": "CSS selector for click/fill actions"
"description": "CSS selector for click/fill actions (e.g. '#username', 'a[href*=register]', 'button[type=submit]')"
},
"value": {
"type": "string",
@@ -61,7 +74,7 @@ impl PentestTool for BrowserTool {
},
"wait_ms": {
"type": "integer",
"description": "Milliseconds to wait after action (default: 500)"
"description": "Milliseconds to wait after action (default: 1000)"
}
},
"required": ["action"]
@@ -71,7 +84,7 @@ impl PentestTool for BrowserTool {
fn execute<'a>(
&'a self,
input: serde_json::Value,
_context: &'a PentestToolContext,
context: &'a PentestToolContext,
) -> Pin<Box<dyn std::future::Future<Output = Result<PentestToolResult, CoreError>> + Send + 'a>>
{
Box::pin(async move {
@@ -79,11 +92,42 @@ impl PentestTool for BrowserTool {
let url = input.get("url").and_then(|v| v.as_str()).unwrap_or("");
let selector = input.get("selector").and_then(|v| v.as_str()).unwrap_or("");
let value = input.get("value").and_then(|v| v.as_str()).unwrap_or("");
let wait_ms = input.get("wait_ms").and_then(|v| v.as_u64()).unwrap_or(500);
let wait_ms = input
.get("wait_ms")
.and_then(|v| v.as_u64())
.unwrap_or(1000);
let session_key = context.session_id.clone();
let mut session = BrowserSession::connect()
.await
.map_err(|e| CoreError::Other(format!("Browser connect failed: {e}")))?;
// Handle close action — tear down the persistent session
if action == "close" {
let mut pool = BROWSER_SESSIONS.lock().await;
if let Some(mut sess) = pool.remove(&session_key) {
let _ = sess.close().await;
}
return Ok(PentestToolResult {
summary: "Browser session closed".to_string(),
findings: Vec::new(),
data: json!({ "closed": true }),
});
}
// Get or create persistent session for this pentest
let mut pool = BROWSER_SESSIONS.lock().await;
if !pool.contains_key(&session_key) {
match BrowserSession::connect().await {
Ok(sess) => {
pool.insert(session_key.clone(), sess);
}
Err(e) => {
return Err(CoreError::Other(format!("Browser connect failed: {e}")));
}
}
}
let session = pool.get_mut(&session_key);
let Some(session) = session else {
return Err(CoreError::Other("Browser session not found".to_string()));
};
let result = match action {
"navigate" => session.navigate(url, wait_ms).await,
@@ -95,8 +139,15 @@ impl PentestTool for BrowserTool {
_ => Err(format!("Unknown browser action: {action}")),
};
// Always try to clean up
let _ = session.close().await;
// If the session errored, remove it so the next call creates a fresh one
if result.is_err() {
if let Some(mut dead) = pool.remove(&session_key) {
let _ = dead.close().await;
}
}
// Release the lock before building the response
drop(pool);
match result {
Ok(data) => {
@@ -214,7 +265,7 @@ impl BrowserSession {
}
async fn navigate(&mut self, url: &str, wait_ms: u64) -> Result<serde_json::Value, String> {
let resp = cdp_send_session(
cdp_send_session(
&mut self.ws,
self.next_id,
&self.session_id,
@@ -226,19 +277,44 @@ impl BrowserSession {
tokio::time::sleep(Duration::from_millis(wait_ms)).await;
// Get page title
let title_resp = self.evaluate_raw("document.title").await?;
let page_url_resp = self.evaluate_raw("window.location.href").await?;
// Get page title and current URL (may have redirected)
let title = self
.evaluate_raw("document.title")
.await
.unwrap_or_default();
let page_url = self
.evaluate_raw("window.location.href")
.await
.unwrap_or_default();
// Auto-get a summary of interactive elements on the page
let links_js = r#"(function(){
var items = [];
document.querySelectorAll('a[href]').forEach(function(a, i) {
if (i < 20) items.push({tag:'a', text:a.textContent.trim().substring(0,60), href:a.href});
});
document.querySelectorAll('input,select,textarea,button[type=submit]').forEach(function(el, i) {
if (i < 20) items.push({tag:el.tagName.toLowerCase(), type:el.type||'', name:el.name||'', id:el.id||'', placeholder:el.placeholder||''});
});
return JSON.stringify(items);
})()"#;
let elements_json = self.evaluate_raw(links_js).await.unwrap_or_default();
let elements: serde_json::Value = serde_json::from_str(&elements_json).unwrap_or(json!([]));
// Auto-capture screenshot after every navigation
let screenshot_b64 = self.capture_screenshot_b64().await.unwrap_or_default();
Ok(json!({
"navigated": true,
"url": page_url_resp,
"title": title_resp,
"frame_id": resp.get("result").and_then(|r| r.get("frameId")),
"url": page_url,
"title": title,
"elements": elements,
"screenshot_base64": screenshot_b64,
}))
}
async fn screenshot(&mut self) -> Result<serde_json::Value, String> {
/// Capture a screenshot and return the base64 string (empty on failure).
async fn capture_screenshot_b64(&mut self) -> Result<String, String> {
let resp = cdp_send_session(
&mut self.ws,
self.next_id,
@@ -249,14 +325,19 @@ impl BrowserSession {
.await?;
self.next_id += 1;
let b64 = resp
Ok(resp
.get("result")
.and_then(|r| r.get("data"))
.and_then(|d| d.as_str())
.unwrap_or("");
.unwrap_or("")
.to_string())
}
async fn screenshot(&mut self) -> Result<serde_json::Value, String> {
let b64 = self.capture_screenshot_b64().await?;
let size_kb = base64::engine::general_purpose::STANDARD
.decode(b64)
.decode(&b64)
.map(|b| b.len() / 1024)
.unwrap_or(0);
@@ -267,7 +348,6 @@ impl BrowserSession {
}
async fn click(&mut self, selector: &str, wait_ms: u64) -> Result<serde_json::Value, String> {
// Use JS to find element and get its bounding box, then click
let js = format!(
r#"(function() {{
var el = document.querySelector({sel});
@@ -289,9 +369,29 @@ impl BrowserSession {
let result = self.evaluate_raw(&js).await?;
tokio::time::sleep(Duration::from_millis(wait_ms)).await;
serde_json::from_str::<serde_json::Value>(&result)
.unwrap_or_else(|_| json!({ "result": result }));
Ok(serde_json::from_str(&result).unwrap_or(json!({ "result": result })))
// After click, get current URL (may have navigated)
let current_url = self
.evaluate_raw("window.location.href")
.await
.unwrap_or_default();
let title = self
.evaluate_raw("document.title")
.await
.unwrap_or_default();
// Auto-capture screenshot after click
let screenshot_b64 = self.capture_screenshot_b64().await.unwrap_or_default();
let mut click_result: serde_json::Value =
serde_json::from_str(&result).unwrap_or(json!({ "result": result }));
if let Some(obj) = click_result.as_object_mut() {
obj.insert("current_url".to_string(), json!(current_url));
obj.insert("page_title".to_string(), json!(title));
if !screenshot_b64.is_empty() {
obj.insert("screenshot_base64".to_string(), json!(screenshot_b64));
}
}
Ok(click_result)
}
async fn fill(
@@ -300,62 +400,83 @@ impl BrowserSession {
value: &str,
wait_ms: u64,
) -> Result<serde_json::Value, String> {
let js = format!(
r#"(function() {{
var el = document.querySelector({sel});
if (!el) return JSON.stringify({{error: "Element not found: {raw}"}});
el.focus();
el.value = {val};
el.dispatchEvent(new Event('input', {{bubbles: true}}));
el.dispatchEvent(new Event('change', {{bubbles: true}}));
return JSON.stringify({{filled: true, tag: el.tagName, selector: {sel}}});
}})()"#,
// Step 1: Focus the element via JS
let focus_js = format!(
"(function(){{var e=document.querySelector({sel});\
if(!e)return 'notfound';e.focus();e.select();return 'ok'}})()",
sel = serde_json::to_string(selector).unwrap_or_default(),
raw = selector.replace('"', r#"\""#),
val = serde_json::to_string(value).unwrap_or_default(),
);
let found = self.evaluate_raw(&focus_js).await?;
if found == "notfound" {
return Ok(json!({ "error": format!("Element not found: {selector}") }));
}
// Step 2: Clear existing content with Select All + Delete
cdp_send_session(
&mut self.ws,
self.next_id,
&self.session_id,
"Input.dispatchKeyEvent",
json!({"type": "keyDown", "key": "a", "code": "KeyA", "modifiers": 2}),
)
.await?;
self.next_id += 1;
cdp_send_session(
&mut self.ws,
self.next_id,
&self.session_id,
"Input.dispatchKeyEvent",
json!({"type": "keyUp", "key": "a", "code": "KeyA", "modifiers": 2}),
)
.await?;
self.next_id += 1;
cdp_send_session(
&mut self.ws,
self.next_id,
&self.session_id,
"Input.dispatchKeyEvent",
json!({"type": "keyDown", "key": "Backspace", "code": "Backspace"}),
)
.await?;
self.next_id += 1;
cdp_send_session(
&mut self.ws,
self.next_id,
&self.session_id,
"Input.dispatchKeyEvent",
json!({"type": "keyUp", "key": "Backspace", "code": "Backspace"}),
)
.await?;
self.next_id += 1;
// Step 3: Insert the text using Input.insertText (single CDP command, no JS eval)
cdp_send_session(
&mut self.ws,
self.next_id,
&self.session_id,
"Input.insertText",
json!({"text": value}),
)
.await?;
self.next_id += 1;
// Step 4: Verify the value was set
let verify_js = format!(
"(function(){{var e=document.querySelector({sel});return e?e.value:''}})()",
sel = serde_json::to_string(selector).unwrap_or_default(),
);
let final_value = self.evaluate_raw(&verify_js).await.unwrap_or_default();
let result = self.evaluate_raw(&js).await?;
tokio::time::sleep(Duration::from_millis(wait_ms)).await;
Ok(serde_json::from_str(&result).unwrap_or(json!({ "result": result })))
Ok(json!({
"filled": true,
"selector": selector,
"value": final_value,
}))
}
async fn get_content(&mut self) -> Result<serde_json::Value, String> {
let resp = cdp_send_session(
&mut self.ws,
self.next_id,
&self.session_id,
"DOM.getDocument",
json!({ "depth": 0 }),
)
.await?;
self.next_id += 1;
let root_id = resp
.get("result")
.and_then(|r| r.get("root"))
.and_then(|n| n.get("nodeId"))
.and_then(|n| n.as_i64())
.unwrap_or(1);
let html_resp = cdp_send_session(
&mut self.ws,
self.next_id,
&self.session_id,
"DOM.getOuterHTML",
json!({ "nodeId": root_id }),
)
.await?;
self.next_id += 1;
let html = html_resp
.get("result")
.and_then(|r| r.get("outerHTML"))
.and_then(|h| h.as_str())
.unwrap_or("");
// Also get page title and URL for context
let title = self
.evaluate_raw("document.title")
.await
@@ -365,22 +486,55 @@ impl BrowserSession {
.await
.unwrap_or_default();
// Truncate HTML to avoid massive payloads to the LLM
let truncated = if html.len() > 50_000 {
format!(
"{}... [truncated, {} total chars]",
&html[..50_000],
html.len()
)
} else {
html.to_string()
};
// Get a structured summary instead of raw HTML (more useful for LLM)
let summary_js = r#"(function(){
var result = {forms:[], links:[], inputs:[], buttons:[], headings:[], text:''};
// Forms
document.querySelectorAll('form').forEach(function(f,i){
if(i<10) result.forms.push({action:f.action, method:f.method, id:f.id});
});
// Links
document.querySelectorAll('a[href]').forEach(function(a,i){
if(i<30) result.links.push({text:a.textContent.trim().substring(0,80), href:a.href});
});
// Inputs
document.querySelectorAll('input,select,textarea').forEach(function(el,i){
if(i<30) result.inputs.push({
tag:el.tagName.toLowerCase(),
type:el.type||'',
name:el.name||'',
id:el.id||'',
placeholder:el.placeholder||'',
value:el.type==='password'?'***':el.value.substring(0,50)
});
});
// Buttons
document.querySelectorAll('button,[type=submit],[role=button]').forEach(function(b,i){
if(i<20) result.buttons.push({text:b.textContent.trim().substring(0,60), type:b.type||'', id:b.id||''});
});
// Headings
document.querySelectorAll('h1,h2,h3').forEach(function(h,i){
if(i<10) result.headings.push(h.textContent.trim().substring(0,100));
});
// Page text (truncated)
result.text = document.body ? document.body.innerText.substring(0, 3000) : '';
return JSON.stringify(result);
})()"#;
let summary = self.evaluate_raw(summary_js).await.unwrap_or_default();
let page_data: serde_json::Value = serde_json::from_str(&summary).unwrap_or(json!({}));
Ok(json!({
"url": url,
"title": title,
"html": truncated,
"html_length": html.len(),
"page": page_data,
}))
}
@@ -431,7 +585,15 @@ impl BrowserSession {
}
}
// ── CDP helpers (same pattern as compliance-agent/src/pentest/report/pdf.rs) ──
/// Clean up the browser session for a pentest session (call when session ends).
///
/// Removes the entry keyed by `session_id` from the global session pool and
/// closes it. Does nothing when no entry exists for that key. Any error returned
/// by `close` is deliberately ignored: teardown is best-effort.
pub async fn cleanup_browser_session(session_id: &str) {
    // Take the session out of the pool in a statement of its own so the pool
    // guard (a temporary) is dropped before the close is awaited. Holding the
    // global lock across that await would stall every other session's browser
    // calls until this one finished closing.
    let removed = BROWSER_SESSIONS.lock().await.remove(session_id);
    if let Some(mut sess) = removed {
        let _ = sess.close().await;
    }
}
// ── CDP helpers ──
async fn cdp_send(
ws: &mut WsStream,

View File

@@ -0,0 +1,63 @@
# Postfix + Dovecot mail server (docker-mailserver) used to receive
# verification emails during pentest sessions.
version: "3.8"

services:
  mailserver:
    image: ghcr.io/docker-mailserver/docker-mailserver:14
    hostname: mail.scanner.meghsakha.com
    domainname: scanner.meghsakha.com
    container_name: mailserver
    ports:
      - "25:25"   # SMTP (inbound mail)
      - "143:143" # IMAP (orchestrator reads mail)
      - "993:993" # IMAPS (TLS)
      - "587:587" # Submission (outbound, if needed)
    volumes:
      - maildata:/var/mail
      - mailstate:/var/mail-state
      - maillogs:/var/log/mail
      - /etc/localtime:/etc/localtime:ro
    environment:
      # Hostname
      - OVERRIDE_HOSTNAME=mail.scanner.meghsakha.com
      # Disable features we don't need
      - ENABLE_SPAMASSASSIN=0
      - ENABLE_CLAMAV=0
      - ENABLE_FAIL2BAN=0
      - ENABLE_POSTGREY=0
      - ENABLE_AMAVIS=0
      # Enable what we need
      - ENABLE_IMAP=1
      - ENABLE_POP3=0
      # Plus-addressing (critical for pentest)
      - POSTFIX_RECIPIENT_DELIMITER=+
      # SSL (start with no TLS, add Let's Encrypt later)
      - SSL_TYPE=
      # Do not add any Docker network to the trusted relay networks
      - PERMIT_DOCKER=none
      # Disable inbound DKIM/DMARC/SPF checking — we need to accept verification
      # emails from Keycloak and other external senders
      - ENABLE_OPENDKIM=0
      - ENABLE_OPENDMARC=0
      - ENABLE_POLICYD_SPF=0
      - SPOOF_PROTECTION=0
      # One domain: accept mail for our domain
      - POSTFIX_MYDESTINATION=scanner.meghsakha.com, localhost
    restart: unless-stopped
    healthcheck:
      # CMD-SHELL is required for the pipe: with the exec form ("CMD") the "|" and
      # everything after it are passed to `ss` as literal arguments, so the check
      # can never succeed. Match ":25 " so that ports such as 2525 do not count.
      test: ["CMD-SHELL", "ss -tln | grep -q ':25 ' || exit 1"]
      interval: 30s
      timeout: 10s
      retries: 3

volumes:
  maildata:
  mailstate:
  maillogs:

View File

@@ -1,6 +1,6 @@
services:
mongo:
image: mongo:latest
image: mongo:7
ports:
- "27017:27017"
environment: