feat: browser session persistence, auto-screenshots, context optimization, user cleanup
Some checks failed
CI / Check (pull_request) Failing after 5m55s
CI / Detect Changes (pull_request) Has been skipped
CI / Deploy Agent (pull_request) Has been skipped
CI / Deploy Dashboard (pull_request) Has been skipped
CI / Deploy Docs (pull_request) Has been skipped
CI / Deploy MCP (pull_request) Has been skipped
Some checks failed
CI / Check (pull_request) Failing after 5m55s
CI / Detect Changes (pull_request) Has been skipped
CI / Deploy Agent (pull_request) Has been skipped
CI / Deploy Dashboard (pull_request) Has been skipped
CI / Deploy Docs (pull_request) Has been skipped
CI / Deploy MCP (pull_request) Has been skipped
Browser tool:
- Session-persistent Chrome tab (same tab reused across all calls in a pentest)
- Auto-screenshot on every navigate and click (stored in attack chain for report)
- Fill uses CDP Input.insertText (fixes WebSocket corruption on special chars)
- Switched from browserless/chromium to chromedp/headless-shell (stable WS)

Context window optimization:
- Strip screenshot_base64 from LLM conversation (kept in DB for report)
- Truncate HTML to 2KB, page text to 1.5KB in LLM messages
- Cap element/link arrays at 15 items
- SAST triage: batch 30 findings per LLM call instead of all at once

Report improvements:
- Auto-embed screenshots in attack chain timeline (navigate + click nodes)
- Cover page shows best app screenshot
- Attack chain phases capped at 8 (no more 20x "Final")

User cleanup:
- TestUserRecord model tracks created test users per session
- cleanup.rs: Keycloak (Admin REST API), Auth0 (Management API), Okta (Users API)
- Auto-cleanup on session completion when cleanup_test_user is enabled
- Env vars: KEYCLOAK_ADMIN_USERNAME, KEYCLOAK_ADMIN_PASSWORD

System prompt:
- Explicit browser usage instructions (navigate → get_content → click → fill)
- SPA auth bypass guidance (check page content, not HTTP status)
- Screenshot instructions for evidence collection

Other:
- Pin mongo:7 in docker-compose (mongo:latest/8 segfaults on kernel 6.19)
- Add deploy/docker-compose.mailserver.yml for Postfix + Dovecot

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -139,6 +139,61 @@ pub async fn create_session(
|
||||
let event_tx = agent.register_session_stream(&session_id_str);
|
||||
let pause_rx = agent.register_pause_control(&session_id_str);
|
||||
|
||||
// Merge server-default IMAP/email settings where wizard left blanks
|
||||
if let Some(ref mut cfg) = session.config {
|
||||
if cfg.auth.mode == AuthMode::AutoRegister {
|
||||
if cfg.auth.verification_email.is_none() {
|
||||
cfg.auth.verification_email = agent.config.pentest_verification_email.clone();
|
||||
}
|
||||
if cfg.auth.imap_host.is_none() {
|
||||
cfg.auth.imap_host = agent.config.pentest_imap_host.clone();
|
||||
}
|
||||
if cfg.auth.imap_port.is_none() {
|
||||
cfg.auth.imap_port = agent.config.pentest_imap_port;
|
||||
}
|
||||
if cfg.auth.imap_username.is_none() {
|
||||
cfg.auth.imap_username = agent.config.pentest_imap_username.clone();
|
||||
}
|
||||
if cfg.auth.imap_password.is_none() {
|
||||
cfg.auth.imap_password = agent.config.pentest_imap_password.as_ref().map(|s| {
|
||||
use secrecy::ExposeSecret;
|
||||
s.expose_secret().to_string()
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Pre-populate test user record for auto-register sessions
|
||||
if let Some(ref cfg) = session.config {
|
||||
if cfg.auth.mode == AuthMode::AutoRegister {
|
||||
let verification_email = cfg.auth.verification_email.clone();
|
||||
// Build plus-addressed email for this session
|
||||
let test_email = verification_email.as_deref().map(|email| {
|
||||
let parts: Vec<&str> = email.splitn(2, '@').collect();
|
||||
if parts.len() == 2 {
|
||||
format!("{}+{}@{}", parts[0], session_id_str, parts[1])
|
||||
} else {
|
||||
email.to_string()
|
||||
}
|
||||
});
|
||||
|
||||
// Detect identity provider from keycloak config
|
||||
let provider = if agent.config.keycloak_url.is_some() {
|
||||
Some(compliance_core::models::pentest::IdentityProvider::Keycloak)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
session.test_user = Some(compliance_core::models::pentest::TestUserRecord {
|
||||
username: None, // LLM will choose; updated after registration
|
||||
email: test_email,
|
||||
provider_user_id: None,
|
||||
provider,
|
||||
cleaned_up: false,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Encrypt credentials before they linger in memory
|
||||
let mut session_for_task = session.clone();
|
||||
if let Some(ref mut cfg) = session_for_task.config {
|
||||
|
||||
@@ -49,5 +49,12 @@ pub fn load_config() -> Result<AgentConfig, AgentError> {
|
||||
.unwrap_or_else(|| "/data/compliance-scanner/ssh/id_ed25519".to_string()),
|
||||
keycloak_url: env_var_opt("KEYCLOAK_URL"),
|
||||
keycloak_realm: env_var_opt("KEYCLOAK_REALM"),
|
||||
keycloak_admin_username: env_var_opt("KEYCLOAK_ADMIN_USERNAME"),
|
||||
keycloak_admin_password: env_secret_opt("KEYCLOAK_ADMIN_PASSWORD"),
|
||||
pentest_verification_email: env_var_opt("PENTEST_VERIFICATION_EMAIL"),
|
||||
pentest_imap_host: env_var_opt("PENTEST_IMAP_HOST"),
|
||||
pentest_imap_port: env_var_opt("PENTEST_IMAP_PORT").and_then(|p| p.parse().ok()),
|
||||
pentest_imap_username: env_var_opt("PENTEST_IMAP_USERNAME"),
|
||||
pentest_imap_password: env_secret_opt("PENTEST_IMAP_PASSWORD"),
|
||||
})
|
||||
}
|
||||
|
||||
@@ -5,7 +5,10 @@ use compliance_core::models::{Finding, FindingStatus};
|
||||
use crate::llm::LlmClient;
|
||||
use crate::pipeline::orchestrator::GraphContext;
|
||||
|
||||
const TRIAGE_SYSTEM_PROMPT: &str = r#"You are a security finding triage expert. Analyze the following security finding with its code context and determine the appropriate action.
|
||||
/// Maximum number of findings to include in a single LLM triage call.
|
||||
const TRIAGE_CHUNK_SIZE: usize = 30;
|
||||
|
||||
const TRIAGE_SYSTEM_PROMPT: &str = r#"You are a security finding triage expert. Analyze each of the following security findings with its code context and determine the appropriate action.
|
||||
|
||||
Actions:
|
||||
- "confirm": The finding is a true positive at the reported severity. Keep as-is.
|
||||
@@ -19,8 +22,8 @@ Consider:
|
||||
- Is the finding actionable by a developer?
|
||||
- Would a real attacker be able to exploit this?
|
||||
|
||||
Respond in JSON format:
|
||||
{"action": "confirm|downgrade|upgrade|dismiss", "confidence": 0-10, "rationale": "brief explanation", "remediation": "optional fix suggestion"}"#;
|
||||
Respond with a JSON array, one entry per finding in the same order they were presented:
|
||||
[{"id": "<fingerprint>", "action": "confirm|downgrade|upgrade|dismiss", "confidence": 0-10, "rationale": "brief explanation", "remediation": "optional fix suggestion"}, ...]"#;
|
||||
|
||||
pub async fn triage_findings(
|
||||
llm: &Arc<LlmClient>,
|
||||
@@ -29,60 +32,76 @@ pub async fn triage_findings(
|
||||
) -> usize {
|
||||
let mut passed = 0;
|
||||
|
||||
for finding in findings.iter_mut() {
|
||||
let file_classification = classify_file_path(finding.file_path.as_deref());
|
||||
// Process findings in chunks to avoid overflowing the LLM context window.
|
||||
for chunk_start in (0..findings.len()).step_by(TRIAGE_CHUNK_SIZE) {
|
||||
let chunk_end = (chunk_start + TRIAGE_CHUNK_SIZE).min(findings.len());
|
||||
let chunk = &mut findings[chunk_start..chunk_end];
|
||||
|
||||
let mut user_prompt = format!(
|
||||
"Scanner: {}\nRule: {}\nSeverity: {}\nTitle: {}\nDescription: {}\nFile: {}\nLine: {}\nCode: {}\nFile classification: {}",
|
||||
finding.scanner,
|
||||
finding.rule_id.as_deref().unwrap_or("N/A"),
|
||||
finding.severity,
|
||||
finding.title,
|
||||
finding.description,
|
||||
finding.file_path.as_deref().unwrap_or("N/A"),
|
||||
finding.line_number.map(|n| n.to_string()).unwrap_or_else(|| "N/A".to_string()),
|
||||
finding.code_snippet.as_deref().unwrap_or("N/A"),
|
||||
file_classification,
|
||||
);
|
||||
// Build a combined prompt for the entire chunk.
|
||||
let mut user_prompt = String::new();
|
||||
let mut file_classifications: Vec<String> = Vec::new();
|
||||
|
||||
for (i, finding) in chunk.iter().enumerate() {
|
||||
let file_classification = classify_file_path(finding.file_path.as_deref());
|
||||
|
||||
// Enrich with surrounding code context if possible
|
||||
if let Some(context) = read_surrounding_context(finding) {
|
||||
user_prompt.push_str(&format!(
|
||||
"\n\n--- Surrounding Code (50 lines) ---\n{context}"
|
||||
"\n--- Finding {} (id: {}) ---\nScanner: {}\nRule: {}\nSeverity: {}\nTitle: {}\nDescription: {}\nFile: {}\nLine: {}\nCode: {}\nFile classification: {}",
|
||||
i + 1,
|
||||
finding.fingerprint,
|
||||
finding.scanner,
|
||||
finding.rule_id.as_deref().unwrap_or("N/A"),
|
||||
finding.severity,
|
||||
finding.title,
|
||||
finding.description,
|
||||
finding.file_path.as_deref().unwrap_or("N/A"),
|
||||
finding.line_number.map(|n| n.to_string()).unwrap_or_else(|| "N/A".to_string()),
|
||||
finding.code_snippet.as_deref().unwrap_or("N/A"),
|
||||
file_classification,
|
||||
));
|
||||
}
|
||||
|
||||
// Enrich with graph context if available
|
||||
if let Some(ctx) = graph_context {
|
||||
if let Some(impact) = ctx
|
||||
.impacts
|
||||
.iter()
|
||||
.find(|i| i.finding_id == finding.fingerprint)
|
||||
{
|
||||
// Enrich with surrounding code context if possible
|
||||
if let Some(context) = read_surrounding_context(finding) {
|
||||
user_prompt.push_str(&format!(
|
||||
"\n\n--- Code Graph Context ---\n\
|
||||
Blast radius: {} nodes affected\n\
|
||||
Entry points affected: {}\n\
|
||||
Direct callers: {}\n\
|
||||
Communities affected: {}\n\
|
||||
Call chains: {}",
|
||||
impact.blast_radius,
|
||||
if impact.affected_entry_points.is_empty() {
|
||||
"none".to_string()
|
||||
} else {
|
||||
impact.affected_entry_points.join(", ")
|
||||
},
|
||||
if impact.direct_callers.is_empty() {
|
||||
"none".to_string()
|
||||
} else {
|
||||
impact.direct_callers.join(", ")
|
||||
},
|
||||
impact.affected_communities.len(),
|
||||
impact.call_chains.len(),
|
||||
"\n\n--- Surrounding Code (50 lines) ---\n{context}"
|
||||
));
|
||||
}
|
||||
|
||||
// Enrich with graph context if available
|
||||
if let Some(ctx) = graph_context {
|
||||
if let Some(impact) = ctx
|
||||
.impacts
|
||||
.iter()
|
||||
.find(|im| im.finding_id == finding.fingerprint)
|
||||
{
|
||||
user_prompt.push_str(&format!(
|
||||
"\n\n--- Code Graph Context ---\n\
|
||||
Blast radius: {} nodes affected\n\
|
||||
Entry points affected: {}\n\
|
||||
Direct callers: {}\n\
|
||||
Communities affected: {}\n\
|
||||
Call chains: {}",
|
||||
impact.blast_radius,
|
||||
if impact.affected_entry_points.is_empty() {
|
||||
"none".to_string()
|
||||
} else {
|
||||
impact.affected_entry_points.join(", ")
|
||||
},
|
||||
if impact.direct_callers.is_empty() {
|
||||
"none".to_string()
|
||||
} else {
|
||||
impact.direct_callers.join(", ")
|
||||
},
|
||||
impact.affected_communities.len(),
|
||||
impact.call_chains.len(),
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
user_prompt.push('\n');
|
||||
file_classifications.push(file_classification);
|
||||
}
|
||||
|
||||
// Send the batch to the LLM.
|
||||
match llm
|
||||
.chat(TRIAGE_SYSTEM_PROMPT, &user_prompt, Some(0.1))
|
||||
.await
|
||||
@@ -98,58 +117,77 @@ pub async fn triage_findings(
|
||||
} else {
|
||||
cleaned
|
||||
};
|
||||
if let Ok(result) = serde_json::from_str::<TriageResult>(cleaned) {
|
||||
// Apply file-path confidence adjustment
|
||||
let adjusted_confidence =
|
||||
adjust_confidence(result.confidence, &file_classification);
|
||||
finding.confidence = Some(adjusted_confidence);
|
||||
finding.triage_action = Some(result.action.clone());
|
||||
finding.triage_rationale = Some(result.rationale);
|
||||
|
||||
if let Some(remediation) = result.remediation {
|
||||
finding.remediation = Some(remediation);
|
||||
}
|
||||
|
||||
match result.action.as_str() {
|
||||
"dismiss" => {
|
||||
finding.status = FindingStatus::FalsePositive;
|
||||
}
|
||||
"downgrade" => {
|
||||
// Downgrade severity by one level
|
||||
finding.severity = downgrade_severity(&finding.severity);
|
||||
finding.status = FindingStatus::Triaged;
|
||||
passed += 1;
|
||||
}
|
||||
"upgrade" => {
|
||||
finding.severity = upgrade_severity(&finding.severity);
|
||||
finding.status = FindingStatus::Triaged;
|
||||
passed += 1;
|
||||
}
|
||||
_ => {
|
||||
// "confirm" or unknown — keep as-is
|
||||
if adjusted_confidence >= 3.0 {
|
||||
match serde_json::from_str::<Vec<TriageResult>>(cleaned) {
|
||||
Ok(results) => {
|
||||
for (idx, finding) in chunk.iter_mut().enumerate() {
|
||||
// Match result by position; fall back to keeping the finding.
|
||||
let Some(result) = results.get(idx) else {
|
||||
finding.status = FindingStatus::Triaged;
|
||||
passed += 1;
|
||||
} else {
|
||||
finding.status = FindingStatus::FalsePositive;
|
||||
continue;
|
||||
};
|
||||
|
||||
let file_classification = file_classifications
|
||||
.get(idx)
|
||||
.map(|s| s.as_str())
|
||||
.unwrap_or("unknown");
|
||||
|
||||
let adjusted_confidence =
|
||||
adjust_confidence(result.confidence, file_classification);
|
||||
finding.confidence = Some(adjusted_confidence);
|
||||
finding.triage_action = Some(result.action.clone());
|
||||
finding.triage_rationale = Some(result.rationale.clone());
|
||||
|
||||
if let Some(ref remediation) = result.remediation {
|
||||
finding.remediation = Some(remediation.clone());
|
||||
}
|
||||
|
||||
match result.action.as_str() {
|
||||
"dismiss" => {
|
||||
finding.status = FindingStatus::FalsePositive;
|
||||
}
|
||||
"downgrade" => {
|
||||
finding.severity = downgrade_severity(&finding.severity);
|
||||
finding.status = FindingStatus::Triaged;
|
||||
passed += 1;
|
||||
}
|
||||
"upgrade" => {
|
||||
finding.severity = upgrade_severity(&finding.severity);
|
||||
finding.status = FindingStatus::Triaged;
|
||||
passed += 1;
|
||||
}
|
||||
_ => {
|
||||
// "confirm" or unknown — keep as-is
|
||||
if adjusted_confidence >= 3.0 {
|
||||
finding.status = FindingStatus::Triaged;
|
||||
passed += 1;
|
||||
} else {
|
||||
finding.status = FindingStatus::FalsePositive;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Parse failure — keep the finding
|
||||
finding.status = FindingStatus::Triaged;
|
||||
passed += 1;
|
||||
tracing::warn!(
|
||||
"Failed to parse triage response for {}: {response}",
|
||||
finding.fingerprint
|
||||
);
|
||||
Err(_) => {
|
||||
// Batch parse failure — keep all findings in the chunk.
|
||||
tracing::warn!(
|
||||
"Failed to parse batch triage response for chunk starting at {chunk_start}: {cleaned}"
|
||||
);
|
||||
for finding in chunk.iter_mut() {
|
||||
finding.status = FindingStatus::Triaged;
|
||||
passed += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
// On LLM error, keep the finding
|
||||
tracing::warn!("LLM triage failed for {}: {e}", finding.fingerprint);
|
||||
finding.status = FindingStatus::Triaged;
|
||||
passed += 1;
|
||||
// On LLM error, keep all findings in the chunk.
|
||||
tracing::warn!("LLM batch triage failed for chunk starting at {chunk_start}: {e}");
|
||||
for finding in chunk.iter_mut() {
|
||||
finding.status = FindingStatus::Triaged;
|
||||
passed += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -266,6 +304,10 @@ fn upgrade_severity(
|
||||
|
||||
#[derive(serde::Deserialize)]
|
||||
struct TriageResult {
|
||||
/// Finding fingerprint echoed back by the LLM (optional).
|
||||
#[serde(default)]
|
||||
#[allow(dead_code)]
|
||||
id: String,
|
||||
#[serde(default = "default_action")]
|
||||
action: String,
|
||||
#[serde(default)]
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
mod agent;
|
||||
mod api;
|
||||
mod config;
|
||||
pub(crate) mod config;
|
||||
mod database;
|
||||
mod error;
|
||||
mod llm;
|
||||
@@ -15,11 +15,20 @@ mod webhooks;
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
dotenvy::dotenv().ok();
|
||||
match dotenvy::dotenv() {
|
||||
Ok(path) => eprintln!("[dotenv] Loaded from: {}", path.display()),
|
||||
Err(e) => eprintln!("[dotenv] FAILED: {e}"),
|
||||
}
|
||||
|
||||
let _telemetry_guard = compliance_core::telemetry::init_telemetry("compliance-agent");
|
||||
|
||||
tracing::info!("Loading configuration...");
|
||||
// Log critical env vars at startup
|
||||
tracing::info!(
|
||||
chrome_ws_url = std::env::var("CHROME_WS_URL").ok().as_deref(),
|
||||
pentest_email = std::env::var("PENTEST_VERIFICATION_EMAIL").ok().as_deref(),
|
||||
encryption_key_set = std::env::var("PENTEST_ENCRYPTION_KEY").is_ok(),
|
||||
"Loading configuration..."
|
||||
);
|
||||
let config = config::load_config()?;
|
||||
|
||||
// Ensure SSH key pair exists for cloning private repos
|
||||
|
||||
300
compliance-agent/src/pentest/cleanup.rs
Normal file
300
compliance-agent/src/pentest/cleanup.rs
Normal file
@@ -0,0 +1,300 @@
|
||||
use compliance_core::models::pentest::{IdentityProvider, TestUserRecord};
|
||||
use compliance_core::AgentConfig;
|
||||
use secrecy::ExposeSecret;
|
||||
use tracing::{info, warn};
|
||||
|
||||
/// Attempt to delete a test user created during a pentest session.
|
||||
///
|
||||
/// Routes to the appropriate identity provider based on `TestUserRecord.provider`.
|
||||
/// Falls back to browser-based cleanup if no API credentials are available.
|
||||
///
|
||||
/// Returns `Ok(true)` if the user was deleted, `Ok(false)` if skipped, `Err` on failure.
|
||||
pub async fn cleanup_test_user(
|
||||
user: &TestUserRecord,
|
||||
config: &AgentConfig,
|
||||
http: &reqwest::Client,
|
||||
) -> Result<bool, String> {
|
||||
if user.cleaned_up {
|
||||
return Ok(false);
|
||||
}
|
||||
|
||||
let provider = user.provider.as_ref();
|
||||
|
||||
match provider {
|
||||
Some(IdentityProvider::Keycloak) => cleanup_keycloak(user, config, http).await,
|
||||
Some(IdentityProvider::Auth0) => cleanup_auth0(user, config, http).await,
|
||||
Some(IdentityProvider::Okta) => cleanup_okta(user, config, http).await,
|
||||
Some(IdentityProvider::Firebase) => {
|
||||
warn!("Firebase user cleanup not yet implemented");
|
||||
Ok(false)
|
||||
}
|
||||
Some(IdentityProvider::Custom) | None => {
|
||||
// For custom/unknown providers, try Keycloak if configured, else skip
|
||||
if config.keycloak_url.is_some() && config.keycloak_admin_username.is_some() {
|
||||
cleanup_keycloak(user, config, http).await
|
||||
} else {
|
||||
warn!(
|
||||
username = user.username.as_deref(),
|
||||
"No identity provider configured for cleanup — skipping"
|
||||
);
|
||||
Ok(false)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Delete a user from Keycloak via the Admin REST API.
|
||||
///
|
||||
/// Flow: get admin token → search user by username → delete by ID.
|
||||
async fn cleanup_keycloak(
|
||||
user: &TestUserRecord,
|
||||
config: &AgentConfig,
|
||||
http: &reqwest::Client,
|
||||
) -> Result<bool, String> {
|
||||
let base_url = config
|
||||
.keycloak_url
|
||||
.as_deref()
|
||||
.ok_or("KEYCLOAK_URL not configured")?;
|
||||
let realm = config
|
||||
.keycloak_realm
|
||||
.as_deref()
|
||||
.ok_or("KEYCLOAK_REALM not configured")?;
|
||||
let admin_user = config
|
||||
.keycloak_admin_username
|
||||
.as_deref()
|
||||
.ok_or("KEYCLOAK_ADMIN_USERNAME not configured")?;
|
||||
let admin_pass = config
|
||||
.keycloak_admin_password
|
||||
.as_ref()
|
||||
.ok_or("KEYCLOAK_ADMIN_PASSWORD not configured")?;
|
||||
|
||||
let username = user
|
||||
.username
|
||||
.as_deref()
|
||||
.ok_or("No username in test user record")?;
|
||||
|
||||
info!(username, realm, "Cleaning up Keycloak test user");
|
||||
|
||||
// Step 1: Get admin access token
|
||||
let token_url = format!("{base_url}/realms/master/protocol/openid-connect/token");
|
||||
let token_resp = http
|
||||
.post(&token_url)
|
||||
.form(&[
|
||||
("grant_type", "password"),
|
||||
("client_id", "admin-cli"),
|
||||
("username", admin_user),
|
||||
("password", admin_pass.expose_secret()),
|
||||
])
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| format!("Keycloak token request failed: {e}"))?;
|
||||
|
||||
if !token_resp.status().is_success() {
|
||||
let status = token_resp.status();
|
||||
let body = token_resp.text().await.unwrap_or_default();
|
||||
return Err(format!("Keycloak admin auth failed ({status}): {body}"));
|
||||
}
|
||||
|
||||
let token_body: serde_json::Value = token_resp
|
||||
.json()
|
||||
.await
|
||||
.map_err(|e| format!("Failed to parse Keycloak token: {e}"))?;
|
||||
let access_token = token_body
|
||||
.get("access_token")
|
||||
.and_then(|v| v.as_str())
|
||||
.ok_or("No access_token in Keycloak response")?;
|
||||
|
||||
// Step 2: Search for user by username
|
||||
let search_url =
|
||||
format!("{base_url}/admin/realms/{realm}/users?username={username}&exact=true");
|
||||
let search_resp = http
|
||||
.get(&search_url)
|
||||
.bearer_auth(access_token)
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| format!("Keycloak user search failed: {e}"))?;
|
||||
|
||||
if !search_resp.status().is_success() {
|
||||
let status = search_resp.status();
|
||||
let body = search_resp.text().await.unwrap_or_default();
|
||||
return Err(format!("Keycloak user search failed ({status}): {body}"));
|
||||
}
|
||||
|
||||
let users: Vec<serde_json::Value> = search_resp
|
||||
.json()
|
||||
.await
|
||||
.map_err(|e| format!("Failed to parse Keycloak users: {e}"))?;
|
||||
|
||||
let user_id = users
|
||||
.first()
|
||||
.and_then(|u| u.get("id"))
|
||||
.and_then(|v| v.as_str())
|
||||
.ok_or_else(|| format!("User '{username}' not found in Keycloak realm '{realm}'"))?;
|
||||
|
||||
// Step 3: Delete the user
|
||||
let delete_url = format!("{base_url}/admin/realms/{realm}/users/{user_id}");
|
||||
let delete_resp = http
|
||||
.delete(&delete_url)
|
||||
.bearer_auth(access_token)
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| format!("Keycloak user delete failed: {e}"))?;
|
||||
|
||||
if delete_resp.status().is_success() || delete_resp.status().as_u16() == 204 {
|
||||
info!(username, user_id, "Keycloak test user deleted");
|
||||
Ok(true)
|
||||
} else {
|
||||
let status = delete_resp.status();
|
||||
let body = delete_resp.text().await.unwrap_or_default();
|
||||
Err(format!("Keycloak delete failed ({status}): {body}"))
|
||||
}
|
||||
}
|
||||
|
||||
/// Delete a user from Auth0 via the Management API.
|
||||
///
|
||||
/// Requires `AUTH0_DOMAIN`, `AUTH0_CLIENT_ID`, `AUTH0_CLIENT_SECRET` env vars.
|
||||
async fn cleanup_auth0(
|
||||
user: &TestUserRecord,
|
||||
_config: &AgentConfig,
|
||||
http: &reqwest::Client,
|
||||
) -> Result<bool, String> {
|
||||
let domain = std::env::var("AUTH0_DOMAIN").map_err(|_| "AUTH0_DOMAIN not set")?;
|
||||
let client_id = std::env::var("AUTH0_CLIENT_ID").map_err(|_| "AUTH0_CLIENT_ID not set")?;
|
||||
let client_secret =
|
||||
std::env::var("AUTH0_CLIENT_SECRET").map_err(|_| "AUTH0_CLIENT_SECRET not set")?;
|
||||
|
||||
let email = user
|
||||
.email
|
||||
.as_deref()
|
||||
.ok_or("No email in test user record for Auth0 lookup")?;
|
||||
|
||||
info!(email, "Cleaning up Auth0 test user");
|
||||
|
||||
// Get management API token
|
||||
let token_resp = http
|
||||
.post(format!("https://{domain}/oauth/token"))
|
||||
.json(&serde_json::json!({
|
||||
"grant_type": "client_credentials",
|
||||
"client_id": client_id,
|
||||
"client_secret": client_secret,
|
||||
"audience": format!("https://{domain}/api/v2/"),
|
||||
}))
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| format!("Auth0 token request failed: {e}"))?;
|
||||
|
||||
let token_body: serde_json::Value = token_resp
|
||||
.json()
|
||||
.await
|
||||
.map_err(|e| format!("Failed to parse Auth0 token: {e}"))?;
|
||||
let access_token = token_body
|
||||
.get("access_token")
|
||||
.and_then(|v| v.as_str())
|
||||
.ok_or("No access_token in Auth0 response")?;
|
||||
|
||||
// Search for user by email
|
||||
let encoded_email = urlencoding::encode(email);
|
||||
let search_url = format!("https://{domain}/api/v2/users-by-email?email={encoded_email}");
|
||||
let search_resp = http
|
||||
.get(&search_url)
|
||||
.bearer_auth(access_token)
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| format!("Auth0 user search failed: {e}"))?;
|
||||
|
||||
let users: Vec<serde_json::Value> = search_resp
|
||||
.json()
|
||||
.await
|
||||
.map_err(|e| format!("Failed to parse Auth0 users: {e}"))?;
|
||||
|
||||
let user_id = users
|
||||
.first()
|
||||
.and_then(|u| u.get("user_id"))
|
||||
.and_then(|v| v.as_str())
|
||||
.ok_or_else(|| format!("User with email '{email}' not found in Auth0"))?;
|
||||
|
||||
// Delete
|
||||
let encoded_id = urlencoding::encode(user_id);
|
||||
let delete_resp = http
|
||||
.delete(format!("https://{domain}/api/v2/users/{encoded_id}"))
|
||||
.bearer_auth(access_token)
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| format!("Auth0 user delete failed: {e}"))?;
|
||||
|
||||
if delete_resp.status().is_success() || delete_resp.status().as_u16() == 204 {
|
||||
info!(email, user_id, "Auth0 test user deleted");
|
||||
Ok(true)
|
||||
} else {
|
||||
let status = delete_resp.status();
|
||||
let body = delete_resp.text().await.unwrap_or_default();
|
||||
Err(format!("Auth0 delete failed ({status}): {body}"))
|
||||
}
|
||||
}
|
||||
|
||||
/// Delete a user from Okta via the Users API.
|
||||
///
|
||||
/// Requires `OKTA_DOMAIN`, `OKTA_API_TOKEN` env vars.
|
||||
async fn cleanup_okta(
|
||||
user: &TestUserRecord,
|
||||
_config: &AgentConfig,
|
||||
http: &reqwest::Client,
|
||||
) -> Result<bool, String> {
|
||||
let domain = std::env::var("OKTA_DOMAIN").map_err(|_| "OKTA_DOMAIN not set")?;
|
||||
let api_token = std::env::var("OKTA_API_TOKEN").map_err(|_| "OKTA_API_TOKEN not set")?;
|
||||
|
||||
let username = user
|
||||
.username
|
||||
.as_deref()
|
||||
.or(user.email.as_deref())
|
||||
.ok_or("No username/email in test user record for Okta lookup")?;
|
||||
|
||||
info!(username, "Cleaning up Okta test user");
|
||||
|
||||
// Search user
|
||||
let encoded = urlencoding::encode(username);
|
||||
let search_url = format!("https://{domain}/api/v1/users?search=profile.login+eq+\"{encoded}\"");
|
||||
let search_resp = http
|
||||
.get(&search_url)
|
||||
.header("Authorization", format!("SSWS {api_token}"))
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| format!("Okta user search failed: {e}"))?;
|
||||
|
||||
let users: Vec<serde_json::Value> = search_resp
|
||||
.json()
|
||||
.await
|
||||
.map_err(|e| format!("Failed to parse Okta users: {e}"))?;
|
||||
|
||||
let user_id = users
|
||||
.first()
|
||||
.and_then(|u| u.get("id"))
|
||||
.and_then(|v| v.as_str())
|
||||
.ok_or_else(|| format!("User '{username}' not found in Okta"))?;
|
||||
|
||||
// Deactivate first (required by Okta before delete)
|
||||
let _ = http
|
||||
.post(format!(
|
||||
"https://{domain}/api/v1/users/{user_id}/lifecycle/deactivate"
|
||||
))
|
||||
.header("Authorization", format!("SSWS {api_token}"))
|
||||
.send()
|
||||
.await;
|
||||
|
||||
// Delete
|
||||
let delete_resp = http
|
||||
.delete(format!("https://{domain}/api/v1/users/{user_id}"))
|
||||
.header("Authorization", format!("SSWS {api_token}"))
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| format!("Okta user delete failed: {e}"))?;
|
||||
|
||||
if delete_resp.status().is_success() || delete_resp.status().as_u16() == 204 {
|
||||
info!(username, user_id, "Okta test user deleted");
|
||||
Ok(true)
|
||||
} else {
|
||||
let status = delete_resp.status();
|
||||
let body = delete_resp.text().await.unwrap_or_default();
|
||||
Err(format!("Okta delete failed ({status}): {body}"))
|
||||
}
|
||||
}
|
||||
@@ -1,3 +1,4 @@
|
||||
pub mod cleanup;
|
||||
mod context;
|
||||
pub mod crypto;
|
||||
pub mod orchestrator;
|
||||
|
||||
@@ -390,10 +390,13 @@ impl PentestOrchestrator {
|
||||
)
|
||||
.await;
|
||||
|
||||
// Build LLM-facing summary: strip large fields
|
||||
// (screenshots, raw HTML) to save context window
|
||||
let llm_data = summarize_tool_output(&result.data);
|
||||
serde_json::json!({
|
||||
"summary": result.summary,
|
||||
"findings_count": findings_count,
|
||||
"data": result.data,
|
||||
"data": llm_data,
|
||||
})
|
||||
.to_string()
|
||||
}
|
||||
@@ -465,21 +468,61 @@ impl PentestOrchestrator {
|
||||
.await;
|
||||
}
|
||||
|
||||
// If cleanup_test_user is requested, append a cleanup instruction
|
||||
// Clean up test user via identity provider API if requested
|
||||
if session
|
||||
.config
|
||||
.as_ref()
|
||||
.is_some_and(|c| c.auth.cleanup_test_user)
|
||||
{
|
||||
let cleanup_msg = PentestMessage::user(
|
||||
session_id.clone(),
|
||||
"Testing is complete. Now please clean up: navigate to the application and delete \
|
||||
the test user account that was created during this session. Confirm once done."
|
||||
.to_string(),
|
||||
);
|
||||
let _ = self.db.pentest_messages().insert_one(&cleanup_msg).await;
|
||||
if let Some(ref test_user) = session.test_user {
|
||||
let http = reqwest::Client::new();
|
||||
// We need the AgentConfig — read from env since orchestrator doesn't hold it
|
||||
let config = crate::config::load_config();
|
||||
match config {
|
||||
Ok(cfg) => {
|
||||
match crate::pentest::cleanup::cleanup_test_user(test_user, &cfg, &http)
|
||||
.await
|
||||
{
|
||||
Ok(true) => {
|
||||
tracing::info!(
|
||||
username = test_user.username.as_deref(),
|
||||
"Test user cleaned up via provider API"
|
||||
);
|
||||
// Mark as cleaned up in DB
|
||||
if let Some(sid) = session.id {
|
||||
let _ = self
|
||||
.db
|
||||
.pentest_sessions()
|
||||
.update_one(
|
||||
doc! { "_id": sid },
|
||||
doc! { "$set": { "test_user.cleaned_up": true } },
|
||||
)
|
||||
.await;
|
||||
}
|
||||
}
|
||||
Ok(false) => {
|
||||
tracing::info!(
|
||||
"Test user cleanup skipped (no provider configured)"
|
||||
);
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!(error = %e, "Test user cleanup failed");
|
||||
let _ = self.event_tx.send(PentestEvent::Error {
|
||||
message: format!("Test user cleanup failed: {e}"),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!(error = %e, "Could not load config for cleanup");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Clean up the persistent browser session for this pentest
|
||||
compliance_dast::tools::browser::cleanup_browser_session(&session_id).await;
|
||||
|
||||
let _ = self.event_tx.send(PentestEvent::Complete {
|
||||
summary: format!(
|
||||
"Pentest complete. {} findings from {} tool invocations.",
|
||||
@@ -490,3 +533,82 @@ impl PentestOrchestrator {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Strip large fields from tool output before sending to the LLM.
|
||||
/// Screenshots, raw HTML, and other bulky data are replaced with short summaries.
|
||||
/// The full data is still stored in the DB for the report.
|
||||
fn summarize_tool_output(data: &serde_json::Value) -> serde_json::Value {
|
||||
let Some(obj) = data.as_object() else {
|
||||
return data.clone();
|
||||
};
|
||||
|
||||
let mut summarized = serde_json::Map::new();
|
||||
for (key, value) in obj {
|
||||
match key.as_str() {
|
||||
// Replace screenshot base64 with a placeholder
|
||||
"screenshot_base64" => {
|
||||
if let Some(s) = value.as_str() {
|
||||
if !s.is_empty() {
|
||||
summarized.insert(
|
||||
key.clone(),
|
||||
serde_json::Value::String(
|
||||
"[screenshot captured and saved to report]".to_string(),
|
||||
),
|
||||
);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
summarized.insert(key.clone(), value.clone());
|
||||
}
|
||||
// Truncate raw HTML content
|
||||
"html" => {
|
||||
if let Some(s) = value.as_str() {
|
||||
if s.len() > 2000 {
|
||||
summarized.insert(
|
||||
key.clone(),
|
||||
serde_json::Value::String(format!(
|
||||
"{}... [truncated, {} chars total]",
|
||||
&s[..2000],
|
||||
s.len()
|
||||
)),
|
||||
);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
summarized.insert(key.clone(), value.clone());
|
||||
}
|
||||
// Truncate page text
|
||||
"text" if value.as_str().is_some_and(|s| s.len() > 1500) => {
|
||||
let s = value.as_str().unwrap_or_default();
|
||||
summarized.insert(
|
||||
key.clone(),
|
||||
serde_json::Value::String(format!("{}... [truncated]", &s[..1500])),
|
||||
);
|
||||
}
|
||||
// Trim large arrays (e.g., "elements", "links", "inputs")
|
||||
"elements" | "links" | "inputs" => {
|
||||
if let Some(arr) = value.as_array() {
|
||||
if arr.len() > 15 {
|
||||
let mut trimmed: Vec<serde_json::Value> = arr[..15].to_vec();
|
||||
trimmed.push(serde_json::json!(format!(
|
||||
"... and {} more",
|
||||
arr.len() - 15
|
||||
)));
|
||||
summarized.insert(key.clone(), serde_json::Value::Array(trimmed));
|
||||
continue;
|
||||
}
|
||||
}
|
||||
summarized.insert(key.clone(), value.clone());
|
||||
}
|
||||
// Recursively summarize nested objects (e.g., "page" in get_content)
|
||||
_ if value.is_object() => {
|
||||
summarized.insert(key.clone(), summarize_tool_output(value));
|
||||
}
|
||||
// Keep everything else as-is
|
||||
_ => {
|
||||
summarized.insert(key.clone(), value.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
serde_json::Value::Object(summarized)
|
||||
}
|
||||
|
||||
@@ -285,15 +285,34 @@ impl PentestOrchestrator {
|
||||
1. Start by running reconnaissance (recon tool) to fingerprint the target and discover technologies.
|
||||
2. Run the OpenAPI parser to discover API endpoints from specs.
|
||||
3. Check infrastructure: DNS, DMARC, TLS, security headers, cookies, CSP, CORS.
|
||||
4. Based on SAST findings, prioritize testing endpoints where vulnerabilities were found in code.
|
||||
5. For each vulnerability type found in SAST, use the corresponding DAST tool to verify exploitability.
|
||||
6. If vulnerable dependencies are listed, try to trigger known CVE conditions against the running application.
|
||||
7. Test rate limiting on critical endpoints (login, API).
|
||||
8. Check for console.log leakage in frontend JavaScript.
|
||||
9. Analyze tool results and chain findings — if one vulnerability enables others, explore the chain.
|
||||
10. When testing is complete, provide a structured summary with severity and remediation.
|
||||
11. Always explain your reasoning before invoking each tool.
|
||||
12. When done, say "Testing complete" followed by a final summary.
|
||||
4. If the target requires authentication (auto-register mode), use the browser tool to:
|
||||
a. Navigate to the target — it will redirect to the login page.
|
||||
b. Click the "Register" link to reach the registration form.
|
||||
c. Fill all form fields (username, email with plus-addressing, password, name) one by one.
|
||||
d. Click submit. If a Terms & Conditions page appears, accept it.
|
||||
e. After registration, use the browser to navigate through the application pages.
|
||||
f. **Take a screenshot after each major page** for evidence in the report.
|
||||
5. Use the browser tool to explore the authenticated application — navigate to each section,
|
||||
use get_content to understand the page structure, and take screenshots.
|
||||
6. Based on SAST findings, prioritize testing endpoints where vulnerabilities were found in code.
|
||||
7. For each vulnerability type found in SAST, use the corresponding DAST tool to verify exploitability.
|
||||
8. If vulnerable dependencies are listed, try to trigger known CVE conditions against the running application.
|
||||
9. Test rate limiting on critical endpoints (login, API).
|
||||
10. Check for console.log leakage in frontend JavaScript.
|
||||
11. Analyze tool results and chain findings — if one vulnerability enables others, explore the chain.
|
||||
12. When testing is complete, provide a structured summary with severity and remediation.
|
||||
13. Always explain your reasoning before invoking each tool.
|
||||
14. When done, say "Testing complete" followed by a final summary.
|
||||
|
||||
## Browser Tool Usage
|
||||
- The browser tab **persists** between calls — cookies and login state are preserved.
|
||||
- After navigate, the response includes `elements` (links, inputs, buttons on the page).
|
||||
- Use `get_content` to see forms, links, buttons, headings, and page text.
|
||||
- Use `click` with CSS selectors to interact (e.g., `a[href*='register']`, `input[type='submit']`).
|
||||
- Use `fill` with selector + value to fill form fields (e.g., `input[name='email']`).
|
||||
- **Take screenshots** (`action: screenshot`) after important actions for evidence.
|
||||
- For SPA apps: a 200 HTTP status does NOT mean the page is accessible — check the actual
|
||||
page content with the browser tool to verify if it shows real data or a login redirect.
|
||||
|
||||
## Important
|
||||
- This is an authorized penetration test. All testing is permitted within the target scope.
|
||||
|
||||
@@ -149,6 +149,23 @@ fn build_chain_html(chain: &[AttackChainNode]) -> String {
|
||||
)
|
||||
};
|
||||
|
||||
// Render inline screenshot if this is a browser screenshot action
|
||||
let screenshot_html = if node.tool_name == "browser" {
|
||||
node.tool_output
|
||||
.as_ref()
|
||||
.and_then(|out| out.get("screenshot_base64"))
|
||||
.and_then(|v| v.as_str())
|
||||
.filter(|s| !s.is_empty())
|
||||
.map(|b64| {
|
||||
format!(
|
||||
r#"<div class="step-screenshot"><img src="data:image/png;base64,{b64}" alt="Browser screenshot" style="max-width:100%;border:1px solid #e2e8f0;border-radius:6px;margin-top:8px;"/></div>"#
|
||||
)
|
||||
})
|
||||
.unwrap_or_default()
|
||||
} else {
|
||||
String::new()
|
||||
};
|
||||
|
||||
chain_html.push_str(&format!(
|
||||
r#"<div class="step-row">
|
||||
<div class="step-num">{num}</div>
|
||||
@@ -161,6 +178,7 @@ fn build_chain_html(chain: &[AttackChainNode]) -> String {
|
||||
{risk_badge}
|
||||
</div>
|
||||
{reasoning_html}
|
||||
{screenshot_html}
|
||||
</div>
|
||||
</div>"#,
|
||||
num = i + 1,
|
||||
|
||||
@@ -7,7 +7,18 @@ pub(super) fn cover(
|
||||
target_url: &str,
|
||||
requester_name: &str,
|
||||
requester_email: &str,
|
||||
app_screenshot_b64: Option<&str>,
|
||||
) -> String {
|
||||
let screenshot_html = app_screenshot_b64
|
||||
.filter(|s| !s.is_empty())
|
||||
.map(|b64| {
|
||||
format!(
|
||||
r#"<div style="margin: 20px auto; max-width: 560px; border: 1px solid #cbd5e1; border-radius: 8px; overflow: hidden; box-shadow: 0 4px 12px rgba(0,0,0,0.08);">
|
||||
<img src="data:image/png;base64,{b64}" alt="Application screenshot" style="width:100%;display:block;"/>
|
||||
</div>"#
|
||||
)
|
||||
})
|
||||
.unwrap_or_default();
|
||||
format!(
|
||||
r##"<!-- ═══════════════ COVER PAGE ═══════════════ -->
|
||||
<div class="cover">
|
||||
@@ -42,6 +53,8 @@ pub(super) fn cover(
|
||||
<strong>Prepared for:</strong> {requester_name} ({requester_email})
|
||||
</div>
|
||||
|
||||
{screenshot_html}
|
||||
|
||||
<div class="cover-footer">
|
||||
Compliance Scanner — AI-Powered Security Assessment Platform
|
||||
</div>
|
||||
|
||||
@@ -37,6 +37,50 @@ pub(super) fn build_html_report(ctx: &ReportContext) -> String {
|
||||
names
|
||||
};
|
||||
|
||||
// Find the best app screenshot for the cover page:
|
||||
// prefer the first navigate to the target URL that has a screenshot,
|
||||
// falling back to any navigate with a screenshot
|
||||
let app_screenshot: Option<String> = ctx
|
||||
.attack_chain
|
||||
.iter()
|
||||
.filter(|n| n.tool_name == "browser")
|
||||
.filter_map(|n| {
|
||||
n.tool_output
|
||||
.as_ref()?
|
||||
.get("screenshot_base64")?
|
||||
.as_str()
|
||||
.filter(|s| !s.is_empty())
|
||||
.map(|s| s.to_string())
|
||||
})
|
||||
// Skip the Keycloak login page screenshots — prefer one that shows the actual app
|
||||
.find(|_| {
|
||||
ctx.attack_chain
|
||||
.iter()
|
||||
.filter(|n| n.tool_name == "browser")
|
||||
.any(|n| {
|
||||
n.tool_output
|
||||
.as_ref()
|
||||
.and_then(|o| o.get("title"))
|
||||
.and_then(|t| t.as_str())
|
||||
.is_some_and(|t| t.contains("Compliance") || t.contains("Dashboard"))
|
||||
})
|
||||
})
|
||||
.or_else(|| {
|
||||
// Fallback: any screenshot
|
||||
ctx.attack_chain
|
||||
.iter()
|
||||
.filter(|n| n.tool_name == "browser")
|
||||
.filter_map(|n| {
|
||||
n.tool_output
|
||||
.as_ref()?
|
||||
.get("screenshot_base64")?
|
||||
.as_str()
|
||||
.filter(|s| !s.is_empty())
|
||||
.map(|s| s.to_string())
|
||||
})
|
||||
.next()
|
||||
});
|
||||
|
||||
let styles_html = styles::styles();
|
||||
let cover_html = cover::cover(
|
||||
&ctx.target_name,
|
||||
@@ -45,6 +89,7 @@ pub(super) fn build_html_report(ctx: &ReportContext) -> String {
|
||||
&ctx.target_url,
|
||||
&ctx.requester_name,
|
||||
&ctx.requester_email,
|
||||
app_screenshot.as_deref(),
|
||||
);
|
||||
let exec_html = executive_summary::executive_summary(
|
||||
&ctx.findings,
|
||||
|
||||
@@ -27,6 +27,14 @@ pub struct AgentConfig {
|
||||
pub ssh_key_path: String,
|
||||
pub keycloak_url: Option<String>,
|
||||
pub keycloak_realm: Option<String>,
|
||||
pub keycloak_admin_username: Option<String>,
|
||||
pub keycloak_admin_password: Option<SecretString>,
|
||||
// Pentest defaults
|
||||
pub pentest_verification_email: Option<String>,
|
||||
pub pentest_imap_host: Option<String>,
|
||||
pub pentest_imap_port: Option<u16>,
|
||||
pub pentest_imap_username: Option<String>,
|
||||
pub pentest_imap_password: Option<SecretString>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||
|
||||
@@ -28,9 +28,10 @@ pub use graph::{
|
||||
pub use issue::{IssueStatus, TrackerIssue, TrackerType};
|
||||
pub use mcp::{McpServerConfig, McpServerStatus, McpTransport};
|
||||
pub use pentest::{
|
||||
AttackChainNode, AttackNodeStatus, AuthMode, CodeContextHint, Environment, PentestAuthConfig,
|
||||
PentestConfig, PentestEvent, PentestMessage, PentestSession, PentestStats, PentestStatus,
|
||||
PentestStrategy, SeverityDistribution, TesterInfo, ToolCallRecord,
|
||||
AttackChainNode, AttackNodeStatus, AuthMode, CodeContextHint, Environment, IdentityProvider,
|
||||
PentestAuthConfig, PentestConfig, PentestEvent, PentestMessage, PentestSession, PentestStats,
|
||||
PentestStatus, PentestStrategy, SeverityDistribution, TestUserRecord, TesterInfo,
|
||||
ToolCallRecord,
|
||||
};
|
||||
pub use repository::{ScanTrigger, TrackedRepository};
|
||||
pub use sbom::{SbomEntry, VulnRef};
|
||||
|
||||
@@ -150,6 +150,34 @@ pub struct PentestConfig {
|
||||
pub skip_mode: bool,
|
||||
}
|
||||
|
||||
/// Identity provider type for cleanup routing
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum IdentityProvider {
|
||||
Keycloak,
|
||||
Auth0,
|
||||
Okta,
|
||||
Firebase,
|
||||
Custom,
|
||||
}
|
||||
|
||||
/// Details of a test user created during a pentest session.
|
||||
/// Stored so the cleanup step knows exactly what to delete and where.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
|
||||
pub struct TestUserRecord {
|
||||
/// Username used to register
|
||||
pub username: Option<String>,
|
||||
/// Email used to register
|
||||
pub email: Option<String>,
|
||||
/// User ID returned by the identity provider (if known)
|
||||
pub provider_user_id: Option<String>,
|
||||
/// Which identity provider holds this user
|
||||
pub provider: Option<IdentityProvider>,
|
||||
/// Whether cleanup has been completed
|
||||
#[serde(default)]
|
||||
pub cleaned_up: bool,
|
||||
}
|
||||
|
||||
/// A pentest session initiated via the chat interface
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct PentestSession {
|
||||
@@ -163,6 +191,9 @@ pub struct PentestSession {
|
||||
/// Wizard configuration (None for legacy sessions)
|
||||
pub config: Option<PentestConfig>,
|
||||
pub created_by: Option<String>,
|
||||
/// Test user created during auto-register (for cleanup)
|
||||
#[serde(default)]
|
||||
pub test_user: Option<TestUserRecord>,
|
||||
/// Total number of tool invocations in this session
|
||||
pub tool_invocations: u32,
|
||||
/// Total successful tool invocations
|
||||
@@ -187,6 +218,7 @@ impl PentestSession {
|
||||
strategy,
|
||||
config: None,
|
||||
created_by: None,
|
||||
test_user: None,
|
||||
tool_invocations: 0,
|
||||
tool_successes: 0,
|
||||
findings_count: 0,
|
||||
|
||||
@@ -118,9 +118,12 @@ pub(crate) fn cat_label(cat: &str) -> &'static str {
|
||||
}
|
||||
}
|
||||
|
||||
/// Phase name heuristic based on depth
|
||||
pub(crate) fn phase_name(depth: usize) -> &'static str {
|
||||
match depth {
|
||||
/// Maximum number of display phases — deeper iterations are merged into the last.
|
||||
const MAX_PHASES: usize = 8;
|
||||
|
||||
/// Phase name heuristic based on phase index (not raw BFS depth)
|
||||
pub(crate) fn phase_name(phase_idx: usize) -> &'static str {
|
||||
match phase_idx {
|
||||
0 => "Reconnaissance",
|
||||
1 => "Analysis",
|
||||
2 => "Boundary Testing",
|
||||
@@ -133,8 +136,8 @@ pub(crate) fn phase_name(depth: usize) -> &'static str {
|
||||
}
|
||||
|
||||
/// Short label for phase rail
|
||||
pub(crate) fn phase_short_name(depth: usize) -> &'static str {
|
||||
match depth {
|
||||
pub(crate) fn phase_short_name(phase_idx: usize) -> &'static str {
|
||||
match phase_idx {
|
||||
0 => "Recon",
|
||||
1 => "Analysis",
|
||||
2 => "Boundary",
|
||||
@@ -214,7 +217,14 @@ pub(crate) fn compute_phases(steps: &[serde_json::Value]) -> Vec<Vec<usize>> {
|
||||
}
|
||||
}
|
||||
|
||||
// Group by depth
|
||||
// Cap depths at MAX_PHASES - 1 so deeper iterations merge into the last phase
|
||||
for d in depths.iter_mut() {
|
||||
if *d >= MAX_PHASES {
|
||||
*d = MAX_PHASES - 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Group by (capped) depth
|
||||
let max_depth = depths.iter().copied().max().unwrap_or(0);
|
||||
let mut phases: Vec<Vec<usize>> = Vec::new();
|
||||
for d in 0..=max_depth {
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
use std::collections::HashMap;
|
||||
use std::pin::Pin;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use base64::Engine;
|
||||
@@ -6,17 +8,26 @@ use compliance_core::error::CoreError;
|
||||
use compliance_core::traits::pentest_tool::{PentestTool, PentestToolContext, PentestToolResult};
|
||||
use futures_util::{SinkExt, StreamExt};
|
||||
use serde_json::json;
|
||||
use tokio::sync::Mutex;
|
||||
use tokio_tungstenite::tungstenite::Message;
|
||||
use tracing::info;
|
||||
|
||||
type WsStream =
|
||||
tokio_tungstenite::WebSocketStream<tokio_tungstenite::MaybeTlsStream<tokio::net::TcpStream>>;
|
||||
|
||||
/// Global pool of persistent browser sessions keyed by pentest session ID.
|
||||
/// Each pentest session gets one Chrome tab that stays alive across tool calls.
|
||||
static BROWSER_SESSIONS: std::sync::LazyLock<Arc<Mutex<HashMap<String, BrowserSession>>>> =
|
||||
std::sync::LazyLock::new(|| Arc::new(Mutex::new(HashMap::new())));
|
||||
|
||||
/// A browser automation tool that exposes headless Chrome actions to the LLM
|
||||
/// via the Chrome DevTools Protocol. Reuses the same `CHROME_WS_URL` used for
|
||||
/// PDF generation.
|
||||
/// via the Chrome DevTools Protocol.
|
||||
///
|
||||
/// Supported actions: navigate, screenshot, click, fill, get_content, evaluate.
|
||||
/// **Session-persistent**: the same Chrome tab is reused across all invocations
|
||||
/// within a pentest session, so cookies, auth state, and page context are
|
||||
/// preserved between navigate → click → fill → screenshot calls.
|
||||
///
|
||||
/// Supported actions: navigate, screenshot, click, fill, get_content, evaluate, close.
|
||||
pub struct BrowserTool;
|
||||
|
||||
impl Default for BrowserTool {
|
||||
@@ -31,11 +42,13 @@ impl PentestTool for BrowserTool {
|
||||
}
|
||||
|
||||
fn description(&self) -> &str {
|
||||
"Headless browser automation via Chrome DevTools Protocol. \
|
||||
Supports navigating to URLs, taking screenshots, clicking elements, \
|
||||
"Headless browser automation via Chrome DevTools Protocol. The browser tab persists \
|
||||
across calls within the same pentest session — cookies, login state, and page context \
|
||||
are preserved. Supports navigating to URLs, taking screenshots, clicking elements, \
|
||||
filling form fields, reading page content, and evaluating JavaScript. \
|
||||
Use CSS selectors to target elements. Useful for discovering registration pages, \
|
||||
filling out forms, extracting verification links, and visual inspection."
|
||||
Use CSS selectors to target elements. After navigating, use get_content to read the \
|
||||
page HTML and find elements to click or fill. Use this to discover registration pages, \
|
||||
fill out signup forms, complete email verification, and test authenticated flows."
|
||||
}
|
||||
|
||||
fn input_schema(&self) -> serde_json::Value {
|
||||
@@ -44,8 +57,8 @@ impl PentestTool for BrowserTool {
|
||||
"properties": {
|
||||
"action": {
|
||||
"type": "string",
|
||||
"enum": ["navigate", "screenshot", "click", "fill", "get_content", "evaluate"],
|
||||
"description": "Action to perform"
|
||||
"enum": ["navigate", "screenshot", "click", "fill", "get_content", "evaluate", "close"],
|
||||
"description": "Action to perform. The browser tab persists between calls — use navigate first, then get_content to see the page, then click/fill to interact."
|
||||
},
|
||||
"url": {
|
||||
"type": "string",
|
||||
@@ -53,7 +66,7 @@ impl PentestTool for BrowserTool {
|
||||
},
|
||||
"selector": {
|
||||
"type": "string",
|
||||
"description": "CSS selector for click/fill actions"
|
||||
"description": "CSS selector for click/fill actions (e.g. '#username', 'a[href*=register]', 'button[type=submit]')"
|
||||
},
|
||||
"value": {
|
||||
"type": "string",
|
||||
@@ -61,7 +74,7 @@ impl PentestTool for BrowserTool {
|
||||
},
|
||||
"wait_ms": {
|
||||
"type": "integer",
|
||||
"description": "Milliseconds to wait after action (default: 500)"
|
||||
"description": "Milliseconds to wait after action (default: 1000)"
|
||||
}
|
||||
},
|
||||
"required": ["action"]
|
||||
@@ -71,7 +84,7 @@ impl PentestTool for BrowserTool {
|
||||
fn execute<'a>(
|
||||
&'a self,
|
||||
input: serde_json::Value,
|
||||
_context: &'a PentestToolContext,
|
||||
context: &'a PentestToolContext,
|
||||
) -> Pin<Box<dyn std::future::Future<Output = Result<PentestToolResult, CoreError>> + Send + 'a>>
|
||||
{
|
||||
Box::pin(async move {
|
||||
@@ -79,11 +92,42 @@ impl PentestTool for BrowserTool {
|
||||
let url = input.get("url").and_then(|v| v.as_str()).unwrap_or("");
|
||||
let selector = input.get("selector").and_then(|v| v.as_str()).unwrap_or("");
|
||||
let value = input.get("value").and_then(|v| v.as_str()).unwrap_or("");
|
||||
let wait_ms = input.get("wait_ms").and_then(|v| v.as_u64()).unwrap_or(500);
|
||||
let wait_ms = input
|
||||
.get("wait_ms")
|
||||
.and_then(|v| v.as_u64())
|
||||
.unwrap_or(1000);
|
||||
let session_key = context.session_id.clone();
|
||||
|
||||
let mut session = BrowserSession::connect()
|
||||
.await
|
||||
.map_err(|e| CoreError::Other(format!("Browser connect failed: {e}")))?;
|
||||
// Handle close action — tear down the persistent session
|
||||
if action == "close" {
|
||||
let mut pool = BROWSER_SESSIONS.lock().await;
|
||||
if let Some(mut sess) = pool.remove(&session_key) {
|
||||
let _ = sess.close().await;
|
||||
}
|
||||
return Ok(PentestToolResult {
|
||||
summary: "Browser session closed".to_string(),
|
||||
findings: Vec::new(),
|
||||
data: json!({ "closed": true }),
|
||||
});
|
||||
}
|
||||
|
||||
// Get or create persistent session for this pentest
|
||||
let mut pool = BROWSER_SESSIONS.lock().await;
|
||||
if !pool.contains_key(&session_key) {
|
||||
match BrowserSession::connect().await {
|
||||
Ok(sess) => {
|
||||
pool.insert(session_key.clone(), sess);
|
||||
}
|
||||
Err(e) => {
|
||||
return Err(CoreError::Other(format!("Browser connect failed: {e}")));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let session = pool.get_mut(&session_key);
|
||||
let Some(session) = session else {
|
||||
return Err(CoreError::Other("Browser session not found".to_string()));
|
||||
};
|
||||
|
||||
let result = match action {
|
||||
"navigate" => session.navigate(url, wait_ms).await,
|
||||
@@ -95,8 +139,15 @@ impl PentestTool for BrowserTool {
|
||||
_ => Err(format!("Unknown browser action: {action}")),
|
||||
};
|
||||
|
||||
// Always try to clean up
|
||||
let _ = session.close().await;
|
||||
// If the session errored, remove it so the next call creates a fresh one
|
||||
if result.is_err() {
|
||||
if let Some(mut dead) = pool.remove(&session_key) {
|
||||
let _ = dead.close().await;
|
||||
}
|
||||
}
|
||||
|
||||
// Release the lock before building the response
|
||||
drop(pool);
|
||||
|
||||
match result {
|
||||
Ok(data) => {
|
||||
@@ -214,7 +265,7 @@ impl BrowserSession {
|
||||
}
|
||||
|
||||
async fn navigate(&mut self, url: &str, wait_ms: u64) -> Result<serde_json::Value, String> {
|
||||
let resp = cdp_send_session(
|
||||
cdp_send_session(
|
||||
&mut self.ws,
|
||||
self.next_id,
|
||||
&self.session_id,
|
||||
@@ -226,19 +277,44 @@ impl BrowserSession {
|
||||
|
||||
tokio::time::sleep(Duration::from_millis(wait_ms)).await;
|
||||
|
||||
// Get page title
|
||||
let title_resp = self.evaluate_raw("document.title").await?;
|
||||
let page_url_resp = self.evaluate_raw("window.location.href").await?;
|
||||
// Get page title and current URL (may have redirected)
|
||||
let title = self
|
||||
.evaluate_raw("document.title")
|
||||
.await
|
||||
.unwrap_or_default();
|
||||
let page_url = self
|
||||
.evaluate_raw("window.location.href")
|
||||
.await
|
||||
.unwrap_or_default();
|
||||
|
||||
// Auto-get a summary of interactive elements on the page
|
||||
let links_js = r#"(function(){
|
||||
var items = [];
|
||||
document.querySelectorAll('a[href]').forEach(function(a, i) {
|
||||
if (i < 20) items.push({tag:'a', text:a.textContent.trim().substring(0,60), href:a.href});
|
||||
});
|
||||
document.querySelectorAll('input,select,textarea,button[type=submit]').forEach(function(el, i) {
|
||||
if (i < 20) items.push({tag:el.tagName.toLowerCase(), type:el.type||'', name:el.name||'', id:el.id||'', placeholder:el.placeholder||''});
|
||||
});
|
||||
return JSON.stringify(items);
|
||||
})()"#;
|
||||
let elements_json = self.evaluate_raw(links_js).await.unwrap_or_default();
|
||||
let elements: serde_json::Value = serde_json::from_str(&elements_json).unwrap_or(json!([]));
|
||||
|
||||
// Auto-capture screenshot after every navigation
|
||||
let screenshot_b64 = self.capture_screenshot_b64().await.unwrap_or_default();
|
||||
|
||||
Ok(json!({
|
||||
"navigated": true,
|
||||
"url": page_url_resp,
|
||||
"title": title_resp,
|
||||
"frame_id": resp.get("result").and_then(|r| r.get("frameId")),
|
||||
"url": page_url,
|
||||
"title": title,
|
||||
"elements": elements,
|
||||
"screenshot_base64": screenshot_b64,
|
||||
}))
|
||||
}
|
||||
|
||||
async fn screenshot(&mut self) -> Result<serde_json::Value, String> {
|
||||
/// Capture a screenshot and return it as a base64 string. Returns `Err` if the CDP call fails, and an empty string if the response carries no image data.
|
||||
async fn capture_screenshot_b64(&mut self) -> Result<String, String> {
|
||||
let resp = cdp_send_session(
|
||||
&mut self.ws,
|
||||
self.next_id,
|
||||
@@ -249,14 +325,19 @@ impl BrowserSession {
|
||||
.await?;
|
||||
self.next_id += 1;
|
||||
|
||||
let b64 = resp
|
||||
Ok(resp
|
||||
.get("result")
|
||||
.and_then(|r| r.get("data"))
|
||||
.and_then(|d| d.as_str())
|
||||
.unwrap_or("");
|
||||
.unwrap_or("")
|
||||
.to_string())
|
||||
}
|
||||
|
||||
async fn screenshot(&mut self) -> Result<serde_json::Value, String> {
|
||||
let b64 = self.capture_screenshot_b64().await?;
|
||||
|
||||
let size_kb = base64::engine::general_purpose::STANDARD
|
||||
.decode(b64)
|
||||
.decode(&b64)
|
||||
.map(|b| b.len() / 1024)
|
||||
.unwrap_or(0);
|
||||
|
||||
@@ -267,7 +348,6 @@ impl BrowserSession {
|
||||
}
|
||||
|
||||
async fn click(&mut self, selector: &str, wait_ms: u64) -> Result<serde_json::Value, String> {
|
||||
// Use JS to find element and get its bounding box, then click
|
||||
let js = format!(
|
||||
r#"(function() {{
|
||||
var el = document.querySelector({sel});
|
||||
@@ -289,9 +369,29 @@ impl BrowserSession {
|
||||
let result = self.evaluate_raw(&js).await?;
|
||||
tokio::time::sleep(Duration::from_millis(wait_ms)).await;
|
||||
|
||||
serde_json::from_str::<serde_json::Value>(&result)
|
||||
.unwrap_or_else(|_| json!({ "result": result }));
|
||||
Ok(serde_json::from_str(&result).unwrap_or(json!({ "result": result })))
|
||||
// After click, get current URL (may have navigated)
|
||||
let current_url = self
|
||||
.evaluate_raw("window.location.href")
|
||||
.await
|
||||
.unwrap_or_default();
|
||||
let title = self
|
||||
.evaluate_raw("document.title")
|
||||
.await
|
||||
.unwrap_or_default();
|
||||
|
||||
// Auto-capture screenshot after click
|
||||
let screenshot_b64 = self.capture_screenshot_b64().await.unwrap_or_default();
|
||||
|
||||
let mut click_result: serde_json::Value =
|
||||
serde_json::from_str(&result).unwrap_or(json!({ "result": result }));
|
||||
if let Some(obj) = click_result.as_object_mut() {
|
||||
obj.insert("current_url".to_string(), json!(current_url));
|
||||
obj.insert("page_title".to_string(), json!(title));
|
||||
if !screenshot_b64.is_empty() {
|
||||
obj.insert("screenshot_base64".to_string(), json!(screenshot_b64));
|
||||
}
|
||||
}
|
||||
Ok(click_result)
|
||||
}
|
||||
|
||||
async fn fill(
|
||||
@@ -300,62 +400,83 @@ impl BrowserSession {
|
||||
value: &str,
|
||||
wait_ms: u64,
|
||||
) -> Result<serde_json::Value, String> {
|
||||
let js = format!(
|
||||
r#"(function() {{
|
||||
var el = document.querySelector({sel});
|
||||
if (!el) return JSON.stringify({{error: "Element not found: {raw}"}});
|
||||
el.focus();
|
||||
el.value = {val};
|
||||
el.dispatchEvent(new Event('input', {{bubbles: true}}));
|
||||
el.dispatchEvent(new Event('change', {{bubbles: true}}));
|
||||
return JSON.stringify({{filled: true, tag: el.tagName, selector: {sel}}});
|
||||
}})()"#,
|
||||
// Step 1: Focus the element via JS
|
||||
let focus_js = format!(
|
||||
"(function(){{var e=document.querySelector({sel});\
|
||||
if(!e)return 'notfound';e.focus();e.select();return 'ok'}})()",
|
||||
sel = serde_json::to_string(selector).unwrap_or_default(),
|
||||
raw = selector.replace('"', r#"\""#),
|
||||
val = serde_json::to_string(value).unwrap_or_default(),
|
||||
);
|
||||
let found = self.evaluate_raw(&focus_js).await?;
|
||||
if found == "notfound" {
|
||||
return Ok(json!({ "error": format!("Element not found: {selector}") }));
|
||||
}
|
||||
|
||||
// Step 2: Clear existing content with Select All + Delete
|
||||
cdp_send_session(
|
||||
&mut self.ws,
|
||||
self.next_id,
|
||||
&self.session_id,
|
||||
"Input.dispatchKeyEvent",
|
||||
json!({"type": "keyDown", "key": "a", "code": "KeyA", "modifiers": 2}),
|
||||
)
|
||||
.await?;
|
||||
self.next_id += 1;
|
||||
cdp_send_session(
|
||||
&mut self.ws,
|
||||
self.next_id,
|
||||
&self.session_id,
|
||||
"Input.dispatchKeyEvent",
|
||||
json!({"type": "keyUp", "key": "a", "code": "KeyA", "modifiers": 2}),
|
||||
)
|
||||
.await?;
|
||||
self.next_id += 1;
|
||||
cdp_send_session(
|
||||
&mut self.ws,
|
||||
self.next_id,
|
||||
&self.session_id,
|
||||
"Input.dispatchKeyEvent",
|
||||
json!({"type": "keyDown", "key": "Backspace", "code": "Backspace"}),
|
||||
)
|
||||
.await?;
|
||||
self.next_id += 1;
|
||||
cdp_send_session(
|
||||
&mut self.ws,
|
||||
self.next_id,
|
||||
&self.session_id,
|
||||
"Input.dispatchKeyEvent",
|
||||
json!({"type": "keyUp", "key": "Backspace", "code": "Backspace"}),
|
||||
)
|
||||
.await?;
|
||||
self.next_id += 1;
|
||||
|
||||
// Step 3: Insert the text using Input.insertText (single CDP command, no JS eval)
|
||||
cdp_send_session(
|
||||
&mut self.ws,
|
||||
self.next_id,
|
||||
&self.session_id,
|
||||
"Input.insertText",
|
||||
json!({"text": value}),
|
||||
)
|
||||
.await?;
|
||||
self.next_id += 1;
|
||||
|
||||
// Step 4: Verify the value was set
|
||||
let verify_js = format!(
|
||||
"(function(){{var e=document.querySelector({sel});return e?e.value:''}})()",
|
||||
sel = serde_json::to_string(selector).unwrap_or_default(),
|
||||
);
|
||||
let final_value = self.evaluate_raw(&verify_js).await.unwrap_or_default();
|
||||
|
||||
let result = self.evaluate_raw(&js).await?;
|
||||
tokio::time::sleep(Duration::from_millis(wait_ms)).await;
|
||||
|
||||
Ok(serde_json::from_str(&result).unwrap_or(json!({ "result": result })))
|
||||
Ok(json!({
|
||||
"filled": true,
|
||||
"selector": selector,
|
||||
"value": final_value,
|
||||
}))
|
||||
}
|
||||
|
||||
async fn get_content(&mut self) -> Result<serde_json::Value, String> {
|
||||
let resp = cdp_send_session(
|
||||
&mut self.ws,
|
||||
self.next_id,
|
||||
&self.session_id,
|
||||
"DOM.getDocument",
|
||||
json!({ "depth": 0 }),
|
||||
)
|
||||
.await?;
|
||||
self.next_id += 1;
|
||||
|
||||
let root_id = resp
|
||||
.get("result")
|
||||
.and_then(|r| r.get("root"))
|
||||
.and_then(|n| n.get("nodeId"))
|
||||
.and_then(|n| n.as_i64())
|
||||
.unwrap_or(1);
|
||||
|
||||
let html_resp = cdp_send_session(
|
||||
&mut self.ws,
|
||||
self.next_id,
|
||||
&self.session_id,
|
||||
"DOM.getOuterHTML",
|
||||
json!({ "nodeId": root_id }),
|
||||
)
|
||||
.await?;
|
||||
self.next_id += 1;
|
||||
|
||||
let html = html_resp
|
||||
.get("result")
|
||||
.and_then(|r| r.get("outerHTML"))
|
||||
.and_then(|h| h.as_str())
|
||||
.unwrap_or("");
|
||||
|
||||
// Also get page title and URL for context
|
||||
let title = self
|
||||
.evaluate_raw("document.title")
|
||||
.await
|
||||
@@ -365,22 +486,55 @@ impl BrowserSession {
|
||||
.await
|
||||
.unwrap_or_default();
|
||||
|
||||
// Truncate HTML to avoid massive payloads to the LLM
|
||||
let truncated = if html.len() > 50_000 {
|
||||
format!(
|
||||
"{}... [truncated, {} total chars]",
|
||||
&html[..50_000],
|
||||
html.len()
|
||||
)
|
||||
} else {
|
||||
html.to_string()
|
||||
};
|
||||
// Get a structured summary instead of raw HTML (more useful for LLM)
|
||||
let summary_js = r#"(function(){
|
||||
var result = {forms:[], links:[], inputs:[], buttons:[], headings:[], text:''};
|
||||
|
||||
// Forms
|
||||
document.querySelectorAll('form').forEach(function(f,i){
|
||||
if(i<10) result.forms.push({action:f.action, method:f.method, id:f.id});
|
||||
});
|
||||
|
||||
// Links
|
||||
document.querySelectorAll('a[href]').forEach(function(a,i){
|
||||
if(i<30) result.links.push({text:a.textContent.trim().substring(0,80), href:a.href});
|
||||
});
|
||||
|
||||
// Inputs
|
||||
document.querySelectorAll('input,select,textarea').forEach(function(el,i){
|
||||
if(i<30) result.inputs.push({
|
||||
tag:el.tagName.toLowerCase(),
|
||||
type:el.type||'',
|
||||
name:el.name||'',
|
||||
id:el.id||'',
|
||||
placeholder:el.placeholder||'',
|
||||
value:el.type==='password'?'***':el.value.substring(0,50)
|
||||
});
|
||||
});
|
||||
|
||||
// Buttons
|
||||
document.querySelectorAll('button,[type=submit],[role=button]').forEach(function(b,i){
|
||||
if(i<20) result.buttons.push({text:b.textContent.trim().substring(0,60), type:b.type||'', id:b.id||''});
|
||||
});
|
||||
|
||||
// Headings
|
||||
document.querySelectorAll('h1,h2,h3').forEach(function(h,i){
|
||||
if(i<10) result.headings.push(h.textContent.trim().substring(0,100));
|
||||
});
|
||||
|
||||
// Page text (truncated)
|
||||
result.text = document.body ? document.body.innerText.substring(0, 3000) : '';
|
||||
|
||||
return JSON.stringify(result);
|
||||
})()"#;
|
||||
|
||||
let summary = self.evaluate_raw(summary_js).await.unwrap_or_default();
|
||||
let page_data: serde_json::Value = serde_json::from_str(&summary).unwrap_or(json!({}));
|
||||
|
||||
Ok(json!({
|
||||
"url": url,
|
||||
"title": title,
|
||||
"html": truncated,
|
||||
"html_length": html.len(),
|
||||
"page": page_data,
|
||||
}))
|
||||
}
|
||||
|
||||
@@ -431,7 +585,15 @@ impl BrowserSession {
|
||||
}
|
||||
}
|
||||
|
||||
// ── CDP helpers (same pattern as compliance-agent/src/pentest/report/pdf.rs) ──
|
||||
/// Clean up the browser session for a pentest session (call when session ends).
|
||||
pub async fn cleanup_browser_session(session_id: &str) {
|
||||
let mut pool = BROWSER_SESSIONS.lock().await;
|
||||
if let Some(mut sess) = pool.remove(session_id) {
|
||||
let _ = sess.close().await;
|
||||
}
|
||||
}
|
||||
|
||||
// ── CDP helpers ──
|
||||
|
||||
async fn cdp_send(
|
||||
ws: &mut WsStream,
|
||||
|
||||
63
deploy/docker-compose.mailserver.yml
Normal file
63
deploy/docker-compose.mailserver.yml
Normal file
@@ -0,0 +1,63 @@
|
||||
# Compose file for the mail server defined below (SMTP + IMAP; see the port
# comments). Plus-addressing is enabled via POSTFIX_RECIPIENT_DELIMITER.
version: "3.8"

services:
  mailserver:
    image: ghcr.io/docker-mailserver/docker-mailserver:14
    hostname: mail.scanner.meghsakha.com
    domainname: scanner.meghsakha.com
    container_name: mailserver
    ports:
      - "25:25"   # SMTP (inbound mail)
      - "143:143" # IMAP (orchestrator reads mail)
      - "993:993" # IMAPS (TLS)
      - "587:587" # Submission (outbound, if needed)
    volumes:
      - maildata:/var/mail
      - mailstate:/var/mail-state
      - maillogs:/var/log/mail
      - /etc/localtime:/etc/localtime:ro
    environment:
      # Hostname
      - OVERRIDE_HOSTNAME=mail.scanner.meghsakha.com

      # Disable features we don't need
      - ENABLE_SPAMASSASSIN=0
      - ENABLE_CLAMAV=0
      - ENABLE_FAIL2BAN=0
      - ENABLE_POSTGREY=0
      - ENABLE_AMAVIS=0

      # Enable what we need
      - ENABLE_IMAP=1
      - ENABLE_POP3=0

      # Plus-addressing (critical for pentest)
      - POSTFIX_RECIPIENT_DELIMITER=+

      # SSL (start with no TLS, add Let's Encrypt later)
      - SSL_TYPE=

      # Do not implicitly trust any Docker network as a relay source
      - PERMIT_DOCKER=none

      # Disable DKIM/DMARC/SPF processing and spoof protection — we need to
      # accept verification emails from Keycloak and other external senders
      - ENABLE_OPENDKIM=0
      - ENABLE_OPENDMARC=0
      - ENABLE_POLICYD_SPF=0
      - SPOOF_PROTECTION=0

      # One domain: accept mail for our domain only
      - POSTFIX_MYDESTINATION=scanner.meghsakha.com, localhost

    restart: unless-stopped
    healthcheck:
      # CMD-SHELL is required for the pipe: the exec form ("CMD") does not run
      # a shell, so "|" and "grep" would be passed to `ss` as literal arguments.
      # The ':25 ' pattern anchors on the port so e.g. :2525 does not match.
      test: ["CMD-SHELL", "ss -tln | grep -q ':25 '"]
      interval: 30s
      timeout: 10s
      retries: 3

volumes:
  maildata:
  mailstate:
  maillogs:
|
||||
@@ -1,6 +1,6 @@
|
||||
services:
|
||||
mongo:
|
||||
image: mongo:latest
|
||||
image: mongo:7
|
||||
ports:
|
||||
- "27017:27017"
|
||||
environment:
|
||||
|
||||
Reference in New Issue
Block a user