feat: pentest onboarding — streaming, browser automation, reports, user cleanup (#16)
All checks were successful
CI / Check (push) Has been skipped
CI / Detect Changes (push) Successful in 7s
CI / Deploy Agent (push) Successful in 2s
CI / Deploy Dashboard (push) Successful in 2s
CI / Deploy Docs (push) Successful in 2s
CI / Deploy MCP (push) Successful in 2s

Complete pentest feature overhaul: SSE streaming, session-persistent browser tool (CDP), AES-256 credential encryption, auto-screenshots in reports, code-level remediation correlation, SAST triage chunking, context window optimization, test user cleanup (Keycloak/Auth0/Okta), wizard dropdowns, attack chain improvements, architecture docs with Mermaid diagrams.

Co-authored-by: Sharang Parnerkar <parnerkarsharang@gmail.com>
Reviewed-on: #16
This commit was merged in pull request #16.
This commit is contained in:
2026-03-17 20:32:20 +00:00
parent 11e1c5f438
commit c461faa2fb
57 changed files with 8844 additions and 2423 deletions

View File

@@ -1,17 +1,30 @@
use std::sync::Arc;
use dashmap::DashMap;
use tokio::sync::{broadcast, watch, Semaphore};
use compliance_core::models::pentest::PentestEvent;
use compliance_core::AgentConfig;
use crate::database::Database;
use crate::llm::LlmClient;
use crate::pipeline::orchestrator::PipelineOrchestrator;
/// Default maximum concurrent pentest sessions.
const DEFAULT_MAX_CONCURRENT_SESSIONS: usize = 5;
#[derive(Clone)]
pub struct ComplianceAgent {
pub config: AgentConfig,
pub db: Database,
pub llm: Arc<LlmClient>,
pub http: reqwest::Client,
/// Per-session broadcast senders for SSE streaming.
pub session_streams: Arc<DashMap<String, broadcast::Sender<PentestEvent>>>,
/// Per-session pause controls (true = paused).
pub session_pause: Arc<DashMap<String, watch::Sender<bool>>>,
/// Semaphore limiting concurrent pentest sessions.
pub session_semaphore: Arc<Semaphore>,
}
impl ComplianceAgent {
@@ -27,6 +40,9 @@ impl ComplianceAgent {
db,
llm,
http: reqwest::Client::new(),
session_streams: Arc::new(DashMap::new()),
session_pause: Arc::new(DashMap::new()),
session_semaphore: Arc::new(Semaphore::new(DEFAULT_MAX_CONCURRENT_SESSIONS)),
}
}
@@ -74,4 +90,54 @@ impl ComplianceAgent {
.run_pr_review(&repo, repo_id, pr_number, base_sha, head_sha)
.await
}
// ── Session stream management ──────────────────────────────────
/// Register a broadcast sender for a session. Returns the sender.
pub fn register_session_stream(&self, session_id: &str) -> broadcast::Sender<PentestEvent> {
let (tx, _) = broadcast::channel(256);
self.session_streams
.insert(session_id.to_string(), tx.clone());
tx
}
/// Subscribe to a session's broadcast stream.
pub fn subscribe_session(&self, session_id: &str) -> Option<broadcast::Receiver<PentestEvent>> {
self.session_streams
.get(session_id)
.map(|tx| tx.subscribe())
}
// ── Session pause/resume management ────────────────────────────
/// Register a pause control for a session. Returns the watch receiver.
pub fn register_pause_control(&self, session_id: &str) -> watch::Receiver<bool> {
let (tx, rx) = watch::channel(false);
self.session_pause.insert(session_id.to_string(), tx);
rx
}
/// Pause a session.
pub fn pause_session(&self, session_id: &str) -> bool {
if let Some(tx) = self.session_pause.get(session_id) {
tx.send(true).is_ok()
} else {
false
}
}
/// Resume a session.
pub fn resume_session(&self, session_id: &str) -> bool {
if let Some(tx) = self.session_pause.get(session_id) {
tx.send(false).is_ok()
} else {
false
}
}
/// Clean up all per-session resources.
pub fn cleanup_session(&self, session_id: &str) {
self.session_streams.remove(session_id);
self.session_pause.remove(session_id);
}
}

View File

@@ -7,8 +7,12 @@ use axum::Json;
use mongodb::bson::doc;
use serde::Deserialize;
use futures_util::StreamExt;
use compliance_core::models::dast::DastFinding;
use compliance_core::models::finding::Finding;
use compliance_core::models::pentest::*;
use compliance_core::models::sbom::SbomEntry;
use crate::agent::ComplianceAgent;
@@ -103,6 +107,97 @@ pub async fn export_session_report(
Err(_) => Vec::new(),
};
// Fetch SAST findings, SBOM, and code context for the linked repository
let repo_id = session
.repo_id
.clone()
.or_else(|| target.as_ref().and_then(|t| t.repo_id.clone()));
let (sast_findings, sbom_entries, code_context) = if let Some(ref rid) = repo_id {
let sast: Vec<Finding> = match agent
.db
.findings()
.find(doc! {
"repo_id": rid,
"status": { "$in": ["open", "triaged"] },
})
.sort(doc! { "severity": -1 })
.limit(100)
.await
{
Ok(mut cursor) => {
let mut results = Vec::new();
while let Some(Ok(f)) = cursor.next().await {
results.push(f);
}
results
}
Err(_) => Vec::new(),
};
let sbom: Vec<SbomEntry> = match agent
.db
.sbom_entries()
.find(doc! {
"repo_id": rid,
"known_vulnerabilities": { "$exists": true, "$ne": [] },
})
.limit(50)
.await
{
Ok(mut cursor) => {
let mut results = Vec::new();
while let Some(Ok(e)) = cursor.next().await {
results.push(e);
}
results
}
Err(_) => Vec::new(),
};
// Build code context from graph nodes
let code_ctx: Vec<CodeContextHint> = match agent
.db
.graph_nodes()
.find(doc! { "repo_id": rid, "is_entry_point": true })
.limit(50)
.await
{
Ok(mut cursor) => {
let mut nodes_vec = Vec::new();
while let Some(Ok(n)) = cursor.next().await {
let linked_vulns: Vec<String> = sast
.iter()
.filter(|f| f.file_path.as_deref() == Some(&n.file_path))
.map(|f| {
format!(
"[{}] {}: {} (line {})",
f.severity,
f.scanner,
f.title,
f.line_number.unwrap_or(0)
)
})
.collect();
nodes_vec.push(CodeContextHint {
endpoint_pattern: n.qualified_name.clone(),
handler_function: n.name.clone(),
file_path: n.file_path.clone(),
code_snippet: String::new(),
known_vulnerabilities: linked_vulns,
});
}
nodes_vec
}
Err(_) => Vec::new(),
};
(sast, sbom, code_ctx)
} else {
(Vec::new(), Vec::new(), Vec::new())
};
let config = session.config.clone();
let ctx = crate::pentest::report::ReportContext {
session,
target_name,
@@ -115,6 +210,10 @@ pub async fn export_session_report(
body.requester_name
},
requester_email: body.requester_email,
config,
sast_findings,
sbom_entries,
code_context,
};
let report = crate::pentest::generate_encrypted_report(&ctx, &body.password)

View File

@@ -17,10 +17,12 @@ type AgentExt = Extension<Arc<ComplianceAgent>>;
#[derive(Deserialize)]
pub struct CreateSessionRequest {
pub target_id: String,
pub target_id: Option<String>,
#[serde(default = "default_strategy")]
pub strategy: String,
pub message: Option<String>,
/// Wizard configuration — if present, takes precedence over legacy fields
pub config: Option<PentestConfig>,
}
fn default_strategy() -> String {
@@ -32,83 +34,365 @@ pub struct SendMessageRequest {
pub message: String,
}
#[derive(Deserialize)]
pub struct LookupRepoQuery {
pub url: String,
}
/// POST /api/v1/pentest/sessions — Create a new pentest session and start the orchestrator
#[tracing::instrument(skip_all)]
pub async fn create_session(
Extension(agent): AgentExt,
Json(req): Json<CreateSessionRequest>,
) -> Result<Json<ApiResponse<PentestSession>>, (StatusCode, String)> {
let oid = mongodb::bson::oid::ObjectId::parse_str(&req.target_id).map_err(|_| {
(
StatusCode::BAD_REQUEST,
"Invalid target_id format".to_string(),
)
})?;
// Look up the target
let target = agent
.db
.dast_targets()
.find_one(doc! { "_id": oid })
.await
.map_err(|e| {
// Try to acquire a concurrency permit
let permit = agent
.session_semaphore
.clone()
.try_acquire_owned()
.map_err(|_| {
(
StatusCode::INTERNAL_SERVER_ERROR,
format!("Database error: {e}"),
StatusCode::TOO_MANY_REQUESTS,
"Maximum concurrent pentest sessions reached. Try again later.".to_string(),
)
})?
.ok_or_else(|| (StatusCode::NOT_FOUND, "Target not found".to_string()))?;
})?;
// Parse strategy
let strategy = match req.strategy.as_str() {
if let Some(ref config) = req.config {
// ── Wizard path ──────────────────────────────────────────────
if !config.disclaimer_accepted {
return Err((
StatusCode::BAD_REQUEST,
"Disclaimer must be accepted".to_string(),
));
}
// Look up or auto-create DastTarget by app_url
let target = match agent
.db
.dast_targets()
.find_one(doc! { "base_url": &config.app_url })
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("DB error: {e}")))?
{
Some(t) => t,
None => {
use compliance_core::models::dast::{DastTarget, DastTargetType};
let mut t = DastTarget::new(
config.app_url.clone(),
config.app_url.clone(),
DastTargetType::WebApp,
);
if let Some(rl) = config.rate_limit {
t.rate_limit = rl;
}
t.allow_destructive = config.allow_destructive;
t.excluded_paths = config.scope_exclusions.clone();
let res = agent.db.dast_targets().insert_one(&t).await.map_err(|e| {
(
StatusCode::INTERNAL_SERVER_ERROR,
format!("Failed to create target: {e}"),
)
})?;
t.id = res.inserted_id.as_object_id();
t
}
};
let target_id = target.id.map(|oid| oid.to_hex()).unwrap_or_default();
// Parse strategy from config or request
let strat_str = config.strategy.as_deref().unwrap_or(req.strategy.as_str());
let strategy = parse_strategy(strat_str);
let mut session = PentestSession::new(target_id, strategy);
session.config = Some(config.clone());
session.repo_id = target.repo_id.clone();
// Resolve repo_id from git_repo_url if provided
if let Some(ref git_url) = config.git_repo_url {
if let Ok(Some(repo)) = agent
.db
.repositories()
.find_one(doc! { "git_url": git_url })
.await
{
session.repo_id = repo.id.map(|oid| oid.to_hex());
}
}
let insert_result = agent
.db
.pentest_sessions()
.insert_one(&session)
.await
.map_err(|e| {
(
StatusCode::INTERNAL_SERVER_ERROR,
format!("Failed to create session: {e}"),
)
})?;
session.id = insert_result.inserted_id.as_object_id();
let session_id_str = session.id.map(|oid| oid.to_hex()).unwrap_or_default();
// Register broadcast stream and pause control
let event_tx = agent.register_session_stream(&session_id_str);
let pause_rx = agent.register_pause_control(&session_id_str);
// Merge server-default IMAP/email settings where wizard left blanks
if let Some(ref mut cfg) = session.config {
if cfg.auth.mode == AuthMode::AutoRegister {
if cfg.auth.verification_email.is_none() {
cfg.auth.verification_email = agent.config.pentest_verification_email.clone();
}
if cfg.auth.imap_host.is_none() {
cfg.auth.imap_host = agent.config.pentest_imap_host.clone();
}
if cfg.auth.imap_port.is_none() {
cfg.auth.imap_port = agent.config.pentest_imap_port;
}
if cfg.auth.imap_username.is_none() {
cfg.auth.imap_username = agent.config.pentest_imap_username.clone();
}
if cfg.auth.imap_password.is_none() {
cfg.auth.imap_password = agent.config.pentest_imap_password.as_ref().map(|s| {
use secrecy::ExposeSecret;
s.expose_secret().to_string()
});
}
}
}
// Pre-populate test user record for auto-register sessions
if let Some(ref cfg) = session.config {
if cfg.auth.mode == AuthMode::AutoRegister {
let verification_email = cfg.auth.verification_email.clone();
// Build plus-addressed email for this session
let test_email = verification_email.as_deref().map(|email| {
let parts: Vec<&str> = email.splitn(2, '@').collect();
if parts.len() == 2 {
format!("{}+{}@{}", parts[0], session_id_str, parts[1])
} else {
email.to_string()
}
});
// Detect identity provider from keycloak config
let provider = if agent.config.keycloak_url.is_some() {
Some(compliance_core::models::pentest::IdentityProvider::Keycloak)
} else {
None
};
session.test_user = Some(compliance_core::models::pentest::TestUserRecord {
username: None, // LLM will choose; updated after registration
email: test_email,
provider_user_id: None,
provider,
cleaned_up: false,
});
}
}
// Encrypt credentials before they linger in memory
let mut session_for_task = session.clone();
if let Some(ref mut cfg) = session_for_task.config {
cfg.auth.username = cfg
.auth
.username
.as_ref()
.map(|u| crate::pentest::crypto::encrypt(u));
cfg.auth.password = cfg
.auth
.password
.as_ref()
.map(|p| crate::pentest::crypto::encrypt(p));
}
// Persist encrypted credentials to DB
if session_for_task.config.is_some() {
if let Some(sid) = session.id {
let _ = agent
.db
.pentest_sessions()
.update_one(
doc! { "_id": sid },
doc! { "$set": {
"config.auth.username": session_for_task.config.as_ref()
.and_then(|c| c.auth.username.as_deref())
.map(|s| mongodb::bson::Bson::String(s.to_string()))
.unwrap_or(mongodb::bson::Bson::Null),
"config.auth.password": session_for_task.config.as_ref()
.and_then(|c| c.auth.password.as_deref())
.map(|s| mongodb::bson::Bson::String(s.to_string()))
.unwrap_or(mongodb::bson::Bson::Null),
}},
)
.await;
}
}
let initial_message = config
.initial_instructions
.clone()
.or(req.message.clone())
.unwrap_or_else(|| {
format!(
"Begin a {} penetration test against {} ({}). \
Identify vulnerabilities and provide evidence for each finding.",
session.strategy, target.name, target.base_url,
)
});
let llm = agent.llm.clone();
let db = agent.db.clone();
let session_clone = session.clone();
let target_clone = target.clone();
let agent_ref = agent.clone();
tokio::spawn(async move {
let orchestrator = PentestOrchestrator::new(llm, db, event_tx, Some(pause_rx));
orchestrator
.run_session_guarded(&session_clone, &target_clone, &initial_message)
.await;
// Clean up session resources
agent_ref.cleanup_session(&session_id_str);
// Release concurrency permit
drop(permit);
});
// Redact credentials in response
let mut response_session = session;
if let Some(ref mut cfg) = response_session.config {
if cfg.auth.username.is_some() {
cfg.auth.username = Some("********".to_string());
}
if cfg.auth.password.is_some() {
cfg.auth.password = Some("********".to_string());
}
}
Ok(Json(ApiResponse {
data: response_session,
total: None,
page: None,
}))
} else {
// ── Legacy path ──────────────────────────────────────────────
let target_id = req.target_id.clone().ok_or_else(|| {
(
StatusCode::BAD_REQUEST,
"target_id is required for legacy creation".to_string(),
)
})?;
let oid = mongodb::bson::oid::ObjectId::parse_str(&target_id).map_err(|_| {
(
StatusCode::BAD_REQUEST,
"Invalid target_id format".to_string(),
)
})?;
let target = agent
.db
.dast_targets()
.find_one(doc! { "_id": oid })
.await
.map_err(|e| {
(
StatusCode::INTERNAL_SERVER_ERROR,
format!("Database error: {e}"),
)
})?
.ok_or_else(|| (StatusCode::NOT_FOUND, "Target not found".to_string()))?;
let strategy = parse_strategy(&req.strategy);
let mut session = PentestSession::new(target_id, strategy);
session.repo_id = target.repo_id.clone();
let insert_result = agent
.db
.pentest_sessions()
.insert_one(&session)
.await
.map_err(|e| {
(
StatusCode::INTERNAL_SERVER_ERROR,
format!("Failed to create session: {e}"),
)
})?;
session.id = insert_result.inserted_id.as_object_id();
let session_id_str = session.id.map(|oid| oid.to_hex()).unwrap_or_default();
// Register broadcast stream and pause control
let event_tx = agent.register_session_stream(&session_id_str);
let pause_rx = agent.register_pause_control(&session_id_str);
let initial_message = req.message.unwrap_or_else(|| {
format!(
"Begin a {} penetration test against {} ({}). \
Identify vulnerabilities and provide evidence for each finding.",
session.strategy, target.name, target.base_url,
)
});
let llm = agent.llm.clone();
let db = agent.db.clone();
let session_clone = session.clone();
let target_clone = target.clone();
let agent_ref = agent.clone();
tokio::spawn(async move {
let orchestrator = PentestOrchestrator::new(llm, db, event_tx, Some(pause_rx));
orchestrator
.run_session_guarded(&session_clone, &target_clone, &initial_message)
.await;
agent_ref.cleanup_session(&session_id_str);
drop(permit);
});
Ok(Json(ApiResponse {
data: session,
total: None,
page: None,
}))
}
}
fn parse_strategy(s: &str) -> PentestStrategy {
match s {
"quick" => PentestStrategy::Quick,
"targeted" => PentestStrategy::Targeted,
"aggressive" => PentestStrategy::Aggressive,
"stealth" => PentestStrategy::Stealth,
_ => PentestStrategy::Comprehensive,
}
}
/// GET /api/v1/pentest/lookup-repo — Look up a tracked repository by git URL
#[tracing::instrument(skip_all)]
pub async fn lookup_repo(
Extension(agent): AgentExt,
Query(params): Query<LookupRepoQuery>,
) -> Result<Json<ApiResponse<serde_json::Value>>, StatusCode> {
let repo = agent
.db
.repositories()
.find_one(doc! { "git_url": &params.url })
.await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
let data = match repo {
Some(r) => serde_json::json!({
"name": r.name,
"default_branch": r.default_branch,
"last_scanned_commit": r.last_scanned_commit,
}),
None => serde_json::Value::Null,
};
// Create session
let mut session = PentestSession::new(req.target_id.clone(), strategy);
session.repo_id = target.repo_id.clone();
let insert_result = agent
.db
.pentest_sessions()
.insert_one(&session)
.await
.map_err(|e| {
(
StatusCode::INTERNAL_SERVER_ERROR,
format!("Failed to create session: {e}"),
)
})?;
// Set the generated ID back on the session so the orchestrator has it
session.id = insert_result.inserted_id.as_object_id();
let initial_message = req.message.unwrap_or_else(|| {
format!(
"Begin a {} penetration test against {} ({}). \
Identify vulnerabilities and provide evidence for each finding.",
session.strategy, target.name, target.base_url,
)
});
// Spawn the orchestrator on a background task
let llm = agent.llm.clone();
let db = agent.db.clone();
let session_clone = session.clone();
let target_clone = target.clone();
tokio::spawn(async move {
let orchestrator = PentestOrchestrator::new(llm, db);
orchestrator
.run_session_guarded(&session_clone, &target_clone, &initial_message)
.await;
});
Ok(Json(ApiResponse {
data: session,
data,
total: None,
page: None,
}))
@@ -158,7 +442,7 @@ pub async fn get_session(
) -> Result<Json<ApiResponse<PentestSession>>, StatusCode> {
let oid = mongodb::bson::oid::ObjectId::parse_str(&id).map_err(|_| StatusCode::BAD_REQUEST)?;
let session = agent
let mut session = agent
.db
.pentest_sessions()
.find_one(doc! { "_id": oid })
@@ -166,6 +450,16 @@ pub async fn get_session(
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?
.ok_or(StatusCode::NOT_FOUND)?;
// Redact credentials in response
if let Some(ref mut cfg) = session.config {
if cfg.auth.username.is_some() {
cfg.auth.username = Some("********".to_string());
}
if cfg.auth.password.is_some() {
cfg.auth.password = Some("********".to_string());
}
}
Ok(Json(ApiResponse {
data: session,
total: None,
@@ -241,8 +535,20 @@ pub async fn send_message(
let llm = agent.llm.clone();
let db = agent.db.clone();
let message = req.message.clone();
// Use existing broadcast sender if available, otherwise create a new one
let event_tx = agent
.subscribe_session(&session_id)
.and_then(|_| {
agent
.session_streams
.get(&session_id)
.map(|entry| entry.value().clone())
})
.unwrap_or_else(|| agent.register_session_stream(&session_id));
tokio::spawn(async move {
let orchestrator = PentestOrchestrator::new(llm, db);
let orchestrator = PentestOrchestrator::new(llm, db, event_tx, None);
orchestrator
.run_session_guarded(&session, &target, &message)
.await;
@@ -277,10 +583,10 @@ pub async fn stop_session(
})?
.ok_or_else(|| (StatusCode::NOT_FOUND, "Session not found".to_string()))?;
if session.status != PentestStatus::Running {
if session.status != PentestStatus::Running && session.status != PentestStatus::Paused {
return Err((
StatusCode::BAD_REQUEST,
format!("Session is {}, not running", session.status),
format!("Session is {}, not running or paused", session.status),
));
}
@@ -303,6 +609,9 @@ pub async fn stop_session(
)
})?;
// Clean up session resources
agent.cleanup_session(&id);
let updated = agent
.db
.pentest_sessions()
@@ -328,6 +637,92 @@ pub async fn stop_session(
}))
}
/// POST /api/v1/pentest/sessions/:id/pause — Pause a running pentest session
#[tracing::instrument(skip_all, fields(session_id = %id))]
pub async fn pause_session(
Extension(agent): AgentExt,
Path(id): Path<String>,
) -> Result<Json<ApiResponse<serde_json::Value>>, (StatusCode, String)> {
let oid = mongodb::bson::oid::ObjectId::parse_str(&id)
.map_err(|_| (StatusCode::BAD_REQUEST, "Invalid session ID".to_string()))?;
let session = agent
.db
.pentest_sessions()
.find_one(doc! { "_id": oid })
.await
.map_err(|e| {
(
StatusCode::INTERNAL_SERVER_ERROR,
format!("Database error: {e}"),
)
})?
.ok_or_else(|| (StatusCode::NOT_FOUND, "Session not found".to_string()))?;
if session.status != PentestStatus::Running {
return Err((
StatusCode::BAD_REQUEST,
format!("Session is {}, not running", session.status),
));
}
if !agent.pause_session(&id) {
return Err((
StatusCode::INTERNAL_SERVER_ERROR,
"Failed to send pause signal".to_string(),
));
}
Ok(Json(ApiResponse {
data: serde_json::json!({ "status": "paused" }),
total: None,
page: None,
}))
}
/// POST /api/v1/pentest/sessions/:id/resume — Resume a paused pentest session
#[tracing::instrument(skip_all, fields(session_id = %id))]
pub async fn resume_session(
Extension(agent): AgentExt,
Path(id): Path<String>,
) -> Result<Json<ApiResponse<serde_json::Value>>, (StatusCode, String)> {
let oid = mongodb::bson::oid::ObjectId::parse_str(&id)
.map_err(|_| (StatusCode::BAD_REQUEST, "Invalid session ID".to_string()))?;
let session = agent
.db
.pentest_sessions()
.find_one(doc! { "_id": oid })
.await
.map_err(|e| {
(
StatusCode::INTERNAL_SERVER_ERROR,
format!("Database error: {e}"),
)
})?
.ok_or_else(|| (StatusCode::NOT_FOUND, "Session not found".to_string()))?;
if session.status != PentestStatus::Paused {
return Err((
StatusCode::BAD_REQUEST,
format!("Session is {}, not paused", session.status),
));
}
if !agent.resume_session(&id) {
return Err((
StatusCode::INTERNAL_SERVER_ERROR,
"Failed to send resume signal".to_string(),
));
}
Ok(Json(ApiResponse {
data: serde_json::json!({ "status": "running" }),
total: None,
page: None,
}))
}
/// GET /api/v1/pentest/sessions/:id/attack-chain — Get attack chain nodes for a session
#[tracing::instrument(skip_all, fields(session_id = %id))]
pub async fn get_attack_chain(

View File

@@ -1,10 +1,14 @@
use std::convert::Infallible;
use std::sync::Arc;
use std::time::Duration;
use axum::extract::{Extension, Path};
use axum::http::StatusCode;
use axum::response::sse::{Event, Sse};
use axum::response::sse::{Event, KeepAlive, Sse};
use futures_util::stream;
use mongodb::bson::doc;
use tokio_stream::wrappers::BroadcastStream;
use tokio_stream::StreamExt;
use compliance_core::models::pentest::*;
@@ -16,16 +20,13 @@ type AgentExt = Extension<Arc<ComplianceAgent>>;
/// GET /api/v1/pentest/sessions/:id/stream — SSE endpoint for real-time events
///
/// Returns recent messages as SSE events (polling approach).
/// True real-time streaming with broadcast channels will be added in a future iteration.
/// Replays stored messages/nodes as initial burst, then subscribes to the
/// broadcast channel for live updates. Sends keepalive comments every 15s.
#[tracing::instrument(skip_all, fields(session_id = %id))]
pub async fn session_stream(
Extension(agent): AgentExt,
Path(id): Path<String>,
) -> Result<
Sse<impl futures_util::Stream<Item = Result<Event, std::convert::Infallible>>>,
StatusCode,
> {
) -> Result<Sse<impl futures_util::Stream<Item = Result<Event, Infallible>>>, StatusCode> {
let oid = mongodb::bson::oid::ObjectId::parse_str(&id).map_err(|_| StatusCode::BAD_REQUEST)?;
// Verify session exists
@@ -37,6 +38,10 @@ pub async fn session_stream(
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?
.ok_or(StatusCode::NOT_FOUND)?;
// ── Initial burst: replay stored data ──────────────────────────
let mut initial_events: Vec<Result<Event, Infallible>> = Vec::new();
// Fetch recent messages for this session
let messages: Vec<PentestMessage> = match agent
.db
@@ -63,9 +68,6 @@ pub async fn session_stream(
Err(_) => Vec::new(),
};
// Build SSE events from stored data
let mut events: Vec<Result<Event, std::convert::Infallible>> = Vec::new();
for msg in &messages {
let event_data = serde_json::json!({
"type": "message",
@@ -74,7 +76,7 @@ pub async fn session_stream(
"created_at": msg.created_at.to_rfc3339(),
});
if let Ok(data) = serde_json::to_string(&event_data) {
events.push(Ok(Event::default().event("message").data(data)));
initial_events.push(Ok(Event::default().event("message").data(data)));
}
}
@@ -87,11 +89,11 @@ pub async fn session_stream(
"findings_produced": node.findings_produced,
});
if let Ok(data) = serde_json::to_string(&event_data) {
events.push(Ok(Event::default().event("tool").data(data)));
initial_events.push(Ok(Event::default().event("tool").data(data)));
}
}
// Add session status event
// Add current session status event
let session = agent
.db
.pentest_sessions()
@@ -108,9 +110,49 @@ pub async fn session_stream(
"tool_invocations": s.tool_invocations,
});
if let Ok(data) = serde_json::to_string(&status_data) {
events.push(Ok(Event::default().event("status").data(data)));
initial_events.push(Ok(Event::default().event("status").data(data)));
}
}
Ok(Sse::new(stream::iter(events)))
// ── Live stream: subscribe to broadcast ────────────────────────
let live_stream = if let Some(rx) = agent.subscribe_session(&id) {
let broadcast = BroadcastStream::new(rx).filter_map(|result| match result {
Ok(event) => {
if let Ok(data) = serde_json::to_string(&event) {
let event_type = match &event {
PentestEvent::ToolStart { .. } => "tool_start",
PentestEvent::ToolComplete { .. } => "tool_complete",
PentestEvent::Finding { .. } => "finding",
PentestEvent::Message { .. } => "message",
PentestEvent::Complete { .. } => "complete",
PentestEvent::Error { .. } => "error",
PentestEvent::Thinking { .. } => "thinking",
PentestEvent::Paused => "paused",
PentestEvent::Resumed => "resumed",
};
Some(Ok(Event::default().event(event_type).data(data)))
} else {
None
}
}
Err(_) => None,
});
// Box to unify types
Box::pin(broadcast)
as std::pin::Pin<Box<dyn futures_util::Stream<Item = Result<Event, Infallible>> + Send>>
} else {
// No active broadcast — return empty stream
Box::pin(stream::empty())
as std::pin::Pin<Box<dyn futures_util::Stream<Item = Result<Event, Infallible>> + Send>>
};
// Chain initial burst + live stream
let combined = stream::iter(initial_events).chain(live_stream);
Ok(Sse::new(combined).keep_alive(
KeepAlive::new()
.interval(Duration::from_secs(15))
.text("keepalive"),
))
}

View File

@@ -100,6 +100,10 @@ pub fn build_router() -> Router {
get(handlers::chat::embedding_status),
)
// Pentest API endpoints
.route(
"/api/v1/pentest/lookup-repo",
get(handlers::pentest::lookup_repo),
)
.route(
"/api/v1/pentest/sessions",
get(handlers::pentest::list_sessions).post(handlers::pentest::create_session),
@@ -116,6 +120,14 @@ pub fn build_router() -> Router {
"/api/v1/pentest/sessions/{id}/stop",
post(handlers::pentest::stop_session),
)
.route(
"/api/v1/pentest/sessions/{id}/pause",
post(handlers::pentest::pause_session),
)
.route(
"/api/v1/pentest/sessions/{id}/resume",
post(handlers::pentest::resume_session),
)
.route(
"/api/v1/pentest/sessions/{id}/stream",
get(handlers::pentest::session_stream),

View File

@@ -49,5 +49,12 @@ pub fn load_config() -> Result<AgentConfig, AgentError> {
.unwrap_or_else(|| "/data/compliance-scanner/ssh/id_ed25519".to_string()),
keycloak_url: env_var_opt("KEYCLOAK_URL"),
keycloak_realm: env_var_opt("KEYCLOAK_REALM"),
keycloak_admin_username: env_var_opt("KEYCLOAK_ADMIN_USERNAME"),
keycloak_admin_password: env_secret_opt("KEYCLOAK_ADMIN_PASSWORD"),
pentest_verification_email: env_var_opt("PENTEST_VERIFICATION_EMAIL"),
pentest_imap_host: env_var_opt("PENTEST_IMAP_HOST"),
pentest_imap_port: env_var_opt("PENTEST_IMAP_PORT").and_then(|p| p.parse().ok()),
pentest_imap_username: env_var_opt("PENTEST_IMAP_USERNAME"),
pentest_imap_password: env_secret_opt("PENTEST_IMAP_PASSWORD"),
})
}

View File

@@ -5,7 +5,10 @@ use compliance_core::models::{Finding, FindingStatus};
use crate::llm::LlmClient;
use crate::pipeline::orchestrator::GraphContext;
const TRIAGE_SYSTEM_PROMPT: &str = r#"You are a security finding triage expert. Analyze the following security finding with its code context and determine the appropriate action.
/// Maximum number of findings to include in a single LLM triage call.
const TRIAGE_CHUNK_SIZE: usize = 30;
const TRIAGE_SYSTEM_PROMPT: &str = r#"You are a security finding triage expert. Analyze each of the following security findings with its code context and determine the appropriate action.
Actions:
- "confirm": The finding is a true positive at the reported severity. Keep as-is.
@@ -19,8 +22,8 @@ Consider:
- Is the finding actionable by a developer?
- Would a real attacker be able to exploit this?
Respond in JSON format:
{"action": "confirm|downgrade|upgrade|dismiss", "confidence": 0-10, "rationale": "brief explanation", "remediation": "optional fix suggestion"}"#;
Respond with a JSON array, one entry per finding in the same order they were presented:
[{"id": "<fingerprint>", "action": "confirm|downgrade|upgrade|dismiss", "confidence": 0-10, "rationale": "brief explanation", "remediation": "optional fix suggestion"}, ...]"#;
pub async fn triage_findings(
llm: &Arc<LlmClient>,
@@ -29,60 +32,76 @@ pub async fn triage_findings(
) -> usize {
let mut passed = 0;
for finding in findings.iter_mut() {
let file_classification = classify_file_path(finding.file_path.as_deref());
// Process findings in chunks to avoid overflowing the LLM context window.
for chunk_start in (0..findings.len()).step_by(TRIAGE_CHUNK_SIZE) {
let chunk_end = (chunk_start + TRIAGE_CHUNK_SIZE).min(findings.len());
let chunk = &mut findings[chunk_start..chunk_end];
let mut user_prompt = format!(
"Scanner: {}\nRule: {}\nSeverity: {}\nTitle: {}\nDescription: {}\nFile: {}\nLine: {}\nCode: {}\nFile classification: {}",
finding.scanner,
finding.rule_id.as_deref().unwrap_or("N/A"),
finding.severity,
finding.title,
finding.description,
finding.file_path.as_deref().unwrap_or("N/A"),
finding.line_number.map(|n| n.to_string()).unwrap_or_else(|| "N/A".to_string()),
finding.code_snippet.as_deref().unwrap_or("N/A"),
file_classification,
);
// Build a combined prompt for the entire chunk.
let mut user_prompt = String::new();
let mut file_classifications: Vec<String> = Vec::new();
for (i, finding) in chunk.iter().enumerate() {
let file_classification = classify_file_path(finding.file_path.as_deref());
// Enrich with surrounding code context if possible
if let Some(context) = read_surrounding_context(finding) {
user_prompt.push_str(&format!(
"\n\n--- Surrounding Code (50 lines) ---\n{context}"
"\n--- Finding {} (id: {}) ---\nScanner: {}\nRule: {}\nSeverity: {}\nTitle: {}\nDescription: {}\nFile: {}\nLine: {}\nCode: {}\nFile classification: {}",
i + 1,
finding.fingerprint,
finding.scanner,
finding.rule_id.as_deref().unwrap_or("N/A"),
finding.severity,
finding.title,
finding.description,
finding.file_path.as_deref().unwrap_or("N/A"),
finding.line_number.map(|n| n.to_string()).unwrap_or_else(|| "N/A".to_string()),
finding.code_snippet.as_deref().unwrap_or("N/A"),
file_classification,
));
}
// Enrich with graph context if available
if let Some(ctx) = graph_context {
if let Some(impact) = ctx
.impacts
.iter()
.find(|i| i.finding_id == finding.fingerprint)
{
// Enrich with surrounding code context if possible
if let Some(context) = read_surrounding_context(finding) {
user_prompt.push_str(&format!(
"\n\n--- Code Graph Context ---\n\
Blast radius: {} nodes affected\n\
Entry points affected: {}\n\
Direct callers: {}\n\
Communities affected: {}\n\
Call chains: {}",
impact.blast_radius,
if impact.affected_entry_points.is_empty() {
"none".to_string()
} else {
impact.affected_entry_points.join(", ")
},
if impact.direct_callers.is_empty() {
"none".to_string()
} else {
impact.direct_callers.join(", ")
},
impact.affected_communities.len(),
impact.call_chains.len(),
"\n\n--- Surrounding Code (50 lines) ---\n{context}"
));
}
// Enrich with graph context if available
if let Some(ctx) = graph_context {
if let Some(impact) = ctx
.impacts
.iter()
.find(|im| im.finding_id == finding.fingerprint)
{
user_prompt.push_str(&format!(
"\n\n--- Code Graph Context ---\n\
Blast radius: {} nodes affected\n\
Entry points affected: {}\n\
Direct callers: {}\n\
Communities affected: {}\n\
Call chains: {}",
impact.blast_radius,
if impact.affected_entry_points.is_empty() {
"none".to_string()
} else {
impact.affected_entry_points.join(", ")
},
if impact.direct_callers.is_empty() {
"none".to_string()
} else {
impact.direct_callers.join(", ")
},
impact.affected_communities.len(),
impact.call_chains.len(),
));
}
}
user_prompt.push('\n');
file_classifications.push(file_classification);
}
// Send the batch to the LLM.
match llm
.chat(TRIAGE_SYSTEM_PROMPT, &user_prompt, Some(0.1))
.await
@@ -98,58 +117,77 @@ pub async fn triage_findings(
} else {
cleaned
};
if let Ok(result) = serde_json::from_str::<TriageResult>(cleaned) {
// Apply file-path confidence adjustment
let adjusted_confidence =
adjust_confidence(result.confidence, &file_classification);
finding.confidence = Some(adjusted_confidence);
finding.triage_action = Some(result.action.clone());
finding.triage_rationale = Some(result.rationale);
if let Some(remediation) = result.remediation {
finding.remediation = Some(remediation);
}
match result.action.as_str() {
"dismiss" => {
finding.status = FindingStatus::FalsePositive;
}
"downgrade" => {
// Downgrade severity by one level
finding.severity = downgrade_severity(&finding.severity);
finding.status = FindingStatus::Triaged;
passed += 1;
}
"upgrade" => {
finding.severity = upgrade_severity(&finding.severity);
finding.status = FindingStatus::Triaged;
passed += 1;
}
_ => {
// "confirm" or unknown — keep as-is
if adjusted_confidence >= 3.0 {
match serde_json::from_str::<Vec<TriageResult>>(cleaned) {
Ok(results) => {
for (idx, finding) in chunk.iter_mut().enumerate() {
// Match result by position; fall back to keeping the finding.
let Some(result) = results.get(idx) else {
finding.status = FindingStatus::Triaged;
passed += 1;
} else {
finding.status = FindingStatus::FalsePositive;
continue;
};
let file_classification = file_classifications
.get(idx)
.map(|s| s.as_str())
.unwrap_or("unknown");
let adjusted_confidence =
adjust_confidence(result.confidence, file_classification);
finding.confidence = Some(adjusted_confidence);
finding.triage_action = Some(result.action.clone());
finding.triage_rationale = Some(result.rationale.clone());
if let Some(ref remediation) = result.remediation {
finding.remediation = Some(remediation.clone());
}
match result.action.as_str() {
"dismiss" => {
finding.status = FindingStatus::FalsePositive;
}
"downgrade" => {
finding.severity = downgrade_severity(&finding.severity);
finding.status = FindingStatus::Triaged;
passed += 1;
}
"upgrade" => {
finding.severity = upgrade_severity(&finding.severity);
finding.status = FindingStatus::Triaged;
passed += 1;
}
_ => {
// "confirm" or unknown — keep as-is
if adjusted_confidence >= 3.0 {
finding.status = FindingStatus::Triaged;
passed += 1;
} else {
finding.status = FindingStatus::FalsePositive;
}
}
}
}
}
} else {
// Parse failure — keep the finding
finding.status = FindingStatus::Triaged;
passed += 1;
tracing::warn!(
"Failed to parse triage response for {}: {response}",
finding.fingerprint
);
Err(_) => {
// Batch parse failure — keep all findings in the chunk.
tracing::warn!(
"Failed to parse batch triage response for chunk starting at {chunk_start}: {cleaned}"
);
for finding in chunk.iter_mut() {
finding.status = FindingStatus::Triaged;
passed += 1;
}
}
}
}
Err(e) => {
// On LLM error, keep the finding
tracing::warn!("LLM triage failed for {}: {e}", finding.fingerprint);
finding.status = FindingStatus::Triaged;
passed += 1;
// On LLM error, keep all findings in the chunk.
tracing::warn!("LLM batch triage failed for chunk starting at {chunk_start}: {e}");
for finding in chunk.iter_mut() {
finding.status = FindingStatus::Triaged;
passed += 1;
}
}
}
}
@@ -266,6 +304,10 @@ fn upgrade_severity(
#[derive(serde::Deserialize)]
struct TriageResult {
/// Finding fingerprint echoed back by the LLM (optional).
#[serde(default)]
#[allow(dead_code)]
id: String,
#[serde(default = "default_action")]
action: String,
#[serde(default)]

View File

@@ -1,6 +1,6 @@
mod agent;
mod api;
mod config;
pub(crate) mod config;
mod database;
mod error;
mod llm;
@@ -15,11 +15,20 @@ mod webhooks;
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
dotenvy::dotenv().ok();
match dotenvy::dotenv() {
Ok(path) => eprintln!("[dotenv] Loaded from: {}", path.display()),
Err(e) => eprintln!("[dotenv] FAILED: {e}"),
}
let _telemetry_guard = compliance_core::telemetry::init_telemetry("compliance-agent");
tracing::info!("Loading configuration...");
// Log critical env vars at startup
tracing::info!(
chrome_ws_url = std::env::var("CHROME_WS_URL").ok().as_deref(),
pentest_email = std::env::var("PENTEST_VERIFICATION_EMAIL").ok().as_deref(),
encryption_key_set = std::env::var("PENTEST_ENCRYPTION_KEY").is_ok(),
"Loading configuration..."
);
let config = config::load_config()?;
// Ensure SSH key pair exists for cloning private repos

View File

@@ -0,0 +1,483 @@
use compliance_core::models::pentest::{IdentityProvider, TestUserRecord};
use compliance_core::AgentConfig;
use secrecy::ExposeSecret;
use tracing::{info, warn};
/// Attempt to delete a test user created during a pentest session.
///
/// Routes to the appropriate identity provider based on `TestUserRecord.provider`.
/// Falls back to browser-based cleanup if no API credentials are available.
///
/// Returns `Ok(true)` if the user was deleted, `Ok(false)` if skipped, `Err` on failure.
pub async fn cleanup_test_user(
user: &TestUserRecord,
config: &AgentConfig,
http: &reqwest::Client,
) -> Result<bool, String> {
if user.cleaned_up {
return Ok(false);
}
let provider = user.provider.as_ref();
match provider {
Some(IdentityProvider::Keycloak) => cleanup_keycloak(user, config, http).await,
Some(IdentityProvider::Auth0) => cleanup_auth0(user, config, http).await,
Some(IdentityProvider::Okta) => cleanup_okta(user, config, http).await,
Some(IdentityProvider::Firebase) => {
warn!("Firebase user cleanup not yet implemented");
Ok(false)
}
Some(IdentityProvider::Custom) | None => {
// For custom/unknown providers, try Keycloak if configured, else skip
if config.keycloak_url.is_some() && config.keycloak_admin_username.is_some() {
cleanup_keycloak(user, config, http).await
} else {
warn!(
username = user.username.as_deref(),
"No identity provider configured for cleanup — skipping"
);
Ok(false)
}
}
}
}
/// Delete a user from Keycloak via the Admin REST API.
///
/// Flow: get admin token → search user by username → delete by ID.
async fn cleanup_keycloak(
user: &TestUserRecord,
config: &AgentConfig,
http: &reqwest::Client,
) -> Result<bool, String> {
let base_url = config
.keycloak_url
.as_deref()
.ok_or("KEYCLOAK_URL not configured")?;
let realm = config
.keycloak_realm
.as_deref()
.ok_or("KEYCLOAK_REALM not configured")?;
let admin_user = config
.keycloak_admin_username
.as_deref()
.ok_or("KEYCLOAK_ADMIN_USERNAME not configured")?;
let admin_pass = config
.keycloak_admin_password
.as_ref()
.ok_or("KEYCLOAK_ADMIN_PASSWORD not configured")?;
let username = user
.username
.as_deref()
.ok_or("No username in test user record")?;
info!(username, realm, "Cleaning up Keycloak test user");
// Step 1: Get admin access token
let token_url = format!("{base_url}/realms/master/protocol/openid-connect/token");
let token_resp = http
.post(&token_url)
.form(&[
("grant_type", "password"),
("client_id", "admin-cli"),
("username", admin_user),
("password", admin_pass.expose_secret()),
])
.send()
.await
.map_err(|e| format!("Keycloak token request failed: {e}"))?;
if !token_resp.status().is_success() {
let status = token_resp.status();
let body = token_resp.text().await.unwrap_or_default();
return Err(format!("Keycloak admin auth failed ({status}): {body}"));
}
let token_body: serde_json::Value = token_resp
.json()
.await
.map_err(|e| format!("Failed to parse Keycloak token: {e}"))?;
let access_token = token_body
.get("access_token")
.and_then(|v| v.as_str())
.ok_or("No access_token in Keycloak response")?;
// Step 2: Search for user by username
let search_url =
format!("{base_url}/admin/realms/{realm}/users?username={username}&exact=true");
let search_resp = http
.get(&search_url)
.bearer_auth(access_token)
.send()
.await
.map_err(|e| format!("Keycloak user search failed: {e}"))?;
if !search_resp.status().is_success() {
let status = search_resp.status();
let body = search_resp.text().await.unwrap_or_default();
return Err(format!("Keycloak user search failed ({status}): {body}"));
}
let users: Vec<serde_json::Value> = search_resp
.json()
.await
.map_err(|e| format!("Failed to parse Keycloak users: {e}"))?;
let user_id = users
.first()
.and_then(|u| u.get("id"))
.and_then(|v| v.as_str())
.ok_or_else(|| format!("User '{username}' not found in Keycloak realm '{realm}'"))?;
// Step 3: Delete the user
let delete_url = format!("{base_url}/admin/realms/{realm}/users/{user_id}");
let delete_resp = http
.delete(&delete_url)
.bearer_auth(access_token)
.send()
.await
.map_err(|e| format!("Keycloak user delete failed: {e}"))?;
if delete_resp.status().is_success() || delete_resp.status().as_u16() == 204 {
info!(username, user_id, "Keycloak test user deleted");
Ok(true)
} else {
let status = delete_resp.status();
let body = delete_resp.text().await.unwrap_or_default();
Err(format!("Keycloak delete failed ({status}): {body}"))
}
}
/// Delete a user from Auth0 via the Management API.
///
/// Requires `AUTH0_DOMAIN`, `AUTH0_CLIENT_ID`, `AUTH0_CLIENT_SECRET` env vars.
async fn cleanup_auth0(
user: &TestUserRecord,
_config: &AgentConfig,
http: &reqwest::Client,
) -> Result<bool, String> {
let domain = std::env::var("AUTH0_DOMAIN").map_err(|_| "AUTH0_DOMAIN not set")?;
let client_id = std::env::var("AUTH0_CLIENT_ID").map_err(|_| "AUTH0_CLIENT_ID not set")?;
let client_secret =
std::env::var("AUTH0_CLIENT_SECRET").map_err(|_| "AUTH0_CLIENT_SECRET not set")?;
let email = user
.email
.as_deref()
.ok_or("No email in test user record for Auth0 lookup")?;
info!(email, "Cleaning up Auth0 test user");
// Get management API token
let token_resp = http
.post(format!("https://{domain}/oauth/token"))
.json(&serde_json::json!({
"grant_type": "client_credentials",
"client_id": client_id,
"client_secret": client_secret,
"audience": format!("https://{domain}/api/v2/"),
}))
.send()
.await
.map_err(|e| format!("Auth0 token request failed: {e}"))?;
let token_body: serde_json::Value = token_resp
.json()
.await
.map_err(|e| format!("Failed to parse Auth0 token: {e}"))?;
let access_token = token_body
.get("access_token")
.and_then(|v| v.as_str())
.ok_or("No access_token in Auth0 response")?;
// Search for user by email
let encoded_email = urlencoding::encode(email);
let search_url = format!("https://{domain}/api/v2/users-by-email?email={encoded_email}");
let search_resp = http
.get(&search_url)
.bearer_auth(access_token)
.send()
.await
.map_err(|e| format!("Auth0 user search failed: {e}"))?;
let users: Vec<serde_json::Value> = search_resp
.json()
.await
.map_err(|e| format!("Failed to parse Auth0 users: {e}"))?;
let user_id = users
.first()
.and_then(|u| u.get("user_id"))
.and_then(|v| v.as_str())
.ok_or_else(|| format!("User with email '{email}' not found in Auth0"))?;
// Delete
let encoded_id = urlencoding::encode(user_id);
let delete_resp = http
.delete(format!("https://{domain}/api/v2/users/{encoded_id}"))
.bearer_auth(access_token)
.send()
.await
.map_err(|e| format!("Auth0 user delete failed: {e}"))?;
if delete_resp.status().is_success() || delete_resp.status().as_u16() == 204 {
info!(email, user_id, "Auth0 test user deleted");
Ok(true)
} else {
let status = delete_resp.status();
let body = delete_resp.text().await.unwrap_or_default();
Err(format!("Auth0 delete failed ({status}): {body}"))
}
}
/// Delete a user from Okta via the Users API.
///
/// Requires `OKTA_DOMAIN`, `OKTA_API_TOKEN` env vars.
async fn cleanup_okta(
user: &TestUserRecord,
_config: &AgentConfig,
http: &reqwest::Client,
) -> Result<bool, String> {
let domain = std::env::var("OKTA_DOMAIN").map_err(|_| "OKTA_DOMAIN not set")?;
let api_token = std::env::var("OKTA_API_TOKEN").map_err(|_| "OKTA_API_TOKEN not set")?;
let username = user
.username
.as_deref()
.or(user.email.as_deref())
.ok_or("No username/email in test user record for Okta lookup")?;
info!(username, "Cleaning up Okta test user");
// Search user
let encoded = urlencoding::encode(username);
let search_url = format!("https://{domain}/api/v1/users?search=profile.login+eq+\"{encoded}\"");
let search_resp = http
.get(&search_url)
.header("Authorization", format!("SSWS {api_token}"))
.send()
.await
.map_err(|e| format!("Okta user search failed: {e}"))?;
let users: Vec<serde_json::Value> = search_resp
.json()
.await
.map_err(|e| format!("Failed to parse Okta users: {e}"))?;
let user_id = users
.first()
.and_then(|u| u.get("id"))
.and_then(|v| v.as_str())
.ok_or_else(|| format!("User '{username}' not found in Okta"))?;
// Deactivate first (required by Okta before delete)
let _ = http
.post(format!(
"https://{domain}/api/v1/users/{user_id}/lifecycle/deactivate"
))
.header("Authorization", format!("SSWS {api_token}"))
.send()
.await;
// Delete
let delete_resp = http
.delete(format!("https://{domain}/api/v1/users/{user_id}"))
.header("Authorization", format!("SSWS {api_token}"))
.send()
.await
.map_err(|e| format!("Okta user delete failed: {e}"))?;
if delete_resp.status().is_success() || delete_resp.status().as_u16() == 204 {
info!(username, user_id, "Okta test user deleted");
Ok(true)
} else {
let status = delete_resp.status();
let body = delete_resp.text().await.unwrap_or_default();
Err(format!("Okta delete failed ({status}): {body}"))
}
}
#[cfg(test)]
mod tests {
use super::*;
use compliance_core::models::pentest::{IdentityProvider, TestUserRecord};
use secrecy::SecretString;
fn make_config_no_keycloak() -> AgentConfig {
AgentConfig {
mongodb_uri: String::new(),
mongodb_database: String::new(),
litellm_url: String::new(),
litellm_api_key: SecretString::from(String::new()),
litellm_model: String::new(),
litellm_embed_model: String::new(),
github_token: None,
github_webhook_secret: None,
gitlab_url: None,
gitlab_token: None,
gitlab_webhook_secret: None,
jira_url: None,
jira_email: None,
jira_api_token: None,
jira_project_key: None,
searxng_url: None,
nvd_api_key: None,
agent_port: 3001,
scan_schedule: String::new(),
cve_monitor_schedule: String::new(),
git_clone_base_path: String::new(),
ssh_key_path: String::new(),
keycloak_url: None,
keycloak_realm: None,
keycloak_admin_username: None,
keycloak_admin_password: None,
pentest_verification_email: None,
pentest_imap_host: None,
pentest_imap_port: None,
pentest_imap_username: None,
pentest_imap_password: None,
}
}
#[tokio::test]
async fn already_cleaned_up_returns_false() {
let user = TestUserRecord {
username: Some("test".into()),
email: None,
provider_user_id: None,
provider: Some(IdentityProvider::Keycloak),
cleaned_up: true,
};
let config = make_config_no_keycloak();
let http = reqwest::Client::new();
let result = cleanup_test_user(&user, &config, &http).await;
assert_eq!(result, Ok(false));
}
#[tokio::test]
async fn firebase_returns_false_not_implemented() {
let user = TestUserRecord {
username: Some("test".into()),
email: None,
provider_user_id: None,
provider: Some(IdentityProvider::Firebase),
cleaned_up: false,
};
let config = make_config_no_keycloak();
let http = reqwest::Client::new();
let result = cleanup_test_user(&user, &config, &http).await;
assert_eq!(result, Ok(false));
}
#[tokio::test]
async fn no_provider_no_keycloak_skips() {
let user = TestUserRecord {
username: Some("test".into()),
email: None,
provider_user_id: None,
provider: None,
cleaned_up: false,
};
let config = make_config_no_keycloak();
let http = reqwest::Client::new();
let result = cleanup_test_user(&user, &config, &http).await;
assert_eq!(result, Ok(false));
}
#[tokio::test]
async fn custom_provider_no_keycloak_skips() {
let user = TestUserRecord {
username: Some("test".into()),
email: None,
provider_user_id: None,
provider: Some(IdentityProvider::Custom),
cleaned_up: false,
};
let config = make_config_no_keycloak();
let http = reqwest::Client::new();
let result = cleanup_test_user(&user, &config, &http).await;
assert_eq!(result, Ok(false));
}
#[tokio::test]
async fn keycloak_missing_config_returns_error() {
let user = TestUserRecord {
username: Some("test".into()),
email: None,
provider_user_id: None,
provider: Some(IdentityProvider::Keycloak),
cleaned_up: false,
};
let config = make_config_no_keycloak();
let http = reqwest::Client::new();
let result = cleanup_test_user(&user, &config, &http).await;
assert!(result.is_err());
assert!(result
.as_ref()
.err()
.is_some_and(|e| e.contains("KEYCLOAK_URL")));
}
#[tokio::test]
async fn keycloak_missing_username_returns_error() {
let user = TestUserRecord {
username: None,
email: Some("test@example.com".into()),
provider_user_id: None,
provider: Some(IdentityProvider::Keycloak),
cleaned_up: false,
};
let mut config = make_config_no_keycloak();
config.keycloak_url = Some("http://localhost:8080".into());
config.keycloak_realm = Some("test".into());
config.keycloak_admin_username = Some("admin".into());
config.keycloak_admin_password = Some(SecretString::from("pass".to_string()));
let http = reqwest::Client::new();
let result = cleanup_test_user(&user, &config, &http).await;
assert!(result.is_err());
assert!(result
.as_ref()
.err()
.is_some_and(|e| e.contains("username")));
}
#[tokio::test]
async fn auth0_missing_env_returns_error() {
let user = TestUserRecord {
username: None,
email: Some("test@example.com".into()),
provider_user_id: None,
provider: Some(IdentityProvider::Auth0),
cleaned_up: false,
};
let config = make_config_no_keycloak();
let http = reqwest::Client::new();
let result = cleanup_test_user(&user, &config, &http).await;
assert!(result.is_err());
assert!(result
.as_ref()
.err()
.is_some_and(|e| e.contains("AUTH0_DOMAIN")));
}
#[tokio::test]
async fn okta_missing_env_returns_error() {
let user = TestUserRecord {
username: Some("test".into()),
email: None,
provider_user_id: None,
provider: Some(IdentityProvider::Okta),
cleaned_up: false,
};
let config = make_config_no_keycloak();
let http = reqwest::Client::new();
let result = cleanup_test_user(&user, &config, &http).await;
assert!(result.is_err());
assert!(result
.as_ref()
.err()
.is_some_and(|e| e.contains("OKTA_DOMAIN")));
}
}

View File

@@ -0,0 +1,117 @@
use aes_gcm::aead::AeadCore;
use aes_gcm::{
aead::{Aead, KeyInit, OsRng},
Aes256Gcm, Nonce,
};
use base64::Engine;
/// Load the 32-byte encryption key from PENTEST_ENCRYPTION_KEY env var.
/// Returns None if not set or invalid length.
pub fn load_encryption_key() -> Option<[u8; 32]> {
let hex_key = std::env::var("PENTEST_ENCRYPTION_KEY").ok()?;
let bytes = hex::decode(hex_key).ok()?;
if bytes.len() != 32 {
return None;
}
let mut key = [0u8; 32];
key.copy_from_slice(&bytes);
Some(key)
}
/// Encrypt a plaintext string. Returns base64-encoded nonce+ciphertext.
/// Returns the original string if no encryption key is available.
pub fn encrypt(plaintext: &str) -> String {
let Some(key_bytes) = load_encryption_key() else {
return plaintext.to_string();
};
let Ok(cipher) = Aes256Gcm::new_from_slice(&key_bytes) else {
return plaintext.to_string();
};
let nonce = Aes256Gcm::generate_nonce(&mut OsRng);
let Ok(ciphertext) = cipher.encrypt(&nonce, plaintext.as_bytes()) else {
return plaintext.to_string();
};
let mut combined = nonce.to_vec();
combined.extend_from_slice(&ciphertext);
base64::engine::general_purpose::STANDARD.encode(&combined)
}
/// Decrypt a base64-encoded nonce+ciphertext string.
/// Returns None if decryption fails.
pub fn decrypt(encrypted: &str) -> Option<String> {
let key_bytes = load_encryption_key()?;
let cipher = Aes256Gcm::new_from_slice(&key_bytes).ok()?;
let combined = base64::engine::general_purpose::STANDARD
.decode(encrypted)
.ok()?;
if combined.len() < 12 {
return None;
}
let (nonce_bytes, ciphertext) = combined.split_at(12);
let nonce = Nonce::from_slice(nonce_bytes);
let plaintext = cipher.decrypt(nonce, ciphertext).ok()?;
String::from_utf8(plaintext).ok()
}
#[cfg(test)]
mod tests {
use super::*;
use std::sync::Mutex;
// Guard to serialize tests that touch env vars
static ENV_LOCK: Mutex<()> = Mutex::new(());
fn with_key<F: FnOnce()>(hex_key: &str, f: F) {
let _guard = ENV_LOCK.lock();
unsafe { std::env::set_var("PENTEST_ENCRYPTION_KEY", hex_key) };
f();
unsafe { std::env::remove_var("PENTEST_ENCRYPTION_KEY") };
}
#[test]
fn round_trip() {
let key = "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef";
with_key(key, || {
let plaintext = "my_secret_password";
let encrypted = encrypt(plaintext);
assert_ne!(encrypted, plaintext);
let decrypted = decrypt(&encrypted);
assert_eq!(decrypted, Some(plaintext.to_string()));
});
}
#[test]
fn wrong_key_fails() {
let _guard = ENV_LOCK.lock();
let key1 = "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef";
let key2 = "abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789";
let encrypted = {
unsafe { std::env::set_var("PENTEST_ENCRYPTION_KEY", key1) };
let e = encrypt("secret");
unsafe { std::env::remove_var("PENTEST_ENCRYPTION_KEY") };
e
};
unsafe { std::env::set_var("PENTEST_ENCRYPTION_KEY", key2) };
assert!(decrypt(&encrypted).is_none());
unsafe { std::env::remove_var("PENTEST_ENCRYPTION_KEY") };
}
#[test]
fn no_key_passthrough() {
let _guard = ENV_LOCK.lock();
unsafe { std::env::remove_var("PENTEST_ENCRYPTION_KEY") };
let result = encrypt("plain");
assert_eq!(result, "plain");
}
#[test]
fn corrupted_ciphertext() {
let key = "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef";
with_key(key, || {
assert!(decrypt("not-valid-base64!!!").is_none());
// Valid base64 but wrong content
let garbage = base64::engine::general_purpose::STANDARD.encode(b"tooshort");
assert!(decrypt(&garbage).is_none());
});
}
}

View File

@@ -1,4 +1,6 @@
pub mod cleanup;
mod context;
pub mod crypto;
pub mod orchestrator;
mod prompt_builder;
pub mod report;

View File

@@ -2,7 +2,7 @@ use std::sync::Arc;
use std::time::Duration;
use mongodb::bson::doc;
use tokio::sync::broadcast;
use tokio::sync::{broadcast, watch};
use compliance_core::models::dast::DastTarget;
use compliance_core::models::pentest::*;
@@ -22,29 +22,27 @@ pub struct PentestOrchestrator {
pub(crate) llm: Arc<LlmClient>,
pub(crate) db: Database,
pub(crate) event_tx: broadcast::Sender<PentestEvent>,
pub(crate) pause_rx: Option<watch::Receiver<bool>>,
}
impl PentestOrchestrator {
pub fn new(llm: Arc<LlmClient>, db: Database) -> Self {
let (event_tx, _) = broadcast::channel(256);
/// Create a new orchestrator with an externally-provided broadcast sender
/// and an optional pause receiver.
pub fn new(
llm: Arc<LlmClient>,
db: Database,
event_tx: broadcast::Sender<PentestEvent>,
pause_rx: Option<watch::Receiver<bool>>,
) -> Self {
Self {
tool_registry: ToolRegistry::new(),
llm,
db,
event_tx,
pause_rx,
}
}
#[allow(dead_code)]
pub fn subscribe(&self) -> broadcast::Receiver<PentestEvent> {
self.event_tx.subscribe()
}
#[allow(dead_code)]
pub fn event_sender(&self) -> broadcast::Sender<PentestEvent> {
self.event_tx.clone()
}
/// Run a pentest session with timeout and automatic failure marking on errors.
pub async fn run_session_guarded(
&self,
@@ -54,8 +52,18 @@ impl PentestOrchestrator {
) {
let session_id = session.id;
// Use config-specified timeout or default
let timeout_duration = session
.config
.as_ref()
.and_then(|c| c.max_duration_minutes)
.map(|m| Duration::from_secs(m as u64 * 60))
.unwrap_or(SESSION_TIMEOUT);
let timeout_minutes = timeout_duration.as_secs() / 60;
match tokio::time::timeout(
SESSION_TIMEOUT,
timeout_duration,
self.run_session(session, target, initial_message),
)
.await
@@ -72,12 +80,10 @@ impl PentestOrchestrator {
});
}
Err(_) => {
tracing::warn!(?session_id, "Pentest session timed out after 30 minutes");
self.mark_session_failed(session_id, "Session timed out after 30 minutes")
.await;
let _ = self.event_tx.send(PentestEvent::Error {
message: "Session timed out after 30 minutes".to_string(),
});
let msg = format!("Session timed out after {timeout_minutes} minutes");
tracing::warn!(?session_id, "{msg}");
self.mark_session_failed(session_id, &msg).await;
let _ = self.event_tx.send(PentestEvent::Error { message: msg });
}
}
}
@@ -103,6 +109,45 @@ impl PentestOrchestrator {
}
}
/// Check if the session is paused; if so, update DB status and wait until resumed.
async fn wait_if_paused(&self, session: &PentestSession) {
let Some(ref pause_rx) = self.pause_rx else {
return;
};
let mut rx = pause_rx.clone();
if !*rx.borrow() {
return;
}
// We are paused — update DB status
if let Some(sid) = session.id {
let _ = self
.db
.pentest_sessions()
.update_one(doc! { "_id": sid }, doc! { "$set": { "status": "paused" }})
.await;
}
let _ = self.event_tx.send(PentestEvent::Paused);
// Wait until unpaused
while *rx.borrow() {
if rx.changed().await.is_err() {
break;
}
}
// Resumed — update DB status back to running
if let Some(sid) = session.id {
let _ = self
.db
.pentest_sessions()
.update_one(doc! { "_id": sid }, doc! { "$set": { "status": "running" }})
.await;
}
let _ = self.event_tx.send(PentestEvent::Resumed);
}
async fn run_session(
&self,
session: &PentestSession,
@@ -175,6 +220,9 @@ impl PentestOrchestrator {
let mut prev_node_ids: Vec<String> = Vec::new();
for _iteration in 0..max_iterations {
// Check pause state at top of each iteration
self.wait_if_paused(session).await;
let response = self
.llm
.chat_with_tools(messages.clone(), &tool_defs, Some(0.2), Some(8192))
@@ -342,10 +390,13 @@ impl PentestOrchestrator {
)
.await;
// Build LLM-facing summary: strip large fields
// (screenshots, raw HTML) to save context window
let llm_data = summarize_tool_output(&result.data);
serde_json::json!({
"summary": result.summary,
"findings_count": findings_count,
"data": result.data,
"data": llm_data,
})
.to_string()
}
@@ -417,6 +468,61 @@ impl PentestOrchestrator {
.await;
}
// Clean up test user via identity provider API if requested
if session
.config
.as_ref()
.is_some_and(|c| c.auth.cleanup_test_user)
{
if let Some(ref test_user) = session.test_user {
let http = reqwest::Client::new();
// We need the AgentConfig — read from env since orchestrator doesn't hold it
let config = crate::config::load_config();
match config {
Ok(cfg) => {
match crate::pentest::cleanup::cleanup_test_user(test_user, &cfg, &http)
.await
{
Ok(true) => {
tracing::info!(
username = test_user.username.as_deref(),
"Test user cleaned up via provider API"
);
// Mark as cleaned up in DB
if let Some(sid) = session.id {
let _ = self
.db
.pentest_sessions()
.update_one(
doc! { "_id": sid },
doc! { "$set": { "test_user.cleaned_up": true } },
)
.await;
}
}
Ok(false) => {
tracing::info!(
"Test user cleanup skipped (no provider configured)"
);
}
Err(e) => {
tracing::warn!(error = %e, "Test user cleanup failed");
let _ = self.event_tx.send(PentestEvent::Error {
message: format!("Test user cleanup failed: {e}"),
});
}
}
}
Err(e) => {
tracing::warn!(error = %e, "Could not load config for cleanup");
}
}
}
}
// Clean up the persistent browser session for this pentest
compliance_dast::tools::browser::cleanup_browser_session(&session_id).await;
let _ = self.event_tx.send(PentestEvent::Complete {
summary: format!(
"Pentest complete. {} findings from {} tool invocations.",
@@ -427,3 +533,174 @@ impl PentestOrchestrator {
Ok(())
}
}
/// Strip large fields from tool output before sending to the LLM.
/// Screenshots, raw HTML, and other bulky data are replaced with short summaries.
/// The full data is still stored in the DB for the report.
fn summarize_tool_output(data: &serde_json::Value) -> serde_json::Value {
let Some(obj) = data.as_object() else {
return data.clone();
};
let mut summarized = serde_json::Map::new();
for (key, value) in obj {
match key.as_str() {
// Replace screenshot base64 with a placeholder
"screenshot_base64" => {
if let Some(s) = value.as_str() {
if !s.is_empty() {
summarized.insert(
key.clone(),
serde_json::Value::String(
"[screenshot captured and saved to report]".to_string(),
),
);
continue;
}
}
summarized.insert(key.clone(), value.clone());
}
// Truncate raw HTML content
"html" => {
if let Some(s) = value.as_str() {
if s.len() > 2000 {
summarized.insert(
key.clone(),
serde_json::Value::String(format!(
"{}... [truncated, {} chars total]",
&s[..2000],
s.len()
)),
);
continue;
}
}
summarized.insert(key.clone(), value.clone());
}
// Truncate page text
"text" if value.as_str().is_some_and(|s| s.len() > 1500) => {
let s = value.as_str().unwrap_or_default();
summarized.insert(
key.clone(),
serde_json::Value::String(format!("{}... [truncated]", &s[..1500])),
);
}
// Trim large arrays (e.g., "elements", "links", "inputs")
"elements" | "links" | "inputs" => {
if let Some(arr) = value.as_array() {
if arr.len() > 15 {
let mut trimmed: Vec<serde_json::Value> = arr[..15].to_vec();
trimmed.push(serde_json::json!(format!(
"... and {} more",
arr.len() - 15
)));
summarized.insert(key.clone(), serde_json::Value::Array(trimmed));
continue;
}
}
summarized.insert(key.clone(), value.clone());
}
// Recursively summarize nested objects (e.g., "page" in get_content)
_ if value.is_object() => {
summarized.insert(key.clone(), summarize_tool_output(value));
}
// Keep everything else as-is
_ => {
summarized.insert(key.clone(), value.clone());
}
}
}
serde_json::Value::Object(summarized)
}
#[cfg(test)]
mod tests {
use super::*;
use serde_json::json;
#[test]
fn test_summarize_strips_screenshot() {
let input = json!({
"screenshot_base64": "iVBOR...",
"url": "https://example.com"
});
let result = summarize_tool_output(&input);
assert_eq!(
result["screenshot_base64"],
"[screenshot captured and saved to report]"
);
assert_eq!(result["url"], "https://example.com");
}
#[test]
fn test_summarize_truncates_html() {
let long_html = "x".repeat(3000);
let input = json!({ "html": long_html });
let result = summarize_tool_output(&input);
let s = result["html"].as_str().unwrap_or_default();
assert!(s.contains("[truncated, 3000 chars total]"));
assert!(s.starts_with(&"x".repeat(2000)));
assert!(s.len() < 3000);
}
#[test]
fn test_summarize_truncates_text() {
let long_text = "a".repeat(2000);
let input = json!({ "text": long_text });
let result = summarize_tool_output(&input);
let s = result["text"].as_str().unwrap_or_default();
assert!(s.contains("[truncated]"));
assert!(s.starts_with(&"a".repeat(1500)));
assert!(s.len() < 2000);
}
#[test]
fn test_summarize_trims_large_arrays() {
let elements: Vec<serde_json::Value> = (0..20).map(|i| json!(format!("el-{i}"))).collect();
let input = json!({ "elements": elements });
let result = summarize_tool_output(&input);
let arr = result["elements"].as_array();
assert!(arr.is_some());
if let Some(arr) = arr {
// 15 kept + 1 summary entry
assert_eq!(arr.len(), 16);
assert_eq!(arr[15], json!("... and 5 more"));
}
}
#[test]
fn test_summarize_preserves_small_data() {
let input = json!({
"url": "https://example.com",
"status": 200,
"title": "Example"
});
let result = summarize_tool_output(&input);
assert_eq!(result, input);
}
#[test]
fn test_summarize_recursive() {
let input = json!({
"page": {
"screenshot_base64": "iVBORw0KGgoAAAA...",
"url": "https://example.com"
}
});
let result = summarize_tool_output(&input);
assert_eq!(
result["page"]["screenshot_base64"],
"[screenshot captured and saved to report]"
);
assert_eq!(result["page"]["url"], "https://example.com");
}
#[test]
fn test_summarize_non_object() {
let string_val = json!("just a string");
assert_eq!(summarize_tool_output(&string_val), string_val);
let num_val = json!(42);
assert_eq!(summarize_tool_output(&num_val), num_val);
}
}

View File

@@ -5,6 +5,100 @@ use compliance_core::models::sbom::SbomEntry;
use super::orchestrator::PentestOrchestrator;
/// Attempt to decrypt a field; if decryption fails, return the original value
/// (which may be plaintext from before encryption was enabled).
fn decrypt_field(value: &str) -> String {
super::crypto::decrypt(value).unwrap_or_else(|| value.to_string())
}
/// Build additional prompt sections from PentestConfig when present.
fn build_config_sections(config: &PentestConfig) -> String {
let mut sections = String::new();
// Authentication section
match config.auth.mode {
AuthMode::Manual => {
sections.push_str("\n## Authentication\n");
sections.push_str("- **Mode**: Manual credentials\n");
if let Some(ref u) = config.auth.username {
let decrypted = decrypt_field(u);
sections.push_str(&format!("- **Username**: {decrypted}\n"));
}
if let Some(ref p) = config.auth.password {
let decrypted = decrypt_field(p);
sections.push_str(&format!("- **Password**: {decrypted}\n"));
}
sections.push_str(
"Use these credentials to log in before testing authenticated endpoints.\n",
);
}
AuthMode::AutoRegister => {
sections.push_str("\n## Authentication\n");
sections.push_str("- **Mode**: Auto-register\n");
if let Some(ref url) = config.auth.registration_url {
sections.push_str(&format!("- **Registration URL**: {url}\n"));
} else {
sections.push_str(
"- **Registration URL**: Not provided — use Playwright to discover the registration page.\n",
);
}
if let Some(ref email) = config.auth.verification_email {
sections.push_str(&format!(
"- **Verification Email**: Use plus-addressing from `{email}` \
(e.g. `{base}+{{session_id}}@{domain}`) for email verification. \
The system will poll the IMAP mailbox for verification links.\n",
base = email.split('@').next().unwrap_or(email),
domain = email.split('@').nth(1).unwrap_or("example.com"),
));
}
sections.push_str(
"Register a new test account using the registration page, then use it for testing.\n",
);
}
AuthMode::None => {}
}
// Custom headers
if !config.custom_headers.is_empty() {
sections.push_str("\n## Custom HTTP Headers\n");
sections.push_str("Include these headers in all HTTP requests:\n");
for (k, v) in &config.custom_headers {
sections.push_str(&format!("- `{k}: {v}`\n"));
}
}
// Scope exclusions
if !config.scope_exclusions.is_empty() {
sections.push_str("\n## Scope Exclusions\n");
sections.push_str("Do NOT test the following paths:\n");
for path in &config.scope_exclusions {
sections.push_str(&format!("- `{path}`\n"));
}
}
// Git context
if config.git_repo_url.is_some() || config.branch.is_some() || config.commit_hash.is_some() {
sections.push_str("\n## Git Context\n");
if let Some(ref url) = config.git_repo_url {
sections.push_str(&format!("- **Repository**: {url}\n"));
}
if let Some(ref branch) = config.branch {
sections.push_str(&format!("- **Branch**: {branch}\n"));
}
if let Some(ref commit) = config.commit_hash {
sections.push_str(&format!("- **Commit**: {commit}\n"));
}
}
// Environment
sections.push_str(&format!(
"\n## Environment\n- **Target environment**: {}\n",
config.environment
));
sections
}
/// Return strategy guidance text for the given strategy.
fn strategy_guidance(strategy: &PentestStrategy) -> &'static str {
match strategy {
@@ -155,6 +249,11 @@ impl PentestOrchestrator {
let sast_section = build_sast_section(sast_findings);
let sbom_section = build_sbom_section(sbom_entries);
let code_section = build_code_section(code_context);
let config_sections = session
.config
.as_ref()
.map(build_config_sections)
.unwrap_or_default();
format!(
r#"You are an expert penetration tester conducting an authorized security assessment.
@@ -178,7 +277,7 @@ impl PentestOrchestrator {
## Code Entry Points (Knowledge Graph)
{code_section}
{config_sections}
## Available Tools
{tool_names}
@@ -186,15 +285,34 @@ impl PentestOrchestrator {
1. Start by running reconnaissance (recon tool) to fingerprint the target and discover technologies.
2. Run the OpenAPI parser to discover API endpoints from specs.
3. Check infrastructure: DNS, DMARC, TLS, security headers, cookies, CSP, CORS.
4. Based on SAST findings, prioritize testing endpoints where vulnerabilities were found in code.
5. For each vulnerability type found in SAST, use the corresponding DAST tool to verify exploitability.
6. If vulnerable dependencies are listed, try to trigger known CVE conditions against the running application.
7. Test rate limiting on critical endpoints (login, API).
8. Check for console.log leakage in frontend JavaScript.
9. Analyze tool results and chain findings — if one vulnerability enables others, explore the chain.
10. When testing is complete, provide a structured summary with severity and remediation.
11. Always explain your reasoning before invoking each tool.
12. When done, say "Testing complete" followed by a final summary.
4. If the target requires authentication (auto-register mode), use the browser tool to:
a. Navigate to the target — it will redirect to the login page.
b. Click the "Register" link to reach the registration form.
c. Fill all form fields (username, email with plus-addressing, password, name) one by one.
d. Click submit. If a Terms & Conditions page appears, accept it.
e. After registration, use the browser to navigate through the application pages.
f. **Take a screenshot after each major page** for evidence in the report.
5. Use the browser tool to explore the authenticated application — navigate to each section,
use get_content to understand the page structure, and take screenshots.
6. Based on SAST findings, prioritize testing endpoints where vulnerabilities were found in code.
7. For each vulnerability type found in SAST, use the corresponding DAST tool to verify exploitability.
8. If vulnerable dependencies are listed, try to trigger known CVE conditions against the running application.
9. Test rate limiting on critical endpoints (login, API).
10. Check for console.log leakage in frontend JavaScript.
11. Analyze tool results and chain findings — if one vulnerability enables others, explore the chain.
12. When testing is complete, provide a structured summary with severity and remediation.
13. Always explain your reasoning before invoking each tool.
14. When done, say "Testing complete" followed by a final summary.
## Browser Tool Usage
- The browser tab **persists** between calls — cookies and login state are preserved.
- After navigate, the response includes `elements` (links, inputs, buttons on the page).
- Use `get_content` to see forms, links, buttons, headings, and page text.
- Use `click` with CSS selectors to interact (e.g., `a:text('Register')`, `input[type='submit']`).
- Use `fill` with selector + value to fill form fields (e.g., `input[name='email']`).
- **Take screenshots** (`action: screenshot`) after important actions for evidence.
- For SPA apps: a 200 HTTP status does NOT mean the page is accessible — check the actual
page content with the browser tool to verify if it shows real data or a login redirect.
## Important
- This is an authorized penetration test. All testing is permitted within the target scope.

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,40 @@
use super::html_escape;
pub(super) fn appendix(session_id: &str) -> String {
format!(
r##"<!-- ═══════════════ 5. APPENDIX ═══════════════ -->
<div class="page-break"></div>
<h2><span class="section-num">5.</span> Appendix</h2>
<h3>Severity Definitions</h3>
<table class="info">
<tr><td style="color: var(--sev-critical); font-weight: 700;">Critical</td><td>Vulnerabilities that can be exploited remotely without authentication to execute arbitrary code, exfiltrate sensitive data, or fully compromise the system.</td></tr>
<tr><td style="color: var(--sev-high); font-weight: 700;">High</td><td>Vulnerabilities that allow significant unauthorized access or data exposure, typically requiring minimal user interaction or privileges.</td></tr>
<tr><td style="color: var(--sev-medium); font-weight: 700;">Medium</td><td>Vulnerabilities that may lead to limited data exposure or require specific conditions to exploit, but still represent meaningful risk.</td></tr>
<tr><td style="color: var(--sev-low); font-weight: 700;">Low</td><td>Minor issues with limited direct impact. May contribute to broader attack chains or indicate defense-in-depth weaknesses.</td></tr>
<tr><td style="color: var(--sev-info); font-weight: 700;">Info</td><td>Observations and best-practice recommendations that do not represent direct security vulnerabilities.</td></tr>
</table>
<h3>Disclaimer</h3>
<p style="font-size: 9pt; color: var(--text-secondary);">
This report was generated by an automated AI-powered penetration testing engine. While the system
employs advanced techniques to identify vulnerabilities, no automated assessment can guarantee
complete coverage. The results should be reviewed by qualified security professionals and validated
in the context of the target application's threat model. Findings are point-in-time observations
and may change as the application evolves.
</p>
<!-- ═══════════════ FOOTER ═══════════════ -->
<div class="report-footer">
<div class="footer-company">Compliance Scanner</div>
<div>AI-Powered Security Assessment Platform</div>
<div style="margin-top: 6px;">This document is confidential and intended solely for the named recipient.</div>
<div>Report ID: {session_id}</div>
</div>
</div><!-- .report-body -->
</body>
</html>"##,
session_id = html_escape(session_id),
)
}

View File

@@ -0,0 +1,193 @@
use super::html_escape;
use compliance_core::models::pentest::AttackChainNode;
pub(super) fn attack_chain(chain: &[AttackChainNode]) -> String {
let chain_section = if chain.is_empty() {
r#"<p style="color: var(--text-muted);">No attack chain steps recorded.</p>"#.to_string()
} else {
build_chain_html(chain)
};
format!(
r##"<!-- ═══════════════ 4. ATTACK CHAIN ═══════════════ -->
<div class="page-break"></div>
<h2><span class="section-num">4.</span> Attack Chain Timeline</h2>
<p>
The following sequence shows each tool invocation made by the AI orchestrator during the assessment,
grouped by phase. Each step includes the tool's name, execution status, and the AI's reasoning
for choosing that action.
</p>
<div style="margin-top: 16px;">
{chain_section}
</div>"##
)
}
fn build_chain_html(chain: &[AttackChainNode]) -> String {
let mut chain_html = String::new();
// Compute phases via BFS from root nodes
let mut phase_map: std::collections::HashMap<String, usize> = std::collections::HashMap::new();
let mut queue: std::collections::VecDeque<String> = std::collections::VecDeque::new();
for node in chain {
if node.parent_node_ids.is_empty() {
let nid = node.node_id.clone();
if !nid.is_empty() {
phase_map.insert(nid.clone(), 0);
queue.push_back(nid);
}
}
}
while let Some(nid) = queue.pop_front() {
let parent_phase = phase_map.get(&nid).copied().unwrap_or(0);
for node in chain {
if node.parent_node_ids.contains(&nid) {
let child_id = node.node_id.clone();
if !child_id.is_empty() && !phase_map.contains_key(&child_id) {
phase_map.insert(child_id.clone(), parent_phase + 1);
queue.push_back(child_id);
}
}
}
}
// Assign phase 0 to any unassigned nodes
for node in chain {
let nid = node.node_id.clone();
if !nid.is_empty() && !phase_map.contains_key(&nid) {
phase_map.insert(nid, 0);
}
}
// Group nodes by phase
let max_phase = phase_map.values().copied().max().unwrap_or(0);
let phase_labels = [
"Reconnaissance",
"Enumeration",
"Exploitation",
"Validation",
"Post-Exploitation",
];
for phase_idx in 0..=max_phase {
let phase_nodes: Vec<&AttackChainNode> = chain
.iter()
.filter(|n| {
let nid = n.node_id.clone();
phase_map.get(&nid).copied().unwrap_or(0) == phase_idx
})
.collect();
if phase_nodes.is_empty() {
continue;
}
let label = if phase_idx < phase_labels.len() {
phase_labels[phase_idx]
} else {
"Additional Testing"
};
chain_html.push_str(&format!(
r#"<div class="phase-block">
<div class="phase-header">
<span class="phase-num">Phase {}</span>
<span class="phase-label">{}</span>
<span class="phase-count">{} step{}</span>
</div>
<div class="phase-steps">"#,
phase_idx + 1,
label,
phase_nodes.len(),
if phase_nodes.len() == 1 { "" } else { "s" },
));
for (i, node) in phase_nodes.iter().enumerate() {
let status_label = format!("{:?}", node.status);
let status_class = match status_label.to_lowercase().as_str() {
"completed" => "step-completed",
"failed" => "step-failed",
_ => "step-running",
};
let findings_badge = if !node.findings_produced.is_empty() {
format!(
r#"<span class="step-findings">{} finding{}</span>"#,
node.findings_produced.len(),
if node.findings_produced.len() == 1 {
""
} else {
"s"
},
)
} else {
String::new()
};
let risk_badge = node
.risk_score
.map(|r| {
let risk_class = if r >= 70 {
"risk-high"
} else if r >= 40 {
"risk-med"
} else {
"risk-low"
};
format!(r#"<span class="step-risk {risk_class}">Risk: {r}</span>"#)
})
.unwrap_or_default();
let reasoning_html = if node.llm_reasoning.is_empty() {
String::new()
} else {
format!(
r#"<div class="step-reasoning">{}</div>"#,
html_escape(&node.llm_reasoning)
)
};
// Render inline screenshot if this is a browser screenshot action
let screenshot_html = if node.tool_name == "browser" {
node.tool_output
.as_ref()
.and_then(|out| out.get("screenshot_base64"))
.and_then(|v| v.as_str())
.filter(|s| !s.is_empty())
.map(|b64| {
format!(
r#"<div class="step-screenshot"><img src="data:image/png;base64,{b64}" alt="Browser screenshot" style="max-width:100%;border:1px solid #e2e8f0;border-radius:6px;margin-top:8px;"/></div>"#
)
})
.unwrap_or_default()
} else {
String::new()
};
chain_html.push_str(&format!(
r#"<div class="step-row">
<div class="step-num">{num}</div>
<div class="step-connector"></div>
<div class="step-content">
<div class="step-header">
<span class="step-tool">{tool_name}</span>
<span class="step-status {status_class}">{status_label}</span>
{findings_badge}
{risk_badge}
</div>
{reasoning_html}
{screenshot_html}
</div>
</div>"#,
num = i + 1,
tool_name = html_escape(&node.tool_name),
));
}
chain_html.push_str("</div></div>");
}
chain_html
}

View File

@@ -0,0 +1,69 @@
use super::html_escape;
pub(super) fn cover(
target_name: &str,
session_id: &str,
date_short: &str,
target_url: &str,
requester_name: &str,
requester_email: &str,
app_screenshot_b64: Option<&str>,
) -> String {
let screenshot_html = app_screenshot_b64
.filter(|s| !s.is_empty())
.map(|b64| {
format!(
r#"<div style="margin: 20px auto; max-width: 560px; border: 1px solid #cbd5e1; border-radius: 8px; overflow: hidden; box-shadow: 0 4px 12px rgba(0,0,0,0.08);">
<img src="data:image/png;base64,{b64}" alt="Application screenshot" style="width:100%;display:block;"/>
</div>"#
)
})
.unwrap_or_default();
format!(
r##"<!-- ═══════════════ COVER PAGE ═══════════════ -->
<div class="cover">
<svg class="cover-shield" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 96 96">
<defs>
<linearGradient id="sg" x1="0%" y1="0%" x2="100%" y2="100%">
<stop offset="0%" stop-color="#0d2137"/>
<stop offset="100%" stop-color="#1a56db"/>
</linearGradient>
</defs>
<path d="M48 6 L22 22 L22 48 C22 66 34 80 48 86 C62 80 74 66 74 48 L74 22 Z"
fill="none" stroke="url(#sg)" stroke-width="3.5" stroke-linejoin="round"/>
<path d="M48 12 L26 26 L26 47 C26 63 36 76 48 82 C60 76 70 63 70 47 L70 26 Z"
fill="url(#sg)" opacity="0.07"/>
<circle cx="44" cy="44" r="11" fill="none" stroke="#0d2137" stroke-width="2.5"/>
<line x1="52" y1="52" x2="62" y2="62" stroke="#0d2137" stroke-width="2.5" stroke-linecap="round"/>
<path d="M39 44 L42.5 47.5 L49 41" fill="none" stroke="#166534" stroke-width="2.5"
stroke-linecap="round" stroke-linejoin="round"/>
</svg>
<div class="cover-tag">CONFIDENTIAL</div>
<div class="cover-title">Penetration Test Report</div>
<div class="cover-subtitle">{target_name}</div>
<div class="cover-divider"></div>
<div class="cover-meta">
<strong>Report ID:</strong> {session_id}<br>
<strong>Date:</strong> {date_short}<br>
<strong>Target:</strong> {target_url}<br>
<strong>Prepared for:</strong> {requester_name} ({requester_email})
</div>
{screenshot_html}
<div class="cover-footer">
Compliance Scanner &mdash; AI-Powered Security Assessment Platform
</div>
</div>"##,
target_name = html_escape(target_name),
session_id = html_escape(session_id),
date_short = date_short,
target_url = html_escape(target_url),
requester_name = html_escape(requester_name),
requester_email = html_escape(requester_email),
)
}

View File

@@ -0,0 +1,238 @@
use super::html_escape;
use compliance_core::models::dast::DastFinding;
pub(super) fn executive_summary(
findings: &[DastFinding],
target_name: &str,
target_url: &str,
tool_count: usize,
tool_invocations: u32,
success_rate: f64,
) -> String {
let critical = findings
.iter()
.filter(|f| f.severity.to_string() == "critical")
.count();
let high = findings
.iter()
.filter(|f| f.severity.to_string() == "high")
.count();
let medium = findings
.iter()
.filter(|f| f.severity.to_string() == "medium")
.count();
let low = findings
.iter()
.filter(|f| f.severity.to_string() == "low")
.count();
let info = findings
.iter()
.filter(|f| f.severity.to_string() == "info")
.count();
let exploitable = findings.iter().filter(|f| f.exploitable).count();
let total = findings.len();
let overall_risk = if critical > 0 {
"CRITICAL"
} else if high > 0 {
"HIGH"
} else if medium > 0 {
"MEDIUM"
} else if low > 0 {
"LOW"
} else {
"INFORMATIONAL"
};
let risk_color = match overall_risk {
"CRITICAL" => "#991b1b",
"HIGH" => "#c2410c",
"MEDIUM" => "#a16207",
"LOW" => "#1d4ed8",
_ => "#4b5563",
};
let risk_score: usize =
std::cmp::min(100, critical * 25 + high * 15 + medium * 8 + low * 3 + info);
let severity_bar = build_severity_bar(critical, high, medium, low, info, total);
// Table of contents finding sub-entries
let severity_order = ["critical", "high", "medium", "low", "info"];
let toc_findings_sub = if !findings.is_empty() {
let mut sub = String::new();
let mut fnum = 0usize;
for &sev_key in severity_order.iter() {
let count = findings
.iter()
.filter(|f| f.severity.to_string() == sev_key)
.count();
if count == 0 {
continue;
}
for f in findings
.iter()
.filter(|f| f.severity.to_string() == sev_key)
{
fnum += 1;
sub.push_str(&format!(
r#"<div class="toc-sub">F-{:03} — {}</div>"#,
fnum,
html_escape(&f.title),
));
}
}
sub
} else {
String::new()
};
let critical_high_str = format!("{} / {}", critical, high);
let escaped_target_name = html_escape(target_name);
let escaped_target_url = html_escape(target_url);
format!(
r##"<!-- ═══════════════ TABLE OF CONTENTS ═══════════════ -->
<div class="report-body">
<div class="toc">
<h2>Table of Contents</h2>
<div class="toc-entry"><span class="toc-num">1</span><span class="toc-label">Executive Summary</span></div>
<div class="toc-entry"><span class="toc-num">2</span><span class="toc-label">Scope &amp; Methodology</span></div>
<div class="toc-entry"><span class="toc-num">3</span><span class="toc-label">Findings ({total_findings})</span></div>
{toc_findings_sub}
<div class="toc-entry"><span class="toc-num">4</span><span class="toc-label">Attack Chain Timeline</span></div>
<div class="toc-entry"><span class="toc-num">5</span><span class="toc-label">Appendix</span></div>
</div>
<!-- ═══════════════ 1. EXECUTIVE SUMMARY ═══════════════ -->
<h2><span class="section-num">1.</span> Executive Summary</h2>
<div class="risk-gauge">
<div class="risk-gauge-meter">
<div class="risk-gauge-track">
<div class="risk-gauge-fill" style="width: {risk_score}%; background: {risk_color};"></div>
</div>
<div class="risk-gauge-score" style="color: {risk_color};">{risk_score} / 100</div>
</div>
<div class="risk-gauge-text">
<div class="risk-gauge-label" style="color: {risk_color};">Overall Risk: {overall_risk}</div>
<div class="risk-gauge-desc">
Based on {total_findings} finding{findings_plural} identified across the target application.
</div>
</div>
</div>
<div class="exec-grid">
<div class="kpi-card">
<div class="kpi-value">{total_findings}</div>
<div class="kpi-label">Total Findings</div>
</div>
<div class="kpi-card">
<div class="kpi-value" style="color: var(--sev-critical);">{critical_high}</div>
<div class="kpi-label">Critical / High</div>
</div>
<div class="kpi-card">
<div class="kpi-value" style="color: var(--sev-critical);">{exploitable_count}</div>
<div class="kpi-label">Exploitable</div>
</div>
<div class="kpi-card">
<div class="kpi-value">{tool_count}</div>
<div class="kpi-label">Tools Used</div>
</div>
</div>
<h3>Severity Distribution</h3>
{severity_bar}
<p>
This report presents the results of an automated penetration test conducted against
<strong>{target_name}</strong> (<code>{target_url}</code>) using the Compliance Scanner
AI-powered testing engine. A total of <strong>{total_findings} vulnerabilities</strong> were
identified, of which <strong>{exploitable_count}</strong> were confirmed exploitable with
working proof-of-concept payloads. The assessment employed <strong>{tool_count} security tools</strong>
across <strong>{tool_invocations} invocations</strong> ({success_rate:.0}% success rate).
</p>"##,
total_findings = total,
findings_plural = if total == 1 { "" } else { "s" },
critical_high = critical_high_str,
exploitable_count = exploitable,
target_name = escaped_target_name,
target_url = escaped_target_url,
)
}
fn build_severity_bar(
critical: usize,
high: usize,
medium: usize,
low: usize,
info: usize,
total: usize,
) -> String {
if total == 0 {
return String::new();
}
let crit_pct = (critical as f64 / total as f64 * 100.0) as usize;
let high_pct = (high as f64 / total as f64 * 100.0) as usize;
let med_pct = (medium as f64 / total as f64 * 100.0) as usize;
let low_pct = (low as f64 / total as f64 * 100.0) as usize;
let info_pct = 100_usize.saturating_sub(crit_pct + high_pct + med_pct + low_pct);
let mut bar = String::from(r#"<div class="sev-bar">"#);
if critical > 0 {
bar.push_str(&format!(
r#"<div class="sev-bar-seg sev-bar-critical" style="width:{}%"><span>{}</span></div>"#,
std::cmp::max(crit_pct, 4),
critical
));
}
if high > 0 {
bar.push_str(&format!(
r#"<div class="sev-bar-seg sev-bar-high" style="width:{}%"><span>{}</span></div>"#,
std::cmp::max(high_pct, 4),
high
));
}
if medium > 0 {
bar.push_str(&format!(
r#"<div class="sev-bar-seg sev-bar-medium" style="width:{}%"><span>{}</span></div>"#,
std::cmp::max(med_pct, 4),
medium
));
}
if low > 0 {
bar.push_str(&format!(
r#"<div class="sev-bar-seg sev-bar-low" style="width:{}%"><span>{}</span></div>"#,
std::cmp::max(low_pct, 4),
low
));
}
if info > 0 {
bar.push_str(&format!(
r#"<div class="sev-bar-seg sev-bar-info" style="width:{}%"><span>{}</span></div>"#,
std::cmp::max(info_pct, 4),
info
));
}
bar.push_str("</div>");
bar.push_str(r#"<div class="sev-bar-legend">"#);
if critical > 0 {
bar.push_str(r#"<span><i class="sev-dot" style="background:#991b1b"></i> Critical</span>"#);
}
if high > 0 {
bar.push_str(r#"<span><i class="sev-dot" style="background:#c2410c"></i> High</span>"#);
}
if medium > 0 {
bar.push_str(r#"<span><i class="sev-dot" style="background:#a16207"></i> Medium</span>"#);
}
if low > 0 {
bar.push_str(r#"<span><i class="sev-dot" style="background:#1d4ed8"></i> Low</span>"#);
}
if info > 0 {
bar.push_str(r#"<span><i class="sev-dot" style="background:#4b5563"></i> Info</span>"#);
}
bar.push_str("</div>");
bar
}

View File

@@ -0,0 +1,522 @@
use super::html_escape;
use compliance_core::models::dast::DastFinding;
use compliance_core::models::finding::Finding;
use compliance_core::models::pentest::CodeContextHint;
use compliance_core::models::sbom::SbomEntry;
/// Render the findings section with code-level correlation.
///
/// For each DAST finding, if a linked SAST finding exists (via `linked_sast_finding_id`)
/// or if we can match the endpoint to a code entry point, we render a "Code-Level
/// Remediation" block showing the exact file, line, code snippet, and suggested fix.
pub(super) fn findings(
findings_list: &[DastFinding],
sast_findings: &[Finding],
code_context: &[CodeContextHint],
sbom_entries: &[SbomEntry],
) -> String {
if findings_list.is_empty() {
return r#"<!-- ═══════════════ 3. FINDINGS ═══════════════ -->
<div class="page-break"></div>
<h2><span class="section-num">3.</span> Findings</h2>
<p style="color: var(--text-muted);">No vulnerabilities were identified during this assessment.</p>"#.to_string();
}
let severity_order = ["critical", "high", "medium", "low", "info"];
let severity_labels = ["Critical", "High", "Medium", "Low", "Informational"];
let severity_colors = ["#991b1b", "#c2410c", "#a16207", "#1d4ed8", "#4b5563"];
// Build SAST lookup by ObjectId hex string
let sast_by_id: std::collections::HashMap<String, &Finding> = sast_findings
.iter()
.filter_map(|f| {
let id = f.id.as_ref()?.to_hex();
Some((id, f))
})
.collect();
let mut findings_html = String::new();
let mut finding_num = 0usize;
for (si, &sev_key) in severity_order.iter().enumerate() {
let sev_findings: Vec<&DastFinding> = findings_list
.iter()
.filter(|f| f.severity.to_string() == sev_key)
.collect();
if sev_findings.is_empty() {
continue;
}
findings_html.push_str(&format!(
r#"<h4 class="sev-group-title" style="border-color: {color}">{label} ({count})</h4>"#,
color = severity_colors[si],
label = severity_labels[si],
count = sev_findings.len(),
));
for f in sev_findings {
finding_num += 1;
let sev_color = severity_colors[si];
let exploitable_badge = if f.exploitable {
r#"<span class="badge badge-exploit">EXPLOITABLE</span>"#
} else {
""
};
let cwe_cell = f
.cwe
.as_deref()
.map(|c| format!("<tr><td>CWE</td><td>{}</td></tr>", html_escape(c)))
.unwrap_or_default();
let param_row = f
.parameter
.as_deref()
.map(|p| {
format!(
"<tr><td>Parameter</td><td><code>{}</code></td></tr>",
html_escape(p)
)
})
.unwrap_or_default();
let remediation = f
.remediation
.as_deref()
.unwrap_or("Refer to industry best practices for this vulnerability class.");
let evidence_html = build_evidence_html(f);
// ── Code-level correlation ──────────────────────────────
let code_correlation =
build_code_correlation(f, &sast_by_id, code_context, sbom_entries);
findings_html.push_str(&format!(
r#"
<div class="finding" style="border-left-color: {sev_color}">
<div class="finding-header">
<span class="finding-id">F-{num:03}</span>
<span class="finding-title">{title}</span>
{exploitable_badge}
</div>
<table class="finding-meta">
<tr><td>Type</td><td>{vuln_type}</td></tr>
<tr><td>Endpoint</td><td><code>{method} {endpoint}</code></td></tr>
{param_row}
{cwe_cell}
</table>
<div class="finding-desc">{description}</div>
{evidence_html}
{code_correlation}
<div class="remediation">
<div class="remediation-label">Recommendation</div>
{remediation}
</div>
</div>
"#,
num = finding_num,
title = html_escape(&f.title),
vuln_type = f.vuln_type,
method = f.method,
endpoint = html_escape(&f.endpoint),
description = html_escape(&f.description),
));
}
}
format!(
r##"<!-- ═══════════════ 3. FINDINGS ═══════════════ -->
<div class="page-break"></div>
<h2><span class="section-num">3.</span> Findings</h2>
{findings_html}"##
)
}
/// Build the evidence table HTML for a finding.
fn build_evidence_html(f: &DastFinding) -> String {
if f.evidence.is_empty() {
return String::new();
}
let mut eh = String::from(
r#"<div class="evidence-block"><div class="evidence-title">Evidence</div><table class="evidence-table"><thead><tr><th>Request</th><th>Status</th><th>Details</th></tr></thead><tbody>"#,
);
for ev in &f.evidence {
let payload_info = ev
.payload
.as_deref()
.map(|p| {
format!(
"<br><span class=\"evidence-payload\">Payload: <code>{}</code></span>",
html_escape(p)
)
})
.unwrap_or_default();
eh.push_str(&format!(
"<tr><td><code>{} {}</code></td><td>{}</td><td>{}{}</td></tr>",
html_escape(&ev.request_method),
html_escape(&ev.request_url),
ev.response_status,
ev.response_snippet
.as_deref()
.map(html_escape)
.unwrap_or_default(),
payload_info,
));
}
eh.push_str("</tbody></table></div>");
eh
}
/// Build the code-level correlation block for a DAST finding.
///
/// Attempts correlation in priority order:
/// 1. Direct link via `linked_sast_finding_id` → shows exact file, line, snippet, suggested fix
/// 2. Endpoint match via code context → shows handler function, file, known SAST vulns
/// 3. CWE/CVE match to SBOM → shows vulnerable dependency + version to upgrade
fn build_code_correlation(
dast_finding: &DastFinding,
sast_by_id: &std::collections::HashMap<String, &Finding>,
code_context: &[CodeContextHint],
sbom_entries: &[SbomEntry],
) -> String {
let mut sections: Vec<String> = Vec::new();
// 1. Direct SAST link
if let Some(ref sast_id) = dast_finding.linked_sast_finding_id {
if let Some(sast) = sast_by_id.get(sast_id) {
let mut s = String::new();
s.push_str(r#"<div class="code-correlation-item">"#);
s.push_str(r#"<div class="code-correlation-badge">SAST Correlation</div>"#);
s.push_str("<table class=\"code-meta\">");
if let Some(ref fp) = sast.file_path {
let line_info = sast
.line_number
.map(|l| format!(":{l}"))
.unwrap_or_default();
s.push_str(&format!(
"<tr><td>Location</td><td><code>{}{}</code></td></tr>",
html_escape(fp),
line_info,
));
}
s.push_str(&format!(
"<tr><td>Scanner</td><td>{} &mdash; {}</td></tr>",
html_escape(&sast.scanner),
html_escape(&sast.title),
));
if let Some(ref cwe) = sast.cwe {
s.push_str(&format!(
"<tr><td>CWE</td><td>{}</td></tr>",
html_escape(cwe)
));
}
if let Some(ref rule) = sast.rule_id {
s.push_str(&format!(
"<tr><td>Rule</td><td><code>{}</code></td></tr>",
html_escape(rule)
));
}
s.push_str("</table>");
// Code snippet
if let Some(ref snippet) = sast.code_snippet {
if !snippet.is_empty() {
s.push_str(&format!(
"<div class=\"code-snippet-block\"><div class=\"code-snippet-label\">Vulnerable Code</div><pre class=\"code-snippet\">{}</pre></div>",
html_escape(snippet)
));
}
}
// Suggested fix
if let Some(ref fix) = sast.suggested_fix {
if !fix.is_empty() {
s.push_str(&format!(
"<div class=\"code-fix-block\"><div class=\"code-fix-label\">Suggested Fix</div><pre class=\"code-fix\">{}</pre></div>",
html_escape(fix)
));
}
}
// Remediation from SAST
if let Some(ref rem) = sast.remediation {
if !rem.is_empty() {
s.push_str(&format!(
"<div class=\"code-remediation\">{}</div>",
html_escape(rem)
));
}
}
s.push_str("</div>");
sections.push(s);
}
}
// 2. Endpoint match via code context
let endpoint_lower = dast_finding.endpoint.to_lowercase();
let matching_hints: Vec<&CodeContextHint> = code_context
.iter()
.filter(|hint| {
// Match by endpoint pattern overlap
let pattern_lower = hint.endpoint_pattern.to_lowercase();
endpoint_lower.contains(&pattern_lower)
|| pattern_lower.contains(&endpoint_lower)
|| hint.file_path.to_lowercase().contains(
&endpoint_lower
.split('/')
.next_back()
.unwrap_or("")
.replace(".html", "")
.replace(".php", ""),
)
})
.collect();
for hint in &matching_hints {
let mut s = String::new();
s.push_str(r#"<div class="code-correlation-item">"#);
s.push_str(r#"<div class="code-correlation-badge">Code Entry Point</div>"#);
s.push_str("<table class=\"code-meta\">");
s.push_str(&format!(
"<tr><td>Handler</td><td><code>{}</code></td></tr>",
html_escape(&hint.handler_function),
));
s.push_str(&format!(
"<tr><td>File</td><td><code>{}</code></td></tr>",
html_escape(&hint.file_path),
));
s.push_str(&format!(
"<tr><td>Route</td><td><code>{}</code></td></tr>",
html_escape(&hint.endpoint_pattern),
));
s.push_str("</table>");
if !hint.known_vulnerabilities.is_empty() {
s.push_str("<div class=\"code-linked-vulns\"><strong>Known SAST issues in this file:</strong><ul>");
for vuln in &hint.known_vulnerabilities {
s.push_str(&format!("<li>{}</li>", html_escape(vuln)));
}
s.push_str("</ul></div>");
}
s.push_str("</div>");
sections.push(s);
}
// 3. SBOM match — if a linked SAST finding has a CVE, or we can match by CWE
let linked_cve = dast_finding
.linked_sast_finding_id
.as_deref()
.and_then(|id| sast_by_id.get(id))
.and_then(|f| f.cve.as_deref());
if let Some(cve_id) = linked_cve {
let matching_deps: Vec<&SbomEntry> = sbom_entries
.iter()
.filter(|e| e.known_vulnerabilities.iter().any(|v| v.id == cve_id))
.collect();
for dep in &matching_deps {
let mut s = String::new();
s.push_str(r#"<div class="code-correlation-item">"#);
s.push_str(r#"<div class="code-correlation-badge">Vulnerable Dependency</div>"#);
s.push_str("<table class=\"code-meta\">");
s.push_str(&format!(
"<tr><td>Package</td><td><code>{} {}</code> ({})</td></tr>",
html_escape(&dep.name),
html_escape(&dep.version),
html_escape(&dep.package_manager),
));
let cve_ids: Vec<&str> = dep
.known_vulnerabilities
.iter()
.map(|v| v.id.as_str())
.collect();
s.push_str(&format!(
"<tr><td>CVEs</td><td>{}</td></tr>",
cve_ids.join(", "),
));
if let Some(ref purl) = dep.purl {
s.push_str(&format!(
"<tr><td>PURL</td><td><code>{}</code></td></tr>",
html_escape(purl),
));
}
s.push_str("</table>");
s.push_str(&format!(
"<div class=\"code-remediation\">Upgrade <code>{}</code> to the latest patched version to resolve {}.</div>",
html_escape(&dep.name),
html_escape(cve_id),
));
s.push_str("</div>");
sections.push(s);
}
}
if sections.is_empty() {
return String::new();
}
format!(
r#"<div class="code-correlation">
<div class="code-correlation-title">Code-Level Remediation</div>
{}
</div>"#,
sections.join("\n")
)
}
#[cfg(test)]
mod tests {
use super::*;
use compliance_core::models::dast::{DastEvidence, DastVulnType};
use compliance_core::models::finding::Severity;
use compliance_core::models::scan::ScanType;
/// Helper: create a minimal `DastFinding`.
fn make_dast(title: &str, severity: Severity, endpoint: &str) -> DastFinding {
DastFinding::new(
"run1".into(),
"target1".into(),
DastVulnType::Xss,
title.into(),
"desc".into(),
severity,
endpoint.into(),
"GET".into(),
)
}
/// Helper: create a minimal SAST `Finding` with an ObjectId.
fn make_sast(title: &str) -> Finding {
let mut f = Finding::new(
"repo1".into(),
"fp1".into(),
"semgrep".into(),
ScanType::Sast,
title.into(),
"sast desc".into(),
Severity::High,
);
f.id = Some(mongodb::bson::oid::ObjectId::new());
f
}
#[test]
fn test_findings_empty() {
let result = findings(&[], &[], &[], &[]);
assert!(
result.contains("No vulnerabilities were identified"),
"Empty findings should contain the no-vulns message"
);
}
#[test]
fn test_findings_grouped_by_severity() {
let f_high = make_dast("High vuln", Severity::High, "/a");
let f_low = make_dast("Low vuln", Severity::Low, "/b");
let f_critical = make_dast("Crit vuln", Severity::Critical, "/c");
let result = findings(&[f_high, f_low, f_critical], &[], &[], &[]);
// All severity group headers should appear
assert!(
result.contains("Critical (1)"),
"should have Critical header"
);
assert!(result.contains("High (1)"), "should have High header");
assert!(result.contains("Low (1)"), "should have Low header");
// Critical should appear before High, High before Low
let crit_pos = result.find("Critical (1)");
let high_pos = result.find("High (1)");
let low_pos = result.find("Low (1)");
assert!(crit_pos < high_pos, "Critical should come before High");
assert!(high_pos < low_pos, "High should come before Low");
}
#[test]
fn test_code_correlation_sast_link() {
let mut sast = make_sast("SQL Injection in query");
sast.file_path = Some("src/db/query.rs".into());
sast.line_number = Some(42);
sast.code_snippet =
Some("let q = format!(\"SELECT * FROM {} WHERE id={}\", table, id);".into());
let sast_id = sast.id.as_ref().map(|oid| oid.to_hex()).unwrap_or_default();
let mut dast = make_dast("SQLi on /api/users", Severity::High, "/api/users");
dast.linked_sast_finding_id = Some(sast_id);
let result = findings(&[dast], &[sast], &[], &[]);
assert!(
result.contains("SAST Correlation"),
"should render SAST Correlation badge"
);
assert!(
result.contains("src/db/query.rs"),
"should contain the file path"
);
assert!(result.contains(":42"), "should contain the line number");
assert!(
result.contains("Vulnerable Code"),
"should render code snippet block"
);
}
#[test]
fn test_code_correlation_no_match() {
let dast = make_dast("XSS in search", Severity::Medium, "/search");
// No linked_sast_finding_id, no code context, no sbom
let result = findings(&[dast], &[], &[], &[]);
assert!(
!result.contains("code-correlation"),
"should not contain any code-correlation div"
);
}
#[test]
fn test_evidence_html_empty() {
let f = make_dast("No evidence", Severity::Low, "/x");
let result = build_evidence_html(&f);
assert!(result.is_empty(), "no evidence should yield empty string");
}
#[test]
fn test_evidence_html_with_entries() {
let mut f = make_dast("Has evidence", Severity::High, "/y");
f.evidence.push(DastEvidence {
request_method: "POST".into(),
request_url: "https://example.com/login".into(),
request_headers: None,
request_body: None,
response_status: 200,
response_headers: None,
response_snippet: Some("OK".into()),
screenshot_path: None,
payload: Some("<script>alert(1)</script>".into()),
response_time_ms: None,
});
let result = build_evidence_html(&f);
assert!(
result.contains("evidence-table"),
"should render the evidence table"
);
assert!(result.contains("POST"), "should contain request method");
assert!(
result.contains("https://example.com/login"),
"should contain request URL"
);
assert!(result.contains("200"), "should contain response status");
assert!(
result.contains("&lt;script&gt;alert(1)&lt;/script&gt;"),
"payload should be HTML-escaped"
);
}
}

View File

@@ -0,0 +1,518 @@
mod appendix;
mod attack_chain;
mod cover;
mod executive_summary;
mod findings;
mod scope;
mod styles;
use super::ReportContext;
#[allow(clippy::format_in_format_args)]
pub(super) fn build_html_report(ctx: &ReportContext) -> String {
let session = &ctx.session;
let session_id = session
.id
.map(|oid| oid.to_hex())
.unwrap_or_else(|| "-".to_string());
let date_str = session
.started_at
.format("%B %d, %Y at %H:%M UTC")
.to_string();
let date_short = session.started_at.format("%B %d, %Y").to_string();
let completed_str = session
.completed_at
.map(|d| d.format("%B %d, %Y at %H:%M UTC").to_string())
.unwrap_or_else(|| "In Progress".to_string());
// Collect unique tool names used
let tool_names: Vec<String> = {
let mut names: Vec<String> = ctx
.attack_chain
.iter()
.map(|n| n.tool_name.clone())
.collect();
names.sort();
names.dedup();
names
};
// Find the best app screenshot for the cover page:
// prefer the first navigate to the target URL that has a screenshot,
// falling back to any navigate with a screenshot
let app_screenshot: Option<String> = ctx
.attack_chain
.iter()
.filter(|n| n.tool_name == "browser")
.filter_map(|n| {
n.tool_output
.as_ref()?
.get("screenshot_base64")?
.as_str()
.filter(|s| !s.is_empty())
.map(|s| s.to_string())
})
// Skip the Keycloak login page screenshots — prefer one that shows the actual app
.find(|_| {
ctx.attack_chain
.iter()
.filter(|n| n.tool_name == "browser")
.any(|n| {
n.tool_output
.as_ref()
.and_then(|o| o.get("title"))
.and_then(|t| t.as_str())
.is_some_and(|t| t.contains("Compliance") || t.contains("Dashboard"))
})
})
.or_else(|| {
// Fallback: any screenshot
ctx.attack_chain
.iter()
.filter(|n| n.tool_name == "browser")
.filter_map(|n| {
n.tool_output
.as_ref()?
.get("screenshot_base64")?
.as_str()
.filter(|s| !s.is_empty())
.map(|s| s.to_string())
})
.next()
});
let styles_html = styles::styles();
let cover_html = cover::cover(
&ctx.target_name,
&session_id,
&date_short,
&ctx.target_url,
&ctx.requester_name,
&ctx.requester_email,
app_screenshot.as_deref(),
);
let exec_html = executive_summary::executive_summary(
&ctx.findings,
&ctx.target_name,
&ctx.target_url,
tool_names.len(),
session.tool_invocations,
session.success_rate(),
);
let scope_html = scope::scope(
session,
&ctx.target_name,
&ctx.target_url,
&date_str,
&completed_str,
&tool_names,
ctx.config.as_ref(),
);
let findings_html = findings::findings(
&ctx.findings,
&ctx.sast_findings,
&ctx.code_context,
&ctx.sbom_entries,
);
let chain_html = attack_chain::attack_chain(&ctx.attack_chain);
let appendix_html = appendix::appendix(&session_id);
format!(
r#"<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Penetration Test Report — {target_name}</title>
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=Libre+Baskerville:ital,wght@0,400;0,700;1,400&family=Source+Sans+3:ital,wght@0,300;0,400;0,600;0,700&family=JetBrains+Mono:wght@400;500&display=swap" rel="stylesheet">
{styles_html}
</head>
<body>
{cover_html}
{exec_html}
{scope_html}
{findings_html}
{chain_html}
{appendix_html}
"#,
target_name = html_escape(&ctx.target_name),
)
}
fn tool_category(tool_name: &str) -> &'static str {
let name = tool_name.to_lowercase();
if name.contains("nmap") || name.contains("port") {
return "Network Reconnaissance";
}
if name.contains("nikto") || name.contains("header") {
return "Web Server Analysis";
}
if name.contains("zap") || name.contains("spider") || name.contains("crawl") {
return "Web Application Scanning";
}
if name.contains("sqlmap") || name.contains("sqli") || name.contains("sql") {
return "SQL Injection Testing";
}
if name.contains("xss") || name.contains("cross-site") {
return "Cross-Site Scripting Testing";
}
if name.contains("dir")
|| name.contains("brute")
|| name.contains("fuzz")
|| name.contains("gobuster")
{
return "Directory Enumeration";
}
if name.contains("ssl") || name.contains("tls") || name.contains("cert") {
return "SSL/TLS Analysis";
}
if name.contains("api") || name.contains("endpoint") {
return "API Security Testing";
}
if name.contains("auth") || name.contains("login") || name.contains("credential") {
return "Authentication Testing";
}
if name.contains("cors") {
return "CORS Testing";
}
if name.contains("csrf") {
return "CSRF Testing";
}
if name.contains("nuclei") || name.contains("template") {
return "Vulnerability Scanning";
}
if name.contains("whatweb") || name.contains("tech") || name.contains("wappalyzer") {
return "Technology Fingerprinting";
}
"Security Testing"
}
fn html_escape(s: &str) -> String {
s.replace('&', "&amp;")
.replace('<', "&lt;")
.replace('>', "&gt;")
.replace('"', "&quot;")
}
#[cfg(test)]
mod tests {
use super::*;
use compliance_core::models::dast::{DastFinding, DastVulnType};
use compliance_core::models::finding::Severity;
use compliance_core::models::pentest::{
AttackChainNode, AttackNodeStatus, PentestSession, PentestStrategy,
};
// ── html_escape ──────────────────────────────────────────────────
#[test]
fn html_escape_handles_ampersand() {
assert_eq!(html_escape("a & b"), "a &amp; b");
}
#[test]
fn html_escape_handles_angle_brackets() {
assert_eq!(html_escape("<script>"), "&lt;script&gt;");
}
#[test]
fn html_escape_handles_quotes() {
assert_eq!(html_escape(r#"key="val""#), "key=&quot;val&quot;");
}
#[test]
fn html_escape_handles_all_special_chars() {
assert_eq!(
html_escape(r#"<a href="x">&y</a>"#),
"&lt;a href=&quot;x&quot;&gt;&amp;y&lt;/a&gt;"
);
}
#[test]
fn html_escape_no_change_for_plain_text() {
assert_eq!(html_escape("hello world"), "hello world");
}
#[test]
fn html_escape_empty_string() {
assert_eq!(html_escape(""), "");
}
// ── tool_category ────────────────────────────────────────────────
#[test]
fn tool_category_nmap() {
assert_eq!(tool_category("nmap_scan"), "Network Reconnaissance");
}
#[test]
fn tool_category_port_scanner() {
assert_eq!(tool_category("port_scanner"), "Network Reconnaissance");
}
#[test]
fn tool_category_nikto() {
assert_eq!(tool_category("nikto"), "Web Server Analysis");
}
#[test]
fn tool_category_header_check() {
assert_eq!(
tool_category("security_header_check"),
"Web Server Analysis"
);
}
#[test]
fn tool_category_zap_spider() {
assert_eq!(tool_category("zap_spider"), "Web Application Scanning");
}
#[test]
fn tool_category_sqlmap() {
assert_eq!(tool_category("sqlmap"), "SQL Injection Testing");
}
#[test]
fn tool_category_xss_scanner() {
assert_eq!(tool_category("xss_scanner"), "Cross-Site Scripting Testing");
}
#[test]
fn tool_category_dir_bruteforce() {
assert_eq!(tool_category("dir_bruteforce"), "Directory Enumeration");
}
#[test]
fn tool_category_gobuster() {
assert_eq!(tool_category("gobuster"), "Directory Enumeration");
}
#[test]
fn tool_category_ssl_check() {
assert_eq!(tool_category("ssl_check"), "SSL/TLS Analysis");
}
#[test]
fn tool_category_tls_scan() {
assert_eq!(tool_category("tls_scan"), "SSL/TLS Analysis");
}
#[test]
fn tool_category_api_test() {
assert_eq!(tool_category("api_endpoint_test"), "API Security Testing");
}
#[test]
fn tool_category_auth_bypass() {
assert_eq!(tool_category("auth_bypass_check"), "Authentication Testing");
}
#[test]
fn tool_category_cors() {
assert_eq!(tool_category("cors_check"), "CORS Testing");
}
#[test]
fn tool_category_csrf() {
assert_eq!(tool_category("csrf_scanner"), "CSRF Testing");
}
#[test]
fn tool_category_nuclei() {
assert_eq!(tool_category("nuclei"), "Vulnerability Scanning");
}
#[test]
fn tool_category_whatweb() {
assert_eq!(tool_category("whatweb"), "Technology Fingerprinting");
}
#[test]
fn tool_category_unknown_defaults_to_security_testing() {
assert_eq!(tool_category("custom_tool"), "Security Testing");
}
#[test]
fn tool_category_is_case_insensitive() {
assert_eq!(tool_category("NMAP_Scanner"), "Network Reconnaissance");
assert_eq!(tool_category("SQLMap"), "SQL Injection Testing");
}
// ── build_html_report ────────────────────────────────────────────
fn make_session(strategy: PentestStrategy) -> PentestSession {
let mut s = PentestSession::new("target-1".into(), strategy);
s.tool_invocations = 5;
s.tool_successes = 4;
s.findings_count = 2;
s.exploitable_count = 1;
s
}
fn make_finding(severity: Severity, title: &str, exploitable: bool) -> DastFinding {
let mut f = DastFinding::new(
"run-1".into(),
"target-1".into(),
DastVulnType::Xss,
title.into(),
"description".into(),
severity,
"https://example.com/test".into(),
"GET".into(),
);
f.exploitable = exploitable;
f
}
fn make_attack_node(tool_name: &str) -> AttackChainNode {
let mut node = AttackChainNode::new(
"session-1".into(),
"node-1".into(),
tool_name.into(),
serde_json::json!({}),
"Testing this tool".into(),
);
node.status = AttackNodeStatus::Completed;
node
}
fn make_report_context(
findings: Vec<DastFinding>,
chain: Vec<AttackChainNode>,
) -> ReportContext {
ReportContext {
session: make_session(PentestStrategy::Comprehensive),
target_name: "Test App".into(),
target_url: "https://example.com".into(),
findings,
attack_chain: chain,
requester_name: "Alice".into(),
requester_email: "alice@example.com".into(),
config: None,
sast_findings: Vec::new(),
sbom_entries: Vec::new(),
code_context: Vec::new(),
}
}
#[test]
fn report_contains_target_info() {
let ctx = make_report_context(vec![], vec![]);
let html = build_html_report(&ctx);
assert!(html.contains("Test App"));
assert!(html.contains("https://example.com"));
}
#[test]
fn report_contains_requester_info() {
let ctx = make_report_context(vec![], vec![]);
let html = build_html_report(&ctx);
assert!(html.contains("Alice"));
assert!(html.contains("alice@example.com"));
}
#[test]
fn report_shows_informational_risk_when_no_findings() {
let ctx = make_report_context(vec![], vec![]);
let html = build_html_report(&ctx);
assert!(html.contains("INFORMATIONAL"));
}
#[test]
fn report_shows_critical_risk_with_critical_finding() {
let findings = vec![make_finding(Severity::Critical, "Critical XSS", true)];
let ctx = make_report_context(findings, vec![]);
let html = build_html_report(&ctx);
assert!(html.contains("CRITICAL"));
}
#[test]
fn report_shows_high_risk_without_critical() {
let findings = vec![make_finding(Severity::High, "High SQLi", false)];
let ctx = make_report_context(findings, vec![]);
let html = build_html_report(&ctx);
// Should show HIGH, not CRITICAL
assert!(html.contains("HIGH"));
}
#[test]
fn report_shows_medium_risk_level() {
let findings = vec![make_finding(Severity::Medium, "Medium Issue", false)];
let ctx = make_report_context(findings, vec![]);
let html = build_html_report(&ctx);
assert!(html.contains("MEDIUM"));
}
#[test]
fn report_includes_finding_title() {
let findings = vec![make_finding(
Severity::High,
"Reflected XSS in /search",
true,
)];
let ctx = make_report_context(findings, vec![]);
let html = build_html_report(&ctx);
assert!(html.contains("Reflected XSS in /search"));
}
#[test]
fn report_shows_exploitable_badge() {
let findings = vec![make_finding(Severity::Critical, "SQLi", true)];
let ctx = make_report_context(findings, vec![]);
let html = build_html_report(&ctx);
// The report should mark exploitable findings
assert!(html.contains("EXPLOITABLE"));
}
#[test]
fn report_includes_attack_chain_tool_names() {
let chain = vec![make_attack_node("nmap_scan"), make_attack_node("sqlmap")];
let ctx = make_report_context(vec![], chain);
let html = build_html_report(&ctx);
assert!(html.contains("nmap_scan"));
assert!(html.contains("sqlmap"));
}
#[test]
fn report_is_valid_html_structure() {
let ctx = make_report_context(vec![], vec![]);
let html = build_html_report(&ctx);
assert!(html.contains("<!DOCTYPE html>") || html.contains("<html"));
assert!(html.contains("</html>"));
}
#[test]
fn report_strategy_appears() {
let ctx = make_report_context(vec![], vec![]);
let html = build_html_report(&ctx);
// PentestStrategy::Comprehensive => "comprehensive"
assert!(html.contains("comprehensive") || html.contains("Comprehensive"));
}
#[test]
fn report_finding_count_is_correct() {
let findings = vec![
make_finding(Severity::Critical, "F1", true),
make_finding(Severity::High, "F2", false),
make_finding(Severity::Low, "F3", false),
];
let ctx = make_report_context(findings, vec![]);
let html = build_html_report(&ctx);
// The total count "3" should appear somewhere
assert!(
html.contains(">3<")
|| html.contains(">3 ")
|| html.contains("3 findings")
|| html.contains("3 Total")
);
}
}

View File

@@ -0,0 +1,127 @@
use super::{html_escape, tool_category};
use compliance_core::models::pentest::{AuthMode, PentestConfig, PentestSession};
pub(super) fn scope(
session: &PentestSession,
target_name: &str,
target_url: &str,
date_str: &str,
completed_str: &str,
tool_names: &[String],
config: Option<&PentestConfig>,
) -> String {
let tools_table: String = tool_names
.iter()
.enumerate()
.map(|(i, t)| {
let category = tool_category(t);
format!(
"<tr><td>{}</td><td><code>{}</code></td><td>{}</td></tr>",
i + 1,
html_escape(t),
category,
)
})
.collect::<Vec<_>>()
.join("\n");
let engagement_config_section = if let Some(cfg) = config {
let mut rows = String::new();
rows.push_str(&format!(
"<tr><td>Environment</td><td>{}</td></tr>",
html_escape(&cfg.environment.to_string())
));
if let Some(ref app_type) = cfg.app_type {
rows.push_str(&format!(
"<tr><td>Application Type</td><td>{}</td></tr>",
html_escape(app_type)
));
}
let auth_mode = match cfg.auth.mode {
AuthMode::None => "No authentication",
AuthMode::Manual => "Manual credentials",
AuthMode::AutoRegister => "Auto-register",
};
rows.push_str(&format!("<tr><td>Auth Mode</td><td>{auth_mode}</td></tr>"));
if !cfg.scope_exclusions.is_empty() {
let excl = cfg
.scope_exclusions
.iter()
.map(|s| html_escape(s))
.collect::<Vec<_>>()
.join(", ");
rows.push_str(&format!(
"<tr><td>Scope Exclusions</td><td><code>{excl}</code></td></tr>"
));
}
if !cfg.tester.name.is_empty() {
rows.push_str(&format!(
"<tr><td>Tester</td><td>{} ({})</td></tr>",
html_escape(&cfg.tester.name),
html_escape(&cfg.tester.email)
));
}
if let Some(ref ts) = cfg.disclaimer_accepted_at {
rows.push_str(&format!(
"<tr><td>Disclaimer Accepted</td><td>{}</td></tr>",
ts.format("%B %d, %Y at %H:%M UTC")
));
}
if let Some(ref branch) = cfg.branch {
rows.push_str(&format!(
"<tr><td>Git Branch</td><td>{}</td></tr>",
html_escape(branch)
));
}
if let Some(ref commit) = cfg.commit_hash {
rows.push_str(&format!(
"<tr><td>Git Commit</td><td><code>{}</code></td></tr>",
html_escape(commit)
));
}
format!("<h3>Engagement Configuration</h3>\n<table class=\"info\">\n{rows}\n</table>")
} else {
String::new()
};
format!(
r##"
<!-- ═══════════════ 2. SCOPE & METHODOLOGY ═══════════════ -->
<div class="page-break"></div>
<h2><span class="section-num">2.</span> Scope &amp; Methodology</h2>
<p>
The assessment was performed using an AI-driven orchestrator that autonomously selects and
executes security testing tools based on the target's attack surface, technology stack, and
any available static analysis (SAST) findings and SBOM data.
</p>
<h3>Engagement Details</h3>
<table class="info">
<tr><td>Target</td><td><strong>{target_name}</strong></td></tr>
<tr><td>URL</td><td><code>{target_url}</code></td></tr>
<tr><td>Strategy</td><td>{strategy}</td></tr>
<tr><td>Status</td><td>{status}</td></tr>
<tr><td>Started</td><td>{date_str}</td></tr>
<tr><td>Completed</td><td>{completed_str}</td></tr>
<tr><td>Tool Invocations</td><td>{tool_invocations} ({tool_successes} successful, {success_rate:.1}% success rate)</td></tr>
</table>
{engagement_config_section}
<h3>Tools Employed</h3>
<table class="tools-table">
<thead><tr><th>#</th><th>Tool</th><th>Category</th></tr></thead>
<tbody>{tools_table}</tbody>
</table>"##,
target_name = html_escape(target_name),
target_url = html_escape(target_url),
strategy = session.strategy,
status = session.status,
date_str = date_str,
completed_str = completed_str,
tool_invocations = session.tool_invocations,
tool_successes = session.tool_successes,
success_rate = session.success_rate(),
)
}

View File

@@ -0,0 +1,889 @@
pub(super) fn styles() -> String {
r##"<style>
/* ──────────────── Base / Print-first ──────────────── */
@page {
size: A4;
margin: 20mm 18mm 25mm 18mm;
}
@page :first {
margin: 0;
}
*, *::before, *::after { margin: 0; padding: 0; box-sizing: border-box; }
:root {
--text: #1a1a2e;
--text-secondary: #475569;
--text-muted: #64748b;
--heading: #0d2137;
--accent: #1a56db;
--accent-light: #dbeafe;
--border: #d1d5db;
--border-light: #e5e7eb;
--bg-subtle: #f8fafc;
--bg-section: #f1f5f9;
--sev-critical: #991b1b;
--sev-high: #c2410c;
--sev-medium: #a16207;
--sev-low: #1d4ed8;
--sev-info: #4b5563;
--font-serif: 'Libre Baskerville', 'Georgia', serif;
--font-sans: 'Source Sans 3', 'Helvetica Neue', sans-serif;
--font-mono: 'JetBrains Mono', 'Consolas', monospace;
}
body {
font-family: var(--font-sans);
color: var(--text);
background: #fff;
line-height: 1.65;
font-size: 10.5pt;
-webkit-print-color-adjust: exact;
print-color-adjust: exact;
}
.report-body {
max-width: 190mm;
margin: 0 auto;
padding: 0 16px;
}
/* ──────────────── Cover Page ──────────────── */
.cover {
height: 100vh;
min-height: 297mm;
display: flex;
flex-direction: column;
justify-content: center;
align-items: center;
text-align: center;
padding: 40mm 30mm;
page-break-after: always;
break-after: page;
position: relative;
background: #fff;
}
.cover-shield {
width: 72px;
height: 72px;
margin-bottom: 32px;
}
.cover-tag {
display: inline-block;
background: var(--sev-critical);
color: #fff;
font-family: var(--font-sans);
font-size: 8pt;
font-weight: 700;
letter-spacing: 0.15em;
text-transform: uppercase;
padding: 4px 16px;
border-radius: 2px;
margin-bottom: 28px;
}
.cover-title {
font-family: var(--font-serif);
font-size: 28pt;
font-weight: 700;
color: var(--heading);
line-height: 1.2;
margin-bottom: 8px;
}
.cover-subtitle {
font-family: var(--font-serif);
font-size: 14pt;
color: var(--text-secondary);
font-weight: 400;
font-style: italic;
margin-bottom: 48px;
}
.cover-meta {
font-size: 10pt;
color: var(--text-secondary);
line-height: 2;
}
.cover-meta strong {
color: var(--text);
}
.cover-divider {
width: 60px;
height: 2px;
background: var(--accent);
margin: 24px auto;
}
.cover-footer {
position: absolute;
bottom: 30mm;
left: 0;
right: 0;
text-align: center;
font-size: 8pt;
color: var(--text-muted);
letter-spacing: 0.05em;
}
/* ──────────────── Typography ──────────────── */
h2 {
font-family: var(--font-serif);
font-size: 16pt;
font-weight: 700;
color: var(--heading);
margin: 36px 0 16px;
padding-bottom: 8px;
border-bottom: 2px solid var(--heading);
page-break-after: avoid;
break-after: avoid;
}
h3 {
font-family: var(--font-serif);
font-size: 12pt;
font-weight: 700;
color: var(--heading);
margin: 24px 0 10px;
page-break-after: avoid;
break-after: avoid;
}
h4 {
font-family: var(--font-sans);
font-size: 10pt;
font-weight: 700;
color: var(--text-secondary);
margin: 16px 0 8px;
}
p {
margin: 8px 0;
font-size: 10.5pt;
}
code {
font-family: var(--font-mono);
font-size: 9pt;
background: var(--bg-section);
padding: 1px 5px;
border-radius: 3px;
border: 1px solid var(--border-light);
word-break: break-all;
}
/* ──────────────── Section Numbers ──────────────── */
.section-num {
color: var(--accent);
margin-right: 8px;
}
/* ──────────────── Table of Contents ──────────────── */
.toc {
page-break-after: always;
break-after: page;
padding-top: 24px;
}
.toc h2 {
border-bottom-color: var(--accent);
margin-top: 0;
}
.toc-entry {
display: flex;
justify-content: space-between;
align-items: baseline;
padding: 6px 0;
border-bottom: 1px dotted var(--border);
font-size: 11pt;
}
.toc-entry .toc-num {
font-weight: 700;
color: var(--accent);
min-width: 24px;
margin-right: 10px;
}
.toc-entry .toc-label {
flex: 1;
font-weight: 600;
color: var(--heading);
}
.toc-sub {
padding: 3px 0 3px 34px;
font-size: 9.5pt;
color: var(--text-secondary);
}
/* ──────────────── Executive Summary ──────────────── */
.exec-grid {
display: grid;
grid-template-columns: 1fr 1fr 1fr 1fr;
gap: 12px;
margin: 16px 0 20px;
}
.kpi-card {
border: 1px solid var(--border);
border-radius: 6px;
padding: 14px 12px;
text-align: center;
background: var(--bg-subtle);
}
.kpi-value {
font-family: var(--font-serif);
font-size: 22pt;
font-weight: 700;
line-height: 1.1;
}
.kpi-label {
font-size: 8pt;
text-transform: uppercase;
letter-spacing: 0.08em;
color: var(--text-muted);
margin-top: 4px;
font-weight: 600;
}
/* Risk gauge */
.risk-gauge {
display: flex;
align-items: center;
gap: 16px;
padding: 16px 20px;
border: 1px solid var(--border);
border-radius: 6px;
background: var(--bg-subtle);
margin: 16px 0;
}
.risk-gauge-meter {
width: 140px;
flex-shrink: 0;
}
.risk-gauge-track {
height: 10px;
background: var(--border-light);
border-radius: 5px;
overflow: hidden;
position: relative;
}
.risk-gauge-fill {
height: 100%;
border-radius: 5px;
transition: width 0.3s;
}
.risk-gauge-score {
font-family: var(--font-serif);
font-size: 9pt;
font-weight: 700;
text-align: center;
margin-top: 3px;
}
.risk-gauge-text {
flex: 1;
}
.risk-gauge-label {
font-family: var(--font-serif);
font-size: 14pt;
font-weight: 700;
}
.risk-gauge-desc {
font-size: 9.5pt;
color: var(--text-secondary);
margin-top: 2px;
}
/* Severity bar */
.sev-bar {
display: flex;
height: 28px;
border-radius: 4px;
overflow: hidden;
margin: 12px 0 6px;
border: 1px solid var(--border);
}
.sev-bar-seg {
display: flex;
align-items: center;
justify-content: center;
color: #fff;
font-size: 8.5pt;
font-weight: 700;
min-width: 24px;
}
.sev-bar-critical { background: var(--sev-critical); }
.sev-bar-high { background: var(--sev-high); }
.sev-bar-medium { background: var(--sev-medium); }
.sev-bar-low { background: var(--sev-low); }
.sev-bar-info { background: var(--sev-info); }
.sev-bar-legend {
display: flex;
gap: 16px;
font-size: 8.5pt;
color: var(--text-secondary);
margin-bottom: 16px;
}
.sev-dot {
display: inline-block;
width: 8px;
height: 8px;
border-radius: 2px;
margin-right: 4px;
vertical-align: middle;
}
/* ──────────────── Info Tables ──────────────── */
table.info {
width: 100%;
border-collapse: collapse;
margin: 10px 0;
font-size: 10pt;
}
table.info td,
table.info th {
padding: 7px 12px;
border: 1px solid var(--border);
text-align: left;
vertical-align: top;
}
table.info td:first-child,
table.info th:first-child {
width: 160px;
font-weight: 600;
color: var(--text-secondary);
background: var(--bg-subtle);
}
/* Methodology tools table */
table.tools-table {
width: 100%;
border-collapse: collapse;
margin: 10px 0;
font-size: 10pt;
}
table.tools-table th {
background: var(--heading);
color: #fff;
padding: 8px 12px;
text-align: left;
font-weight: 600;
font-size: 9pt;
text-transform: uppercase;
letter-spacing: 0.04em;
}
table.tools-table td {
padding: 6px 12px;
border-bottom: 1px solid var(--border-light);
}
table.tools-table tr:nth-child(even) td {
background: var(--bg-subtle);
}
table.tools-table td:first-child {
width: 32px;
text-align: center;
color: var(--text-muted);
font-weight: 600;
}
/* ──────────────── Badges ──────────────── */
.badge {
display: inline-block;
padding: 2px 8px;
border-radius: 3px;
font-size: 7.5pt;
font-weight: 700;
text-transform: uppercase;
letter-spacing: 0.03em;
vertical-align: middle;
}
.badge-exploit {
background: var(--sev-critical);
color: #fff;
}
/* ──────────────── Findings ──────────────── */
.sev-group-title {
font-family: var(--font-sans);
font-size: 11pt;
font-weight: 700;
color: var(--heading);
padding: 8px 0 6px 12px;
margin: 20px 0 8px;
border-left: 4px solid;
page-break-after: avoid;
break-after: avoid;
}
.finding {
border: 1px solid var(--border);
border-left: 4px solid;
border-radius: 0 4px 4px 0;
padding: 14px 16px;
margin-bottom: 12px;
background: #fff;
page-break-inside: avoid;
break-inside: avoid;
}
.finding-header {
display: flex;
align-items: center;
gap: 10px;
margin-bottom: 8px;
}
.finding-id {
font-family: var(--font-mono);
font-size: 9pt;
font-weight: 500;
color: var(--text-muted);
background: var(--bg-section);
padding: 2px 6px;
border-radius: 3px;
border: 1px solid var(--border-light);
}
.finding-title {
font-family: var(--font-serif);
font-weight: 700;
font-size: 11pt;
flex: 1;
color: var(--heading);
}
.finding-meta {
border-collapse: collapse;
margin: 6px 0;
font-size: 9.5pt;
width: 100%;
}
.finding-meta td {
padding: 3px 10px 3px 0;
vertical-align: top;
}
.finding-meta td:first-child {
color: var(--text-muted);
font-weight: 600;
width: 90px;
white-space: nowrap;
}
.finding-desc {
margin: 8px 0;
font-size: 10pt;
color: var(--text);
line-height: 1.6;
}
.remediation {
margin-top: 10px;
padding: 10px 14px;
background: var(--accent-light);
border-left: 3px solid var(--accent);
border-radius: 0 4px 4px 0;
font-size: 9.5pt;
line-height: 1.55;
}
.remediation-label {
font-weight: 700;
font-size: 8.5pt;
text-transform: uppercase;
letter-spacing: 0.06em;
color: var(--accent);
margin-bottom: 3px;
}
.evidence-block {
margin: 10px 0;
page-break-inside: avoid;
break-inside: avoid;
}
.evidence-title {
font-weight: 700;
font-size: 8.5pt;
text-transform: uppercase;
letter-spacing: 0.06em;
color: var(--text-muted);
margin-bottom: 4px;
}
.evidence-table {
width: 100%;
border-collapse: collapse;
font-size: 9pt;
}
.evidence-table th {
background: var(--bg-section);
padding: 5px 8px;
text-align: left;
font-weight: 600;
font-size: 8.5pt;
text-transform: uppercase;
letter-spacing: 0.03em;
color: var(--text-secondary);
border: 1px solid var(--border-light);
}
.evidence-table td {
padding: 5px 8px;
border: 1px solid var(--border-light);
vertical-align: top;
word-break: break-word;
}
.evidence-payload {
font-size: 8.5pt;
color: var(--sev-critical);
}
.linked-sast {
font-size: 9pt;
color: var(--text-muted);
margin: 6px 0;
font-style: italic;
}
/* ──────────────── Code-Level Correlation ──────────────── */
.code-correlation {
margin: 12px 0;
border: 1px solid #e2e8f0;
border-radius: 6px;
overflow: hidden;
}
.code-correlation-title {
background: #1e293b;
color: #f8fafc;
padding: 6px 12px;
font-size: 9pt;
font-weight: 700;
text-transform: uppercase;
letter-spacing: 0.05em;
}
.code-correlation-item {
padding: 10px 12px;
border-bottom: 1px solid #e2e8f0;
}
.code-correlation-item:last-child { border-bottom: none; }
.code-correlation-badge {
display: inline-block;
background: #3b82f6;
color: #fff;
font-size: 7pt;
font-weight: 600;
padding: 2px 8px;
border-radius: 3px;
text-transform: uppercase;
letter-spacing: 0.04em;
margin-bottom: 6px;
}
.code-meta {
width: 100%;
font-size: 8.5pt;
border-collapse: collapse;
margin-bottom: 6px;
}
.code-meta td:first-child {
width: 80px;
font-weight: 600;
color: var(--text-muted);
padding: 2px 8px 2px 0;
vertical-align: top;
}
.code-meta td:last-child {
padding: 2px 0;
}
.code-snippet-block, .code-fix-block {
margin: 6px 0;
}
.code-snippet-label, .code-fix-label {
font-size: 7.5pt;
font-weight: 700;
text-transform: uppercase;
letter-spacing: 0.04em;
color: var(--text-muted);
margin-bottom: 3px;
}
.code-snippet-label { color: #dc2626; }
.code-fix-label { color: #16a34a; }
.code-snippet {
background: #fef2f2;
border: 1px solid #fecaca;
border-left: 3px solid #dc2626;
padding: 8px 10px;
font-family: 'JetBrains Mono', monospace;
font-size: 8pt;
line-height: 1.5;
white-space: pre-wrap;
word-break: break-word;
border-radius: 0 4px 4px 0;
margin: 0;
}
.code-fix {
background: #f0fdf4;
border: 1px solid #bbf7d0;
border-left: 3px solid #16a34a;
padding: 8px 10px;
font-family: 'JetBrains Mono', monospace;
font-size: 8pt;
line-height: 1.5;
white-space: pre-wrap;
word-break: break-word;
border-radius: 0 4px 4px 0;
margin: 0;
}
.code-remediation {
font-size: 8.5pt;
color: var(--text-secondary);
margin-top: 4px;
padding: 4px 0;
}
.code-linked-vulns {
font-size: 8.5pt;
margin-top: 4px;
}
.code-linked-vulns ul {
margin: 2px 0 0 16px;
padding: 0;
}
.code-linked-vulns li {
margin-bottom: 2px;
}
/* ──────────────── Attack Chain ──────────────── */
.phase-block {
margin-bottom: 20px;
page-break-inside: avoid;
break-inside: avoid;
}
.phase-header {
display: flex;
align-items: center;
gap: 10px;
padding: 8px 14px;
background: var(--heading);
color: #fff;
border-radius: 4px 4px 0 0;
font-size: 9.5pt;
}
.phase-num {
font-weight: 700;
font-size: 8pt;
text-transform: uppercase;
letter-spacing: 0.1em;
background: rgba(255,255,255,0.15);
padding: 2px 8px;
border-radius: 3px;
}
.phase-label {
font-weight: 600;
flex: 1;
}
.phase-count {
font-size: 8.5pt;
opacity: 0.7;
}
.phase-steps {
border: 1px solid var(--border);
border-top: none;
border-radius: 0 0 4px 4px;
}
.step-row {
display: flex;
align-items: flex-start;
gap: 10px;
padding: 8px 14px;
border-bottom: 1px solid var(--border-light);
position: relative;
}
.step-row:last-child {
border-bottom: none;
}
.step-num {
width: 22px;
height: 22px;
border-radius: 50%;
background: var(--bg-section);
border: 1px solid var(--border);
display: flex;
align-items: center;
justify-content: center;
font-size: 8pt;
font-weight: 700;
color: var(--text-secondary);
flex-shrink: 0;
margin-top: 1px;
}
.step-connector {
display: none;
}
.step-content {
flex: 1;
min-width: 0;
}
.step-header {
display: flex;
align-items: center;
gap: 8px;
flex-wrap: wrap;
}
.step-tool {
font-family: var(--font-mono);
font-size: 9.5pt;
font-weight: 500;
color: var(--heading);
}
.step-status {
font-size: 7.5pt;
font-weight: 700;
text-transform: uppercase;
letter-spacing: 0.04em;
padding: 1px 7px;
border-radius: 3px;
}
.step-completed { background: #dcfce7; color: #166534; }
.step-failed { background: #fef2f2; color: #991b1b; }
.step-running { background: #fef9c3; color: #854d0e; }
.step-findings {
font-size: 8pt;
font-weight: 600;
color: var(--sev-high);
background: #fff7ed;
padding: 1px 7px;
border-radius: 3px;
border: 1px solid #fed7aa;
}
.step-risk {
font-size: 7.5pt;
font-weight: 700;
padding: 1px 6px;
border-radius: 3px;
}
.risk-high { background: #fef2f2; color: var(--sev-critical); border: 1px solid #fecaca; }
.risk-med { background: #fffbeb; color: var(--sev-medium); border: 1px solid #fde68a; }
.risk-low { background: #f0fdf4; color: #166534; border: 1px solid #bbf7d0; }
.step-reasoning {
font-size: 9pt;
color: var(--text-muted);
margin-top: 3px;
line-height: 1.5;
font-style: italic;
}
/* ──────────────── Footer ──────────────── */
.report-footer {
margin-top: 48px;
padding-top: 14px;
border-top: 2px solid var(--heading);
font-size: 8pt;
color: var(--text-muted);
text-align: center;
line-height: 1.8;
}
.report-footer .footer-company {
font-weight: 700;
color: var(--text-secondary);
}
/* ──────────────── Page Break Utilities ──────────────── */
.page-break {
page-break-before: always;
break-before: page;
}
.avoid-break {
page-break-inside: avoid;
break-inside: avoid;
}
/* ──────────────── Print Overrides ──────────────── */
@media print {
body {
font-size: 10pt;
}
.cover {
height: auto;
min-height: 250mm;
padding: 50mm 20mm;
}
.report-body {
padding: 0;
}
.no-print {
display: none !important;
}
a {
color: var(--accent);
text-decoration: none;
}
}
/* ──────────────── Screen Enhancements ──────────────── */
@media screen {
body {
background: #e2e8f0;
}
.cover {
background: #fff;
box-shadow: 0 1px 4px rgba(0,0,0,0.08);
}
.report-body {
background: #fff;
padding: 20px 32px 40px;
box-shadow: 0 1px 4px rgba(0,0,0,0.08);
margin-bottom: 40px;
}
}
</style>"##
.to_string()
}

View File

@@ -3,7 +3,11 @@ mod html;
mod pdf;
use compliance_core::models::dast::DastFinding;
use compliance_core::models::pentest::{AttackChainNode, PentestSession};
use compliance_core::models::finding::Finding;
use compliance_core::models::pentest::{
AttackChainNode, CodeContextHint, PentestConfig, PentestSession,
};
use compliance_core::models::sbom::SbomEntry;
use sha2::{Digest, Sha256};
/// Report archive with metadata
@@ -23,6 +27,13 @@ pub struct ReportContext {
pub attack_chain: Vec<AttackChainNode>,
pub requester_name: String,
pub requester_email: String,
pub config: Option<PentestConfig>,
/// SAST findings for the linked repository (for code-level correlation)
pub sast_findings: Vec<Finding>,
/// Vulnerable dependencies from SBOM
pub sbom_entries: Vec<SbomEntry>,
/// Code knowledge graph entry points linked to SAST findings
pub code_context: Vec<CodeContextHint>,
}
/// Generate a password-protected ZIP archive containing the pentest report.