Files
compliance-scanner-agent/compliance-agent/src/pentest/report/pdf.rs
Sharang Parnerkar 4eac1209d8
All checks were successful
CI / Check (push) Successful in 11m16s
CI / Detect Changes (push) Successful in 3s
CI / Deploy Agent (push) Successful in 3s
CI / Deploy Dashboard (push) Successful in 2s
CI / Deploy Docs (push) Has been skipped
CI / Deploy MCP (push) Has been skipped
fix: remote Chrome PDF export & MCP endpoint sync (#15)
2026-03-13 10:12:20 +00:00

290 lines
8.8 KiB
Rust

use futures_util::SinkExt;
use tokio_tungstenite::tungstenite::Message;
/// WebSocket stream type used for the CDP connection: a tokio-tungstenite
/// stream over a tokio TCP socket that may or may not be wrapped in TLS.
type WsStream =
tokio_tungstenite::WebSocketStream<tokio_tungstenite::MaybeTlsStream<tokio::net::TcpStream>>;
/// Render an HTML document to PDF and return the raw PDF bytes.
///
/// The backend is selected by the `CHROME_WS_URL` environment variable:
/// when it can be read (e.g. `ws://host:3000`), the PDF is produced by a
/// remote headless Chrome driven over the Chrome DevTools Protocol;
/// otherwise a Chrome/Chromium binary on the local machine is used.
///
/// # Errors
///
/// Returns the error message produced by whichever backend was selected.
pub(super) async fn html_to_pdf(html: &str) -> Result<Vec<u8>, String> {
    match std::env::var("CHROME_WS_URL") {
        Ok(remote_endpoint) => {
            tracing::info!(url = %remote_endpoint, "Generating PDF via remote Chrome (CDP)");
            cdp_print_to_pdf(&remote_endpoint, html).await
        }
        Err(_) => {
            tracing::info!("Generating PDF via local Chrome binary");
            local_chrome_pdf(html).await
        }
    }
}
/// Issue a browser-level CDP command (no `sessionId`) and wait for its reply.
///
/// The command is serialized as `{ "id", "method", "params" }`, sent as a
/// text frame, and the reply carrying the same `id` is returned.
///
/// # Errors
///
/// Returns `"WS send failed: …"` if the frame cannot be written, or whatever
/// error `read_until_result` reports while waiting for the reply.
async fn cdp_send(
    ws: &mut WsStream,
    id: u64,
    method: &str,
    params: serde_json::Value,
) -> Result<serde_json::Value, String> {
    let payload = serde_json::json!({ "id": id, "method": method, "params": params }).to_string();
    let frame = Message::Text(payload.into());

    if let Err(e) = ws.send(frame).await {
        return Err(format!("WS send failed: {e}"));
    }

    read_until_result(ws, id).await
}
/// Issue a CDP command addressed to an attached session and wait for its reply.
///
/// Identical to a browser-level command except that the envelope also
/// carries `sessionId`, so the command is routed to that session.
///
/// # Errors
///
/// Returns `"WS send failed: …"` if the frame cannot be written, or whatever
/// error `read_until_result` reports while waiting for the reply.
async fn cdp_send_session(
    ws: &mut WsStream,
    id: u64,
    session_id: &str,
    method: &str,
    params: serde_json::Value,
) -> Result<serde_json::Value, String> {
    let envelope = serde_json::json!({
        "id": id,
        "sessionId": session_id,
        "method": method,
        "params": params,
    });
    let frame = Message::Text(envelope.to_string().into());

    let sent = ws.send(frame).await;
    sent.map_err(|e| format!("WS send failed: {e}"))?;

    read_until_result(ws, id).await
}
/// Generate PDF by connecting to a remote Chrome instance over CDP WebSocket.
async fn cdp_print_to_pdf(base_ws_url: &str, html: &str) -> Result<Vec<u8>, String> {
use base64::Engine;
// Step 1: Discover browser WS endpoint via /json/version
let http_url = base_ws_url
.replace("ws://", "http://")
.replace("wss://", "https://");
let version_url = format!("{http_url}/json/version");
let version: serde_json::Value = reqwest::get(&version_url)
.await
.map_err(|e| format!("Failed to reach Chrome at {version_url}: {e}"))?
.json()
.await
.map_err(|e| format!("Invalid /json/version response: {e}"))?;
let browser_ws = version["webSocketDebuggerUrl"]
.as_str()
.ok_or_else(|| "No webSocketDebuggerUrl in /json/version".to_string())?;
// Step 2: Connect to browser WS endpoint
let (mut ws, _) = tokio_tungstenite::connect_async(browser_ws)
.await
.map_err(|e| format!("WebSocket connect failed: {e}"))?;
let mut id: u64 = 1;
// Step 3: Create a new target (tab)
let resp = cdp_send(
&mut ws,
id,
"Target.createTarget",
serde_json::json!({ "url": "about:blank" }),
)
.await?;
id += 1;
let target_id = resp
.get("result")
.and_then(|r| r.get("targetId"))
.and_then(|t| t.as_str())
.ok_or("No targetId in createTarget response")?
.to_string();
// Step 4: Attach to target
let resp = cdp_send(
&mut ws,
id,
"Target.attachToTarget",
serde_json::json!({ "targetId": target_id, "flatten": true }),
)
.await?;
id += 1;
let session_id = resp
.get("result")
.and_then(|r| r.get("sessionId"))
.and_then(|s| s.as_str())
.ok_or("No sessionId in attachToTarget response")?
.to_string();
// Step 5: Enable Page domain
cdp_send_session(
&mut ws,
id,
&session_id,
"Page.enable",
serde_json::json!({}),
)
.await?;
id += 1;
// Step 6: Set page content with the HTML
cdp_send_session(
&mut ws,
id,
&session_id,
"Page.setDocumentContent",
serde_json::json!({ "frameId": target_id, "html": html }),
)
.await?;
id += 1;
// Brief pause for rendering
tokio::time::sleep(std::time::Duration::from_millis(500)).await;
// Step 7: Print to PDF
let pdf_response = cdp_send_session(
&mut ws,
id,
&session_id,
"Page.printToPDF",
serde_json::json!({
"printBackground": true,
"preferCSSPageSize": true,
"displayHeaderFooter": false,
}),
)
.await?;
id += 1;
let pdf_b64 = pdf_response
.get("result")
.and_then(|r| r.get("data"))
.and_then(|d| d.as_str())
.ok_or("No PDF data in printToPDF response")?;
let pdf_bytes = base64::engine::general_purpose::STANDARD
.decode(pdf_b64)
.map_err(|e| format!("Failed to decode PDF base64: {e}"))?;
// Step 8: Close the target
let _ = cdp_send(
&mut ws,
id,
"Target.closeTarget",
serde_json::json!({ "targetId": target_id }),
)
.await;
let _ = ws.close(None).await;
if pdf_bytes.is_empty() {
return Err("Chrome produced an empty PDF".to_string());
}
tracing::info!(
size_kb = pdf_bytes.len() / 1024,
"PDF report generated via CDP"
);
Ok(pdf_bytes)
}
/// Drain incoming WebSocket frames until the reply for command `id` arrives.
///
/// Frames that are not text, text that is not valid JSON, and JSON messages
/// whose `"id"` differs from `id` are skipped. A single 30-second deadline
/// covers the whole wait, not each individual frame.
///
/// # Errors
///
/// Returns an error on timeout, when the stream ends, on a WebSocket read
/// error, or when the matching reply contains an `"error"` member.
async fn read_until_result(ws: &mut WsStream, id: u64) -> Result<serde_json::Value, String> {
    use futures_util::StreamExt;

    let give_up_at = tokio::time::Instant::now() + std::time::Duration::from_secs(30);

    loop {
        let frame = match tokio::time::timeout_at(give_up_at, ws.next()).await {
            Err(_) => return Err(format!("Timeout waiting for CDP response id={id}")),
            Ok(None) => return Err("WebSocket closed unexpectedly".to_string()),
            Ok(Some(Err(e))) => return Err(format!("WebSocket read error: {e}")),
            Ok(Some(Ok(frame))) => frame,
        };

        // Only text frames can carry a CDP reply.
        let text = match frame {
            Message::Text(text) => text,
            _ => continue,
        };

        let reply = match serde_json::from_str::<serde_json::Value>(&text) {
            Ok(reply) => reply,
            Err(_) => continue,
        };

        // Replies to other commands (and messages without a numeric id) are not ours.
        if reply.get("id").and_then(|i| i.as_u64()) != Some(id) {
            continue;
        }

        if let Some(err) = reply.get("error") {
            return Err(format!("CDP error: {err}"));
        }
        return Ok(reply);
    }
}
/// Fallback: generate PDF using a local Chrome/Chromium binary.
///
/// Writes `html` to a uniquely named file in the system temp directory, runs
/// headless Chrome with `--print-to-pdf` against it and reads the resulting
/// PDF back. Both temp files are removed (best effort) on every path once
/// they may exist, whether Chrome succeeded or not.
///
/// # Errors
///
/// Returns a message if no Chrome binary is found, the temp HTML cannot be
/// written, Chrome cannot be started or exits unsuccessfully, the PDF cannot
/// be read, or the PDF is empty.
async fn local_chrome_pdf(html: &str) -> Result<Vec<u8>, String> {
    // Locate the binary before touching the filesystem so that a missing
    // Chrome does not leave a temp HTML file behind.
    let chrome_bin = find_chrome_binary().ok_or_else(|| {
        "Chrome/Chromium not found. Set CHROME_WS_URL for remote Chrome or install chromium locally."
            .to_string()
    })?;

    let tmp_dir = std::env::temp_dir();
    let run_id = uuid::Uuid::new_v4().to_string();
    let html_path = tmp_dir.join(format!("pentest-report-{run_id}.html"));
    let pdf_path = tmp_dir.join(format!("pentest-report-{run_id}.pdf"));

    if let Err(e) = std::fs::write(&html_path, html) {
        // A failed write may still have created a partial file.
        let _ = std::fs::remove_file(&html_path);
        return Err(format!("Failed to write temp HTML: {e}"));
    }

    tracing::info!(chrome = %chrome_bin, "Generating PDF report via headless Chrome");

    let html_url = format!("file://{}", html_path.display());
    let run = tokio::process::Command::new(&chrome_bin)
        .args([
            "--headless",
            "--disable-gpu",
            "--no-sandbox",
            "--disable-software-rasterizer",
            "--run-all-compositor-stages-before-draw",
            "--disable-dev-shm-usage",
            &format!("--print-to-pdf={}", pdf_path.display()),
            "--no-pdf-header-footer",
            &html_url,
        ])
        .output()
        .await;

    // Collect the outcome without returning yet, so the cleanup below runs
    // for spawn failures, non-zero exits and read failures alike.
    let outcome = match run {
        Err(e) => Err(format!("Failed to run Chrome: {e}")),
        Ok(output) if !output.status.success() => {
            let stderr = String::from_utf8_lossy(&output.stderr);
            Err(format!("Chrome PDF generation failed: {stderr}"))
        }
        Ok(_) => {
            std::fs::read(&pdf_path).map_err(|e| format!("Failed to read generated PDF: {e}"))
        }
    };

    // Best-effort cleanup; the PDF may not exist if Chrome failed.
    let _ = std::fs::remove_file(&html_path);
    let _ = std::fs::remove_file(&pdf_path);

    let pdf_bytes = outcome?;
    if pdf_bytes.is_empty() {
        return Err("Chrome produced an empty PDF".to_string());
    }

    tracing::info!(size_kb = pdf_bytes.len() / 1024, "PDF report generated");
    Ok(pdf_bytes)
}
/// Locate a Chrome/Chromium executable by asking `which` for each known name.
///
/// Candidates are probed in order of preference; the first one for which
/// `which` exits successfully and prints a non-empty path wins. Returns
/// `None` when no candidate resolves (or `which` itself cannot be run).
fn find_chrome_binary() -> Option<String> {
    const CANDIDATES: [&str; 4] = [
        "google-chrome-stable",
        "google-chrome",
        "chromium-browser",
        "chromium",
    ];

    CANDIDATES.iter().find_map(|bin| {
        let probe = std::process::Command::new("which").arg(bin).output().ok()?;
        if !probe.status.success() {
            return None;
        }

        let located = String::from_utf8_lossy(&probe.stdout).trim().to_string();
        if located.is_empty() {
            None
        } else {
            Some(located)
        }
    })
}