use futures_util::SinkExt; use tokio_tungstenite::tungstenite::Message; type WsStream = tokio_tungstenite::WebSocketStream>; /// Convert HTML string to PDF bytes. /// /// If `CHROME_WS_URL` is set (e.g. `ws://host:3000`), connects to a remote /// headless Chrome via the Chrome DevTools Protocol over WebSocket. /// Otherwise falls back to a local Chrome/Chromium binary. pub(super) async fn html_to_pdf(html: &str) -> Result, String> { if let Ok(ws_url) = std::env::var("CHROME_WS_URL") { tracing::info!(url = %ws_url, "Generating PDF via remote Chrome (CDP)"); cdp_print_to_pdf(&ws_url, html).await } else { tracing::info!("Generating PDF via local Chrome binary"); local_chrome_pdf(html).await } } /// Send a CDP command (no session) and return the response. async fn cdp_send( ws: &mut WsStream, id: u64, method: &str, params: serde_json::Value, ) -> Result { let msg = serde_json::json!({ "id": id, "method": method, "params": params }); ws.send(Message::Text(msg.to_string().into())) .await .map_err(|e| format!("WS send failed: {e}"))?; read_until_result(ws, id).await } /// Send a CDP command on a session and return the response. async fn cdp_send_session( ws: &mut WsStream, id: u64, session_id: &str, method: &str, params: serde_json::Value, ) -> Result { let msg = serde_json::json!({ "id": id, "sessionId": session_id, "method": method, "params": params, }); ws.send(Message::Text(msg.to_string().into())) .await .map_err(|e| format!("WS send failed: {e}"))?; read_until_result(ws, id).await } /// Generate PDF by connecting to a remote Chrome instance over CDP WebSocket. async fn cdp_print_to_pdf(base_ws_url: &str, html: &str) -> Result, String> { use base64::Engine; // Step 1: Discover browser WS endpoint via /json/version let http_url = base_ws_url .replace("ws://", "http://") .replace("wss://", "https://"); let version_url = format!("{http_url}/json/version"); let version: serde_json::Value = reqwest::get(&version_url) .await .map_err(|e| format!("Failed to reach Chrome at {version_url}: {e}"))? .json() .await .map_err(|e| format!("Invalid /json/version response: {e}"))?; let browser_ws = version["webSocketDebuggerUrl"] .as_str() .ok_or_else(|| "No webSocketDebuggerUrl in /json/version".to_string())?; // Step 2: Connect to browser WS endpoint let (mut ws, _) = tokio_tungstenite::connect_async(browser_ws) .await .map_err(|e| format!("WebSocket connect failed: {e}"))?; let mut id: u64 = 1; // Step 3: Create a new target (tab) let resp = cdp_send( &mut ws, id, "Target.createTarget", serde_json::json!({ "url": "about:blank" }), ) .await?; id += 1; let target_id = resp .get("result") .and_then(|r| r.get("targetId")) .and_then(|t| t.as_str()) .ok_or("No targetId in createTarget response")? .to_string(); // Step 4: Attach to target let resp = cdp_send( &mut ws, id, "Target.attachToTarget", serde_json::json!({ "targetId": target_id, "flatten": true }), ) .await?; id += 1; let session_id = resp .get("result") .and_then(|r| r.get("sessionId")) .and_then(|s| s.as_str()) .ok_or("No sessionId in attachToTarget response")? .to_string(); // Step 5: Enable Page domain cdp_send_session( &mut ws, id, &session_id, "Page.enable", serde_json::json!({}), ) .await?; id += 1; // Step 6: Set page content with the HTML cdp_send_session( &mut ws, id, &session_id, "Page.setDocumentContent", serde_json::json!({ "frameId": target_id, "html": html }), ) .await?; id += 1; // Brief pause for rendering tokio::time::sleep(std::time::Duration::from_millis(500)).await; // Step 7: Print to PDF let pdf_response = cdp_send_session( &mut ws, id, &session_id, "Page.printToPDF", serde_json::json!({ "printBackground": true, "preferCSSPageSize": true, "displayHeaderFooter": false, }), ) .await?; id += 1; let pdf_b64 = pdf_response .get("result") .and_then(|r| r.get("data")) .and_then(|d| d.as_str()) .ok_or("No PDF data in printToPDF response")?; let pdf_bytes = base64::engine::general_purpose::STANDARD .decode(pdf_b64) .map_err(|e| format!("Failed to decode PDF base64: {e}"))?; // Step 8: Close the target let _ = cdp_send( &mut ws, id, "Target.closeTarget", serde_json::json!({ "targetId": target_id }), ) .await; let _ = ws.close(None).await; if pdf_bytes.is_empty() { return Err("Chrome produced an empty PDF".to_string()); } tracing::info!( size_kb = pdf_bytes.len() / 1024, "PDF report generated via CDP" ); Ok(pdf_bytes) } /// Read WebSocket messages until we get a response matching the given id. async fn read_until_result(ws: &mut WsStream, id: u64) -> Result { use futures_util::StreamExt; let deadline = tokio::time::Instant::now() + std::time::Duration::from_secs(30); loop { let msg = tokio::time::timeout_at(deadline, ws.next()) .await .map_err(|_| format!("Timeout waiting for CDP response id={id}"))? .ok_or_else(|| "WebSocket closed unexpectedly".to_string())? .map_err(|e| format!("WebSocket read error: {e}"))?; if let Message::Text(text) = msg { if let Ok(val) = serde_json::from_str::(&text) { if val.get("id").and_then(|i| i.as_u64()) == Some(id) { if let Some(err) = val.get("error") { return Err(format!("CDP error: {err}")); } return Ok(val); } } } } } /// Fallback: generate PDF using a local Chrome/Chromium binary. async fn local_chrome_pdf(html: &str) -> Result, String> { let tmp_dir = std::env::temp_dir(); let run_id = uuid::Uuid::new_v4().to_string(); let html_path = tmp_dir.join(format!("pentest-report-{run_id}.html")); let pdf_path = tmp_dir.join(format!("pentest-report-{run_id}.pdf")); std::fs::write(&html_path, html).map_err(|e| format!("Failed to write temp HTML: {e}"))?; let chrome_bin = find_chrome_binary().ok_or_else(|| { "Chrome/Chromium not found. Set CHROME_WS_URL for remote Chrome or install chromium locally." .to_string() })?; tracing::info!(chrome = %chrome_bin, "Generating PDF report via headless Chrome"); let html_url = format!("file://{}", html_path.display()); let output = tokio::process::Command::new(&chrome_bin) .args([ "--headless", "--disable-gpu", "--no-sandbox", "--disable-software-rasterizer", "--run-all-compositor-stages-before-draw", "--disable-dev-shm-usage", &format!("--print-to-pdf={}", pdf_path.display()), "--no-pdf-header-footer", &html_url, ]) .output() .await .map_err(|e| format!("Failed to run Chrome: {e}"))?; if !output.status.success() { let stderr = String::from_utf8_lossy(&output.stderr); let _ = std::fs::remove_file(&html_path); let _ = std::fs::remove_file(&pdf_path); return Err(format!("Chrome PDF generation failed: {stderr}")); } let pdf_bytes = std::fs::read(&pdf_path).map_err(|e| format!("Failed to read generated PDF: {e}"))?; let _ = std::fs::remove_file(&html_path); let _ = std::fs::remove_file(&pdf_path); if pdf_bytes.is_empty() { return Err("Chrome produced an empty PDF".to_string()); } tracing::info!(size_kb = pdf_bytes.len() / 1024, "PDF report generated"); Ok(pdf_bytes) } /// Search for Chrome/Chromium binary on the system. fn find_chrome_binary() -> Option { let candidates = [ "google-chrome-stable", "google-chrome", "chromium-browser", "chromium", ]; for name in &candidates { if let Ok(output) = std::process::Command::new("which").arg(name).output() { if output.status.success() { let path = String::from_utf8_lossy(&output.stdout).trim().to_string(); if !path.is_empty() { return Some(path); } } } } None }