From 584ef2c82277c20fae5a00fb4467d206e153d58f Mon Sep 17 00:00:00 2001 From: Sharang Parnerkar Date: Fri, 13 Mar 2026 10:57:28 +0100 Subject: [PATCH] fix: remote Chrome PDF via CDP, sync MCP endpoint URL on boot - Add CHROME_WS_URL env var support for PDF report generation via Chrome DevTools Protocol over WebSocket (falls back to local binary) - Update seeded MCP server endpoint URLs on boot when MCP_ENDPOINT_URL env var differs from stored value (previously only seeded once) Co-Authored-By: Claude Opus 4.6 --- compliance-agent/Cargo.toml | 2 + compliance-agent/src/pentest/report/pdf.rs | 222 +++++++++++++++++- .../src/infrastructure/server.rs | 25 +- 3 files changed, 234 insertions(+), 15 deletions(-) diff --git a/compliance-agent/Cargo.toml b/compliance-agent/Cargo.toml index bc2eac7..69522c1 100644 --- a/compliance-agent/Cargo.toml +++ b/compliance-agent/Cargo.toml @@ -37,3 +37,5 @@ urlencoding = "2" futures-util = "0.3" jsonwebtoken = "9" zip = { workspace = true } +tokio-tungstenite = { version = "0.26", features = ["rustls-tls-webpki-roots"] } +futures-core = "0.3" diff --git a/compliance-agent/src/pentest/report/pdf.rs b/compliance-agent/src/pentest/report/pdf.rs index 7e6c6c0..8eda318 100644 --- a/compliance-agent/src/pentest/report/pdf.rs +++ b/compliance-agent/src/pentest/report/pdf.rs @@ -1,16 +1,228 @@ -/// Convert HTML string to PDF bytes using headless Chrome/Chromium. +use futures_util::SinkExt; +use tokio_tungstenite::tungstenite::Message; + +type WsStream = + tokio_tungstenite::WebSocketStream<tokio_tungstenite::MaybeTlsStream<tokio::net::TcpStream>>; + +/// Convert HTML string to PDF bytes. +/// +/// If `CHROME_WS_URL` is set (e.g. `ws://host:3000`), connects to a remote +/// headless Chrome via the Chrome DevTools Protocol over WebSocket. +/// Otherwise falls back to a local Chrome/Chromium binary. 
pub(super) async fn html_to_pdf(html: &str) -> Result<Vec<u8>, String> { + if let Ok(ws_url) = std::env::var("CHROME_WS_URL") { + tracing::info!(url = %ws_url, "Generating PDF via remote Chrome (CDP)"); + cdp_print_to_pdf(&ws_url, html).await + } else { + tracing::info!("Generating PDF via local Chrome binary"); + local_chrome_pdf(html).await + } +} + +/// Send a CDP command (no session) and return the response. +async fn cdp_send( + ws: &mut WsStream, + id: u64, + method: &str, + params: serde_json::Value, +) -> Result<serde_json::Value, String> { + let msg = serde_json::json!({ "id": id, "method": method, "params": params }); + ws.send(Message::Text(msg.to_string().into())) + .await + .map_err(|e| format!("WS send failed: {e}"))?; + read_until_result(ws, id).await +} + +/// Send a CDP command on a session and return the response. +async fn cdp_send_session( + ws: &mut WsStream, + id: u64, + session_id: &str, + method: &str, + params: serde_json::Value, +) -> Result<serde_json::Value, String> { + let msg = serde_json::json!({ + "id": id, + "sessionId": session_id, + "method": method, + "params": params, + }); + ws.send(Message::Text(msg.to_string().into())) + .await + .map_err(|e| format!("WS send failed: {e}"))?; + read_until_result(ws, id).await +} + +/// Generate PDF by connecting to a remote Chrome instance over CDP WebSocket. +async fn cdp_print_to_pdf(base_ws_url: &str, html: &str) -> Result<Vec<u8>, String> { + use base64::Engine; + + // Step 1: Discover browser WS endpoint via /json/version + let http_url = base_ws_url + .replace("ws://", "http://") + .replace("wss://", "https://"); + let version_url = format!("{http_url}/json/version"); + + let version: serde_json::Value = reqwest::get(&version_url) + .await + .map_err(|e| format!("Failed to reach Chrome at {version_url}: {e}"))? 
+ .json() + .await + .map_err(|e| format!("Invalid /json/version response: {e}"))?; + + let browser_ws = version["webSocketDebuggerUrl"] + .as_str() + .ok_or_else(|| "No webSocketDebuggerUrl in /json/version".to_string())?; + + // Step 2: Connect to browser WS endpoint + let (mut ws, _) = tokio_tungstenite::connect_async(browser_ws) + .await + .map_err(|e| format!("WebSocket connect failed: {e}"))?; + + let mut id: u64 = 1; + + // Step 3: Create a new target (tab) + let resp = cdp_send( + &mut ws, + id, + "Target.createTarget", + serde_json::json!({ "url": "about:blank" }), + ) + .await?; + id += 1; + + let target_id = resp + .get("result") + .and_then(|r| r.get("targetId")) + .and_then(|t| t.as_str()) + .ok_or("No targetId in createTarget response")? + .to_string(); + + // Step 4: Attach to target + let resp = cdp_send( + &mut ws, + id, + "Target.attachToTarget", + serde_json::json!({ "targetId": target_id, "flatten": true }), + ) + .await?; + id += 1; + + let session_id = resp + .get("result") + .and_then(|r| r.get("sessionId")) + .and_then(|s| s.as_str()) + .ok_or("No sessionId in attachToTarget response")? 
+ .to_string(); + + // Step 5: Enable Page domain + cdp_send_session( + &mut ws, + id, + &session_id, + "Page.enable", + serde_json::json!({}), + ) + .await?; + id += 1; + + // Step 6: Set page content with the HTML + cdp_send_session( + &mut ws, + id, + &session_id, + "Page.setDocumentContent", + serde_json::json!({ "frameId": target_id, "html": html }), + ) + .await?; + id += 1; + + // Brief pause for rendering + tokio::time::sleep(std::time::Duration::from_millis(500)).await; + + // Step 7: Print to PDF + let pdf_response = cdp_send_session( + &mut ws, + id, + &session_id, + "Page.printToPDF", + serde_json::json!({ + "printBackground": true, + "preferCSSPageSize": true, + "displayHeaderFooter": false, + }), + ) + .await?; + id += 1; + + let pdf_b64 = pdf_response + .get("result") + .and_then(|r| r.get("data")) + .and_then(|d| d.as_str()) + .ok_or("No PDF data in printToPDF response")?; + + let pdf_bytes = base64::engine::general_purpose::STANDARD + .decode(pdf_b64) + .map_err(|e| format!("Failed to decode PDF base64: {e}"))?; + + // Step 8: Close the target + let _ = cdp_send( + &mut ws, + id, + "Target.closeTarget", + serde_json::json!({ "targetId": target_id }), + ) + .await; + + let _ = ws.close(None).await; + + if pdf_bytes.is_empty() { + return Err("Chrome produced an empty PDF".to_string()); + } + + tracing::info!( + size_kb = pdf_bytes.len() / 1024, + "PDF report generated via CDP" + ); + Ok(pdf_bytes) +} + +/// Read WebSocket messages until we get a response matching the given id. +async fn read_until_result(ws: &mut WsStream, id: u64) -> Result<serde_json::Value, String> { + use futures_util::StreamExt; + + let deadline = tokio::time::Instant::now() + std::time::Duration::from_secs(30); + loop { + let msg = tokio::time::timeout_at(deadline, ws.next()) + .await + .map_err(|_| format!("Timeout waiting for CDP response id={id}"))? + .ok_or_else(|| "WebSocket closed unexpectedly".to_string())? 
+ .map_err(|e| format!("WebSocket read error: {e}"))?; + + if let Message::Text(text) = msg { + if let Ok(val) = serde_json::from_str::<serde_json::Value>(&text) { + if val.get("id").and_then(|i| i.as_u64()) == Some(id) { + if let Some(err) = val.get("error") { + return Err(format!("CDP error: {err}")); + } + return Ok(val); + } + } + } + } +} + +/// Fallback: generate PDF using a local Chrome/Chromium binary. +async fn local_chrome_pdf(html: &str) -> Result<Vec<u8>, String> { let tmp_dir = std::env::temp_dir(); let run_id = uuid::Uuid::new_v4().to_string(); let html_path = tmp_dir.join(format!("pentest-report-{run_id}.html")); let pdf_path = tmp_dir.join(format!("pentest-report-{run_id}.pdf")); - // Write HTML to temp file std::fs::write(&html_path, html).map_err(|e| format!("Failed to write temp HTML: {e}"))?; - // Find Chrome/Chromium binary let chrome_bin = find_chrome_binary().ok_or_else(|| { - "Chrome/Chromium not found. Install google-chrome or chromium to generate PDF reports." + "Chrome/Chromium not found. Set CHROME_WS_URL for remote Chrome or install chromium locally." 
.to_string() })?; @@ -36,7 +248,6 @@ pub(super) async fn html_to_pdf(html: &str) -> Result<Vec<u8>, String> { if !output.status.success() { let stderr = String::from_utf8_lossy(&output.stderr); - // Clean up temp files let _ = std::fs::remove_file(&html_path); let _ = std::fs::remove_file(&pdf_path); return Err(format!("Chrome PDF generation failed: {stderr}")); @@ -45,7 +256,6 @@ pub(super) async fn html_to_pdf(html: &str) -> Result<Vec<u8>, String> { let pdf_bytes = std::fs::read(&pdf_path).map_err(|e| format!("Failed to read generated PDF: {e}"))?; - // Clean up temp files let _ = std::fs::remove_file(&html_path); let _ = std::fs::remove_file(&pdf_path); diff --git a/compliance-dashboard/src/infrastructure/server.rs b/compliance-dashboard/src/infrastructure/server.rs index a98f2af..20cb6fb 100644 --- a/compliance-dashboard/src/infrastructure/server.rs +++ b/compliance-dashboard/src/infrastructure/server.rs @@ -150,16 +150,23 @@ async fn seed_default_mcp_servers(db: &Database, mcp_endpoint_url: Option<&str>) let collection = db.mcp_servers(); - for (name, description, tools) in defaults { - // Skip if already exists - let exists = collection - .find_one(doc! { "name": name }) - .await - .ok() - .flatten() - .is_some(); + let expected_url = format!("{endpoint}/mcp"); - if exists { + for (name, description, tools) in defaults { + // If it already exists, update the endpoint URL if it changed + if let Ok(Some(existing)) = collection.find_one(doc! { "name": name }).await { + if existing.endpoint_url != expected_url { + let _ = collection + .update_one( + doc! { "name": name }, + doc! { "$set": { "endpoint_url": &expected_url } }, + ) + .await; + tracing::info!( + "Updated MCP server '{name}' endpoint: {} -> {expected_url}", + existing.endpoint_url + ); + } continue; }