fix: remote Chrome PDF via CDP, sync MCP endpoint URL on boot
All checks were successful
CI / Check (pull_request) Successful in 11m33s
CI / Detect Changes (pull_request) Has been skipped
CI / Deploy Agent (pull_request) Has been skipped
CI / Deploy Dashboard (pull_request) Has been skipped
CI / Deploy Docs (pull_request) Has been skipped
CI / Deploy MCP (pull_request) Has been skipped
All checks were successful
CI / Check (pull_request) Successful in 11m33s
CI / Detect Changes (pull_request) Has been skipped
CI / Deploy Agent (pull_request) Has been skipped
CI / Deploy Dashboard (pull_request) Has been skipped
CI / Deploy Docs (pull_request) Has been skipped
CI / Deploy MCP (pull_request) Has been skipped
- Add CHROME_WS_URL env var support for PDF report generation via Chrome DevTools Protocol over WebSocket (falls back to local binary)
- Update seeded MCP server endpoint URLs on boot when MCP_ENDPOINT_URL env var differs from stored value (previously only seeded once)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -37,3 +37,5 @@ urlencoding = "2"
|
||||
futures-util = "0.3"
|
||||
jsonwebtoken = "9"
|
||||
zip = { workspace = true }
|
||||
tokio-tungstenite = { version = "0.26", features = ["rustls-tls-webpki-roots"] }
|
||||
futures-core = "0.3"
|
||||
|
||||
@@ -1,16 +1,228 @@
|
||||
/// Convert HTML string to PDF bytes using headless Chrome/Chromium.
|
||||
use futures_util::SinkExt;
|
||||
use tokio_tungstenite::tungstenite::Message;
|
||||
|
||||
type WsStream =
|
||||
tokio_tungstenite::WebSocketStream<tokio_tungstenite::MaybeTlsStream<tokio::net::TcpStream>>;
|
||||
|
||||
/// Convert HTML string to PDF bytes.
|
||||
///
|
||||
/// If `CHROME_WS_URL` is set (e.g. `ws://host:3000`), connects to a remote
|
||||
/// headless Chrome via the Chrome DevTools Protocol over WebSocket.
|
||||
/// Otherwise falls back to a local Chrome/Chromium binary.
|
||||
pub(super) async fn html_to_pdf(html: &str) -> Result<Vec<u8>, String> {
|
||||
if let Ok(ws_url) = std::env::var("CHROME_WS_URL") {
|
||||
tracing::info!(url = %ws_url, "Generating PDF via remote Chrome (CDP)");
|
||||
cdp_print_to_pdf(&ws_url, html).await
|
||||
} else {
|
||||
tracing::info!("Generating PDF via local Chrome binary");
|
||||
local_chrome_pdf(html).await
|
||||
}
|
||||
}
|
||||
|
||||
/// Send a CDP command (no session) and return the response.
|
||||
async fn cdp_send(
|
||||
ws: &mut WsStream,
|
||||
id: u64,
|
||||
method: &str,
|
||||
params: serde_json::Value,
|
||||
) -> Result<serde_json::Value, String> {
|
||||
let msg = serde_json::json!({ "id": id, "method": method, "params": params });
|
||||
ws.send(Message::Text(msg.to_string().into()))
|
||||
.await
|
||||
.map_err(|e| format!("WS send failed: {e}"))?;
|
||||
read_until_result(ws, id).await
|
||||
}
|
||||
|
||||
/// Send a CDP command on a session and return the response.
|
||||
async fn cdp_send_session(
|
||||
ws: &mut WsStream,
|
||||
id: u64,
|
||||
session_id: &str,
|
||||
method: &str,
|
||||
params: serde_json::Value,
|
||||
) -> Result<serde_json::Value, String> {
|
||||
let msg = serde_json::json!({
|
||||
"id": id,
|
||||
"sessionId": session_id,
|
||||
"method": method,
|
||||
"params": params,
|
||||
});
|
||||
ws.send(Message::Text(msg.to_string().into()))
|
||||
.await
|
||||
.map_err(|e| format!("WS send failed: {e}"))?;
|
||||
read_until_result(ws, id).await
|
||||
}
|
||||
|
||||
/// Generate PDF by connecting to a remote Chrome instance over CDP WebSocket.
|
||||
async fn cdp_print_to_pdf(base_ws_url: &str, html: &str) -> Result<Vec<u8>, String> {
|
||||
use base64::Engine;
|
||||
|
||||
// Step 1: Discover browser WS endpoint via /json/version
|
||||
let http_url = base_ws_url
|
||||
.replace("ws://", "http://")
|
||||
.replace("wss://", "https://");
|
||||
let version_url = format!("{http_url}/json/version");
|
||||
|
||||
let version: serde_json::Value = reqwest::get(&version_url)
|
||||
.await
|
||||
.map_err(|e| format!("Failed to reach Chrome at {version_url}: {e}"))?
|
||||
.json()
|
||||
.await
|
||||
.map_err(|e| format!("Invalid /json/version response: {e}"))?;
|
||||
|
||||
let browser_ws = version["webSocketDebuggerUrl"]
|
||||
.as_str()
|
||||
.ok_or_else(|| "No webSocketDebuggerUrl in /json/version".to_string())?;
|
||||
|
||||
// Step 2: Connect to browser WS endpoint
|
||||
let (mut ws, _) = tokio_tungstenite::connect_async(browser_ws)
|
||||
.await
|
||||
.map_err(|e| format!("WebSocket connect failed: {e}"))?;
|
||||
|
||||
let mut id: u64 = 1;
|
||||
|
||||
// Step 3: Create a new target (tab)
|
||||
let resp = cdp_send(
|
||||
&mut ws,
|
||||
id,
|
||||
"Target.createTarget",
|
||||
serde_json::json!({ "url": "about:blank" }),
|
||||
)
|
||||
.await?;
|
||||
id += 1;
|
||||
|
||||
let target_id = resp
|
||||
.get("result")
|
||||
.and_then(|r| r.get("targetId"))
|
||||
.and_then(|t| t.as_str())
|
||||
.ok_or("No targetId in createTarget response")?
|
||||
.to_string();
|
||||
|
||||
// Step 4: Attach to target
|
||||
let resp = cdp_send(
|
||||
&mut ws,
|
||||
id,
|
||||
"Target.attachToTarget",
|
||||
serde_json::json!({ "targetId": target_id, "flatten": true }),
|
||||
)
|
||||
.await?;
|
||||
id += 1;
|
||||
|
||||
let session_id = resp
|
||||
.get("result")
|
||||
.and_then(|r| r.get("sessionId"))
|
||||
.and_then(|s| s.as_str())
|
||||
.ok_or("No sessionId in attachToTarget response")?
|
||||
.to_string();
|
||||
|
||||
// Step 5: Enable Page domain
|
||||
cdp_send_session(
|
||||
&mut ws,
|
||||
id,
|
||||
&session_id,
|
||||
"Page.enable",
|
||||
serde_json::json!({}),
|
||||
)
|
||||
.await?;
|
||||
id += 1;
|
||||
|
||||
// Step 6: Set page content with the HTML
|
||||
cdp_send_session(
|
||||
&mut ws,
|
||||
id,
|
||||
&session_id,
|
||||
"Page.setDocumentContent",
|
||||
serde_json::json!({ "frameId": target_id, "html": html }),
|
||||
)
|
||||
.await?;
|
||||
id += 1;
|
||||
|
||||
// Brief pause for rendering
|
||||
tokio::time::sleep(std::time::Duration::from_millis(500)).await;
|
||||
|
||||
// Step 7: Print to PDF
|
||||
let pdf_response = cdp_send_session(
|
||||
&mut ws,
|
||||
id,
|
||||
&session_id,
|
||||
"Page.printToPDF",
|
||||
serde_json::json!({
|
||||
"printBackground": true,
|
||||
"preferCSSPageSize": true,
|
||||
"displayHeaderFooter": false,
|
||||
}),
|
||||
)
|
||||
.await?;
|
||||
id += 1;
|
||||
|
||||
let pdf_b64 = pdf_response
|
||||
.get("result")
|
||||
.and_then(|r| r.get("data"))
|
||||
.and_then(|d| d.as_str())
|
||||
.ok_or("No PDF data in printToPDF response")?;
|
||||
|
||||
let pdf_bytes = base64::engine::general_purpose::STANDARD
|
||||
.decode(pdf_b64)
|
||||
.map_err(|e| format!("Failed to decode PDF base64: {e}"))?;
|
||||
|
||||
// Step 8: Close the target
|
||||
let _ = cdp_send(
|
||||
&mut ws,
|
||||
id,
|
||||
"Target.closeTarget",
|
||||
serde_json::json!({ "targetId": target_id }),
|
||||
)
|
||||
.await;
|
||||
|
||||
let _ = ws.close(None).await;
|
||||
|
||||
if pdf_bytes.is_empty() {
|
||||
return Err("Chrome produced an empty PDF".to_string());
|
||||
}
|
||||
|
||||
tracing::info!(
|
||||
size_kb = pdf_bytes.len() / 1024,
|
||||
"PDF report generated via CDP"
|
||||
);
|
||||
Ok(pdf_bytes)
|
||||
}
|
||||
|
||||
/// Read WebSocket messages until we get a response matching the given id.
|
||||
async fn read_until_result(ws: &mut WsStream, id: u64) -> Result<serde_json::Value, String> {
|
||||
use futures_util::StreamExt;
|
||||
|
||||
let deadline = tokio::time::Instant::now() + std::time::Duration::from_secs(30);
|
||||
loop {
|
||||
let msg = tokio::time::timeout_at(deadline, ws.next())
|
||||
.await
|
||||
.map_err(|_| format!("Timeout waiting for CDP response id={id}"))?
|
||||
.ok_or_else(|| "WebSocket closed unexpectedly".to_string())?
|
||||
.map_err(|e| format!("WebSocket read error: {e}"))?;
|
||||
|
||||
if let Message::Text(text) = msg {
|
||||
if let Ok(val) = serde_json::from_str::<serde_json::Value>(&text) {
|
||||
if val.get("id").and_then(|i| i.as_u64()) == Some(id) {
|
||||
if let Some(err) = val.get("error") {
|
||||
return Err(format!("CDP error: {err}"));
|
||||
}
|
||||
return Ok(val);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Fallback: generate PDF using a local Chrome/Chromium binary.
|
||||
async fn local_chrome_pdf(html: &str) -> Result<Vec<u8>, String> {
|
||||
let tmp_dir = std::env::temp_dir();
|
||||
let run_id = uuid::Uuid::new_v4().to_string();
|
||||
let html_path = tmp_dir.join(format!("pentest-report-{run_id}.html"));
|
||||
let pdf_path = tmp_dir.join(format!("pentest-report-{run_id}.pdf"));
|
||||
|
||||
// Write HTML to temp file
|
||||
std::fs::write(&html_path, html).map_err(|e| format!("Failed to write temp HTML: {e}"))?;
|
||||
|
||||
// Find Chrome/Chromium binary
|
||||
let chrome_bin = find_chrome_binary().ok_or_else(|| {
|
||||
"Chrome/Chromium not found. Install google-chrome or chromium to generate PDF reports."
|
||||
"Chrome/Chromium not found. Set CHROME_WS_URL for remote Chrome or install chromium locally."
|
||||
.to_string()
|
||||
})?;
|
||||
|
||||
@@ -36,7 +248,6 @@ pub(super) async fn html_to_pdf(html: &str) -> Result<Vec<u8>, String> {
|
||||
|
||||
if !output.status.success() {
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
// Clean up temp files
|
||||
let _ = std::fs::remove_file(&html_path);
|
||||
let _ = std::fs::remove_file(&pdf_path);
|
||||
return Err(format!("Chrome PDF generation failed: {stderr}"));
|
||||
@@ -45,7 +256,6 @@ pub(super) async fn html_to_pdf(html: &str) -> Result<Vec<u8>, String> {
|
||||
let pdf_bytes =
|
||||
std::fs::read(&pdf_path).map_err(|e| format!("Failed to read generated PDF: {e}"))?;
|
||||
|
||||
// Clean up temp files
|
||||
let _ = std::fs::remove_file(&html_path);
|
||||
let _ = std::fs::remove_file(&pdf_path);
|
||||
|
||||
|
||||
@@ -150,16 +150,23 @@ async fn seed_default_mcp_servers(db: &Database, mcp_endpoint_url: Option<&str>)
|
||||
|
||||
let collection = db.mcp_servers();
|
||||
|
||||
for (name, description, tools) in defaults {
|
||||
// Skip if already exists
|
||||
let exists = collection
|
||||
.find_one(doc! { "name": name })
|
||||
.await
|
||||
.ok()
|
||||
.flatten()
|
||||
.is_some();
|
||||
let expected_url = format!("{endpoint}/mcp");
|
||||
|
||||
if exists {
|
||||
for (name, description, tools) in defaults {
|
||||
// If it already exists, update the endpoint URL if it changed
|
||||
if let Ok(Some(existing)) = collection.find_one(doc! { "name": name }).await {
|
||||
if existing.endpoint_url != expected_url {
|
||||
let _ = collection
|
||||
.update_one(
|
||||
doc! { "name": name },
|
||||
doc! { "$set": { "endpoint_url": &expected_url } },
|
||||
)
|
||||
.await;
|
||||
tracing::info!(
|
||||
"Updated MCP server '{name}' endpoint: {} -> {expected_url}",
|
||||
existing.endpoint_url
|
||||
);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user