fix: remote Chrome PDF export & MCP endpoint sync (#15)
All checks were successful
All checks were successful
This commit was merged in pull request #15.
This commit is contained in:
@@ -37,3 +37,5 @@ urlencoding = "2"
|
|||||||
futures-util = "0.3"
|
futures-util = "0.3"
|
||||||
jsonwebtoken = "9"
|
jsonwebtoken = "9"
|
||||||
zip = { workspace = true }
|
zip = { workspace = true }
|
||||||
|
tokio-tungstenite = { version = "0.26", features = ["rustls-tls-webpki-roots"] }
|
||||||
|
futures-core = "0.3"
|
||||||
|
|||||||
@@ -1,16 +1,228 @@
|
|||||||
/// Convert HTML string to PDF bytes using headless Chrome/Chromium.
|
use futures_util::SinkExt;
|
||||||
|
use tokio_tungstenite::tungstenite::Message;
|
||||||
|
|
||||||
|
type WsStream =
|
||||||
|
tokio_tungstenite::WebSocketStream<tokio_tungstenite::MaybeTlsStream<tokio::net::TcpStream>>;
|
||||||
|
|
||||||
|
/// Convert HTML string to PDF bytes.
|
||||||
|
///
|
||||||
|
/// If `CHROME_WS_URL` is set (e.g. `ws://host:3000`), connects to a remote
|
||||||
|
/// headless Chrome via the Chrome DevTools Protocol over WebSocket.
|
||||||
|
/// Otherwise falls back to a local Chrome/Chromium binary.
|
||||||
pub(super) async fn html_to_pdf(html: &str) -> Result<Vec<u8>, String> {
|
pub(super) async fn html_to_pdf(html: &str) -> Result<Vec<u8>, String> {
|
||||||
|
if let Ok(ws_url) = std::env::var("CHROME_WS_URL") {
|
||||||
|
tracing::info!(url = %ws_url, "Generating PDF via remote Chrome (CDP)");
|
||||||
|
cdp_print_to_pdf(&ws_url, html).await
|
||||||
|
} else {
|
||||||
|
tracing::info!("Generating PDF via local Chrome binary");
|
||||||
|
local_chrome_pdf(html).await
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Send a CDP command (no session) and return the response.
|
||||||
|
async fn cdp_send(
|
||||||
|
ws: &mut WsStream,
|
||||||
|
id: u64,
|
||||||
|
method: &str,
|
||||||
|
params: serde_json::Value,
|
||||||
|
) -> Result<serde_json::Value, String> {
|
||||||
|
let msg = serde_json::json!({ "id": id, "method": method, "params": params });
|
||||||
|
ws.send(Message::Text(msg.to_string().into()))
|
||||||
|
.await
|
||||||
|
.map_err(|e| format!("WS send failed: {e}"))?;
|
||||||
|
read_until_result(ws, id).await
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Send a CDP command on a session and return the response.
|
||||||
|
async fn cdp_send_session(
|
||||||
|
ws: &mut WsStream,
|
||||||
|
id: u64,
|
||||||
|
session_id: &str,
|
||||||
|
method: &str,
|
||||||
|
params: serde_json::Value,
|
||||||
|
) -> Result<serde_json::Value, String> {
|
||||||
|
let msg = serde_json::json!({
|
||||||
|
"id": id,
|
||||||
|
"sessionId": session_id,
|
||||||
|
"method": method,
|
||||||
|
"params": params,
|
||||||
|
});
|
||||||
|
ws.send(Message::Text(msg.to_string().into()))
|
||||||
|
.await
|
||||||
|
.map_err(|e| format!("WS send failed: {e}"))?;
|
||||||
|
read_until_result(ws, id).await
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Generate PDF by connecting to a remote Chrome instance over CDP WebSocket.
|
||||||
|
async fn cdp_print_to_pdf(base_ws_url: &str, html: &str) -> Result<Vec<u8>, String> {
|
||||||
|
use base64::Engine;
|
||||||
|
|
||||||
|
// Step 1: Discover browser WS endpoint via /json/version
|
||||||
|
let http_url = base_ws_url
|
||||||
|
.replace("ws://", "http://")
|
||||||
|
.replace("wss://", "https://");
|
||||||
|
let version_url = format!("{http_url}/json/version");
|
||||||
|
|
||||||
|
let version: serde_json::Value = reqwest::get(&version_url)
|
||||||
|
.await
|
||||||
|
.map_err(|e| format!("Failed to reach Chrome at {version_url}: {e}"))?
|
||||||
|
.json()
|
||||||
|
.await
|
||||||
|
.map_err(|e| format!("Invalid /json/version response: {e}"))?;
|
||||||
|
|
||||||
|
let browser_ws = version["webSocketDebuggerUrl"]
|
||||||
|
.as_str()
|
||||||
|
.ok_or_else(|| "No webSocketDebuggerUrl in /json/version".to_string())?;
|
||||||
|
|
||||||
|
// Step 2: Connect to browser WS endpoint
|
||||||
|
let (mut ws, _) = tokio_tungstenite::connect_async(browser_ws)
|
||||||
|
.await
|
||||||
|
.map_err(|e| format!("WebSocket connect failed: {e}"))?;
|
||||||
|
|
||||||
|
let mut id: u64 = 1;
|
||||||
|
|
||||||
|
// Step 3: Create a new target (tab)
|
||||||
|
let resp = cdp_send(
|
||||||
|
&mut ws,
|
||||||
|
id,
|
||||||
|
"Target.createTarget",
|
||||||
|
serde_json::json!({ "url": "about:blank" }),
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
id += 1;
|
||||||
|
|
||||||
|
let target_id = resp
|
||||||
|
.get("result")
|
||||||
|
.and_then(|r| r.get("targetId"))
|
||||||
|
.and_then(|t| t.as_str())
|
||||||
|
.ok_or("No targetId in createTarget response")?
|
||||||
|
.to_string();
|
||||||
|
|
||||||
|
// Step 4: Attach to target
|
||||||
|
let resp = cdp_send(
|
||||||
|
&mut ws,
|
||||||
|
id,
|
||||||
|
"Target.attachToTarget",
|
||||||
|
serde_json::json!({ "targetId": target_id, "flatten": true }),
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
id += 1;
|
||||||
|
|
||||||
|
let session_id = resp
|
||||||
|
.get("result")
|
||||||
|
.and_then(|r| r.get("sessionId"))
|
||||||
|
.and_then(|s| s.as_str())
|
||||||
|
.ok_or("No sessionId in attachToTarget response")?
|
||||||
|
.to_string();
|
||||||
|
|
||||||
|
// Step 5: Enable Page domain
|
||||||
|
cdp_send_session(
|
||||||
|
&mut ws,
|
||||||
|
id,
|
||||||
|
&session_id,
|
||||||
|
"Page.enable",
|
||||||
|
serde_json::json!({}),
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
id += 1;
|
||||||
|
|
||||||
|
// Step 6: Set page content with the HTML
|
||||||
|
cdp_send_session(
|
||||||
|
&mut ws,
|
||||||
|
id,
|
||||||
|
&session_id,
|
||||||
|
"Page.setDocumentContent",
|
||||||
|
serde_json::json!({ "frameId": target_id, "html": html }),
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
id += 1;
|
||||||
|
|
||||||
|
// Brief pause for rendering
|
||||||
|
tokio::time::sleep(std::time::Duration::from_millis(500)).await;
|
||||||
|
|
||||||
|
// Step 7: Print to PDF
|
||||||
|
let pdf_response = cdp_send_session(
|
||||||
|
&mut ws,
|
||||||
|
id,
|
||||||
|
&session_id,
|
||||||
|
"Page.printToPDF",
|
||||||
|
serde_json::json!({
|
||||||
|
"printBackground": true,
|
||||||
|
"preferCSSPageSize": true,
|
||||||
|
"displayHeaderFooter": false,
|
||||||
|
}),
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
id += 1;
|
||||||
|
|
||||||
|
let pdf_b64 = pdf_response
|
||||||
|
.get("result")
|
||||||
|
.and_then(|r| r.get("data"))
|
||||||
|
.and_then(|d| d.as_str())
|
||||||
|
.ok_or("No PDF data in printToPDF response")?;
|
||||||
|
|
||||||
|
let pdf_bytes = base64::engine::general_purpose::STANDARD
|
||||||
|
.decode(pdf_b64)
|
||||||
|
.map_err(|e| format!("Failed to decode PDF base64: {e}"))?;
|
||||||
|
|
||||||
|
// Step 8: Close the target
|
||||||
|
let _ = cdp_send(
|
||||||
|
&mut ws,
|
||||||
|
id,
|
||||||
|
"Target.closeTarget",
|
||||||
|
serde_json::json!({ "targetId": target_id }),
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
|
||||||
|
let _ = ws.close(None).await;
|
||||||
|
|
||||||
|
if pdf_bytes.is_empty() {
|
||||||
|
return Err("Chrome produced an empty PDF".to_string());
|
||||||
|
}
|
||||||
|
|
||||||
|
tracing::info!(
|
||||||
|
size_kb = pdf_bytes.len() / 1024,
|
||||||
|
"PDF report generated via CDP"
|
||||||
|
);
|
||||||
|
Ok(pdf_bytes)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Read WebSocket messages until we get a response matching the given id.
|
||||||
|
async fn read_until_result(ws: &mut WsStream, id: u64) -> Result<serde_json::Value, String> {
|
||||||
|
use futures_util::StreamExt;
|
||||||
|
|
||||||
|
let deadline = tokio::time::Instant::now() + std::time::Duration::from_secs(30);
|
||||||
|
loop {
|
||||||
|
let msg = tokio::time::timeout_at(deadline, ws.next())
|
||||||
|
.await
|
||||||
|
.map_err(|_| format!("Timeout waiting for CDP response id={id}"))?
|
||||||
|
.ok_or_else(|| "WebSocket closed unexpectedly".to_string())?
|
||||||
|
.map_err(|e| format!("WebSocket read error: {e}"))?;
|
||||||
|
|
||||||
|
if let Message::Text(text) = msg {
|
||||||
|
if let Ok(val) = serde_json::from_str::<serde_json::Value>(&text) {
|
||||||
|
if val.get("id").and_then(|i| i.as_u64()) == Some(id) {
|
||||||
|
if let Some(err) = val.get("error") {
|
||||||
|
return Err(format!("CDP error: {err}"));
|
||||||
|
}
|
||||||
|
return Ok(val);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Fallback: generate PDF using a local Chrome/Chromium binary.
|
||||||
|
async fn local_chrome_pdf(html: &str) -> Result<Vec<u8>, String> {
|
||||||
let tmp_dir = std::env::temp_dir();
|
let tmp_dir = std::env::temp_dir();
|
||||||
let run_id = uuid::Uuid::new_v4().to_string();
|
let run_id = uuid::Uuid::new_v4().to_string();
|
||||||
let html_path = tmp_dir.join(format!("pentest-report-{run_id}.html"));
|
let html_path = tmp_dir.join(format!("pentest-report-{run_id}.html"));
|
||||||
let pdf_path = tmp_dir.join(format!("pentest-report-{run_id}.pdf"));
|
let pdf_path = tmp_dir.join(format!("pentest-report-{run_id}.pdf"));
|
||||||
|
|
||||||
// Write HTML to temp file
|
|
||||||
std::fs::write(&html_path, html).map_err(|e| format!("Failed to write temp HTML: {e}"))?;
|
std::fs::write(&html_path, html).map_err(|e| format!("Failed to write temp HTML: {e}"))?;
|
||||||
|
|
||||||
// Find Chrome/Chromium binary
|
|
||||||
let chrome_bin = find_chrome_binary().ok_or_else(|| {
|
let chrome_bin = find_chrome_binary().ok_or_else(|| {
|
||||||
"Chrome/Chromium not found. Install google-chrome or chromium to generate PDF reports."
|
"Chrome/Chromium not found. Set CHROME_WS_URL for remote Chrome or install chromium locally."
|
||||||
.to_string()
|
.to_string()
|
||||||
})?;
|
})?;
|
||||||
|
|
||||||
@@ -36,7 +248,6 @@ pub(super) async fn html_to_pdf(html: &str) -> Result<Vec<u8>, String> {
|
|||||||
|
|
||||||
if !output.status.success() {
|
if !output.status.success() {
|
||||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||||
// Clean up temp files
|
|
||||||
let _ = std::fs::remove_file(&html_path);
|
let _ = std::fs::remove_file(&html_path);
|
||||||
let _ = std::fs::remove_file(&pdf_path);
|
let _ = std::fs::remove_file(&pdf_path);
|
||||||
return Err(format!("Chrome PDF generation failed: {stderr}"));
|
return Err(format!("Chrome PDF generation failed: {stderr}"));
|
||||||
@@ -45,7 +256,6 @@ pub(super) async fn html_to_pdf(html: &str) -> Result<Vec<u8>, String> {
|
|||||||
let pdf_bytes =
|
let pdf_bytes =
|
||||||
std::fs::read(&pdf_path).map_err(|e| format!("Failed to read generated PDF: {e}"))?;
|
std::fs::read(&pdf_path).map_err(|e| format!("Failed to read generated PDF: {e}"))?;
|
||||||
|
|
||||||
// Clean up temp files
|
|
||||||
let _ = std::fs::remove_file(&html_path);
|
let _ = std::fs::remove_file(&html_path);
|
||||||
let _ = std::fs::remove_file(&pdf_path);
|
let _ = std::fs::remove_file(&pdf_path);
|
||||||
|
|
||||||
|
|||||||
@@ -150,16 +150,23 @@ async fn seed_default_mcp_servers(db: &Database, mcp_endpoint_url: Option<&str>)
|
|||||||
|
|
||||||
let collection = db.mcp_servers();
|
let collection = db.mcp_servers();
|
||||||
|
|
||||||
for (name, description, tools) in defaults {
|
let expected_url = format!("{endpoint}/mcp");
|
||||||
// Skip if already exists
|
|
||||||
let exists = collection
|
|
||||||
.find_one(doc! { "name": name })
|
|
||||||
.await
|
|
||||||
.ok()
|
|
||||||
.flatten()
|
|
||||||
.is_some();
|
|
||||||
|
|
||||||
if exists {
|
for (name, description, tools) in defaults {
|
||||||
|
// If it already exists, update the endpoint URL if it changed
|
||||||
|
if let Ok(Some(existing)) = collection.find_one(doc! { "name": name }).await {
|
||||||
|
if existing.endpoint_url != expected_url {
|
||||||
|
let _ = collection
|
||||||
|
.update_one(
|
||||||
|
doc! { "name": name },
|
||||||
|
doc! { "$set": { "endpoint_url": &expected_url } },
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
tracing::info!(
|
||||||
|
"Updated MCP server '{name}' endpoint: {} -> {expected_url}",
|
||||||
|
existing.endpoint_url
|
||||||
|
);
|
||||||
|
}
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user