|
|
|
|
@@ -1,16 +1,228 @@
|
|
|
|
|
/// Convert HTML string to PDF bytes using headless Chrome/Chromium.
|
|
|
|
|
use futures_util::SinkExt;
|
|
|
|
|
use tokio_tungstenite::tungstenite::Message;
|
|
|
|
|
|
|
|
|
|
type WsStream =
|
|
|
|
|
tokio_tungstenite::WebSocketStream<tokio_tungstenite::MaybeTlsStream<tokio::net::TcpStream>>;
|
|
|
|
|
|
|
|
|
|
/// Convert HTML string to PDF bytes.
|
|
|
|
|
///
|
|
|
|
|
/// If `CHROME_WS_URL` is set (e.g. `ws://host:3000`), connects to a remote
|
|
|
|
|
/// headless Chrome via the Chrome DevTools Protocol over WebSocket.
|
|
|
|
|
/// Otherwise falls back to a local Chrome/Chromium binary.
|
|
|
|
|
pub(super) async fn html_to_pdf(html: &str) -> Result<Vec<u8>, String> {
|
|
|
|
|
if let Ok(ws_url) = std::env::var("CHROME_WS_URL") {
|
|
|
|
|
tracing::info!(url = %ws_url, "Generating PDF via remote Chrome (CDP)");
|
|
|
|
|
cdp_print_to_pdf(&ws_url, html).await
|
|
|
|
|
} else {
|
|
|
|
|
tracing::info!("Generating PDF via local Chrome binary");
|
|
|
|
|
local_chrome_pdf(html).await
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Send a CDP command (no session) and return the response.
|
|
|
|
|
async fn cdp_send(
|
|
|
|
|
ws: &mut WsStream,
|
|
|
|
|
id: u64,
|
|
|
|
|
method: &str,
|
|
|
|
|
params: serde_json::Value,
|
|
|
|
|
) -> Result<serde_json::Value, String> {
|
|
|
|
|
let msg = serde_json::json!({ "id": id, "method": method, "params": params });
|
|
|
|
|
ws.send(Message::Text(msg.to_string().into()))
|
|
|
|
|
.await
|
|
|
|
|
.map_err(|e| format!("WS send failed: {e}"))?;
|
|
|
|
|
read_until_result(ws, id).await
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Send a CDP command on a session and return the response.
|
|
|
|
|
async fn cdp_send_session(
|
|
|
|
|
ws: &mut WsStream,
|
|
|
|
|
id: u64,
|
|
|
|
|
session_id: &str,
|
|
|
|
|
method: &str,
|
|
|
|
|
params: serde_json::Value,
|
|
|
|
|
) -> Result<serde_json::Value, String> {
|
|
|
|
|
let msg = serde_json::json!({
|
|
|
|
|
"id": id,
|
|
|
|
|
"sessionId": session_id,
|
|
|
|
|
"method": method,
|
|
|
|
|
"params": params,
|
|
|
|
|
});
|
|
|
|
|
ws.send(Message::Text(msg.to_string().into()))
|
|
|
|
|
.await
|
|
|
|
|
.map_err(|e| format!("WS send failed: {e}"))?;
|
|
|
|
|
read_until_result(ws, id).await
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Generate PDF by connecting to a remote Chrome instance over CDP WebSocket.
|
|
|
|
|
async fn cdp_print_to_pdf(base_ws_url: &str, html: &str) -> Result<Vec<u8>, String> {
|
|
|
|
|
use base64::Engine;
|
|
|
|
|
|
|
|
|
|
// Step 1: Discover browser WS endpoint via /json/version
|
|
|
|
|
let http_url = base_ws_url
|
|
|
|
|
.replace("ws://", "http://")
|
|
|
|
|
.replace("wss://", "https://");
|
|
|
|
|
let version_url = format!("{http_url}/json/version");
|
|
|
|
|
|
|
|
|
|
let version: serde_json::Value = reqwest::get(&version_url)
|
|
|
|
|
.await
|
|
|
|
|
.map_err(|e| format!("Failed to reach Chrome at {version_url}: {e}"))?
|
|
|
|
|
.json()
|
|
|
|
|
.await
|
|
|
|
|
.map_err(|e| format!("Invalid /json/version response: {e}"))?;
|
|
|
|
|
|
|
|
|
|
let browser_ws = version["webSocketDebuggerUrl"]
|
|
|
|
|
.as_str()
|
|
|
|
|
.ok_or_else(|| "No webSocketDebuggerUrl in /json/version".to_string())?;
|
|
|
|
|
|
|
|
|
|
// Step 2: Connect to browser WS endpoint
|
|
|
|
|
let (mut ws, _) = tokio_tungstenite::connect_async(browser_ws)
|
|
|
|
|
.await
|
|
|
|
|
.map_err(|e| format!("WebSocket connect failed: {e}"))?;
|
|
|
|
|
|
|
|
|
|
let mut id: u64 = 1;
|
|
|
|
|
|
|
|
|
|
// Step 3: Create a new target (tab)
|
|
|
|
|
let resp = cdp_send(
|
|
|
|
|
&mut ws,
|
|
|
|
|
id,
|
|
|
|
|
"Target.createTarget",
|
|
|
|
|
serde_json::json!({ "url": "about:blank" }),
|
|
|
|
|
)
|
|
|
|
|
.await?;
|
|
|
|
|
id += 1;
|
|
|
|
|
|
|
|
|
|
let target_id = resp
|
|
|
|
|
.get("result")
|
|
|
|
|
.and_then(|r| r.get("targetId"))
|
|
|
|
|
.and_then(|t| t.as_str())
|
|
|
|
|
.ok_or("No targetId in createTarget response")?
|
|
|
|
|
.to_string();
|
|
|
|
|
|
|
|
|
|
// Step 4: Attach to target
|
|
|
|
|
let resp = cdp_send(
|
|
|
|
|
&mut ws,
|
|
|
|
|
id,
|
|
|
|
|
"Target.attachToTarget",
|
|
|
|
|
serde_json::json!({ "targetId": target_id, "flatten": true }),
|
|
|
|
|
)
|
|
|
|
|
.await?;
|
|
|
|
|
id += 1;
|
|
|
|
|
|
|
|
|
|
let session_id = resp
|
|
|
|
|
.get("result")
|
|
|
|
|
.and_then(|r| r.get("sessionId"))
|
|
|
|
|
.and_then(|s| s.as_str())
|
|
|
|
|
.ok_or("No sessionId in attachToTarget response")?
|
|
|
|
|
.to_string();
|
|
|
|
|
|
|
|
|
|
// Step 5: Enable Page domain
|
|
|
|
|
cdp_send_session(
|
|
|
|
|
&mut ws,
|
|
|
|
|
id,
|
|
|
|
|
&session_id,
|
|
|
|
|
"Page.enable",
|
|
|
|
|
serde_json::json!({}),
|
|
|
|
|
)
|
|
|
|
|
.await?;
|
|
|
|
|
id += 1;
|
|
|
|
|
|
|
|
|
|
// Step 6: Set page content with the HTML
|
|
|
|
|
cdp_send_session(
|
|
|
|
|
&mut ws,
|
|
|
|
|
id,
|
|
|
|
|
&session_id,
|
|
|
|
|
"Page.setDocumentContent",
|
|
|
|
|
serde_json::json!({ "frameId": target_id, "html": html }),
|
|
|
|
|
)
|
|
|
|
|
.await?;
|
|
|
|
|
id += 1;
|
|
|
|
|
|
|
|
|
|
// Brief pause for rendering
|
|
|
|
|
tokio::time::sleep(std::time::Duration::from_millis(500)).await;
|
|
|
|
|
|
|
|
|
|
// Step 7: Print to PDF
|
|
|
|
|
let pdf_response = cdp_send_session(
|
|
|
|
|
&mut ws,
|
|
|
|
|
id,
|
|
|
|
|
&session_id,
|
|
|
|
|
"Page.printToPDF",
|
|
|
|
|
serde_json::json!({
|
|
|
|
|
"printBackground": true,
|
|
|
|
|
"preferCSSPageSize": true,
|
|
|
|
|
"displayHeaderFooter": false,
|
|
|
|
|
}),
|
|
|
|
|
)
|
|
|
|
|
.await?;
|
|
|
|
|
id += 1;
|
|
|
|
|
|
|
|
|
|
let pdf_b64 = pdf_response
|
|
|
|
|
.get("result")
|
|
|
|
|
.and_then(|r| r.get("data"))
|
|
|
|
|
.and_then(|d| d.as_str())
|
|
|
|
|
.ok_or("No PDF data in printToPDF response")?;
|
|
|
|
|
|
|
|
|
|
let pdf_bytes = base64::engine::general_purpose::STANDARD
|
|
|
|
|
.decode(pdf_b64)
|
|
|
|
|
.map_err(|e| format!("Failed to decode PDF base64: {e}"))?;
|
|
|
|
|
|
|
|
|
|
// Step 8: Close the target
|
|
|
|
|
let _ = cdp_send(
|
|
|
|
|
&mut ws,
|
|
|
|
|
id,
|
|
|
|
|
"Target.closeTarget",
|
|
|
|
|
serde_json::json!({ "targetId": target_id }),
|
|
|
|
|
)
|
|
|
|
|
.await;
|
|
|
|
|
|
|
|
|
|
let _ = ws.close(None).await;
|
|
|
|
|
|
|
|
|
|
if pdf_bytes.is_empty() {
|
|
|
|
|
return Err("Chrome produced an empty PDF".to_string());
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
tracing::info!(
|
|
|
|
|
size_kb = pdf_bytes.len() / 1024,
|
|
|
|
|
"PDF report generated via CDP"
|
|
|
|
|
);
|
|
|
|
|
Ok(pdf_bytes)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Read WebSocket messages until we get a response matching the given id.
|
|
|
|
|
async fn read_until_result(ws: &mut WsStream, id: u64) -> Result<serde_json::Value, String> {
|
|
|
|
|
use futures_util::StreamExt;
|
|
|
|
|
|
|
|
|
|
let deadline = tokio::time::Instant::now() + std::time::Duration::from_secs(30);
|
|
|
|
|
loop {
|
|
|
|
|
let msg = tokio::time::timeout_at(deadline, ws.next())
|
|
|
|
|
.await
|
|
|
|
|
.map_err(|_| format!("Timeout waiting for CDP response id={id}"))?
|
|
|
|
|
.ok_or_else(|| "WebSocket closed unexpectedly".to_string())?
|
|
|
|
|
.map_err(|e| format!("WebSocket read error: {e}"))?;
|
|
|
|
|
|
|
|
|
|
if let Message::Text(text) = msg {
|
|
|
|
|
if let Ok(val) = serde_json::from_str::<serde_json::Value>(&text) {
|
|
|
|
|
if val.get("id").and_then(|i| i.as_u64()) == Some(id) {
|
|
|
|
|
if let Some(err) = val.get("error") {
|
|
|
|
|
return Err(format!("CDP error: {err}"));
|
|
|
|
|
}
|
|
|
|
|
return Ok(val);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Fallback: generate PDF using a local Chrome/Chromium binary.
|
|
|
|
|
async fn local_chrome_pdf(html: &str) -> Result<Vec<u8>, String> {
|
|
|
|
|
let tmp_dir = std::env::temp_dir();
|
|
|
|
|
let run_id = uuid::Uuid::new_v4().to_string();
|
|
|
|
|
let html_path = tmp_dir.join(format!("pentest-report-{run_id}.html"));
|
|
|
|
|
let pdf_path = tmp_dir.join(format!("pentest-report-{run_id}.pdf"));
|
|
|
|
|
|
|
|
|
|
// Write HTML to temp file
|
|
|
|
|
std::fs::write(&html_path, html).map_err(|e| format!("Failed to write temp HTML: {e}"))?;
|
|
|
|
|
|
|
|
|
|
// Find Chrome/Chromium binary
|
|
|
|
|
let chrome_bin = find_chrome_binary().ok_or_else(|| {
|
|
|
|
|
"Chrome/Chromium not found. Install google-chrome or chromium to generate PDF reports."
|
|
|
|
|
"Chrome/Chromium not found. Set CHROME_WS_URL for remote Chrome or install chromium locally."
|
|
|
|
|
.to_string()
|
|
|
|
|
})?;
|
|
|
|
|
|
|
|
|
|
@@ -36,7 +248,6 @@ pub(super) async fn html_to_pdf(html: &str) -> Result<Vec<u8>, String> {
|
|
|
|
|
|
|
|
|
|
if !output.status.success() {
|
|
|
|
|
let stderr = String::from_utf8_lossy(&output.stderr);
|
|
|
|
|
// Clean up temp files
|
|
|
|
|
let _ = std::fs::remove_file(&html_path);
|
|
|
|
|
let _ = std::fs::remove_file(&pdf_path);
|
|
|
|
|
return Err(format!("Chrome PDF generation failed: {stderr}"));
|
|
|
|
|
@@ -45,7 +256,6 @@ pub(super) async fn html_to_pdf(html: &str) -> Result<Vec<u8>, String> {
|
|
|
|
|
let pdf_bytes =
|
|
|
|
|
std::fs::read(&pdf_path).map_err(|e| format!("Failed to read generated PDF: {e}"))?;
|
|
|
|
|
|
|
|
|
|
// Clean up temp files
|
|
|
|
|
let _ = std::fs::remove_file(&html_path);
|
|
|
|
|
let _ = std::fs::remove_file(&pdf_path);
|
|
|
|
|
|
|
|
|
|
|