Files
compliance-scanner-agent/compliance-dast/src/tools/console_log_detector.rs
Sharang Parnerkar 3bb690e5bb
All checks were successful
CI / Format (push) Successful in 4s
CI / Clippy (push) Successful in 4m19s
CI / Security Audit (push) Successful in 1m44s
CI / Tests (push) Successful in 5m15s
CI / Detect Changes (push) Successful in 5s
CI / Deploy Agent (push) Successful in 2s
CI / Deploy Dashboard (push) Successful in 2s
CI / Deploy Docs (push) Has been skipped
CI / Deploy MCP (push) Successful in 2s
refactor: modularize codebase and add 404 unit tests (#13)
2026-03-13 08:03:45 +00:00

424 lines
15 KiB
Rust

use compliance_core::error::CoreError;
use compliance_core::models::dast::{DastEvidence, DastFinding, DastVulnType};
use compliance_core::models::Severity;
use compliance_core::traits::pentest_tool::{PentestTool, PentestToolContext, PentestToolResult};
use serde_json::json;
use tracing::info;
/// Tool that detects console.log and similar debug statements in frontend JavaScript.
///
/// The tool downloads a page, scans its HTML for debug statements, then downloads and
/// scans the JavaScript files the page references.
pub struct ConsoleLogDetectorTool {
/// HTTP client used to fetch the target page and each JavaScript file.
http: reqwest::Client,
}
/// A detected console statement with its context.
///
/// One value is produced per matching source line by `scan_js_content`.
#[derive(Debug)]
struct ConsoleMatch {
/// The pattern that matched, with any trailing `(` removed (e.g. `console.log`).
pattern: String,
/// URL of the document (HTML page or JS file) in which the match was found.
file_url: String,
/// The matching line, truncated for long lines and trimmed of surrounding whitespace.
line_snippet: String,
/// 1-based line number of the match within the scanned content.
line_number: Option<usize>,
}
impl ConsoleLogDetectorTool {
    /// Creates a detector that issues its HTTP requests through `http`.
    pub fn new(http: reqwest::Client) -> Self {
        Self { http }
    }

    /// Patterns that indicate debug/logging statements left in production code.
    ///
    /// Matching is a plain substring search. When a line contains several patterns,
    /// the one that appears first in this list is the one reported.
    fn patterns() -> Vec<&'static str> {
        vec![
            "console.log(",
            "console.debug(",
            "console.error(",
            "console.warn(",
            "console.info(",
            "console.trace(",
            "console.dir(",
            "console.table(",
            "debugger;",
            "alert(",
        ]
    }

    /// Extract JavaScript file URLs from an HTML page body.
    ///
    /// Every quoted `src=` attribute value is inspected; values that look like
    /// JavaScript (ending in `.js`, containing `.js?` or `/js/`) are resolved against
    /// `base_url` and returned in document order. Unquoted attribute values are
    /// skipped. Duplicates are not removed here.
    fn extract_js_urls(html: &str, base_url: &str) -> Vec<String> {
        let mut urls = Vec::new();
        let base = url::Url::parse(base_url).ok();

        // Simple regex-free extraction of <script src="...">
        let mut search_from = 0;
        while let Some(start) = html[search_from..].find("src=") {
            // Index of the first byte after `src=`; always a char boundary because
            // the needle is ASCII.
            let abs_start = search_from + start + 4;

            let close = match html.as_bytes().get(abs_start) {
                Some(b'"') => '"',
                Some(b'\'') => '\'',
                Some(_) => {
                    // Unquoted value: step over exactly one character. Advancing by a
                    // single byte could land inside a multi-byte UTF-8 sequence and
                    // make the next slice panic.
                    let step = html[abs_start..]
                        .chars()
                        .next()
                        .map_or(1, char::len_utf8);
                    search_from = abs_start + step;
                    continue;
                }
                // `src=` was the very end of the document.
                None => break,
            };

            let val_start = abs_start + 1;
            if let Some(end) = html[val_start..].find(close) {
                let src = &html[val_start..val_start + end];
                if src.ends_with(".js") || src.contains(".js?") || src.contains("/js/") {
                    let full_url = if src.starts_with("http://") || src.starts_with("https://") {
                        src.to_string()
                    } else if src.starts_with("//") {
                        // Protocol-relative URL: assume HTTPS.
                        format!("https:{src}")
                    } else if let Some(ref base) = base {
                        // An unresolvable reference becomes "" and is dropped below.
                        base.join(src).map(|u| u.to_string()).unwrap_or_default()
                    } else {
                        // `base_url` did not parse; fall back to string concatenation.
                        format!("{base_url}/{}", src.trim_start_matches('/'))
                    };
                    if !full_url.is_empty() {
                        urls.push(full_url);
                    }
                }
                search_from = val_start + end + 1;
            } else {
                // Unterminated attribute value: nothing more can be parsed reliably.
                break;
            }
        }
        urls
    }

    /// Search a JS file's contents for console/debug patterns.
    ///
    /// Lines that start (after trimming) with `//`, `*` or `/*` are treated as
    /// comments and skipped. At most one match is reported per line. Snippets of
    /// lines longer than 200 bytes are cut at the nearest character boundary at or
    /// below 200 bytes and suffixed with `...`.
    fn scan_js_content(content: &str, file_url: &str) -> Vec<ConsoleMatch> {
        /// Longest line (in bytes) that is kept verbatim in a snippet.
        const MAX_SNIPPET_BYTES: usize = 200;

        // Built once instead of once per scanned line.
        let patterns = Self::patterns();
        let mut matches = Vec::new();

        for (line_num, line) in content.lines().enumerate() {
            let trimmed = line.trim();
            // Skip comments (basic heuristic)
            if trimmed.starts_with("//") || trimmed.starts_with('*') || trimmed.starts_with("/*") {
                continue;
            }

            // One match per line is enough: take the first pattern that occurs.
            if let Some(pattern) = patterns.iter().copied().find(|p| line.contains(*p)) {
                let snippet = if line.len() > MAX_SNIPPET_BYTES {
                    // Back off to a char boundary; slicing mid-character would panic
                    // on lines containing multi-byte UTF-8 text.
                    let mut cut = MAX_SNIPPET_BYTES;
                    while !line.is_char_boundary(cut) {
                        cut -= 1;
                    }
                    format!("{}...", &line[..cut])
                } else {
                    line.to_string()
                };
                matches.push(ConsoleMatch {
                    pattern: pattern.trim_end_matches('(').to_string(),
                    file_url: file_url.to_string(),
                    line_snippet: snippet.trim().to_string(),
                    line_number: Some(line_num + 1),
                });
            }
        }
        matches
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Base URL that relative script references are resolved against in these tests.
    const BASE: &str = "https://example.com";

    /// Absolute, root-relative and protocol-relative script sources are all resolved.
    #[test]
    fn extract_js_urls_from_html() {
        let page = r#"
<html>
<head>
<script src="/static/app.js"></script>
<script src="https://cdn.example.com/lib.js"></script>
<script src='//cdn2.example.com/vendor.js'></script>
</head>
</html>
"#;
        let found = ConsoleLogDetectorTool::extract_js_urls(page, BASE);
        assert_eq!(found.len(), 3);
        for expected in [
            "https://example.com/static/app.js",
            "https://cdn.example.com/lib.js",
            "https://cdn2.example.com/vendor.js",
        ] {
            assert!(found.iter().any(|u| u == expected));
        }
    }

    /// A page without any `src=` attribute yields no URLs.
    #[test]
    fn extract_js_urls_no_scripts() {
        let found =
            ConsoleLogDetectorTool::extract_js_urls("<html><body><p>Hello</p></body></html>", BASE);
        assert!(found.is_empty());
    }

    /// Only sources that look like JavaScript are kept.
    #[test]
    fn extract_js_urls_filters_non_js() {
        let page = r#"<link src="/style.css"><script src="/app.js"></script>"#;
        let found = ConsoleLogDetectorTool::extract_js_urls(page, BASE);
        // Only .js files should be extracted
        assert_eq!(found.len(), 1);
        assert!(found[0].ends_with("/app.js"));
    }

    /// A single `console.log` call is reported with its pattern and 1-based line.
    #[test]
    fn scan_js_content_finds_console_log() {
        let source = "\nfunction init() {\nconsole.log(\"debug info\");\ndoStuff();\n}\n";
        let hits = ConsoleLogDetectorTool::scan_js_content(source, "https://example.com/app.js");
        assert_eq!(hits.len(), 1);
        let hit = &hits[0];
        assert_eq!(hit.pattern, "console.log");
        assert_eq!(hit.line_number, Some(3));
    }

    /// Each line carrying a different pattern produces its own match.
    #[test]
    fn scan_js_content_finds_multiple_patterns() {
        let source = [
            "console.log('a');",
            "console.debug('b');",
            "console.error('c');",
            "debugger;",
            "alert('x');",
        ]
        .join("\n");
        let hits = ConsoleLogDetectorTool::scan_js_content(&source, "test.js");
        assert_eq!(hits.len(), 5);
    }

    /// Lines that look like comments are ignored.
    #[test]
    fn scan_js_content_skips_comments() {
        let source = [
            "// console.log('commented out');",
            "* console.log('also comment');",
            "/* console.log('block comment') */",
        ]
        .join("\n");
        let hits = ConsoleLogDetectorTool::scan_js_content(&source, "test.js");
        assert!(hits.is_empty());
    }

    /// Two patterns on the same line still count as one match.
    #[test]
    fn scan_js_content_one_match_per_line() {
        let hits = ConsoleLogDetectorTool::scan_js_content(
            "console.log('a'); console.debug('b');",
            "test.js",
        );
        // Only one match per line
        assert_eq!(hits.len(), 1);
    }

    /// Empty content produces no matches.
    #[test]
    fn scan_js_content_empty_input() {
        assert!(ConsoleLogDetectorTool::scan_js_content("", "test.js").is_empty());
    }

    /// The pattern list keeps its core entries.
    #[test]
    fn patterns_list_is_not_empty() {
        let known = ConsoleLogDetectorTool::patterns();
        assert!(known.len() >= 8);
        for required in ["console.log(", "debugger;"] {
            assert!(known.contains(&required));
        }
    }
}
impl PentestTool for ConsoleLogDetectorTool {
    /// Identifier under which this tool is registered.
    fn name(&self) -> &str {
        "console_log_detector"
    }

    /// Human-readable summary of what the tool checks.
    fn description(&self) -> &str {
        "Detects console.log, console.debug, console.error, debugger, and similar debug \
         statements left in production JavaScript. Fetches the HTML page and referenced JS files."
    }

    /// JSON schema of the accepted input: a required `url` and an optional
    /// `additional_js_urls` array of extra script URLs to scan.
    fn input_schema(&self) -> serde_json::Value {
        json!({
            "type": "object",
            "properties": {
                "url": {
                    "type": "string",
                    "description": "URL of the page to check for console.log leakage"
                },
                "additional_js_urls": {
                    "type": "array",
                    "description": "Optional additional JavaScript file URLs to scan",
                    "items": { "type": "string" }
                }
            },
            "required": ["url"]
        })
    }

    /// Fetches the page at `input["url"]`, scans its HTML and every referenced (or
    /// explicitly supplied) JavaScript file, and reports one low-severity finding per
    /// file that contains console/debug statements.
    ///
    /// # Errors
    ///
    /// Returns `CoreError::Dast` when `url` is missing from `input` or the main page
    /// cannot be fetched. Failures on individual script files are skipped.
    fn execute<'a>(
        &'a self,
        input: serde_json::Value,
        context: &'a PentestToolContext,
    ) -> std::pin::Pin<
        Box<dyn std::future::Future<Output = Result<PentestToolResult, CoreError>> + Send + 'a>,
    > {
        /// Returns the longest prefix of `text` that is at most `max_bytes` long and
        /// ends on a character boundary, so slicing never panics on multi-byte text.
        fn clip(text: &str, max_bytes: usize) -> &str {
            if text.len() <= max_bytes {
                return text;
            }
            let mut end = max_bytes;
            while !text.is_char_boundary(end) {
                end -= 1;
            }
            &text[..end]
        }

        Box::pin(async move {
            let url = input
                .get("url")
                .and_then(|v| v.as_str())
                .ok_or_else(|| CoreError::Dast("Missing required 'url' parameter".to_string()))?;

            // Non-string entries in the optional array are ignored.
            let additional_js: Vec<String> = input
                .get("additional_js_urls")
                .and_then(|v| v.as_array())
                .map(|arr| {
                    arr.iter()
                        .filter_map(|v| v.as_str().map(String::from))
                        .collect()
                })
                .unwrap_or_default();

            let target_id = context
                .target
                .id
                .map(|oid| oid.to_hex())
                .unwrap_or_else(|| "unknown".to_string());

            // Fetch the main page
            let response = self
                .http
                .get(url)
                .send()
                .await
                .map_err(|e| CoreError::Dast(format!("Failed to fetch {url}: {e}")))?;
            // An unreadable body is treated as an empty page.
            let html = response.text().await.unwrap_or_default();

            // Scan inline scripts in the HTML
            let mut all_matches = Self::scan_js_content(&html, url);

            // Extract JS file URLs from the HTML and append the caller-supplied ones.
            let mut js_urls = Self::extract_js_urls(&html, url);
            js_urls.extend(additional_js);
            {
                // `Vec::dedup` only removes adjacent duplicates, and this list is not
                // sorted, so drop repeats explicitly while keeping first-seen order.
                let mut seen = std::collections::HashSet::new();
                js_urls.retain(|candidate| seen.insert(candidate.clone()));
            }

            // Fetch and scan each JS file
            for js_url in &js_urls {
                // Best effort: unreachable files and non-success responses are skipped.
                let resp = match self.http.get(js_url).send().await {
                    Ok(resp) if resp.status().is_success() => resp,
                    _ => continue,
                };
                let js_content = resp.text().await.unwrap_or_default();
                // Minified files are scanned too (minifiers typically strip console
                // calls, but not always).
                all_matches.extend(Self::scan_js_content(&js_content, js_url));
            }

            let match_data: Vec<serde_json::Value> = all_matches
                .iter()
                .map(|m| {
                    json!({
                        "pattern": m.pattern,
                        "file": m.file_url,
                        "line": m.line_number,
                        "snippet": m.line_snippet,
                    })
                })
                .collect();

            // Group by file for the findings; a BTreeMap keeps the resulting order
            // stable (sorted by URL) from one run to the next.
            let mut by_file: std::collections::BTreeMap<&str, Vec<&ConsoleMatch>> =
                std::collections::BTreeMap::new();
            for m in &all_matches {
                by_file.entry(m.file_url.as_str()).or_default().push(m);
            }

            let mut findings = Vec::new();
            for (file_url, matches) in by_file {
                // Show at most five matches in the evidence snippet.
                let pattern_summary: Vec<String> = matches
                    .iter()
                    .take(5)
                    .map(|m| {
                        let shown = if m.line_snippet.len() > 80 {
                            format!("{}...", clip(&m.line_snippet, 80))
                        } else {
                            m.line_snippet.clone()
                        };
                        format!(
                            "  Line {}: {} - {}",
                            m.line_number.unwrap_or(0),
                            m.pattern,
                            shown
                        )
                    })
                    .collect();

                let evidence = DastEvidence {
                    request_method: "GET".to_string(),
                    request_url: file_url.to_string(),
                    request_headers: None,
                    request_body: None,
                    // NOTE(review): fixed at 200; the main page's real status is not
                    // checked before it is scanned.
                    response_status: 200,
                    response_headers: None,
                    response_snippet: Some(pattern_summary.join("\n")),
                    screenshot_path: None,
                    payload: None,
                    response_time_ms: None,
                };

                let total = matches.len();
                let extra = if total > 5 {
                    format!(" (and {} more)", total - 5)
                } else {
                    String::new()
                };

                let mut finding = DastFinding::new(
                    String::new(),
                    target_id.clone(),
                    DastVulnType::ConsoleLogLeakage,
                    format!("Console/debug statements in {}", file_url),
                    format!(
                        "Found {total} console/debug statements in {file_url}{extra}. \
                         These can leak sensitive information such as API responses, user data, \
                         or internal state to anyone with browser developer tools open."
                    ),
                    Severity::Low,
                    file_url.to_string(),
                    "GET".to_string(),
                );
                finding.cwe = Some("CWE-532".to_string());
                finding.evidence = vec![evidence];
                finding.remediation = Some(
                    "Remove console.log/debug/error statements from production code. \
                     Use a build step (e.g., babel plugin, terser) to strip console calls \
                     during the production build."
                        .to_string(),
                );
                findings.push(finding);
            }

            let total_matches = all_matches.len();
            let count = findings.len();
            info!(
                url,
                js_files = js_urls.len(),
                total_matches,
                "Console log detection complete"
            );

            Ok(PentestToolResult {
                summary: if total_matches > 0 {
                    format!(
                        "Found {total_matches} console/debug statements across {} files.",
                        count
                    )
                } else {
                    format!(
                        "No console/debug statements found in HTML or {} JS files.",
                        js_urls.len()
                    )
                },
                findings,
                data: json!({
                    "total_matches": total_matches,
                    "js_files_scanned": js_urls.len(),
                    "matches": match_data,
                }),
            })
        })
    }
}