refactor: modularize codebase and add 404 unit tests (#13)
All checks were successful
CI / Format (push) Successful in 4s
CI / Clippy (push) Successful in 4m19s
CI / Security Audit (push) Successful in 1m44s
CI / Detect Changes (push) Successful in 5s
CI / Tests (push) Successful in 5m15s
CI / Deploy Agent (push) Successful in 2s
CI / Deploy Dashboard (push) Successful in 2s
CI / Deploy Docs (push) Has been skipped
CI / Deploy MCP (push) Successful in 2s

This commit was merged in pull request #13.
This commit is contained in:
2026-03-13 08:03:45 +00:00
parent acc5b86aa4
commit 3bb690e5bb
89 changed files with 11884 additions and 6046 deletions

View File

@@ -0,0 +1,251 @@
use std::path::Path;
use compliance_core::models::{Finding, ScanType, Severity};
use compliance_core::CoreError;
use tokio::process::Command;
use crate::pipeline::dedup;
use super::run_with_timeout;
/// Runs `cargo clippy` inside `repo_path` and converts its JSON diagnostics
/// into findings.
///
/// Every stdout line is parsed as a standalone JSON value; lines that are not
/// valid JSON are skipped. A record becomes a [`Finding`] only when all of the
/// following hold:
///
/// * its `reason` is `compiler-message`,
/// * its level is `warning` or `error`,
/// * it carries a non-empty lint code, and
/// * its text does not start with `aborting due to`.
///
/// `error` diagnostics map to [`Severity::High`], warnings to
/// [`Severity::Low`]. The fingerprint is derived from the repository id, the
/// scanner name, the lint code, the primary span's file and its start line.
/// The exit status of `cargo` is not inspected.
///
/// # Errors
///
/// Returns [`CoreError::Scanner`] when `cargo` cannot be spawned, and forwards
/// any error reported by `run_with_timeout`.
pub(super) async fn run_clippy(repo_path: &Path, repo_id: &str) -> Result<Vec<Finding>, CoreError> {
    let child = Command::new("cargo")
        .args([
            "clippy",
            "--message-format=json",
            "--quiet",
            "--",
            "-W",
            "clippy::all",
        ])
        .current_dir(repo_path)
        // NOTE(review): presumably blanks out any configured compiler wrapper
        // for the scanned repository - confirm the intent.
        .env("RUSTC_WRAPPER", "")
        .stdout(std::process::Stdio::piped())
        .stderr(std::process::Stdio::piped())
        // tokio leaves a child running when its handle is dropped unless this
        // is set; without it a timed-out clippy run would keep executing.
        .kill_on_drop(true)
        .spawn()
        .map_err(|e| CoreError::Scanner {
            scanner: "clippy".to_string(),
            source: Box::new(e),
        })?;

    let output = run_with_timeout(child, "clippy").await?;
    let stdout = String::from_utf8_lossy(&output.stdout);
    let mut findings = Vec::new();

    for line in stdout.lines() {
        // Anything that is not a JSON value is ignored.
        let Ok(msg) = serde_json::from_str::<serde_json::Value>(line) else {
            continue;
        };
        if msg.get("reason").and_then(|v| v.as_str()) != Some("compiler-message") {
            continue;
        }
        let Some(message) = msg.get("message") else {
            continue;
        };

        let level = message.get("level").and_then(|v| v.as_str()).unwrap_or("");
        if !matches!(level, "warning" | "error") {
            continue;
        }

        let text = message
            .get("message")
            .and_then(|v| v.as_str())
            .unwrap_or("")
            .to_string();
        let code = message
            .get("code")
            .and_then(|v| v.get("code"))
            .and_then(|v| v.as_str())
            .unwrap_or("")
            .to_string();

        // Summary lines and diagnostics without a lint code are not findings.
        if text.starts_with("aborting due to") || code.is_empty() {
            continue;
        }

        let (file_path, line_number) = extract_primary_span(message);
        let severity = if level == "error" {
            Severity::High
        } else {
            Severity::Low
        };

        let fingerprint = dedup::compute_fingerprint(&[
            repo_id,
            "clippy",
            &code,
            &file_path,
            &line_number.to_string(),
        ]);

        let mut finding = Finding::new(
            repo_id.to_string(),
            fingerprint,
            "clippy".to_string(),
            ScanType::Lint,
            format!("[clippy] {text}"),
            text,
            severity,
        );
        finding.rule_id = Some(code);
        // An empty path / zero line mean "no primary span"; leave those unset.
        if !file_path.is_empty() {
            finding.file_path = Some(file_path);
        }
        if line_number > 0 {
            finding.line_number = Some(line_number);
        }
        findings.push(finding);
    }

    Ok(findings)
}
fn extract_primary_span(message: &serde_json::Value) -> (String, u32) {
let spans = match message.get("spans").and_then(|v| v.as_array()) {
Some(s) => s,
None => return (String::new(), 0),
};
for span in spans {
if span.get("is_primary").and_then(|v| v.as_bool()) == Some(true) {
let file = span
.get("file_name")
.and_then(|v| v.as_str())
.unwrap_or("")
.to_string();
let line = span.get("line_start").and_then(|v| v.as_u64()).unwrap_or(0) as u32;
return (file, line);
}
}
(String::new(), 0)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Wraps a JSON array of spans in the minimal diagnostic shape that
    /// `extract_primary_span` reads.
    fn diagnostic_with(spans: serde_json::Value) -> serde_json::Value {
        serde_json::json!({ "spans": spans })
    }

    /// The value returned when no primary span can be found.
    fn nothing() -> (String, u32) {
        (String::new(), 0)
    }

    #[test]
    fn extract_primary_span_with_primary() {
        let diagnostic = diagnostic_with(serde_json::json!([
            { "file_name": "src/lib.rs", "line_start": 42, "is_primary": true }
        ]));
        let (path, row) = extract_primary_span(&diagnostic);
        assert_eq!(path, "src/lib.rs");
        assert_eq!(row, 42);
    }

    #[test]
    fn extract_primary_span_no_primary() {
        let diagnostic = diagnostic_with(serde_json::json!([
            { "file_name": "src/lib.rs", "line_start": 42, "is_primary": false }
        ]));
        assert_eq!(extract_primary_span(&diagnostic), nothing());
    }

    #[test]
    fn extract_primary_span_multiple_spans() {
        // The non-primary span comes first and must be skipped.
        let diagnostic = diagnostic_with(serde_json::json!([
            { "file_name": "src/other.rs", "line_start": 10, "is_primary": false },
            { "file_name": "src/main.rs", "line_start": 99, "is_primary": true }
        ]));
        let (path, row) = extract_primary_span(&diagnostic);
        assert_eq!(path, "src/main.rs");
        assert_eq!(row, 99);
    }

    #[test]
    fn extract_primary_span_no_spans() {
        let diagnostic = serde_json::json!({});
        assert_eq!(extract_primary_span(&diagnostic), nothing());
    }

    #[test]
    fn extract_primary_span_empty_spans() {
        let diagnostic = diagnostic_with(serde_json::json!([]));
        assert_eq!(extract_primary_span(&diagnostic), nothing());
    }

    #[test]
    fn parse_clippy_compiler_message_line() {
        let raw = r#"{"reason":"compiler-message","message":{"level":"warning","message":"unused variable","code":{"code":"unused_variables"},"spans":[{"file_name":"src/main.rs","line_start":5,"is_primary":true}]}}"#;
        let record: serde_json::Value = serde_json::from_str(raw).unwrap();
        assert_eq!(record["reason"].as_str(), Some("compiler-message"));

        let diagnostic = record.get("message").unwrap();
        assert_eq!(diagnostic["level"].as_str(), Some("warning"));
        assert_eq!(diagnostic["message"].as_str(), Some("unused variable"));
        assert_eq!(
            diagnostic["code"]["code"].as_str(),
            Some("unused_variables")
        );

        let (path, row) = extract_primary_span(diagnostic);
        assert_eq!(path, "src/main.rs");
        assert_eq!(row, 5);
    }

    #[test]
    fn skip_non_compiler_message() {
        let raw = r#"{"reason":"build-script-executed","package_id":"foo 0.1.0"}"#;
        let record: serde_json::Value = serde_json::from_str(raw).unwrap();
        let reason = record.get("reason").and_then(|v| v.as_str());
        assert_ne!(reason, Some("compiler-message"));
    }

    #[test]
    fn skip_aborting_message() {
        let summary = "aborting due to 3 previous errors";
        assert!(summary.starts_with("aborting due to"));
    }
}

View File

@@ -0,0 +1,183 @@
use std::path::Path;
use compliance_core::models::{Finding, ScanType, Severity};
use compliance_core::CoreError;
use tokio::process::Command;
use crate::pipeline::dedup;
use super::run_with_timeout;
/// Runs the repository's own ESLint binary and converts its JSON report into
/// findings.
///
/// ESLint severity `2` maps to [`Severity::Medium`]; every other value maps to
/// [`Severity::Low`]. A message without a rule id gets an empty rule id. The
/// fingerprint is derived from the repository id, the scanner name, the rule
/// id, the reported file path and the line.
///
/// Empty stdout is treated as "no findings".
///
/// # Errors
///
/// Returns [`CoreError::Scanner`] when the binary cannot be spawned or when
/// stdout is not a parseable ESLint JSON report, and forwards any error
/// reported by `run_with_timeout`.
pub(super) async fn run_eslint(repo_path: &Path, repo_id: &str) -> Result<Vec<Finding>, CoreError> {
    // Use the project-local eslint binary directly, not npx (which can hang downloading)
    let eslint_bin = repo_path.join("node_modules/.bin/eslint");
    let child = Command::new(eslint_bin)
        .args([".", "--format", "json", "--no-error-on-unmatched-pattern"])
        .current_dir(repo_path)
        .stdout(std::process::Stdio::piped())
        .stderr(std::process::Stdio::piped())
        // tokio leaves a child running when its handle is dropped unless this
        // is set; without it a timed-out ESLint run would keep executing.
        .kill_on_drop(true)
        .spawn()
        .map_err(|e| CoreError::Scanner {
            scanner: "eslint".to_string(),
            source: Box::new(e),
        })?;

    let output = run_with_timeout(child, "eslint").await?;
    if output.stdout.is_empty() {
        return Ok(Vec::new());
    }

    // Report unparseable output instead of pretending the scan was clean.
    let results: Vec<EslintFileResult> =
        serde_json::from_slice(&output.stdout).map_err(|e| CoreError::Scanner {
            scanner: "eslint".to_string(),
            source: Box::new(e),
        })?;

    let mut findings = Vec::new();
    for file_result in results {
        for msg in file_result.messages {
            let severity = match msg.severity {
                2 => Severity::Medium,
                _ => Severity::Low,
            };
            let rule_id = msg.rule_id.unwrap_or_default();
            let fingerprint = dedup::compute_fingerprint(&[
                repo_id,
                "eslint",
                &rule_id,
                &file_result.file_path,
                &msg.line.to_string(),
            ]);

            let mut finding = Finding::new(
                repo_id.to_string(),
                fingerprint,
                "eslint".to_string(),
                ScanType::Lint,
                format!("[eslint] {}", msg.message),
                msg.message,
                severity,
            );
            finding.rule_id = Some(rule_id);
            finding.file_path = Some(file_result.file_path.clone());
            // Same convention as the clippy scanner: a zero line means the
            // position is unknown, so the field is left unset.
            if msg.line > 0 {
                finding.line_number = Some(msg.line);
            }
            findings.push(finding);
        }
    }

    Ok(findings)
}
/// One entry of ESLint's JSON report: a file and the messages raised for it.
#[derive(serde::Deserialize)]
#[serde(rename_all = "camelCase")]
struct EslintFileResult {
    /// Path of the linted file, read from the `filePath` key.
    file_path: String,
    /// Messages reported for this file; may be empty.
    messages: Vec<EslintMessage>,
}
/// A single message inside an ESLint per-file result.
#[derive(serde::Deserialize)]
struct EslintMessage {
    /// Rule that produced the message, read from `ruleId`. `None` when ESLint
    /// reports `null`, as it does for parsing errors.
    #[serde(rename = "ruleId")]
    rule_id: Option<String>,
    /// Numeric ESLint severity; `2` denotes an error.
    severity: u8,
    /// Human-readable description of the problem.
    message: String,
    /// Line the message points at. ESLint documents this field as optional,
    /// so a missing value falls back to `0` instead of failing the
    /// deserialization of the entire report.
    #[serde(default)]
    line: u32,
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses an ESLint JSON report, panicking on malformed input.
    fn parse(report: &str) -> Vec<EslintFileResult> {
        serde_json::from_str(report).unwrap()
    }

    #[test]
    fn deserialize_eslint_output() {
        let report = r#"[
{
"filePath": "/home/user/project/src/app.js",
"messages": [
{
"ruleId": "no-unused-vars",
"severity": 2,
"message": "'x' is defined but never used.",
"line": 10
},
{
"ruleId": "semi",
"severity": 1,
"message": "Missing semicolon.",
"line": 15
}
]
}
]"#;
        let files = parse(report);
        assert_eq!(files.len(), 1);

        let file = &files[0];
        assert_eq!(file.file_path, "/home/user/project/src/app.js");
        assert_eq!(file.messages.len(), 2);

        let first = &file.messages[0];
        assert_eq!(first.rule_id, Some("no-unused-vars".to_string()));
        assert_eq!(first.severity, 2);
        assert_eq!(first.line, 10);

        let second = &file.messages[1];
        assert_eq!(second.severity, 1);
    }

    #[test]
    fn deserialize_eslint_null_rule_id() {
        let report = r#"[
{
"filePath": "src/index.js",
"messages": [
{
"ruleId": null,
"severity": 2,
"message": "Parsing error: Unexpected token",
"line": 1
}
]
}
]"#;
        let files = parse(report);
        assert_eq!(files[0].messages[0].rule_id, None);
    }

    #[test]
    fn deserialize_eslint_empty_messages() {
        let files = parse(r#"[{"filePath": "src/clean.js", "messages": []}]"#);
        assert_eq!(files[0].messages.len(), 0);
    }

    #[test]
    fn deserialize_eslint_empty_array() {
        let files = parse("[]");
        assert!(files.is_empty());
    }

    #[test]
    fn eslint_severity_mapping() {
        // severity 2 = error -> Medium, anything else -> Low
        let label = |severity: u8| match severity {
            2 => "Medium",
            _ => "Low",
        };
        assert_eq!(label(2), "Medium");
        assert_eq!(label(1), "Low");
        assert_eq!(label(0), "Low");
    }
}

View File

@@ -0,0 +1,97 @@
mod clippy;
mod eslint;
mod ruff;
use std::path::Path;
use std::time::Duration;
use compliance_core::models::ScanType;
use compliance_core::traits::{ScanOutput, Scanner};
use compliance_core::CoreError;
/// Upper bound on how long a single lint command may run (two minutes).
pub(crate) const LINT_TIMEOUT: Duration = Duration::from_secs(2 * 60);
/// Scanner that runs the language-specific linters (clippy, ESLint, ruff)
/// applicable to a repository and merges their findings.
///
/// The type carries no state, so it is cheap to copy and has a trivial
/// default.
#[derive(Debug, Clone, Copy, Default)]
pub struct LintScanner;
impl Scanner for LintScanner {
    /// Identifier of this scanner.
    fn name(&self) -> &str {
        "lint"
    }

    /// Kind of scan this scanner performs.
    fn scan_type(&self) -> ScanType {
        ScanType::Lint
    }

    /// Runs every linter whose project markers are present in `repo_path`,
    /// one after another, and returns the combined findings.
    ///
    /// A failing linter is logged as a warning and contributes no findings;
    /// it never fails the scan as a whole. No SBOM entries are produced.
    #[tracing::instrument(skip_all)]
    async fn scan(&self, repo_path: &Path, repo_id: &str) -> Result<ScanOutput, CoreError> {
        let mut collected = Vec::new();

        // Rust: clippy
        if has_rust_project(repo_path) {
            let batch = clippy::run_clippy(repo_path, repo_id)
                .await
                .unwrap_or_else(|e| {
                    tracing::warn!("Clippy failed: {e}");
                    Vec::new()
                });
            collected.extend(batch);
        }

        // JavaScript: ESLint
        if has_js_project(repo_path) {
            let batch = eslint::run_eslint(repo_path, repo_id)
                .await
                .unwrap_or_else(|e| {
                    tracing::warn!("ESLint failed: {e}");
                    Vec::new()
                });
            collected.extend(batch);
        }

        // Python: ruff
        if has_python_project(repo_path) {
            let batch = ruff::run_ruff(repo_path, repo_id)
                .await
                .unwrap_or_else(|e| {
                    tracing::warn!("Ruff failed: {e}");
                    Vec::new()
                });
            collected.extend(batch);
        }

        Ok(ScanOutput {
            findings: collected,
            sbom_entries: Vec::new(),
        })
    }
}
/// Reports whether `repo_path` directly contains a `Cargo.toml`.
fn has_rust_project(repo_path: &Path) -> bool {
    let manifest = repo_path.join("Cargo.toml");
    manifest.exists()
}
/// Reports whether `repo_path` holds a JavaScript project that can be linted.
fn has_js_project(repo_path: &Path) -> bool {
    // Only run if eslint is actually installed in the project
    const REQUIRED: [&str; 2] = ["package.json", "node_modules/.bin/eslint"];
    REQUIRED
        .iter()
        .all(|entry| repo_path.join(entry).exists())
}
/// Reports whether `repo_path` directly contains any of the Python project
/// markers `pyproject.toml`, `setup.py` or `requirements.txt`.
fn has_python_project(repo_path: &Path) -> bool {
    const MARKERS: [&str; 3] = ["pyproject.toml", "setup.py", "requirements.txt"];
    MARKERS
        .iter()
        .any(|marker| repo_path.join(marker).exists())
}
/// Run a command with a timeout, returning its output or an error
pub(crate) async fn run_with_timeout(
child: tokio::process::Child,
scanner_name: &str,
) -> Result<std::process::Output, CoreError> {
let result = tokio::time::timeout(LINT_TIMEOUT, child.wait_with_output()).await;
match result {
Ok(Ok(output)) => Ok(output),
Ok(Err(e)) => Err(CoreError::Scanner {
scanner: scanner_name.to_string(),
source: Box::new(e),
}),
Err(_) => {
// Process is dropped here which sends SIGKILL on Unix
Err(CoreError::Scanner {
scanner: scanner_name.to_string(),
source: Box::new(std::io::Error::new(
std::io::ErrorKind::TimedOut,
format!("{scanner_name} timed out after {}s", LINT_TIMEOUT.as_secs()),
)),
})
}
}
}

View File

@@ -0,0 +1,150 @@
use std::path::Path;
use compliance_core::models::{Finding, ScanType, Severity};
use compliance_core::CoreError;
use tokio::process::Command;
use crate::pipeline::dedup;
use super::run_with_timeout;
/// Runs `ruff check` in `repo_path` and converts its JSON report into
/// findings.
///
/// Rule codes starting with `E` or `F` map to [`Severity::Medium`]; every
/// other code maps to [`Severity::Low`]. The fingerprint is derived from the
/// repository id, the scanner name, the rule code, the file name and the row.
///
/// Empty stdout is treated as "no findings".
///
/// # Errors
///
/// Returns [`CoreError::Scanner`] when `ruff` cannot be spawned or when
/// stdout is not a parseable ruff JSON report, and forwards any error
/// reported by `run_with_timeout`.
pub(super) async fn run_ruff(repo_path: &Path, repo_id: &str) -> Result<Vec<Finding>, CoreError> {
    let child = Command::new("ruff")
        .args(["check", ".", "--output-format", "json", "--exit-zero"])
        .current_dir(repo_path)
        .stdout(std::process::Stdio::piped())
        .stderr(std::process::Stdio::piped())
        // tokio leaves a child running when its handle is dropped unless this
        // is set; without it a timed-out ruff run would keep executing.
        .kill_on_drop(true)
        .spawn()
        .map_err(|e| CoreError::Scanner {
            scanner: "ruff".to_string(),
            source: Box::new(e),
        })?;

    let output = run_with_timeout(child, "ruff").await?;
    if output.stdout.is_empty() {
        return Ok(Vec::new());
    }

    // Report unparseable output instead of pretending the scan was clean.
    let results: Vec<RuffResult> =
        serde_json::from_slice(&output.stdout).map_err(|e| CoreError::Scanner {
            scanner: "ruff".to_string(),
            source: Box::new(e),
        })?;

    let findings = results
        .into_iter()
        .map(|r| {
            let severity = if r.code.starts_with('E') || r.code.starts_with('F') {
                Severity::Medium
            } else {
                Severity::Low
            };
            let fingerprint = dedup::compute_fingerprint(&[
                repo_id,
                "ruff",
                &r.code,
                &r.filename,
                &r.location.row.to_string(),
            ]);

            let mut finding = Finding::new(
                repo_id.to_string(),
                fingerprint,
                "ruff".to_string(),
                ScanType::Lint,
                format!("[ruff] {}: {}", r.code, r.message),
                r.message,
                severity,
            );
            finding.rule_id = Some(r.code);
            finding.file_path = Some(r.filename);
            finding.line_number = Some(r.location.row);
            finding
        })
        .collect();

    Ok(findings)
}
#[derive(serde::Deserialize)]
struct RuffResult {
code: String,
message: String,
filename: String,
location: RuffLocation,
}
/// Position of a ruff diagnostic. Only `row` is read; other keys of the JSON
/// object (for example `column`) are ignored.
#[derive(serde::Deserialize)]
struct RuffLocation {
/// Row reported by ruff; stored as the finding's line number.
row: u32,
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses a ruff JSON report, panicking on malformed input.
    fn parse(report: &str) -> Vec<RuffResult> {
        serde_json::from_str(report).unwrap()
    }

    /// Mirrors the severity rule used by `run_ruff`: `E*` and `F*` are Medium.
    fn is_medium(code: &str) -> bool {
        code.starts_with('E') || code.starts_with('F')
    }

    #[test]
    fn deserialize_ruff_output() {
        let report = r#"[
{
"code": "E501",
"message": "Line too long (120 > 79 characters)",
"filename": "src/main.py",
"location": {"row": 42}
},
{
"code": "F401",
"message": "`os` imported but unused",
"filename": "src/utils.py",
"location": {"row": 1}
}
]"#;
        let diagnostics = parse(report);
        assert_eq!(diagnostics.len(), 2);

        let first = &diagnostics[0];
        assert_eq!(first.code, "E501");
        assert_eq!(first.filename, "src/main.py");
        assert_eq!(first.location.row, 42);

        let second = &diagnostics[1];
        assert_eq!(second.code, "F401");
        assert_eq!(second.location.row, 1);
    }

    #[test]
    fn deserialize_ruff_empty() {
        let diagnostics = parse("[]");
        assert!(diagnostics.is_empty());
    }

    #[test]
    fn ruff_severity_e_and_f_are_medium() {
        for code in &["E501", "E302", "F401", "F811"] {
            assert!(is_medium(code), "Expected {code} to be Medium severity");
        }
    }

    #[test]
    fn ruff_severity_others_are_low() {
        for code in &["W291", "I001", "D100", "C901", "N801"] {
            assert!(!is_medium(code), "Expected {code} to be Low severity");
        }
    }

    #[test]
    fn deserialize_ruff_with_extra_fields() {
        // Ruff output may contain additional fields we don't use
        let report = r#"[{
"code": "W291",
"message": "Trailing whitespace",
"filename": "app.py",
"location": {"row": 3, "column": 10},
"end_location": {"row": 3, "column": 11},
"fix": null,
"noqa_row": 3
}]"#;
        let diagnostics = parse(report);
        assert_eq!(diagnostics.len(), 1);
        assert_eq!(diagnostics[0].code, "W291");
    }
}