feat: findings refinement, new scanners, and deployment tooling (#6)
Some checks failed
CI / Format (push) Successful in 3s
CI / Clippy (push) Successful in 4m3s
CI / Security Audit (push) Successful in 1m38s
CI / Tests (push) Successful in 4m44s
CI / Detect Changes (push) Successful in 2s
CI / Deploy Agent (push) Successful in 2s
CI / Deploy Dashboard (push) Successful in 2s
CI / Deploy Docs (push) Has been skipped
CI / Deploy MCP (push) Failing after 2s
Some checks failed
CI / Format (push) Successful in 3s
CI / Clippy (push) Successful in 4m3s
CI / Security Audit (push) Successful in 1m38s
CI / Tests (push) Successful in 4m44s
CI / Detect Changes (push) Successful in 2s
CI / Deploy Agent (push) Successful in 2s
CI / Deploy Dashboard (push) Successful in 2s
CI / Deploy Docs (push) Has been skipped
CI / Deploy MCP (push) Failing after 2s
This commit was merged in pull request #6.
This commit is contained in:
186
compliance-agent/src/pipeline/code_review.rs
Normal file
186
compliance-agent/src/pipeline/code_review.rs
Normal file
@@ -0,0 +1,186 @@
|
||||
use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
|
||||
use compliance_core::models::{Finding, ScanType, Severity};
|
||||
use compliance_core::traits::ScanOutput;
|
||||
|
||||
use crate::llm::review_prompts::REVIEW_PASSES;
|
||||
use crate::llm::LlmClient;
|
||||
use crate::pipeline::dedup;
|
||||
use crate::pipeline::git::{DiffFile, GitOps};
|
||||
|
||||
pub struct CodeReviewScanner {
|
||||
llm: Arc<LlmClient>,
|
||||
}
|
||||
|
||||
impl CodeReviewScanner {
|
||||
pub fn new(llm: Arc<LlmClient>) -> Self {
|
||||
Self { llm }
|
||||
}
|
||||
|
||||
/// Run multi-pass LLM code review on the diff between old and new commits.
|
||||
pub async fn review_diff(
|
||||
&self,
|
||||
repo_path: &Path,
|
||||
repo_id: &str,
|
||||
old_sha: &str,
|
||||
new_sha: &str,
|
||||
) -> ScanOutput {
|
||||
let diff_files = match GitOps::get_diff_content(repo_path, old_sha, new_sha) {
|
||||
Ok(files) => files,
|
||||
Err(e) => {
|
||||
tracing::warn!("Failed to extract diff for code review: {e}");
|
||||
return ScanOutput::default();
|
||||
}
|
||||
};
|
||||
|
||||
if diff_files.is_empty() {
|
||||
return ScanOutput::default();
|
||||
}
|
||||
|
||||
let mut all_findings = Vec::new();
|
||||
|
||||
// Chunk diff files into groups to avoid exceeding context limits
|
||||
let chunks = chunk_diff_files(&diff_files, 8000);
|
||||
|
||||
for (pass_name, system_prompt) in REVIEW_PASSES {
|
||||
for chunk in &chunks {
|
||||
let user_prompt = format!(
|
||||
"Review the following code changes:\n\n{}",
|
||||
chunk
|
||||
.iter()
|
||||
.map(|f| format!("--- {} ---\n{}", f.path, f.hunks))
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n\n")
|
||||
);
|
||||
|
||||
match self.llm.chat(system_prompt, &user_prompt, Some(0.1)).await {
|
||||
Ok(response) => {
|
||||
let parsed = parse_review_response(&response, pass_name, repo_id, chunk);
|
||||
all_findings.extend(parsed);
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!("Code review pass '{pass_name}' failed: {e}");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ScanOutput {
|
||||
findings: all_findings,
|
||||
sbom_entries: Vec::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Group diff files into chunks that fit within a token budget (rough char estimate)
|
||||
fn chunk_diff_files(files: &[DiffFile], max_chars: usize) -> Vec<Vec<&DiffFile>> {
|
||||
let mut chunks: Vec<Vec<&DiffFile>> = Vec::new();
|
||||
let mut current_chunk: Vec<&DiffFile> = Vec::new();
|
||||
let mut current_size = 0;
|
||||
|
||||
for file in files {
|
||||
if current_size + file.hunks.len() > max_chars && !current_chunk.is_empty() {
|
||||
chunks.push(std::mem::take(&mut current_chunk));
|
||||
current_size = 0;
|
||||
}
|
||||
current_chunk.push(file);
|
||||
current_size += file.hunks.len();
|
||||
}
|
||||
|
||||
if !current_chunk.is_empty() {
|
||||
chunks.push(current_chunk);
|
||||
}
|
||||
|
||||
chunks
|
||||
}
|
||||
|
||||
fn parse_review_response(
|
||||
response: &str,
|
||||
pass_name: &str,
|
||||
repo_id: &str,
|
||||
chunk: &[&DiffFile],
|
||||
) -> Vec<Finding> {
|
||||
let cleaned = response.trim();
|
||||
let cleaned = if cleaned.starts_with("```") {
|
||||
cleaned
|
||||
.trim_start_matches("```json")
|
||||
.trim_start_matches("```")
|
||||
.trim_end_matches("```")
|
||||
.trim()
|
||||
} else {
|
||||
cleaned
|
||||
};
|
||||
|
||||
let issues: Vec<ReviewIssue> = match serde_json::from_str(cleaned) {
|
||||
Ok(v) => v,
|
||||
Err(_) => {
|
||||
if cleaned != "[]" {
|
||||
tracing::debug!("Failed to parse {pass_name} review response: {cleaned}");
|
||||
}
|
||||
return Vec::new();
|
||||
}
|
||||
};
|
||||
|
||||
issues
|
||||
.into_iter()
|
||||
.filter(|issue| {
|
||||
// Verify the file exists in the diff chunk
|
||||
chunk.iter().any(|f| f.path == issue.file)
|
||||
})
|
||||
.map(|issue| {
|
||||
let severity = match issue.severity.as_str() {
|
||||
"critical" => Severity::Critical,
|
||||
"high" => Severity::High,
|
||||
"medium" => Severity::Medium,
|
||||
"low" => Severity::Low,
|
||||
_ => Severity::Info,
|
||||
};
|
||||
|
||||
let fingerprint = dedup::compute_fingerprint(&[
|
||||
repo_id,
|
||||
"code-review",
|
||||
pass_name,
|
||||
&issue.file,
|
||||
&issue.line.to_string(),
|
||||
&issue.title,
|
||||
]);
|
||||
|
||||
let description = if let Some(suggestion) = &issue.suggestion {
|
||||
format!("{}\n\nSuggested fix: {}", issue.description, suggestion)
|
||||
} else {
|
||||
issue.description.clone()
|
||||
};
|
||||
|
||||
let mut finding = Finding::new(
|
||||
repo_id.to_string(),
|
||||
fingerprint,
|
||||
format!("code-review/{pass_name}"),
|
||||
ScanType::CodeReview,
|
||||
issue.title,
|
||||
description,
|
||||
severity,
|
||||
);
|
||||
finding.rule_id = Some(format!("review/{pass_name}"));
|
||||
finding.file_path = Some(issue.file);
|
||||
finding.line_number = Some(issue.line);
|
||||
finding.cwe = issue.cwe;
|
||||
finding.suggested_fix = issue.suggestion;
|
||||
finding
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// A single issue reported by the LLM in a review-pass JSON response.
#[derive(serde::Deserialize)]
struct ReviewIssue {
    // Short title; also part of the dedup fingerprint.
    title: String,
    // Full explanation of the issue.
    description: String,
    // One of "critical" / "high" / "medium" / "low"; anything else maps to Info.
    severity: String,
    // File path the issue refers to; must match a path in the diff chunk.
    file: String,
    // Line number; defaults to 0 when the model omits it.
    #[serde(default)]
    line: u32,
    // Optional CWE identifier, copied onto the finding.
    #[serde(default)]
    cwe: Option<String>,
    // Optional suggested fix; appended to the finding description.
    #[serde(default)]
    suggestion: Option<String>,
}
|
||||
@@ -64,6 +64,8 @@ impl CveScanner {
|
||||
}
|
||||
|
||||
async fn query_osv_batch(&self, entries: &[SbomEntry]) -> Result<Vec<Vec<OsvVuln>>, CoreError> {
|
||||
const OSV_BATCH_SIZE: usize = 500;
|
||||
|
||||
let queries: Vec<_> = entries
|
||||
.iter()
|
||||
.filter_map(|e| {
|
||||
@@ -79,32 +81,34 @@ impl CveScanner {
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
|
||||
let body = serde_json::json!({ "queries": queries });
|
||||
let mut all_vulns: Vec<Vec<OsvVuln>> = Vec::with_capacity(queries.len());
|
||||
|
||||
let resp = self
|
||||
.http
|
||||
.post("https://api.osv.dev/v1/querybatch")
|
||||
.json(&body)
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| CoreError::Http(format!("OSV.dev request failed: {e}")))?;
|
||||
for chunk in queries.chunks(OSV_BATCH_SIZE) {
|
||||
let body = serde_json::json!({ "queries": chunk });
|
||||
|
||||
if !resp.status().is_success() {
|
||||
let status = resp.status();
|
||||
let body = resp.text().await.unwrap_or_default();
|
||||
tracing::warn!("OSV.dev returned {status}: {body}");
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
let resp = self
|
||||
.http
|
||||
.post("https://api.osv.dev/v1/querybatch")
|
||||
.json(&body)
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| CoreError::Http(format!("OSV.dev request failed: {e}")))?;
|
||||
|
||||
let result: OsvBatchResponse = resp
|
||||
.json()
|
||||
.await
|
||||
.map_err(|e| CoreError::Http(format!("Failed to parse OSV.dev response: {e}")))?;
|
||||
if !resp.status().is_success() {
|
||||
let status = resp.status();
|
||||
let body = resp.text().await.unwrap_or_default();
|
||||
tracing::warn!("OSV.dev returned {status}: {body}");
|
||||
// Push empty results for this chunk so indices stay aligned
|
||||
all_vulns.extend(std::iter::repeat_with(Vec::new).take(chunk.len()));
|
||||
continue;
|
||||
}
|
||||
|
||||
let vulns = result
|
||||
.results
|
||||
.into_iter()
|
||||
.map(|r| {
|
||||
let result: OsvBatchResponse = resp
|
||||
.json()
|
||||
.await
|
||||
.map_err(|e| CoreError::Http(format!("Failed to parse OSV.dev response: {e}")))?;
|
||||
|
||||
let chunk_vulns = result.results.into_iter().map(|r| {
|
||||
r.vulns
|
||||
.unwrap_or_default()
|
||||
.into_iter()
|
||||
@@ -116,10 +120,12 @@ impl CveScanner {
|
||||
}),
|
||||
})
|
||||
.collect()
|
||||
})
|
||||
.collect();
|
||||
});
|
||||
|
||||
Ok(vulns)
|
||||
all_vulns.extend(chunk_vulns);
|
||||
}
|
||||
|
||||
Ok(all_vulns)
|
||||
}
|
||||
|
||||
async fn query_nvd(&self, cve_id: &str) -> Result<Option<f64>, CoreError> {
|
||||
|
||||
@@ -1,17 +1,80 @@
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use git2::{FetchOptions, Repository};
|
||||
use git2::{Cred, FetchOptions, RemoteCallbacks, Repository};
|
||||
|
||||
use crate::error::AgentError;
|
||||
|
||||
/// Credentials for accessing a private repository.
///
/// All fields are optional; with none set, git operations fall back to the
/// default credential helper.
#[derive(Debug, Clone, Default)]
pub struct RepoCredentials {
    /// Path to the SSH private key (for SSH URLs); only used if the file exists.
    pub ssh_key_path: Option<String>,
    /// Auth token / password (for HTTPS URLs)
    pub auth_token: Option<String>,
    /// Username for HTTPS auth (defaults to "x-access-token")
    pub auth_username: Option<String>,
}
|
||||
|
||||
impl RepoCredentials {
|
||||
pub(crate) fn make_callbacks(&self) -> RemoteCallbacks<'_> {
|
||||
let mut callbacks = RemoteCallbacks::new();
|
||||
let ssh_key = self.ssh_key_path.clone();
|
||||
let token = self.auth_token.clone();
|
||||
let username = self.auth_username.clone();
|
||||
|
||||
callbacks.credentials(move |_url, username_from_url, allowed_types| {
|
||||
// SSH key authentication
|
||||
if allowed_types.contains(git2::CredentialType::SSH_KEY) {
|
||||
if let Some(ref key_path) = ssh_key {
|
||||
let key = Path::new(key_path);
|
||||
if key.exists() {
|
||||
let user = username_from_url.unwrap_or("git");
|
||||
return Cred::ssh_key(user, None, key, None);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// HTTPS userpass authentication
|
||||
if allowed_types.contains(git2::CredentialType::USER_PASS_PLAINTEXT) {
|
||||
if let Some(ref tok) = token {
|
||||
let user = username.as_deref().unwrap_or("x-access-token");
|
||||
return Cred::userpass_plaintext(user, tok);
|
||||
}
|
||||
}
|
||||
|
||||
Cred::default()
|
||||
});
|
||||
|
||||
callbacks
|
||||
}
|
||||
|
||||
fn fetch_options(&self) -> FetchOptions<'_> {
|
||||
let mut fetch_opts = FetchOptions::new();
|
||||
if self.has_credentials() {
|
||||
fetch_opts.remote_callbacks(self.make_callbacks());
|
||||
}
|
||||
fetch_opts
|
||||
}
|
||||
|
||||
fn has_credentials(&self) -> bool {
|
||||
self.ssh_key_path
|
||||
.as_ref()
|
||||
.map(|p| Path::new(p).exists())
|
||||
.unwrap_or(false)
|
||||
|| self.auth_token.is_some()
|
||||
}
|
||||
}
|
||||
|
||||
pub struct GitOps {
|
||||
base_path: PathBuf,
|
||||
credentials: RepoCredentials,
|
||||
}
|
||||
|
||||
impl GitOps {
|
||||
pub fn new(base_path: &str) -> Self {
|
||||
pub fn new(base_path: &str, credentials: RepoCredentials) -> Self {
|
||||
Self {
|
||||
base_path: PathBuf::from(base_path),
|
||||
credentials,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -22,17 +85,25 @@ impl GitOps {
|
||||
self.fetch(&repo_path)?;
|
||||
} else {
|
||||
std::fs::create_dir_all(&repo_path)?;
|
||||
Repository::clone(git_url, &repo_path)?;
|
||||
self.clone_repo(git_url, &repo_path)?;
|
||||
tracing::info!("Cloned {git_url} to {}", repo_path.display());
|
||||
}
|
||||
|
||||
Ok(repo_path)
|
||||
}
|
||||
|
||||
fn clone_repo(&self, git_url: &str, repo_path: &Path) -> Result<(), AgentError> {
|
||||
let mut builder = git2::build::RepoBuilder::new();
|
||||
let fetch_opts = self.credentials.fetch_options();
|
||||
builder.fetch_options(fetch_opts);
|
||||
builder.clone(git_url, repo_path)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn fetch(&self, repo_path: &Path) -> Result<(), AgentError> {
|
||||
let repo = Repository::open(repo_path)?;
|
||||
let mut remote = repo.find_remote("origin")?;
|
||||
let mut fetch_opts = FetchOptions::new();
|
||||
let mut fetch_opts = self.credentials.fetch_options();
|
||||
remote.fetch(&[] as &[&str], Some(&mut fetch_opts), None)?;
|
||||
|
||||
// Fast-forward to origin/HEAD
|
||||
@@ -48,6 +119,15 @@ impl GitOps {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Test that we can access a remote repository (used during add validation)
|
||||
pub fn test_access(git_url: &str, credentials: &RepoCredentials) -> Result<(), AgentError> {
|
||||
let mut remote = git2::Remote::create_detached(git_url)?;
|
||||
let callbacks = credentials.make_callbacks();
|
||||
remote.connect_auth(git2::Direction::Fetch, Some(callbacks), None)?;
|
||||
remote.disconnect()?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn get_head_sha(repo_path: &Path) -> Result<String, AgentError> {
|
||||
let repo = Repository::open(repo_path)?;
|
||||
let head = repo.head()?;
|
||||
@@ -63,6 +143,62 @@ impl GitOps {
|
||||
}
|
||||
}
|
||||
|
||||
/// Extract structured diff content between two commits
|
||||
pub fn get_diff_content(
|
||||
repo_path: &Path,
|
||||
old_sha: &str,
|
||||
new_sha: &str,
|
||||
) -> Result<Vec<DiffFile>, AgentError> {
|
||||
let repo = Repository::open(repo_path)?;
|
||||
let old_commit = repo.find_commit(git2::Oid::from_str(old_sha)?)?;
|
||||
let new_commit = repo.find_commit(git2::Oid::from_str(new_sha)?)?;
|
||||
|
||||
let old_tree = old_commit.tree()?;
|
||||
let new_tree = new_commit.tree()?;
|
||||
|
||||
let diff = repo.diff_tree_to_tree(Some(&old_tree), Some(&new_tree), None)?;
|
||||
|
||||
let mut diff_files: Vec<DiffFile> = Vec::new();
|
||||
|
||||
diff.print(git2::DiffFormat::Patch, |delta, _hunk, line| {
|
||||
let file_path = delta
|
||||
.new_file()
|
||||
.path()
|
||||
.map(|p| p.to_string_lossy().to_string())
|
||||
.unwrap_or_default();
|
||||
|
||||
// Find or create the DiffFile entry
|
||||
let idx = if let Some(pos) = diff_files.iter().position(|f| f.path == file_path) {
|
||||
pos
|
||||
} else {
|
||||
diff_files.push(DiffFile {
|
||||
path: file_path,
|
||||
hunks: String::new(),
|
||||
});
|
||||
diff_files.len() - 1
|
||||
};
|
||||
let diff_file = &mut diff_files[idx];
|
||||
|
||||
let prefix = match line.origin() {
|
||||
'+' => "+",
|
||||
'-' => "-",
|
||||
' ' => " ",
|
||||
_ => "",
|
||||
};
|
||||
|
||||
let content = std::str::from_utf8(line.content()).unwrap_or("");
|
||||
diff_file.hunks.push_str(prefix);
|
||||
diff_file.hunks.push_str(content);
|
||||
|
||||
true
|
||||
})?;
|
||||
|
||||
// Filter out binary files and very large diffs
|
||||
diff_files.retain(|f| !f.hunks.is_empty() && f.hunks.len() < 50_000);
|
||||
|
||||
Ok(diff_files)
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
pub fn get_changed_files(
|
||||
repo_path: &Path,
|
||||
@@ -94,3 +230,10 @@ impl GitOps {
|
||||
Ok(files)
|
||||
}
|
||||
}
|
||||
|
||||
/// A file changed between two commits with its diff content
#[derive(Debug, Clone)]
pub struct DiffFile {
    // Repo-relative path taken from the new side of the diff.
    pub path: String,
    // Concatenated patch text for all hunks of this file, with
    // `+` / `-` / ` ` line markers preserved.
    pub hunks: String,
}
|
||||
|
||||
130
compliance-agent/src/pipeline/gitleaks.rs
Normal file
130
compliance-agent/src/pipeline/gitleaks.rs
Normal file
@@ -0,0 +1,130 @@
|
||||
use std::path::Path;
|
||||
|
||||
use compliance_core::models::{Finding, ScanType, Severity};
|
||||
use compliance_core::traits::{ScanOutput, Scanner};
|
||||
use compliance_core::CoreError;
|
||||
|
||||
use crate::pipeline::dedup;
|
||||
|
||||
pub struct GitleaksScanner;
|
||||
|
||||
impl Scanner for GitleaksScanner {
|
||||
fn name(&self) -> &str {
|
||||
"gitleaks"
|
||||
}
|
||||
|
||||
fn scan_type(&self) -> ScanType {
|
||||
ScanType::SecretDetection
|
||||
}
|
||||
|
||||
async fn scan(&self, repo_path: &Path, repo_id: &str) -> Result<ScanOutput, CoreError> {
|
||||
let output = tokio::process::Command::new("gitleaks")
|
||||
.args([
|
||||
"detect",
|
||||
"--source",
|
||||
".",
|
||||
"--report-format",
|
||||
"json",
|
||||
"--report-path",
|
||||
"/dev/stdout",
|
||||
"--no-banner",
|
||||
"--exit-code",
|
||||
"0",
|
||||
])
|
||||
.current_dir(repo_path)
|
||||
.output()
|
||||
.await
|
||||
.map_err(|e| CoreError::Scanner {
|
||||
scanner: "gitleaks".to_string(),
|
||||
source: Box::new(e),
|
||||
})?;
|
||||
|
||||
if output.stdout.is_empty() {
|
||||
return Ok(ScanOutput::default());
|
||||
}
|
||||
|
||||
let results: Vec<GitleaksResult> =
|
||||
serde_json::from_slice(&output.stdout).unwrap_or_default();
|
||||
|
||||
let findings = results
|
||||
.into_iter()
|
||||
.filter(|r| !is_allowlisted(&r.file))
|
||||
.map(|r| {
|
||||
let severity = match r.rule_id.as_str() {
|
||||
s if s.contains("private-key") => Severity::Critical,
|
||||
s if s.contains("token") || s.contains("password") || s.contains("secret") => {
|
||||
Severity::High
|
||||
}
|
||||
s if s.contains("api-key") => Severity::High,
|
||||
_ => Severity::Medium,
|
||||
};
|
||||
|
||||
let fingerprint = dedup::compute_fingerprint(&[
|
||||
repo_id,
|
||||
&r.rule_id,
|
||||
&r.file,
|
||||
&r.start_line.to_string(),
|
||||
]);
|
||||
|
||||
let title = format!("Secret detected: {}", r.description);
|
||||
let description = format!(
|
||||
"Potential secret ({}) found in {}:{}. Match: {}",
|
||||
r.rule_id,
|
||||
r.file,
|
||||
r.start_line,
|
||||
r.r#match.chars().take(80).collect::<String>(),
|
||||
);
|
||||
|
||||
let mut finding = Finding::new(
|
||||
repo_id.to_string(),
|
||||
fingerprint,
|
||||
"gitleaks".to_string(),
|
||||
ScanType::SecretDetection,
|
||||
title,
|
||||
description,
|
||||
severity,
|
||||
);
|
||||
finding.rule_id = Some(r.rule_id);
|
||||
finding.file_path = Some(r.file);
|
||||
finding.line_number = Some(r.start_line);
|
||||
finding.code_snippet = Some(r.r#match);
|
||||
finding
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok(ScanOutput {
|
||||
findings,
|
||||
sbom_entries: Vec::new(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Skip files that commonly contain example/placeholder secrets
fn is_allowlisted(file_path: &str) -> bool {
    // File name patterns that mark templates and test sources.
    const SUFFIXES: [&str; 8] = [
        ".env.example",
        ".env.sample",
        ".env.template",
        "_test.go",
        ".test.ts",
        ".test.js",
        ".spec.ts",
        ".spec.js",
    ];
    // Path fragments that mark test/fixture/mock directories or files.
    const FRAGMENTS: [&str; 5] = ["/test/", "/tests/", "/fixtures/", "/testdata/", "mock"];

    let lower = file_path.to_lowercase();
    SUFFIXES.iter().any(|s| lower.ends_with(s)) || FRAGMENTS.iter().any(|s| lower.contains(s))
}
|
||||
|
||||
/// One leak record from the gitleaks JSON report.
///
/// Report fields are PascalCase; `RuleID` and `Match` are pinned with
/// explicit renames (the automatic PascalCase mapping would render `rule_id`
/// as `RuleId`).
#[derive(serde::Deserialize)]
#[serde(rename_all = "PascalCase")]
struct GitleaksResult {
    // Human-readable rule description, used in the finding title.
    description: String,
    #[serde(rename = "RuleID")]
    rule_id: String,
    // Path of the file containing the match.
    file: String,
    // Line where the match starts.
    start_line: u32,
    // Matched text; truncated to 80 chars when shown in descriptions.
    #[serde(rename = "Match")]
    r#match: String,
}
|
||||
364
compliance-agent/src/pipeline/lint.rs
Normal file
364
compliance-agent/src/pipeline/lint.rs
Normal file
@@ -0,0 +1,364 @@
|
||||
use std::path::Path;
|
||||
use std::time::Duration;
|
||||
|
||||
use compliance_core::models::{Finding, ScanType, Severity};
|
||||
use compliance_core::traits::{ScanOutput, Scanner};
|
||||
use compliance_core::CoreError;
|
||||
use tokio::process::Command;
|
||||
|
||||
use crate::pipeline::dedup;
|
||||
|
||||
/// Timeout for each individual lint command
|
||||
const LINT_TIMEOUT: Duration = Duration::from_secs(120);
|
||||
|
||||
pub struct LintScanner;
|
||||
|
||||
impl Scanner for LintScanner {
|
||||
fn name(&self) -> &str {
|
||||
"lint"
|
||||
}
|
||||
|
||||
fn scan_type(&self) -> ScanType {
|
||||
ScanType::Lint
|
||||
}
|
||||
|
||||
async fn scan(&self, repo_path: &Path, repo_id: &str) -> Result<ScanOutput, CoreError> {
|
||||
let mut all_findings = Vec::new();
|
||||
|
||||
// Detect which languages are present and run appropriate linters
|
||||
if has_rust_project(repo_path) {
|
||||
match run_clippy(repo_path, repo_id).await {
|
||||
Ok(findings) => all_findings.extend(findings),
|
||||
Err(e) => tracing::warn!("Clippy failed: {e}"),
|
||||
}
|
||||
}
|
||||
|
||||
if has_js_project(repo_path) {
|
||||
match run_eslint(repo_path, repo_id).await {
|
||||
Ok(findings) => all_findings.extend(findings),
|
||||
Err(e) => tracing::warn!("ESLint failed: {e}"),
|
||||
}
|
||||
}
|
||||
|
||||
if has_python_project(repo_path) {
|
||||
match run_ruff(repo_path, repo_id).await {
|
||||
Ok(findings) => all_findings.extend(findings),
|
||||
Err(e) => tracing::warn!("Ruff failed: {e}"),
|
||||
}
|
||||
}
|
||||
|
||||
Ok(ScanOutput {
|
||||
findings: all_findings,
|
||||
sbom_entries: Vec::new(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// A repo is treated as a Rust project if a top-level Cargo manifest exists.
fn has_rust_project(repo_path: &Path) -> bool {
    let manifest = repo_path.join("Cargo.toml");
    manifest.exists()
}
|
||||
|
||||
/// A repo counts as a JS project only when both a package.json and a
/// project-local eslint binary are present.
fn has_js_project(repo_path: &Path) -> bool {
    // Only run if eslint is actually installed in the project
    let has_manifest = repo_path.join("package.json").exists();
    let has_eslint = repo_path.join("node_modules/.bin/eslint").exists();
    has_manifest && has_eslint
}
|
||||
|
||||
/// A repo counts as a Python project if any common project marker file exists.
fn has_python_project(repo_path: &Path) -> bool {
    ["pyproject.toml", "setup.py", "requirements.txt"]
        .iter()
        .any(|marker| repo_path.join(marker).exists())
}
|
||||
|
||||
/// Run a command with a timeout, returning its output or an error
|
||||
async fn run_with_timeout(
|
||||
child: tokio::process::Child,
|
||||
scanner_name: &str,
|
||||
) -> Result<std::process::Output, CoreError> {
|
||||
let result = tokio::time::timeout(LINT_TIMEOUT, child.wait_with_output()).await;
|
||||
match result {
|
||||
Ok(Ok(output)) => Ok(output),
|
||||
Ok(Err(e)) => Err(CoreError::Scanner {
|
||||
scanner: scanner_name.to_string(),
|
||||
source: Box::new(e),
|
||||
}),
|
||||
Err(_) => {
|
||||
// Process is dropped here which sends SIGKILL on Unix
|
||||
Err(CoreError::Scanner {
|
||||
scanner: scanner_name.to_string(),
|
||||
source: Box::new(std::io::Error::new(
|
||||
std::io::ErrorKind::TimedOut,
|
||||
format!("{scanner_name} timed out after {}s", LINT_TIMEOUT.as_secs()),
|
||||
)),
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ── Clippy ──────────────────────────────────────────────
|
||||
|
||||
async fn run_clippy(repo_path: &Path, repo_id: &str) -> Result<Vec<Finding>, CoreError> {
|
||||
let child = Command::new("cargo")
|
||||
.args([
|
||||
"clippy",
|
||||
"--message-format=json",
|
||||
"--quiet",
|
||||
"--",
|
||||
"-W",
|
||||
"clippy::all",
|
||||
])
|
||||
.current_dir(repo_path)
|
||||
.stdout(std::process::Stdio::piped())
|
||||
.stderr(std::process::Stdio::piped())
|
||||
.spawn()
|
||||
.map_err(|e| CoreError::Scanner {
|
||||
scanner: "clippy".to_string(),
|
||||
source: Box::new(e),
|
||||
})?;
|
||||
|
||||
let output = run_with_timeout(child, "clippy").await?;
|
||||
let stdout = String::from_utf8_lossy(&output.stdout);
|
||||
let mut findings = Vec::new();
|
||||
|
||||
for line in stdout.lines() {
|
||||
let msg: serde_json::Value = match serde_json::from_str(line) {
|
||||
Ok(v) => v,
|
||||
Err(_) => continue,
|
||||
};
|
||||
|
||||
if msg.get("reason").and_then(|v| v.as_str()) != Some("compiler-message") {
|
||||
continue;
|
||||
}
|
||||
|
||||
let message = match msg.get("message") {
|
||||
Some(m) => m,
|
||||
None => continue,
|
||||
};
|
||||
|
||||
let level = message.get("level").and_then(|v| v.as_str()).unwrap_or("");
|
||||
|
||||
if level != "warning" && level != "error" {
|
||||
continue;
|
||||
}
|
||||
|
||||
let text = message
|
||||
.get("message")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("")
|
||||
.to_string();
|
||||
|
||||
let code = message
|
||||
.get("code")
|
||||
.and_then(|v| v.get("code"))
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("")
|
||||
.to_string();
|
||||
|
||||
if text.starts_with("aborting due to") || code.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
let (file_path, line_number) = extract_primary_span(message);
|
||||
|
||||
let severity = if level == "error" {
|
||||
Severity::High
|
||||
} else {
|
||||
Severity::Low
|
||||
};
|
||||
|
||||
let fingerprint = dedup::compute_fingerprint(&[
|
||||
repo_id,
|
||||
"clippy",
|
||||
&code,
|
||||
&file_path,
|
||||
&line_number.to_string(),
|
||||
]);
|
||||
|
||||
let mut finding = Finding::new(
|
||||
repo_id.to_string(),
|
||||
fingerprint,
|
||||
"clippy".to_string(),
|
||||
ScanType::Lint,
|
||||
format!("[clippy] {text}"),
|
||||
text,
|
||||
severity,
|
||||
);
|
||||
finding.rule_id = Some(code);
|
||||
if !file_path.is_empty() {
|
||||
finding.file_path = Some(file_path);
|
||||
}
|
||||
if line_number > 0 {
|
||||
finding.line_number = Some(line_number);
|
||||
}
|
||||
findings.push(finding);
|
||||
}
|
||||
|
||||
Ok(findings)
|
||||
}
|
||||
|
||||
fn extract_primary_span(message: &serde_json::Value) -> (String, u32) {
|
||||
let spans = match message.get("spans").and_then(|v| v.as_array()) {
|
||||
Some(s) => s,
|
||||
None => return (String::new(), 0),
|
||||
};
|
||||
|
||||
for span in spans {
|
||||
if span.get("is_primary").and_then(|v| v.as_bool()) == Some(true) {
|
||||
let file = span
|
||||
.get("file_name")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("")
|
||||
.to_string();
|
||||
let line = span.get("line_start").and_then(|v| v.as_u64()).unwrap_or(0) as u32;
|
||||
return (file, line);
|
||||
}
|
||||
}
|
||||
|
||||
(String::new(), 0)
|
||||
}
|
||||
|
||||
// ── ESLint ──────────────────────────────────────────────
|
||||
|
||||
async fn run_eslint(repo_path: &Path, repo_id: &str) -> Result<Vec<Finding>, CoreError> {
|
||||
// Use the project-local eslint binary directly, not npx (which can hang downloading)
|
||||
let eslint_bin = repo_path.join("node_modules/.bin/eslint");
|
||||
let child = Command::new(eslint_bin)
|
||||
.args([".", "--format", "json", "--no-error-on-unmatched-pattern"])
|
||||
.current_dir(repo_path)
|
||||
.stdout(std::process::Stdio::piped())
|
||||
.stderr(std::process::Stdio::piped())
|
||||
.spawn()
|
||||
.map_err(|e| CoreError::Scanner {
|
||||
scanner: "eslint".to_string(),
|
||||
source: Box::new(e),
|
||||
})?;
|
||||
|
||||
let output = run_with_timeout(child, "eslint").await?;
|
||||
|
||||
if output.stdout.is_empty() {
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
|
||||
let results: Vec<EslintFileResult> = serde_json::from_slice(&output.stdout).unwrap_or_default();
|
||||
|
||||
let mut findings = Vec::new();
|
||||
for file_result in results {
|
||||
for msg in file_result.messages {
|
||||
let severity = match msg.severity {
|
||||
2 => Severity::Medium,
|
||||
_ => Severity::Low,
|
||||
};
|
||||
|
||||
let rule_id = msg.rule_id.unwrap_or_default();
|
||||
let fingerprint = dedup::compute_fingerprint(&[
|
||||
repo_id,
|
||||
"eslint",
|
||||
&rule_id,
|
||||
&file_result.file_path,
|
||||
&msg.line.to_string(),
|
||||
]);
|
||||
|
||||
let mut finding = Finding::new(
|
||||
repo_id.to_string(),
|
||||
fingerprint,
|
||||
"eslint".to_string(),
|
||||
ScanType::Lint,
|
||||
format!("[eslint] {}", msg.message),
|
||||
msg.message,
|
||||
severity,
|
||||
);
|
||||
finding.rule_id = Some(rule_id);
|
||||
finding.file_path = Some(file_result.file_path.clone());
|
||||
finding.line_number = Some(msg.line);
|
||||
findings.push(finding);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(findings)
|
||||
}
|
||||
|
||||
/// Per-file result object from `eslint --format json`.
#[derive(serde::Deserialize)]
struct EslintFileResult {
    #[serde(rename = "filePath")]
    file_path: String,
    // Individual lint messages reported for this file.
    messages: Vec<EslintMessage>,
}

/// A single ESLint diagnostic.
#[derive(serde::Deserialize)]
struct EslintMessage {
    // Rule name; may be absent (e.g. for parse failures).
    #[serde(rename = "ruleId")]
    rule_id: Option<String>,
    // Numeric severity; 2 is mapped to Medium, anything else to Low.
    severity: u8,
    message: String,
    line: u32,
}
|
||||
|
||||
// ── Ruff ────────────────────────────────────────────────
|
||||
|
||||
async fn run_ruff(repo_path: &Path, repo_id: &str) -> Result<Vec<Finding>, CoreError> {
|
||||
let child = Command::new("ruff")
|
||||
.args(["check", ".", "--output-format", "json", "--exit-zero"])
|
||||
.current_dir(repo_path)
|
||||
.stdout(std::process::Stdio::piped())
|
||||
.stderr(std::process::Stdio::piped())
|
||||
.spawn()
|
||||
.map_err(|e| CoreError::Scanner {
|
||||
scanner: "ruff".to_string(),
|
||||
source: Box::new(e),
|
||||
})?;
|
||||
|
||||
let output = run_with_timeout(child, "ruff").await?;
|
||||
|
||||
if output.stdout.is_empty() {
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
|
||||
let results: Vec<RuffResult> = serde_json::from_slice(&output.stdout).unwrap_or_default();
|
||||
|
||||
let findings = results
|
||||
.into_iter()
|
||||
.map(|r| {
|
||||
let severity = if r.code.starts_with('E') || r.code.starts_with('F') {
|
||||
Severity::Medium
|
||||
} else {
|
||||
Severity::Low
|
||||
};
|
||||
|
||||
let fingerprint = dedup::compute_fingerprint(&[
|
||||
repo_id,
|
||||
"ruff",
|
||||
&r.code,
|
||||
&r.filename,
|
||||
&r.location.row.to_string(),
|
||||
]);
|
||||
|
||||
let mut finding = Finding::new(
|
||||
repo_id.to_string(),
|
||||
fingerprint,
|
||||
"ruff".to_string(),
|
||||
ScanType::Lint,
|
||||
format!("[ruff] {}: {}", r.code, r.message),
|
||||
r.message,
|
||||
severity,
|
||||
);
|
||||
finding.rule_id = Some(r.code);
|
||||
finding.file_path = Some(r.filename);
|
||||
finding.line_number = Some(r.location.row);
|
||||
finding
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok(findings)
|
||||
}
|
||||
|
||||
/// One diagnostic from `ruff check --output-format json`.
#[derive(serde::Deserialize)]
struct RuffResult {
    // Rule code, e.g. "E501"; E/F-prefixed codes map to Medium severity.
    code: String,
    message: String,
    filename: String,
    location: RuffLocation,
}

/// Source position of a ruff diagnostic.
#[derive(serde::Deserialize)]
struct RuffLocation {
    // Line the diagnostic points at.
    row: u32,
}
|
||||
@@ -1,6 +1,9 @@
|
||||
pub mod code_review;
|
||||
pub mod cve;
|
||||
pub mod dedup;
|
||||
pub mod git;
|
||||
pub mod gitleaks;
|
||||
pub mod lint;
|
||||
pub mod orchestrator;
|
||||
pub mod patterns;
|
||||
pub mod sbom;
|
||||
|
||||
@@ -9,8 +9,11 @@ use compliance_core::AgentConfig;
|
||||
use crate::database::Database;
|
||||
use crate::error::AgentError;
|
||||
use crate::llm::LlmClient;
|
||||
use crate::pipeline::code_review::CodeReviewScanner;
|
||||
use crate::pipeline::cve::CveScanner;
|
||||
use crate::pipeline::git::GitOps;
|
||||
use crate::pipeline::git::{GitOps, RepoCredentials};
|
||||
use crate::pipeline::gitleaks::GitleaksScanner;
|
||||
use crate::pipeline::lint::LintScanner;
|
||||
use crate::pipeline::patterns::{GdprPatternScanner, OAuthPatternScanner};
|
||||
use crate::pipeline::sbom::SbomScanner;
|
||||
use crate::pipeline::semgrep::SemgrepScanner;
|
||||
@@ -114,7 +117,12 @@ impl PipelineOrchestrator {
|
||||
|
||||
// Stage 0: Change detection
|
||||
tracing::info!("[{repo_id}] Stage 0: Change detection");
|
||||
let git_ops = GitOps::new(&self.config.git_clone_base_path);
|
||||
let creds = RepoCredentials {
|
||||
ssh_key_path: Some(self.config.ssh_key_path.clone()),
|
||||
auth_token: repo.auth_token.clone(),
|
||||
auth_username: repo.auth_username.clone(),
|
||||
};
|
||||
let git_ops = GitOps::new(&self.config.git_clone_base_path, creds);
|
||||
let repo_path = git_ops.clone_or_fetch(&repo.git_url, &repo.name)?;
|
||||
|
||||
if !GitOps::has_new_commits(&repo_path, repo.last_scanned_commit.as_deref())? {
|
||||
@@ -182,6 +190,35 @@ impl PipelineOrchestrator {
|
||||
Err(e) => tracing::warn!("[{repo_id}] OAuth pattern scan failed: {e}"),
|
||||
}
|
||||
|
||||
// Stage 4a: Secret Detection (Gitleaks)
|
||||
tracing::info!("[{repo_id}] Stage 4a: Secret Detection");
|
||||
self.update_phase(scan_run_id, "secret_detection").await;
|
||||
let gitleaks = GitleaksScanner;
|
||||
match gitleaks.scan(&repo_path, &repo_id).await {
|
||||
Ok(output) => all_findings.extend(output.findings),
|
||||
Err(e) => tracing::warn!("[{repo_id}] Gitleaks failed: {e}"),
|
||||
}
|
||||
|
||||
// Stage 4b: Lint Scanning
|
||||
tracing::info!("[{repo_id}] Stage 4b: Lint Scanning");
|
||||
self.update_phase(scan_run_id, "lint_scanning").await;
|
||||
let lint = LintScanner;
|
||||
match lint.scan(&repo_path, &repo_id).await {
|
||||
Ok(output) => all_findings.extend(output.findings),
|
||||
Err(e) => tracing::warn!("[{repo_id}] Lint scanning failed: {e}"),
|
||||
}
|
||||
|
||||
// Stage 4c: LLM Code Review (only on incremental scans)
|
||||
if let Some(old_sha) = &repo.last_scanned_commit {
|
||||
tracing::info!("[{repo_id}] Stage 4c: LLM Code Review");
|
||||
self.update_phase(scan_run_id, "code_review").await;
|
||||
let reviewer = CodeReviewScanner::new(self.llm.clone());
|
||||
let review_output = reviewer
|
||||
.review_diff(&repo_path, &repo_id, old_sha, ¤t_sha)
|
||||
.await;
|
||||
all_findings.extend(review_output.findings);
|
||||
}
|
||||
|
||||
// Stage 4.5: Graph Building
|
||||
tracing::info!("[{repo_id}] Stage 4.5: Graph Building");
|
||||
self.update_phase(scan_run_id, "graph_building").await;
|
||||
|
||||
Reference in New Issue
Block a user