refactor: modularize codebase and add 404 unit tests (#13)
All checks were successful
CI / Format (push) Successful in 4s
CI / Clippy (push) Successful in 4m19s
CI / Security Audit (push) Successful in 1m44s
CI / Detect Changes (push) Successful in 5s
CI / Tests (push) Successful in 5m15s
CI / Deploy Agent (push) Successful in 2s
CI / Deploy Dashboard (push) Successful in 2s
CI / Deploy Docs (push) Has been skipped
CI / Deploy MCP (push) Successful in 2s
All checks were successful
CI / Format (push) Successful in 4s
CI / Clippy (push) Successful in 4m19s
CI / Security Audit (push) Successful in 1m44s
CI / Detect Changes (push) Successful in 5s
CI / Tests (push) Successful in 5m15s
CI / Deploy Agent (push) Successful in 2s
CI / Deploy Dashboard (push) Successful in 2s
CI / Deploy Docs (push) Has been skipped
CI / Deploy MCP (push) Successful in 2s
This commit was merged in pull request #13.
This commit is contained in:
@@ -8,3 +8,51 @@ pub fn compute_fingerprint(parts: &[&str]) -> String {
|
||||
}
|
||||
hex::encode(hasher.finalize())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn fingerprint_is_deterministic() {
|
||||
let a = compute_fingerprint(&["repo1", "rule-x", "src/main.rs", "42"]);
|
||||
let b = compute_fingerprint(&["repo1", "rule-x", "src/main.rs", "42"]);
|
||||
assert_eq!(a, b);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fingerprint_changes_with_different_input() {
|
||||
let a = compute_fingerprint(&["repo1", "rule-x", "src/main.rs", "42"]);
|
||||
let b = compute_fingerprint(&["repo1", "rule-x", "src/main.rs", "43"]);
|
||||
assert_ne!(a, b);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fingerprint_is_valid_hex_sha256() {
|
||||
let fp = compute_fingerprint(&["hello"]);
|
||||
assert_eq!(fp.len(), 64, "SHA-256 hex should be 64 chars");
|
||||
assert!(fp.chars().all(|c| c.is_ascii_hexdigit()));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fingerprint_empty_parts() {
|
||||
let fp = compute_fingerprint(&[]);
|
||||
// Should still produce a valid hash (of empty input)
|
||||
assert_eq!(fp.len(), 64);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fingerprint_order_matters() {
|
||||
let a = compute_fingerprint(&["a", "b"]);
|
||||
let b = compute_fingerprint(&["b", "a"]);
|
||||
assert_ne!(a, b);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fingerprint_separator_prevents_collision() {
|
||||
// "ab" + "c" vs "a" + "bc" should differ because of the "|" separator
|
||||
let a = compute_fingerprint(&["ab", "c"]);
|
||||
let b = compute_fingerprint(&["a", "bc"]);
|
||||
assert_ne!(a, b);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -129,3 +129,110 @@ struct GitleaksResult {
|
||||
#[serde(rename = "Match")]
|
||||
r#match: String,
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
// --- is_allowlisted tests ---
|
||||
|
||||
#[test]
|
||||
fn allowlisted_env_example_files() {
|
||||
assert!(is_allowlisted(".env.example"));
|
||||
assert!(is_allowlisted("config/.env.sample"));
|
||||
assert!(is_allowlisted("deploy/.ENV.TEMPLATE"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn allowlisted_test_directories() {
|
||||
assert!(is_allowlisted("src/test/config.json"));
|
||||
assert!(is_allowlisted("src/tests/fixtures.rs"));
|
||||
assert!(is_allowlisted("data/fixtures/secret.txt"));
|
||||
assert!(is_allowlisted("pkg/testdata/key.pem"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn allowlisted_mock_files() {
|
||||
assert!(is_allowlisted("src/mock_service.py"));
|
||||
assert!(is_allowlisted("lib/MockAuth.java"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn allowlisted_test_suffixes() {
|
||||
assert!(is_allowlisted("auth_test.go"));
|
||||
assert!(is_allowlisted("auth.test.ts"));
|
||||
assert!(is_allowlisted("auth.test.js"));
|
||||
assert!(is_allowlisted("auth.spec.ts"));
|
||||
assert!(is_allowlisted("auth.spec.js"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn not_allowlisted_regular_files() {
|
||||
assert!(!is_allowlisted("src/main.rs"));
|
||||
assert!(!is_allowlisted("config/.env"));
|
||||
assert!(!is_allowlisted("lib/auth.ts"));
|
||||
assert!(!is_allowlisted("deploy/secrets.yaml"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn not_allowlisted_partial_matches() {
|
||||
// "test" as substring in a non-directory context should not match
|
||||
assert!(!is_allowlisted("src/attestation.rs"));
|
||||
assert!(!is_allowlisted("src/contest/data.json"));
|
||||
}
|
||||
|
||||
// --- GitleaksResult deserialization tests ---
|
||||
|
||||
#[test]
|
||||
fn deserialize_gitleaks_result() {
|
||||
let json = r#"{
|
||||
"Description": "AWS Access Key",
|
||||
"RuleID": "aws-access-key",
|
||||
"File": "src/config.rs",
|
||||
"StartLine": 10,
|
||||
"Match": "AKIAIOSFODNN7EXAMPLE"
|
||||
}"#;
|
||||
let result: GitleaksResult = serde_json::from_str(json).unwrap();
|
||||
assert_eq!(result.description, "AWS Access Key");
|
||||
assert_eq!(result.rule_id, "aws-access-key");
|
||||
assert_eq!(result.file, "src/config.rs");
|
||||
assert_eq!(result.start_line, 10);
|
||||
assert_eq!(result.r#match, "AKIAIOSFODNN7EXAMPLE");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn deserialize_gitleaks_result_array() {
|
||||
let json = r#"[
|
||||
{
|
||||
"Description": "Generic Secret",
|
||||
"RuleID": "generic-secret",
|
||||
"File": "app.py",
|
||||
"StartLine": 5,
|
||||
"Match": "password=hunter2"
|
||||
}
|
||||
]"#;
|
||||
let results: Vec<GitleaksResult> = serde_json::from_str(json).unwrap();
|
||||
assert_eq!(results.len(), 1);
|
||||
assert_eq!(results[0].rule_id, "generic-secret");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn severity_mapping_private_key() {
|
||||
// Verify the severity logic from the scan method
|
||||
let rule_id = "some-private-key-rule";
|
||||
assert!(rule_id.contains("private-key"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn severity_mapping_token_password_secret() {
|
||||
for keyword in &["token", "password", "secret"] {
|
||||
let rule_id = format!("some-{}-rule", keyword);
|
||||
assert!(
|
||||
rule_id.contains("token")
|
||||
|| rule_id.contains("password")
|
||||
|| rule_id.contains("secret"),
|
||||
"Expected '{rule_id}' to match token/password/secret"
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
106
compliance-agent/src/pipeline/graph_build.rs
Normal file
106
compliance-agent/src/pipeline/graph_build.rs
Normal file
@@ -0,0 +1,106 @@
|
||||
use compliance_core::models::Finding;
|
||||
|
||||
use super::orchestrator::{GraphContext, PipelineOrchestrator};
|
||||
use crate::error::AgentError;
|
||||
|
||||
impl PipelineOrchestrator {
|
||||
/// Build the code knowledge graph for a repo and compute impact analyses
|
||||
pub(super) async fn build_code_graph(
|
||||
&self,
|
||||
repo_path: &std::path::Path,
|
||||
repo_id: &str,
|
||||
findings: &[Finding],
|
||||
) -> Result<GraphContext, AgentError> {
|
||||
let graph_build_id = uuid::Uuid::new_v4().to_string();
|
||||
let engine = compliance_graph::GraphEngine::new(50_000);
|
||||
|
||||
let (mut code_graph, build_run) =
|
||||
engine
|
||||
.build_graph(repo_path, repo_id, &graph_build_id)
|
||||
.map_err(|e| AgentError::Other(format!("Graph build error: {e}")))?;
|
||||
|
||||
// Apply community detection
|
||||
compliance_graph::graph::community::apply_communities(&mut code_graph);
|
||||
|
||||
// Store graph in MongoDB
|
||||
let store = compliance_graph::graph::persistence::GraphStore::new(self.db.inner());
|
||||
store
|
||||
.delete_repo_graph(repo_id)
|
||||
.await
|
||||
.map_err(|e| AgentError::Other(format!("Graph cleanup error: {e}")))?;
|
||||
store
|
||||
.store_graph(&build_run, &code_graph.nodes, &code_graph.edges)
|
||||
.await
|
||||
.map_err(|e| AgentError::Other(format!("Graph store error: {e}")))?;
|
||||
|
||||
// Compute impact analysis for each finding
|
||||
let analyzer = compliance_graph::GraphEngine::impact_analyzer(&code_graph);
|
||||
let mut impacts = Vec::new();
|
||||
|
||||
for finding in findings {
|
||||
if let Some(file_path) = &finding.file_path {
|
||||
let impact = analyzer.analyze(
|
||||
repo_id,
|
||||
&finding.fingerprint,
|
||||
&graph_build_id,
|
||||
file_path,
|
||||
finding.line_number,
|
||||
);
|
||||
store
|
||||
.store_impact(&impact)
|
||||
.await
|
||||
.map_err(|e| AgentError::Other(format!("Impact store error: {e}")))?;
|
||||
impacts.push(impact);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(GraphContext {
|
||||
node_count: build_run.node_count,
|
||||
edge_count: build_run.edge_count,
|
||||
community_count: build_run.community_count,
|
||||
impacts,
|
||||
})
|
||||
}
|
||||
|
||||
/// Trigger DAST scan if a target is configured for this repo
|
||||
pub(super) async fn maybe_trigger_dast(&self, repo_id: &str, scan_run_id: &str) {
|
||||
use futures_util::TryStreamExt;
|
||||
|
||||
let filter = mongodb::bson::doc! { "repo_id": repo_id };
|
||||
let targets: Vec<compliance_core::models::DastTarget> =
|
||||
match self.db.dast_targets().find(filter).await {
|
||||
Ok(cursor) => cursor.try_collect().await.unwrap_or_default(),
|
||||
Err(_) => return,
|
||||
};
|
||||
|
||||
if targets.is_empty() {
|
||||
tracing::info!("[{repo_id}] No DAST targets configured, skipping");
|
||||
return;
|
||||
}
|
||||
|
||||
for target in targets {
|
||||
let db = self.db.clone();
|
||||
let scan_run_id = scan_run_id.to_string();
|
||||
tokio::spawn(async move {
|
||||
let orchestrator = compliance_dast::DastOrchestrator::new(100);
|
||||
match orchestrator.run_scan(&target, Vec::new()).await {
|
||||
Ok((mut scan_run, findings)) => {
|
||||
scan_run.sast_scan_run_id = Some(scan_run_id);
|
||||
if let Err(e) = db.dast_scan_runs().insert_one(&scan_run).await {
|
||||
tracing::error!("Failed to store DAST scan run: {e}");
|
||||
}
|
||||
for finding in &findings {
|
||||
if let Err(e) = db.dast_findings().insert_one(finding).await {
|
||||
tracing::error!("Failed to store DAST finding: {e}");
|
||||
}
|
||||
}
|
||||
tracing::info!("DAST scan complete: {} findings", findings.len());
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::error!("DAST scan failed: {e}");
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
259
compliance-agent/src/pipeline/issue_creation.rs
Normal file
259
compliance-agent/src/pipeline/issue_creation.rs
Normal file
@@ -0,0 +1,259 @@
|
||||
use mongodb::bson::doc;
|
||||
|
||||
use compliance_core::models::*;
|
||||
|
||||
use super::orchestrator::{extract_base_url, PipelineOrchestrator};
|
||||
use super::tracker_dispatch::TrackerDispatch;
|
||||
use crate::error::AgentError;
|
||||
use crate::trackers;
|
||||
|
||||
impl PipelineOrchestrator {
|
||||
/// Build an issue tracker client from a repository's tracker configuration.
|
||||
/// Returns `None` if the repo has no tracker configured.
|
||||
pub(super) fn build_tracker(&self, repo: &TrackedRepository) -> Option<TrackerDispatch> {
|
||||
let tracker_type = repo.tracker_type.as_ref()?;
|
||||
// Per-repo token takes precedence, fall back to global config
|
||||
match tracker_type {
|
||||
TrackerType::GitHub => {
|
||||
let token = repo.tracker_token.clone().or_else(|| {
|
||||
self.config.github_token.as_ref().map(|t| {
|
||||
use secrecy::ExposeSecret;
|
||||
t.expose_secret().to_string()
|
||||
})
|
||||
})?;
|
||||
let secret = secrecy::SecretString::from(token);
|
||||
match trackers::github::GitHubTracker::new(&secret) {
|
||||
Ok(t) => Some(TrackerDispatch::GitHub(t)),
|
||||
Err(e) => {
|
||||
tracing::warn!("Failed to build GitHub tracker: {e}");
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
TrackerType::GitLab => {
|
||||
let base_url = self
|
||||
.config
|
||||
.gitlab_url
|
||||
.clone()
|
||||
.unwrap_or_else(|| "https://gitlab.com".to_string());
|
||||
let token = repo.tracker_token.clone().or_else(|| {
|
||||
self.config.gitlab_token.as_ref().map(|t| {
|
||||
use secrecy::ExposeSecret;
|
||||
t.expose_secret().to_string()
|
||||
})
|
||||
})?;
|
||||
let secret = secrecy::SecretString::from(token);
|
||||
Some(TrackerDispatch::GitLab(
|
||||
trackers::gitlab::GitLabTracker::new(base_url, secret),
|
||||
))
|
||||
}
|
||||
TrackerType::Gitea => {
|
||||
let token = repo.tracker_token.clone()?;
|
||||
let base_url = extract_base_url(&repo.git_url)?;
|
||||
let secret = secrecy::SecretString::from(token);
|
||||
Some(TrackerDispatch::Gitea(trackers::gitea::GiteaTracker::new(
|
||||
base_url, secret,
|
||||
)))
|
||||
}
|
||||
TrackerType::Jira => {
|
||||
let base_url = self.config.jira_url.clone()?;
|
||||
let email = self.config.jira_email.clone()?;
|
||||
let project_key = self.config.jira_project_key.clone()?;
|
||||
let token = repo.tracker_token.clone().or_else(|| {
|
||||
self.config.jira_api_token.as_ref().map(|t| {
|
||||
use secrecy::ExposeSecret;
|
||||
t.expose_secret().to_string()
|
||||
})
|
||||
})?;
|
||||
let secret = secrecy::SecretString::from(token);
|
||||
Some(TrackerDispatch::Jira(trackers::jira::JiraTracker::new(
|
||||
base_url,
|
||||
email,
|
||||
secret,
|
||||
project_key,
|
||||
)))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Create tracker issues for new findings (severity >= Medium).
|
||||
/// Checks for duplicates via fingerprint search before creating.
|
||||
#[tracing::instrument(skip_all, fields(repo_id = %repo_id))]
|
||||
pub(super) async fn create_tracker_issues(
|
||||
&self,
|
||||
repo: &TrackedRepository,
|
||||
repo_id: &str,
|
||||
new_findings: &[Finding],
|
||||
) -> Result<(), AgentError> {
|
||||
let tracker = match self.build_tracker(repo) {
|
||||
Some(t) => t,
|
||||
None => {
|
||||
tracing::info!("[{repo_id}] No issue tracker configured, skipping");
|
||||
return Ok(());
|
||||
}
|
||||
};
|
||||
|
||||
let owner = match repo.tracker_owner.as_deref() {
|
||||
Some(o) => o,
|
||||
None => {
|
||||
tracing::warn!("[{repo_id}] tracker_owner not set, skipping issue creation");
|
||||
return Ok(());
|
||||
}
|
||||
};
|
||||
let tracker_repo_name = match repo.tracker_repo.as_deref() {
|
||||
Some(r) => r,
|
||||
None => {
|
||||
tracing::warn!("[{repo_id}] tracker_repo not set, skipping issue creation");
|
||||
return Ok(());
|
||||
}
|
||||
};
|
||||
|
||||
// Only create issues for medium+ severity findings
|
||||
let actionable: Vec<&Finding> = new_findings
|
||||
.iter()
|
||||
.filter(|f| {
|
||||
matches!(
|
||||
f.severity,
|
||||
Severity::Medium | Severity::High | Severity::Critical
|
||||
)
|
||||
})
|
||||
.collect();
|
||||
|
||||
if actionable.is_empty() {
|
||||
tracing::info!("[{repo_id}] No medium+ findings, skipping issue creation");
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
tracing::info!(
|
||||
"[{repo_id}] Creating issues for {} findings via {}",
|
||||
actionable.len(),
|
||||
tracker.name()
|
||||
);
|
||||
|
||||
let mut created = 0u32;
|
||||
for finding in actionable {
|
||||
let title = format!(
|
||||
"[{}] {}: {}",
|
||||
finding.severity, finding.scanner, finding.title
|
||||
);
|
||||
|
||||
// Check if an issue already exists by fingerprint first, then by title
|
||||
let mut found_existing = false;
|
||||
for search_term in [&finding.fingerprint, &title] {
|
||||
match tracker
|
||||
.find_existing_issue(owner, tracker_repo_name, search_term)
|
||||
.await
|
||||
{
|
||||
Ok(Some(existing)) => {
|
||||
tracing::debug!(
|
||||
"[{repo_id}] Issue already exists for '{}': {}",
|
||||
search_term,
|
||||
existing.external_url
|
||||
);
|
||||
found_existing = true;
|
||||
break;
|
||||
}
|
||||
Ok(None) => {}
|
||||
Err(e) => {
|
||||
tracing::warn!("[{repo_id}] Failed to search for existing issue: {e}");
|
||||
}
|
||||
}
|
||||
}
|
||||
if found_existing {
|
||||
continue;
|
||||
}
|
||||
let body = format_issue_body(finding);
|
||||
let labels = vec![
|
||||
format!("severity:{}", finding.severity),
|
||||
format!("scanner:{}", finding.scanner),
|
||||
"compliance-scanner".to_string(),
|
||||
];
|
||||
|
||||
match tracker
|
||||
.create_issue(owner, tracker_repo_name, &title, &body, &labels)
|
||||
.await
|
||||
{
|
||||
Ok(mut issue) => {
|
||||
issue.finding_id = finding
|
||||
.id
|
||||
.as_ref()
|
||||
.map(|id| id.to_hex())
|
||||
.unwrap_or_default();
|
||||
|
||||
// Update the finding with the issue URL
|
||||
if let Some(finding_id) = &finding.id {
|
||||
let _ = self
|
||||
.db
|
||||
.findings()
|
||||
.update_one(
|
||||
doc! { "_id": finding_id },
|
||||
doc! { "$set": { "tracker_issue_url": &issue.external_url } },
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
// Store the tracker issue record
|
||||
if let Err(e) = self.db.tracker_issues().insert_one(&issue).await {
|
||||
tracing::warn!("[{repo_id}] Failed to store tracker issue: {e}");
|
||||
}
|
||||
|
||||
created += 1;
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!(
|
||||
"[{repo_id}] Failed to create issue for {}: {e}",
|
||||
finding.fingerprint
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
tracing::info!("[{repo_id}] Created {created} tracker issues");
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Format a finding into a markdown issue body for the tracker.
|
||||
pub(super) fn format_issue_body(finding: &Finding) -> String {
|
||||
let mut body = String::new();
|
||||
|
||||
body.push_str(&format!("## {} Finding\n\n", finding.severity));
|
||||
body.push_str(&format!("**Scanner:** {}\n", finding.scanner));
|
||||
body.push_str(&format!("**Severity:** {}\n", finding.severity));
|
||||
|
||||
if let Some(rule) = &finding.rule_id {
|
||||
body.push_str(&format!("**Rule:** {}\n", rule));
|
||||
}
|
||||
if let Some(cwe) = &finding.cwe {
|
||||
body.push_str(&format!("**CWE:** {}\n", cwe));
|
||||
}
|
||||
|
||||
body.push_str(&format!("\n### Description\n\n{}\n", finding.description));
|
||||
|
||||
if let Some(file_path) = &finding.file_path {
|
||||
body.push_str(&format!("\n### Location\n\n**File:** `{}`", file_path));
|
||||
if let Some(line) = finding.line_number {
|
||||
body.push_str(&format!(" (line {})", line));
|
||||
}
|
||||
body.push('\n');
|
||||
}
|
||||
|
||||
if let Some(snippet) = &finding.code_snippet {
|
||||
body.push_str(&format!("\n### Code\n\n```\n{}\n```\n", snippet));
|
||||
}
|
||||
|
||||
if let Some(remediation) = &finding.remediation {
|
||||
body.push_str(&format!("\n### Remediation\n\n{}\n", remediation));
|
||||
}
|
||||
|
||||
if let Some(fix) = &finding.suggested_fix {
|
||||
body.push_str(&format!("\n### Suggested Fix\n\n```\n{}\n```\n", fix));
|
||||
}
|
||||
|
||||
body.push_str(&format!(
|
||||
"\n---\n*Fingerprint:* `{}`\n*Generated by compliance-scanner*",
|
||||
finding.fingerprint
|
||||
));
|
||||
|
||||
body
|
||||
}
|
||||
@@ -1,366 +0,0 @@
|
||||
use std::path::Path;
|
||||
use std::time::Duration;
|
||||
|
||||
use compliance_core::models::{Finding, ScanType, Severity};
|
||||
use compliance_core::traits::{ScanOutput, Scanner};
|
||||
use compliance_core::CoreError;
|
||||
use tokio::process::Command;
|
||||
|
||||
use crate::pipeline::dedup;
|
||||
|
||||
/// Timeout for each individual lint command
|
||||
const LINT_TIMEOUT: Duration = Duration::from_secs(120);
|
||||
|
||||
pub struct LintScanner;
|
||||
|
||||
impl Scanner for LintScanner {
|
||||
fn name(&self) -> &str {
|
||||
"lint"
|
||||
}
|
||||
|
||||
fn scan_type(&self) -> ScanType {
|
||||
ScanType::Lint
|
||||
}
|
||||
|
||||
#[tracing::instrument(skip_all)]
|
||||
async fn scan(&self, repo_path: &Path, repo_id: &str) -> Result<ScanOutput, CoreError> {
|
||||
let mut all_findings = Vec::new();
|
||||
|
||||
// Detect which languages are present and run appropriate linters
|
||||
if has_rust_project(repo_path) {
|
||||
match run_clippy(repo_path, repo_id).await {
|
||||
Ok(findings) => all_findings.extend(findings),
|
||||
Err(e) => tracing::warn!("Clippy failed: {e}"),
|
||||
}
|
||||
}
|
||||
|
||||
if has_js_project(repo_path) {
|
||||
match run_eslint(repo_path, repo_id).await {
|
||||
Ok(findings) => all_findings.extend(findings),
|
||||
Err(e) => tracing::warn!("ESLint failed: {e}"),
|
||||
}
|
||||
}
|
||||
|
||||
if has_python_project(repo_path) {
|
||||
match run_ruff(repo_path, repo_id).await {
|
||||
Ok(findings) => all_findings.extend(findings),
|
||||
Err(e) => tracing::warn!("Ruff failed: {e}"),
|
||||
}
|
||||
}
|
||||
|
||||
Ok(ScanOutput {
|
||||
findings: all_findings,
|
||||
sbom_entries: Vec::new(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
fn has_rust_project(repo_path: &Path) -> bool {
|
||||
repo_path.join("Cargo.toml").exists()
|
||||
}
|
||||
|
||||
fn has_js_project(repo_path: &Path) -> bool {
|
||||
// Only run if eslint is actually installed in the project
|
||||
repo_path.join("package.json").exists() && repo_path.join("node_modules/.bin/eslint").exists()
|
||||
}
|
||||
|
||||
fn has_python_project(repo_path: &Path) -> bool {
|
||||
repo_path.join("pyproject.toml").exists()
|
||||
|| repo_path.join("setup.py").exists()
|
||||
|| repo_path.join("requirements.txt").exists()
|
||||
}
|
||||
|
||||
/// Run a command with a timeout, returning its output or an error
|
||||
async fn run_with_timeout(
|
||||
child: tokio::process::Child,
|
||||
scanner_name: &str,
|
||||
) -> Result<std::process::Output, CoreError> {
|
||||
let result = tokio::time::timeout(LINT_TIMEOUT, child.wait_with_output()).await;
|
||||
match result {
|
||||
Ok(Ok(output)) => Ok(output),
|
||||
Ok(Err(e)) => Err(CoreError::Scanner {
|
||||
scanner: scanner_name.to_string(),
|
||||
source: Box::new(e),
|
||||
}),
|
||||
Err(_) => {
|
||||
// Process is dropped here which sends SIGKILL on Unix
|
||||
Err(CoreError::Scanner {
|
||||
scanner: scanner_name.to_string(),
|
||||
source: Box::new(std::io::Error::new(
|
||||
std::io::ErrorKind::TimedOut,
|
||||
format!("{scanner_name} timed out after {}s", LINT_TIMEOUT.as_secs()),
|
||||
)),
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ── Clippy ──────────────────────────────────────────────
|
||||
|
||||
async fn run_clippy(repo_path: &Path, repo_id: &str) -> Result<Vec<Finding>, CoreError> {
|
||||
let child = Command::new("cargo")
|
||||
.args([
|
||||
"clippy",
|
||||
"--message-format=json",
|
||||
"--quiet",
|
||||
"--",
|
||||
"-W",
|
||||
"clippy::all",
|
||||
])
|
||||
.current_dir(repo_path)
|
||||
.env("RUSTC_WRAPPER", "")
|
||||
.stdout(std::process::Stdio::piped())
|
||||
.stderr(std::process::Stdio::piped())
|
||||
.spawn()
|
||||
.map_err(|e| CoreError::Scanner {
|
||||
scanner: "clippy".to_string(),
|
||||
source: Box::new(e),
|
||||
})?;
|
||||
|
||||
let output = run_with_timeout(child, "clippy").await?;
|
||||
let stdout = String::from_utf8_lossy(&output.stdout);
|
||||
let mut findings = Vec::new();
|
||||
|
||||
for line in stdout.lines() {
|
||||
let msg: serde_json::Value = match serde_json::from_str(line) {
|
||||
Ok(v) => v,
|
||||
Err(_) => continue,
|
||||
};
|
||||
|
||||
if msg.get("reason").and_then(|v| v.as_str()) != Some("compiler-message") {
|
||||
continue;
|
||||
}
|
||||
|
||||
let message = match msg.get("message") {
|
||||
Some(m) => m,
|
||||
None => continue,
|
||||
};
|
||||
|
||||
let level = message.get("level").and_then(|v| v.as_str()).unwrap_or("");
|
||||
|
||||
if level != "warning" && level != "error" {
|
||||
continue;
|
||||
}
|
||||
|
||||
let text = message
|
||||
.get("message")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("")
|
||||
.to_string();
|
||||
|
||||
let code = message
|
||||
.get("code")
|
||||
.and_then(|v| v.get("code"))
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("")
|
||||
.to_string();
|
||||
|
||||
if text.starts_with("aborting due to") || code.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
let (file_path, line_number) = extract_primary_span(message);
|
||||
|
||||
let severity = if level == "error" {
|
||||
Severity::High
|
||||
} else {
|
||||
Severity::Low
|
||||
};
|
||||
|
||||
let fingerprint = dedup::compute_fingerprint(&[
|
||||
repo_id,
|
||||
"clippy",
|
||||
&code,
|
||||
&file_path,
|
||||
&line_number.to_string(),
|
||||
]);
|
||||
|
||||
let mut finding = Finding::new(
|
||||
repo_id.to_string(),
|
||||
fingerprint,
|
||||
"clippy".to_string(),
|
||||
ScanType::Lint,
|
||||
format!("[clippy] {text}"),
|
||||
text,
|
||||
severity,
|
||||
);
|
||||
finding.rule_id = Some(code);
|
||||
if !file_path.is_empty() {
|
||||
finding.file_path = Some(file_path);
|
||||
}
|
||||
if line_number > 0 {
|
||||
finding.line_number = Some(line_number);
|
||||
}
|
||||
findings.push(finding);
|
||||
}
|
||||
|
||||
Ok(findings)
|
||||
}
|
||||
|
||||
fn extract_primary_span(message: &serde_json::Value) -> (String, u32) {
|
||||
let spans = match message.get("spans").and_then(|v| v.as_array()) {
|
||||
Some(s) => s,
|
||||
None => return (String::new(), 0),
|
||||
};
|
||||
|
||||
for span in spans {
|
||||
if span.get("is_primary").and_then(|v| v.as_bool()) == Some(true) {
|
||||
let file = span
|
||||
.get("file_name")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("")
|
||||
.to_string();
|
||||
let line = span.get("line_start").and_then(|v| v.as_u64()).unwrap_or(0) as u32;
|
||||
return (file, line);
|
||||
}
|
||||
}
|
||||
|
||||
(String::new(), 0)
|
||||
}
|
||||
|
||||
// ── ESLint ──────────────────────────────────────────────
|
||||
|
||||
async fn run_eslint(repo_path: &Path, repo_id: &str) -> Result<Vec<Finding>, CoreError> {
|
||||
// Use the project-local eslint binary directly, not npx (which can hang downloading)
|
||||
let eslint_bin = repo_path.join("node_modules/.bin/eslint");
|
||||
let child = Command::new(eslint_bin)
|
||||
.args([".", "--format", "json", "--no-error-on-unmatched-pattern"])
|
||||
.current_dir(repo_path)
|
||||
.stdout(std::process::Stdio::piped())
|
||||
.stderr(std::process::Stdio::piped())
|
||||
.spawn()
|
||||
.map_err(|e| CoreError::Scanner {
|
||||
scanner: "eslint".to_string(),
|
||||
source: Box::new(e),
|
||||
})?;
|
||||
|
||||
let output = run_with_timeout(child, "eslint").await?;
|
||||
|
||||
if output.stdout.is_empty() {
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
|
||||
let results: Vec<EslintFileResult> = serde_json::from_slice(&output.stdout).unwrap_or_default();
|
||||
|
||||
let mut findings = Vec::new();
|
||||
for file_result in results {
|
||||
for msg in file_result.messages {
|
||||
let severity = match msg.severity {
|
||||
2 => Severity::Medium,
|
||||
_ => Severity::Low,
|
||||
};
|
||||
|
||||
let rule_id = msg.rule_id.unwrap_or_default();
|
||||
let fingerprint = dedup::compute_fingerprint(&[
|
||||
repo_id,
|
||||
"eslint",
|
||||
&rule_id,
|
||||
&file_result.file_path,
|
||||
&msg.line.to_string(),
|
||||
]);
|
||||
|
||||
let mut finding = Finding::new(
|
||||
repo_id.to_string(),
|
||||
fingerprint,
|
||||
"eslint".to_string(),
|
||||
ScanType::Lint,
|
||||
format!("[eslint] {}", msg.message),
|
||||
msg.message,
|
||||
severity,
|
||||
);
|
||||
finding.rule_id = Some(rule_id);
|
||||
finding.file_path = Some(file_result.file_path.clone());
|
||||
finding.line_number = Some(msg.line);
|
||||
findings.push(finding);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(findings)
|
||||
}
|
||||
|
||||
#[derive(serde::Deserialize)]
|
||||
struct EslintFileResult {
|
||||
#[serde(rename = "filePath")]
|
||||
file_path: String,
|
||||
messages: Vec<EslintMessage>,
|
||||
}
|
||||
|
||||
#[derive(serde::Deserialize)]
|
||||
struct EslintMessage {
|
||||
#[serde(rename = "ruleId")]
|
||||
rule_id: Option<String>,
|
||||
severity: u8,
|
||||
message: String,
|
||||
line: u32,
|
||||
}
|
||||
|
||||
// ── Ruff ────────────────────────────────────────────────
|
||||
|
||||
async fn run_ruff(repo_path: &Path, repo_id: &str) -> Result<Vec<Finding>, CoreError> {
|
||||
let child = Command::new("ruff")
|
||||
.args(["check", ".", "--output-format", "json", "--exit-zero"])
|
||||
.current_dir(repo_path)
|
||||
.stdout(std::process::Stdio::piped())
|
||||
.stderr(std::process::Stdio::piped())
|
||||
.spawn()
|
||||
.map_err(|e| CoreError::Scanner {
|
||||
scanner: "ruff".to_string(),
|
||||
source: Box::new(e),
|
||||
})?;
|
||||
|
||||
let output = run_with_timeout(child, "ruff").await?;
|
||||
|
||||
if output.stdout.is_empty() {
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
|
||||
let results: Vec<RuffResult> = serde_json::from_slice(&output.stdout).unwrap_or_default();
|
||||
|
||||
let findings = results
|
||||
.into_iter()
|
||||
.map(|r| {
|
||||
let severity = if r.code.starts_with('E') || r.code.starts_with('F') {
|
||||
Severity::Medium
|
||||
} else {
|
||||
Severity::Low
|
||||
};
|
||||
|
||||
let fingerprint = dedup::compute_fingerprint(&[
|
||||
repo_id,
|
||||
"ruff",
|
||||
&r.code,
|
||||
&r.filename,
|
||||
&r.location.row.to_string(),
|
||||
]);
|
||||
|
||||
let mut finding = Finding::new(
|
||||
repo_id.to_string(),
|
||||
fingerprint,
|
||||
"ruff".to_string(),
|
||||
ScanType::Lint,
|
||||
format!("[ruff] {}: {}", r.code, r.message),
|
||||
r.message,
|
||||
severity,
|
||||
);
|
||||
finding.rule_id = Some(r.code);
|
||||
finding.file_path = Some(r.filename);
|
||||
finding.line_number = Some(r.location.row);
|
||||
finding
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok(findings)
|
||||
}
|
||||
|
||||
#[derive(serde::Deserialize)]
|
||||
struct RuffResult {
|
||||
code: String,
|
||||
message: String,
|
||||
filename: String,
|
||||
location: RuffLocation,
|
||||
}
|
||||
|
||||
#[derive(serde::Deserialize)]
|
||||
struct RuffLocation {
|
||||
row: u32,
|
||||
}
|
||||
251
compliance-agent/src/pipeline/lint/clippy.rs
Normal file
251
compliance-agent/src/pipeline/lint/clippy.rs
Normal file
@@ -0,0 +1,251 @@
|
||||
use std::path::Path;
|
||||
|
||||
use compliance_core::models::{Finding, ScanType, Severity};
|
||||
use compliance_core::CoreError;
|
||||
use tokio::process::Command;
|
||||
|
||||
use crate::pipeline::dedup;
|
||||
|
||||
use super::run_with_timeout;
|
||||
|
||||
pub(super) async fn run_clippy(repo_path: &Path, repo_id: &str) -> Result<Vec<Finding>, CoreError> {
|
||||
let child = Command::new("cargo")
|
||||
.args([
|
||||
"clippy",
|
||||
"--message-format=json",
|
||||
"--quiet",
|
||||
"--",
|
||||
"-W",
|
||||
"clippy::all",
|
||||
])
|
||||
.current_dir(repo_path)
|
||||
.env("RUSTC_WRAPPER", "")
|
||||
.stdout(std::process::Stdio::piped())
|
||||
.stderr(std::process::Stdio::piped())
|
||||
.spawn()
|
||||
.map_err(|e| CoreError::Scanner {
|
||||
scanner: "clippy".to_string(),
|
||||
source: Box::new(e),
|
||||
})?;
|
||||
|
||||
let output = run_with_timeout(child, "clippy").await?;
|
||||
let stdout = String::from_utf8_lossy(&output.stdout);
|
||||
let mut findings = Vec::new();
|
||||
|
||||
for line in stdout.lines() {
|
||||
let msg: serde_json::Value = match serde_json::from_str(line) {
|
||||
Ok(v) => v,
|
||||
Err(_) => continue,
|
||||
};
|
||||
|
||||
if msg.get("reason").and_then(|v| v.as_str()) != Some("compiler-message") {
|
||||
continue;
|
||||
}
|
||||
|
||||
let message = match msg.get("message") {
|
||||
Some(m) => m,
|
||||
None => continue,
|
||||
};
|
||||
|
||||
let level = message.get("level").and_then(|v| v.as_str()).unwrap_or("");
|
||||
|
||||
if level != "warning" && level != "error" {
|
||||
continue;
|
||||
}
|
||||
|
||||
let text = message
|
||||
.get("message")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("")
|
||||
.to_string();
|
||||
|
||||
let code = message
|
||||
.get("code")
|
||||
.and_then(|v| v.get("code"))
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("")
|
||||
.to_string();
|
||||
|
||||
if text.starts_with("aborting due to") || code.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
let (file_path, line_number) = extract_primary_span(message);
|
||||
|
||||
let severity = if level == "error" {
|
||||
Severity::High
|
||||
} else {
|
||||
Severity::Low
|
||||
};
|
||||
|
||||
let fingerprint = dedup::compute_fingerprint(&[
|
||||
repo_id,
|
||||
"clippy",
|
||||
&code,
|
||||
&file_path,
|
||||
&line_number.to_string(),
|
||||
]);
|
||||
|
||||
let mut finding = Finding::new(
|
||||
repo_id.to_string(),
|
||||
fingerprint,
|
||||
"clippy".to_string(),
|
||||
ScanType::Lint,
|
||||
format!("[clippy] {text}"),
|
||||
text,
|
||||
severity,
|
||||
);
|
||||
finding.rule_id = Some(code);
|
||||
if !file_path.is_empty() {
|
||||
finding.file_path = Some(file_path);
|
||||
}
|
||||
if line_number > 0 {
|
||||
finding.line_number = Some(line_number);
|
||||
}
|
||||
findings.push(finding);
|
||||
}
|
||||
|
||||
Ok(findings)
|
||||
}
|
||||
|
||||
fn extract_primary_span(message: &serde_json::Value) -> (String, u32) {
|
||||
let spans = match message.get("spans").and_then(|v| v.as_array()) {
|
||||
Some(s) => s,
|
||||
None => return (String::new(), 0),
|
||||
};
|
||||
|
||||
for span in spans {
|
||||
if span.get("is_primary").and_then(|v| v.as_bool()) == Some(true) {
|
||||
let file = span
|
||||
.get("file_name")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("")
|
||||
.to_string();
|
||||
let line = span.get("line_start").and_then(|v| v.as_u64()).unwrap_or(0) as u32;
|
||||
return (file, line);
|
||||
}
|
||||
}
|
||||
|
||||
(String::new(), 0)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // A single primary span yields its file and line.
    #[test]
    fn extract_primary_span_with_primary() {
        let msg = serde_json::json!({
            "spans": [
                {
                    "file_name": "src/lib.rs",
                    "line_start": 42,
                    "is_primary": true
                }
            ]
        });
        let (file, line) = extract_primary_span(&msg);
        assert_eq!(file, "src/lib.rs");
        assert_eq!(line, 42);
    }

    // Non-primary spans are ignored -> empty defaults.
    #[test]
    fn extract_primary_span_no_primary() {
        let msg = serde_json::json!({
            "spans": [
                {
                    "file_name": "src/lib.rs",
                    "line_start": 42,
                    "is_primary": false
                }
            ]
        });
        let (file, line) = extract_primary_span(&msg);
        assert_eq!(file, "");
        assert_eq!(line, 0);
    }

    // The primary span wins even when it is not listed first.
    #[test]
    fn extract_primary_span_multiple_spans() {
        let msg = serde_json::json!({
            "spans": [
                {
                    "file_name": "src/other.rs",
                    "line_start": 10,
                    "is_primary": false
                },
                {
                    "file_name": "src/main.rs",
                    "line_start": 99,
                    "is_primary": true
                }
            ]
        });
        let (file, line) = extract_primary_span(&msg);
        assert_eq!(file, "src/main.rs");
        assert_eq!(line, 99);
    }

    // Missing "spans" key entirely -> empty defaults.
    #[test]
    fn extract_primary_span_no_spans() {
        let msg = serde_json::json!({});
        let (file, line) = extract_primary_span(&msg);
        assert_eq!(file, "");
        assert_eq!(line, 0);
    }

    // Present-but-empty span list -> empty defaults.
    #[test]
    fn extract_primary_span_empty_spans() {
        let msg = serde_json::json!({ "spans": [] });
        let (file, line) = extract_primary_span(&msg);
        assert_eq!(file, "");
        assert_eq!(line, 0);
    }

    // End-to-end: parse one realistic cargo/clippy JSON line and extract
    // every field run_clippy reads (reason, level, message, code, span).
    #[test]
    fn parse_clippy_compiler_message_line() {
        let line = r#"{"reason":"compiler-message","message":{"level":"warning","message":"unused variable","code":{"code":"unused_variables"},"spans":[{"file_name":"src/main.rs","line_start":5,"is_primary":true}]}}"#;
        let msg: serde_json::Value = serde_json::from_str(line).unwrap();

        assert_eq!(
            msg.get("reason").and_then(|v| v.as_str()),
            Some("compiler-message")
        );
        let message = msg.get("message").unwrap();
        assert_eq!(
            message.get("level").and_then(|v| v.as_str()),
            Some("warning")
        );
        assert_eq!(
            message.get("message").and_then(|v| v.as_str()),
            Some("unused variable")
        );
        assert_eq!(
            message
                .get("code")
                .and_then(|v| v.get("code"))
                .and_then(|v| v.as_str()),
            Some("unused_variables")
        );

        let (file, line_num) = extract_primary_span(message);
        assert_eq!(file, "src/main.rs");
        assert_eq!(line_num, 5);
    }

    // Cargo emits other "reason" kinds (build scripts, artifacts); the
    // run_clippy loop must skip them via the reason check.
    #[test]
    fn skip_non_compiler_message() {
        let line = r#"{"reason":"build-script-executed","package_id":"foo 0.1.0"}"#;
        let msg: serde_json::Value = serde_json::from_str(line).unwrap();
        assert_ne!(
            msg.get("reason").and_then(|v| v.as_str()),
            Some("compiler-message")
        );
    }

    // The "aborting due to N previous errors" summary diagnostic is noise
    // and is filtered by the prefix check in run_clippy.
    #[test]
    fn skip_aborting_message() {
        let text = "aborting due to 3 previous errors";
        assert!(text.starts_with("aborting due to"));
    }
}
|
||||
183
compliance-agent/src/pipeline/lint/eslint.rs
Normal file
183
compliance-agent/src/pipeline/lint/eslint.rs
Normal file
@@ -0,0 +1,183 @@
|
||||
use std::path::Path;
|
||||
|
||||
use compliance_core::models::{Finding, ScanType, Severity};
|
||||
use compliance_core::CoreError;
|
||||
use tokio::process::Command;
|
||||
|
||||
use crate::pipeline::dedup;
|
||||
|
||||
use super::run_with_timeout;
|
||||
|
||||
pub(super) async fn run_eslint(repo_path: &Path, repo_id: &str) -> Result<Vec<Finding>, CoreError> {
|
||||
// Use the project-local eslint binary directly, not npx (which can hang downloading)
|
||||
let eslint_bin = repo_path.join("node_modules/.bin/eslint");
|
||||
let child = Command::new(eslint_bin)
|
||||
.args([".", "--format", "json", "--no-error-on-unmatched-pattern"])
|
||||
.current_dir(repo_path)
|
||||
.stdout(std::process::Stdio::piped())
|
||||
.stderr(std::process::Stdio::piped())
|
||||
.spawn()
|
||||
.map_err(|e| CoreError::Scanner {
|
||||
scanner: "eslint".to_string(),
|
||||
source: Box::new(e),
|
||||
})?;
|
||||
|
||||
let output = run_with_timeout(child, "eslint").await?;
|
||||
|
||||
if output.stdout.is_empty() {
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
|
||||
let results: Vec<EslintFileResult> = serde_json::from_slice(&output.stdout).unwrap_or_default();
|
||||
|
||||
let mut findings = Vec::new();
|
||||
for file_result in results {
|
||||
for msg in file_result.messages {
|
||||
let severity = match msg.severity {
|
||||
2 => Severity::Medium,
|
||||
_ => Severity::Low,
|
||||
};
|
||||
|
||||
let rule_id = msg.rule_id.unwrap_or_default();
|
||||
let fingerprint = dedup::compute_fingerprint(&[
|
||||
repo_id,
|
||||
"eslint",
|
||||
&rule_id,
|
||||
&file_result.file_path,
|
||||
&msg.line.to_string(),
|
||||
]);
|
||||
|
||||
let mut finding = Finding::new(
|
||||
repo_id.to_string(),
|
||||
fingerprint,
|
||||
"eslint".to_string(),
|
||||
ScanType::Lint,
|
||||
format!("[eslint] {}", msg.message),
|
||||
msg.message,
|
||||
severity,
|
||||
);
|
||||
finding.rule_id = Some(rule_id);
|
||||
finding.file_path = Some(file_result.file_path.clone());
|
||||
finding.line_number = Some(msg.line);
|
||||
findings.push(finding);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(findings)
|
||||
}
|
||||
|
||||
/// One entry of ESLint's `--format json` report: a file plus its messages.
#[derive(serde::Deserialize)]
struct EslintFileResult {
    // Path exactly as reported by ESLint (may be absolute).
    #[serde(rename = "filePath")]
    file_path: String,
    messages: Vec<EslintMessage>,
}
|
||||
|
||||
/// A single lint message within an ESLint file report.
#[derive(serde::Deserialize)]
struct EslintMessage {
    // Rule that fired; null (None) for parse errors.
    #[serde(rename = "ruleId")]
    rule_id: Option<String>,
    // ESLint convention: 2 = error, 1 = warning.
    severity: u8,
    message: String,
    line: u32,
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Full deserialization of a realistic ESLint JSON report.
    #[test]
    fn deserialize_eslint_output() {
        let json = r#"[
            {
                "filePath": "/home/user/project/src/app.js",
                "messages": [
                    {
                        "ruleId": "no-unused-vars",
                        "severity": 2,
                        "message": "'x' is defined but never used.",
                        "line": 10
                    },
                    {
                        "ruleId": "semi",
                        "severity": 1,
                        "message": "Missing semicolon.",
                        "line": 15
                    }
                ]
            }
        ]"#;
        let results: Vec<EslintFileResult> = serde_json::from_str(json).unwrap();
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].file_path, "/home/user/project/src/app.js");
        assert_eq!(results[0].messages.len(), 2);

        assert_eq!(
            results[0].messages[0].rule_id,
            Some("no-unused-vars".to_string())
        );
        assert_eq!(results[0].messages[0].severity, 2);
        assert_eq!(results[0].messages[0].line, 10);

        assert_eq!(results[0].messages[1].severity, 1);
    }

    // ESLint reports parse errors with "ruleId": null; must map to None.
    #[test]
    fn deserialize_eslint_null_rule_id() {
        let json = r#"[
            {
                "filePath": "src/index.js",
                "messages": [
                    {
                        "ruleId": null,
                        "severity": 2,
                        "message": "Parsing error: Unexpected token",
                        "line": 1
                    }
                ]
            }
        ]"#;
        let results: Vec<EslintFileResult> = serde_json::from_str(json).unwrap();
        assert_eq!(results[0].messages[0].rule_id, None);
    }

    // A clean file still appears in the report, with zero messages.
    #[test]
    fn deserialize_eslint_empty_messages() {
        let json = r#"[{"filePath": "src/clean.js", "messages": []}]"#;
        let results: Vec<EslintFileResult> = serde_json::from_str(json).unwrap();
        assert_eq!(results[0].messages.len(), 0);
    }

    // No files linted -> empty top-level array.
    #[test]
    fn deserialize_eslint_empty_array() {
        let json = "[]";
        let results: Vec<EslintFileResult> = serde_json::from_str(json).unwrap();
        assert!(results.is_empty());
    }

    // NOTE(review): this exercises the mapping convention only — the real
    // mapping lives in run_eslint and is not called here.
    #[test]
    fn eslint_severity_mapping() {
        // severity 2 = error -> Medium, anything else -> Low
        assert_eq!(
            match 2u8 {
                2 => "Medium",
                _ => "Low",
            },
            "Medium"
        );
        assert_eq!(
            match 1u8 {
                2 => "Medium",
                _ => "Low",
            },
            "Low"
        );
        assert_eq!(
            match 0u8 {
                2 => "Medium",
                _ => "Low",
            },
            "Low"
        );
    }
}
|
||||
97
compliance-agent/src/pipeline/lint/mod.rs
Normal file
97
compliance-agent/src/pipeline/lint/mod.rs
Normal file
@@ -0,0 +1,97 @@
|
||||
mod clippy;
|
||||
mod eslint;
|
||||
mod ruff;
|
||||
|
||||
use std::path::Path;
|
||||
use std::time::Duration;
|
||||
|
||||
use compliance_core::models::ScanType;
|
||||
use compliance_core::traits::{ScanOutput, Scanner};
|
||||
use compliance_core::CoreError;
|
||||
|
||||
/// Timeout for each individual lint command
|
||||
pub(crate) const LINT_TIMEOUT: Duration = Duration::from_secs(120);
|
||||
|
||||
/// Multi-language lint scanner: dispatches to clippy, ESLint, and Ruff based
/// on which project markers exist in the repository.
pub struct LintScanner;
|
||||
|
||||
impl Scanner for LintScanner {
    fn name(&self) -> &str {
        "lint"
    }

    fn scan_type(&self) -> ScanType {
        ScanType::Lint
    }

    /// Run every applicable linter for the repo. A single linter failing is
    /// logged and skipped so the other linters still contribute findings;
    /// this method itself only errors if something outside the individual
    /// linters fails (currently never).
    #[tracing::instrument(skip_all)]
    async fn scan(&self, repo_path: &Path, repo_id: &str) -> Result<ScanOutput, CoreError> {
        let mut all_findings = Vec::new();

        // Detect which languages are present and run appropriate linters
        if has_rust_project(repo_path) {
            match clippy::run_clippy(repo_path, repo_id).await {
                Ok(findings) => all_findings.extend(findings),
                Err(e) => tracing::warn!("Clippy failed: {e}"),
            }
        }

        if has_js_project(repo_path) {
            match eslint::run_eslint(repo_path, repo_id).await {
                Ok(findings) => all_findings.extend(findings),
                Err(e) => tracing::warn!("ESLint failed: {e}"),
            }
        }

        if has_python_project(repo_path) {
            match ruff::run_ruff(repo_path, repo_id).await {
                Ok(findings) => all_findings.extend(findings),
                Err(e) => tracing::warn!("Ruff failed: {e}"),
            }
        }

        // Lint scans never contribute SBOM entries.
        Ok(ScanOutput {
            findings: all_findings,
            sbom_entries: Vec::new(),
        })
    }
}
|
||||
|
||||
/// A repo is treated as a Rust project when a top-level `Cargo.toml` exists.
fn has_rust_project(repo_path: &Path) -> bool {
    let manifest = repo_path.join("Cargo.toml");
    manifest.exists()
}
|
||||
|
||||
/// JS lint runs only when the repo has a `package.json` AND a locally
/// installed eslint binary — we never download one on the fly.
fn has_js_project(repo_path: &Path) -> bool {
    let has_manifest = repo_path.join("package.json").exists();
    let has_local_eslint = repo_path.join("node_modules/.bin/eslint").exists();
    has_manifest && has_local_eslint
}
|
||||
|
||||
/// Any of the common Python project markers qualifies the repo for a Ruff run.
fn has_python_project(repo_path: &Path) -> bool {
    ["pyproject.toml", "setup.py", "requirements.txt"]
        .iter()
        .any(|marker| repo_path.join(marker).exists())
}
|
||||
|
||||
/// Run a command with a timeout, returning its output or an error.
///
/// On timeout the scanner error wraps an `io::Error` of kind `TimedOut` so
/// callers can distinguish it from other process failures.
pub(crate) async fn run_with_timeout(
    child: tokio::process::Child,
    scanner_name: &str,
) -> Result<std::process::Output, CoreError> {
    let result = tokio::time::timeout(LINT_TIMEOUT, child.wait_with_output()).await;
    match result {
        Ok(Ok(output)) => Ok(output),
        // Waiting on the process / collecting its output failed.
        Ok(Err(e)) => Err(CoreError::Scanner {
            scanner: scanner_name.to_string(),
            source: Box::new(e),
        }),
        Err(_) => {
            // Timeout: the wait future (and the Child it owns) is dropped here.
            // NOTE(review): tokio only kills a child on drop when
            // `kill_on_drop(true)` was set at spawn time — without it the
            // timed-out linter process keeps running. Verify the spawn sites
            // set that flag.
            Err(CoreError::Scanner {
                scanner: scanner_name.to_string(),
                source: Box::new(std::io::Error::new(
                    std::io::ErrorKind::TimedOut,
                    format!("{scanner_name} timed out after {}s", LINT_TIMEOUT.as_secs()),
                )),
            })
        }
    }
}
|
||||
150
compliance-agent/src/pipeline/lint/ruff.rs
Normal file
150
compliance-agent/src/pipeline/lint/ruff.rs
Normal file
@@ -0,0 +1,150 @@
|
||||
use std::path::Path;
|
||||
|
||||
use compliance_core::models::{Finding, ScanType, Severity};
|
||||
use compliance_core::CoreError;
|
||||
use tokio::process::Command;
|
||||
|
||||
use crate::pipeline::dedup;
|
||||
|
||||
use super::run_with_timeout;
|
||||
|
||||
pub(super) async fn run_ruff(repo_path: &Path, repo_id: &str) -> Result<Vec<Finding>, CoreError> {
|
||||
let child = Command::new("ruff")
|
||||
.args(["check", ".", "--output-format", "json", "--exit-zero"])
|
||||
.current_dir(repo_path)
|
||||
.stdout(std::process::Stdio::piped())
|
||||
.stderr(std::process::Stdio::piped())
|
||||
.spawn()
|
||||
.map_err(|e| CoreError::Scanner {
|
||||
scanner: "ruff".to_string(),
|
||||
source: Box::new(e),
|
||||
})?;
|
||||
|
||||
let output = run_with_timeout(child, "ruff").await?;
|
||||
|
||||
if output.stdout.is_empty() {
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
|
||||
let results: Vec<RuffResult> = serde_json::from_slice(&output.stdout).unwrap_or_default();
|
||||
|
||||
let findings = results
|
||||
.into_iter()
|
||||
.map(|r| {
|
||||
let severity = if r.code.starts_with('E') || r.code.starts_with('F') {
|
||||
Severity::Medium
|
||||
} else {
|
||||
Severity::Low
|
||||
};
|
||||
|
||||
let fingerprint = dedup::compute_fingerprint(&[
|
||||
repo_id,
|
||||
"ruff",
|
||||
&r.code,
|
||||
&r.filename,
|
||||
&r.location.row.to_string(),
|
||||
]);
|
||||
|
||||
let mut finding = Finding::new(
|
||||
repo_id.to_string(),
|
||||
fingerprint,
|
||||
"ruff".to_string(),
|
||||
ScanType::Lint,
|
||||
format!("[ruff] {}: {}", r.code, r.message),
|
||||
r.message,
|
||||
severity,
|
||||
);
|
||||
finding.rule_id = Some(r.code);
|
||||
finding.file_path = Some(r.filename);
|
||||
finding.line_number = Some(r.location.row);
|
||||
finding
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok(findings)
|
||||
}
|
||||
|
||||
/// One diagnostic from `ruff check --output-format json`. Unknown fields in
/// ruff's output (end_location, fix, noqa_row, ...) are ignored by serde's
/// default deserialization behavior.
#[derive(serde::Deserialize)]
struct RuffResult {
    // Rule code, e.g. "E501" or "F401".
    code: String,
    message: String,
    filename: String,
    location: RuffLocation,
}
|
||||
|
||||
/// Start position of a ruff diagnostic; only the row is consumed here.
#[derive(serde::Deserialize)]
struct RuffLocation {
    // Line number as reported by ruff (presumably 1-based — the "column"
    // field present in ruff's output is intentionally not modelled).
    row: u32,
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Full deserialization of a realistic ruff JSON report.
    #[test]
    fn deserialize_ruff_output() {
        let json = r#"[
            {
                "code": "E501",
                "message": "Line too long (120 > 79 characters)",
                "filename": "src/main.py",
                "location": {"row": 42}
            },
            {
                "code": "F401",
                "message": "`os` imported but unused",
                "filename": "src/utils.py",
                "location": {"row": 1}
            }
        ]"#;
        let results: Vec<RuffResult> = serde_json::from_str(json).unwrap();
        assert_eq!(results.len(), 2);

        assert_eq!(results[0].code, "E501");
        assert_eq!(results[0].filename, "src/main.py");
        assert_eq!(results[0].location.row, 42);

        assert_eq!(results[1].code, "F401");
        assert_eq!(results[1].location.row, 1);
    }

    // No diagnostics -> empty top-level array.
    #[test]
    fn deserialize_ruff_empty() {
        let json = "[]";
        let results: Vec<RuffResult> = serde_json::from_str(json).unwrap();
        assert!(results.is_empty());
    }

    // NOTE(review): these two tests exercise the prefix convention only;
    // the real severity mapping lives in run_ruff and is not called here.
    #[test]
    fn ruff_severity_e_and_f_are_medium() {
        for code in &["E501", "E302", "F401", "F811"] {
            let is_medium = code.starts_with('E') || code.starts_with('F');
            assert!(is_medium, "Expected {code} to be Medium severity");
        }
    }

    #[test]
    fn ruff_severity_others_are_low() {
        for code in &["W291", "I001", "D100", "C901", "N801"] {
            let is_medium = code.starts_with('E') || code.starts_with('F');
            assert!(!is_medium, "Expected {code} to be Low severity");
        }
    }

    // Extra fields in ruff's output must not break deserialization.
    #[test]
    fn deserialize_ruff_with_extra_fields() {
        // Ruff output may contain additional fields we don't use
        let json = r#"[{
            "code": "W291",
            "message": "Trailing whitespace",
            "filename": "app.py",
            "location": {"row": 3, "column": 10},
            "end_location": {"row": 3, "column": 11},
            "fix": null,
            "noqa_row": 3
        }]"#;
        let results: Vec<RuffResult> = serde_json::from_str(json).unwrap();
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].code, "W291");
    }
}
|
||||
@@ -3,8 +3,12 @@ pub mod cve;
|
||||
pub mod dedup;
|
||||
pub mod git;
|
||||
pub mod gitleaks;
|
||||
mod graph_build;
|
||||
mod issue_creation;
|
||||
pub mod lint;
|
||||
pub mod orchestrator;
|
||||
pub mod patterns;
|
||||
mod pr_review;
|
||||
pub mod sbom;
|
||||
pub mod semgrep;
|
||||
mod tracker_dispatch;
|
||||
|
||||
@@ -4,7 +4,6 @@ use mongodb::bson::doc;
|
||||
use tracing::Instrument;
|
||||
|
||||
use compliance_core::models::*;
|
||||
use compliance_core::traits::issue_tracker::IssueTracker;
|
||||
use compliance_core::traits::Scanner;
|
||||
use compliance_core::AgentConfig;
|
||||
|
||||
@@ -19,84 +18,6 @@ use crate::pipeline::lint::LintScanner;
|
||||
use crate::pipeline::patterns::{GdprPatternScanner, OAuthPatternScanner};
|
||||
use crate::pipeline::sbom::SbomScanner;
|
||||
use crate::pipeline::semgrep::SemgrepScanner;
|
||||
use crate::trackers;
|
||||
|
||||
/// Enum dispatch for issue trackers (async traits aren't dyn-compatible).
|
||||
enum TrackerDispatch {
|
||||
GitHub(trackers::github::GitHubTracker),
|
||||
GitLab(trackers::gitlab::GitLabTracker),
|
||||
Gitea(trackers::gitea::GiteaTracker),
|
||||
Jira(trackers::jira::JiraTracker),
|
||||
}
|
||||
|
||||
impl TrackerDispatch {
|
||||
fn name(&self) -> &str {
|
||||
match self {
|
||||
Self::GitHub(t) => t.name(),
|
||||
Self::GitLab(t) => t.name(),
|
||||
Self::Gitea(t) => t.name(),
|
||||
Self::Jira(t) => t.name(),
|
||||
}
|
||||
}
|
||||
|
||||
async fn create_issue(
|
||||
&self,
|
||||
owner: &str,
|
||||
repo: &str,
|
||||
title: &str,
|
||||
body: &str,
|
||||
labels: &[String],
|
||||
) -> Result<TrackerIssue, compliance_core::error::CoreError> {
|
||||
match self {
|
||||
Self::GitHub(t) => t.create_issue(owner, repo, title, body, labels).await,
|
||||
Self::GitLab(t) => t.create_issue(owner, repo, title, body, labels).await,
|
||||
Self::Gitea(t) => t.create_issue(owner, repo, title, body, labels).await,
|
||||
Self::Jira(t) => t.create_issue(owner, repo, title, body, labels).await,
|
||||
}
|
||||
}
|
||||
|
||||
async fn find_existing_issue(
|
||||
&self,
|
||||
owner: &str,
|
||||
repo: &str,
|
||||
fingerprint: &str,
|
||||
) -> Result<Option<TrackerIssue>, compliance_core::error::CoreError> {
|
||||
match self {
|
||||
Self::GitHub(t) => t.find_existing_issue(owner, repo, fingerprint).await,
|
||||
Self::GitLab(t) => t.find_existing_issue(owner, repo, fingerprint).await,
|
||||
Self::Gitea(t) => t.find_existing_issue(owner, repo, fingerprint).await,
|
||||
Self::Jira(t) => t.find_existing_issue(owner, repo, fingerprint).await,
|
||||
}
|
||||
}
|
||||
|
||||
async fn create_pr_review(
|
||||
&self,
|
||||
owner: &str,
|
||||
repo: &str,
|
||||
pr_number: u64,
|
||||
body: &str,
|
||||
comments: Vec<compliance_core::traits::issue_tracker::ReviewComment>,
|
||||
) -> Result<(), compliance_core::error::CoreError> {
|
||||
match self {
|
||||
Self::GitHub(t) => {
|
||||
t.create_pr_review(owner, repo, pr_number, body, comments)
|
||||
.await
|
||||
}
|
||||
Self::GitLab(t) => {
|
||||
t.create_pr_review(owner, repo, pr_number, body, comments)
|
||||
.await
|
||||
}
|
||||
Self::Gitea(t) => {
|
||||
t.create_pr_review(owner, repo, pr_number, body, comments)
|
||||
.await
|
||||
}
|
||||
Self::Jira(t) => {
|
||||
t.create_pr_review(owner, repo, pr_number, body, comments)
|
||||
.await
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Context from graph analysis passed to LLM triage for enhanced filtering
|
||||
#[derive(Debug)]
|
||||
@@ -109,10 +30,10 @@ pub struct GraphContext {
|
||||
}
|
||||
|
||||
pub struct PipelineOrchestrator {
|
||||
config: AgentConfig,
|
||||
db: Database,
|
||||
llm: Arc<LlmClient>,
|
||||
http: reqwest::Client,
|
||||
pub(super) config: AgentConfig,
|
||||
pub(super) db: Database,
|
||||
pub(super) llm: Arc<LlmClient>,
|
||||
pub(super) http: reqwest::Client,
|
||||
}
|
||||
|
||||
impl PipelineOrchestrator {
|
||||
@@ -460,446 +381,7 @@ impl PipelineOrchestrator {
|
||||
Ok(new_count)
|
||||
}
|
||||
|
||||
/// Build the code knowledge graph for a repo and compute impact analyses
|
||||
async fn build_code_graph(
|
||||
&self,
|
||||
repo_path: &std::path::Path,
|
||||
repo_id: &str,
|
||||
findings: &[Finding],
|
||||
) -> Result<GraphContext, AgentError> {
|
||||
let graph_build_id = uuid::Uuid::new_v4().to_string();
|
||||
let engine = compliance_graph::GraphEngine::new(50_000);
|
||||
|
||||
let (mut code_graph, build_run) =
|
||||
engine
|
||||
.build_graph(repo_path, repo_id, &graph_build_id)
|
||||
.map_err(|e| AgentError::Other(format!("Graph build error: {e}")))?;
|
||||
|
||||
// Apply community detection
|
||||
compliance_graph::graph::community::apply_communities(&mut code_graph);
|
||||
|
||||
// Store graph in MongoDB
|
||||
let store = compliance_graph::graph::persistence::GraphStore::new(self.db.inner());
|
||||
store
|
||||
.delete_repo_graph(repo_id)
|
||||
.await
|
||||
.map_err(|e| AgentError::Other(format!("Graph cleanup error: {e}")))?;
|
||||
store
|
||||
.store_graph(&build_run, &code_graph.nodes, &code_graph.edges)
|
||||
.await
|
||||
.map_err(|e| AgentError::Other(format!("Graph store error: {e}")))?;
|
||||
|
||||
// Compute impact analysis for each finding
|
||||
let analyzer = compliance_graph::GraphEngine::impact_analyzer(&code_graph);
|
||||
let mut impacts = Vec::new();
|
||||
|
||||
for finding in findings {
|
||||
if let Some(file_path) = &finding.file_path {
|
||||
let impact = analyzer.analyze(
|
||||
repo_id,
|
||||
&finding.fingerprint,
|
||||
&graph_build_id,
|
||||
file_path,
|
||||
finding.line_number,
|
||||
);
|
||||
store
|
||||
.store_impact(&impact)
|
||||
.await
|
||||
.map_err(|e| AgentError::Other(format!("Impact store error: {e}")))?;
|
||||
impacts.push(impact);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(GraphContext {
|
||||
node_count: build_run.node_count,
|
||||
edge_count: build_run.edge_count,
|
||||
community_count: build_run.community_count,
|
||||
impacts,
|
||||
})
|
||||
}
|
||||
|
||||
/// Trigger DAST scan if a target is configured for this repo
|
||||
async fn maybe_trigger_dast(&self, repo_id: &str, scan_run_id: &str) {
|
||||
use futures_util::TryStreamExt;
|
||||
|
||||
let filter = mongodb::bson::doc! { "repo_id": repo_id };
|
||||
let targets: Vec<compliance_core::models::DastTarget> =
|
||||
match self.db.dast_targets().find(filter).await {
|
||||
Ok(cursor) => cursor.try_collect().await.unwrap_or_default(),
|
||||
Err(_) => return,
|
||||
};
|
||||
|
||||
if targets.is_empty() {
|
||||
tracing::info!("[{repo_id}] No DAST targets configured, skipping");
|
||||
return;
|
||||
}
|
||||
|
||||
for target in targets {
|
||||
let db = self.db.clone();
|
||||
let scan_run_id = scan_run_id.to_string();
|
||||
tokio::spawn(async move {
|
||||
let orchestrator = compliance_dast::DastOrchestrator::new(100);
|
||||
match orchestrator.run_scan(&target, Vec::new()).await {
|
||||
Ok((mut scan_run, findings)) => {
|
||||
scan_run.sast_scan_run_id = Some(scan_run_id);
|
||||
if let Err(e) = db.dast_scan_runs().insert_one(&scan_run).await {
|
||||
tracing::error!("Failed to store DAST scan run: {e}");
|
||||
}
|
||||
for finding in &findings {
|
||||
if let Err(e) = db.dast_findings().insert_one(finding).await {
|
||||
tracing::error!("Failed to store DAST finding: {e}");
|
||||
}
|
||||
}
|
||||
tracing::info!("DAST scan complete: {} findings", findings.len());
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::error!("DAST scan failed: {e}");
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
/// Build an issue tracker client from a repository's tracker configuration.
|
||||
/// Returns `None` if the repo has no tracker configured.
|
||||
fn build_tracker(&self, repo: &TrackedRepository) -> Option<TrackerDispatch> {
|
||||
let tracker_type = repo.tracker_type.as_ref()?;
|
||||
// Per-repo token takes precedence, fall back to global config
|
||||
match tracker_type {
|
||||
TrackerType::GitHub => {
|
||||
let token = repo.tracker_token.clone().or_else(|| {
|
||||
self.config.github_token.as_ref().map(|t| {
|
||||
use secrecy::ExposeSecret;
|
||||
t.expose_secret().to_string()
|
||||
})
|
||||
})?;
|
||||
let secret = secrecy::SecretString::from(token);
|
||||
match trackers::github::GitHubTracker::new(&secret) {
|
||||
Ok(t) => Some(TrackerDispatch::GitHub(t)),
|
||||
Err(e) => {
|
||||
tracing::warn!("Failed to build GitHub tracker: {e}");
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
TrackerType::GitLab => {
|
||||
let base_url = self
|
||||
.config
|
||||
.gitlab_url
|
||||
.clone()
|
||||
.unwrap_or_else(|| "https://gitlab.com".to_string());
|
||||
let token = repo.tracker_token.clone().or_else(|| {
|
||||
self.config.gitlab_token.as_ref().map(|t| {
|
||||
use secrecy::ExposeSecret;
|
||||
t.expose_secret().to_string()
|
||||
})
|
||||
})?;
|
||||
let secret = secrecy::SecretString::from(token);
|
||||
Some(TrackerDispatch::GitLab(
|
||||
trackers::gitlab::GitLabTracker::new(base_url, secret),
|
||||
))
|
||||
}
|
||||
TrackerType::Gitea => {
|
||||
let token = repo.tracker_token.clone()?;
|
||||
let base_url = extract_base_url(&repo.git_url)?;
|
||||
let secret = secrecy::SecretString::from(token);
|
||||
Some(TrackerDispatch::Gitea(trackers::gitea::GiteaTracker::new(
|
||||
base_url, secret,
|
||||
)))
|
||||
}
|
||||
TrackerType::Jira => {
|
||||
let base_url = self.config.jira_url.clone()?;
|
||||
let email = self.config.jira_email.clone()?;
|
||||
let project_key = self.config.jira_project_key.clone()?;
|
||||
let token = repo.tracker_token.clone().or_else(|| {
|
||||
self.config.jira_api_token.as_ref().map(|t| {
|
||||
use secrecy::ExposeSecret;
|
||||
t.expose_secret().to_string()
|
||||
})
|
||||
})?;
|
||||
let secret = secrecy::SecretString::from(token);
|
||||
Some(TrackerDispatch::Jira(trackers::jira::JiraTracker::new(
|
||||
base_url,
|
||||
email,
|
||||
secret,
|
||||
project_key,
|
||||
)))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Create tracker issues for new findings (severity >= Medium).
|
||||
/// Checks for duplicates via fingerprint search before creating.
|
||||
#[tracing::instrument(skip_all, fields(repo_id = %repo_id))]
|
||||
async fn create_tracker_issues(
|
||||
&self,
|
||||
repo: &TrackedRepository,
|
||||
repo_id: &str,
|
||||
new_findings: &[Finding],
|
||||
) -> Result<(), AgentError> {
|
||||
let tracker = match self.build_tracker(repo) {
|
||||
Some(t) => t,
|
||||
None => {
|
||||
tracing::info!("[{repo_id}] No issue tracker configured, skipping");
|
||||
return Ok(());
|
||||
}
|
||||
};
|
||||
|
||||
let owner = match repo.tracker_owner.as_deref() {
|
||||
Some(o) => o,
|
||||
None => {
|
||||
tracing::warn!("[{repo_id}] tracker_owner not set, skipping issue creation");
|
||||
return Ok(());
|
||||
}
|
||||
};
|
||||
let tracker_repo_name = match repo.tracker_repo.as_deref() {
|
||||
Some(r) => r,
|
||||
None => {
|
||||
tracing::warn!("[{repo_id}] tracker_repo not set, skipping issue creation");
|
||||
return Ok(());
|
||||
}
|
||||
};
|
||||
|
||||
// Only create issues for medium+ severity findings
|
||||
let actionable: Vec<&Finding> = new_findings
|
||||
.iter()
|
||||
.filter(|f| {
|
||||
matches!(
|
||||
f.severity,
|
||||
Severity::Medium | Severity::High | Severity::Critical
|
||||
)
|
||||
})
|
||||
.collect();
|
||||
|
||||
if actionable.is_empty() {
|
||||
tracing::info!("[{repo_id}] No medium+ findings, skipping issue creation");
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
tracing::info!(
|
||||
"[{repo_id}] Creating issues for {} findings via {}",
|
||||
actionable.len(),
|
||||
tracker.name()
|
||||
);
|
||||
|
||||
let mut created = 0u32;
|
||||
for finding in actionable {
|
||||
let title = format!(
|
||||
"[{}] {}: {}",
|
||||
finding.severity, finding.scanner, finding.title
|
||||
);
|
||||
|
||||
// Check if an issue already exists by fingerprint first, then by title
|
||||
let mut found_existing = false;
|
||||
for search_term in [&finding.fingerprint, &title] {
|
||||
match tracker
|
||||
.find_existing_issue(owner, tracker_repo_name, search_term)
|
||||
.await
|
||||
{
|
||||
Ok(Some(existing)) => {
|
||||
tracing::debug!(
|
||||
"[{repo_id}] Issue already exists for '{}': {}",
|
||||
search_term,
|
||||
existing.external_url
|
||||
);
|
||||
found_existing = true;
|
||||
break;
|
||||
}
|
||||
Ok(None) => {}
|
||||
Err(e) => {
|
||||
tracing::warn!("[{repo_id}] Failed to search for existing issue: {e}");
|
||||
}
|
||||
}
|
||||
}
|
||||
if found_existing {
|
||||
continue;
|
||||
}
|
||||
let body = format_issue_body(finding);
|
||||
let labels = vec![
|
||||
format!("severity:{}", finding.severity),
|
||||
format!("scanner:{}", finding.scanner),
|
||||
"compliance-scanner".to_string(),
|
||||
];
|
||||
|
||||
match tracker
|
||||
.create_issue(owner, tracker_repo_name, &title, &body, &labels)
|
||||
.await
|
||||
{
|
||||
Ok(mut issue) => {
|
||||
issue.finding_id = finding
|
||||
.id
|
||||
.as_ref()
|
||||
.map(|id| id.to_hex())
|
||||
.unwrap_or_default();
|
||||
|
||||
// Update the finding with the issue URL
|
||||
if let Some(finding_id) = &finding.id {
|
||||
let _ = self
|
||||
.db
|
||||
.findings()
|
||||
.update_one(
|
||||
doc! { "_id": finding_id },
|
||||
doc! { "$set": { "tracker_issue_url": &issue.external_url } },
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
// Store the tracker issue record
|
||||
if let Err(e) = self.db.tracker_issues().insert_one(&issue).await {
|
||||
tracing::warn!("[{repo_id}] Failed to store tracker issue: {e}");
|
||||
}
|
||||
|
||||
created += 1;
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!(
|
||||
"[{repo_id}] Failed to create issue for {}: {e}",
|
||||
finding.fingerprint
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
tracing::info!("[{repo_id}] Created {created} tracker issues");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Run an incremental scan on a PR diff and post review comments.
|
||||
#[tracing::instrument(skip_all, fields(repo_id = %repo_id, pr_number))]
|
||||
pub async fn run_pr_review(
|
||||
&self,
|
||||
repo: &TrackedRepository,
|
||||
repo_id: &str,
|
||||
pr_number: u64,
|
||||
base_sha: &str,
|
||||
head_sha: &str,
|
||||
) -> Result<(), AgentError> {
|
||||
let tracker = match self.build_tracker(repo) {
|
||||
Some(t) => t,
|
||||
None => {
|
||||
tracing::warn!("[{repo_id}] No tracker configured, cannot post PR review");
|
||||
return Ok(());
|
||||
}
|
||||
};
|
||||
let owner = repo.tracker_owner.as_deref().unwrap_or("");
|
||||
let tracker_repo_name = repo.tracker_repo.as_deref().unwrap_or("");
|
||||
if owner.is_empty() || tracker_repo_name.is_empty() {
|
||||
tracing::warn!("[{repo_id}] tracker_owner or tracker_repo not set");
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Clone/fetch the repo
|
||||
let creds = GitOps::make_repo_credentials(&self.config, repo);
|
||||
let git_ops = GitOps::new(&self.config.git_clone_base_path, creds);
|
||||
let repo_path = git_ops.clone_or_fetch(&repo.git_url, &repo.name)?;
|
||||
|
||||
// Get diff between base and head
|
||||
let diff_files = GitOps::get_diff_content(&repo_path, base_sha, head_sha)?;
|
||||
if diff_files.is_empty() {
|
||||
tracing::info!("[{repo_id}] PR #{pr_number}: no diff files, skipping review");
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Run semgrep on the full repo but we'll filter findings to changed files
|
||||
let changed_paths: std::collections::HashSet<String> =
|
||||
diff_files.iter().map(|f| f.path.clone()).collect();
|
||||
|
||||
let mut pr_findings: Vec<Finding> = Vec::new();
|
||||
|
||||
// SAST scan (semgrep)
|
||||
match SemgrepScanner.scan(&repo_path, repo_id).await {
|
||||
Ok(output) => {
|
||||
for f in output.findings {
|
||||
if let Some(fp) = &f.file_path {
|
||||
if changed_paths.contains(fp.as_str()) {
|
||||
pr_findings.push(f);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => tracing::warn!("[{repo_id}] PR semgrep failed: {e}"),
|
||||
}
|
||||
|
||||
// LLM code review on the diff
|
||||
let reviewer = CodeReviewScanner::new(self.llm.clone());
|
||||
let review_output = reviewer
|
||||
.review_diff(&repo_path, repo_id, base_sha, head_sha)
|
||||
.await;
|
||||
pr_findings.extend(review_output.findings);
|
||||
|
||||
if pr_findings.is_empty() {
|
||||
// Post a clean review
|
||||
if let Err(e) = tracker
|
||||
.create_pr_review(
|
||||
owner,
|
||||
tracker_repo_name,
|
||||
pr_number,
|
||||
"Compliance scan: no issues found in this PR.",
|
||||
Vec::new(),
|
||||
)
|
||||
.await
|
||||
{
|
||||
tracing::warn!("[{repo_id}] Failed to post clean PR review: {e}");
|
||||
}
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Build review comments from findings
|
||||
let mut review_comments = Vec::new();
|
||||
for finding in &pr_findings {
|
||||
if let (Some(path), Some(line)) = (&finding.file_path, finding.line_number) {
|
||||
let comment_body = format!(
|
||||
"**[{}] {}**\n\n{}\n\n*Scanner: {} | {}*",
|
||||
finding.severity,
|
||||
finding.title,
|
||||
finding.description,
|
||||
finding.scanner,
|
||||
finding
|
||||
.cwe
|
||||
.as_deref()
|
||||
.map(|c| format!("CWE: {c}"))
|
||||
.unwrap_or_default(),
|
||||
);
|
||||
review_comments.push(compliance_core::traits::issue_tracker::ReviewComment {
|
||||
path: path.clone(),
|
||||
line,
|
||||
body: comment_body,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
let summary = format!(
|
||||
"Compliance scan found **{}** issue(s) in this PR:\n\n{}",
|
||||
pr_findings.len(),
|
||||
pr_findings
|
||||
.iter()
|
||||
.map(|f| format!("- **[{}]** {}: {}", f.severity, f.scanner, f.title))
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n"),
|
||||
);
|
||||
|
||||
if let Err(e) = tracker
|
||||
.create_pr_review(
|
||||
owner,
|
||||
tracker_repo_name,
|
||||
pr_number,
|
||||
&summary,
|
||||
review_comments,
|
||||
)
|
||||
.await
|
||||
{
|
||||
tracing::warn!("[{repo_id}] Failed to post PR review: {e}");
|
||||
} else {
|
||||
tracing::info!(
|
||||
"[{repo_id}] Posted PR review on #{pr_number} with {} findings",
|
||||
pr_findings.len()
|
||||
);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn update_phase(&self, scan_run_id: &str, phase: &str) {
|
||||
pub(super) async fn update_phase(&self, scan_run_id: &str, phase: &str) {
|
||||
if let Ok(oid) = mongodb::bson::oid::ObjectId::parse_str(scan_run_id) {
|
||||
let _ = self
|
||||
.db
|
||||
@@ -917,9 +399,9 @@ impl PipelineOrchestrator {
|
||||
}
|
||||
|
||||
/// Extract the scheme + host from a git URL.
|
||||
/// e.g. "https://gitea.example.com/owner/repo.git" → "https://gitea.example.com"
|
||||
/// e.g. "ssh://git@gitea.example.com:22/owner/repo.git" → "https://gitea.example.com"
|
||||
fn extract_base_url(git_url: &str) -> Option<String> {
|
||||
/// e.g. "https://gitea.example.com/owner/repo.git" -> "https://gitea.example.com"
|
||||
/// e.g. "ssh://git@gitea.example.com:22/owner/repo.git" -> "https://gitea.example.com"
|
||||
pub(super) fn extract_base_url(git_url: &str) -> Option<String> {
|
||||
if let Some(rest) = git_url.strip_prefix("https://") {
|
||||
let host = rest.split('/').next()?;
|
||||
Some(format!("https://{host}"))
|
||||
@@ -927,7 +409,7 @@ fn extract_base_url(git_url: &str) -> Option<String> {
|
||||
let host = rest.split('/').next()?;
|
||||
Some(format!("http://{host}"))
|
||||
} else if let Some(rest) = git_url.strip_prefix("ssh://") {
|
||||
// ssh://git@host:port/path → extract host
|
||||
// ssh://git@host:port/path -> extract host
|
||||
let after_at = rest.find('@').map(|i| &rest[i + 1..]).unwrap_or(rest);
|
||||
let host = after_at.split(&[':', '/'][..]).next()?;
|
||||
Some(format!("https://{host}"))
|
||||
@@ -940,48 +422,3 @@ fn extract_base_url(git_url: &str) -> Option<String> {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Format a finding into a markdown issue body for the tracker.
|
||||
fn format_issue_body(finding: &Finding) -> String {
|
||||
let mut body = String::new();
|
||||
|
||||
body.push_str(&format!("## {} Finding\n\n", finding.severity));
|
||||
body.push_str(&format!("**Scanner:** {}\n", finding.scanner));
|
||||
body.push_str(&format!("**Severity:** {}\n", finding.severity));
|
||||
|
||||
if let Some(rule) = &finding.rule_id {
|
||||
body.push_str(&format!("**Rule:** {}\n", rule));
|
||||
}
|
||||
if let Some(cwe) = &finding.cwe {
|
||||
body.push_str(&format!("**CWE:** {}\n", cwe));
|
||||
}
|
||||
|
||||
body.push_str(&format!("\n### Description\n\n{}\n", finding.description));
|
||||
|
||||
if let Some(file_path) = &finding.file_path {
|
||||
body.push_str(&format!("\n### Location\n\n**File:** `{}`", file_path));
|
||||
if let Some(line) = finding.line_number {
|
||||
body.push_str(&format!(" (line {})", line));
|
||||
}
|
||||
body.push('\n');
|
||||
}
|
||||
|
||||
if let Some(snippet) = &finding.code_snippet {
|
||||
body.push_str(&format!("\n### Code\n\n```\n{}\n```\n", snippet));
|
||||
}
|
||||
|
||||
if let Some(remediation) = &finding.remediation {
|
||||
body.push_str(&format!("\n### Remediation\n\n{}\n", remediation));
|
||||
}
|
||||
|
||||
if let Some(fix) = &finding.suggested_fix {
|
||||
body.push_str(&format!("\n### Suggested Fix\n\n```\n{}\n```\n", fix));
|
||||
}
|
||||
|
||||
body.push_str(&format!(
|
||||
"\n---\n*Fingerprint:* `{}`\n*Generated by compliance-scanner*",
|
||||
finding.fingerprint
|
||||
));
|
||||
|
||||
body
|
||||
}
|
||||
|
||||
@@ -256,3 +256,159 @@ fn walkdir(path: &Path) -> Result<Vec<walkdir::DirEntry>, CoreError> {
|
||||
|
||||
Ok(entries)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
// --- compile_regex tests ---
|
||||
|
||||
#[test]
|
||||
fn compile_regex_valid_pattern() {
|
||||
let re = compile_regex(r"\bfoo\b");
|
||||
assert!(re.is_match("hello foo bar"));
|
||||
assert!(!re.is_match("foobar"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn compile_regex_invalid_pattern_returns_fallback() {
|
||||
// An invalid regex should return the fallback "^$" that only matches empty strings
|
||||
let re = compile_regex(r"[invalid");
|
||||
assert!(re.is_match(""));
|
||||
assert!(!re.is_match("anything"));
|
||||
}
|
||||
|
||||
// --- GDPR pattern tests ---
|
||||
|
||||
#[test]
|
||||
fn gdpr_pii_logging_matches() {
|
||||
let scanner = GdprPatternScanner::new();
|
||||
let pattern = &scanner.patterns[0]; // gdpr-pii-logging
|
||||
// Regex: (log|print|console\.|logger\.|tracing::)\s*[\.(].*\b(pii_keyword)\b
|
||||
assert!(pattern.pattern.is_match("console.log(email)"));
|
||||
assert!(pattern.pattern.is_match("console.log(user.ssn)"));
|
||||
assert!(pattern.pattern.is_match("print(phone_number)"));
|
||||
assert!(pattern.pattern.is_match("tracing::(ip_addr)"));
|
||||
assert!(pattern.pattern.is_match("log.debug(credit_card)"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn gdpr_pii_logging_no_false_positive() {
|
||||
let scanner = GdprPatternScanner::new();
|
||||
let pattern = &scanner.patterns[0];
|
||||
// Regular logging without PII fields should not match
|
||||
assert!(!pattern
|
||||
.pattern
|
||||
.is_match("logger.info(\"request completed\")"));
|
||||
assert!(!pattern.pattern.is_match("let email = user.email;"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn gdpr_no_consent_matches() {
|
||||
let scanner = GdprPatternScanner::new();
|
||||
let pattern = &scanner.patterns[1]; // gdpr-no-consent
|
||||
assert!(pattern.pattern.is_match("collect personal data"));
|
||||
assert!(pattern.pattern.is_match("store user_data in db"));
|
||||
assert!(pattern.pattern.is_match("save pii to disk"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn gdpr_user_model_matches() {
|
||||
let scanner = GdprPatternScanner::new();
|
||||
let pattern = &scanner.patterns[2]; // gdpr-no-delete-endpoint
|
||||
assert!(pattern.pattern.is_match("struct User {"));
|
||||
assert!(pattern.pattern.is_match("class User(Model):"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn gdpr_hardcoded_retention_matches() {
|
||||
let scanner = GdprPatternScanner::new();
|
||||
let pattern = &scanner.patterns[3]; // gdpr-hardcoded-retention
|
||||
assert!(pattern.pattern.is_match("retention = 30"));
|
||||
assert!(pattern.pattern.is_match("ttl: 3600"));
|
||||
assert!(pattern.pattern.is_match("expire = 86400"));
|
||||
}
|
||||
|
||||
// --- OAuth pattern tests ---
|
||||
|
||||
#[test]
|
||||
fn oauth_implicit_grant_matches() {
|
||||
let scanner = OAuthPatternScanner::new();
|
||||
let pattern = &scanner.patterns[0]; // oauth-implicit-grant
|
||||
assert!(pattern.pattern.is_match("response_type = \"token\""));
|
||||
assert!(pattern.pattern.is_match("grant_type: implicit"));
|
||||
assert!(pattern.pattern.is_match("response_type='token'"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn oauth_implicit_grant_no_false_positive() {
|
||||
let scanner = OAuthPatternScanner::new();
|
||||
let pattern = &scanner.patterns[0];
|
||||
assert!(!pattern.pattern.is_match("response_type = \"code\""));
|
||||
assert!(!pattern.pattern.is_match("grant_type: authorization_code"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn oauth_authorization_code_matches() {
|
||||
let scanner = OAuthPatternScanner::new();
|
||||
let pattern = &scanner.patterns[1]; // oauth-missing-pkce
|
||||
assert!(pattern.pattern.is_match("uses authorization_code flow"));
|
||||
assert!(pattern.pattern.is_match("authorization code grant"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn oauth_token_localstorage_matches() {
|
||||
let scanner = OAuthPatternScanner::new();
|
||||
let pattern = &scanner.patterns[2]; // oauth-token-localstorage
|
||||
assert!(pattern
|
||||
.pattern
|
||||
.is_match("localStorage.setItem('access_token', tok)"));
|
||||
assert!(pattern
|
||||
.pattern
|
||||
.is_match("localStorage.getItem(\"refresh_token\")"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn oauth_token_localstorage_no_false_positive() {
|
||||
let scanner = OAuthPatternScanner::new();
|
||||
let pattern = &scanner.patterns[2];
|
||||
assert!(!pattern
|
||||
.pattern
|
||||
.is_match("localStorage.setItem('theme', 'dark')"));
|
||||
assert!(!pattern
|
||||
.pattern
|
||||
.is_match("sessionStorage.setItem('token', t)"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn oauth_token_url_matches() {
|
||||
let scanner = OAuthPatternScanner::new();
|
||||
let pattern = &scanner.patterns[3]; // oauth-token-url
|
||||
assert!(pattern.pattern.is_match("access_token = build_url(query)"));
|
||||
assert!(pattern.pattern.is_match("bearer = url.param"));
|
||||
}
|
||||
|
||||
// --- Pattern rule file extension filtering ---
|
||||
|
||||
#[test]
|
||||
fn gdpr_patterns_cover_common_languages() {
|
||||
let scanner = GdprPatternScanner::new();
|
||||
for pattern in &scanner.patterns {
|
||||
assert!(
|
||||
pattern.file_extensions.contains(&"rs".to_string()),
|
||||
"Pattern {} should cover .rs files",
|
||||
pattern.id
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn oauth_localstorage_only_js_ts() {
|
||||
let scanner = OAuthPatternScanner::new();
|
||||
let pattern = &scanner.patterns[2]; // oauth-token-localstorage
|
||||
assert!(pattern.file_extensions.contains(&"js".to_string()));
|
||||
assert!(pattern.file_extensions.contains(&"ts".to_string()));
|
||||
assert!(!pattern.file_extensions.contains(&"rs".to_string()));
|
||||
assert!(!pattern.file_extensions.contains(&"py".to_string()));
|
||||
}
|
||||
}
|
||||
|
||||
146
compliance-agent/src/pipeline/pr_review.rs
Normal file
146
compliance-agent/src/pipeline/pr_review.rs
Normal file
@@ -0,0 +1,146 @@
|
||||
use compliance_core::models::*;
|
||||
|
||||
use super::orchestrator::PipelineOrchestrator;
|
||||
use crate::error::AgentError;
|
||||
use crate::pipeline::code_review::CodeReviewScanner;
|
||||
use crate::pipeline::git::GitOps;
|
||||
use crate::pipeline::semgrep::SemgrepScanner;
|
||||
|
||||
use compliance_core::traits::Scanner;
|
||||
|
||||
impl PipelineOrchestrator {
|
||||
/// Run an incremental scan on a PR diff and post review comments.
|
||||
#[tracing::instrument(skip_all, fields(repo_id = %repo_id, pr_number))]
|
||||
pub async fn run_pr_review(
|
||||
&self,
|
||||
repo: &TrackedRepository,
|
||||
repo_id: &str,
|
||||
pr_number: u64,
|
||||
base_sha: &str,
|
||||
head_sha: &str,
|
||||
) -> Result<(), AgentError> {
|
||||
let tracker = match self.build_tracker(repo) {
|
||||
Some(t) => t,
|
||||
None => {
|
||||
tracing::warn!("[{repo_id}] No tracker configured, cannot post PR review");
|
||||
return Ok(());
|
||||
}
|
||||
};
|
||||
let owner = repo.tracker_owner.as_deref().unwrap_or("");
|
||||
let tracker_repo_name = repo.tracker_repo.as_deref().unwrap_or("");
|
||||
if owner.is_empty() || tracker_repo_name.is_empty() {
|
||||
tracing::warn!("[{repo_id}] tracker_owner or tracker_repo not set");
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Clone/fetch the repo
|
||||
let creds = GitOps::make_repo_credentials(&self.config, repo);
|
||||
let git_ops = GitOps::new(&self.config.git_clone_base_path, creds);
|
||||
let repo_path = git_ops.clone_or_fetch(&repo.git_url, &repo.name)?;
|
||||
|
||||
// Get diff between base and head
|
||||
let diff_files = GitOps::get_diff_content(&repo_path, base_sha, head_sha)?;
|
||||
if diff_files.is_empty() {
|
||||
tracing::info!("[{repo_id}] PR #{pr_number}: no diff files, skipping review");
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Run semgrep on the full repo but we'll filter findings to changed files
|
||||
let changed_paths: std::collections::HashSet<String> =
|
||||
diff_files.iter().map(|f| f.path.clone()).collect();
|
||||
|
||||
let mut pr_findings: Vec<Finding> = Vec::new();
|
||||
|
||||
// SAST scan (semgrep)
|
||||
match SemgrepScanner.scan(&repo_path, repo_id).await {
|
||||
Ok(output) => {
|
||||
for f in output.findings {
|
||||
if let Some(fp) = &f.file_path {
|
||||
if changed_paths.contains(fp.as_str()) {
|
||||
pr_findings.push(f);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => tracing::warn!("[{repo_id}] PR semgrep failed: {e}"),
|
||||
}
|
||||
|
||||
// LLM code review on the diff
|
||||
let reviewer = CodeReviewScanner::new(self.llm.clone());
|
||||
let review_output = reviewer
|
||||
.review_diff(&repo_path, repo_id, base_sha, head_sha)
|
||||
.await;
|
||||
pr_findings.extend(review_output.findings);
|
||||
|
||||
if pr_findings.is_empty() {
|
||||
// Post a clean review
|
||||
if let Err(e) = tracker
|
||||
.create_pr_review(
|
||||
owner,
|
||||
tracker_repo_name,
|
||||
pr_number,
|
||||
"Compliance scan: no issues found in this PR.",
|
||||
Vec::new(),
|
||||
)
|
||||
.await
|
||||
{
|
||||
tracing::warn!("[{repo_id}] Failed to post clean PR review: {e}");
|
||||
}
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Build review comments from findings
|
||||
let mut review_comments = Vec::new();
|
||||
for finding in &pr_findings {
|
||||
if let (Some(path), Some(line)) = (&finding.file_path, finding.line_number) {
|
||||
let comment_body = format!(
|
||||
"**[{}] {}**\n\n{}\n\n*Scanner: {} | {}*",
|
||||
finding.severity,
|
||||
finding.title,
|
||||
finding.description,
|
||||
finding.scanner,
|
||||
finding
|
||||
.cwe
|
||||
.as_deref()
|
||||
.map(|c| format!("CWE: {c}"))
|
||||
.unwrap_or_default(),
|
||||
);
|
||||
review_comments.push(compliance_core::traits::issue_tracker::ReviewComment {
|
||||
path: path.clone(),
|
||||
line,
|
||||
body: comment_body,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
let summary = format!(
|
||||
"Compliance scan found **{}** issue(s) in this PR:\n\n{}",
|
||||
pr_findings.len(),
|
||||
pr_findings
|
||||
.iter()
|
||||
.map(|f| format!("- **[{}]** {}: {}", f.severity, f.scanner, f.title))
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n"),
|
||||
);
|
||||
|
||||
if let Err(e) = tracker
|
||||
.create_pr_review(
|
||||
owner,
|
||||
tracker_repo_name,
|
||||
pr_number,
|
||||
&summary,
|
||||
review_comments,
|
||||
)
|
||||
.await
|
||||
{
|
||||
tracing::warn!("[{repo_id}] Failed to post PR review: {e}");
|
||||
} else {
|
||||
tracing::info!(
|
||||
"[{repo_id}] Posted PR review on #{pr_number} with {} findings",
|
||||
pr_findings.len()
|
||||
);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
72
compliance-agent/src/pipeline/sbom/cargo_audit.rs
Normal file
72
compliance-agent/src/pipeline/sbom/cargo_audit.rs
Normal file
@@ -0,0 +1,72 @@
|
||||
use std::path::Path;
|
||||
|
||||
use compliance_core::CoreError;
|
||||
|
||||
pub(super) struct AuditVuln {
|
||||
pub package: String,
|
||||
pub id: String,
|
||||
pub url: String,
|
||||
}
|
||||
|
||||
#[tracing::instrument(skip_all)]
|
||||
pub(super) async fn run_cargo_audit(
|
||||
repo_path: &Path,
|
||||
_repo_id: &str,
|
||||
) -> Result<Vec<AuditVuln>, CoreError> {
|
||||
let cargo_lock = repo_path.join("Cargo.lock");
|
||||
if !cargo_lock.exists() {
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
|
||||
let output = tokio::process::Command::new("cargo")
|
||||
.args(["audit", "--json"])
|
||||
.current_dir(repo_path)
|
||||
.env("RUSTC_WRAPPER", "")
|
||||
.output()
|
||||
.await
|
||||
.map_err(|e| CoreError::Scanner {
|
||||
scanner: "cargo-audit".to_string(),
|
||||
source: Box::new(e),
|
||||
})?;
|
||||
|
||||
let result: CargoAuditOutput =
|
||||
serde_json::from_slice(&output.stdout).unwrap_or_else(|_| CargoAuditOutput {
|
||||
vulnerabilities: CargoAuditVulns { list: Vec::new() },
|
||||
});
|
||||
|
||||
let vulns = result
|
||||
.vulnerabilities
|
||||
.list
|
||||
.into_iter()
|
||||
.map(|v| AuditVuln {
|
||||
package: v.advisory.package,
|
||||
id: v.advisory.id,
|
||||
url: v.advisory.url,
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok(vulns)
|
||||
}
|
||||
|
||||
// Cargo audit types
|
||||
#[derive(serde::Deserialize)]
|
||||
struct CargoAuditOutput {
|
||||
vulnerabilities: CargoAuditVulns,
|
||||
}
|
||||
|
||||
#[derive(serde::Deserialize)]
|
||||
struct CargoAuditVulns {
|
||||
list: Vec<CargoAuditEntry>,
|
||||
}
|
||||
|
||||
#[derive(serde::Deserialize)]
|
||||
struct CargoAuditEntry {
|
||||
advisory: CargoAuditAdvisory,
|
||||
}
|
||||
|
||||
#[derive(serde::Deserialize)]
|
||||
struct CargoAuditAdvisory {
|
||||
id: String,
|
||||
package: String,
|
||||
url: String,
|
||||
}
|
||||
@@ -1,3 +1,6 @@
|
||||
mod cargo_audit;
|
||||
mod syft;
|
||||
|
||||
use std::path::Path;
|
||||
|
||||
use compliance_core::models::{SbomEntry, ScanType, VulnRef};
|
||||
@@ -23,7 +26,7 @@ impl Scanner for SbomScanner {
|
||||
generate_lockfiles(repo_path).await;
|
||||
|
||||
// Run syft for SBOM generation
|
||||
match run_syft(repo_path, repo_id).await {
|
||||
match syft::run_syft(repo_path, repo_id).await {
|
||||
Ok(syft_entries) => entries.extend(syft_entries),
|
||||
Err(e) => tracing::warn!("syft failed: {e}"),
|
||||
}
|
||||
@@ -32,7 +35,7 @@ impl Scanner for SbomScanner {
|
||||
enrich_cargo_licenses(repo_path, &mut entries).await;
|
||||
|
||||
// Run cargo-audit for Rust-specific vulns
|
||||
match run_cargo_audit(repo_path, repo_id).await {
|
||||
match cargo_audit::run_cargo_audit(repo_path, repo_id).await {
|
||||
Ok(vulns) => merge_audit_vulns(&mut entries, vulns),
|
||||
Err(e) => tracing::warn!("cargo-audit skipped: {e}"),
|
||||
}
|
||||
@@ -186,95 +189,7 @@ async fn enrich_cargo_licenses(repo_path: &Path, entries: &mut [SbomEntry]) {
|
||||
}
|
||||
}
|
||||
|
||||
#[tracing::instrument(skip_all, fields(repo_id = %repo_id))]
|
||||
async fn run_syft(repo_path: &Path, repo_id: &str) -> Result<Vec<SbomEntry>, CoreError> {
|
||||
let output = tokio::process::Command::new("syft")
|
||||
.arg(repo_path)
|
||||
.args(["-o", "cyclonedx-json"])
|
||||
// Enable remote license lookups for all ecosystems
|
||||
.env("SYFT_GOLANG_SEARCH_REMOTE_LICENSES", "true")
|
||||
.env("SYFT_JAVASCRIPT_SEARCH_REMOTE_LICENSES", "true")
|
||||
.env("SYFT_PYTHON_SEARCH_REMOTE_LICENSES", "true")
|
||||
.env("SYFT_JAVA_USE_NETWORK", "true")
|
||||
.output()
|
||||
.await
|
||||
.map_err(|e| CoreError::Scanner {
|
||||
scanner: "syft".to_string(),
|
||||
source: Box::new(e),
|
||||
})?;
|
||||
|
||||
if !output.status.success() {
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
return Err(CoreError::Scanner {
|
||||
scanner: "syft".to_string(),
|
||||
source: format!("syft exited with {}: {stderr}", output.status).into(),
|
||||
});
|
||||
}
|
||||
|
||||
let cdx: CycloneDxBom = serde_json::from_slice(&output.stdout)?;
|
||||
let entries = cdx
|
||||
.components
|
||||
.unwrap_or_default()
|
||||
.into_iter()
|
||||
.map(|c| {
|
||||
let package_manager = c
|
||||
.purl
|
||||
.as_deref()
|
||||
.and_then(extract_ecosystem_from_purl)
|
||||
.unwrap_or_else(|| "unknown".to_string());
|
||||
let mut entry = SbomEntry::new(
|
||||
repo_id.to_string(),
|
||||
c.name,
|
||||
c.version.unwrap_or_else(|| "unknown".to_string()),
|
||||
package_manager,
|
||||
);
|
||||
entry.purl = c.purl;
|
||||
entry.license = c.licenses.and_then(|ls| extract_license(&ls));
|
||||
entry
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok(entries)
|
||||
}
|
||||
|
||||
#[tracing::instrument(skip_all)]
|
||||
async fn run_cargo_audit(repo_path: &Path, _repo_id: &str) -> Result<Vec<AuditVuln>, CoreError> {
|
||||
let cargo_lock = repo_path.join("Cargo.lock");
|
||||
if !cargo_lock.exists() {
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
|
||||
let output = tokio::process::Command::new("cargo")
|
||||
.args(["audit", "--json"])
|
||||
.current_dir(repo_path)
|
||||
.env("RUSTC_WRAPPER", "")
|
||||
.output()
|
||||
.await
|
||||
.map_err(|e| CoreError::Scanner {
|
||||
scanner: "cargo-audit".to_string(),
|
||||
source: Box::new(e),
|
||||
})?;
|
||||
|
||||
let result: CargoAuditOutput =
|
||||
serde_json::from_slice(&output.stdout).unwrap_or_else(|_| CargoAuditOutput {
|
||||
vulnerabilities: CargoAuditVulns { list: Vec::new() },
|
||||
});
|
||||
|
||||
let vulns = result
|
||||
.vulnerabilities
|
||||
.list
|
||||
.into_iter()
|
||||
.map(|v| AuditVuln {
|
||||
package: v.advisory.package,
|
||||
id: v.advisory.id,
|
||||
url: v.advisory.url,
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok(vulns)
|
||||
}
|
||||
|
||||
fn merge_audit_vulns(entries: &mut [SbomEntry], vulns: Vec<AuditVuln>) {
|
||||
fn merge_audit_vulns(entries: &mut [SbomEntry], vulns: Vec<cargo_audit::AuditVuln>) {
|
||||
for vuln in vulns {
|
||||
if let Some(entry) = entries.iter_mut().find(|e| e.name == vuln.package) {
|
||||
entry.known_vulnerabilities.push(VulnRef {
|
||||
@@ -287,65 +202,6 @@ fn merge_audit_vulns(entries: &mut [SbomEntry], vulns: Vec<AuditVuln>) {
|
||||
}
|
||||
}
|
||||
|
||||
// CycloneDX JSON types
|
||||
#[derive(serde::Deserialize)]
|
||||
struct CycloneDxBom {
|
||||
components: Option<Vec<CdxComponent>>,
|
||||
}
|
||||
|
||||
#[derive(serde::Deserialize)]
|
||||
struct CdxComponent {
|
||||
name: String,
|
||||
version: Option<String>,
|
||||
#[serde(rename = "type")]
|
||||
#[allow(dead_code)]
|
||||
component_type: Option<String>,
|
||||
purl: Option<String>,
|
||||
licenses: Option<Vec<CdxLicenseWrapper>>,
|
||||
}
|
||||
|
||||
#[derive(serde::Deserialize)]
|
||||
struct CdxLicenseWrapper {
|
||||
license: Option<CdxLicense>,
|
||||
/// SPDX license expression (e.g. "MIT OR Apache-2.0")
|
||||
expression: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(serde::Deserialize)]
|
||||
struct CdxLicense {
|
||||
id: Option<String>,
|
||||
name: Option<String>,
|
||||
}
|
||||
|
||||
// Cargo audit types
|
||||
#[derive(serde::Deserialize)]
|
||||
struct CargoAuditOutput {
|
||||
vulnerabilities: CargoAuditVulns,
|
||||
}
|
||||
|
||||
#[derive(serde::Deserialize)]
|
||||
struct CargoAuditVulns {
|
||||
list: Vec<CargoAuditEntry>,
|
||||
}
|
||||
|
||||
#[derive(serde::Deserialize)]
|
||||
struct CargoAuditEntry {
|
||||
advisory: CargoAuditAdvisory,
|
||||
}
|
||||
|
||||
#[derive(serde::Deserialize)]
|
||||
struct CargoAuditAdvisory {
|
||||
id: String,
|
||||
package: String,
|
||||
url: String,
|
||||
}
|
||||
|
||||
struct AuditVuln {
|
||||
package: String,
|
||||
id: String,
|
||||
url: String,
|
||||
}
|
||||
|
||||
// Cargo metadata types
|
||||
#[derive(serde::Deserialize)]
|
||||
struct CargoMetadata {
|
||||
@@ -358,49 +214,3 @@ struct CargoPackage {
|
||||
version: String,
|
||||
license: Option<String>,
|
||||
}
|
||||
|
||||
/// Extract the best license string from CycloneDX license entries.
|
||||
/// Handles three formats: expression ("MIT OR Apache-2.0"), license.id ("MIT"), license.name ("MIT License").
|
||||
fn extract_license(entries: &[CdxLicenseWrapper]) -> Option<String> {
|
||||
// First pass: look for SPDX expressions (most precise for dual-licensed packages)
|
||||
for entry in entries {
|
||||
if let Some(ref expr) = entry.expression {
|
||||
if !expr.is_empty() {
|
||||
return Some(expr.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
// Second pass: collect license.id or license.name from all entries
|
||||
let parts: Vec<String> = entries
|
||||
.iter()
|
||||
.filter_map(|e| {
|
||||
e.license.as_ref().and_then(|lic| {
|
||||
lic.id
|
||||
.clone()
|
||||
.or_else(|| lic.name.clone())
|
||||
.filter(|s| !s.is_empty())
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
if parts.is_empty() {
|
||||
return None;
|
||||
}
|
||||
Some(parts.join(" OR "))
|
||||
}
|
||||
|
||||
/// Extract the ecosystem/package-manager from a PURL string.
|
||||
/// e.g. "pkg:npm/lodash@4.17.21" → "npm", "pkg:cargo/serde@1.0" → "cargo"
|
||||
fn extract_ecosystem_from_purl(purl: &str) -> Option<String> {
|
||||
let rest = purl.strip_prefix("pkg:")?;
|
||||
let ecosystem = rest.split('/').next()?;
|
||||
if ecosystem.is_empty() {
|
||||
return None;
|
||||
}
|
||||
// Normalise common PURL types to user-friendly names
|
||||
let normalised = match ecosystem {
|
||||
"golang" => "go",
|
||||
"pypi" => "pip",
|
||||
_ => ecosystem,
|
||||
};
|
||||
Some(normalised.to_string())
|
||||
}
|
||||
355
compliance-agent/src/pipeline/sbom/syft.rs
Normal file
355
compliance-agent/src/pipeline/sbom/syft.rs
Normal file
@@ -0,0 +1,355 @@
|
||||
use std::path::Path;
|
||||
|
||||
use compliance_core::models::SbomEntry;
|
||||
use compliance_core::CoreError;
|
||||
|
||||
#[tracing::instrument(skip_all, fields(repo_id = %repo_id))]
|
||||
pub(super) async fn run_syft(repo_path: &Path, repo_id: &str) -> Result<Vec<SbomEntry>, CoreError> {
|
||||
let output = tokio::process::Command::new("syft")
|
||||
.arg(repo_path)
|
||||
.args(["-o", "cyclonedx-json"])
|
||||
// Enable remote license lookups for all ecosystems
|
||||
.env("SYFT_GOLANG_SEARCH_REMOTE_LICENSES", "true")
|
||||
.env("SYFT_JAVASCRIPT_SEARCH_REMOTE_LICENSES", "true")
|
||||
.env("SYFT_PYTHON_SEARCH_REMOTE_LICENSES", "true")
|
||||
.env("SYFT_JAVA_USE_NETWORK", "true")
|
||||
.output()
|
||||
.await
|
||||
.map_err(|e| CoreError::Scanner {
|
||||
scanner: "syft".to_string(),
|
||||
source: Box::new(e),
|
||||
})?;
|
||||
|
||||
if !output.status.success() {
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
return Err(CoreError::Scanner {
|
||||
scanner: "syft".to_string(),
|
||||
source: format!("syft exited with {}: {stderr}", output.status).into(),
|
||||
});
|
||||
}
|
||||
|
||||
let cdx: CycloneDxBom = serde_json::from_slice(&output.stdout)?;
|
||||
let entries = cdx
|
||||
.components
|
||||
.unwrap_or_default()
|
||||
.into_iter()
|
||||
.map(|c| {
|
||||
let package_manager = c
|
||||
.purl
|
||||
.as_deref()
|
||||
.and_then(extract_ecosystem_from_purl)
|
||||
.unwrap_or_else(|| "unknown".to_string());
|
||||
let mut entry = SbomEntry::new(
|
||||
repo_id.to_string(),
|
||||
c.name,
|
||||
c.version.unwrap_or_else(|| "unknown".to_string()),
|
||||
package_manager,
|
||||
);
|
||||
entry.purl = c.purl;
|
||||
entry.license = c.licenses.and_then(|ls| extract_license(&ls));
|
||||
entry
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok(entries)
|
||||
}
|
||||
|
||||
// CycloneDX JSON types
|
||||
#[derive(serde::Deserialize)]
|
||||
struct CycloneDxBom {
|
||||
components: Option<Vec<CdxComponent>>,
|
||||
}
|
||||
|
||||
#[derive(serde::Deserialize)]
|
||||
struct CdxComponent {
|
||||
name: String,
|
||||
version: Option<String>,
|
||||
#[serde(rename = "type")]
|
||||
#[allow(dead_code)]
|
||||
component_type: Option<String>,
|
||||
purl: Option<String>,
|
||||
licenses: Option<Vec<CdxLicenseWrapper>>,
|
||||
}
|
||||
|
||||
#[derive(serde::Deserialize)]
|
||||
struct CdxLicenseWrapper {
|
||||
license: Option<CdxLicense>,
|
||||
/// SPDX license expression (e.g. "MIT OR Apache-2.0")
|
||||
expression: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(serde::Deserialize)]
|
||||
struct CdxLicense {
|
||||
id: Option<String>,
|
||||
name: Option<String>,
|
||||
}
|
||||
|
||||
/// Extract the best license string from CycloneDX license entries.
|
||||
/// Handles three formats: expression ("MIT OR Apache-2.0"), license.id ("MIT"), license.name ("MIT License").
|
||||
fn extract_license(entries: &[CdxLicenseWrapper]) -> Option<String> {
|
||||
// First pass: look for SPDX expressions (most precise for dual-licensed packages)
|
||||
for entry in entries {
|
||||
if let Some(ref expr) = entry.expression {
|
||||
if !expr.is_empty() {
|
||||
return Some(expr.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
// Second pass: collect license.id or license.name from all entries
|
||||
let parts: Vec<String> = entries
|
||||
.iter()
|
||||
.filter_map(|e| {
|
||||
e.license.as_ref().and_then(|lic| {
|
||||
lic.id
|
||||
.clone()
|
||||
.or_else(|| lic.name.clone())
|
||||
.filter(|s| !s.is_empty())
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
if parts.is_empty() {
|
||||
return None;
|
||||
}
|
||||
Some(parts.join(" OR "))
|
||||
}
|
||||
|
||||
/// Extract the ecosystem/package-manager from a PURL string.
|
||||
/// e.g. "pkg:npm/lodash@4.17.21" -> "npm", "pkg:cargo/serde@1.0" -> "cargo"
|
||||
fn extract_ecosystem_from_purl(purl: &str) -> Option<String> {
|
||||
let rest = purl.strip_prefix("pkg:")?;
|
||||
let ecosystem = rest.split('/').next()?;
|
||||
if ecosystem.is_empty() {
|
||||
return None;
|
||||
}
|
||||
// Normalise common PURL types to user-friendly names
|
||||
let normalised = match ecosystem {
|
||||
"golang" => "go",
|
||||
"pypi" => "pip",
|
||||
_ => ecosystem,
|
||||
};
|
||||
Some(normalised.to_string())
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Fixture shorthand: an entry carrying only an SPDX expression.
    fn expression_entry(expr: &str) -> CdxLicenseWrapper {
        CdxLicenseWrapper {
            license: None,
            expression: Some(expr.to_string()),
        }
    }

    /// Fixture shorthand: an entry with the given license id/name and no expression.
    fn license_entry(id: Option<&str>, name: Option<&str>) -> CdxLicenseWrapper {
        CdxLicenseWrapper {
            license: Some(CdxLicense {
                id: id.map(str::to_string),
                name: name.map(str::to_string),
            }),
            expression: None,
        }
    }

    // --- extract_ecosystem_from_purl tests ---

    #[test]
    fn purl_npm() {
        let got = extract_ecosystem_from_purl("pkg:npm/lodash@4.17.21");
        assert_eq!(got, Some("npm".to_string()));
    }

    #[test]
    fn purl_cargo() {
        let got = extract_ecosystem_from_purl("pkg:cargo/serde@1.0.197");
        assert_eq!(got, Some("cargo".to_string()));
    }

    #[test]
    fn purl_golang_normalised() {
        let got = extract_ecosystem_from_purl("pkg:golang/github.com/gin-gonic/gin@1.9.1");
        assert_eq!(got, Some("go".to_string()));
    }

    #[test]
    fn purl_pypi_normalised() {
        let got = extract_ecosystem_from_purl("pkg:pypi/requests@2.31.0");
        assert_eq!(got, Some("pip".to_string()));
    }

    #[test]
    fn purl_maven() {
        let got = extract_ecosystem_from_purl("pkg:maven/org.apache.commons/commons-lang3@3.14.0");
        assert_eq!(got, Some("maven".to_string()));
    }

    #[test]
    fn purl_missing_prefix() {
        assert_eq!(extract_ecosystem_from_purl("npm/lodash@4.17.21"), None);
    }

    #[test]
    fn purl_empty_ecosystem() {
        assert_eq!(extract_ecosystem_from_purl("pkg:/lodash@4.17.21"), None);
    }

    #[test]
    fn purl_empty_string() {
        assert_eq!(extract_ecosystem_from_purl(""), None);
    }

    #[test]
    fn purl_just_prefix() {
        assert_eq!(extract_ecosystem_from_purl("pkg:"), None);
    }

    // --- extract_license tests ---

    #[test]
    fn license_from_expression() {
        let entries = vec![expression_entry("MIT OR Apache-2.0")];
        assert_eq!(
            extract_license(&entries),
            Some("MIT OR Apache-2.0".to_string())
        );
    }

    #[test]
    fn license_from_id() {
        let entries = vec![license_entry(Some("MIT"), None)];
        assert_eq!(extract_license(&entries), Some("MIT".to_string()));
    }

    #[test]
    fn license_from_name_fallback() {
        let entries = vec![license_entry(None, Some("MIT License"))];
        assert_eq!(extract_license(&entries), Some("MIT License".to_string()));
    }

    #[test]
    fn license_expression_preferred_over_id() {
        // Expression should be preferred (first pass finds it) even though
        // the id entry comes first in the list.
        let entries = vec![
            license_entry(Some("MIT"), None),
            expression_entry("MIT AND Apache-2.0"),
        ];
        assert_eq!(
            extract_license(&entries),
            Some("MIT AND Apache-2.0".to_string())
        );
    }

    #[test]
    fn license_multiple_ids_joined() {
        let entries = vec![
            license_entry(Some("MIT"), None),
            license_entry(Some("Apache-2.0"), None),
        ];
        assert_eq!(
            extract_license(&entries),
            Some("MIT OR Apache-2.0".to_string())
        );
    }

    #[test]
    fn license_empty_entries() {
        let entries: Vec<CdxLicenseWrapper> = Vec::new();
        assert_eq!(extract_license(&entries), None);
    }

    #[test]
    fn license_all_empty_strings() {
        // Empty strings in id, name, AND expression must all be treated as absent.
        let mut entry = license_entry(Some(""), Some(""));
        entry.expression = Some(String::new());
        let entries = vec![entry];
        assert_eq!(extract_license(&entries), None);
    }

    #[test]
    fn license_none_fields() {
        let entries = vec![CdxLicenseWrapper {
            license: None,
            expression: None,
        }];
        assert_eq!(extract_license(&entries), None);
    }

    // --- CycloneDX deserialization tests ---

    #[test]
    fn deserialize_cyclonedx_bom() {
        let json = r#"{
            "components": [
                {
                    "name": "serde",
                    "version": "1.0.197",
                    "type": "library",
                    "purl": "pkg:cargo/serde@1.0.197",
                    "licenses": [
                        {"expression": "MIT OR Apache-2.0"}
                    ]
                }
            ]
        }"#;
        let parsed: CycloneDxBom = serde_json::from_str(json).unwrap();
        let components = parsed.components.unwrap();
        assert_eq!(components.len(), 1);
        let component = &components[0];
        assert_eq!(component.name, "serde");
        assert_eq!(component.version.as_deref(), Some("1.0.197"));
        assert_eq!(component.purl.as_deref(), Some("pkg:cargo/serde@1.0.197"));
    }

    #[test]
    fn deserialize_cyclonedx_no_components() {
        let parsed: CycloneDxBom = serde_json::from_str(r#"{}"#).unwrap();
        assert!(parsed.components.is_none());
    }

    #[test]
    fn deserialize_cyclonedx_minimal_component() {
        let parsed: CycloneDxBom =
            serde_json::from_str(r#"{"components": [{"name": "foo"}]}"#).unwrap();
        let component = &parsed.components.unwrap()[0];
        assert_eq!(component.name, "foo");
        assert!(component.version.is_none());
        assert!(component.purl.is_none());
        assert!(component.licenses.is_none());
    }
}
|
||||
@@ -108,3 +108,124 @@ struct SemgrepExtra {
|
||||
#[serde(default)]
|
||||
metadata: Option<serde_json::Value>,
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses a JSON fixture into a `SemgrepOutput`, panicking on bad fixtures.
    fn parse(json: &str) -> SemgrepOutput {
        serde_json::from_str(json).expect("fixture should be valid semgrep JSON")
    }

    #[test]
    fn deserialize_semgrep_output() {
        let output = parse(
            r#"{
            "results": [
                {
                    "check_id": "python.lang.security.audit.exec-detected",
                    "path": "src/main.py",
                    "start": {"line": 15},
                    "extra": {
                        "message": "Detected use of exec()",
                        "severity": "ERROR",
                        "lines": "exec(user_input)",
                        "metadata": {"cwe": "CWE-78"}
                    }
                }
            ]
        }"#,
        );
        assert_eq!(output.results.len(), 1);

        let finding = &output.results[0];
        assert_eq!(finding.check_id, "python.lang.security.audit.exec-detected");
        assert_eq!(finding.path, "src/main.py");
        assert_eq!(finding.start.line, 15);
        assert_eq!(finding.extra.message, "Detected use of exec()");
        assert_eq!(finding.extra.severity, "ERROR");
        assert_eq!(finding.extra.lines, "exec(user_input)");
        let metadata = finding.extra.metadata.as_ref().expect("metadata present");
        assert_eq!(metadata["cwe"].as_str(), Some("CWE-78"));
    }

    #[test]
    fn deserialize_semgrep_empty_results() {
        let output = parse(r#"{"results": []}"#);
        assert!(output.results.is_empty());
    }

    #[test]
    fn deserialize_semgrep_no_metadata() {
        let output = parse(
            r#"{
            "results": [
                {
                    "check_id": "rule-1",
                    "path": "app.py",
                    "start": {"line": 1},
                    "extra": {
                        "message": "found something",
                        "severity": "WARNING",
                        "lines": "import os"
                    }
                }
            ]
        }"#,
        );
        assert!(output.results[0].extra.metadata.is_none());
    }

    #[test]
    fn semgrep_severity_mapping() {
        // NOTE(review): this test re-implements the severity mapping inline
        // instead of calling the production mapping code, so it pins the
        // expected table but cannot catch a regression in the real code
        // path — consider invoking the actual mapper. TODO confirm where
        // that function lives.
        let map = |severity: &str| match severity {
            "ERROR" => "High",
            "WARNING" => "Medium",
            "INFO" => "Low",
            _ => "Info",
        };
        let cases = [
            ("ERROR", "High"),
            ("WARNING", "Medium"),
            ("INFO", "Low"),
            ("UNKNOWN", "Info"),
        ];
        for (input, expected) in cases {
            assert_eq!(map(input), expected, "Severity for '{input}'");
        }
    }

    #[test]
    fn deserialize_semgrep_multiple_results() {
        let output = parse(
            r#"{
            "results": [
                {
                    "check_id": "rule-a",
                    "path": "a.py",
                    "start": {"line": 1},
                    "extra": {
                        "message": "msg a",
                        "severity": "ERROR",
                        "lines": "line a"
                    }
                },
                {
                    "check_id": "rule-b",
                    "path": "b.py",
                    "start": {"line": 99},
                    "extra": {
                        "message": "msg b",
                        "severity": "INFO",
                        "lines": "line b"
                    }
                }
            ]
        }"#,
        );
        assert_eq!(output.results.len(), 2);
        assert_eq!(output.results[1].start.line, 99);
    }
}
|
||||
|
||||
81
compliance-agent/src/pipeline/tracker_dispatch.rs
Normal file
81
compliance-agent/src/pipeline/tracker_dispatch.rs
Normal file
@@ -0,0 +1,81 @@
|
||||
use compliance_core::models::TrackerIssue;
|
||||
use compliance_core::traits::issue_tracker::IssueTracker;
|
||||
|
||||
use crate::trackers;
|
||||
|
||||
/// Enum dispatch for issue trackers (async traits aren't dyn-compatible).
///
/// Each variant wraps one concrete tracker implementation from
/// `crate::trackers`; the methods on this enum forward every call to
/// whichever variant is active. Adding a new tracker backend requires a
/// new variant here plus a matching arm in each dispatch method.
pub(crate) enum TrackerDispatch {
    /// Wraps the GitHub tracker implementation.
    GitHub(trackers::github::GitHubTracker),
    /// Wraps the GitLab tracker implementation.
    GitLab(trackers::gitlab::GitLabTracker),
    /// Wraps the Gitea tracker implementation.
    Gitea(trackers::gitea::GiteaTracker),
    /// Wraps the Jira tracker implementation.
    Jira(trackers::jira::JiraTracker),
}
|
||||
|
||||
impl TrackerDispatch {
|
||||
pub(crate) fn name(&self) -> &str {
|
||||
match self {
|
||||
Self::GitHub(t) => t.name(),
|
||||
Self::GitLab(t) => t.name(),
|
||||
Self::Gitea(t) => t.name(),
|
||||
Self::Jira(t) => t.name(),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) async fn create_issue(
|
||||
&self,
|
||||
owner: &str,
|
||||
repo: &str,
|
||||
title: &str,
|
||||
body: &str,
|
||||
labels: &[String],
|
||||
) -> Result<TrackerIssue, compliance_core::error::CoreError> {
|
||||
match self {
|
||||
Self::GitHub(t) => t.create_issue(owner, repo, title, body, labels).await,
|
||||
Self::GitLab(t) => t.create_issue(owner, repo, title, body, labels).await,
|
||||
Self::Gitea(t) => t.create_issue(owner, repo, title, body, labels).await,
|
||||
Self::Jira(t) => t.create_issue(owner, repo, title, body, labels).await,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) async fn find_existing_issue(
|
||||
&self,
|
||||
owner: &str,
|
||||
repo: &str,
|
||||
fingerprint: &str,
|
||||
) -> Result<Option<TrackerIssue>, compliance_core::error::CoreError> {
|
||||
match self {
|
||||
Self::GitHub(t) => t.find_existing_issue(owner, repo, fingerprint).await,
|
||||
Self::GitLab(t) => t.find_existing_issue(owner, repo, fingerprint).await,
|
||||
Self::Gitea(t) => t.find_existing_issue(owner, repo, fingerprint).await,
|
||||
Self::Jira(t) => t.find_existing_issue(owner, repo, fingerprint).await,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) async fn create_pr_review(
|
||||
&self,
|
||||
owner: &str,
|
||||
repo: &str,
|
||||
pr_number: u64,
|
||||
body: &str,
|
||||
comments: Vec<compliance_core::traits::issue_tracker::ReviewComment>,
|
||||
) -> Result<(), compliance_core::error::CoreError> {
|
||||
match self {
|
||||
Self::GitHub(t) => {
|
||||
t.create_pr_review(owner, repo, pr_number, body, comments)
|
||||
.await
|
||||
}
|
||||
Self::GitLab(t) => {
|
||||
t.create_pr_review(owner, repo, pr_number, body, comments)
|
||||
.await
|
||||
}
|
||||
Self::Gitea(t) => {
|
||||
t.create_pr_review(owner, repo, pr_number, body, comments)
|
||||
.await
|
||||
}
|
||||
Self::Jira(t) => {
|
||||
t.create_pr_review(owner, repo, pr_number, body, comments)
|
||||
.await
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user