feat: findings refinement, new scanners, and deployment tooling (#6)
Some checks failed
CI / Format (push) Successful in 3s
CI / Clippy (push) Successful in 4m3s
CI / Security Audit (push) Successful in 1m38s
CI / Tests (push) Successful in 4m44s
CI / Detect Changes (push) Successful in 2s
CI / Deploy Agent (push) Successful in 2s
CI / Deploy Dashboard (push) Successful in 2s
CI / Deploy Docs (push) Has been skipped
CI / Deploy MCP (push) Failing after 2s

This commit was merged in pull request #6.
This commit is contained in:
2026-03-09 12:53:12 +00:00
parent 32e5fc21e7
commit 46bf9de549
40 changed files with 2048 additions and 118 deletions

View File

@@ -187,7 +187,13 @@ pub async fn build_embeddings(
}
};
let git_ops = crate::pipeline::git::GitOps::new(&agent_clone.config.git_clone_base_path);
let creds = crate::pipeline::git::RepoCredentials {
ssh_key_path: Some(agent_clone.config.ssh_key_path.clone()),
auth_token: repo.auth_token.clone(),
auth_username: repo.auth_username.clone(),
};
let git_ops =
crate::pipeline::git::GitOps::new(&agent_clone.config.git_clone_base_path, creds);
let repo_path = match git_ops.clone_or_fetch(&repo.git_url, &repo.name) {
Ok(p) => p,
Err(e) => {

View File

@@ -291,7 +291,13 @@ pub async fn trigger_build(
}
};
let git_ops = crate::pipeline::git::GitOps::new(&agent_clone.config.git_clone_base_path);
let creds = crate::pipeline::git::RepoCredentials {
ssh_key_path: Some(agent_clone.config.ssh_key_path.clone()),
auth_token: repo.auth_token.clone(),
auth_username: repo.auth_username.clone(),
};
let git_ops =
crate::pipeline::git::GitOps::new(&agent_clone.config.git_clone_base_path, creds);
let repo_path = match git_ops.clone_or_fetch(&repo.git_url, &repo.name) {
Ok(p) => p,
Err(e) => {

View File

@@ -41,6 +41,12 @@ pub struct FindingsFilter {
pub scan_type: Option<String>,
#[serde(default)]
pub status: Option<String>,
#[serde(default)]
pub q: Option<String>,
#[serde(default)]
pub sort_by: Option<String>,
#[serde(default)]
pub sort_order: Option<String>,
#[serde(default = "default_page")]
pub page: u64,
#[serde(default = "default_limit")]
@@ -76,6 +82,8 @@ pub struct AddRepositoryRequest {
pub git_url: String,
#[serde(default = "default_branch")]
pub default_branch: String,
pub auth_token: Option<String>,
pub auth_username: Option<String>,
pub tracker_type: Option<TrackerType>,
pub tracker_owner: Option<String>,
pub tracker_repo: Option<String>,
@@ -91,6 +99,17 @@ pub struct UpdateStatusRequest {
pub status: String,
}
#[derive(Deserialize)]
pub struct BulkUpdateStatusRequest {
pub ids: Vec<String>,
pub status: String,
}
#[derive(Deserialize)]
pub struct UpdateFeedbackRequest {
pub feedback: String,
}
#[derive(Deserialize)]
pub struct SbomFilter {
#[serde(default)]
@@ -267,9 +286,25 @@ pub async fn list_repositories(
pub async fn add_repository(
Extension(agent): AgentExt,
Json(req): Json<AddRepositoryRequest>,
) -> Result<Json<ApiResponse<TrackedRepository>>, StatusCode> {
) -> Result<Json<ApiResponse<TrackedRepository>>, (StatusCode, String)> {
// Validate repository access before saving
let creds = crate::pipeline::git::RepoCredentials {
ssh_key_path: Some(agent.config.ssh_key_path.clone()),
auth_token: req.auth_token.clone(),
auth_username: req.auth_username.clone(),
};
if let Err(e) = crate::pipeline::git::GitOps::test_access(&req.git_url, &creds) {
return Err((
StatusCode::BAD_REQUEST,
format!("Cannot access repository: {e}"),
));
}
let mut repo = TrackedRepository::new(req.name, req.git_url);
repo.default_branch = req.default_branch;
repo.auth_token = req.auth_token;
repo.auth_username = req.auth_username;
repo.tracker_type = req.tracker_type;
repo.tracker_owner = req.tracker_owner;
repo.tracker_repo = req.tracker_repo;
@@ -280,7 +315,12 @@ pub async fn add_repository(
.repositories()
.insert_one(&repo)
.await
.map_err(|_| StatusCode::CONFLICT)?;
.map_err(|_| {
(
StatusCode::CONFLICT,
"Repository already exists".to_string(),
)
})?;
Ok(Json(ApiResponse {
data: repo,
@@ -289,6 +329,14 @@ pub async fn add_repository(
}))
}
pub async fn get_ssh_public_key(
Extension(agent): AgentExt,
) -> Result<Json<serde_json::Value>, StatusCode> {
let public_path = format!("{}.pub", agent.config.ssh_key_path);
let public_key = std::fs::read_to_string(&public_path).map_err(|_| StatusCode::NOT_FOUND)?;
Ok(Json(serde_json::json!({ "public_key": public_key.trim() })))
}
pub async fn trigger_scan(
Extension(agent): AgentExt,
Path(id): Path<String>,
@@ -367,6 +415,29 @@ pub async fn list_findings(
if let Some(status) = &filter.status {
query.insert("status", status);
}
// Text search across title, description, file_path, rule_id
if let Some(q) = &filter.q {
if !q.is_empty() {
let regex = doc! { "$regex": q, "$options": "i" };
query.insert(
"$or",
mongodb::bson::bson!([
{ "title": regex.clone() },
{ "description": regex.clone() },
{ "file_path": regex.clone() },
{ "rule_id": regex },
]),
);
}
}
// Dynamic sort
let sort_field = filter.sort_by.as_deref().unwrap_or("created_at");
let sort_dir: i32 = match filter.sort_order.as_deref() {
Some("asc") => 1,
_ => -1,
};
let sort_doc = doc! { sort_field: sort_dir };
let skip = (filter.page.saturating_sub(1)) * filter.limit as u64;
let total = db
@@ -378,7 +449,7 @@ pub async fn list_findings(
let findings = match db
.findings()
.find(query)
.sort(doc! { "created_at": -1 })
.sort(sort_doc)
.skip(skip)
.limit(filter.limit)
.await
@@ -434,6 +505,55 @@ pub async fn update_finding_status(
Ok(Json(serde_json::json!({ "status": "updated" })))
}
pub async fn bulk_update_finding_status(
Extension(agent): AgentExt,
Json(req): Json<BulkUpdateStatusRequest>,
) -> Result<Json<serde_json::Value>, StatusCode> {
let oids: Vec<mongodb::bson::oid::ObjectId> = req
.ids
.iter()
.filter_map(|id| mongodb::bson::oid::ObjectId::parse_str(id).ok())
.collect();
if oids.is_empty() {
return Err(StatusCode::BAD_REQUEST);
}
let result = agent
.db
.findings()
.update_many(
doc! { "_id": { "$in": oids } },
doc! { "$set": { "status": &req.status, "updated_at": mongodb::bson::DateTime::now() } },
)
.await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
Ok(Json(
serde_json::json!({ "status": "updated", "modified_count": result.modified_count }),
))
}
pub async fn update_finding_feedback(
Extension(agent): AgentExt,
Path(id): Path<String>,
Json(req): Json<UpdateFeedbackRequest>,
) -> Result<Json<serde_json::Value>, StatusCode> {
let oid = mongodb::bson::oid::ObjectId::parse_str(&id).map_err(|_| StatusCode::BAD_REQUEST)?;
agent
.db
.findings()
.update_one(
doc! { "_id": oid },
doc! { "$set": { "developer_feedback": &req.feedback, "updated_at": mongodb::bson::DateTime::now() } },
)
.await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
Ok(Json(serde_json::json!({ "status": "updated" })))
}
pub async fn list_sbom(
Extension(agent): AgentExt,
Query(filter): Query<SbomFilter>,

View File

@@ -7,6 +7,10 @@ pub fn build_router() -> Router {
Router::new()
.route("/api/v1/health", get(handlers::health))
.route("/api/v1/stats/overview", get(handlers::stats_overview))
.route(
"/api/v1/settings/ssh-public-key",
get(handlers::get_ssh_public_key),
)
.route("/api/v1/repositories", get(handlers::list_repositories))
.route("/api/v1/repositories", post(handlers::add_repository))
.route(
@@ -23,6 +27,14 @@ pub fn build_router() -> Router {
"/api/v1/findings/{id}/status",
patch(handlers::update_finding_status),
)
.route(
"/api/v1/findings/bulk-status",
patch(handlers::bulk_update_finding_status),
)
.route(
"/api/v1/findings/{id}/feedback",
patch(handlers::update_finding_feedback),
)
.route("/api/v1/sbom", get(handlers::list_sbom))
.route("/api/v1/sbom/export", get(handlers::export_sbom))
.route("/api/v1/sbom/licenses", get(handlers::license_summary))

View File

@@ -45,6 +45,8 @@ pub fn load_config() -> Result<AgentConfig, AgentError> {
.unwrap_or_else(|| "0 0 0 * * *".to_string()),
git_clone_base_path: env_var_opt("GIT_CLONE_BASE_PATH")
.unwrap_or_else(|| "/tmp/compliance-scanner/repos".to_string()),
ssh_key_path: env_var_opt("SSH_KEY_PATH")
.unwrap_or_else(|| "/data/compliance-scanner/ssh/id_ed25519".to_string()),
keycloak_url: env_var_opt("KEYCLOAK_URL"),
keycloak_realm: env_var_opt("KEYCLOAK_REALM"),
})

View File

@@ -5,6 +5,7 @@ pub mod descriptions;
pub mod fixes;
#[allow(dead_code)]
pub mod pr_review;
pub mod review_prompts;
pub mod triage;
pub use client::LlmClient;

View File

@@ -0,0 +1,77 @@
// System prompts for multi-pass LLM code review.
// Each pass focuses on a different aspect to avoid overloading a single prompt.
pub const LOGIC_REVIEW_PROMPT: &str = r#"You are a senior software engineer reviewing code changes. Focus ONLY on logic and correctness issues.
Look for:
- Off-by-one errors, wrong comparisons, missing edge cases
- Incorrect control flow (unreachable code, missing returns, wrong loop conditions)
- Race conditions or concurrency bugs
- Resource leaks (unclosed handles, missing cleanup)
- Wrong variable used (copy-paste errors)
- Incorrect error handling (swallowed errors, wrong error type)
Ignore: style, naming, formatting, documentation, minor improvements.
For each issue found, respond with a JSON array:
[{"title": "...", "description": "...", "severity": "high|medium|low", "file": "...", "line": N, "suggestion": "..."}]
If no issues found, respond with: []"#;
pub const SECURITY_REVIEW_PROMPT: &str = r#"You are a security engineer reviewing code changes. Focus ONLY on security vulnerabilities.
Look for:
- Injection vulnerabilities (SQL, command, XSS, template injection)
- Authentication/authorization bypasses
- Sensitive data exposure (logging secrets, hardcoded credentials)
- Insecure cryptography (weak algorithms, predictable randomness)
- Path traversal, SSRF, open redirects
- Unsafe deserialization
- Missing input validation at trust boundaries
Ignore: code style, performance, general quality.
For each issue found, respond with a JSON array:
[{"title": "...", "description": "...", "severity": "critical|high|medium", "file": "...", "line": N, "cwe": "CWE-XXX", "suggestion": "..."}]
If no issues found, respond with: []"#;
pub const CONVENTION_REVIEW_PROMPT: &str = r#"You are a code reviewer checking adherence to project conventions. Focus ONLY on patterns that indicate likely bugs or maintenance problems.
Look for:
- Inconsistent error handling patterns within the same module
- Public API that doesn't follow the project's established patterns
- Missing or incorrect type annotations that could cause runtime issues
- Anti-patterns specific to the language (e.g. unwrap in Rust library code, any in TypeScript)
Do NOT report: minor style preferences, documentation gaps, formatting.
Only report issues with HIGH confidence that they deviate from the visible codebase conventions.
For each issue found, respond with a JSON array:
[{"title": "...", "description": "...", "severity": "medium|low", "file": "...", "line": N, "suggestion": "..."}]
If no issues found, respond with: []"#;
pub const COMPLEXITY_REVIEW_PROMPT: &str = r#"You are reviewing code changes for excessive complexity that could lead to bugs.
Look for:
- Functions over 50 lines that should be decomposed
- Deeply nested control flow (4+ levels)
- Complex boolean expressions that are hard to reason about
- Functions with 5+ parameters
- Code duplication within the changed files
Only report complexity issues that are HIGH risk for future bugs. Ignore acceptable complexity in configuration, CLI argument parsing, or generated code.
For each issue found, respond with a JSON array:
[{"title": "...", "description": "...", "severity": "medium|low", "file": "...", "line": N, "suggestion": "..."}]
If no issues found, respond with: []"#;
/// All review types with their prompts
pub const REVIEW_PASSES: &[(&str, &str)] = &[
("logic", LOGIC_REVIEW_PROMPT),
("security", SECURITY_REVIEW_PROMPT),
("convention", CONVENTION_REVIEW_PROMPT),
("complexity", COMPLEXITY_REVIEW_PROMPT),
];

View File

@@ -5,13 +5,22 @@ use compliance_core::models::{Finding, FindingStatus};
use crate::llm::LlmClient;
use crate::pipeline::orchestrator::GraphContext;
const TRIAGE_SYSTEM_PROMPT: &str = r#"You are a security finding triage expert. Analyze the following security finding and determine:
1. Is this a true positive? (yes/no)
2. Confidence score (0-10, where 10 is highest confidence this is a real issue)
3. Brief remediation suggestion (1-2 sentences)
const TRIAGE_SYSTEM_PROMPT: &str = r#"You are a security finding triage expert. Analyze the following security finding with its code context and determine the appropriate action.
Actions:
- "confirm": The finding is a true positive at the reported severity. Keep as-is.
- "downgrade": The finding is real but over-reported. Lower severity recommended.
- "upgrade": The finding is under-reported. Higher severity recommended.
- "dismiss": The finding is a false positive. Should be removed.
Consider:
- Is the code in a test, example, or generated file? (lower confidence for test code)
- Does the surrounding code context confirm or refute the finding?
- Is the finding actionable by a developer?
- Would a real attacker be able to exploit this?
Respond in JSON format:
{"true_positive": true/false, "confidence": N, "remediation": "..."}"#;
{"action": "confirm|downgrade|upgrade|dismiss", "confidence": 0-10, "rationale": "brief explanation", "remediation": "optional fix suggestion"}"#;
pub async fn triage_findings(
llm: &Arc<LlmClient>,
@@ -21,8 +30,10 @@ pub async fn triage_findings(
let mut passed = 0;
for finding in findings.iter_mut() {
let file_classification = classify_file_path(finding.file_path.as_deref());
let mut user_prompt = format!(
"Scanner: {}\nRule: {}\nSeverity: {}\nTitle: {}\nDescription: {}\nFile: {}\nLine: {}\nCode: {}",
"Scanner: {}\nRule: {}\nSeverity: {}\nTitle: {}\nDescription: {}\nFile: {}\nLine: {}\nCode: {}\nFile classification: {}",
finding.scanner,
finding.rule_id.as_deref().unwrap_or("N/A"),
finding.severity,
@@ -31,8 +42,16 @@ pub async fn triage_findings(
finding.file_path.as_deref().unwrap_or("N/A"),
finding.line_number.map(|n| n.to_string()).unwrap_or_else(|| "N/A".to_string()),
finding.code_snippet.as_deref().unwrap_or("N/A"),
file_classification,
);
// Enrich with surrounding code context if possible
if let Some(context) = read_surrounding_context(finding) {
user_prompt.push_str(&format!(
"\n\n--- Surrounding Code (50 lines) ---\n{context}"
));
}
// Enrich with graph context if available
if let Some(ctx) = graph_context {
if let Some(impact) = ctx
@@ -69,32 +88,55 @@ pub async fn triage_findings(
.await
{
Ok(response) => {
// Strip markdown code fences if present (e.g. ```json ... ```)
let cleaned = response.trim();
let cleaned = if cleaned.starts_with("```") {
let inner = cleaned
cleaned
.trim_start_matches("```json")
.trim_start_matches("```")
.trim_end_matches("```")
.trim();
inner
.trim()
} else {
cleaned
};
if let Ok(result) = serde_json::from_str::<TriageResult>(cleaned) {
finding.confidence = Some(result.confidence);
// Apply file-path confidence adjustment
let adjusted_confidence =
adjust_confidence(result.confidence, &file_classification);
finding.confidence = Some(adjusted_confidence);
finding.triage_action = Some(result.action.clone());
finding.triage_rationale = Some(result.rationale);
if let Some(remediation) = result.remediation {
finding.remediation = Some(remediation);
}
if result.confidence >= 3.0 {
finding.status = FindingStatus::Triaged;
passed += 1;
} else {
finding.status = FindingStatus::FalsePositive;
match result.action.as_str() {
"dismiss" => {
finding.status = FindingStatus::FalsePositive;
}
"downgrade" => {
// Downgrade severity by one level
finding.severity = downgrade_severity(&finding.severity);
finding.status = FindingStatus::Triaged;
passed += 1;
}
"upgrade" => {
finding.severity = upgrade_severity(&finding.severity);
finding.status = FindingStatus::Triaged;
passed += 1;
}
_ => {
// "confirm" or unknown — keep as-is
if adjusted_confidence >= 3.0 {
finding.status = FindingStatus::Triaged;
passed += 1;
} else {
finding.status = FindingStatus::FalsePositive;
}
}
}
} else {
// If LLM response doesn't parse, keep the finding
// Parse failure — keep the finding
finding.status = FindingStatus::Triaged;
passed += 1;
tracing::warn!(
@@ -117,12 +159,122 @@ pub async fn triage_findings(
passed
}
/// Read ~50 lines of surrounding code from the file at the finding's location
fn read_surrounding_context(finding: &Finding) -> Option<String> {
let file_path = finding.file_path.as_deref()?;
let line = finding.line_number? as usize;
// Try to read the file — this works because the repo is cloned locally
let content = std::fs::read_to_string(file_path).ok()?;
let lines: Vec<&str> = content.lines().collect();
let start = line.saturating_sub(25);
let end = (line + 25).min(lines.len());
Some(
lines[start..end]
.iter()
.enumerate()
.map(|(i, l)| format!("{:>4} | {}", start + i + 1, l))
.collect::<Vec<_>>()
.join("\n"),
)
}
/// Classify a file path to inform triage confidence adjustment
fn classify_file_path(path: Option<&str>) -> String {
let path = match path {
Some(p) => p.to_lowercase(),
None => return "unknown".to_string(),
};
if path.contains("/test/")
|| path.contains("/tests/")
|| path.contains("_test.")
|| path.contains(".test.")
|| path.contains(".spec.")
|| path.contains("/fixtures/")
|| path.contains("/testdata/")
{
return "test".to_string();
}
if path.contains("/example")
|| path.contains("/examples/")
|| path.contains("/demo/")
|| path.contains("/sample")
{
return "example".to_string();
}
if path.contains("/generated/")
|| path.contains("/gen/")
|| path.contains(".generated.")
|| path.contains(".pb.go")
|| path.contains("_generated.rs")
{
return "generated".to_string();
}
if path.contains("/vendor/")
|| path.contains("/node_modules/")
|| path.contains("/third_party/")
{
return "vendored".to_string();
}
"production".to_string()
}
/// Adjust confidence based on file classification
fn adjust_confidence(raw_confidence: f64, classification: &str) -> f64 {
let multiplier = match classification {
"test" => 0.5,
"example" => 0.6,
"generated" => 0.3,
"vendored" => 0.4,
_ => 1.0,
};
raw_confidence * multiplier
}
fn downgrade_severity(
severity: &compliance_core::models::Severity,
) -> compliance_core::models::Severity {
use compliance_core::models::Severity;
match severity {
Severity::Critical => Severity::High,
Severity::High => Severity::Medium,
Severity::Medium => Severity::Low,
Severity::Low => Severity::Info,
Severity::Info => Severity::Info,
}
}
fn upgrade_severity(
severity: &compliance_core::models::Severity,
) -> compliance_core::models::Severity {
use compliance_core::models::Severity;
match severity {
Severity::Info => Severity::Low,
Severity::Low => Severity::Medium,
Severity::Medium => Severity::High,
Severity::High => Severity::Critical,
Severity::Critical => Severity::Critical,
}
}
#[derive(serde::Deserialize)]
struct TriageResult {
#[serde(default)]
#[allow(dead_code)]
true_positive: bool,
#[serde(default = "default_action")]
action: String,
#[serde(default)]
confidence: f64,
#[serde(default)]
rationale: String,
remediation: Option<String>,
}
fn default_action() -> String {
"confirm".to_string()
}

View File

@@ -7,6 +7,7 @@ mod llm;
mod pipeline;
mod rag;
mod scheduler;
mod ssh;
#[allow(dead_code)]
mod trackers;
mod webhooks;
@@ -20,6 +21,12 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
tracing::info!("Loading configuration...");
let config = config::load_config()?;
// Ensure SSH key pair exists for cloning private repos
match ssh::ensure_ssh_key(&config.ssh_key_path) {
Ok(pubkey) => tracing::info!("SSH public key: {}", pubkey.trim()),
Err(e) => tracing::warn!("SSH key generation skipped: {e}"),
}
tracing::info!("Connecting to MongoDB...");
let db = database::Database::connect(&config.mongodb_uri, &config.mongodb_database).await?;
db.ensure_indexes().await?;

View File

@@ -0,0 +1,186 @@
use std::path::Path;
use std::sync::Arc;
use compliance_core::models::{Finding, ScanType, Severity};
use compliance_core::traits::ScanOutput;
use crate::llm::review_prompts::REVIEW_PASSES;
use crate::llm::LlmClient;
use crate::pipeline::dedup;
use crate::pipeline::git::{DiffFile, GitOps};
pub struct CodeReviewScanner {
llm: Arc<LlmClient>,
}
impl CodeReviewScanner {
pub fn new(llm: Arc<LlmClient>) -> Self {
Self { llm }
}
/// Run multi-pass LLM code review on the diff between old and new commits.
pub async fn review_diff(
&self,
repo_path: &Path,
repo_id: &str,
old_sha: &str,
new_sha: &str,
) -> ScanOutput {
let diff_files = match GitOps::get_diff_content(repo_path, old_sha, new_sha) {
Ok(files) => files,
Err(e) => {
tracing::warn!("Failed to extract diff for code review: {e}");
return ScanOutput::default();
}
};
if diff_files.is_empty() {
return ScanOutput::default();
}
let mut all_findings = Vec::new();
// Chunk diff files into groups to avoid exceeding context limits
let chunks = chunk_diff_files(&diff_files, 8000);
for (pass_name, system_prompt) in REVIEW_PASSES {
for chunk in &chunks {
let user_prompt = format!(
"Review the following code changes:\n\n{}",
chunk
.iter()
.map(|f| format!("--- {} ---\n{}", f.path, f.hunks))
.collect::<Vec<_>>()
.join("\n\n")
);
match self.llm.chat(system_prompt, &user_prompt, Some(0.1)).await {
Ok(response) => {
let parsed = parse_review_response(&response, pass_name, repo_id, chunk);
all_findings.extend(parsed);
}
Err(e) => {
tracing::warn!("Code review pass '{pass_name}' failed: {e}");
}
}
}
}
ScanOutput {
findings: all_findings,
sbom_entries: Vec::new(),
}
}
}
/// Group diff files into chunks that fit within a token budget (rough char estimate)
fn chunk_diff_files(files: &[DiffFile], max_chars: usize) -> Vec<Vec<&DiffFile>> {
let mut chunks: Vec<Vec<&DiffFile>> = Vec::new();
let mut current_chunk: Vec<&DiffFile> = Vec::new();
let mut current_size = 0;
for file in files {
if current_size + file.hunks.len() > max_chars && !current_chunk.is_empty() {
chunks.push(std::mem::take(&mut current_chunk));
current_size = 0;
}
current_chunk.push(file);
current_size += file.hunks.len();
}
if !current_chunk.is_empty() {
chunks.push(current_chunk);
}
chunks
}
fn parse_review_response(
response: &str,
pass_name: &str,
repo_id: &str,
chunk: &[&DiffFile],
) -> Vec<Finding> {
let cleaned = response.trim();
let cleaned = if cleaned.starts_with("```") {
cleaned
.trim_start_matches("```json")
.trim_start_matches("```")
.trim_end_matches("```")
.trim()
} else {
cleaned
};
let issues: Vec<ReviewIssue> = match serde_json::from_str(cleaned) {
Ok(v) => v,
Err(_) => {
if cleaned != "[]" {
tracing::debug!("Failed to parse {pass_name} review response: {cleaned}");
}
return Vec::new();
}
};
issues
.into_iter()
.filter(|issue| {
// Verify the file exists in the diff chunk
chunk.iter().any(|f| f.path == issue.file)
})
.map(|issue| {
let severity = match issue.severity.as_str() {
"critical" => Severity::Critical,
"high" => Severity::High,
"medium" => Severity::Medium,
"low" => Severity::Low,
_ => Severity::Info,
};
let fingerprint = dedup::compute_fingerprint(&[
repo_id,
"code-review",
pass_name,
&issue.file,
&issue.line.to_string(),
&issue.title,
]);
let description = if let Some(suggestion) = &issue.suggestion {
format!("{}\n\nSuggested fix: {}", issue.description, suggestion)
} else {
issue.description.clone()
};
let mut finding = Finding::new(
repo_id.to_string(),
fingerprint,
format!("code-review/{pass_name}"),
ScanType::CodeReview,
issue.title,
description,
severity,
);
finding.rule_id = Some(format!("review/{pass_name}"));
finding.file_path = Some(issue.file);
finding.line_number = Some(issue.line);
finding.cwe = issue.cwe;
finding.suggested_fix = issue.suggestion;
finding
})
.collect()
}
#[derive(serde::Deserialize)]
struct ReviewIssue {
title: String,
description: String,
severity: String,
file: String,
#[serde(default)]
line: u32,
#[serde(default)]
cwe: Option<String>,
#[serde(default)]
suggestion: Option<String>,
}

View File

@@ -64,6 +64,8 @@ impl CveScanner {
}
async fn query_osv_batch(&self, entries: &[SbomEntry]) -> Result<Vec<Vec<OsvVuln>>, CoreError> {
const OSV_BATCH_SIZE: usize = 500;
let queries: Vec<_> = entries
.iter()
.filter_map(|e| {
@@ -79,32 +81,34 @@ impl CveScanner {
return Ok(Vec::new());
}
let body = serde_json::json!({ "queries": queries });
let mut all_vulns: Vec<Vec<OsvVuln>> = Vec::with_capacity(queries.len());
let resp = self
.http
.post("https://api.osv.dev/v1/querybatch")
.json(&body)
.send()
.await
.map_err(|e| CoreError::Http(format!("OSV.dev request failed: {e}")))?;
for chunk in queries.chunks(OSV_BATCH_SIZE) {
let body = serde_json::json!({ "queries": chunk });
if !resp.status().is_success() {
let status = resp.status();
let body = resp.text().await.unwrap_or_default();
tracing::warn!("OSV.dev returned {status}: {body}");
return Ok(Vec::new());
}
let resp = self
.http
.post("https://api.osv.dev/v1/querybatch")
.json(&body)
.send()
.await
.map_err(|e| CoreError::Http(format!("OSV.dev request failed: {e}")))?;
let result: OsvBatchResponse = resp
.json()
.await
.map_err(|e| CoreError::Http(format!("Failed to parse OSV.dev response: {e}")))?;
if !resp.status().is_success() {
let status = resp.status();
let body = resp.text().await.unwrap_or_default();
tracing::warn!("OSV.dev returned {status}: {body}");
// Push empty results for this chunk so indices stay aligned
all_vulns.extend(std::iter::repeat_with(Vec::new).take(chunk.len()));
continue;
}
let vulns = result
.results
.into_iter()
.map(|r| {
let result: OsvBatchResponse = resp
.json()
.await
.map_err(|e| CoreError::Http(format!("Failed to parse OSV.dev response: {e}")))?;
let chunk_vulns = result.results.into_iter().map(|r| {
r.vulns
.unwrap_or_default()
.into_iter()
@@ -116,10 +120,12 @@ impl CveScanner {
}),
})
.collect()
})
.collect();
});
Ok(vulns)
all_vulns.extend(chunk_vulns);
}
Ok(all_vulns)
}
async fn query_nvd(&self, cve_id: &str) -> Result<Option<f64>, CoreError> {

View File

@@ -1,17 +1,80 @@
use std::path::{Path, PathBuf};
use git2::{FetchOptions, Repository};
use git2::{Cred, FetchOptions, RemoteCallbacks, Repository};
use crate::error::AgentError;
/// Credentials for accessing a private repository
#[derive(Debug, Clone, Default)]
pub struct RepoCredentials {
/// Path to the SSH private key (for SSH URLs)
pub ssh_key_path: Option<String>,
/// Auth token / password (for HTTPS URLs)
pub auth_token: Option<String>,
/// Username for HTTPS auth (defaults to "x-access-token")
pub auth_username: Option<String>,
}
impl RepoCredentials {
pub(crate) fn make_callbacks(&self) -> RemoteCallbacks<'_> {
let mut callbacks = RemoteCallbacks::new();
let ssh_key = self.ssh_key_path.clone();
let token = self.auth_token.clone();
let username = self.auth_username.clone();
callbacks.credentials(move |_url, username_from_url, allowed_types| {
// SSH key authentication
if allowed_types.contains(git2::CredentialType::SSH_KEY) {
if let Some(ref key_path) = ssh_key {
let key = Path::new(key_path);
if key.exists() {
let user = username_from_url.unwrap_or("git");
return Cred::ssh_key(user, None, key, None);
}
}
}
// HTTPS userpass authentication
if allowed_types.contains(git2::CredentialType::USER_PASS_PLAINTEXT) {
if let Some(ref tok) = token {
let user = username.as_deref().unwrap_or("x-access-token");
return Cred::userpass_plaintext(user, tok);
}
}
Cred::default()
});
callbacks
}
fn fetch_options(&self) -> FetchOptions<'_> {
let mut fetch_opts = FetchOptions::new();
if self.has_credentials() {
fetch_opts.remote_callbacks(self.make_callbacks());
}
fetch_opts
}
fn has_credentials(&self) -> bool {
self.ssh_key_path
.as_ref()
.map(|p| Path::new(p).exists())
.unwrap_or(false)
|| self.auth_token.is_some()
}
}
pub struct GitOps {
base_path: PathBuf,
credentials: RepoCredentials,
}
impl GitOps {
pub fn new(base_path: &str) -> Self {
pub fn new(base_path: &str, credentials: RepoCredentials) -> Self {
Self {
base_path: PathBuf::from(base_path),
credentials,
}
}
@@ -22,17 +85,25 @@ impl GitOps {
self.fetch(&repo_path)?;
} else {
std::fs::create_dir_all(&repo_path)?;
Repository::clone(git_url, &repo_path)?;
self.clone_repo(git_url, &repo_path)?;
tracing::info!("Cloned {git_url} to {}", repo_path.display());
}
Ok(repo_path)
}
fn clone_repo(&self, git_url: &str, repo_path: &Path) -> Result<(), AgentError> {
let mut builder = git2::build::RepoBuilder::new();
let fetch_opts = self.credentials.fetch_options();
builder.fetch_options(fetch_opts);
builder.clone(git_url, repo_path)?;
Ok(())
}
fn fetch(&self, repo_path: &Path) -> Result<(), AgentError> {
let repo = Repository::open(repo_path)?;
let mut remote = repo.find_remote("origin")?;
let mut fetch_opts = FetchOptions::new();
let mut fetch_opts = self.credentials.fetch_options();
remote.fetch(&[] as &[&str], Some(&mut fetch_opts), None)?;
// Fast-forward to origin/HEAD
@@ -48,6 +119,15 @@ impl GitOps {
Ok(())
}
/// Test that we can access a remote repository (used during add validation)
pub fn test_access(git_url: &str, credentials: &RepoCredentials) -> Result<(), AgentError> {
let mut remote = git2::Remote::create_detached(git_url)?;
let callbacks = credentials.make_callbacks();
remote.connect_auth(git2::Direction::Fetch, Some(callbacks), None)?;
remote.disconnect()?;
Ok(())
}
pub fn get_head_sha(repo_path: &Path) -> Result<String, AgentError> {
let repo = Repository::open(repo_path)?;
let head = repo.head()?;
@@ -63,6 +143,62 @@ impl GitOps {
}
}
/// Extract structured diff content between two commits
pub fn get_diff_content(
repo_path: &Path,
old_sha: &str,
new_sha: &str,
) -> Result<Vec<DiffFile>, AgentError> {
let repo = Repository::open(repo_path)?;
let old_commit = repo.find_commit(git2::Oid::from_str(old_sha)?)?;
let new_commit = repo.find_commit(git2::Oid::from_str(new_sha)?)?;
let old_tree = old_commit.tree()?;
let new_tree = new_commit.tree()?;
let diff = repo.diff_tree_to_tree(Some(&old_tree), Some(&new_tree), None)?;
let mut diff_files: Vec<DiffFile> = Vec::new();
diff.print(git2::DiffFormat::Patch, |delta, _hunk, line| {
let file_path = delta
.new_file()
.path()
.map(|p| p.to_string_lossy().to_string())
.unwrap_or_default();
// Find or create the DiffFile entry
let idx = if let Some(pos) = diff_files.iter().position(|f| f.path == file_path) {
pos
} else {
diff_files.push(DiffFile {
path: file_path,
hunks: String::new(),
});
diff_files.len() - 1
};
let diff_file = &mut diff_files[idx];
let prefix = match line.origin() {
'+' => "+",
'-' => "-",
' ' => " ",
_ => "",
};
let content = std::str::from_utf8(line.content()).unwrap_or("");
diff_file.hunks.push_str(prefix);
diff_file.hunks.push_str(content);
true
})?;
// Filter out binary files and very large diffs
diff_files.retain(|f| !f.hunks.is_empty() && f.hunks.len() < 50_000);
Ok(diff_files)
}
#[allow(dead_code)]
pub fn get_changed_files(
repo_path: &Path,
@@ -94,3 +230,10 @@ impl GitOps {
Ok(files)
}
}
/// A file changed between two commits with its diff content
#[derive(Debug, Clone)]
pub struct DiffFile {
pub path: String,
pub hunks: String,
}

View File

@@ -0,0 +1,130 @@
use std::path::Path;
use compliance_core::models::{Finding, ScanType, Severity};
use compliance_core::traits::{ScanOutput, Scanner};
use compliance_core::CoreError;
use crate::pipeline::dedup;
pub struct GitleaksScanner;
impl Scanner for GitleaksScanner {
fn name(&self) -> &str {
"gitleaks"
}
fn scan_type(&self) -> ScanType {
ScanType::SecretDetection
}
async fn scan(&self, repo_path: &Path, repo_id: &str) -> Result<ScanOutput, CoreError> {
let output = tokio::process::Command::new("gitleaks")
.args([
"detect",
"--source",
".",
"--report-format",
"json",
"--report-path",
"/dev/stdout",
"--no-banner",
"--exit-code",
"0",
])
.current_dir(repo_path)
.output()
.await
.map_err(|e| CoreError::Scanner {
scanner: "gitleaks".to_string(),
source: Box::new(e),
})?;
if output.stdout.is_empty() {
return Ok(ScanOutput::default());
}
let results: Vec<GitleaksResult> =
serde_json::from_slice(&output.stdout).unwrap_or_default();
let findings = results
.into_iter()
.filter(|r| !is_allowlisted(&r.file))
.map(|r| {
let severity = match r.rule_id.as_str() {
s if s.contains("private-key") => Severity::Critical,
s if s.contains("token") || s.contains("password") || s.contains("secret") => {
Severity::High
}
s if s.contains("api-key") => Severity::High,
_ => Severity::Medium,
};
let fingerprint = dedup::compute_fingerprint(&[
repo_id,
&r.rule_id,
&r.file,
&r.start_line.to_string(),
]);
let title = format!("Secret detected: {}", r.description);
let description = format!(
"Potential secret ({}) found in {}:{}. Match: {}",
r.rule_id,
r.file,
r.start_line,
r.r#match.chars().take(80).collect::<String>(),
);
let mut finding = Finding::new(
repo_id.to_string(),
fingerprint,
"gitleaks".to_string(),
ScanType::SecretDetection,
title,
description,
severity,
);
finding.rule_id = Some(r.rule_id);
finding.file_path = Some(r.file);
finding.line_number = Some(r.start_line);
finding.code_snippet = Some(r.r#match);
finding
})
.collect();
Ok(ScanOutput {
findings,
sbom_entries: Vec::new(),
})
}
}
/// Skip files that commonly contain example/placeholder secrets
fn is_allowlisted(file_path: &str) -> bool {
let lower = file_path.to_lowercase();
lower.ends_with(".env.example")
|| lower.ends_with(".env.sample")
|| lower.ends_with(".env.template")
|| lower.contains("/test/")
|| lower.contains("/tests/")
|| lower.contains("/fixtures/")
|| lower.contains("/testdata/")
|| lower.contains("mock")
|| lower.ends_with("_test.go")
|| lower.ends_with(".test.ts")
|| lower.ends_with(".test.js")
|| lower.ends_with(".spec.ts")
|| lower.ends_with(".spec.js")
}
#[derive(serde::Deserialize)]
#[serde(rename_all = "PascalCase")]
struct GitleaksResult {
description: String,
#[serde(rename = "RuleID")]
rule_id: String,
file: String,
start_line: u32,
#[serde(rename = "Match")]
r#match: String,
}

View File

@@ -0,0 +1,364 @@
use std::path::Path;
use std::time::Duration;
use compliance_core::models::{Finding, ScanType, Severity};
use compliance_core::traits::{ScanOutput, Scanner};
use compliance_core::CoreError;
use tokio::process::Command;
use crate::pipeline::dedup;
/// Timeout for each individual lint command
const LINT_TIMEOUT: Duration = Duration::from_secs(120);
pub struct LintScanner;
impl Scanner for LintScanner {
fn name(&self) -> &str {
"lint"
}
fn scan_type(&self) -> ScanType {
ScanType::Lint
}
async fn scan(&self, repo_path: &Path, repo_id: &str) -> Result<ScanOutput, CoreError> {
let mut all_findings = Vec::new();
// Detect which languages are present and run appropriate linters
if has_rust_project(repo_path) {
match run_clippy(repo_path, repo_id).await {
Ok(findings) => all_findings.extend(findings),
Err(e) => tracing::warn!("Clippy failed: {e}"),
}
}
if has_js_project(repo_path) {
match run_eslint(repo_path, repo_id).await {
Ok(findings) => all_findings.extend(findings),
Err(e) => tracing::warn!("ESLint failed: {e}"),
}
}
if has_python_project(repo_path) {
match run_ruff(repo_path, repo_id).await {
Ok(findings) => all_findings.extend(findings),
Err(e) => tracing::warn!("Ruff failed: {e}"),
}
}
Ok(ScanOutput {
findings: all_findings,
sbom_entries: Vec::new(),
})
}
}
fn has_rust_project(repo_path: &Path) -> bool {
repo_path.join("Cargo.toml").exists()
}
fn has_js_project(repo_path: &Path) -> bool {
// Only run if eslint is actually installed in the project
repo_path.join("package.json").exists() && repo_path.join("node_modules/.bin/eslint").exists()
}
fn has_python_project(repo_path: &Path) -> bool {
repo_path.join("pyproject.toml").exists()
|| repo_path.join("setup.py").exists()
|| repo_path.join("requirements.txt").exists()
}
/// Run a command with a timeout, returning its output or an error
async fn run_with_timeout(
child: tokio::process::Child,
scanner_name: &str,
) -> Result<std::process::Output, CoreError> {
let result = tokio::time::timeout(LINT_TIMEOUT, child.wait_with_output()).await;
match result {
Ok(Ok(output)) => Ok(output),
Ok(Err(e)) => Err(CoreError::Scanner {
scanner: scanner_name.to_string(),
source: Box::new(e),
}),
Err(_) => {
// Process is dropped here which sends SIGKILL on Unix
Err(CoreError::Scanner {
scanner: scanner_name.to_string(),
source: Box::new(std::io::Error::new(
std::io::ErrorKind::TimedOut,
format!("{scanner_name} timed out after {}s", LINT_TIMEOUT.as_secs()),
)),
})
}
}
}
// ── Clippy ──────────────────────────────────────────────
async fn run_clippy(repo_path: &Path, repo_id: &str) -> Result<Vec<Finding>, CoreError> {
let child = Command::new("cargo")
.args([
"clippy",
"--message-format=json",
"--quiet",
"--",
"-W",
"clippy::all",
])
.current_dir(repo_path)
.stdout(std::process::Stdio::piped())
.stderr(std::process::Stdio::piped())
.spawn()
.map_err(|e| CoreError::Scanner {
scanner: "clippy".to_string(),
source: Box::new(e),
})?;
let output = run_with_timeout(child, "clippy").await?;
let stdout = String::from_utf8_lossy(&output.stdout);
let mut findings = Vec::new();
for line in stdout.lines() {
let msg: serde_json::Value = match serde_json::from_str(line) {
Ok(v) => v,
Err(_) => continue,
};
if msg.get("reason").and_then(|v| v.as_str()) != Some("compiler-message") {
continue;
}
let message = match msg.get("message") {
Some(m) => m,
None => continue,
};
let level = message.get("level").and_then(|v| v.as_str()).unwrap_or("");
if level != "warning" && level != "error" {
continue;
}
let text = message
.get("message")
.and_then(|v| v.as_str())
.unwrap_or("")
.to_string();
let code = message
.get("code")
.and_then(|v| v.get("code"))
.and_then(|v| v.as_str())
.unwrap_or("")
.to_string();
if text.starts_with("aborting due to") || code.is_empty() {
continue;
}
let (file_path, line_number) = extract_primary_span(message);
let severity = if level == "error" {
Severity::High
} else {
Severity::Low
};
let fingerprint = dedup::compute_fingerprint(&[
repo_id,
"clippy",
&code,
&file_path,
&line_number.to_string(),
]);
let mut finding = Finding::new(
repo_id.to_string(),
fingerprint,
"clippy".to_string(),
ScanType::Lint,
format!("[clippy] {text}"),
text,
severity,
);
finding.rule_id = Some(code);
if !file_path.is_empty() {
finding.file_path = Some(file_path);
}
if line_number > 0 {
finding.line_number = Some(line_number);
}
findings.push(finding);
}
Ok(findings)
}
fn extract_primary_span(message: &serde_json::Value) -> (String, u32) {
let spans = match message.get("spans").and_then(|v| v.as_array()) {
Some(s) => s,
None => return (String::new(), 0),
};
for span in spans {
if span.get("is_primary").and_then(|v| v.as_bool()) == Some(true) {
let file = span
.get("file_name")
.and_then(|v| v.as_str())
.unwrap_or("")
.to_string();
let line = span.get("line_start").and_then(|v| v.as_u64()).unwrap_or(0) as u32;
return (file, line);
}
}
(String::new(), 0)
}
// ── ESLint ──────────────────────────────────────────────
async fn run_eslint(repo_path: &Path, repo_id: &str) -> Result<Vec<Finding>, CoreError> {
// Use the project-local eslint binary directly, not npx (which can hang downloading)
let eslint_bin = repo_path.join("node_modules/.bin/eslint");
let child = Command::new(eslint_bin)
.args([".", "--format", "json", "--no-error-on-unmatched-pattern"])
.current_dir(repo_path)
.stdout(std::process::Stdio::piped())
.stderr(std::process::Stdio::piped())
.spawn()
.map_err(|e| CoreError::Scanner {
scanner: "eslint".to_string(),
source: Box::new(e),
})?;
let output = run_with_timeout(child, "eslint").await?;
if output.stdout.is_empty() {
return Ok(Vec::new());
}
let results: Vec<EslintFileResult> = serde_json::from_slice(&output.stdout).unwrap_or_default();
let mut findings = Vec::new();
for file_result in results {
for msg in file_result.messages {
let severity = match msg.severity {
2 => Severity::Medium,
_ => Severity::Low,
};
let rule_id = msg.rule_id.unwrap_or_default();
let fingerprint = dedup::compute_fingerprint(&[
repo_id,
"eslint",
&rule_id,
&file_result.file_path,
&msg.line.to_string(),
]);
let mut finding = Finding::new(
repo_id.to_string(),
fingerprint,
"eslint".to_string(),
ScanType::Lint,
format!("[eslint] {}", msg.message),
msg.message,
severity,
);
finding.rule_id = Some(rule_id);
finding.file_path = Some(file_result.file_path.clone());
finding.line_number = Some(msg.line);
findings.push(finding);
}
}
Ok(findings)
}
#[derive(serde::Deserialize)]
struct EslintFileResult {
#[serde(rename = "filePath")]
file_path: String,
messages: Vec<EslintMessage>,
}
#[derive(serde::Deserialize)]
struct EslintMessage {
#[serde(rename = "ruleId")]
rule_id: Option<String>,
severity: u8,
message: String,
line: u32,
}
// ── Ruff ────────────────────────────────────────────────
async fn run_ruff(repo_path: &Path, repo_id: &str) -> Result<Vec<Finding>, CoreError> {
let child = Command::new("ruff")
.args(["check", ".", "--output-format", "json", "--exit-zero"])
.current_dir(repo_path)
.stdout(std::process::Stdio::piped())
.stderr(std::process::Stdio::piped())
.spawn()
.map_err(|e| CoreError::Scanner {
scanner: "ruff".to_string(),
source: Box::new(e),
})?;
let output = run_with_timeout(child, "ruff").await?;
if output.stdout.is_empty() {
return Ok(Vec::new());
}
let results: Vec<RuffResult> = serde_json::from_slice(&output.stdout).unwrap_or_default();
let findings = results
.into_iter()
.map(|r| {
let severity = if r.code.starts_with('E') || r.code.starts_with('F') {
Severity::Medium
} else {
Severity::Low
};
let fingerprint = dedup::compute_fingerprint(&[
repo_id,
"ruff",
&r.code,
&r.filename,
&r.location.row.to_string(),
]);
let mut finding = Finding::new(
repo_id.to_string(),
fingerprint,
"ruff".to_string(),
ScanType::Lint,
format!("[ruff] {}: {}", r.code, r.message),
r.message,
severity,
);
finding.rule_id = Some(r.code);
finding.file_path = Some(r.filename);
finding.line_number = Some(r.location.row);
finding
})
.collect();
Ok(findings)
}
#[derive(serde::Deserialize)]
struct RuffResult {
code: String,
message: String,
filename: String,
location: RuffLocation,
}
#[derive(serde::Deserialize)]
struct RuffLocation {
row: u32,
}

View File

@@ -1,6 +1,9 @@
pub mod code_review;
pub mod cve;
pub mod dedup;
pub mod git;
pub mod gitleaks;
pub mod lint;
pub mod orchestrator;
pub mod patterns;
pub mod sbom;

View File

@@ -9,8 +9,11 @@ use compliance_core::AgentConfig;
use crate::database::Database;
use crate::error::AgentError;
use crate::llm::LlmClient;
use crate::pipeline::code_review::CodeReviewScanner;
use crate::pipeline::cve::CveScanner;
use crate::pipeline::git::GitOps;
use crate::pipeline::git::{GitOps, RepoCredentials};
use crate::pipeline::gitleaks::GitleaksScanner;
use crate::pipeline::lint::LintScanner;
use crate::pipeline::patterns::{GdprPatternScanner, OAuthPatternScanner};
use crate::pipeline::sbom::SbomScanner;
use crate::pipeline::semgrep::SemgrepScanner;
@@ -114,7 +117,12 @@ impl PipelineOrchestrator {
// Stage 0: Change detection
tracing::info!("[{repo_id}] Stage 0: Change detection");
let git_ops = GitOps::new(&self.config.git_clone_base_path);
let creds = RepoCredentials {
ssh_key_path: Some(self.config.ssh_key_path.clone()),
auth_token: repo.auth_token.clone(),
auth_username: repo.auth_username.clone(),
};
let git_ops = GitOps::new(&self.config.git_clone_base_path, creds);
let repo_path = git_ops.clone_or_fetch(&repo.git_url, &repo.name)?;
if !GitOps::has_new_commits(&repo_path, repo.last_scanned_commit.as_deref())? {
@@ -182,6 +190,35 @@ impl PipelineOrchestrator {
Err(e) => tracing::warn!("[{repo_id}] OAuth pattern scan failed: {e}"),
}
// Stage 4a: Secret Detection (Gitleaks)
tracing::info!("[{repo_id}] Stage 4a: Secret Detection");
self.update_phase(scan_run_id, "secret_detection").await;
let gitleaks = GitleaksScanner;
match gitleaks.scan(&repo_path, &repo_id).await {
Ok(output) => all_findings.extend(output.findings),
Err(e) => tracing::warn!("[{repo_id}] Gitleaks failed: {e}"),
}
// Stage 4b: Lint Scanning
tracing::info!("[{repo_id}] Stage 4b: Lint Scanning");
self.update_phase(scan_run_id, "lint_scanning").await;
let lint = LintScanner;
match lint.scan(&repo_path, &repo_id).await {
Ok(output) => all_findings.extend(output.findings),
Err(e) => tracing::warn!("[{repo_id}] Lint scanning failed: {e}"),
}
// Stage 4c: LLM Code Review (only on incremental scans)
if let Some(old_sha) = &repo.last_scanned_commit {
tracing::info!("[{repo_id}] Stage 4c: LLM Code Review");
self.update_phase(scan_run_id, "code_review").await;
let reviewer = CodeReviewScanner::new(self.llm.clone());
let review_output = reviewer
.review_diff(&repo_path, &repo_id, old_sha, &current_sha)
.await;
all_findings.extend(review_output.findings);
}
// Stage 4.5: Graph Building
tracing::info!("[{repo_id}] Stage 4.5: Graph Building");
self.update_phase(scan_run_id, "graph_building").await;

View File

@@ -0,0 +1,53 @@
use std::path::Path;
use crate::error::AgentError;
/// Ensure the SSH key pair exists at the given path, generating it if missing.
/// Returns the public key contents.
pub fn ensure_ssh_key(key_path: &str) -> Result<String, AgentError> {
let private_path = Path::new(key_path);
let public_path = private_path.with_extension("pub");
if private_path.exists() && public_path.exists() {
return std::fs::read_to_string(&public_path)
.map_err(|e| AgentError::Config(format!("Failed to read SSH public key: {e}")));
}
// Create parent directory
if let Some(parent) = private_path.parent() {
std::fs::create_dir_all(parent)?;
}
// Generate ed25519 key pair using ssh-keygen
let output = std::process::Command::new("ssh-keygen")
.args([
"-t",
"ed25519",
"-f",
key_path,
"-N",
"", // no passphrase
"-C",
"compliance-scanner-agent",
])
.output()
.map_err(|e| AgentError::Config(format!("Failed to run ssh-keygen: {e}")))?;
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
return Err(AgentError::Config(format!("ssh-keygen failed: {stderr}")));
}
// Set correct permissions
#[cfg(unix)]
{
use std::os::unix::fs::PermissionsExt;
std::fs::set_permissions(private_path, std::fs::Permissions::from_mode(0o600))?;
}
let public_key = std::fs::read_to_string(&public_path)
.map_err(|e| AgentError::Config(format!("Failed to read generated SSH public key: {e}")))?;
tracing::info!("Generated new SSH key pair at {key_path}");
Ok(public_key)
}