feat: findings refinement, new scanners, and deployment tooling #6

Merged
sharang merged 6 commits from feat/findings-refinement into main 2026-03-09 12:53:13 +00:00
31 changed files with 1602 additions and 95 deletions
Showing only changes of commit 23ba52276b - Show all commits

View File

@@ -5,11 +5,21 @@ COPY . .
RUN cargo build --release -p compliance-agent
FROM debian:bookworm-slim
RUN apt-get update && apt-get install -y ca-certificates libssl3 git curl && rm -rf /var/lib/apt/lists/*
RUN apt-get update && apt-get install -y ca-certificates libssl3 git curl python3 python3-pip && rm -rf /var/lib/apt/lists/*
# Install syft for SBOM generation
RUN curl -sSfL https://raw.githubusercontent.com/anchore/syft/main/install.sh | sh -s -- -b /usr/local/bin
# Install gitleaks for secret detection
RUN curl -sSfL https://github.com/gitleaks/gitleaks/releases/download/v8.21.2/gitleaks_8.21.2_linux_x64.tar.gz \
| tar -xz -C /usr/local/bin gitleaks
# Install semgrep for static analysis
RUN pip3 install --break-system-packages semgrep
# Install ruff for Python linting
RUN pip3 install --break-system-packages ruff
COPY --from=builder /app/target/release/compliance-agent /usr/local/bin/compliance-agent
EXPOSE 3001 3002

View File

@@ -41,6 +41,12 @@ pub struct FindingsFilter {
pub scan_type: Option<String>,
#[serde(default)]
pub status: Option<String>,
#[serde(default)]
pub q: Option<String>,
#[serde(default)]
pub sort_by: Option<String>,
#[serde(default)]
pub sort_order: Option<String>,
#[serde(default = "default_page")]
pub page: u64,
#[serde(default = "default_limit")]
@@ -91,6 +97,17 @@ pub struct UpdateStatusRequest {
pub status: String,
}
/// Request body for `PATCH /api/v1/findings/bulk-status`.
#[derive(Deserialize)]
pub struct BulkUpdateStatusRequest {
    /// Hex `ObjectId` strings of the findings to update.
    pub ids: Vec<String>,
    /// New status value applied to every finding in `ids`.
    pub status: String,
}
/// Request body for `PATCH /api/v1/findings/{id}/feedback`.
#[derive(Deserialize)]
pub struct UpdateFeedbackRequest {
    /// Free-text developer feedback stored on the finding.
    pub feedback: String,
}
#[derive(Deserialize)]
pub struct SbomFilter {
#[serde(default)]
@@ -367,6 +384,29 @@ pub async fn list_findings(
if let Some(status) = &filter.status {
query.insert("status", status);
}
// Text search across title, description, file_path, rule_id
if let Some(q) = &filter.q {
if !q.is_empty() {
let regex = doc! { "$regex": q, "$options": "i" };
query.insert(
"$or",
mongodb::bson::bson!([
{ "title": regex.clone() },
{ "description": regex.clone() },
{ "file_path": regex.clone() },
{ "rule_id": regex },
]),
);
}
}
// Dynamic sort
let sort_field = filter.sort_by.as_deref().unwrap_or("created_at");
let sort_dir: i32 = match filter.sort_order.as_deref() {
Some("asc") => 1,
_ => -1,
};
let sort_doc = doc! { sort_field: sort_dir };
let skip = (filter.page.saturating_sub(1)) * filter.limit as u64;
let total = db
@@ -378,7 +418,7 @@ pub async fn list_findings(
let findings = match db
.findings()
.find(query)
.sort(doc! { "created_at": -1 })
.sort(sort_doc)
.skip(skip)
.limit(filter.limit)
.await
@@ -434,6 +474,55 @@ pub async fn update_finding_status(
Ok(Json(serde_json::json!({ "status": "updated" })))
}
/// `PATCH /api/v1/findings/bulk-status` — set one status on many findings at once.
///
/// Returns 400 when no id parses as an ObjectId, 500 on a DB error, otherwise
/// 200 with the number of documents actually modified.
pub async fn bulk_update_finding_status(
    Extension(agent): AgentExt,
    Json(req): Json<BulkUpdateStatusRequest>,
) -> Result<Json<serde_json::Value>, StatusCode> {
    // NOTE(review): ids that fail to parse are silently dropped here; only an
    // all-invalid request yields 400. Confirm partial acceptance is intended.
    let oids: Vec<mongodb::bson::oid::ObjectId> = req
        .ids
        .iter()
        .filter_map(|id| mongodb::bson::oid::ObjectId::parse_str(id).ok())
        .collect();
    if oids.is_empty() {
        return Err(StatusCode::BAD_REQUEST);
    }
    // NOTE(review): `req.status` is stored as-is with no validation against
    // the FindingStatus enum — confirm it is constrained upstream.
    let result = agent
        .db
        .findings()
        .update_many(
            doc! { "_id": { "$in": oids } },
            doc! { "$set": { "status": &req.status, "updated_at": mongodb::bson::DateTime::now() } },
        )
        .await
        .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
    Ok(Json(
        serde_json::json!({ "status": "updated", "modified_count": result.modified_count }),
    ))
}
/// `PATCH /api/v1/findings/{id}/feedback` — store developer feedback on a finding.
///
/// Returns 400 for a malformed ObjectId, 404 when no finding has that id
/// (previously a nonexistent id still reported `"updated"`), and 500 on a
/// DB error.
pub async fn update_finding_feedback(
    Extension(agent): AgentExt,
    Path(id): Path<String>,
    Json(req): Json<UpdateFeedbackRequest>,
) -> Result<Json<serde_json::Value>, StatusCode> {
    let oid = mongodb::bson::oid::ObjectId::parse_str(&id).map_err(|_| StatusCode::BAD_REQUEST)?;
    let result = agent
        .db
        .findings()
        .update_one(
            doc! { "_id": oid },
            doc! { "$set": { "developer_feedback": &req.feedback, "updated_at": mongodb::bson::DateTime::now() } },
        )
        .await
        .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
    // update_one succeeds even when nothing matched; distinguish that case.
    if result.matched_count == 0 {
        return Err(StatusCode::NOT_FOUND);
    }
    Ok(Json(serde_json::json!({ "status": "updated" })))
}
pub async fn list_sbom(
Extension(agent): AgentExt,
Query(filter): Query<SbomFilter>,

View File

@@ -23,6 +23,14 @@ pub fn build_router() -> Router {
"/api/v1/findings/{id}/status",
patch(handlers::update_finding_status),
)
.route(
"/api/v1/findings/bulk-status",
patch(handlers::bulk_update_finding_status),
)
.route(
"/api/v1/findings/{id}/feedback",
patch(handlers::update_finding_feedback),
)
.route("/api/v1/sbom", get(handlers::list_sbom))
.route("/api/v1/sbom/export", get(handlers::export_sbom))
.route("/api/v1/sbom/licenses", get(handlers::license_summary))

View File

@@ -5,6 +5,7 @@ pub mod descriptions;
pub mod fixes;
#[allow(dead_code)]
pub mod pr_review;
pub mod review_prompts;
pub mod triage;
pub use client::LlmClient;

View File

@@ -0,0 +1,77 @@
// System prompts for multi-pass LLM code review.
// Each pass focuses on a different aspect to avoid overloading a single prompt.
// Every prompt demands a strict JSON-array reply (or `[]`) so that
// parse_review_response can consume all passes uniformly.
/// Pass 1: logic/correctness defects only — bugs, not style.
pub const LOGIC_REVIEW_PROMPT: &str = r#"You are a senior software engineer reviewing code changes. Focus ONLY on logic and correctness issues.
Look for:
- Off-by-one errors, wrong comparisons, missing edge cases
- Incorrect control flow (unreachable code, missing returns, wrong loop conditions)
- Race conditions or concurrency bugs
- Resource leaks (unclosed handles, missing cleanup)
- Wrong variable used (copy-paste errors)
- Incorrect error handling (swallowed errors, wrong error type)
Ignore: style, naming, formatting, documentation, minor improvements.
For each issue found, respond with a JSON array:
[{"title": "...", "description": "...", "severity": "high|medium|low", "file": "...", "line": N, "suggestion": "..."}]
If no issues found, respond with: []"#;
/// Pass 2: security vulnerabilities only — the only pass whose reply schema
/// includes a "cwe" field.
pub const SECURITY_REVIEW_PROMPT: &str = r#"You are a security engineer reviewing code changes. Focus ONLY on security vulnerabilities.
Look for:
- Injection vulnerabilities (SQL, command, XSS, template injection)
- Authentication/authorization bypasses
- Sensitive data exposure (logging secrets, hardcoded credentials)
- Insecure cryptography (weak algorithms, predictable randomness)
- Path traversal, SSRF, open redirects
- Unsafe deserialization
- Missing input validation at trust boundaries
Ignore: code style, performance, general quality.
For each issue found, respond with a JSON array:
[{"title": "...", "description": "...", "severity": "critical|high|medium", "file": "...", "line": N, "cwe": "CWE-XXX", "suggestion": "..."}]
If no issues found, respond with: []"#;
/// Pass 3: convention/consistency problems that indicate likely bugs.
pub const CONVENTION_REVIEW_PROMPT: &str = r#"You are a code reviewer checking adherence to project conventions. Focus ONLY on patterns that indicate likely bugs or maintenance problems.
Look for:
- Inconsistent error handling patterns within the same module
- Public API that doesn't follow the project's established patterns
- Missing or incorrect type annotations that could cause runtime issues
- Anti-patterns specific to the language (e.g. unwrap in Rust library code, any in TypeScript)
Do NOT report: minor style preferences, documentation gaps, formatting.
Only report issues with HIGH confidence that they deviate from the visible codebase conventions.
For each issue found, respond with a JSON array:
[{"title": "...", "description": "...", "severity": "medium|low", "file": "...", "line": N, "suggestion": "..."}]
If no issues found, respond with: []"#;
/// Pass 4: high-risk structural complexity only.
pub const COMPLEXITY_REVIEW_PROMPT: &str = r#"You are reviewing code changes for excessive complexity that could lead to bugs.
Look for:
- Functions over 50 lines that should be decomposed
- Deeply nested control flow (4+ levels)
- Complex boolean expressions that are hard to reason about
- Functions with 5+ parameters
- Code duplication within the changed files
Only report complexity issues that are HIGH risk for future bugs. Ignore acceptable complexity in configuration, CLI argument parsing, or generated code.
For each issue found, respond with a JSON array:
[{"title": "...", "description": "...", "severity": "medium|low", "file": "...", "line": N, "suggestion": "..."}]
If no issues found, respond with: []"#;
/// All review types with their prompts.
/// Iterated in order by CodeReviewScanner::review_diff; the first tuple
/// element becomes the pass name embedded in scanner/rule ids.
pub const REVIEW_PASSES: &[(&str, &str)] = &[
    ("logic", LOGIC_REVIEW_PROMPT),
    ("security", SECURITY_REVIEW_PROMPT),
    ("convention", CONVENTION_REVIEW_PROMPT),
    ("complexity", COMPLEXITY_REVIEW_PROMPT),
];

View File

@@ -5,13 +5,22 @@ use compliance_core::models::{Finding, FindingStatus};
use crate::llm::LlmClient;
use crate::pipeline::orchestrator::GraphContext;
const TRIAGE_SYSTEM_PROMPT: &str = r#"You are a security finding triage expert. Analyze the following security finding and determine:
1. Is this a true positive? (yes/no)
2. Confidence score (0-10, where 10 is highest confidence this is a real issue)
3. Brief remediation suggestion (1-2 sentences)
const TRIAGE_SYSTEM_PROMPT: &str = r#"You are a security finding triage expert. Analyze the following security finding with its code context and determine the appropriate action.
Actions:
- "confirm": The finding is a true positive at the reported severity. Keep as-is.
- "downgrade": The finding is real but over-reported. Lower severity recommended.
- "upgrade": The finding is under-reported. Higher severity recommended.
- "dismiss": The finding is a false positive. Should be removed.
Consider:
- Is the code in a test, example, or generated file? (lower confidence for test code)
- Does the surrounding code context confirm or refute the finding?
- Is the finding actionable by a developer?
- Would a real attacker be able to exploit this?
Respond in JSON format:
{"true_positive": true/false, "confidence": N, "remediation": "..."}"#;
{"action": "confirm|downgrade|upgrade|dismiss", "confidence": 0-10, "rationale": "brief explanation", "remediation": "optional fix suggestion"}"#;
pub async fn triage_findings(
llm: &Arc<LlmClient>,
@@ -21,8 +30,10 @@ pub async fn triage_findings(
let mut passed = 0;
for finding in findings.iter_mut() {
let file_classification = classify_file_path(finding.file_path.as_deref());
let mut user_prompt = format!(
"Scanner: {}\nRule: {}\nSeverity: {}\nTitle: {}\nDescription: {}\nFile: {}\nLine: {}\nCode: {}",
"Scanner: {}\nRule: {}\nSeverity: {}\nTitle: {}\nDescription: {}\nFile: {}\nLine: {}\nCode: {}\nFile classification: {}",
finding.scanner,
finding.rule_id.as_deref().unwrap_or("N/A"),
finding.severity,
@@ -31,8 +42,14 @@ pub async fn triage_findings(
finding.file_path.as_deref().unwrap_or("N/A"),
finding.line_number.map(|n| n.to_string()).unwrap_or_else(|| "N/A".to_string()),
finding.code_snippet.as_deref().unwrap_or("N/A"),
file_classification,
);
// Enrich with surrounding code context if possible
if let Some(context) = read_surrounding_context(finding) {
user_prompt.push_str(&format!("\n\n--- Surrounding Code (50 lines) ---\n{context}"));
}
// Enrich with graph context if available
if let Some(ctx) = graph_context {
if let Some(impact) = ctx
@@ -69,32 +86,54 @@ pub async fn triage_findings(
.await
{
Ok(response) => {
// Strip markdown code fences if present (e.g. ```json ... ```)
let cleaned = response.trim();
let cleaned = if cleaned.starts_with("```") {
let inner = cleaned
cleaned
.trim_start_matches("```json")
.trim_start_matches("```")
.trim_end_matches("```")
.trim();
inner
.trim()
} else {
cleaned
};
if let Ok(result) = serde_json::from_str::<TriageResult>(cleaned) {
finding.confidence = Some(result.confidence);
// Apply file-path confidence adjustment
let adjusted_confidence = adjust_confidence(result.confidence, &file_classification);
finding.confidence = Some(adjusted_confidence);
finding.triage_action = Some(result.action.clone());
finding.triage_rationale = Some(result.rationale);
if let Some(remediation) = result.remediation {
finding.remediation = Some(remediation);
}
if result.confidence >= 3.0 {
finding.status = FindingStatus::Triaged;
passed += 1;
} else {
finding.status = FindingStatus::FalsePositive;
match result.action.as_str() {
"dismiss" => {
finding.status = FindingStatus::FalsePositive;
}
"downgrade" => {
// Downgrade severity by one level
finding.severity = downgrade_severity(&finding.severity);
finding.status = FindingStatus::Triaged;
passed += 1;
}
"upgrade" => {
finding.severity = upgrade_severity(&finding.severity);
finding.status = FindingStatus::Triaged;
passed += 1;
}
_ => {
// "confirm" or unknown — keep as-is
if adjusted_confidence >= 3.0 {
finding.status = FindingStatus::Triaged;
passed += 1;
} else {
finding.status = FindingStatus::FalsePositive;
}
}
}
} else {
// If LLM response doesn't parse, keep the finding
// Parse failure — keep the finding
finding.status = FindingStatus::Triaged;
passed += 1;
tracing::warn!(
@@ -117,12 +156,118 @@ pub async fn triage_findings(
passed
}
/// Read ~50 lines of surrounding code from the file at the finding's location.
///
/// Returns `None` when the finding has no file path or line number, when the
/// file cannot be read, or when the reported line lies beyond the end of the
/// file (a stale finding). The last case previously panicked: with
/// `line - 25 > lines.len()` the slice `lines[start..end]` had `start > end`.
fn read_surrounding_context(finding: &Finding) -> Option<String> {
    let file_path = finding.file_path.as_deref()?;
    let line = finding.line_number? as usize;
    // Works because the repo is cloned locally at scan time.
    let content = std::fs::read_to_string(file_path).ok()?;
    let lines: Vec<&str> = content.lines().collect();
    // Clamp the window to the file bounds so a stale line number past EOF
    // cannot produce an out-of-range slice.
    let start = line.saturating_sub(25).min(lines.len());
    let end = (line + 25).min(lines.len());
    if start >= end {
        return None;
    }
    Some(
        lines[start..end]
            .iter()
            .enumerate()
            // 1-based line numbers in the gutter, right-aligned to 4 columns.
            .map(|(i, l)| format!("{:>4} | {}", start + i + 1, l))
            .collect::<Vec<_>>()
            .join("\n"),
    )
}
/// Classify a file path to inform triage confidence adjustment.
///
/// Returns "unknown" when no path is available, otherwise one of "test",
/// "example", "generated", "vendored", or "production". Categories are
/// checked in that priority order against the lowercased path.
fn classify_file_path(path: Option<&str>) -> String {
    let Some(raw) = path else {
        return "unknown".to_string();
    };
    let lowered = raw.to_lowercase();
    const TEST_MARKERS: &[&str] = &[
        "/test/", "/tests/", "_test.", ".test.", ".spec.", "/fixtures/", "/testdata/",
    ];
    const EXAMPLE_MARKERS: &[&str] = &["/example", "/examples/", "/demo/", "/sample"];
    const GENERATED_MARKERS: &[&str] =
        &["/generated/", "/gen/", ".generated.", ".pb.go", "_generated.rs"];
    const VENDORED_MARKERS: &[&str] = &["/vendor/", "/node_modules/", "/third_party/"];
    let matches_any = |markers: &[&str]| markers.iter().any(|m| lowered.contains(m));
    let class = if matches_any(TEST_MARKERS) {
        "test"
    } else if matches_any(EXAMPLE_MARKERS) {
        "example"
    } else if matches_any(GENERATED_MARKERS) {
        "generated"
    } else if matches_any(VENDORED_MARKERS) {
        "vendored"
    } else {
        "production"
    };
    class.to_string()
}
/// Adjust an LLM-provided confidence score based on where the finding lives.
///
/// Findings in test, example, generated, or vendored code are scaled down
/// because they are far less likely to be actionable; any other
/// classification (including "production" and "unknown") is left unchanged.
fn adjust_confidence(raw_confidence: f64, classification: &str) -> f64 {
    let multiplier: f64 = if classification == "test" {
        0.5
    } else if classification == "example" {
        0.6
    } else if classification == "generated" {
        0.3
    } else if classification == "vendored" {
        0.4
    } else {
        1.0
    };
    multiplier * raw_confidence
}
fn downgrade_severity(severity: &compliance_core::models::Severity) -> compliance_core::models::Severity {
use compliance_core::models::Severity;
match severity {
Severity::Critical => Severity::High,
Severity::High => Severity::Medium,
Severity::Medium => Severity::Low,
Severity::Low => Severity::Info,
Severity::Info => Severity::Info,
}
}
fn upgrade_severity(severity: &compliance_core::models::Severity) -> compliance_core::models::Severity {
use compliance_core::models::Severity;
match severity {
Severity::Info => Severity::Low,
Severity::Low => Severity::Medium,
Severity::Medium => Severity::High,
Severity::High => Severity::Critical,
Severity::Critical => Severity::Critical,
}
}
/// Parsed JSON payload returned by the triage LLM call.
#[derive(serde::Deserialize)]
struct TriageResult {
    // Legacy field from the previous prompt format; kept so older/partial
    // responses still deserialize, but it no longer drives any decision.
    #[serde(default)]
    #[allow(dead_code)]
    true_positive: bool,
    // One of "confirm" | "downgrade" | "upgrade" | "dismiss" per the prompt;
    // defaults to "confirm" when the model omits it.
    #[serde(default = "default_action")]
    action: String,
    // 0-10 scale per the prompt; defaults to 0.0 when missing.
    #[serde(default)]
    confidence: f64,
    // Brief explanation from the model; empty string when missing.
    #[serde(default)]
    rationale: String,
    // Optional fix suggestion; only written to the finding when present.
    remediation: Option<String>,
}
/// Fallback triage action when the LLM response omits "action".
fn default_action() -> String {
    "confirm".to_string()
}

View File

@@ -0,0 +1,186 @@
use std::path::Path;
use std::sync::Arc;
use compliance_core::models::{Finding, ScanType, Severity};
use compliance_core::traits::ScanOutput;
use crate::llm::review_prompts::REVIEW_PASSES;
use crate::llm::LlmClient;
use crate::pipeline::dedup;
use crate::pipeline::git::{DiffFile, GitOps};
/// Runs multi-pass LLM code review over a commit-range diff.
pub struct CodeReviewScanner {
    // Shared LLM client used for every review pass.
    llm: Arc<LlmClient>,
}
impl CodeReviewScanner {
    /// Create a scanner backed by the given LLM client.
    pub fn new(llm: Arc<LlmClient>) -> Self {
        Self { llm }
    }
    /// Run multi-pass LLM code review on the diff between old and new commits.
    ///
    /// Extracts the textual diff, chunks it under a rough character budget,
    /// then runs every prompt in `REVIEW_PASSES` over every chunk
    /// sequentially. Failures degrade gracefully: a diff-extraction error
    /// returns an empty output, and a single failed LLM call skips just that
    /// pass/chunk with a warning.
    pub async fn review_diff(
        &self,
        repo_path: &Path,
        repo_id: &str,
        old_sha: &str,
        new_sha: &str,
    ) -> ScanOutput {
        let diff_files = match GitOps::get_diff_content(repo_path, old_sha, new_sha) {
            Ok(files) => files,
            Err(e) => {
                tracing::warn!("Failed to extract diff for code review: {e}");
                return ScanOutput::default();
            }
        };
        if diff_files.is_empty() {
            return ScanOutput::default();
        }
        let mut all_findings = Vec::new();
        // Chunk diff files into groups to avoid exceeding context limits
        // (8000 chars is a rough proxy for the model's context budget).
        let chunks = chunk_diff_files(&diff_files, 8000);
        for (pass_name, system_prompt) in REVIEW_PASSES {
            for chunk in &chunks {
                let user_prompt = format!(
                    "Review the following code changes:\n\n{}",
                    chunk
                        .iter()
                        .map(|f| format!("--- {} ---\n{}", f.path, f.hunks))
                        .collect::<Vec<_>>()
                        .join("\n\n")
                );
                // Low temperature for deterministic, conservative review output.
                match self.llm.chat(system_prompt, &user_prompt, Some(0.1)).await {
                    Ok(response) => {
                        let parsed = parse_review_response(&response, pass_name, repo_id, chunk);
                        all_findings.extend(parsed);
                    }
                    Err(e) => {
                        tracing::warn!("Code review pass '{pass_name}' failed: {e}");
                    }
                }
            }
        }
        ScanOutput {
            findings: all_findings,
            sbom_entries: Vec::new(),
        }
    }
}
/// Group diff files into chunks that fit within a rough character budget
/// (a cheap stand-in for a token limit).
///
/// Files are never split: a single file larger than `max_chars` still ends
/// up alone in its own (oversized) chunk.
fn chunk_diff_files(files: &[DiffFile], max_chars: usize) -> Vec<Vec<&DiffFile>> {
    let mut chunks: Vec<Vec<&DiffFile>> = Vec::new();
    let mut pending: Vec<&DiffFile> = Vec::new();
    let mut pending_chars = 0usize;
    for file in files {
        let would_overflow = pending_chars + file.hunks.len() > max_chars;
        if would_overflow && !pending.is_empty() {
            // Close out the current chunk before this file would overflow it.
            chunks.push(std::mem::take(&mut pending));
            pending_chars = 0;
        }
        pending_chars += file.hunks.len();
        pending.push(file);
    }
    if !pending.is_empty() {
        chunks.push(pending);
    }
    chunks
}
/// Parse one LLM review-pass response (a JSON array of issues) into findings.
///
/// Strips optional markdown code fences, discards issues that reference
/// files not present in the reviewed chunk (hallucination guard), and
/// converts the rest. A line number of 0 — the serde default when the model
/// omits "line" — is now stored as `None` instead of `Some(0)`, matching the
/// `line_number > 0` convention used by the lint scanners.
fn parse_review_response(
    response: &str,
    pass_name: &str,
    repo_id: &str,
    chunk: &[&DiffFile],
) -> Vec<Finding> {
    // Strip markdown code fences if present (e.g. ```json ... ```).
    let cleaned = response.trim();
    let cleaned = if cleaned.starts_with("```") {
        cleaned
            .trim_start_matches("```json")
            .trim_start_matches("```")
            .trim_end_matches("```")
            .trim()
    } else {
        cleaned
    };
    let issues: Vec<ReviewIssue> = match serde_json::from_str(cleaned) {
        Ok(v) => v,
        Err(_) => {
            if cleaned != "[]" {
                tracing::debug!("Failed to parse {pass_name} review response: {cleaned}");
            }
            return Vec::new();
        }
    };
    issues
        .into_iter()
        .filter(|issue| {
            // Verify the file exists in the diff chunk — guards against the
            // model inventing paths it never saw.
            chunk.iter().any(|f| f.path == issue.file)
        })
        .map(|issue| {
            let severity = match issue.severity.as_str() {
                "critical" => Severity::Critical,
                "high" => Severity::High,
                "medium" => Severity::Medium,
                "low" => Severity::Low,
                _ => Severity::Info,
            };
            // Fingerprint on stable issue coordinates so re-scans dedup.
            let fingerprint = dedup::compute_fingerprint(&[
                repo_id,
                "code-review",
                pass_name,
                &issue.file,
                &issue.line.to_string(),
                &issue.title,
            ]);
            let description = if let Some(suggestion) = &issue.suggestion {
                format!("{}\n\nSuggested fix: {}", issue.description, suggestion)
            } else {
                issue.description.clone()
            };
            let mut finding = Finding::new(
                repo_id.to_string(),
                fingerprint,
                format!("code-review/{pass_name}"),
                ScanType::CodeReview,
                issue.title,
                description,
                severity,
            );
            finding.rule_id = Some(format!("review/{pass_name}"));
            finding.file_path = Some(issue.file);
            // 0 means "not provided" (serde default) — store as None.
            finding.line_number = (issue.line > 0).then_some(issue.line);
            finding.cwe = issue.cwe;
            finding.suggested_fix = issue.suggestion;
            finding
        })
        .collect()
}
/// One issue parsed from an LLM review-pass response.
#[derive(serde::Deserialize)]
struct ReviewIssue {
    title: String,
    description: String,
    // Severity string per the prompts ("critical".."low"); anything
    // unrecognized maps to Info in parse_review_response.
    severity: String,
    // Path of the offending file; must match a file in the reviewed chunk
    // or the issue is discarded.
    file: String,
    // Defaults to 0 when the model omits the line number.
    #[serde(default)]
    line: u32,
    // Only the security pass's prompt asks for a CWE id.
    #[serde(default)]
    cwe: Option<String>,
    // Optional fix text; appended to the finding description when present.
    #[serde(default)]
    suggestion: Option<String>,
}

View File

@@ -64,6 +64,8 @@ impl CveScanner {
}
async fn query_osv_batch(&self, entries: &[SbomEntry]) -> Result<Vec<Vec<OsvVuln>>, CoreError> {
const OSV_BATCH_SIZE: usize = 500;
let queries: Vec<_> = entries
.iter()
.filter_map(|e| {
@@ -79,47 +81,54 @@ impl CveScanner {
return Ok(Vec::new());
}
let body = serde_json::json!({ "queries": queries });
let mut all_vulns: Vec<Vec<OsvVuln>> = Vec::with_capacity(queries.len());
let resp = self
.http
.post("https://api.osv.dev/v1/querybatch")
.json(&body)
.send()
.await
.map_err(|e| CoreError::Http(format!("OSV.dev request failed: {e}")))?;
for chunk in queries.chunks(OSV_BATCH_SIZE) {
let body = serde_json::json!({ "queries": chunk });
if !resp.status().is_success() {
let status = resp.status();
let body = resp.text().await.unwrap_or_default();
tracing::warn!("OSV.dev returned {status}: {body}");
return Ok(Vec::new());
let resp = self
.http
.post("https://api.osv.dev/v1/querybatch")
.json(&body)
.send()
.await
.map_err(|e| CoreError::Http(format!("OSV.dev request failed: {e}")))?;
if !resp.status().is_success() {
let status = resp.status();
let body = resp.text().await.unwrap_or_default();
tracing::warn!("OSV.dev returned {status}: {body}");
// Push empty results for this chunk so indices stay aligned
all_vulns.extend(std::iter::repeat_with(Vec::new).take(chunk.len()));
continue;
}
let result: OsvBatchResponse = resp
.json()
.await
.map_err(|e| CoreError::Http(format!("Failed to parse OSV.dev response: {e}")))?;
let chunk_vulns = result
.results
.into_iter()
.map(|r| {
r.vulns
.unwrap_or_default()
.into_iter()
.map(|v| OsvVuln {
id: v.id,
summary: v.summary,
severity: v.database_specific.and_then(|d| {
d.get("severity").and_then(|s| s.as_str()).map(String::from)
}),
})
.collect()
});
all_vulns.extend(chunk_vulns);
}
let result: OsvBatchResponse = resp
.json()
.await
.map_err(|e| CoreError::Http(format!("Failed to parse OSV.dev response: {e}")))?;
let vulns = result
.results
.into_iter()
.map(|r| {
r.vulns
.unwrap_or_default()
.into_iter()
.map(|v| OsvVuln {
id: v.id,
summary: v.summary,
severity: v.database_specific.and_then(|d| {
d.get("severity").and_then(|s| s.as_str()).map(String::from)
}),
})
.collect()
})
.collect();
Ok(vulns)
Ok(all_vulns)
}
async fn query_nvd(&self, cve_id: &str) -> Result<Option<f64>, CoreError> {

View File

@@ -63,6 +63,62 @@ impl GitOps {
}
}
/// Extract structured diff content between two commits.
///
/// Produces one `DiffFile` per changed path with the patch text accumulated
/// into `hunks`. Entries with empty hunks (e.g. binary files) and very large
/// diffs (50 KB or more of hunk text) are filtered out before returning.
pub fn get_diff_content(
    repo_path: &Path,
    old_sha: &str,
    new_sha: &str,
) -> Result<Vec<DiffFile>, AgentError> {
    let repo = Repository::open(repo_path)?;
    let old_commit = repo.find_commit(git2::Oid::from_str(old_sha)?)?;
    let new_commit = repo.find_commit(git2::Oid::from_str(new_sha)?)?;
    let old_tree = old_commit.tree()?;
    let new_tree = new_commit.tree()?;
    let diff = repo.diff_tree_to_tree(Some(&old_tree), Some(&new_tree), None)?;
    let mut diff_files: Vec<DiffFile> = Vec::new();
    // The print callback fires once per diff line, including file- and
    // hunk-header lines.
    diff.print(git2::DiffFormat::Patch, |delta, _hunk, line| {
        let file_path = delta
            .new_file()
            .path()
            .map(|p| p.to_string_lossy().to_string())
            .unwrap_or_default();
        // Find or create the DiffFile entry (linear scan; per-commit file
        // counts are expected to be small).
        let idx = if let Some(pos) = diff_files.iter().position(|f| f.path == file_path) {
            pos
        } else {
            diff_files.push(DiffFile {
                path: file_path,
                hunks: String::new(),
            });
            diff_files.len() - 1
        };
        let diff_file = &mut diff_files[idx];
        let prefix = match line.origin() {
            '+' => "+",
            '-' => "-",
            ' ' => " ",
            // NOTE(review): file ('F') and hunk ('H') header lines fall
            // through here with an empty prefix, but their content is still
            // appended below, so git's header text ends up inside `hunks` —
            // confirm that is intended for the LLM review context.
            _ => "",
        };
        // Non-UTF-8 line content is dropped (replaced with "").
        let content = std::str::from_utf8(line.content()).unwrap_or("");
        diff_file.hunks.push_str(prefix);
        diff_file.hunks.push_str(content);
        // Returning true continues the iteration.
        true
    })?;
    // Filter out binary files and very large diffs
    diff_files.retain(|f| !f.hunks.is_empty() && f.hunks.len() < 50_000);
    Ok(diff_files)
}
#[allow(dead_code)]
pub fn get_changed_files(
repo_path: &Path,
@@ -94,3 +150,10 @@ impl GitOps {
Ok(files)
}
}
/// A file changed between two commits with its diff content.
#[derive(Debug, Clone)]
pub struct DiffFile {
    /// Repo-relative path of the file (taken from the new side of the diff).
    pub path: String,
    /// Accumulated patch text: context/added/removed lines prefixed with
    /// ' ', '+', '-' (plus any header lines the diff printer emitted).
    pub hunks: String,
}

View File

@@ -0,0 +1,117 @@
use std::path::Path;
use compliance_core::models::{Finding, ScanType, Severity};
use compliance_core::traits::{ScanOutput, Scanner};
use compliance_core::CoreError;
use crate::pipeline::dedup;
pub struct GitleaksScanner;
impl Scanner for GitleaksScanner {
fn name(&self) -> &str {
"gitleaks"
}
fn scan_type(&self) -> ScanType {
ScanType::SecretDetection
}
async fn scan(&self, repo_path: &Path, repo_id: &str) -> Result<ScanOutput, CoreError> {
let output = tokio::process::Command::new("gitleaks")
.args(["detect", "--source", ".", "--report-format", "json", "--report-path", "/dev/stdout", "--no-banner", "--exit-code", "0"])
.current_dir(repo_path)
.output()
.await
.map_err(|e| CoreError::Scanner {
scanner: "gitleaks".to_string(),
source: Box::new(e),
})?;
if output.stdout.is_empty() {
return Ok(ScanOutput::default());
}
let results: Vec<GitleaksResult> = serde_json::from_slice(&output.stdout)
.unwrap_or_default();
let findings = results
.into_iter()
.filter(|r| !is_allowlisted(&r.file))
.map(|r| {
let severity = match r.rule_id.as_str() {
s if s.contains("private-key") => Severity::Critical,
s if s.contains("token") || s.contains("password") || s.contains("secret") => Severity::High,
s if s.contains("api-key") => Severity::High,
_ => Severity::Medium,
};
let fingerprint = dedup::compute_fingerprint(&[
repo_id,
&r.rule_id,
&r.file,
&r.start_line.to_string(),
]);
let title = format!("Secret detected: {}", r.description);
let description = format!(
"Potential secret ({}) found in {}:{}. Match: {}",
r.rule_id,
r.file,
r.start_line,
r.r#match.chars().take(80).collect::<String>(),
);
let mut finding = Finding::new(
repo_id.to_string(),
fingerprint,
"gitleaks".to_string(),
ScanType::SecretDetection,
title,
description,
severity,
);
finding.rule_id = Some(r.rule_id);
finding.file_path = Some(r.file);
finding.line_number = Some(r.start_line);
finding.code_snippet = Some(r.r#match);
finding
})
.collect();
Ok(ScanOutput {
findings,
sbom_entries: Vec::new(),
})
}
}
/// Skip files that commonly contain example/placeholder secrets.
///
/// Case-insensitive: env templates, test/fixture trees, anything mentioning
/// "mock", and per-language test-file suffixes are all allowlisted.
fn is_allowlisted(file_path: &str) -> bool {
    let lower = file_path.to_lowercase();
    const SUFFIXES: &[&str] = &[
        ".env.example",
        ".env.sample",
        ".env.template",
        "_test.go",
        ".test.ts",
        ".test.js",
        ".spec.ts",
        ".spec.js",
    ];
    const SUBSTRINGS: &[&str] = &["/test/", "/tests/", "/fixtures/", "/testdata/", "mock"];
    SUFFIXES.iter().any(|s| lower.ends_with(s))
        || SUBSTRINGS.iter().any(|s| lower.contains(s))
}
/// One leak entry from the gitleaks JSON report.
///
/// Field names follow gitleaks' PascalCase report keys via `rename_all`;
/// `rule_id` needs an explicit rename because gitleaks emits "RuleID", not
/// the "RuleId" that PascalCase renaming would produce.
#[derive(serde::Deserialize)]
#[serde(rename_all = "PascalCase")]
struct GitleaksResult {
    description: String,
    #[serde(rename = "RuleID")]
    rule_id: String,
    file: String,
    start_line: u32,
    // Raw identifier because `match` is a Rust keyword.
    #[serde(rename = "Match")]
    r#match: String,
}

View File

@@ -0,0 +1,361 @@
use std::path::Path;
use std::time::Duration;
use compliance_core::models::{Finding, ScanType, Severity};
use compliance_core::traits::{ScanOutput, Scanner};
use compliance_core::CoreError;
use tokio::process::Command;
use crate::pipeline::dedup;
/// Timeout for each individual lint command.
const LINT_TIMEOUT: Duration = Duration::from_secs(120);
/// Multi-language lint scanner: detects project type via marker files and
/// shells out to clippy / eslint / ruff as appropriate.
pub struct LintScanner;
impl Scanner for LintScanner {
    fn name(&self) -> &str {
        "lint"
    }
    fn scan_type(&self) -> ScanType {
        ScanType::Lint
    }
    /// Run every applicable linter sequentially and merge their findings.
    ///
    /// Each linter is best-effort: a failure is logged and the remaining
    /// linters still run, so one broken toolchain doesn't sink the scan.
    async fn scan(&self, repo_path: &Path, repo_id: &str) -> Result<ScanOutput, CoreError> {
        let mut all_findings = Vec::new();
        // Detect which languages are present and run appropriate linters
        if has_rust_project(repo_path) {
            match run_clippy(repo_path, repo_id).await {
                Ok(findings) => all_findings.extend(findings),
                Err(e) => tracing::warn!("Clippy failed: {e}"),
            }
        }
        if has_js_project(repo_path) {
            match run_eslint(repo_path, repo_id).await {
                Ok(findings) => all_findings.extend(findings),
                Err(e) => tracing::warn!("ESLint failed: {e}"),
            }
        }
        if has_python_project(repo_path) {
            match run_ruff(repo_path, repo_id).await {
                Ok(findings) => all_findings.extend(findings),
                Err(e) => tracing::warn!("Ruff failed: {e}"),
            }
        }
        Ok(ScanOutput {
            findings: all_findings,
            sbom_entries: Vec::new(),
        })
    }
}
/// True when the repo root contains a Cargo manifest.
fn has_rust_project(repo_path: &Path) -> bool {
    repo_path.join("Cargo.toml").exists()
}
/// True when the repo has a package.json AND a project-local eslint binary —
/// the scanner never installs eslint itself.
fn has_js_project(repo_path: &Path) -> bool {
    let manifest = repo_path.join("package.json");
    let local_eslint = repo_path.join("node_modules/.bin/eslint");
    manifest.exists() && local_eslint.exists()
}
/// True when any standard Python project marker file is present.
fn has_python_project(repo_path: &Path) -> bool {
    ["pyproject.toml", "setup.py", "requirements.txt"]
        .iter()
        .any(|marker| repo_path.join(marker).exists())
}
/// Run a command with a timeout, returning its output or an error.
///
/// `child` is consumed by `wait_with_output`; on timeout that future is
/// dropped. NOTE(review): dropping a tokio `Child` does NOT kill the
/// process unless the command was spawned with `kill_on_drop(true)` —
/// without it a timed-out linter keeps running in the background. Confirm
/// every spawn site sets `kill_on_drop(true)`.
async fn run_with_timeout(
    child: tokio::process::Child,
    scanner_name: &str,
) -> Result<std::process::Output, CoreError> {
    let result = tokio::time::timeout(LINT_TIMEOUT, child.wait_with_output()).await;
    match result {
        Ok(Ok(output)) => Ok(output),
        Ok(Err(e)) => Err(CoreError::Scanner {
            scanner: scanner_name.to_string(),
            source: Box::new(e),
        }),
        Err(_) => {
            // The wait_with_output future (and the Child inside it) is
            // dropped here; the OS process itself is only terminated if
            // kill_on_drop(true) was set when it was spawned.
            Err(CoreError::Scanner {
                scanner: scanner_name.to_string(),
                source: Box::new(std::io::Error::new(
                    std::io::ErrorKind::TimedOut,
                    format!("{scanner_name} timed out after {}s", LINT_TIMEOUT.as_secs()),
                )),
            })
        }
    }
}
// ── Clippy ──────────────────────────────────────────────
/// Run `cargo clippy` with JSON message output and convert warnings/errors
/// into findings.
///
/// `kill_on_drop(true)` ensures the cargo process is actually terminated if
/// `run_with_timeout` fires — dropping a tokio `Child` alone does not kill
/// the process, so without this a timed-out clippy run would linger.
async fn run_clippy(repo_path: &Path, repo_id: &str) -> Result<Vec<Finding>, CoreError> {
    let child = Command::new("cargo")
        .args(["clippy", "--message-format=json", "--quiet", "--", "-W", "clippy::all"])
        .current_dir(repo_path)
        .stdout(std::process::Stdio::piped())
        .stderr(std::process::Stdio::piped())
        // Guarantee the process dies when run_with_timeout drops it.
        .kill_on_drop(true)
        .spawn()
        .map_err(|e| CoreError::Scanner {
            scanner: "clippy".to_string(),
            source: Box::new(e),
        })?;
    let output = run_with_timeout(child, "clippy").await?;
    let stdout = String::from_utf8_lossy(&output.stdout);
    let mut findings = Vec::new();
    for line in stdout.lines() {
        // Cargo emits one JSON object per line; skip anything unparseable.
        let msg: serde_json::Value = match serde_json::from_str(line) {
            Ok(v) => v,
            Err(_) => continue,
        };
        if msg.get("reason").and_then(|v| v.as_str()) != Some("compiler-message") {
            continue;
        }
        let message = match msg.get("message") {
            Some(m) => m,
            None => continue,
        };
        let level = message
            .get("level")
            .and_then(|v| v.as_str())
            .unwrap_or("");
        if level != "warning" && level != "error" {
            continue;
        }
        let text = message
            .get("message")
            .and_then(|v| v.as_str())
            .unwrap_or("")
            .to_string();
        let code = message
            .get("code")
            .and_then(|v| v.get("code"))
            .and_then(|v| v.as_str())
            .unwrap_or("")
            .to_string();
        // Skip summary lines and uncoded diagnostics ("aborting due to ...").
        if text.starts_with("aborting due to") || code.is_empty() {
            continue;
        }
        let (file_path, line_number) = extract_primary_span(message);
        let severity = if level == "error" {
            Severity::High
        } else {
            Severity::Low
        };
        let fingerprint =
            dedup::compute_fingerprint(&[repo_id, "clippy", &code, &file_path, &line_number.to_string()]);
        let mut finding = Finding::new(
            repo_id.to_string(),
            fingerprint,
            "clippy".to_string(),
            ScanType::Lint,
            format!("[clippy] {text}"),
            text,
            severity,
        );
        finding.rule_id = Some(code);
        if !file_path.is_empty() {
            finding.file_path = Some(file_path);
        }
        if line_number > 0 {
            finding.line_number = Some(line_number);
        }
        findings.push(finding);
    }
    Ok(findings)
}
/// Pull the (file, line) of the primary span out of a cargo JSON diagnostic.
///
/// Returns ("", 0) when the message has no span array or no primary span.
fn extract_primary_span(message: &serde_json::Value) -> (String, u32) {
    let Some(spans) = message.get("spans").and_then(|v| v.as_array()) else {
        return (String::new(), 0);
    };
    spans
        .iter()
        .find(|span| span.get("is_primary").and_then(|v| v.as_bool()) == Some(true))
        .map(|span| {
            let file = span
                .get("file_name")
                .and_then(|v| v.as_str())
                .unwrap_or("")
                .to_string();
            let line = span
                .get("line_start")
                .and_then(|v| v.as_u64())
                .unwrap_or(0) as u32;
            (file, line)
        })
        .unwrap_or_else(|| (String::new(), 0))
}
// ── ESLint ──────────────────────────────────────────────
async fn run_eslint(repo_path: &Path, repo_id: &str) -> Result<Vec<Finding>, CoreError> {
// Use the project-local eslint binary directly, not npx (which can hang downloading)
let eslint_bin = repo_path.join("node_modules/.bin/eslint");
let child = Command::new(eslint_bin)
.args([".", "--format", "json", "--no-error-on-unmatched-pattern"])
.current_dir(repo_path)
.stdout(std::process::Stdio::piped())
.stderr(std::process::Stdio::piped())
.spawn()
.map_err(|e| CoreError::Scanner {
scanner: "eslint".to_string(),
source: Box::new(e),
})?;
let output = run_with_timeout(child, "eslint").await?;
if output.stdout.is_empty() {
return Ok(Vec::new());
}
let results: Vec<EslintFileResult> =
serde_json::from_slice(&output.stdout).unwrap_or_default();
let mut findings = Vec::new();
for file_result in results {
for msg in file_result.messages {
let severity = match msg.severity {
2 => Severity::Medium,
_ => Severity::Low,
};
let rule_id = msg.rule_id.unwrap_or_default();
let fingerprint = dedup::compute_fingerprint(&[
repo_id,
"eslint",
&rule_id,
&file_result.file_path,
&msg.line.to_string(),
]);
let mut finding = Finding::new(
repo_id.to_string(),
fingerprint,
"eslint".to_string(),
ScanType::Lint,
format!("[eslint] {}", msg.message),
msg.message,
severity,
);
finding.rule_id = Some(rule_id);
finding.file_path = Some(file_result.file_path.clone());
finding.line_number = Some(msg.line);
findings.push(finding);
}
}
Ok(findings)
}
/// One file's entry in ESLint's `--format json` report.
#[derive(serde::Deserialize)]
struct EslintFileResult {
    // Absolute/relative path of the linted file, as reported by ESLint.
    #[serde(rename = "filePath")]
    file_path: String,
    // All diagnostics ESLint produced for this file.
    messages: Vec<EslintMessage>,
}
/// A single ESLint diagnostic from the JSON report.
#[derive(serde::Deserialize)]
struct EslintMessage {
    // `None` when ESLint reports no rule for the message (the field is
    // nullable in its JSON output).
    #[serde(rename = "ruleId")]
    rule_id: Option<String>,
    // Numeric severity; `run_eslint` maps 2 -> Medium, anything else -> Low.
    severity: u8,
    message: String,
    // Line number the diagnostic points at.
    line: u32,
}
// ── Ruff ────────────────────────────────────────────────
/// Run `ruff check` over the repository and convert its JSON diagnostics
/// into `Finding`s.
///
/// `--exit-zero` keeps ruff from signalling failure via its exit code when
/// violations exist. Empty or unparseable output yields an empty result
/// set (scanning is best-effort).
async fn run_ruff(repo_path: &Path, repo_id: &str) -> Result<Vec<Finding>, CoreError> {
    let child = Command::new("ruff")
        .args(["check", ".", "--output-format", "json", "--exit-zero"])
        .current_dir(repo_path)
        .stdout(std::process::Stdio::piped())
        .stderr(std::process::Stdio::piped())
        .spawn()
        .map_err(|e| CoreError::Scanner {
            scanner: "ruff".to_string(),
            source: Box::new(e),
        })?;
    let output = run_with_timeout(child, "ruff").await?;
    if output.stdout.is_empty() {
        return Ok(Vec::new());
    }
    let diagnostics: Vec<RuffResult> =
        serde_json::from_slice(&output.stdout).unwrap_or_default();
    let mut findings = Vec::with_capacity(diagnostics.len());
    for diag in diagnostics {
        // E…/F… rule families are ranked above the purely stylistic rest.
        let severity = match diag.code.chars().next() {
            Some('E' | 'F') => Severity::Medium,
            _ => Severity::Low,
        };
        let row = diag.location.row.to_string();
        let fingerprint = dedup::compute_fingerprint(&[
            repo_id,
            "ruff",
            &diag.code,
            &diag.filename,
            &row,
        ]);
        let mut finding = Finding::new(
            repo_id.to_string(),
            fingerprint,
            "ruff".to_string(),
            ScanType::Lint,
            format!("[ruff] {}: {}", diag.code, diag.message),
            diag.message,
            severity,
        );
        finding.rule_id = Some(diag.code);
        finding.file_path = Some(diag.filename);
        finding.line_number = Some(diag.location.row);
        findings.push(finding);
    }
    Ok(findings)
}
/// A single diagnostic from `ruff check --output-format json`.
#[derive(serde::Deserialize)]
struct RuffResult {
    // Rule code (e.g. "E501", "F401"); its first letter drives severity
    // mapping in `run_ruff`.
    code: String,
    message: String,
    // Path of the offending file, as reported by ruff.
    filename: String,
    // Start position of the violation.
    location: RuffLocation,
}
/// Start position of a ruff diagnostic; only the row is consumed.
#[derive(serde::Deserialize)]
struct RuffLocation {
    // Line number of the violation.
    row: u32,
}

View File

@@ -1,6 +1,9 @@
pub mod code_review;
pub mod cve;
pub mod dedup;
pub mod git;
pub mod gitleaks;
pub mod lint;
pub mod orchestrator;
pub mod patterns;
pub mod sbom;

View File

@@ -9,8 +9,11 @@ use compliance_core::AgentConfig;
use crate::database::Database;
use crate::error::AgentError;
use crate::llm::LlmClient;
use crate::pipeline::code_review::CodeReviewScanner;
use crate::pipeline::cve::CveScanner;
use crate::pipeline::git::GitOps;
use crate::pipeline::gitleaks::GitleaksScanner;
use crate::pipeline::lint::LintScanner;
use crate::pipeline::patterns::{GdprPatternScanner, OAuthPatternScanner};
use crate::pipeline::sbom::SbomScanner;
use crate::pipeline::semgrep::SemgrepScanner;
@@ -182,6 +185,35 @@ impl PipelineOrchestrator {
Err(e) => tracing::warn!("[{repo_id}] OAuth pattern scan failed: {e}"),
}
// Stage 4a: Secret Detection (Gitleaks)
tracing::info!("[{repo_id}] Stage 4a: Secret Detection");
self.update_phase(scan_run_id, "secret_detection").await;
let gitleaks = GitleaksScanner;
match gitleaks.scan(&repo_path, &repo_id).await {
Ok(output) => all_findings.extend(output.findings),
Err(e) => tracing::warn!("[{repo_id}] Gitleaks failed: {e}"),
}
// Stage 4b: Lint Scanning
tracing::info!("[{repo_id}] Stage 4b: Lint Scanning");
self.update_phase(scan_run_id, "lint_scanning").await;
let lint = LintScanner;
match lint.scan(&repo_path, &repo_id).await {
Ok(output) => all_findings.extend(output.findings),
Err(e) => tracing::warn!("[{repo_id}] Lint scanning failed: {e}"),
}
// Stage 4c: LLM Code Review (only on incremental scans)
if let Some(old_sha) = &repo.last_scanned_commit {
tracing::info!("[{repo_id}] Stage 4c: LLM Code Review");
self.update_phase(scan_run_id, "code_review").await;
let reviewer = CodeReviewScanner::new(self.llm.clone());
let review_output = reviewer
.review_diff(&repo_path, &repo_id, old_sha, &current_sha)
.await;
all_findings.extend(review_output.findings);
}
// Stage 4.5: Graph Building
tracing::info!("[{repo_id}] Stage 4.5: Graph Building");
self.update_phase(scan_run_id, "graph_building").await;

View File

@@ -23,6 +23,7 @@ pub struct CveAlert {
pub summary: Option<String>,
pub llm_impact_summary: Option<String>,
pub references: Vec<String>,
#[serde(with = "super::serde_helpers::bson_datetime")]
pub created_at: DateTime<Utc>,
}

View File

@@ -58,7 +58,9 @@ pub struct DastTarget {
pub rate_limit: u32,
/// Whether destructive tests (DELETE, PUT) are allowed
pub allow_destructive: bool,
#[serde(with = "super::serde_helpers::bson_datetime")]
pub created_at: DateTime<Utc>,
#[serde(with = "super::serde_helpers::bson_datetime")]
pub updated_at: DateTime<Utc>,
}
@@ -135,7 +137,9 @@ pub struct DastScanRun {
pub error_message: Option<String>,
/// Linked SAST scan run ID (if triggered as part of pipeline)
pub sast_scan_run_id: Option<String>,
#[serde(with = "super::serde_helpers::bson_datetime")]
pub started_at: DateTime<Utc>,
#[serde(default, with = "super::serde_helpers::opt_bson_datetime")]
pub completed_at: Option<DateTime<Utc>>,
}
@@ -240,6 +244,7 @@ pub struct DastFinding {
pub remediation: Option<String>,
/// Linked SAST finding ID (if correlated)
pub linked_sast_finding_id: Option<String>,
#[serde(with = "super::serde_helpers::bson_datetime")]
pub created_at: DateTime<Utc>,
}

View File

@@ -71,7 +71,14 @@ pub struct Finding {
pub status: FindingStatus,
pub tracker_issue_url: Option<String>,
pub scan_run_id: Option<String>,
/// LLM triage action and reasoning
pub triage_action: Option<String>,
pub triage_rationale: Option<String>,
/// Developer feedback on finding quality
pub developer_feedback: Option<String>,
#[serde(with = "super::serde_helpers::bson_datetime")]
pub created_at: DateTime<Utc>,
#[serde(with = "super::serde_helpers::bson_datetime")]
pub updated_at: DateTime<Utc>,
}
@@ -108,6 +115,9 @@ impl Finding {
status: FindingStatus::Open,
tracker_issue_url: None,
scan_run_id: None,
triage_action: None,
triage_rationale: None,
developer_feedback: None,
created_at: now,
updated_at: now,
}

View File

@@ -122,7 +122,9 @@ pub struct GraphBuildRun {
pub community_count: u32,
pub languages_parsed: Vec<String>,
pub error_message: Option<String>,
#[serde(with = "super::serde_helpers::bson_datetime")]
pub started_at: DateTime<Utc>,
#[serde(default, with = "super::serde_helpers::opt_bson_datetime")]
pub completed_at: Option<DateTime<Utc>>,
}
@@ -164,6 +166,7 @@ pub struct ImpactAnalysis {
pub direct_callers: Vec<String>,
/// Direct callees of the affected function
pub direct_callees: Vec<String>,
#[serde(with = "super::serde_helpers::bson_datetime")]
pub created_at: DateTime<Utc>,
}

View File

@@ -49,7 +49,9 @@ pub struct TrackerIssue {
pub external_url: String,
pub title: String,
pub status: IssueStatus,
#[serde(with = "super::serde_helpers::bson_datetime")]
pub created_at: DateTime<Utc>,
#[serde(with = "super::serde_helpers::bson_datetime")]
pub updated_at: DateTime<Utc>,
}

View File

@@ -62,6 +62,8 @@ pub struct McpServerConfig {
pub mongodb_uri: Option<String>,
/// Database name
pub mongodb_database: Option<String>,
#[serde(with = "super::serde_helpers::bson_datetime")]
pub created_at: DateTime<Utc>,
#[serde(with = "super::serde_helpers::bson_datetime")]
pub updated_at: DateTime<Utc>,
}

View File

@@ -1,4 +1,5 @@
pub mod auth;
pub(crate) mod serde_helpers;
pub mod chat;
pub mod cve;
pub mod dast;

View File

@@ -31,15 +31,9 @@ pub struct TrackedRepository {
pub last_scanned_commit: Option<String>,
#[serde(default, deserialize_with = "deserialize_findings_count")]
pub findings_count: u32,
#[serde(
default = "chrono::Utc::now",
deserialize_with = "deserialize_datetime"
)]
#[serde(default = "chrono::Utc::now", with = "super::serde_helpers::bson_datetime")]
pub created_at: DateTime<Utc>,
#[serde(
default = "chrono::Utc::now",
deserialize_with = "deserialize_datetime"
)]
#[serde(default = "chrono::Utc::now", with = "super::serde_helpers::bson_datetime")]
pub updated_at: DateTime<Utc>,
}
@@ -47,23 +41,6 @@ fn default_branch() -> String {
"main".to_string()
}
/// Handles findings_count stored as either a plain integer or a BSON Int64
/// which the driver may present as a map `{"low": N, "high": N, "unsigned": bool}`.
/// Handles datetime stored as either a BSON DateTime or an RFC 3339 string.
fn deserialize_datetime<'de, D>(deserializer: D) -> Result<DateTime<Utc>, D::Error>
where
D: Deserializer<'de>,
{
let bson = bson::Bson::deserialize(deserializer)?;
match bson {
bson::Bson::DateTime(dt) => Ok(dt.into()),
bson::Bson::String(s) => s.parse::<DateTime<Utc>>().map_err(serde::de::Error::custom),
other => Err(serde::de::Error::custom(format!(
"expected DateTime or string, got: {other:?}"
))),
}
}
fn deserialize_findings_count<'de, D>(deserializer: D) -> Result<u32, D::Error>
where
D: Deserializer<'de>,

View File

@@ -20,7 +20,9 @@ pub struct SbomEntry {
pub license: Option<String>,
pub purl: Option<String>,
pub known_vulnerabilities: Vec<VulnRef>,
#[serde(with = "super::serde_helpers::bson_datetime")]
pub created_at: DateTime<Utc>,
#[serde(with = "super::serde_helpers::bson_datetime")]
pub updated_at: DateTime<Utc>,
}

View File

@@ -13,6 +13,9 @@ pub enum ScanType {
OAuth,
Graph,
Dast,
SecretDetection,
Lint,
CodeReview,
}
impl std::fmt::Display for ScanType {
@@ -25,6 +28,9 @@ impl std::fmt::Display for ScanType {
Self::OAuth => write!(f, "oauth"),
Self::Graph => write!(f, "graph"),
Self::Dast => write!(f, "dast"),
Self::SecretDetection => write!(f, "secret_detection"),
Self::Lint => write!(f, "lint"),
Self::CodeReview => write!(f, "code_review"),
}
}
}
@@ -45,6 +51,9 @@ pub enum ScanPhase {
SbomGeneration,
CveScanning,
PatternScanning,
SecretDetection,
LintScanning,
CodeReview,
GraphBuilding,
LlmTriage,
IssueCreation,
@@ -64,7 +73,9 @@ pub struct ScanRun {
pub phases_completed: Vec<ScanPhase>,
pub new_findings_count: u32,
pub error_message: Option<String>,
#[serde(with = "super::serde_helpers::bson_datetime")]
pub started_at: DateTime<Utc>,
#[serde(default, with = "super::serde_helpers::opt_bson_datetime")]
pub completed_at: Option<DateTime<Utc>>,
}

View File

@@ -0,0 +1,70 @@
use chrono::{DateTime, Utc};
use serde::{Deserialize, Deserializer, Serializer};
/// Serialize/deserialize `DateTime<Utc>` as BSON DateTime.
/// Handles both BSON DateTime objects and RFC 3339 strings on deserialization.
pub mod bson_datetime {
    use super::*;
    use serde::Serialize as _;

    /// Serialize as a native BSON DateTime (so MongoDB stores a real date,
    /// not a string).
    pub fn serialize<S>(dt: &DateTime<Utc>, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let bson_dt: bson::DateTime = (*dt).into();
        bson_dt.serialize(serializer)
    }

    /// Deserialize from either a BSON DateTime or an RFC 3339 string
    /// (documents written before the switch to BSON dates may hold strings).
    pub fn deserialize<'de, D>(deserializer: D) -> Result<DateTime<Utc>, D::Error>
    where
        D: Deserializer<'de>,
    {
        let bson_val = bson::Bson::deserialize(deserializer)?;
        match bson_val {
            bson::Bson::DateTime(dt) => Ok(dt.into()),
            bson::Bson::String(s) => {
                s.parse::<DateTime<Utc>>().map_err(serde::de::Error::custom)
            }
            other => Err(serde::de::Error::custom(format!(
                "expected DateTime or string, got: {other:?}"
            ))),
        }
    }
}
/// Serialize/deserialize `Option<DateTime<Utc>>` as BSON DateTime.
pub mod opt_bson_datetime {
    use super::*;
    use serde::Serialize as _;

    /// `Some` becomes a native BSON DateTime; `None` serializes as null.
    pub fn serialize<S>(dt: &Option<DateTime<Utc>>, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        match dt {
            Some(dt) => {
                let bson_dt: bson::DateTime = (*dt).into();
                bson_dt.serialize(serializer)
            }
            None => serializer.serialize_none(),
        }
    }

    /// Accepts a BSON DateTime, an RFC 3339 string, null, or a missing
    /// field — the latter two map to `None`.
    pub fn deserialize<'de, D>(deserializer: D) -> Result<Option<DateTime<Utc>>, D::Error>
    where
        D: Deserializer<'de>,
    {
        let bson_val = Option::<bson::Bson>::deserialize(deserializer)?;
        match bson_val {
            Some(bson::Bson::DateTime(dt)) => Ok(Some(dt.into())),
            Some(bson::Bson::String(s)) => s
                .parse::<DateTime<Utc>>()
                .map(Some)
                .map_err(serde::de::Error::custom),
            Some(bson::Bson::Null) | None => Ok(None),
            Some(other) => Err(serde::de::Error::custom(format!(
                "expected DateTime, string, or null, got: {other:?}"
            ))),
        }
    }
}

View File

@@ -609,6 +609,24 @@ tbody tr:last-child td {
background: var(--danger-bg);
}
.btn-scanning {
opacity: 0.7;
cursor: not-allowed;
display: inline-flex;
align-items: center;
gap: 6px;
}
.spinner {
display: inline-block;
width: 14px;
height: 14px;
border: 2px solid var(--border-bright);
border-top-color: var(--accent);
border-radius: 50%;
animation: spin 0.8s linear infinite;
}
.btn-danger {
background: var(--danger);
color: #fff;

View File

@@ -11,12 +11,16 @@ pub struct FindingsListResponse {
}
#[server]
#[allow(clippy::too_many_arguments)]
pub async fn fetch_findings(
page: u64,
severity: String,
scan_type: String,
status: String,
repo_id: String,
q: String,
sort_by: String,
sort_order: String,
) -> Result<FindingsListResponse, ServerFnError> {
let state: super::server_state::ServerState =
dioxus_fullstack::FullstackContext::extract().await?;
@@ -37,6 +41,18 @@ pub async fn fetch_findings(
if !repo_id.is_empty() {
url.push_str(&format!("&repo_id={repo_id}"));
}
if !q.is_empty() {
url.push_str(&format!(
"&q={}",
url::form_urlencoded::byte_serialize(q.as_bytes()).collect::<String>()
));
}
if !sort_by.is_empty() {
url.push_str(&format!("&sort_by={sort_by}"));
}
if !sort_order.is_empty() {
url.push_str(&format!("&sort_order={sort_order}"));
}
let resp = reqwest::get(&url)
.await
@@ -82,3 +98,43 @@ pub async fn update_finding_status(id: String, status: String) -> Result<(), Ser
Ok(())
}
#[server]
pub async fn bulk_update_finding_status(
ids: Vec<String>,
status: String,
) -> Result<(), ServerFnError> {
let state: super::server_state::ServerState =
dioxus_fullstack::FullstackContext::extract().await?;
let url = format!("{}/api/v1/findings/bulk-status", state.agent_api_url);
let client = reqwest::Client::new();
client
.patch(&url)
.json(&serde_json::json!({ "ids": ids, "status": status }))
.send()
.await
.map_err(|e| ServerFnError::new(e.to_string()))?;
Ok(())
}
#[server]
pub async fn update_finding_feedback(
id: String,
feedback: String,
) -> Result<(), ServerFnError> {
let state: super::server_state::ServerState =
dioxus_fullstack::FullstackContext::extract().await?;
let url = format!("{}/api/v1/findings/{id}/feedback", state.agent_api_url);
let client = reqwest::Client::new();
client
.patch(&url)
.json(&serde_json::json!({ "feedback": feedback }))
.send()
.await
.map_err(|e| ServerFnError::new(e.to_string()))?;
Ok(())
}

View File

@@ -99,3 +99,35 @@ pub async fn trigger_repo_scan(repo_id: String) -> Result<(), ServerFnError> {
Ok(())
}
/// Check if a repository has any running scans.
///
/// Returns `Ok(true)` when a recent scan run for `repo_id` has status
/// `"running"`. Used by the repositories page to poll until a triggered
/// scan finishes.
#[server]
pub async fn check_repo_scanning(repo_id: String) -> Result<bool, ServerFnError> {
    let state: super::server_state::ServerState =
        dioxus_fullstack::FullstackContext::extract().await?;
    // Fetch a page of recent scan runs rather than just one: with
    // `limit=1` we would only see the single most recent run *across all
    // repos*, so a concurrent scan of another repo would hide this repo's
    // running scan and the caller's polling loop would exit early.
    let url = format!(
        "{}/api/v1/scan-runs?page=1&limit=20",
        state.agent_api_url
    );
    let resp = reqwest::get(&url)
        .await
        .map_err(|e| ServerFnError::new(e.to_string()))?;
    let body: serde_json::Value = resp
        .json()
        .await
        .map_err(|e| ServerFnError::new(e.to_string()))?;
    // The repo is scanning if any of its recent runs is still "running".
    if let Some(scans) = body.get("data").and_then(|d| d.as_array()) {
        for scan in scans {
            let scan_repo = scan.get("repo_id").and_then(|v| v.as_str()).unwrap_or("");
            let status = scan.get("status").and_then(|v| v.as_str()).unwrap_or("");
            if scan_repo == repo_id && status == "running" {
                return Ok(true);
            }
        }
    }
    Ok(false)
}

View File

@@ -8,7 +8,7 @@ use crate::components::severity_badge::SeverityBadge;
pub fn FindingDetailPage(id: String) -> Element {
let finding_id = id.clone();
let finding = use_resource(move || {
let mut finding = use_resource(move || {
let fid = finding_id.clone();
async move {
crate::infrastructure::findings::fetch_finding_detail(fid)
@@ -22,6 +22,8 @@ pub fn FindingDetailPage(id: String) -> Element {
match snapshot {
Some(Some(f)) => {
let finding_id_for_status = id.clone();
let finding_id_for_feedback = id.clone();
let existing_feedback = f.developer_feedback.clone().unwrap_or_default();
rsx! {
PageHeader {
title: f.title.clone(),
@@ -39,6 +41,9 @@ pub fn FindingDetailPage(id: String) -> Element {
if let Some(score) = f.cvss_score {
span { class: "badge badge-medium", "CVSS: {score}" }
}
if let Some(confidence) = f.confidence {
span { class: "badge badge-info", "Confidence: {confidence:.1}" }
}
}
div { class: "card",
@@ -46,6 +51,19 @@ pub fn FindingDetailPage(id: String) -> Element {
p { "{f.description}" }
}
if let Some(rationale) = &f.triage_rationale {
div { class: "card",
div { class: "card-header", "Triage Rationale" }
div {
style: "display: flex; align-items: center; gap: 8px; margin-bottom: 8px;",
if let Some(action) = &f.triage_action {
span { class: "badge badge-info", "{action}" }
}
}
p { style: "color: var(--text-secondary); font-size: 14px;", "{rationale}" }
}
}
if let Some(code) = &f.code_snippet {
div { class: "card",
div { class: "card-header", "Code Evidence" }
@@ -99,6 +117,7 @@ pub fn FindingDetailPage(id: String) -> Element {
spawn(async move {
let _ = crate::infrastructure::findings::update_finding_status(id, s).await;
});
finding.restart();
},
"{status}"
}
@@ -107,6 +126,25 @@ pub fn FindingDetailPage(id: String) -> Element {
}
}
}
div { class: "card",
div { class: "card-header", "Developer Feedback" }
p {
style: "font-size: 13px; color: var(--text-secondary); margin-bottom: 8px;",
"Share your assessment of this finding (e.g. false positive, actionable, needs context)"
}
textarea {
style: "width: 100%; min-height: 80px; background: var(--bg-primary); border: 1px solid var(--border); border-radius: 8px; padding: 10px 14px; color: var(--text-primary); font-size: 14px; resize: vertical;",
value: "{existing_feedback}",
oninput: move |e| {
let feedback = e.value();
let id = finding_id_for_feedback.clone();
spawn(async move {
let _ = crate::infrastructure::findings::update_finding_feedback(id, feedback).await;
});
},
}
}
}
}
Some(None) => rsx! {

View File

@@ -12,6 +12,10 @@ pub fn FindingsPage() -> Element {
let mut type_filter = use_signal(String::new);
let mut status_filter = use_signal(String::new);
let mut repo_filter = use_signal(String::new);
let mut search_query = use_signal(String::new);
let mut sort_by = use_signal(|| "created_at".to_string());
let mut sort_order = use_signal(|| "desc".to_string());
let mut selected_ids = use_signal(Vec::<String>::new);
let repos = use_resource(|| async {
crate::infrastructure::repositories::fetch_repositories(1)
@@ -25,13 +29,44 @@ pub fn FindingsPage() -> Element {
let typ = type_filter();
let stat = status_filter();
let repo = repo_filter();
let q = search_query();
let sb = sort_by();
let so = sort_order();
async move {
crate::infrastructure::findings::fetch_findings(p, sev, typ, stat, repo)
crate::infrastructure::findings::fetch_findings(p, sev, typ, stat, repo, q, sb, so)
.await
.ok()
}
});
let toggle_sort = move |field: &'static str| {
move |_: MouseEvent| {
if sort_by() == field {
sort_order.set(if sort_order() == "asc" {
"desc".to_string()
} else {
"asc".to_string()
});
} else {
sort_by.set(field.to_string());
sort_order.set("desc".to_string());
}
page.set(1);
}
};
let sort_indicator = move |field: &str| -> String {
if sort_by() == field {
if sort_order() == "asc" {
" \u{25B2}".to_string()
} else {
" \u{25BC}".to_string()
}
} else {
String::new()
}
};
rsx! {
PageHeader {
title: "Findings",
@@ -39,6 +74,12 @@ pub fn FindingsPage() -> Element {
}
div { class: "filter-bar",
input {
r#type: "text",
placeholder: "Search findings...",
style: "min-width: 200px;",
oninput: move |e| { search_query.set(e.value()); page.set(1); },
}
select {
onchange: move |e| { repo_filter.set(e.value()); page.set(1); },
option { value: "", "All Repositories" }
@@ -76,6 +117,9 @@ pub fn FindingsPage() -> Element {
option { value: "cve", "CVE" }
option { value: "gdpr", "GDPR" }
option { value: "oauth", "OAuth" }
option { value: "secret_detection", "Secrets" }
option { value: "lint", "Lint" }
option { value: "code_review", "Code Review" }
}
select {
onchange: move |e| { status_filter.set(e.value()); page.set(1); },
@@ -88,29 +132,123 @@ pub fn FindingsPage() -> Element {
}
}
// Bulk action bar
if !selected_ids().is_empty() {
div {
class: "card",
style: "display: flex; align-items: center; gap: 12px; padding: 12px 16px; margin-bottom: 16px; background: rgba(56, 189, 248, 0.08); border-color: rgba(56, 189, 248, 0.2);",
span {
style: "font-size: 14px; color: var(--text-secondary);",
"{selected_ids().len()} selected"
}
for status in ["triaged", "resolved", "false_positive", "ignored"] {
{
let status_str = status.to_string();
let label = match status {
"false_positive" => "False Positive",
other => {
// Capitalize first letter
let mut s = other.to_string();
if let Some(c) = s.get_mut(0..1) { c.make_ascii_uppercase(); }
// Leak to get a &str that lives long enough - this is fine for static-ish UI strings
&*Box::leak(s.into_boxed_str())
}
};
rsx! {
button {
class: "btn btn-sm btn-ghost",
onclick: move |_| {
let ids = selected_ids();
let s = status_str.clone();
spawn(async move {
let _ = crate::infrastructure::findings::bulk_update_finding_status(ids, s).await;
});
selected_ids.set(Vec::new());
},
"Mark {label}"
}
}
}
}
button {
class: "btn btn-sm btn-ghost",
onclick: move |_| { selected_ids.set(Vec::new()); },
"Clear"
}
}
}
match &*findings.read() {
Some(Some(resp)) => {
let total_pages = resp.total.unwrap_or(0).div_ceil(20).max(1);
let all_ids: Vec<String> = resp.data.iter().filter_map(|f| f.id.as_ref().map(|id| id.to_hex())).collect();
rsx! {
div { class: "card",
div { class: "table-wrapper",
table {
thead {
tr {
th { "Severity" }
th { "Title" }
th { "Type" }
th {
style: "width: 40px;",
input {
r#type: "checkbox",
checked: !all_ids.is_empty() && selected_ids().len() == all_ids.len(),
onchange: move |_| {
if selected_ids().len() == all_ids.len() {
selected_ids.set(Vec::new());
} else {
selected_ids.set(all_ids.clone());
}
},
}
}
th {
style: "cursor: pointer; user-select: none;",
onclick: toggle_sort("severity"),
"Severity{sort_indicator(\"severity\")}"
}
th {
style: "cursor: pointer; user-select: none;",
onclick: toggle_sort("title"),
"Title{sort_indicator(\"title\")}"
}
th {
style: "cursor: pointer; user-select: none;",
onclick: toggle_sort("scan_type"),
"Type{sort_indicator(\"scan_type\")}"
}
th { "Scanner" }
th { "File" }
th { "Status" }
th {
style: "cursor: pointer; user-select: none;",
onclick: toggle_sort("status"),
"Status{sort_indicator(\"status\")}"
}
}
}
tbody {
for finding in &resp.data {
{
let id = finding.id.as_ref().map(|id| id.to_hex()).unwrap_or_default();
let id_for_check = id.clone();
let is_selected = selected_ids().contains(&id);
rsx! {
tr {
td {
input {
r#type: "checkbox",
checked: is_selected,
onchange: move |_| {
let mut ids = selected_ids();
if ids.contains(&id_for_check) {
ids.retain(|i| i != &id_for_check);
} else {
ids.push(id_for_check.clone());
}
selected_ids.set(ids);
},
}
}
td { SeverityBadge { severity: finding.severity.to_string() } }
td {
Link {

View File

@@ -5,6 +5,17 @@ use crate::components::page_header::PageHeader;
use crate::components::pagination::Pagination;
use crate::components::toast::{ToastType, Toasts};
/// Sleep for five seconds on whichever runtime this build targets:
/// `gloo_timers` when compiled for the browser (`web` feature, where
/// tokio's timer is unavailable), tokio otherwise. Used to pace the
/// scan-completion polling loop.
async fn async_sleep_5s() {
    #[cfg(feature = "web")]
    {
        gloo_timers::future::TimeoutFuture::new(5_000).await;
    }
    #[cfg(not(feature = "web"))]
    {
        tokio::time::sleep(std::time::Duration::from_secs(5)).await;
    }
}
#[component]
pub fn RepositoriesPage() -> Element {
let mut page = use_signal(|| 1u64);
@@ -14,6 +25,7 @@ pub fn RepositoriesPage() -> Element {
let mut branch = use_signal(|| "main".to_string());
let mut toasts = use_context::<Toasts>();
let mut confirm_delete = use_signal(|| Option::<(String, String)>::None); // (id, name)
let mut scanning_ids = use_signal(Vec::<String>::new);
let mut repos = use_resource(move || {
let p = page();
@@ -158,6 +170,7 @@ pub fn RepositoriesPage() -> Element {
let repo_id_scan = repo_id.clone();
let repo_id_del = repo_id.clone();
let repo_name_del = repo.name.clone();
let is_scanning = scanning_ids().contains(&repo_id);
rsx! {
tr {
td { "{repo.name}" }
@@ -192,17 +205,44 @@ pub fn RepositoriesPage() -> Element {
"Graph"
}
button {
class: "btn btn-ghost",
class: if is_scanning { "btn btn-ghost btn-scanning" } else { "btn btn-ghost" },
disabled: is_scanning,
onclick: move |_| {
let id = repo_id_scan.clone();
// Add to scanning set
let mut ids = scanning_ids();
ids.push(id.clone());
scanning_ids.set(ids);
spawn(async move {
match crate::infrastructure::repositories::trigger_repo_scan(id).await {
Ok(_) => toasts.push(ToastType::Success, "Scan triggered"),
match crate::infrastructure::repositories::trigger_repo_scan(id.clone()).await {
Ok(_) => {
toasts.push(ToastType::Success, "Scan triggered");
// Poll until scan completes
loop {
async_sleep_5s().await;
match crate::infrastructure::repositories::check_repo_scanning(id.clone()).await {
Ok(false) => break,
Ok(true) => continue,
Err(_) => break,
}
}
toasts.push(ToastType::Success, "Scan complete");
repos.restart();
}
Err(e) => toasts.push(ToastType::Error, e.to_string()),
}
// Remove from scanning set
let mut ids = scanning_ids();
ids.retain(|i| i != &id);
scanning_ids.set(ids);
});
},
"Scan"
if is_scanning {
span { class: "spinner" }
"Scanning..."
} else {
"Scan"
}
}
button {
class: "btn btn-ghost btn-ghost-danger",

View File

@@ -20,7 +20,7 @@ pub struct ListFindingsParams {
pub severity: Option<String>,
/// Filter by status: open, triaged, false_positive, resolved, ignored
pub status: Option<String>,
/// Filter by scan type: sast, sbom, cve, gdpr, oauth
/// Filter by scan type: sast, sbom, cve, gdpr, oauth, secret_detection, lint, code_review
pub scan_type: Option<String>,
/// Maximum number of results (default 50, max 200)
pub limit: Option<i64>,