feat: findings refinement, new scanners, and deployment tooling (#6)
Some checks failed
CI / Format (push) Successful in 3s
CI / Clippy (push) Successful in 4m3s
CI / Security Audit (push) Successful in 1m38s
CI / Tests (push) Successful in 4m44s
CI / Detect Changes (push) Successful in 2s
CI / Deploy Agent (push) Successful in 2s
CI / Deploy Dashboard (push) Successful in 2s
CI / Deploy Docs (push) Has been skipped
CI / Deploy MCP (push) Failing after 2s
Some checks failed
CI / Format (push) Successful in 3s
CI / Clippy (push) Successful in 4m3s
CI / Security Audit (push) Successful in 1m38s
CI / Tests (push) Successful in 4m44s
CI / Detect Changes (push) Successful in 2s
CI / Deploy Agent (push) Successful in 2s
CI / Deploy Dashboard (push) Successful in 2s
CI / Deploy Docs (push) Has been skipped
CI / Deploy MCP (push) Failing after 2s
This commit was merged in pull request #6.
This commit is contained in:
@@ -5,6 +5,7 @@ pub mod descriptions;
|
||||
pub mod fixes;
|
||||
#[allow(dead_code)]
|
||||
pub mod pr_review;
|
||||
pub mod review_prompts;
|
||||
pub mod triage;
|
||||
|
||||
pub use client::LlmClient;
|
||||
|
||||
77
compliance-agent/src/llm/review_prompts.rs
Normal file
77
compliance-agent/src/llm/review_prompts.rs
Normal file
@@ -0,0 +1,77 @@
|
||||
// System prompts for multi-pass LLM code review.
|
||||
// Each pass focuses on a different aspect to avoid overloading a single prompt.
|
||||
|
||||
pub const LOGIC_REVIEW_PROMPT: &str = r#"You are a senior software engineer reviewing code changes. Focus ONLY on logic and correctness issues.
|
||||
|
||||
Look for:
|
||||
- Off-by-one errors, wrong comparisons, missing edge cases
|
||||
- Incorrect control flow (unreachable code, missing returns, wrong loop conditions)
|
||||
- Race conditions or concurrency bugs
|
||||
- Resource leaks (unclosed handles, missing cleanup)
|
||||
- Wrong variable used (copy-paste errors)
|
||||
- Incorrect error handling (swallowed errors, wrong error type)
|
||||
|
||||
Ignore: style, naming, formatting, documentation, minor improvements.
|
||||
|
||||
For each issue found, respond with a JSON array:
|
||||
[{"title": "...", "description": "...", "severity": "high|medium|low", "file": "...", "line": N, "suggestion": "..."}]
|
||||
|
||||
If no issues found, respond with: []"#;
|
||||
|
||||
pub const SECURITY_REVIEW_PROMPT: &str = r#"You are a security engineer reviewing code changes. Focus ONLY on security vulnerabilities.
|
||||
|
||||
Look for:
|
||||
- Injection vulnerabilities (SQL, command, XSS, template injection)
|
||||
- Authentication/authorization bypasses
|
||||
- Sensitive data exposure (logging secrets, hardcoded credentials)
|
||||
- Insecure cryptography (weak algorithms, predictable randomness)
|
||||
- Path traversal, SSRF, open redirects
|
||||
- Unsafe deserialization
|
||||
- Missing input validation at trust boundaries
|
||||
|
||||
Ignore: code style, performance, general quality.
|
||||
|
||||
For each issue found, respond with a JSON array:
|
||||
[{"title": "...", "description": "...", "severity": "critical|high|medium", "file": "...", "line": N, "cwe": "CWE-XXX", "suggestion": "..."}]
|
||||
|
||||
If no issues found, respond with: []"#;
|
||||
|
||||
pub const CONVENTION_REVIEW_PROMPT: &str = r#"You are a code reviewer checking adherence to project conventions. Focus ONLY on patterns that indicate likely bugs or maintenance problems.
|
||||
|
||||
Look for:
|
||||
- Inconsistent error handling patterns within the same module
|
||||
- Public API that doesn't follow the project's established patterns
|
||||
- Missing or incorrect type annotations that could cause runtime issues
|
||||
- Anti-patterns specific to the language (e.g. unwrap in Rust library code, any in TypeScript)
|
||||
|
||||
Do NOT report: minor style preferences, documentation gaps, formatting.
|
||||
Only report issues with HIGH confidence that they deviate from the visible codebase conventions.
|
||||
|
||||
For each issue found, respond with a JSON array:
|
||||
[{"title": "...", "description": "...", "severity": "medium|low", "file": "...", "line": N, "suggestion": "..."}]
|
||||
|
||||
If no issues found, respond with: []"#;
|
||||
|
||||
pub const COMPLEXITY_REVIEW_PROMPT: &str = r#"You are reviewing code changes for excessive complexity that could lead to bugs.
|
||||
|
||||
Look for:
|
||||
- Functions over 50 lines that should be decomposed
|
||||
- Deeply nested control flow (4+ levels)
|
||||
- Complex boolean expressions that are hard to reason about
|
||||
- Functions with 5+ parameters
|
||||
- Code duplication within the changed files
|
||||
|
||||
Only report complexity issues that are HIGH risk for future bugs. Ignore acceptable complexity in configuration, CLI argument parsing, or generated code.
|
||||
|
||||
For each issue found, respond with a JSON array:
|
||||
[{"title": "...", "description": "...", "severity": "medium|low", "file": "...", "line": N, "suggestion": "..."}]
|
||||
|
||||
If no issues found, respond with: []"#;
|
||||
|
||||
/// All review types with their prompts
|
||||
pub const REVIEW_PASSES: &[(&str, &str)] = &[
|
||||
("logic", LOGIC_REVIEW_PROMPT),
|
||||
("security", SECURITY_REVIEW_PROMPT),
|
||||
("convention", CONVENTION_REVIEW_PROMPT),
|
||||
("complexity", COMPLEXITY_REVIEW_PROMPT),
|
||||
];
|
||||
@@ -5,13 +5,22 @@ use compliance_core::models::{Finding, FindingStatus};
|
||||
use crate::llm::LlmClient;
|
||||
use crate::pipeline::orchestrator::GraphContext;
|
||||
|
||||
const TRIAGE_SYSTEM_PROMPT: &str = r#"You are a security finding triage expert. Analyze the following security finding and determine:
|
||||
1. Is this a true positive? (yes/no)
|
||||
2. Confidence score (0-10, where 10 is highest confidence this is a real issue)
|
||||
3. Brief remediation suggestion (1-2 sentences)
|
||||
const TRIAGE_SYSTEM_PROMPT: &str = r#"You are a security finding triage expert. Analyze the following security finding with its code context and determine the appropriate action.
|
||||
|
||||
Actions:
|
||||
- "confirm": The finding is a true positive at the reported severity. Keep as-is.
|
||||
- "downgrade": The finding is real but over-reported. Lower severity recommended.
|
||||
- "upgrade": The finding is under-reported. Higher severity recommended.
|
||||
- "dismiss": The finding is a false positive. Should be removed.
|
||||
|
||||
Consider:
|
||||
- Is the code in a test, example, or generated file? (lower confidence for test code)
|
||||
- Does the surrounding code context confirm or refute the finding?
|
||||
- Is the finding actionable by a developer?
|
||||
- Would a real attacker be able to exploit this?
|
||||
|
||||
Respond in JSON format:
|
||||
{"true_positive": true/false, "confidence": N, "remediation": "..."}"#;
|
||||
{"action": "confirm|downgrade|upgrade|dismiss", "confidence": 0-10, "rationale": "brief explanation", "remediation": "optional fix suggestion"}"#;
|
||||
|
||||
pub async fn triage_findings(
|
||||
llm: &Arc<LlmClient>,
|
||||
@@ -21,8 +30,10 @@ pub async fn triage_findings(
|
||||
let mut passed = 0;
|
||||
|
||||
for finding in findings.iter_mut() {
|
||||
let file_classification = classify_file_path(finding.file_path.as_deref());
|
||||
|
||||
let mut user_prompt = format!(
|
||||
"Scanner: {}\nRule: {}\nSeverity: {}\nTitle: {}\nDescription: {}\nFile: {}\nLine: {}\nCode: {}",
|
||||
"Scanner: {}\nRule: {}\nSeverity: {}\nTitle: {}\nDescription: {}\nFile: {}\nLine: {}\nCode: {}\nFile classification: {}",
|
||||
finding.scanner,
|
||||
finding.rule_id.as_deref().unwrap_or("N/A"),
|
||||
finding.severity,
|
||||
@@ -31,8 +42,16 @@ pub async fn triage_findings(
|
||||
finding.file_path.as_deref().unwrap_or("N/A"),
|
||||
finding.line_number.map(|n| n.to_string()).unwrap_or_else(|| "N/A".to_string()),
|
||||
finding.code_snippet.as_deref().unwrap_or("N/A"),
|
||||
file_classification,
|
||||
);
|
||||
|
||||
// Enrich with surrounding code context if possible
|
||||
if let Some(context) = read_surrounding_context(finding) {
|
||||
user_prompt.push_str(&format!(
|
||||
"\n\n--- Surrounding Code (50 lines) ---\n{context}"
|
||||
));
|
||||
}
|
||||
|
||||
// Enrich with graph context if available
|
||||
if let Some(ctx) = graph_context {
|
||||
if let Some(impact) = ctx
|
||||
@@ -69,32 +88,55 @@ pub async fn triage_findings(
|
||||
.await
|
||||
{
|
||||
Ok(response) => {
|
||||
// Strip markdown code fences if present (e.g. ```json ... ```)
|
||||
let cleaned = response.trim();
|
||||
let cleaned = if cleaned.starts_with("```") {
|
||||
let inner = cleaned
|
||||
cleaned
|
||||
.trim_start_matches("```json")
|
||||
.trim_start_matches("```")
|
||||
.trim_end_matches("```")
|
||||
.trim();
|
||||
inner
|
||||
.trim()
|
||||
} else {
|
||||
cleaned
|
||||
};
|
||||
if let Ok(result) = serde_json::from_str::<TriageResult>(cleaned) {
|
||||
finding.confidence = Some(result.confidence);
|
||||
// Apply file-path confidence adjustment
|
||||
let adjusted_confidence =
|
||||
adjust_confidence(result.confidence, &file_classification);
|
||||
finding.confidence = Some(adjusted_confidence);
|
||||
finding.triage_action = Some(result.action.clone());
|
||||
finding.triage_rationale = Some(result.rationale);
|
||||
|
||||
if let Some(remediation) = result.remediation {
|
||||
finding.remediation = Some(remediation);
|
||||
}
|
||||
|
||||
if result.confidence >= 3.0 {
|
||||
finding.status = FindingStatus::Triaged;
|
||||
passed += 1;
|
||||
} else {
|
||||
finding.status = FindingStatus::FalsePositive;
|
||||
match result.action.as_str() {
|
||||
"dismiss" => {
|
||||
finding.status = FindingStatus::FalsePositive;
|
||||
}
|
||||
"downgrade" => {
|
||||
// Downgrade severity by one level
|
||||
finding.severity = downgrade_severity(&finding.severity);
|
||||
finding.status = FindingStatus::Triaged;
|
||||
passed += 1;
|
||||
}
|
||||
"upgrade" => {
|
||||
finding.severity = upgrade_severity(&finding.severity);
|
||||
finding.status = FindingStatus::Triaged;
|
||||
passed += 1;
|
||||
}
|
||||
_ => {
|
||||
// "confirm" or unknown — keep as-is
|
||||
if adjusted_confidence >= 3.0 {
|
||||
finding.status = FindingStatus::Triaged;
|
||||
passed += 1;
|
||||
} else {
|
||||
finding.status = FindingStatus::FalsePositive;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// If LLM response doesn't parse, keep the finding
|
||||
// Parse failure — keep the finding
|
||||
finding.status = FindingStatus::Triaged;
|
||||
passed += 1;
|
||||
tracing::warn!(
|
||||
@@ -117,12 +159,122 @@ pub async fn triage_findings(
|
||||
passed
|
||||
}
|
||||
|
||||
/// Read ~50 lines of surrounding code from the file at the finding's location
|
||||
fn read_surrounding_context(finding: &Finding) -> Option<String> {
|
||||
let file_path = finding.file_path.as_deref()?;
|
||||
let line = finding.line_number? as usize;
|
||||
|
||||
// Try to read the file — this works because the repo is cloned locally
|
||||
let content = std::fs::read_to_string(file_path).ok()?;
|
||||
let lines: Vec<&str> = content.lines().collect();
|
||||
|
||||
let start = line.saturating_sub(25);
|
||||
let end = (line + 25).min(lines.len());
|
||||
|
||||
Some(
|
||||
lines[start..end]
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(i, l)| format!("{:>4} | {}", start + i + 1, l))
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n"),
|
||||
)
|
||||
}
|
||||
|
||||
/// Classify a file path to inform triage confidence adjustment
|
||||
fn classify_file_path(path: Option<&str>) -> String {
|
||||
let path = match path {
|
||||
Some(p) => p.to_lowercase(),
|
||||
None => return "unknown".to_string(),
|
||||
};
|
||||
|
||||
if path.contains("/test/")
|
||||
|| path.contains("/tests/")
|
||||
|| path.contains("_test.")
|
||||
|| path.contains(".test.")
|
||||
|| path.contains(".spec.")
|
||||
|| path.contains("/fixtures/")
|
||||
|| path.contains("/testdata/")
|
||||
{
|
||||
return "test".to_string();
|
||||
}
|
||||
|
||||
if path.contains("/example")
|
||||
|| path.contains("/examples/")
|
||||
|| path.contains("/demo/")
|
||||
|| path.contains("/sample")
|
||||
{
|
||||
return "example".to_string();
|
||||
}
|
||||
|
||||
if path.contains("/generated/")
|
||||
|| path.contains("/gen/")
|
||||
|| path.contains(".generated.")
|
||||
|| path.contains(".pb.go")
|
||||
|| path.contains("_generated.rs")
|
||||
{
|
||||
return "generated".to_string();
|
||||
}
|
||||
|
||||
if path.contains("/vendor/")
|
||||
|| path.contains("/node_modules/")
|
||||
|| path.contains("/third_party/")
|
||||
{
|
||||
return "vendored".to_string();
|
||||
}
|
||||
|
||||
"production".to_string()
|
||||
}
|
||||
|
||||
/// Adjust confidence based on file classification
|
||||
fn adjust_confidence(raw_confidence: f64, classification: &str) -> f64 {
|
||||
let multiplier = match classification {
|
||||
"test" => 0.5,
|
||||
"example" => 0.6,
|
||||
"generated" => 0.3,
|
||||
"vendored" => 0.4,
|
||||
_ => 1.0,
|
||||
};
|
||||
raw_confidence * multiplier
|
||||
}
|
||||
|
||||
fn downgrade_severity(
|
||||
severity: &compliance_core::models::Severity,
|
||||
) -> compliance_core::models::Severity {
|
||||
use compliance_core::models::Severity;
|
||||
match severity {
|
||||
Severity::Critical => Severity::High,
|
||||
Severity::High => Severity::Medium,
|
||||
Severity::Medium => Severity::Low,
|
||||
Severity::Low => Severity::Info,
|
||||
Severity::Info => Severity::Info,
|
||||
}
|
||||
}
|
||||
|
||||
fn upgrade_severity(
|
||||
severity: &compliance_core::models::Severity,
|
||||
) -> compliance_core::models::Severity {
|
||||
use compliance_core::models::Severity;
|
||||
match severity {
|
||||
Severity::Info => Severity::Low,
|
||||
Severity::Low => Severity::Medium,
|
||||
Severity::Medium => Severity::High,
|
||||
Severity::High => Severity::Critical,
|
||||
Severity::Critical => Severity::Critical,
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(serde::Deserialize)]
|
||||
struct TriageResult {
|
||||
#[serde(default)]
|
||||
#[allow(dead_code)]
|
||||
true_positive: bool,
|
||||
#[serde(default = "default_action")]
|
||||
action: String,
|
||||
#[serde(default)]
|
||||
confidence: f64,
|
||||
#[serde(default)]
|
||||
rationale: String,
|
||||
remediation: Option<String>,
|
||||
}
|
||||
|
||||
fn default_action() -> String {
|
||||
"confirm".to_string()
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user