Initial commit: Compliance Scanner Agent

Autonomous security and compliance scanning agent for git repositories.
Features: SAST (Semgrep), SBOM (Syft), CVE monitoring (OSV.dev/NVD),
GDPR/OAuth pattern detection, LLM triage, issue creation (GitHub/GitLab/Jira),
PR reviews, and Dioxus fullstack dashboard.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Sharang Parnerkar
2026-03-02 13:30:17 +01:00
commit 0867e401bc
97 changed files with 11750 additions and 0 deletions

View File

@@ -0,0 +1,157 @@
use secrecy::{ExposeSecret, SecretString};
use serde::{Deserialize, Serialize};
use crate::error::AgentError;
/// Thin async client for an OpenAI-compatible `/v1/chat/completions` API
/// (the error strings in `impl LlmClient` suggest a LiteLLM proxy).
#[derive(Clone)]
pub struct LlmClient {
    // Endpoint root; "/v1/chat/completions" is appended after trimming a trailing '/'.
    base_url: String,
    // Bearer token, wrapped so it is never Debug-printed. An empty key means
    // "send no Authorization header" (e.g. an unauthenticated local proxy).
    api_key: SecretString,
    // Model identifier sent verbatim in every request body.
    model: String,
    // reqwest::Client holds a shared connection pool; cloning LlmClient reuses it.
    http: reqwest::Client,
}
/// One chat message in the OpenAI wire format (e.g. role "system" or "user").
#[derive(Serialize)]
struct ChatMessage {
    role: String,
    content: String,
}

/// Request body for POST /v1/chat/completions.
#[derive(Serialize)]
struct ChatCompletionRequest {
    model: String,
    messages: Vec<ChatMessage>,
    // Omitted from the JSON when None so the server-side default applies.
    #[serde(skip_serializing_if = "Option::is_none")]
    temperature: Option<f64>,
    #[serde(skip_serializing_if = "Option::is_none")]
    max_tokens: Option<u32>,
}

/// Minimal response subset — only the fields this crate actually reads;
/// serde ignores everything else in the payload.
#[derive(Deserialize)]
struct ChatCompletionResponse {
    choices: Vec<ChatChoice>,
}

#[derive(Deserialize)]
struct ChatChoice {
    message: ChatResponseMessage,
}

#[derive(Deserialize)]
struct ChatResponseMessage {
    content: String,
}
impl LlmClient {
    /// Creates a client for an OpenAI-compatible chat-completions endpoint.
    ///
    /// `base_url` is the endpoint root (a trailing '/' is tolerated); an empty
    /// `api_key` disables the Authorization header.
    pub fn new(base_url: String, api_key: SecretString, model: String) -> Self {
        Self {
            base_url,
            api_key,
            model,
            http: reqwest::Client::new(),
        }
    }

    /// Sends one system + user prompt pair and returns the assistant's reply.
    ///
    /// Thin convenience wrapper: builds the two-message conversation and
    /// delegates to [`Self::chat_with_messages`], which owns the HTTP logic
    /// (previously both methods duplicated it verbatim).
    pub async fn chat(
        &self,
        system_prompt: &str,
        user_prompt: &str,
        temperature: Option<f64>,
    ) -> Result<String, AgentError> {
        self.chat_with_messages(
            vec![
                ("system".to_string(), system_prompt.to_string()),
                ("user".to_string(), user_prompt.to_string()),
            ],
            temperature,
        )
        .await
    }

    /// Sends an arbitrary `(role, content)` message list and returns the
    /// content of the first choice.
    ///
    /// # Errors
    /// Returns `AgentError::Other` on transport failure, non-2xx status
    /// (including the response body in the message), unparseable JSON, or an
    /// empty `choices` array.
    pub async fn chat_with_messages(
        &self,
        messages: Vec<(String, String)>,
        temperature: Option<f64>,
    ) -> Result<String, AgentError> {
        let url = format!("{}/v1/chat/completions", self.base_url.trim_end_matches('/'));
        let request_body = ChatCompletionRequest {
            model: self.model.clone(),
            messages: messages
                .into_iter()
                .map(|(role, content)| ChatMessage { role, content })
                .collect(),
            temperature,
            max_tokens: Some(4096),
        };
        let mut req = self
            .http
            .post(&url)
            .header("content-type", "application/json")
            .json(&request_body);
        // Omit the Authorization header entirely when no key is configured.
        let key = self.api_key.expose_secret();
        if !key.is_empty() {
            req = req.header("Authorization", format!("Bearer {key}"));
        }
        let resp = req.send().await.map_err(|e| {
            AgentError::Other(format!("LiteLLM request failed: {e}"))
        })?;
        if !resp.status().is_success() {
            let status = resp.status();
            // Best-effort body capture for diagnostics; ignore a failed read.
            let body = resp.text().await.unwrap_or_default();
            return Err(AgentError::Other(format!("LiteLLM returned {status}: {body}")));
        }
        let body: ChatCompletionResponse = resp.json().await.map_err(|e| {
            AgentError::Other(format!("Failed to parse LiteLLM response: {e}"))
        })?;
        body.choices
            .first()
            .map(|c| c.message.content.clone())
            .ok_or_else(|| AgentError::Other("Empty response from LiteLLM".to_string()))
    }
}

View File

@@ -0,0 +1,65 @@
use std::sync::Arc;
use compliance_core::models::Finding;
use crate::error::AgentError;
use crate::llm::LlmClient;
const DESCRIPTION_SYSTEM_PROMPT: &str = r#"You are a security engineer writing issue descriptions for a bug tracker. Generate a clear, actionable issue body in Markdown format that includes:
1. **Summary**: 1-2 sentence overview
2. **Evidence**: Code location, snippet, and what was detected
3. **Impact**: What could happen if not fixed
4. **Remediation**: Step-by-step fix instructions
5. **References**: Relevant CWE/CVE links if applicable
Keep it concise and professional. Use code blocks for code snippets."#;

/// Asks the LLM for an issue `(title, body)` for a single finding.
///
/// The first non-empty response line (with any leading Markdown `#`s removed)
/// becomes the title; the remaining lines become the body. Falls back to
/// `finding.title` when no usable title line exists, and to the full response
/// when the body would otherwise be empty.
///
/// # Errors
/// Propagates any `AgentError` from the underlying LLM call.
pub async fn generate_issue_description(
    llm: &Arc<LlmClient>,
    finding: &Finding,
) -> Result<(String, String), AgentError> {
    let user_prompt = format!(
        "Generate an issue title and body for this finding:\n\
        Scanner: {}\n\
        Type: {}\n\
        Severity: {}\n\
        Rule: {}\n\
        Title: {}\n\
        Description: {}\n\
        File: {}\n\
        Line: {}\n\
        Code:\n```\n{}\n```\n\
        CWE: {}\n\
        CVE: {}\n\
        Remediation hint: {}",
        finding.scanner,
        finding.scan_type,
        finding.severity,
        finding.rule_id.as_deref().unwrap_or("N/A"),
        finding.title,
        finding.description,
        finding.file_path.as_deref().unwrap_or("N/A"),
        finding.line_number.map(|n| n.to_string()).unwrap_or_else(|| "N/A".to_string()),
        finding.code_snippet.as_deref().unwrap_or("N/A"),
        finding.cwe.as_deref().unwrap_or("N/A"),
        finding.cve.as_deref().unwrap_or("N/A"),
        finding.remediation.as_deref().unwrap_or("N/A"),
    );
    let response = llm.chat(DESCRIPTION_SYSTEM_PROMPT, &user_prompt, Some(0.3)).await?;
    // LLMs frequently start with a blank line; skipping those prevents an
    // empty title (the previous code took the raw first line unconditionally).
    let mut lines = response.lines().skip_while(|l| l.trim().is_empty());
    let title = lines
        .next()
        .unwrap_or(&finding.title)
        .trim_start_matches('#')
        .trim()
        .to_string();
    // A line like "###" trims to nothing — fall back to the finding's own title.
    let title = if title.is_empty() { finding.title.clone() } else { title };
    let body = lines.collect::<Vec<_>>().join("\n").trim().to_string();
    let body = if body.is_empty() { response } else { body };
    Ok((title, body))
}

View File

@@ -0,0 +1,27 @@
use std::sync::Arc;
use compliance_core::models::Finding;
use crate::error::AgentError;
use crate::llm::LlmClient;
const FIX_SYSTEM_PROMPT: &str = r#"You are a security engineer. Given a security finding with code context, suggest a concrete code fix. Return ONLY the fixed code snippet that can directly replace the vulnerable code. Include brief inline comments explaining the fix."#;

/// Asks the LLM for a drop-in replacement snippet for a vulnerable finding.
///
/// Builds a prompt from the finding's file path, rule id, description, and
/// code snippet (each defaulting to a placeholder when absent) and returns
/// the raw LLM reply, propagating any `AgentError` from the call.
pub async fn suggest_fix(
    llm: &Arc<LlmClient>,
    finding: &Finding,
) -> Result<String, AgentError> {
    // Resolve optional fields up front so the format call stays readable.
    let file_context = finding.file_path.as_deref().unwrap_or("unknown");
    let rule = finding.rule_id.as_deref().unwrap_or("N/A");
    let snippet = finding.code_snippet.as_deref().unwrap_or("N/A");
    let user_prompt = format!(
        "Suggest a fix for this vulnerability:\n\
        Language context from file: {}\n\
        Rule: {}\n\
        Description: {}\n\
        Vulnerable code:\n```\n{}\n```",
        file_context,
        rule,
        finding.description,
        snippet,
    );
    llm.chat(FIX_SYSTEM_PROMPT, &user_prompt, Some(0.2)).await
}

View File

@@ -0,0 +1,10 @@
//! LLM integration layer: the HTTP client plus helpers for finding triage,
//! issue descriptions, fix suggestions, and PR reviews.

pub mod client;
// NOTE(review): the dead_code allows below suggest these modules are not yet
// wired into every binary — confirm, and remove each allow once call sites exist.
#[allow(dead_code)]
pub mod descriptions;
#[allow(dead_code)]
pub mod fixes;
#[allow(dead_code)]
pub mod pr_review;
pub mod triage;
pub use client::LlmClient;

View File

@@ -0,0 +1,77 @@
use std::sync::Arc;
use compliance_core::models::Finding;
use compliance_core::traits::issue_tracker::ReviewComment;
use crate::error::AgentError;
use crate::llm::LlmClient;
const PR_REVIEW_SYSTEM_PROMPT: &str = r#"You are a security-focused code reviewer. Given a list of security findings in a PR diff, generate concise review comments. Each comment should:
1. Briefly explain the issue
2. Suggest a specific fix
3. Reference the relevant security standard (CWE, OWASP) if applicable
Be constructive and professional. Return JSON array:
[{"path": "file.rs", "line": 42, "body": "..."}]"#;
pub async fn generate_pr_review(
llm: &Arc<LlmClient>,
findings: &[Finding],
) -> Result<(String, Vec<ReviewComment>), AgentError> {
if findings.is_empty() {
return Ok(("No security issues found in this PR.".to_string(), Vec::new()));
}
let findings_text: Vec<String> = findings
.iter()
.map(|f| {
format!(
"- [{severity}] {title} in {file}:{line}\n Code: {code}\n Rule: {rule}",
severity = f.severity,
title = f.title,
file = f.file_path.as_deref().unwrap_or("unknown"),
line = f.line_number.map(|n| n.to_string()).unwrap_or_else(|| "?".to_string()),
code = f.code_snippet.as_deref().unwrap_or("N/A"),
rule = f.rule_id.as_deref().unwrap_or("N/A"),
)
})
.collect();
let user_prompt = format!(
"Generate review comments for these {} findings:\n{}",
findings.len(),
findings_text.join("\n"),
);
let response = llm.chat(PR_REVIEW_SYSTEM_PROMPT, &user_prompt, Some(0.3)).await?;
// Parse comments from LLM response
let comments: Vec<ReviewComment> = serde_json::from_str::<Vec<PrComment>>(&response)
.unwrap_or_default()
.into_iter()
.map(|c| ReviewComment {
path: c.path,
line: c.line,
body: c.body,
})
.collect();
let summary = format!(
"## Security Review\n\nFound **{}** potential security issue(s) in this PR.\n\n{}",
findings.len(),
findings
.iter()
.map(|f| format!("- **[{}]** {} in `{}`", f.severity, f.title, f.file_path.as_deref().unwrap_or("unknown")))
.collect::<Vec<_>>()
.join("\n"),
);
Ok((summary, comments))
}
#[derive(serde::Deserialize)]
struct PrComment {
path: String,
line: u32,
body: String,
}

View File

@@ -0,0 +1,73 @@
use std::sync::Arc;
use compliance_core::models::{Finding, FindingStatus};
use crate::llm::LlmClient;
const TRIAGE_SYSTEM_PROMPT: &str = r#"You are a security finding triage expert. Analyze the following security finding and determine:
1. Is this a true positive? (yes/no)
2. Confidence score (0-10, where 10 is highest confidence this is a real issue)
3. Brief remediation suggestion (1-2 sentences)
Respond in JSON format:
{"true_positive": true/false, "confidence": N, "remediation": "..."}"#;
pub async fn triage_findings(llm: &Arc<LlmClient>, findings: &mut Vec<Finding>) -> usize {
let mut passed = 0;
for finding in findings.iter_mut() {
let user_prompt = format!(
"Scanner: {}\nRule: {}\nSeverity: {}\nTitle: {}\nDescription: {}\nFile: {}\nLine: {}\nCode: {}",
finding.scanner,
finding.rule_id.as_deref().unwrap_or("N/A"),
finding.severity,
finding.title,
finding.description,
finding.file_path.as_deref().unwrap_or("N/A"),
finding.line_number.map(|n| n.to_string()).unwrap_or_else(|| "N/A".to_string()),
finding.code_snippet.as_deref().unwrap_or("N/A"),
);
match llm.chat(TRIAGE_SYSTEM_PROMPT, &user_prompt, Some(0.1)).await {
Ok(response) => {
if let Ok(result) = serde_json::from_str::<TriageResult>(&response) {
finding.confidence = Some(result.confidence);
if let Some(remediation) = result.remediation {
finding.remediation = Some(remediation);
}
if result.confidence >= 3.0 {
finding.status = FindingStatus::Triaged;
passed += 1;
} else {
finding.status = FindingStatus::FalsePositive;
}
} else {
// If LLM response doesn't parse, keep the finding
finding.status = FindingStatus::Triaged;
passed += 1;
tracing::warn!("Failed to parse triage response for {}: {response}", finding.fingerprint);
}
}
Err(e) => {
// On LLM error, keep the finding
tracing::warn!("LLM triage failed for {}: {e}", finding.fingerprint);
finding.status = FindingStatus::Triaged;
passed += 1;
}
}
}
// Remove false positives
findings.retain(|f| f.status != FindingStatus::FalsePositive);
passed
}
#[derive(serde::Deserialize)]
struct TriageResult {
#[serde(default)]
true_positive: bool,
#[serde(default)]
confidence: f64,
remediation: Option<String>,
}