Initial commit: Compliance Scanner Agent
Autonomous security and compliance scanning agent for git repositories. Features: SAST (Semgrep), SBOM (Syft), CVE monitoring (OSV.dev/NVD), GDPR/OAuth pattern detection, LLM triage, issue creation (GitHub/GitLab/Jira), PR reviews, and Dioxus fullstack dashboard. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
252
compliance-agent/src/pipeline/orchestrator.rs
Normal file
252
compliance-agent/src/pipeline/orchestrator.rs
Normal file
@@ -0,0 +1,252 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use mongodb::bson::doc;
|
||||
|
||||
use compliance_core::models::*;
|
||||
use compliance_core::traits::Scanner;
|
||||
use compliance_core::AgentConfig;
|
||||
|
||||
use crate::database::Database;
|
||||
use crate::error::AgentError;
|
||||
use crate::llm::LlmClient;
|
||||
use crate::pipeline::cve::CveScanner;
|
||||
use crate::pipeline::git::GitOps;
|
||||
use crate::pipeline::patterns::{GdprPatternScanner, OAuthPatternScanner};
|
||||
use crate::pipeline::sbom::SbomScanner;
|
||||
use crate::pipeline::semgrep::SemgrepScanner;
|
||||
|
||||
pub struct PipelineOrchestrator {
|
||||
config: AgentConfig,
|
||||
db: Database,
|
||||
llm: Arc<LlmClient>,
|
||||
http: reqwest::Client,
|
||||
}
|
||||
|
||||
impl PipelineOrchestrator {
|
||||
pub fn new(
|
||||
config: AgentConfig,
|
||||
db: Database,
|
||||
llm: Arc<LlmClient>,
|
||||
http: reqwest::Client,
|
||||
) -> Self {
|
||||
Self { config, db, llm, http }
|
||||
}
|
||||
|
||||
pub async fn run(
|
||||
&self,
|
||||
repo_id: &str,
|
||||
trigger: ScanTrigger,
|
||||
) -> Result<(), AgentError> {
|
||||
// Look up the repository
|
||||
let repo = self
|
||||
.db
|
||||
.repositories()
|
||||
.find_one(doc! { "_id": mongodb::bson::oid::ObjectId::parse_str(repo_id).map_err(|e| AgentError::Other(e.to_string()))? })
|
||||
.await?
|
||||
.ok_or_else(|| AgentError::Other(format!("Repository {repo_id} not found")))?;
|
||||
|
||||
// Create scan run
|
||||
let scan_run = ScanRun::new(repo_id.to_string(), trigger);
|
||||
let insert = self.db.scan_runs().insert_one(&scan_run).await?;
|
||||
let scan_run_id = insert.inserted_id.as_object_id()
|
||||
.map(|id| id.to_hex())
|
||||
.unwrap_or_default();
|
||||
|
||||
let result = self.run_pipeline(&repo, &scan_run_id).await;
|
||||
|
||||
// Update scan run status
|
||||
match &result {
|
||||
Ok(count) => {
|
||||
self.db.scan_runs().update_one(
|
||||
doc! { "_id": &insert.inserted_id },
|
||||
doc! {
|
||||
"$set": {
|
||||
"status": "completed",
|
||||
"current_phase": "completed",
|
||||
"new_findings_count": *count as i64,
|
||||
"completed_at": mongodb::bson::DateTime::now(),
|
||||
}
|
||||
},
|
||||
).await?;
|
||||
}
|
||||
Err(e) => {
|
||||
self.db.scan_runs().update_one(
|
||||
doc! { "_id": &insert.inserted_id },
|
||||
doc! {
|
||||
"$set": {
|
||||
"status": "failed",
|
||||
"error_message": e.to_string(),
|
||||
"completed_at": mongodb::bson::DateTime::now(),
|
||||
}
|
||||
},
|
||||
).await?;
|
||||
}
|
||||
}
|
||||
|
||||
result.map(|_| ())
|
||||
}
|
||||
|
||||
async fn run_pipeline(
|
||||
&self,
|
||||
repo: &TrackedRepository,
|
||||
scan_run_id: &str,
|
||||
) -> Result<u32, AgentError> {
|
||||
let repo_id = repo.id.as_ref()
|
||||
.map(|id| id.to_hex())
|
||||
.unwrap_or_default();
|
||||
|
||||
// Stage 0: Change detection
|
||||
tracing::info!("[{repo_id}] Stage 0: Change detection");
|
||||
let git_ops = GitOps::new(&self.config.git_clone_base_path);
|
||||
let repo_path = git_ops.clone_or_fetch(&repo.git_url, &repo.name)?;
|
||||
|
||||
if !GitOps::has_new_commits(&repo_path, repo.last_scanned_commit.as_deref())? {
|
||||
tracing::info!("[{repo_id}] No new commits, skipping scan");
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
let current_sha = GitOps::get_head_sha(&repo_path)?;
|
||||
let mut all_findings: Vec<Finding> = Vec::new();
|
||||
|
||||
// Stage 1: Semgrep SAST
|
||||
tracing::info!("[{repo_id}] Stage 1: Semgrep SAST");
|
||||
self.update_phase(scan_run_id, "sast").await;
|
||||
let semgrep = SemgrepScanner;
|
||||
match semgrep.scan(&repo_path, &repo_id).await {
|
||||
Ok(output) => all_findings.extend(output.findings),
|
||||
Err(e) => tracing::warn!("[{repo_id}] Semgrep failed: {e}"),
|
||||
}
|
||||
|
||||
// Stage 2: SBOM Generation
|
||||
tracing::info!("[{repo_id}] Stage 2: SBOM Generation");
|
||||
self.update_phase(scan_run_id, "sbom_generation").await;
|
||||
let sbom_scanner = SbomScanner;
|
||||
let mut sbom_entries = match sbom_scanner.scan(&repo_path, &repo_id).await {
|
||||
Ok(output) => output.sbom_entries,
|
||||
Err(e) => {
|
||||
tracing::warn!("[{repo_id}] SBOM generation failed: {e}");
|
||||
Vec::new()
|
||||
}
|
||||
};
|
||||
|
||||
// Stage 3: CVE Scanning
|
||||
tracing::info!("[{repo_id}] Stage 3: CVE Scanning");
|
||||
self.update_phase(scan_run_id, "cve_scanning").await;
|
||||
let cve_scanner = CveScanner::new(
|
||||
self.http.clone(),
|
||||
self.config.searxng_url.clone(),
|
||||
self.config.nvd_api_key.as_ref().map(|k| {
|
||||
use secrecy::ExposeSecret;
|
||||
k.expose_secret().to_string()
|
||||
}),
|
||||
);
|
||||
let cve_alerts = match cve_scanner.scan_dependencies(&repo_id, &mut sbom_entries).await {
|
||||
Ok(alerts) => alerts,
|
||||
Err(e) => {
|
||||
tracing::warn!("[{repo_id}] CVE scanning failed: {e}");
|
||||
Vec::new()
|
||||
}
|
||||
};
|
||||
|
||||
// Stage 4: Pattern Scanning (GDPR + OAuth)
|
||||
tracing::info!("[{repo_id}] Stage 4: Pattern Scanning");
|
||||
self.update_phase(scan_run_id, "pattern_scanning").await;
|
||||
let gdpr = GdprPatternScanner::new();
|
||||
match gdpr.scan(&repo_path, &repo_id).await {
|
||||
Ok(output) => all_findings.extend(output.findings),
|
||||
Err(e) => tracing::warn!("[{repo_id}] GDPR pattern scan failed: {e}"),
|
||||
}
|
||||
let oauth = OAuthPatternScanner::new();
|
||||
match oauth.scan(&repo_path, &repo_id).await {
|
||||
Ok(output) => all_findings.extend(output.findings),
|
||||
Err(e) => tracing::warn!("[{repo_id}] OAuth pattern scan failed: {e}"),
|
||||
}
|
||||
|
||||
// Stage 5: LLM Triage
|
||||
tracing::info!("[{repo_id}] Stage 5: LLM Triage ({} findings)", all_findings.len());
|
||||
self.update_phase(scan_run_id, "llm_triage").await;
|
||||
let triaged = crate::llm::triage::triage_findings(&self.llm, &mut all_findings).await;
|
||||
tracing::info!("[{repo_id}] Triaged: {triaged} findings passed confidence threshold");
|
||||
|
||||
// Dedup against existing findings and insert new ones
|
||||
let mut new_count = 0u32;
|
||||
for mut finding in all_findings {
|
||||
finding.scan_run_id = Some(scan_run_id.to_string());
|
||||
// Check if fingerprint already exists
|
||||
let existing = self
|
||||
.db
|
||||
.findings()
|
||||
.find_one(doc! { "fingerprint": &finding.fingerprint })
|
||||
.await?;
|
||||
if existing.is_none() {
|
||||
self.db.findings().insert_one(&finding).await?;
|
||||
new_count += 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Persist SBOM entries (upsert by repo_id + name + version)
|
||||
for entry in &sbom_entries {
|
||||
let filter = doc! {
|
||||
"repo_id": &entry.repo_id,
|
||||
"name": &entry.name,
|
||||
"version": &entry.version,
|
||||
};
|
||||
let update = mongodb::bson::to_document(entry)
|
||||
.map(|d| doc! { "$set": d })
|
||||
.unwrap_or_else(|_| doc! {});
|
||||
self.db
|
||||
.sbom_entries()
|
||||
.update_one(filter, update)
|
||||
.upsert(true)
|
||||
.await?;
|
||||
}
|
||||
|
||||
// Persist CVE alerts (upsert by cve_id + repo_id)
|
||||
for alert in &cve_alerts {
|
||||
let filter = doc! {
|
||||
"cve_id": &alert.cve_id,
|
||||
"repo_id": &alert.repo_id,
|
||||
};
|
||||
let update = mongodb::bson::to_document(alert)
|
||||
.map(|d| doc! { "$set": d })
|
||||
.unwrap_or_else(|_| doc! {});
|
||||
self.db
|
||||
.cve_alerts()
|
||||
.update_one(filter, update)
|
||||
.upsert(true)
|
||||
.await?;
|
||||
}
|
||||
|
||||
// Stage 6: Issue Creation
|
||||
tracing::info!("[{repo_id}] Stage 6: Issue Creation");
|
||||
self.update_phase(scan_run_id, "issue_creation").await;
|
||||
// Issue creation is handled by the trackers module - deferred to agent
|
||||
|
||||
// Stage 7: Update repository
|
||||
self.db.repositories().update_one(
|
||||
doc! { "_id": repo.id },
|
||||
doc! {
|
||||
"$set": {
|
||||
"last_scanned_commit": ¤t_sha,
|
||||
"updated_at": mongodb::bson::DateTime::now(),
|
||||
},
|
||||
"$inc": { "findings_count": new_count as i64 },
|
||||
},
|
||||
).await?;
|
||||
|
||||
tracing::info!("[{repo_id}] Scan complete: {new_count} new findings");
|
||||
Ok(new_count)
|
||||
}
|
||||
|
||||
async fn update_phase(&self, scan_run_id: &str, phase: &str) {
|
||||
if let Ok(oid) = mongodb::bson::oid::ObjectId::parse_str(scan_run_id) {
|
||||
let _ = self.db.scan_runs().update_one(
|
||||
doc! { "_id": oid },
|
||||
doc! {
|
||||
"$set": { "current_phase": phase },
|
||||
"$push": { "phases_completed": phase },
|
||||
},
|
||||
).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user