refactor: modularize codebase and add 404 unit tests (#13)
All checks were successful
CI / Format (push) Successful in 4s
CI / Clippy (push) Successful in 4m19s
CI / Security Audit (push) Successful in 1m44s
CI / Detect Changes (push) Successful in 5s
CI / Tests (push) Successful in 5m15s
CI / Deploy Agent (push) Successful in 2s
CI / Deploy Dashboard (push) Successful in 2s
CI / Deploy Docs (push) Has been skipped
CI / Deploy MCP (push) Successful in 2s

This commit was merged in pull request #13.
This commit is contained in:
2026-03-13 08:03:45 +00:00
parent acc5b86aa4
commit 3bb690e5bb
89 changed files with 11884 additions and 6046 deletions

View File

@@ -4,7 +4,6 @@ use mongodb::bson::doc;
use tracing::Instrument;
use compliance_core::models::*;
use compliance_core::traits::issue_tracker::IssueTracker;
use compliance_core::traits::Scanner;
use compliance_core::AgentConfig;
@@ -19,84 +18,6 @@ use crate::pipeline::lint::LintScanner;
use crate::pipeline::patterns::{GdprPatternScanner, OAuthPatternScanner};
use crate::pipeline::sbom::SbomScanner;
use crate::pipeline::semgrep::SemgrepScanner;
use crate::trackers;
/// Enum dispatch for issue trackers (async traits aren't dyn-compatible).
enum TrackerDispatch {
GitHub(trackers::github::GitHubTracker),
GitLab(trackers::gitlab::GitLabTracker),
Gitea(trackers::gitea::GiteaTracker),
Jira(trackers::jira::JiraTracker),
}
impl TrackerDispatch {
fn name(&self) -> &str {
match self {
Self::GitHub(t) => t.name(),
Self::GitLab(t) => t.name(),
Self::Gitea(t) => t.name(),
Self::Jira(t) => t.name(),
}
}
async fn create_issue(
&self,
owner: &str,
repo: &str,
title: &str,
body: &str,
labels: &[String],
) -> Result<TrackerIssue, compliance_core::error::CoreError> {
match self {
Self::GitHub(t) => t.create_issue(owner, repo, title, body, labels).await,
Self::GitLab(t) => t.create_issue(owner, repo, title, body, labels).await,
Self::Gitea(t) => t.create_issue(owner, repo, title, body, labels).await,
Self::Jira(t) => t.create_issue(owner, repo, title, body, labels).await,
}
}
async fn find_existing_issue(
&self,
owner: &str,
repo: &str,
fingerprint: &str,
) -> Result<Option<TrackerIssue>, compliance_core::error::CoreError> {
match self {
Self::GitHub(t) => t.find_existing_issue(owner, repo, fingerprint).await,
Self::GitLab(t) => t.find_existing_issue(owner, repo, fingerprint).await,
Self::Gitea(t) => t.find_existing_issue(owner, repo, fingerprint).await,
Self::Jira(t) => t.find_existing_issue(owner, repo, fingerprint).await,
}
}
async fn create_pr_review(
&self,
owner: &str,
repo: &str,
pr_number: u64,
body: &str,
comments: Vec<compliance_core::traits::issue_tracker::ReviewComment>,
) -> Result<(), compliance_core::error::CoreError> {
match self {
Self::GitHub(t) => {
t.create_pr_review(owner, repo, pr_number, body, comments)
.await
}
Self::GitLab(t) => {
t.create_pr_review(owner, repo, pr_number, body, comments)
.await
}
Self::Gitea(t) => {
t.create_pr_review(owner, repo, pr_number, body, comments)
.await
}
Self::Jira(t) => {
t.create_pr_review(owner, repo, pr_number, body, comments)
.await
}
}
}
}
/// Context from graph analysis passed to LLM triage for enhanced filtering
#[derive(Debug)]
@@ -109,10 +30,10 @@ pub struct GraphContext {
}
pub struct PipelineOrchestrator {
config: AgentConfig,
db: Database,
llm: Arc<LlmClient>,
http: reqwest::Client,
pub(super) config: AgentConfig,
pub(super) db: Database,
pub(super) llm: Arc<LlmClient>,
pub(super) http: reqwest::Client,
}
impl PipelineOrchestrator {
@@ -460,446 +381,7 @@ impl PipelineOrchestrator {
Ok(new_count)
}
/// Build the code knowledge graph for a repo and compute impact analyses
async fn build_code_graph(
&self,
repo_path: &std::path::Path,
repo_id: &str,
findings: &[Finding],
) -> Result<GraphContext, AgentError> {
let graph_build_id = uuid::Uuid::new_v4().to_string();
let engine = compliance_graph::GraphEngine::new(50_000);
let (mut code_graph, build_run) =
engine
.build_graph(repo_path, repo_id, &graph_build_id)
.map_err(|e| AgentError::Other(format!("Graph build error: {e}")))?;
// Apply community detection
compliance_graph::graph::community::apply_communities(&mut code_graph);
// Store graph in MongoDB
let store = compliance_graph::graph::persistence::GraphStore::new(self.db.inner());
store
.delete_repo_graph(repo_id)
.await
.map_err(|e| AgentError::Other(format!("Graph cleanup error: {e}")))?;
store
.store_graph(&build_run, &code_graph.nodes, &code_graph.edges)
.await
.map_err(|e| AgentError::Other(format!("Graph store error: {e}")))?;
// Compute impact analysis for each finding
let analyzer = compliance_graph::GraphEngine::impact_analyzer(&code_graph);
let mut impacts = Vec::new();
for finding in findings {
if let Some(file_path) = &finding.file_path {
let impact = analyzer.analyze(
repo_id,
&finding.fingerprint,
&graph_build_id,
file_path,
finding.line_number,
);
store
.store_impact(&impact)
.await
.map_err(|e| AgentError::Other(format!("Impact store error: {e}")))?;
impacts.push(impact);
}
}
Ok(GraphContext {
node_count: build_run.node_count,
edge_count: build_run.edge_count,
community_count: build_run.community_count,
impacts,
})
}
/// Trigger DAST scan if a target is configured for this repo
async fn maybe_trigger_dast(&self, repo_id: &str, scan_run_id: &str) {
use futures_util::TryStreamExt;
let filter = mongodb::bson::doc! { "repo_id": repo_id };
let targets: Vec<compliance_core::models::DastTarget> =
match self.db.dast_targets().find(filter).await {
Ok(cursor) => cursor.try_collect().await.unwrap_or_default(),
Err(_) => return,
};
if targets.is_empty() {
tracing::info!("[{repo_id}] No DAST targets configured, skipping");
return;
}
for target in targets {
let db = self.db.clone();
let scan_run_id = scan_run_id.to_string();
tokio::spawn(async move {
let orchestrator = compliance_dast::DastOrchestrator::new(100);
match orchestrator.run_scan(&target, Vec::new()).await {
Ok((mut scan_run, findings)) => {
scan_run.sast_scan_run_id = Some(scan_run_id);
if let Err(e) = db.dast_scan_runs().insert_one(&scan_run).await {
tracing::error!("Failed to store DAST scan run: {e}");
}
for finding in &findings {
if let Err(e) = db.dast_findings().insert_one(finding).await {
tracing::error!("Failed to store DAST finding: {e}");
}
}
tracing::info!("DAST scan complete: {} findings", findings.len());
}
Err(e) => {
tracing::error!("DAST scan failed: {e}");
}
}
});
}
}
/// Build an issue tracker client from a repository's tracker configuration.
/// Returns `None` if the repo has no tracker configured.
fn build_tracker(&self, repo: &TrackedRepository) -> Option<TrackerDispatch> {
let tracker_type = repo.tracker_type.as_ref()?;
// Per-repo token takes precedence, fall back to global config
match tracker_type {
TrackerType::GitHub => {
let token = repo.tracker_token.clone().or_else(|| {
self.config.github_token.as_ref().map(|t| {
use secrecy::ExposeSecret;
t.expose_secret().to_string()
})
})?;
let secret = secrecy::SecretString::from(token);
match trackers::github::GitHubTracker::new(&secret) {
Ok(t) => Some(TrackerDispatch::GitHub(t)),
Err(e) => {
tracing::warn!("Failed to build GitHub tracker: {e}");
None
}
}
}
TrackerType::GitLab => {
let base_url = self
.config
.gitlab_url
.clone()
.unwrap_or_else(|| "https://gitlab.com".to_string());
let token = repo.tracker_token.clone().or_else(|| {
self.config.gitlab_token.as_ref().map(|t| {
use secrecy::ExposeSecret;
t.expose_secret().to_string()
})
})?;
let secret = secrecy::SecretString::from(token);
Some(TrackerDispatch::GitLab(
trackers::gitlab::GitLabTracker::new(base_url, secret),
))
}
TrackerType::Gitea => {
let token = repo.tracker_token.clone()?;
let base_url = extract_base_url(&repo.git_url)?;
let secret = secrecy::SecretString::from(token);
Some(TrackerDispatch::Gitea(trackers::gitea::GiteaTracker::new(
base_url, secret,
)))
}
TrackerType::Jira => {
let base_url = self.config.jira_url.clone()?;
let email = self.config.jira_email.clone()?;
let project_key = self.config.jira_project_key.clone()?;
let token = repo.tracker_token.clone().or_else(|| {
self.config.jira_api_token.as_ref().map(|t| {
use secrecy::ExposeSecret;
t.expose_secret().to_string()
})
})?;
let secret = secrecy::SecretString::from(token);
Some(TrackerDispatch::Jira(trackers::jira::JiraTracker::new(
base_url,
email,
secret,
project_key,
)))
}
}
}
/// Create tracker issues for new findings (severity >= Medium).
/// Checks for duplicates via fingerprint search before creating.
#[tracing::instrument(skip_all, fields(repo_id = %repo_id))]
async fn create_tracker_issues(
&self,
repo: &TrackedRepository,
repo_id: &str,
new_findings: &[Finding],
) -> Result<(), AgentError> {
let tracker = match self.build_tracker(repo) {
Some(t) => t,
None => {
tracing::info!("[{repo_id}] No issue tracker configured, skipping");
return Ok(());
}
};
let owner = match repo.tracker_owner.as_deref() {
Some(o) => o,
None => {
tracing::warn!("[{repo_id}] tracker_owner not set, skipping issue creation");
return Ok(());
}
};
let tracker_repo_name = match repo.tracker_repo.as_deref() {
Some(r) => r,
None => {
tracing::warn!("[{repo_id}] tracker_repo not set, skipping issue creation");
return Ok(());
}
};
// Only create issues for medium+ severity findings
let actionable: Vec<&Finding> = new_findings
.iter()
.filter(|f| {
matches!(
f.severity,
Severity::Medium | Severity::High | Severity::Critical
)
})
.collect();
if actionable.is_empty() {
tracing::info!("[{repo_id}] No medium+ findings, skipping issue creation");
return Ok(());
}
tracing::info!(
"[{repo_id}] Creating issues for {} findings via {}",
actionable.len(),
tracker.name()
);
let mut created = 0u32;
for finding in actionable {
let title = format!(
"[{}] {}: {}",
finding.severity, finding.scanner, finding.title
);
// Check if an issue already exists by fingerprint first, then by title
let mut found_existing = false;
for search_term in [&finding.fingerprint, &title] {
match tracker
.find_existing_issue(owner, tracker_repo_name, search_term)
.await
{
Ok(Some(existing)) => {
tracing::debug!(
"[{repo_id}] Issue already exists for '{}': {}",
search_term,
existing.external_url
);
found_existing = true;
break;
}
Ok(None) => {}
Err(e) => {
tracing::warn!("[{repo_id}] Failed to search for existing issue: {e}");
}
}
}
if found_existing {
continue;
}
let body = format_issue_body(finding);
let labels = vec![
format!("severity:{}", finding.severity),
format!("scanner:{}", finding.scanner),
"compliance-scanner".to_string(),
];
match tracker
.create_issue(owner, tracker_repo_name, &title, &body, &labels)
.await
{
Ok(mut issue) => {
issue.finding_id = finding
.id
.as_ref()
.map(|id| id.to_hex())
.unwrap_or_default();
// Update the finding with the issue URL
if let Some(finding_id) = &finding.id {
let _ = self
.db
.findings()
.update_one(
doc! { "_id": finding_id },
doc! { "$set": { "tracker_issue_url": &issue.external_url } },
)
.await;
}
// Store the tracker issue record
if let Err(e) = self.db.tracker_issues().insert_one(&issue).await {
tracing::warn!("[{repo_id}] Failed to store tracker issue: {e}");
}
created += 1;
}
Err(e) => {
tracing::warn!(
"[{repo_id}] Failed to create issue for {}: {e}",
finding.fingerprint
);
}
}
}
tracing::info!("[{repo_id}] Created {created} tracker issues");
Ok(())
}
/// Run an incremental scan on a PR diff and post review comments.
#[tracing::instrument(skip_all, fields(repo_id = %repo_id, pr_number))]
pub async fn run_pr_review(
&self,
repo: &TrackedRepository,
repo_id: &str,
pr_number: u64,
base_sha: &str,
head_sha: &str,
) -> Result<(), AgentError> {
let tracker = match self.build_tracker(repo) {
Some(t) => t,
None => {
tracing::warn!("[{repo_id}] No tracker configured, cannot post PR review");
return Ok(());
}
};
let owner = repo.tracker_owner.as_deref().unwrap_or("");
let tracker_repo_name = repo.tracker_repo.as_deref().unwrap_or("");
if owner.is_empty() || tracker_repo_name.is_empty() {
tracing::warn!("[{repo_id}] tracker_owner or tracker_repo not set");
return Ok(());
}
// Clone/fetch the repo
let creds = GitOps::make_repo_credentials(&self.config, repo);
let git_ops = GitOps::new(&self.config.git_clone_base_path, creds);
let repo_path = git_ops.clone_or_fetch(&repo.git_url, &repo.name)?;
// Get diff between base and head
let diff_files = GitOps::get_diff_content(&repo_path, base_sha, head_sha)?;
if diff_files.is_empty() {
tracing::info!("[{repo_id}] PR #{pr_number}: no diff files, skipping review");
return Ok(());
}
// Run semgrep on the full repo but we'll filter findings to changed files
let changed_paths: std::collections::HashSet<String> =
diff_files.iter().map(|f| f.path.clone()).collect();
let mut pr_findings: Vec<Finding> = Vec::new();
// SAST scan (semgrep)
match SemgrepScanner.scan(&repo_path, repo_id).await {
Ok(output) => {
for f in output.findings {
if let Some(fp) = &f.file_path {
if changed_paths.contains(fp.as_str()) {
pr_findings.push(f);
}
}
}
}
Err(e) => tracing::warn!("[{repo_id}] PR semgrep failed: {e}"),
}
// LLM code review on the diff
let reviewer = CodeReviewScanner::new(self.llm.clone());
let review_output = reviewer
.review_diff(&repo_path, repo_id, base_sha, head_sha)
.await;
pr_findings.extend(review_output.findings);
if pr_findings.is_empty() {
// Post a clean review
if let Err(e) = tracker
.create_pr_review(
owner,
tracker_repo_name,
pr_number,
"Compliance scan: no issues found in this PR.",
Vec::new(),
)
.await
{
tracing::warn!("[{repo_id}] Failed to post clean PR review: {e}");
}
return Ok(());
}
// Build review comments from findings
let mut review_comments = Vec::new();
for finding in &pr_findings {
if let (Some(path), Some(line)) = (&finding.file_path, finding.line_number) {
let comment_body = format!(
"**[{}] {}**\n\n{}\n\n*Scanner: {} | {}*",
finding.severity,
finding.title,
finding.description,
finding.scanner,
finding
.cwe
.as_deref()
.map(|c| format!("CWE: {c}"))
.unwrap_or_default(),
);
review_comments.push(compliance_core::traits::issue_tracker::ReviewComment {
path: path.clone(),
line,
body: comment_body,
});
}
}
let summary = format!(
"Compliance scan found **{}** issue(s) in this PR:\n\n{}",
pr_findings.len(),
pr_findings
.iter()
.map(|f| format!("- **[{}]** {}: {}", f.severity, f.scanner, f.title))
.collect::<Vec<_>>()
.join("\n"),
);
if let Err(e) = tracker
.create_pr_review(
owner,
tracker_repo_name,
pr_number,
&summary,
review_comments,
)
.await
{
tracing::warn!("[{repo_id}] Failed to post PR review: {e}");
} else {
tracing::info!(
"[{repo_id}] Posted PR review on #{pr_number} with {} findings",
pr_findings.len()
);
}
Ok(())
}
async fn update_phase(&self, scan_run_id: &str, phase: &str) {
pub(super) async fn update_phase(&self, scan_run_id: &str, phase: &str) {
if let Ok(oid) = mongodb::bson::oid::ObjectId::parse_str(scan_run_id) {
let _ = self
.db
@@ -917,9 +399,9 @@ impl PipelineOrchestrator {
}
/// Extract the scheme + host from a git URL.
/// e.g. "https://gitea.example.com/owner/repo.git" "https://gitea.example.com"
/// e.g. "ssh://git@gitea.example.com:22/owner/repo.git" "https://gitea.example.com"
fn extract_base_url(git_url: &str) -> Option<String> {
/// e.g. "https://gitea.example.com/owner/repo.git" -> "https://gitea.example.com"
/// e.g. "ssh://git@gitea.example.com:22/owner/repo.git" -> "https://gitea.example.com"
pub(super) fn extract_base_url(git_url: &str) -> Option<String> {
if let Some(rest) = git_url.strip_prefix("https://") {
let host = rest.split('/').next()?;
Some(format!("https://{host}"))
@@ -927,7 +409,7 @@ fn extract_base_url(git_url: &str) -> Option<String> {
let host = rest.split('/').next()?;
Some(format!("http://{host}"))
} else if let Some(rest) = git_url.strip_prefix("ssh://") {
// ssh://git@host:port/path extract host
// ssh://git@host:port/path -> extract host
let after_at = rest.find('@').map(|i| &rest[i + 1..]).unwrap_or(rest);
let host = after_at.split(&[':', '/'][..]).next()?;
Some(format!("https://{host}"))
@@ -940,48 +422,3 @@ fn extract_base_url(git_url: &str) -> Option<String> {
None
}
}
/// Format a finding into a markdown issue body for the tracker.
fn format_issue_body(finding: &Finding) -> String {
let mut body = String::new();
body.push_str(&format!("## {} Finding\n\n", finding.severity));
body.push_str(&format!("**Scanner:** {}\n", finding.scanner));
body.push_str(&format!("**Severity:** {}\n", finding.severity));
if let Some(rule) = &finding.rule_id {
body.push_str(&format!("**Rule:** {}\n", rule));
}
if let Some(cwe) = &finding.cwe {
body.push_str(&format!("**CWE:** {}\n", cwe));
}
body.push_str(&format!("\n### Description\n\n{}\n", finding.description));
if let Some(file_path) = &finding.file_path {
body.push_str(&format!("\n### Location\n\n**File:** `{}`", file_path));
if let Some(line) = finding.line_number {
body.push_str(&format!(" (line {})", line));
}
body.push('\n');
}
if let Some(snippet) = &finding.code_snippet {
body.push_str(&format!("\n### Code\n\n```\n{}\n```\n", snippet));
}
if let Some(remediation) = &finding.remediation {
body.push_str(&format!("\n### Remediation\n\n{}\n", remediation));
}
if let Some(fix) = &finding.suggested_fix {
body.push_str(&format!("\n### Suggested Fix\n\n```\n{}\n```\n", fix));
}
body.push_str(&format!(
"\n---\n*Fingerprint:* `{}`\n*Generated by compliance-scanner*",
finding.fingerprint
));
body
}