feat: per-repo issue tracker, Gitea support, PR review pipeline (#10)
Some checks failed
CI / Security Audit (push) Has been cancelled
CI / Tests (push) Has been cancelled
CI / Detect Changes (push) Has been cancelled
CI / Deploy Agent (push) Has been cancelled
CI / Deploy Dashboard (push) Has been cancelled
CI / Deploy Docs (push) Has been cancelled
CI / Deploy MCP (push) Has been cancelled
CI / Clippy (push) Has been cancelled
CI / Format (push) Successful in 4s

This commit was merged in pull request #10.
This commit is contained in:
2026-03-11 12:13:59 +00:00
parent be4b43ed64
commit 491665559f
22 changed files with 1582 additions and 122 deletions

View File

@@ -132,6 +132,18 @@ impl GitOps {
Ok(())
}
/// Build credentials from agent config + per-repo overrides
pub fn make_repo_credentials(
config: &compliance_core::AgentConfig,
repo: &compliance_core::models::TrackedRepository,
) -> RepoCredentials {
RepoCredentials {
ssh_key_path: Some(config.ssh_key_path.clone()),
auth_token: repo.auth_token.clone(),
auth_username: repo.auth_username.clone(),
}
}
pub fn get_head_sha(repo_path: &Path) -> Result<String, AgentError> {
let repo = Repository::open(repo_path)?;
let head = repo.head()?;

View File

@@ -4,6 +4,7 @@ use mongodb::bson::doc;
use tracing::Instrument;
use compliance_core::models::*;
use compliance_core::traits::issue_tracker::IssueTracker;
use compliance_core::traits::Scanner;
use compliance_core::AgentConfig;
@@ -12,12 +13,90 @@ use crate::error::AgentError;
use crate::llm::LlmClient;
use crate::pipeline::code_review::CodeReviewScanner;
use crate::pipeline::cve::CveScanner;
use crate::pipeline::git::{GitOps, RepoCredentials};
use crate::pipeline::git::GitOps;
use crate::pipeline::gitleaks::GitleaksScanner;
use crate::pipeline::lint::LintScanner;
use crate::pipeline::patterns::{GdprPatternScanner, OAuthPatternScanner};
use crate::pipeline::sbom::SbomScanner;
use crate::pipeline::semgrep::SemgrepScanner;
use crate::trackers;
/// Enum dispatch for issue trackers (async traits aren't dyn-compatible).
enum TrackerDispatch {
GitHub(trackers::github::GitHubTracker),
GitLab(trackers::gitlab::GitLabTracker),
Gitea(trackers::gitea::GiteaTracker),
Jira(trackers::jira::JiraTracker),
}
impl TrackerDispatch {
fn name(&self) -> &str {
match self {
Self::GitHub(t) => t.name(),
Self::GitLab(t) => t.name(),
Self::Gitea(t) => t.name(),
Self::Jira(t) => t.name(),
}
}
async fn create_issue(
&self,
owner: &str,
repo: &str,
title: &str,
body: &str,
labels: &[String],
) -> Result<TrackerIssue, compliance_core::error::CoreError> {
match self {
Self::GitHub(t) => t.create_issue(owner, repo, title, body, labels).await,
Self::GitLab(t) => t.create_issue(owner, repo, title, body, labels).await,
Self::Gitea(t) => t.create_issue(owner, repo, title, body, labels).await,
Self::Jira(t) => t.create_issue(owner, repo, title, body, labels).await,
}
}
async fn find_existing_issue(
&self,
owner: &str,
repo: &str,
fingerprint: &str,
) -> Result<Option<TrackerIssue>, compliance_core::error::CoreError> {
match self {
Self::GitHub(t) => t.find_existing_issue(owner, repo, fingerprint).await,
Self::GitLab(t) => t.find_existing_issue(owner, repo, fingerprint).await,
Self::Gitea(t) => t.find_existing_issue(owner, repo, fingerprint).await,
Self::Jira(t) => t.find_existing_issue(owner, repo, fingerprint).await,
}
}
async fn create_pr_review(
&self,
owner: &str,
repo: &str,
pr_number: u64,
body: &str,
comments: Vec<compliance_core::traits::issue_tracker::ReviewComment>,
) -> Result<(), compliance_core::error::CoreError> {
match self {
Self::GitHub(t) => {
t.create_pr_review(owner, repo, pr_number, body, comments)
.await
}
Self::GitLab(t) => {
t.create_pr_review(owner, repo, pr_number, body, comments)
.await
}
Self::Gitea(t) => {
t.create_pr_review(owner, repo, pr_number, body, comments)
.await
}
Self::Jira(t) => {
t.create_pr_review(owner, repo, pr_number, body, comments)
.await
}
}
}
}
/// Context from graph analysis passed to LLM triage for enhanced filtering
#[derive(Debug)]
@@ -121,11 +200,7 @@ impl PipelineOrchestrator {
// Stage 0: Change detection
tracing::info!("[{repo_id}] Stage 0: Change detection");
let creds = RepoCredentials {
ssh_key_path: Some(self.config.ssh_key_path.clone()),
auth_token: repo.auth_token.clone(),
auth_username: repo.auth_username.clone(),
};
let creds = GitOps::make_repo_credentials(&self.config, repo);
let git_ops = GitOps::new(&self.config.git_clone_base_path, creds);
let repo_path = git_ops.clone_or_fetch(&repo.git_url, &repo.name)?;
@@ -293,6 +368,7 @@ impl PipelineOrchestrator {
// Dedup against existing findings and insert new ones
let mut new_count = 0u32;
let mut new_findings: Vec<Finding> = Vec::new();
for mut finding in all_findings {
finding.scan_run_id = Some(scan_run_id.to_string());
// Check if fingerprint already exists
@@ -302,7 +378,9 @@ impl PipelineOrchestrator {
.find_one(doc! { "fingerprint": &finding.fingerprint })
.await?;
if existing.is_none() {
self.db.findings().insert_one(&finding).await?;
let result = self.db.findings().insert_one(&finding).await?;
finding.id = result.inserted_id.as_object_id();
new_findings.push(finding);
new_count += 1;
}
}
@@ -351,7 +429,12 @@ impl PipelineOrchestrator {
// Stage 6: Issue Creation
tracing::info!("[{repo_id}] Stage 6: Issue Creation");
self.update_phase(scan_run_id, "issue_creation").await;
// Issue creation is handled by the trackers module - deferred to agent
if let Err(e) = self
.create_tracker_issues(repo, &repo_id, &new_findings)
.await
{
tracing::warn!("[{repo_id}] Issue creation failed: {e}");
}
// Stage 7: Update repository
self.db
@@ -477,6 +560,339 @@ impl PipelineOrchestrator {
}
}
/// Build an issue tracker client from a repository's tracker configuration.
/// Returns `None` if the repo has no tracker configured.
fn build_tracker(&self, repo: &TrackedRepository) -> Option<TrackerDispatch> {
let tracker_type = repo.tracker_type.as_ref()?;
// Per-repo token takes precedence, fall back to global config
match tracker_type {
TrackerType::GitHub => {
let token = repo.tracker_token.clone().or_else(|| {
self.config.github_token.as_ref().map(|t| {
use secrecy::ExposeSecret;
t.expose_secret().to_string()
})
})?;
let secret = secrecy::SecretString::from(token);
match trackers::github::GitHubTracker::new(&secret) {
Ok(t) => Some(TrackerDispatch::GitHub(t)),
Err(e) => {
tracing::warn!("Failed to build GitHub tracker: {e}");
None
}
}
}
TrackerType::GitLab => {
let base_url = self
.config
.gitlab_url
.clone()
.unwrap_or_else(|| "https://gitlab.com".to_string());
let token = repo.tracker_token.clone().or_else(|| {
self.config.gitlab_token.as_ref().map(|t| {
use secrecy::ExposeSecret;
t.expose_secret().to_string()
})
})?;
let secret = secrecy::SecretString::from(token);
Some(TrackerDispatch::GitLab(
trackers::gitlab::GitLabTracker::new(base_url, secret),
))
}
TrackerType::Gitea => {
let token = repo.tracker_token.clone()?;
let base_url = extract_base_url(&repo.git_url)?;
let secret = secrecy::SecretString::from(token);
Some(TrackerDispatch::Gitea(trackers::gitea::GiteaTracker::new(
base_url, secret,
)))
}
TrackerType::Jira => {
let base_url = self.config.jira_url.clone()?;
let email = self.config.jira_email.clone()?;
let project_key = self.config.jira_project_key.clone()?;
let token = repo.tracker_token.clone().or_else(|| {
self.config.jira_api_token.as_ref().map(|t| {
use secrecy::ExposeSecret;
t.expose_secret().to_string()
})
})?;
let secret = secrecy::SecretString::from(token);
Some(TrackerDispatch::Jira(trackers::jira::JiraTracker::new(
base_url,
email,
secret,
project_key,
)))
}
}
}
/// Create tracker issues for new findings (severity >= Medium).
/// Checks for duplicates via fingerprint search before creating.
#[tracing::instrument(skip_all, fields(repo_id = %repo_id))]
async fn create_tracker_issues(
&self,
repo: &TrackedRepository,
repo_id: &str,
new_findings: &[Finding],
) -> Result<(), AgentError> {
let tracker = match self.build_tracker(repo) {
Some(t) => t,
None => {
tracing::info!("[{repo_id}] No issue tracker configured, skipping");
return Ok(());
}
};
let owner = match repo.tracker_owner.as_deref() {
Some(o) => o,
None => {
tracing::warn!("[{repo_id}] tracker_owner not set, skipping issue creation");
return Ok(());
}
};
let tracker_repo_name = match repo.tracker_repo.as_deref() {
Some(r) => r,
None => {
tracing::warn!("[{repo_id}] tracker_repo not set, skipping issue creation");
return Ok(());
}
};
// Only create issues for medium+ severity findings
let actionable: Vec<&Finding> = new_findings
.iter()
.filter(|f| {
matches!(
f.severity,
Severity::Medium | Severity::High | Severity::Critical
)
})
.collect();
if actionable.is_empty() {
tracing::info!("[{repo_id}] No medium+ findings, skipping issue creation");
return Ok(());
}
tracing::info!(
"[{repo_id}] Creating issues for {} findings via {}",
actionable.len(),
tracker.name()
);
let mut created = 0u32;
for finding in actionable {
// Check if an issue already exists for this fingerprint
match tracker
.find_existing_issue(owner, tracker_repo_name, &finding.fingerprint)
.await
{
Ok(Some(existing)) => {
tracing::debug!(
"[{repo_id}] Issue already exists for {}: {}",
finding.fingerprint,
existing.external_url
);
continue;
}
Ok(None) => {}
Err(e) => {
tracing::warn!("[{repo_id}] Failed to search for existing issue: {e}");
// Continue and try to create anyway
}
}
let title = format!(
"[{}] {}: {}",
finding.severity, finding.scanner, finding.title
);
let body = format_issue_body(finding);
let labels = vec![
format!("severity:{}", finding.severity),
format!("scanner:{}", finding.scanner),
"compliance-scanner".to_string(),
];
match tracker
.create_issue(owner, tracker_repo_name, &title, &body, &labels)
.await
{
Ok(mut issue) => {
issue.finding_id = finding
.id
.as_ref()
.map(|id| id.to_hex())
.unwrap_or_default();
// Update the finding with the issue URL
if let Some(finding_id) = &finding.id {
let _ = self
.db
.findings()
.update_one(
doc! { "_id": finding_id },
doc! { "$set": { "tracker_issue_url": &issue.external_url } },
)
.await;
}
// Store the tracker issue record
if let Err(e) = self.db.tracker_issues().insert_one(&issue).await {
tracing::warn!("[{repo_id}] Failed to store tracker issue: {e}");
}
created += 1;
}
Err(e) => {
tracing::warn!(
"[{repo_id}] Failed to create issue for {}: {e}",
finding.fingerprint
);
}
}
}
tracing::info!("[{repo_id}] Created {created} tracker issues");
Ok(())
}
/// Run an incremental scan on a PR diff and post review comments.
#[tracing::instrument(skip_all, fields(repo_id = %repo_id, pr_number))]
pub async fn run_pr_review(
&self,
repo: &TrackedRepository,
repo_id: &str,
pr_number: u64,
base_sha: &str,
head_sha: &str,
) -> Result<(), AgentError> {
let tracker = match self.build_tracker(repo) {
Some(t) => t,
None => {
tracing::warn!("[{repo_id}] No tracker configured, cannot post PR review");
return Ok(());
}
};
let owner = repo.tracker_owner.as_deref().unwrap_or("");
let tracker_repo_name = repo.tracker_repo.as_deref().unwrap_or("");
if owner.is_empty() || tracker_repo_name.is_empty() {
tracing::warn!("[{repo_id}] tracker_owner or tracker_repo not set");
return Ok(());
}
// Clone/fetch the repo
let creds = GitOps::make_repo_credentials(&self.config, repo);
let git_ops = GitOps::new(&self.config.git_clone_base_path, creds);
let repo_path = git_ops.clone_or_fetch(&repo.git_url, &repo.name)?;
// Get diff between base and head
let diff_files = GitOps::get_diff_content(&repo_path, base_sha, head_sha)?;
if diff_files.is_empty() {
tracing::info!("[{repo_id}] PR #{pr_number}: no diff files, skipping review");
return Ok(());
}
// Run semgrep on the full repo but we'll filter findings to changed files
let changed_paths: std::collections::HashSet<String> =
diff_files.iter().map(|f| f.path.clone()).collect();
let mut pr_findings: Vec<Finding> = Vec::new();
// SAST scan (semgrep)
match SemgrepScanner.scan(&repo_path, repo_id).await {
Ok(output) => {
for f in output.findings {
if let Some(fp) = &f.file_path {
if changed_paths.contains(fp.as_str()) {
pr_findings.push(f);
}
}
}
}
Err(e) => tracing::warn!("[{repo_id}] PR semgrep failed: {e}"),
}
// LLM code review on the diff
let reviewer = CodeReviewScanner::new(self.llm.clone());
let review_output = reviewer
.review_diff(&repo_path, repo_id, base_sha, head_sha)
.await;
pr_findings.extend(review_output.findings);
if pr_findings.is_empty() {
// Post a clean review
if let Err(e) = tracker
.create_pr_review(
owner,
tracker_repo_name,
pr_number,
"Compliance scan: no issues found in this PR.",
Vec::new(),
)
.await
{
tracing::warn!("[{repo_id}] Failed to post clean PR review: {e}");
}
return Ok(());
}
// Build review comments from findings
let mut review_comments = Vec::new();
for finding in &pr_findings {
if let (Some(path), Some(line)) = (&finding.file_path, finding.line_number) {
let comment_body = format!(
"**[{}] {}**\n\n{}\n\n*Scanner: {} | {}*",
finding.severity,
finding.title,
finding.description,
finding.scanner,
finding
.cwe
.as_deref()
.map(|c| format!("CWE: {c}"))
.unwrap_or_default(),
);
review_comments.push(compliance_core::traits::issue_tracker::ReviewComment {
path: path.clone(),
line,
body: comment_body,
});
}
}
let summary = format!(
"Compliance scan found **{}** issue(s) in this PR:\n\n{}",
pr_findings.len(),
pr_findings
.iter()
.map(|f| format!("- **[{}]** {}: {}", f.severity, f.scanner, f.title))
.collect::<Vec<_>>()
.join("\n"),
);
if let Err(e) = tracker
.create_pr_review(
owner,
tracker_repo_name,
pr_number,
&summary,
review_comments,
)
.await
{
tracing::warn!("[{repo_id}] Failed to post PR review: {e}");
} else {
tracing::info!(
"[{repo_id}] Posted PR review on #{pr_number} with {} findings",
pr_findings.len()
);
}
Ok(())
}
async fn update_phase(&self, scan_run_id: &str, phase: &str) {
if let Ok(oid) = mongodb::bson::oid::ObjectId::parse_str(scan_run_id) {
let _ = self
@@ -493,3 +909,73 @@ impl PipelineOrchestrator {
}
}
}
/// Extract the scheme + host from a git URL.
/// e.g. "https://gitea.example.com/owner/repo.git" → "https://gitea.example.com"
/// e.g. "ssh://git@gitea.example.com:22/owner/repo.git" → "https://gitea.example.com"
fn extract_base_url(git_url: &str) -> Option<String> {
if let Some(rest) = git_url.strip_prefix("https://") {
let host = rest.split('/').next()?;
Some(format!("https://{host}"))
} else if let Some(rest) = git_url.strip_prefix("http://") {
let host = rest.split('/').next()?;
Some(format!("http://{host}"))
} else if let Some(rest) = git_url.strip_prefix("ssh://") {
// ssh://git@host:port/path → extract host
let after_at = rest.find('@').map(|i| &rest[i + 1..]).unwrap_or(rest);
let host = after_at.split(&[':', '/'][..]).next()?;
Some(format!("https://{host}"))
} else if let Some(at_pos) = git_url.find('@') {
// SCP-style: git@host:owner/repo.git
let after_at = &git_url[at_pos + 1..];
let host = after_at.split(':').next()?;
Some(format!("https://{host}"))
} else {
None
}
}
/// Format a finding into a markdown issue body for the tracker.
fn format_issue_body(finding: &Finding) -> String {
let mut body = String::new();
body.push_str(&format!("## {} Finding\n\n", finding.severity));
body.push_str(&format!("**Scanner:** {}\n", finding.scanner));
body.push_str(&format!("**Severity:** {}\n", finding.severity));
if let Some(rule) = &finding.rule_id {
body.push_str(&format!("**Rule:** {}\n", rule));
}
if let Some(cwe) = &finding.cwe {
body.push_str(&format!("**CWE:** {}\n", cwe));
}
body.push_str(&format!("\n### Description\n\n{}\n", finding.description));
if let Some(file_path) = &finding.file_path {
body.push_str(&format!("\n### Location\n\n**File:** `{}`", file_path));
if let Some(line) = finding.line_number {
body.push_str(&format!(" (line {})", line));
}
body.push('\n');
}
if let Some(snippet) = &finding.code_snippet {
body.push_str(&format!("\n### Code\n\n```\n{}\n```\n", snippet));
}
if let Some(remediation) = &finding.remediation {
body.push_str(&format!("\n### Remediation\n\n{}\n", remediation));
}
if let Some(fix) = &finding.suggested_fix {
body.push_str(&format!("\n### Suggested Fix\n\n```\n{}\n```\n", fix));
}
body.push_str(&format!(
"\n---\n*Fingerprint:* `{}`\n*Generated by compliance-scanner*",
finding.fingerprint
));
body
}