diff --git a/compliance-agent/src/agent.rs b/compliance-agent/src/agent.rs index e13271d..6c8fed7 100644 --- a/compliance-agent/src/agent.rs +++ b/compliance-agent/src/agent.rs @@ -43,4 +43,35 @@ impl ComplianceAgent { ); orchestrator.run(repo_id, trigger).await } + + /// Run a PR review: scan the diff and post review comments. + pub async fn run_pr_review( + &self, + repo_id: &str, + pr_number: u64, + base_sha: &str, + head_sha: &str, + ) -> Result<(), crate::error::AgentError> { + let repo = self + .db + .repositories() + .find_one(mongodb::bson::doc! { + "_id": mongodb::bson::oid::ObjectId::parse_str(repo_id) + .map_err(|e| crate::error::AgentError::Other(e.to_string()))? + }) + .await? + .ok_or_else(|| { + crate::error::AgentError::Other(format!("Repository {repo_id} not found")) + })?; + + let orchestrator = PipelineOrchestrator::new( + self.config.clone(), + self.db.clone(), + self.llm.clone(), + self.http.clone(), + ); + orchestrator + .run_pr_review(&repo, repo_id, pr_number, base_sha, head_sha) + .await + } } diff --git a/compliance-agent/src/config.rs b/compliance-agent/src/config.rs index f166007..a00621e 100644 --- a/compliance-agent/src/config.rs +++ b/compliance-agent/src/config.rs @@ -31,6 +31,7 @@ pub fn load_config() -> Result { gitlab_url: env_var_opt("GITLAB_URL"), gitlab_token: env_secret_opt("GITLAB_TOKEN"), gitlab_webhook_secret: env_secret_opt("GITLAB_WEBHOOK_SECRET"), + gitea_webhook_secret: env_secret_opt("GITEA_WEBHOOK_SECRET"), jira_url: env_var_opt("JIRA_URL"), jira_email: env_var_opt("JIRA_EMAIL"), jira_api_token: env_secret_opt("JIRA_API_TOKEN"), diff --git a/compliance-agent/src/pipeline/git.rs b/compliance-agent/src/pipeline/git.rs index 39c7f7d..9508c3f 100644 --- a/compliance-agent/src/pipeline/git.rs +++ b/compliance-agent/src/pipeline/git.rs @@ -132,6 +132,18 @@ impl GitOps { Ok(()) } + /// Build credentials from agent config + per-repo overrides + pub fn make_repo_credentials( + config: &compliance_core::AgentConfig, + repo: 
&compliance_core::models::TrackedRepository, + ) -> RepoCredentials { + RepoCredentials { + ssh_key_path: Some(config.ssh_key_path.clone()), + auth_token: repo.auth_token.clone(), + auth_username: repo.auth_username.clone(), + } + } + pub fn get_head_sha(repo_path: &Path) -> Result { let repo = Repository::open(repo_path)?; let head = repo.head()?; diff --git a/compliance-agent/src/pipeline/orchestrator.rs b/compliance-agent/src/pipeline/orchestrator.rs index 3b975d6..ead8f6e 100644 --- a/compliance-agent/src/pipeline/orchestrator.rs +++ b/compliance-agent/src/pipeline/orchestrator.rs @@ -13,7 +13,7 @@ use crate::error::AgentError; use crate::llm::LlmClient; use crate::pipeline::code_review::CodeReviewScanner; use crate::pipeline::cve::CveScanner; -use crate::pipeline::git::{GitOps, RepoCredentials}; +use crate::pipeline::git::GitOps; use crate::pipeline::gitleaks::GitleaksScanner; use crate::pipeline::lint::LintScanner; use crate::pipeline::patterns::{GdprPatternScanner, OAuthPatternScanner}; @@ -68,6 +68,34 @@ impl TrackerDispatch { Self::Jira(t) => t.find_existing_issue(owner, repo, fingerprint).await, } } + + async fn create_pr_review( + &self, + owner: &str, + repo: &str, + pr_number: u64, + body: &str, + comments: Vec, + ) -> Result<(), compliance_core::error::CoreError> { + match self { + Self::GitHub(t) => { + t.create_pr_review(owner, repo, pr_number, body, comments) + .await + } + Self::GitLab(t) => { + t.create_pr_review(owner, repo, pr_number, body, comments) + .await + } + Self::Gitea(t) => { + t.create_pr_review(owner, repo, pr_number, body, comments) + .await + } + Self::Jira(t) => { + t.create_pr_review(owner, repo, pr_number, body, comments) + .await + } + } + } } /// Context from graph analysis passed to LLM triage for enhanced filtering @@ -172,11 +200,7 @@ impl PipelineOrchestrator { // Stage 0: Change detection tracing::info!("[{repo_id}] Stage 0: Change detection"); - let creds = RepoCredentials { - ssh_key_path: 
Some(self.config.ssh_key_path.clone()), - auth_token: repo.auth_token.clone(), - auth_username: repo.auth_username.clone(), - }; + let creds = GitOps::make_repo_credentials(&self.config, repo); let git_ops = GitOps::new(&self.config.git_clone_base_path, creds); let repo_path = git_ops.clone_or_fetch(&repo.git_url, &repo.name)?; @@ -734,6 +758,141 @@ impl PipelineOrchestrator { Ok(()) } + /// Run an incremental scan on a PR diff and post review comments. + #[tracing::instrument(skip_all, fields(repo_id = %repo_id, pr_number))] + pub async fn run_pr_review( + &self, + repo: &TrackedRepository, + repo_id: &str, + pr_number: u64, + base_sha: &str, + head_sha: &str, + ) -> Result<(), AgentError> { + let tracker = match self.build_tracker(repo) { + Some(t) => t, + None => { + tracing::warn!("[{repo_id}] No tracker configured, cannot post PR review"); + return Ok(()); + } + }; + let owner = repo.tracker_owner.as_deref().unwrap_or(""); + let tracker_repo_name = repo.tracker_repo.as_deref().unwrap_or(""); + if owner.is_empty() || tracker_repo_name.is_empty() { + tracing::warn!("[{repo_id}] tracker_owner or tracker_repo not set"); + return Ok(()); + } + + // Clone/fetch the repo + let creds = GitOps::make_repo_credentials(&self.config, repo); + let git_ops = GitOps::new(&self.config.git_clone_base_path, creds); + let repo_path = git_ops.clone_or_fetch(&repo.git_url, &repo.name)?; + + // Get diff between base and head + let diff_files = GitOps::get_diff_content(&repo_path, base_sha, head_sha)?; + if diff_files.is_empty() { + tracing::info!("[{repo_id}] PR #{pr_number}: no diff files, skipping review"); + return Ok(()); + } + + // Run semgrep on the full repo but we'll filter findings to changed files + let changed_paths: std::collections::HashSet = + diff_files.iter().map(|f| f.path.clone()).collect(); + + let mut pr_findings: Vec = Vec::new(); + + // SAST scan (semgrep) + match SemgrepScanner.scan(&repo_path, repo_id).await { + Ok(output) => { + for f in output.findings { + 
if let Some(fp) = &f.file_path { + if changed_paths.contains(fp.as_str()) { + pr_findings.push(f); + } + } + } + } + Err(e) => tracing::warn!("[{repo_id}] PR semgrep failed: {e}"), + } + + // LLM code review on the diff + let reviewer = CodeReviewScanner::new(self.llm.clone()); + let review_output = reviewer + .review_diff(&repo_path, repo_id, base_sha, head_sha) + .await; + pr_findings.extend(review_output.findings); + + if pr_findings.is_empty() { + // Post a clean review + if let Err(e) = tracker + .create_pr_review( + owner, + tracker_repo_name, + pr_number, + "Compliance scan: no issues found in this PR.", + Vec::new(), + ) + .await + { + tracing::warn!("[{repo_id}] Failed to post clean PR review: {e}"); + } + return Ok(()); + } + + // Build review comments from findings + let mut review_comments = Vec::new(); + for finding in &pr_findings { + if let (Some(path), Some(line)) = (&finding.file_path, finding.line_number) { + let comment_body = format!( + "**[{}] {}**\n\n{}\n\n*Scanner: {} | {}*", + finding.severity, + finding.title, + finding.description, + finding.scanner, + finding + .cwe + .as_deref() + .map(|c| format!("CWE: {c}")) + .unwrap_or_default(), + ); + review_comments.push(compliance_core::traits::issue_tracker::ReviewComment { + path: path.clone(), + line, + body: comment_body, + }); + } + } + + let summary = format!( + "Compliance scan found **{}** issue(s) in this PR:\n\n{}", + pr_findings.len(), + pr_findings + .iter() + .map(|f| format!("- **[{}]** {}: {}", f.severity, f.scanner, f.title)) + .collect::>() + .join("\n"), + ); + + if let Err(e) = tracker + .create_pr_review( + owner, + tracker_repo_name, + pr_number, + &summary, + review_comments, + ) + .await + { + tracing::warn!("[{repo_id}] Failed to post PR review: {e}"); + } else { + tracing::info!( + "[{repo_id}] Posted PR review on #{pr_number} with {} findings", + pr_findings.len() + ); + } + + Ok(()) + } + async fn update_phase(&self, scan_run_id: &str, phase: &str) { if let Ok(oid) = 
mongodb::bson::oid::ObjectId::parse_str(scan_run_id) { let _ = self diff --git a/compliance-agent/src/webhooks/gitea.rs b/compliance-agent/src/webhooks/gitea.rs new file mode 100644 index 0000000..ac9ce1f --- /dev/null +++ b/compliance-agent/src/webhooks/gitea.rs @@ -0,0 +1,159 @@ +use std::sync::Arc; + +use axum::body::Bytes; +use axum::extract::Extension; +use axum::http::{HeaderMap, StatusCode}; +use hmac::{Hmac, Mac}; +use sha2::Sha256; + +use compliance_core::models::ScanTrigger; + +use crate::agent::ComplianceAgent; + +type HmacSha256 = Hmac; + +pub async fn handle_gitea_webhook( + Extension(agent): Extension>, + headers: HeaderMap, + body: Bytes, +) -> StatusCode { + // Verify HMAC-SHA256 signature (Gitea uses X-Gitea-Signature, no sha256= prefix) + if let Some(secret) = &agent.config.gitea_webhook_secret { + use secrecy::ExposeSecret; + let signature = headers + .get("x-gitea-signature") + .and_then(|v| v.to_str().ok()) + .unwrap_or(""); + + if !verify_signature(secret.expose_secret(), &body, signature) { + tracing::warn!("Gitea webhook: invalid signature"); + return StatusCode::UNAUTHORIZED; + } + } + + let event = headers + .get("x-gitea-event") + .and_then(|v| v.to_str().ok()) + .unwrap_or(""); + + let payload: serde_json::Value = match serde_json::from_slice(&body) { + Ok(v) => v, + Err(e) => { + tracing::warn!("Gitea webhook: invalid JSON: {e}"); + return StatusCode::BAD_REQUEST; + } + }; + + match event { + "push" => handle_push(agent, &payload).await, + "pull_request" => handle_pull_request(agent, &payload).await, + _ => { + tracing::debug!("Gitea webhook: ignoring event '{event}'"); + StatusCode::OK + } + } +} + +async fn handle_push(agent: Arc, payload: &serde_json::Value) -> StatusCode { + let repo_url = payload["repository"]["clone_url"] + .as_str() + .or_else(|| payload["repository"]["html_url"].as_str()) + .unwrap_or(""); + + if repo_url.is_empty() { + return StatusCode::BAD_REQUEST; + } + + let repo = agent + .db + .repositories() + 
.find_one(mongodb::bson::doc! { "git_url": repo_url }) + .await + .ok() + .flatten(); + + if let Some(repo) = repo { + let repo_id = repo.id.map(|id| id.to_hex()).unwrap_or_default(); + let agent_clone = (*agent).clone(); + tokio::spawn(async move { + tracing::info!("Gitea push webhook: triggering scan for {repo_id}"); + if let Err(e) = agent_clone.run_scan(&repo_id, ScanTrigger::Webhook).await { + tracing::error!("Webhook-triggered scan failed: {e}"); + } + }); + } else { + tracing::debug!("Gitea push webhook: no tracked repo for {repo_url}"); + } + + StatusCode::OK +} + +async fn handle_pull_request( + agent: Arc, + payload: &serde_json::Value, +) -> StatusCode { + let action = payload["action"].as_str().unwrap_or(""); + if action != "opened" && action != "synchronized" { + return StatusCode::OK; + } + + let repo_url = payload["repository"]["clone_url"] + .as_str() + .or_else(|| payload["repository"]["html_url"].as_str()) + .unwrap_or(""); + let pr_number = payload["pull_request"]["number"].as_u64().unwrap_or(0); + let head_sha = payload["pull_request"]["head"]["sha"] + .as_str() + .unwrap_or(""); + let base_sha = payload["pull_request"]["base"]["sha"] + .as_str() + .unwrap_or(""); + + if repo_url.is_empty() || pr_number == 0 || head_sha.is_empty() || base_sha.is_empty() { + tracing::warn!("Gitea PR webhook: missing required fields"); + return StatusCode::BAD_REQUEST; + } + + let repo = agent + .db + .repositories() + .find_one(mongodb::bson::doc! 
{ "git_url": repo_url }) + .await + .ok() + .flatten(); + + if let Some(repo) = repo { + let repo_id = repo.id.map(|id| id.to_hex()).unwrap_or_default(); + let head_sha = head_sha.to_string(); + let base_sha = base_sha.to_string(); + let agent_clone = (*agent).clone(); + tokio::spawn(async move { + tracing::info!("Gitea PR webhook: reviewing PR #{pr_number} on {repo_id}"); + if let Err(e) = agent_clone + .run_pr_review(&repo_id, pr_number, &base_sha, &head_sha) + .await + { + tracing::error!("PR review failed for #{pr_number}: {e}"); + } + }); + } else { + tracing::debug!("Gitea PR webhook: no tracked repo for {repo_url}"); + } + + StatusCode::OK +} + +fn verify_signature(secret: &str, body: &[u8], signature: &str) -> bool { + // Gitea sends raw hex (no sha256= prefix unlike GitHub) + let sig_bytes = match hex::decode(signature) { + Ok(b) => b, + Err(_) => return false, + }; + + let mut mac = match HmacSha256::new_from_slice(secret.as_bytes()) { + Ok(m) => m, + Err(_) => return false, + }; + mac.update(body); + mac.verify_slice(&sig_bytes).is_ok() +} diff --git a/compliance-agent/src/webhooks/github.rs b/compliance-agent/src/webhooks/github.rs index 6f567f9..4b616e0 100644 --- a/compliance-agent/src/webhooks/github.rs +++ b/compliance-agent/src/webhooks/github.rs @@ -90,7 +90,7 @@ async fn handle_push(agent: Arc, payload: &serde_json::Value) - } async fn handle_pull_request( - _agent: Arc, + agent: Arc, payload: &serde_json::Value, ) -> StatusCode { let action = payload["action"].as_str().unwrap_or(""); @@ -100,14 +100,42 @@ async fn handle_pull_request( let repo_url = payload["repository"]["clone_url"].as_str().unwrap_or(""); let pr_number = payload["pull_request"]["number"].as_u64().unwrap_or(0); + let head_sha = payload["pull_request"]["head"]["sha"] + .as_str() + .unwrap_or(""); + let base_sha = payload["pull_request"]["base"]["sha"] + .as_str() + .unwrap_or(""); - if repo_url.is_empty() || pr_number == 0 { + if repo_url.is_empty() || pr_number == 0 || 
head_sha.is_empty() || base_sha.is_empty() { return StatusCode::BAD_REQUEST; } - tracing::info!("GitHub PR webhook: PR #{pr_number} {action} on {repo_url}"); - // PR review scan would be triggered here - runs incremental SAST on diff - // and posts review comments via the GitHub tracker + let repo = agent + .db + .repositories() + .find_one(mongodb::bson::doc! { "git_url": repo_url }) + .await + .ok() + .flatten(); + + if let Some(repo) = repo { + let repo_id = repo.id.map(|id| id.to_hex()).unwrap_or_default(); + let head_sha = head_sha.to_string(); + let base_sha = base_sha.to_string(); + let agent_clone = (*agent).clone(); + tokio::spawn(async move { + tracing::info!("GitHub PR webhook: reviewing PR #{pr_number} on {repo_id}"); + if let Err(e) = agent_clone + .run_pr_review(&repo_id, pr_number, &base_sha, &head_sha) + .await + { + tracing::error!("PR review failed for #{pr_number}: {e}"); + } + }); + } else { + tracing::debug!("GitHub PR webhook: no tracked repo for {repo_url}"); + } StatusCode::OK } diff --git a/compliance-agent/src/webhooks/gitlab.rs b/compliance-agent/src/webhooks/gitlab.rs index b811675..075bce5 100644 --- a/compliance-agent/src/webhooks/gitlab.rs +++ b/compliance-agent/src/webhooks/gitlab.rs @@ -80,7 +80,7 @@ async fn handle_push(agent: Arc, payload: &serde_json::Value) - } async fn handle_merge_request( - _agent: Arc, + agent: Arc, payload: &serde_json::Value, ) -> StatusCode { let action = payload["object_attributes"]["action"] @@ -90,8 +90,49 @@ async fn handle_merge_request( return StatusCode::OK; } + let repo_url = payload["project"]["git_http_url"] + .as_str() + .or_else(|| payload["project"]["web_url"].as_str()) + .unwrap_or(""); let mr_iid = payload["object_attributes"]["iid"].as_u64().unwrap_or(0); - tracing::info!("GitLab MR webhook: MR !{mr_iid} {action}"); + let head_sha = payload["object_attributes"]["last_commit"]["id"] + .as_str() + .unwrap_or(""); + // Base SHA is read from the MR payload's diff_refs.base_sha; if it is absent the webhook is rejected below +
let base_sha = payload["object_attributes"]["diff_refs"]["base_sha"] + .as_str() + .unwrap_or(""); + + if repo_url.is_empty() || mr_iid == 0 || head_sha.is_empty() || base_sha.is_empty() { + tracing::warn!("GitLab MR webhook: missing required fields"); + return StatusCode::BAD_REQUEST; + } + + let repo = agent + .db + .repositories() + .find_one(mongodb::bson::doc! { "git_url": repo_url }) + .await + .ok() + .flatten(); + + if let Some(repo) = repo { + let repo_id = repo.id.map(|id| id.to_hex()).unwrap_or_default(); + let head_sha = head_sha.to_string(); + let base_sha = base_sha.to_string(); + let agent_clone = (*agent).clone(); + tokio::spawn(async move { + tracing::info!("GitLab MR webhook: reviewing MR !{mr_iid} on {repo_id}"); + if let Err(e) = agent_clone + .run_pr_review(&repo_id, mr_iid, &base_sha, &head_sha) + .await + { + tracing::error!("MR review failed for !{mr_iid}: {e}"); + } + }); + } else { + tracing::debug!("GitLab MR webhook: no tracked repo for {repo_url}"); + } StatusCode::OK } diff --git a/compliance-agent/src/webhooks/mod.rs b/compliance-agent/src/webhooks/mod.rs index 8ca1a34..1ea9d87 100644 --- a/compliance-agent/src/webhooks/mod.rs +++ b/compliance-agent/src/webhooks/mod.rs @@ -1,3 +1,4 @@ +pub mod gitea; pub mod github; pub mod gitlab; pub mod server; diff --git a/compliance-agent/src/webhooks/server.rs b/compliance-agent/src/webhooks/server.rs index b695aba..6db45f9 100644 --- a/compliance-agent/src/webhooks/server.rs +++ b/compliance-agent/src/webhooks/server.rs @@ -5,12 +5,13 @@ use axum::{Extension, Router}; use crate::agent::ComplianceAgent; use crate::error::AgentError; -use crate::webhooks::{github, gitlab}; +use crate::webhooks::{gitea, github, gitlab}; pub async fn start_webhook_server(agent: &ComplianceAgent) -> Result<(), AgentError> { let app = Router::new() .route("/webhook/github", post(github::handle_github_webhook)) .route("/webhook/gitlab", post(gitlab::handle_gitlab_webhook)) + .route("/webhook/gitea", 
post(gitea::handle_gitea_webhook)) .layer(Extension(Arc::new(agent.clone()))); let addr = "0.0.0.0:3002"; diff --git a/compliance-core/src/config.rs b/compliance-core/src/config.rs index 401f9a8..1490893 100644 --- a/compliance-core/src/config.rs +++ b/compliance-core/src/config.rs @@ -14,6 +14,7 @@ pub struct AgentConfig { pub gitlab_url: Option, pub gitlab_token: Option, pub gitlab_webhook_secret: Option, + pub gitea_webhook_secret: Option, pub jira_url: Option, pub jira_email: Option, pub jira_api_token: Option,