feat: implement PR review pipeline with Gitea/GitHub/GitLab webhooks
Some checks failed
CI / Format (push) Successful in 3s
CI / Clippy (push) Failing after 1m50s
CI / Security Audit (push) Has been skipped
CI / Tests (push) Has been skipped
CI / Detect Changes (push) Has been skipped
CI / Deploy Agent (push) Has been skipped
CI / Deploy Dashboard (push) Has been skipped
CI / Deploy Docs (push) Has been skipped
CI / Deploy MCP (push) Has been skipped

On PR open/sync, webhook triggers incremental scan: runs semgrep on
changed files + LLM code review on the diff, then posts review comments
via the configured tracker. Adds Gitea webhook handler with HMAC-SHA256
verification, and wires up the previously stubbed GitHub/GitLab PR
handlers.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Sharang Parnerkar
2026-03-11 11:02:25 +01:00
parent 9e5342bfd6
commit 7a0a53d399
10 changed files with 448 additions and 14 deletions

View File

@@ -43,4 +43,35 @@ impl ComplianceAgent {
);
orchestrator.run(repo_id, trigger).await
}
/// Run a PR review: scan the diff and post review comments.
pub async fn run_pr_review(
&self,
repo_id: &str,
pr_number: u64,
base_sha: &str,
head_sha: &str,
) -> Result<(), crate::error::AgentError> {
let repo = self
.db
.repositories()
.find_one(mongodb::bson::doc! {
"_id": mongodb::bson::oid::ObjectId::parse_str(repo_id)
.map_err(|e| crate::error::AgentError::Other(e.to_string()))?
})
.await?
.ok_or_else(|| {
crate::error::AgentError::Other(format!("Repository {repo_id} not found"))
})?;
let orchestrator = PipelineOrchestrator::new(
self.config.clone(),
self.db.clone(),
self.llm.clone(),
self.http.clone(),
);
orchestrator
.run_pr_review(&repo, repo_id, pr_number, base_sha, head_sha)
.await
}
}

View File

@@ -31,6 +31,7 @@ pub fn load_config() -> Result<AgentConfig, AgentError> {
gitlab_url: env_var_opt("GITLAB_URL"),
gitlab_token: env_secret_opt("GITLAB_TOKEN"),
gitlab_webhook_secret: env_secret_opt("GITLAB_WEBHOOK_SECRET"),
gitea_webhook_secret: env_secret_opt("GITEA_WEBHOOK_SECRET"),
jira_url: env_var_opt("JIRA_URL"),
jira_email: env_var_opt("JIRA_EMAIL"),
jira_api_token: env_secret_opt("JIRA_API_TOKEN"),

View File

@@ -132,6 +132,18 @@ impl GitOps {
Ok(())
}
/// Build credentials from agent config + per-repo overrides
pub fn make_repo_credentials(
config: &compliance_core::AgentConfig,
repo: &compliance_core::models::TrackedRepository,
) -> RepoCredentials {
RepoCredentials {
ssh_key_path: Some(config.ssh_key_path.clone()),
auth_token: repo.auth_token.clone(),
auth_username: repo.auth_username.clone(),
}
}
pub fn get_head_sha(repo_path: &Path) -> Result<String, AgentError> {
let repo = Repository::open(repo_path)?;
let head = repo.head()?;

View File

@@ -13,7 +13,7 @@ use crate::error::AgentError;
use crate::llm::LlmClient;
use crate::pipeline::code_review::CodeReviewScanner;
use crate::pipeline::cve::CveScanner;
use crate::pipeline::git::{GitOps, RepoCredentials};
use crate::pipeline::git::GitOps;
use crate::pipeline::gitleaks::GitleaksScanner;
use crate::pipeline::lint::LintScanner;
use crate::pipeline::patterns::{GdprPatternScanner, OAuthPatternScanner};
@@ -68,6 +68,34 @@ impl TrackerDispatch {
Self::Jira(t) => t.find_existing_issue(owner, repo, fingerprint).await,
}
}
async fn create_pr_review(
&self,
owner: &str,
repo: &str,
pr_number: u64,
body: &str,
comments: Vec<compliance_core::traits::issue_tracker::ReviewComment>,
) -> Result<(), compliance_core::error::CoreError> {
match self {
Self::GitHub(t) => {
t.create_pr_review(owner, repo, pr_number, body, comments)
.await
}
Self::GitLab(t) => {
t.create_pr_review(owner, repo, pr_number, body, comments)
.await
}
Self::Gitea(t) => {
t.create_pr_review(owner, repo, pr_number, body, comments)
.await
}
Self::Jira(t) => {
t.create_pr_review(owner, repo, pr_number, body, comments)
.await
}
}
}
}
/// Context from graph analysis passed to LLM triage for enhanced filtering
@@ -172,11 +200,7 @@ impl PipelineOrchestrator {
// Stage 0: Change detection
tracing::info!("[{repo_id}] Stage 0: Change detection");
let creds = RepoCredentials {
ssh_key_path: Some(self.config.ssh_key_path.clone()),
auth_token: repo.auth_token.clone(),
auth_username: repo.auth_username.clone(),
};
let creds = GitOps::make_repo_credentials(&self.config, repo);
let git_ops = GitOps::new(&self.config.git_clone_base_path, creds);
let repo_path = git_ops.clone_or_fetch(&repo.git_url, &repo.name)?;
@@ -734,6 +758,141 @@ impl PipelineOrchestrator {
Ok(())
}
/// Run an incremental scan on a PR diff and post review comments.
#[tracing::instrument(skip_all, fields(repo_id = %repo_id, pr_number))]
pub async fn run_pr_review(
&self,
repo: &TrackedRepository,
repo_id: &str,
pr_number: u64,
base_sha: &str,
head_sha: &str,
) -> Result<(), AgentError> {
let tracker = match self.build_tracker(repo) {
Some(t) => t,
None => {
tracing::warn!("[{repo_id}] No tracker configured, cannot post PR review");
return Ok(());
}
};
let owner = repo.tracker_owner.as_deref().unwrap_or("");
let tracker_repo_name = repo.tracker_repo.as_deref().unwrap_or("");
if owner.is_empty() || tracker_repo_name.is_empty() {
tracing::warn!("[{repo_id}] tracker_owner or tracker_repo not set");
return Ok(());
}
// Clone/fetch the repo
let creds = GitOps::make_repo_credentials(&self.config, repo);
let git_ops = GitOps::new(&self.config.git_clone_base_path, creds);
let repo_path = git_ops.clone_or_fetch(&repo.git_url, &repo.name)?;
// Get diff between base and head
let diff_files = GitOps::get_diff_content(&repo_path, base_sha, head_sha)?;
if diff_files.is_empty() {
tracing::info!("[{repo_id}] PR #{pr_number}: no diff files, skipping review");
return Ok(());
}
// Run semgrep on the full repo but we'll filter findings to changed files
let changed_paths: std::collections::HashSet<String> =
diff_files.iter().map(|f| f.path.clone()).collect();
let mut pr_findings: Vec<Finding> = Vec::new();
// SAST scan (semgrep)
match SemgrepScanner.scan(&repo_path, repo_id).await {
Ok(output) => {
for f in output.findings {
if let Some(fp) = &f.file_path {
if changed_paths.contains(fp.as_str()) {
pr_findings.push(f);
}
}
}
}
Err(e) => tracing::warn!("[{repo_id}] PR semgrep failed: {e}"),
}
// LLM code review on the diff
let reviewer = CodeReviewScanner::new(self.llm.clone());
let review_output = reviewer
.review_diff(&repo_path, repo_id, base_sha, head_sha)
.await;
pr_findings.extend(review_output.findings);
if pr_findings.is_empty() {
// Post a clean review
if let Err(e) = tracker
.create_pr_review(
owner,
tracker_repo_name,
pr_number,
"Compliance scan: no issues found in this PR.",
Vec::new(),
)
.await
{
tracing::warn!("[{repo_id}] Failed to post clean PR review: {e}");
}
return Ok(());
}
// Build review comments from findings
let mut review_comments = Vec::new();
for finding in &pr_findings {
if let (Some(path), Some(line)) = (&finding.file_path, finding.line_number) {
let comment_body = format!(
"**[{}] {}**\n\n{}\n\n*Scanner: {} | {}*",
finding.severity,
finding.title,
finding.description,
finding.scanner,
finding
.cwe
.as_deref()
.map(|c| format!("CWE: {c}"))
.unwrap_or_default(),
);
review_comments.push(compliance_core::traits::issue_tracker::ReviewComment {
path: path.clone(),
line,
body: comment_body,
});
}
}
let summary = format!(
"Compliance scan found **{}** issue(s) in this PR:\n\n{}",
pr_findings.len(),
pr_findings
.iter()
.map(|f| format!("- **[{}]** {}: {}", f.severity, f.scanner, f.title))
.collect::<Vec<_>>()
.join("\n"),
);
if let Err(e) = tracker
.create_pr_review(
owner,
tracker_repo_name,
pr_number,
&summary,
review_comments,
)
.await
{
tracing::warn!("[{repo_id}] Failed to post PR review: {e}");
} else {
tracing::info!(
"[{repo_id}] Posted PR review on #{pr_number} with {} findings",
pr_findings.len()
);
}
Ok(())
}
async fn update_phase(&self, scan_run_id: &str, phase: &str) {
if let Ok(oid) = mongodb::bson::oid::ObjectId::parse_str(scan_run_id) {
let _ = self

View File

@@ -0,0 +1,159 @@
use std::sync::Arc;
use axum::body::Bytes;
use axum::extract::Extension;
use axum::http::{HeaderMap, StatusCode};
use hmac::{Hmac, Mac};
use sha2::Sha256;
use compliance_core::models::ScanTrigger;
use crate::agent::ComplianceAgent;
type HmacSha256 = Hmac<Sha256>;
pub async fn handle_gitea_webhook(
Extension(agent): Extension<Arc<ComplianceAgent>>,
headers: HeaderMap,
body: Bytes,
) -> StatusCode {
// Verify HMAC-SHA256 signature (Gitea uses X-Gitea-Signature, no sha256= prefix)
if let Some(secret) = &agent.config.gitea_webhook_secret {
use secrecy::ExposeSecret;
let signature = headers
.get("x-gitea-signature")
.and_then(|v| v.to_str().ok())
.unwrap_or("");
if !verify_signature(secret.expose_secret(), &body, signature) {
tracing::warn!("Gitea webhook: invalid signature");
return StatusCode::UNAUTHORIZED;
}
}
let event = headers
.get("x-gitea-event")
.and_then(|v| v.to_str().ok())
.unwrap_or("");
let payload: serde_json::Value = match serde_json::from_slice(&body) {
Ok(v) => v,
Err(e) => {
tracing::warn!("Gitea webhook: invalid JSON: {e}");
return StatusCode::BAD_REQUEST;
}
};
match event {
"push" => handle_push(agent, &payload).await,
"pull_request" => handle_pull_request(agent, &payload).await,
_ => {
tracing::debug!("Gitea webhook: ignoring event '{event}'");
StatusCode::OK
}
}
}
async fn handle_push(agent: Arc<ComplianceAgent>, payload: &serde_json::Value) -> StatusCode {
let repo_url = payload["repository"]["clone_url"]
.as_str()
.or_else(|| payload["repository"]["html_url"].as_str())
.unwrap_or("");
if repo_url.is_empty() {
return StatusCode::BAD_REQUEST;
}
let repo = agent
.db
.repositories()
.find_one(mongodb::bson::doc! { "git_url": repo_url })
.await
.ok()
.flatten();
if let Some(repo) = repo {
let repo_id = repo.id.map(|id| id.to_hex()).unwrap_or_default();
let agent_clone = (*agent).clone();
tokio::spawn(async move {
tracing::info!("Gitea push webhook: triggering scan for {repo_id}");
if let Err(e) = agent_clone.run_scan(&repo_id, ScanTrigger::Webhook).await {
tracing::error!("Webhook-triggered scan failed: {e}");
}
});
} else {
tracing::debug!("Gitea push webhook: no tracked repo for {repo_url}");
}
StatusCode::OK
}
async fn handle_pull_request(
agent: Arc<ComplianceAgent>,
payload: &serde_json::Value,
) -> StatusCode {
let action = payload["action"].as_str().unwrap_or("");
if action != "opened" && action != "synchronized" {
return StatusCode::OK;
}
let repo_url = payload["repository"]["clone_url"]
.as_str()
.or_else(|| payload["repository"]["html_url"].as_str())
.unwrap_or("");
let pr_number = payload["pull_request"]["number"].as_u64().unwrap_or(0);
let head_sha = payload["pull_request"]["head"]["sha"]
.as_str()
.unwrap_or("");
let base_sha = payload["pull_request"]["base"]["sha"]
.as_str()
.unwrap_or("");
if repo_url.is_empty() || pr_number == 0 || head_sha.is_empty() || base_sha.is_empty() {
tracing::warn!("Gitea PR webhook: missing required fields");
return StatusCode::BAD_REQUEST;
}
let repo = agent
.db
.repositories()
.find_one(mongodb::bson::doc! { "git_url": repo_url })
.await
.ok()
.flatten();
if let Some(repo) = repo {
let repo_id = repo.id.map(|id| id.to_hex()).unwrap_or_default();
let head_sha = head_sha.to_string();
let base_sha = base_sha.to_string();
let agent_clone = (*agent).clone();
tokio::spawn(async move {
tracing::info!("Gitea PR webhook: reviewing PR #{pr_number} on {repo_id}");
if let Err(e) = agent_clone
.run_pr_review(&repo_id, pr_number, &base_sha, &head_sha)
.await
{
tracing::error!("PR review failed for #{pr_number}: {e}");
}
});
} else {
tracing::debug!("Gitea PR webhook: no tracked repo for {repo_url}");
}
StatusCode::OK
}
fn verify_signature(secret: &str, body: &[u8], signature: &str) -> bool {
// Gitea sends raw hex (no sha256= prefix unlike GitHub)
let sig_bytes = match hex::decode(signature) {
Ok(b) => b,
Err(_) => return false,
};
let mut mac = match HmacSha256::new_from_slice(secret.as_bytes()) {
Ok(m) => m,
Err(_) => return false,
};
mac.update(body);
mac.verify_slice(&sig_bytes).is_ok()
}

View File

@@ -90,7 +90,7 @@ async fn handle_push(agent: Arc<ComplianceAgent>, payload: &serde_json::Value) -
}
async fn handle_pull_request(
_agent: Arc<ComplianceAgent>,
agent: Arc<ComplianceAgent>,
payload: &serde_json::Value,
) -> StatusCode {
let action = payload["action"].as_str().unwrap_or("");
@@ -100,14 +100,42 @@ async fn handle_pull_request(
let repo_url = payload["repository"]["clone_url"].as_str().unwrap_or("");
let pr_number = payload["pull_request"]["number"].as_u64().unwrap_or(0);
let head_sha = payload["pull_request"]["head"]["sha"]
.as_str()
.unwrap_or("");
let base_sha = payload["pull_request"]["base"]["sha"]
.as_str()
.unwrap_or("");
if repo_url.is_empty() || pr_number == 0 {
if repo_url.is_empty() || pr_number == 0 || head_sha.is_empty() || base_sha.is_empty() {
return StatusCode::BAD_REQUEST;
}
tracing::info!("GitHub PR webhook: PR #{pr_number} {action} on {repo_url}");
// PR review scan would be triggered here - runs incremental SAST on diff
// and posts review comments via the GitHub tracker
let repo = agent
.db
.repositories()
.find_one(mongodb::bson::doc! { "git_url": repo_url })
.await
.ok()
.flatten();
if let Some(repo) = repo {
let repo_id = repo.id.map(|id| id.to_hex()).unwrap_or_default();
let head_sha = head_sha.to_string();
let base_sha = base_sha.to_string();
let agent_clone = (*agent).clone();
tokio::spawn(async move {
tracing::info!("GitHub PR webhook: reviewing PR #{pr_number} on {repo_id}");
if let Err(e) = agent_clone
.run_pr_review(&repo_id, pr_number, &base_sha, &head_sha)
.await
{
tracing::error!("PR review failed for #{pr_number}: {e}");
}
});
} else {
tracing::debug!("GitHub PR webhook: no tracked repo for {repo_url}");
}
StatusCode::OK
}

View File

@@ -80,7 +80,7 @@ async fn handle_push(agent: Arc<ComplianceAgent>, payload: &serde_json::Value) -
}
async fn handle_merge_request(
_agent: Arc<ComplianceAgent>,
agent: Arc<ComplianceAgent>,
payload: &serde_json::Value,
) -> StatusCode {
let action = payload["object_attributes"]["action"]
@@ -90,8 +90,49 @@ async fn handle_merge_request(
return StatusCode::OK;
}
let repo_url = payload["project"]["git_http_url"]
.as_str()
.or_else(|| payload["project"]["web_url"].as_str())
.unwrap_or("");
let mr_iid = payload["object_attributes"]["iid"].as_u64().unwrap_or(0);
tracing::info!("GitLab MR webhook: MR !{mr_iid} {action}");
let head_sha = payload["object_attributes"]["last_commit"]["id"]
.as_str()
.unwrap_or("");
// GitLab doesn't include base sha directly; use the target branch's latest
let base_sha = payload["object_attributes"]["diff_refs"]["base_sha"]
.as_str()
.unwrap_or("");
if repo_url.is_empty() || mr_iid == 0 || head_sha.is_empty() || base_sha.is_empty() {
tracing::warn!("GitLab MR webhook: missing required fields");
return StatusCode::BAD_REQUEST;
}
let repo = agent
.db
.repositories()
.find_one(mongodb::bson::doc! { "git_url": repo_url })
.await
.ok()
.flatten();
if let Some(repo) = repo {
let repo_id = repo.id.map(|id| id.to_hex()).unwrap_or_default();
let head_sha = head_sha.to_string();
let base_sha = base_sha.to_string();
let agent_clone = (*agent).clone();
tokio::spawn(async move {
tracing::info!("GitLab MR webhook: reviewing MR !{mr_iid} on {repo_id}");
if let Err(e) = agent_clone
.run_pr_review(&repo_id, mr_iid, &base_sha, &head_sha)
.await
{
tracing::error!("MR review failed for !{mr_iid}: {e}");
}
});
} else {
tracing::debug!("GitLab MR webhook: no tracked repo for {repo_url}");
}
StatusCode::OK
}

View File

@@ -1,3 +1,4 @@
pub mod gitea;
pub mod github;
pub mod gitlab;
pub mod server;

View File

@@ -5,12 +5,13 @@ use axum::{Extension, Router};
use crate::agent::ComplianceAgent;
use crate::error::AgentError;
use crate::webhooks::{github, gitlab};
use crate::webhooks::{gitea, github, gitlab};
pub async fn start_webhook_server(agent: &ComplianceAgent) -> Result<(), AgentError> {
let app = Router::new()
.route("/webhook/github", post(github::handle_github_webhook))
.route("/webhook/gitlab", post(gitlab::handle_gitlab_webhook))
.route("/webhook/gitea", post(gitea::handle_gitea_webhook))
.layer(Extension(Arc::new(agent.clone())));
let addr = "0.0.0.0:3002";

View File

@@ -14,6 +14,7 @@ pub struct AgentConfig {
pub gitlab_url: Option<String>,
pub gitlab_token: Option<SecretString>,
pub gitlab_webhook_secret: Option<SecretString>,
pub gitea_webhook_secret: Option<SecretString>,
pub jira_url: Option<String>,
pub jira_email: Option<String>,
pub jira_api_token: Option<SecretString>,