diff --git a/compliance-agent/src/api/handlers/chat.rs b/compliance-agent/src/api/handlers/chat.rs index c3f72b2..989de90 100644 --- a/compliance-agent/src/api/handlers/chat.rs +++ b/compliance-agent/src/api/handlers/chat.rs @@ -17,7 +17,7 @@ use super::ApiResponse; type AgentExt = Extension>; /// POST /api/v1/chat/:repo_id — Send a chat message with RAG context -#[tracing::instrument(skip_all)] +#[tracing::instrument(skip_all, fields(repo_id = %repo_id))] pub async fn chat( Extension(agent): AgentExt, Path(repo_id): Path, @@ -127,7 +127,7 @@ pub async fn chat( } /// POST /api/v1/chat/:repo_id/build-embeddings — Trigger embedding build -#[tracing::instrument(skip_all)] +#[tracing::instrument(skip_all, fields(repo_id = %repo_id))] pub async fn build_embeddings( Extension(agent): AgentExt, Path(repo_id): Path, @@ -228,7 +228,7 @@ pub async fn build_embeddings( } /// GET /api/v1/chat/:repo_id/status — Get latest embedding build status -#[tracing::instrument(skip_all)] +#[tracing::instrument(skip_all, fields(repo_id = %repo_id))] pub async fn embedding_status( Extension(agent): AgentExt, Path(repo_id): Path, diff --git a/compliance-agent/src/api/handlers/dast.rs b/compliance-agent/src/api/handlers/dast.rs index 199bedd..fc74113 100644 --- a/compliance-agent/src/api/handlers/dast.rs +++ b/compliance-agent/src/api/handlers/dast.rs @@ -63,7 +63,10 @@ pub async fn list_targets( .await { Ok(cursor) => collect_cursor_async(cursor).await, - Err(_) => Vec::new(), + Err(e) => { + tracing::warn!("Failed to fetch DAST targets: {e}"); + Vec::new() + } }; Ok(Json(ApiResponse { @@ -101,7 +104,7 @@ pub async fn add_target( } /// POST /api/v1/dast/targets/:id/scan — Trigger DAST scan -#[tracing::instrument(skip_all)] +#[tracing::instrument(skip_all, fields(target_id = %id))] pub async fn trigger_scan( Extension(agent): AgentExt, Path(id): Path, @@ -163,7 +166,10 @@ pub async fn list_scan_runs( .await { Ok(cursor) => collect_cursor_async(cursor).await, - Err(_) => Vec::new(), + Err(e) => { + tracing::warn!("Failed to fetch DAST scan runs: {e}"); + Vec::new() + } }; Ok(Json(ApiResponse { @@ -196,7 +202,10 @@ pub async fn list_findings( .await { Ok(cursor) => collect_cursor_async(cursor).await, - Err(_) => Vec::new(), + Err(e) => { + tracing::warn!("Failed to fetch DAST findings: {e}"); + Vec::new() + } }; Ok(Json(ApiResponse { @@ -207,7 +216,7 @@ pub async fn list_findings( } /// GET /api/v1/dast/findings/:id — Finding detail with evidence -#[tracing::instrument(skip_all)] +#[tracing::instrument(skip_all, fields(finding_id = %id))] pub async fn get_finding( Extension(agent): AgentExt, Path(id): Path, diff --git a/compliance-agent/src/api/handlers/graph.rs b/compliance-agent/src/api/handlers/graph.rs index d228b4e..43a79b9 100644 --- a/compliance-agent/src/api/handlers/graph.rs +++ b/compliance-agent/src/api/handlers/graph.rs @@ -33,7 +33,7 @@ fn default_search_limit() -> usize { } /// GET /api/v1/graph/:repo_id — Full graph data -#[tracing::instrument(skip_all)] +#[tracing::instrument(skip_all, fields(repo_id = %repo_id))] pub async fn get_graph( Extension(agent): AgentExt, Path(repo_id): Path, @@ -55,11 +55,17 @@ pub async fn get_graph( let all_nodes: Vec = match db.graph_nodes().find(filter.clone()).await { Ok(cursor) => collect_cursor_async(cursor).await, - Err(_) => Vec::new(), + Err(e) => { + tracing::warn!("Failed to fetch graph nodes: {e}"); + Vec::new() + } }; let edges: Vec = match db.graph_edges().find(filter).await { Ok(cursor) => collect_cursor_async(cursor).await, - Err(_) => Vec::new(), + Err(e) => { + tracing::warn!("Failed to fetch graph edges: {e}"); + Vec::new() + } }; // Remove disconnected nodes (no edges) to keep the graph clean @@ -89,7 +95,7 @@ pub async fn get_graph( } /// GET /api/v1/graph/:repo_id/nodes — List nodes (paginated) -#[tracing::instrument(skip_all)] +#[tracing::instrument(skip_all, fields(repo_id = %repo_id))] pub async fn get_nodes( Extension(agent): AgentExt, Path(repo_id): Path, @@ -99,7 +105,10 @@ pub async fn get_nodes( let nodes: Vec = match db.graph_nodes().find(filter).await { Ok(cursor) => collect_cursor_async(cursor).await, - Err(_) => Vec::new(), + Err(e) => { + tracing::warn!("Failed to fetch graph nodes: {e}"); + Vec::new() + } }; let total = nodes.len() as u64; @@ -111,7 +120,7 @@ pub async fn get_nodes( } /// GET /api/v1/graph/:repo_id/communities — List detected communities -#[tracing::instrument(skip_all)] +#[tracing::instrument(skip_all, fields(repo_id = %repo_id))] pub async fn get_communities( Extension(agent): AgentExt, Path(repo_id): Path, @@ -121,7 +130,10 @@ pub async fn get_communities( let nodes: Vec = match db.graph_nodes().find(filter).await { Ok(cursor) => collect_cursor_async(cursor).await, - Err(_) => Vec::new(), + Err(e) => { + tracing::warn!("Failed to fetch graph nodes for communities: {e}"); + Vec::new() + } }; let mut communities: std::collections::HashMap> = @@ -161,7 +173,7 @@ pub struct CommunityInfo { } /// GET /api/v1/graph/:repo_id/impact/:finding_id — Impact analysis -#[tracing::instrument(skip_all)] +#[tracing::instrument(skip_all, fields(repo_id = %repo_id, finding_id = %finding_id))] pub async fn get_impact( Extension(agent): AgentExt, Path((repo_id, finding_id)): Path<(String, String)>, @@ -183,7 +195,7 @@ pub async fn get_impact( } /// GET /api/v1/graph/:repo_id/search — BM25 symbol search -#[tracing::instrument(skip_all)] +#[tracing::instrument(skip_all, fields(repo_id = %repo_id, query = %params.q))] pub async fn search_symbols( Extension(agent): AgentExt, Path(repo_id): Path, @@ -204,7 +216,10 @@ pub async fn search_symbols( .await { Ok(cursor) => collect_cursor_async(cursor).await, - Err(_) => Vec::new(), + Err(e) => { + tracing::warn!("Failed to search graph nodes: {e}"); + Vec::new() + } }; let total = nodes.len() as u64; @@ -216,7 +231,7 @@ pub async fn search_symbols( } /// GET /api/v1/graph/:repo_id/file-content — Read source file from cloned repo -#[tracing::instrument(skip_all)] +#[tracing::instrument(skip_all, fields(repo_id = %repo_id))] pub async fn get_file_content( Extension(agent): AgentExt, Path(repo_id): Path, @@ -278,7 +293,7 @@ pub struct FileContent { } /// POST /api/v1/graph/:repo_id/build — Trigger graph rebuild -#[tracing::instrument(skip_all)] +#[tracing::instrument(skip_all, fields(repo_id = %repo_id))] pub async fn trigger_build( Extension(agent): AgentExt, Path(repo_id): Path, diff --git a/compliance-agent/src/api/handlers/mod.rs b/compliance-agent/src/api/handlers/mod.rs index 02af141..0dca8b3 100644 --- a/compliance-agent/src/api/handlers/mod.rs +++ b/compliance-agent/src/api/handlers/mod.rs @@ -234,7 +234,10 @@ pub async fn stats_overview(Extension(agent): AgentExt) -> ApiResult collect_cursor_async(cursor).await, - Err(_) => Vec::new(), + Err(e) => { + tracing::warn!("Failed to fetch recent scans: {e}"); + Vec::new() + } }; Ok(Json(ApiResponse { @@ -276,7 +279,10 @@ pub async fn list_repositories( .await { Ok(cursor) => collect_cursor_async(cursor).await, - Err(_) => Vec::new(), + Err(e) => { + tracing::warn!("Failed to fetch repositories: {e}"); + Vec::new() + } }; Ok(Json(ApiResponse { @@ -342,7 +348,7 @@ pub async fn get_ssh_public_key( Ok(Json(serde_json::json!({ "public_key": public_key.trim() }))) } -#[tracing::instrument(skip_all)] +#[tracing::instrument(skip_all, fields(repo_id = %id))] pub async fn trigger_scan( Extension(agent): AgentExt, Path(id): Path, @@ -357,7 +363,7 @@ pub async fn trigger_scan( Ok(Json(serde_json::json!({ "status": "scan_triggered" }))) } -#[tracing::instrument(skip_all)] +#[tracing::instrument(skip_all, fields(repo_id = %id))] pub async fn delete_repository( Extension(agent): AgentExt, Path(id): Path, @@ -404,7 +410,7 @@ pub async fn delete_repository( Ok(Json(serde_json::json!({ "status": "deleted" }))) } -#[tracing::instrument(skip_all)] +#[tracing::instrument(skip_all, fields(repo_id = ?filter.repo_id, severity = ?filter.severity, scan_type = ?filter.scan_type))] pub async fn list_findings( Extension(agent): AgentExt, Query(filter): Query, @@ -463,7 +469,10 @@ pub async fn list_findings( .await { Ok(cursor) => collect_cursor_async(cursor).await, - Err(_) => Vec::new(), + Err(e) => { + tracing::warn!("Failed to fetch findings: {e}"); + Vec::new() + } }; Ok(Json(ApiResponse { @@ -473,7 +482,7 @@ pub async fn list_findings( })) } -#[tracing::instrument(skip_all)] +#[tracing::instrument(skip_all, fields(finding_id = %id))] pub async fn get_finding( Extension(agent): AgentExt, Path(id): Path, @@ -494,7 +503,7 @@ pub async fn get_finding( })) } -#[tracing::instrument(skip_all)] +#[tracing::instrument(skip_all, fields(finding_id = %id))] pub async fn update_finding_status( Extension(agent): AgentExt, Path(id): Path, @@ -598,7 +607,7 @@ pub async fn sbom_filters( }))) } -#[tracing::instrument(skip_all)] +#[tracing::instrument(skip_all, fields(repo_id = ?filter.repo_id, package_manager = ?filter.package_manager))] pub async fn list_sbom( Extension(agent): AgentExt, Query(filter): Query, @@ -644,7 +653,10 @@ pub async fn list_sbom( .await { Ok(cursor) => collect_cursor_async(cursor).await, - Err(_) => Vec::new(), + Err(e) => { + tracing::warn!("Failed to fetch SBOM entries: {e}"); + Vec::new() + } }; Ok(Json(ApiResponse { @@ -666,7 +678,10 @@ pub async fn export_sbom( .await { Ok(cursor) => collect_cursor_async(cursor).await, - Err(_) => Vec::new(), + Err(e) => { + tracing::warn!("Failed to fetch SBOM entries for export: {e}"); + Vec::new() + } }; let body = if params.format == "spdx" { @@ -799,7 +814,10 @@ pub async fn license_summary( let entries: Vec = match db.sbom_entries().find(query).await { Ok(cursor) => collect_cursor_async(cursor).await, - Err(_) => Vec::new(), + Err(e) => { + tracing::warn!("Failed to fetch SBOM entries for license summary: {e}"); + Vec::new() + } }; let mut license_map: std::collections::HashMap> = @@ -845,7 +863,10 @@ pub async fn sbom_diff( .await { Ok(cursor) => collect_cursor_async(cursor).await, - Err(_) => Vec::new(), + Err(e) => { + tracing::warn!("Failed to fetch SBOM entries for repo_a: {e}"); + Vec::new() + } }; let entries_b: Vec = match db @@ -854,7 +875,10 @@ pub async fn sbom_diff( .await { Ok(cursor) => collect_cursor_async(cursor).await, - Err(_) => Vec::new(), + Err(e) => { + tracing::warn!("Failed to fetch SBOM entries for repo_b: {e}"); + Vec::new() + } }; // Build maps by (name, package_manager) -> version @@ -944,7 +968,10 @@ pub async fn list_issues( .await { Ok(cursor) => collect_cursor_async(cursor).await, - Err(_) => Vec::new(), + Err(e) => { + tracing::warn!("Failed to fetch tracker issues: {e}"); + Vec::new() + } }; Ok(Json(ApiResponse { @@ -972,7 +999,10 @@ pub async fn list_scan_runs( .await { Ok(cursor) => collect_cursor_async(cursor).await, - Err(_) => Vec::new(), + Err(e) => { + tracing::warn!("Failed to fetch scan runs: {e}"); + Vec::new() + } }; Ok(Json(ApiResponse { diff --git a/compliance-agent/src/pipeline/cve.rs b/compliance-agent/src/pipeline/cve.rs index 2b3b77c..e8f133c 100644 --- a/compliance-agent/src/pipeline/cve.rs +++ b/compliance-agent/src/pipeline/cve.rs @@ -27,6 +27,7 @@ impl CveScanner { repo_id: &str, entries: &mut [SbomEntry], ) -> Result, CoreError> { + tracing::info!("scanning {} SBOM entries for known CVEs", entries.len()); let mut alerts = Vec::new(); // Batch query OSV.dev @@ -93,7 +94,10 @@ impl CveScanner { .json(&body) .send() .await - .map_err(|e| CoreError::Http(format!("OSV.dev request failed: {e}")))?; + .map_err(|e| { + tracing::warn!("OSV.dev API call failed: {e}"); + CoreError::Http(format!("OSV.dev request failed: {e}")) + })?; if !resp.status().is_success() { let status = resp.status(); @@ -104,10 +108,10 @@ impl CveScanner { continue; } - let result: OsvBatchResponse = resp - .json() - .await - .map_err(|e| CoreError::Http(format!("Failed to parse OSV.dev response: {e}")))?; + let result: OsvBatchResponse = resp.json().await.map_err(|e| { + tracing::warn!("failed to parse OSV.dev response: {e}"); + CoreError::Http(format!("Failed to parse OSV.dev response: {e}")) + })?; let chunk_vulns = result.results.into_iter().map(|r| { r.vulns diff --git a/compliance-agent/src/pipeline/git.rs b/compliance-agent/src/pipeline/git.rs index 3647047..39c7f7d 100644 --- a/compliance-agent/src/pipeline/git.rs +++ b/compliance-agent/src/pipeline/git.rs @@ -78,10 +78,12 @@ impl GitOps { } } + #[tracing::instrument(skip_all, fields(repo_name = %repo_name))] pub fn clone_or_fetch(&self, git_url: &str, repo_name: &str) -> Result { let repo_path = self.base_path.join(repo_name); if repo_path.exists() { + tracing::info!("fetching updates for existing repo"); self.fetch(&repo_path)?; } else { std::fs::create_dir_all(&repo_path)?; @@ -92,7 +94,9 @@ impl GitOps { Ok(repo_path) } + #[tracing::instrument(skip_all)] fn clone_repo(&self, git_url: &str, repo_path: &Path) -> Result<(), AgentError> { + tracing::info!("cloning repo from {}", git_url); let mut builder = git2::build::RepoBuilder::new(); let fetch_opts = self.credentials.fetch_options(); builder.fetch_options(fetch_opts); diff --git a/compliance-agent/src/pipeline/orchestrator.rs b/compliance-agent/src/pipeline/orchestrator.rs index abd2b2b..f43f6dc 100644 --- a/compliance-agent/src/pipeline/orchestrator.rs +++ b/compliance-agent/src/pipeline/orchestrator.rs @@ -1,6 +1,7 @@ use std::sync::Arc; use mongodb::bson::doc; +use tracing::Instrument; use compliance_core::models::*; use compliance_core::traits::Scanner; @@ -50,7 +51,7 @@ impl PipelineOrchestrator { } } - #[tracing::instrument(skip_all)] + #[tracing::instrument(skip_all, fields(repo_id = %repo_id, trigger = ?trigger))] pub async fn run(&self, repo_id: &str, trigger: ScanTrigger) -> Result<(), AgentError> { // Look up the repository let repo = self @@ -90,6 +91,7 @@ impl PipelineOrchestrator { .await?; } Err(e) => { + tracing::error!(repo_id, error = %e, "Scan pipeline failed"); self.db .scan_runs() .update_one( @@ -109,7 +111,7 @@ impl PipelineOrchestrator { result.map(|_| ()) } - #[tracing::instrument(skip_all)] + #[tracing::instrument(skip_all, fields(repo_id = repo.name.as_str()))] async fn run_pipeline( &self, repo: &TrackedRepository, @@ -138,8 +140,13 @@ impl PipelineOrchestrator { // Stage 1: Semgrep SAST tracing::info!("[{repo_id}] Stage 1: Semgrep SAST"); self.update_phase(scan_run_id, "sast").await; - let semgrep = SemgrepScanner; - match semgrep.scan(&repo_path, &repo_id).await { + match async { + let semgrep = SemgrepScanner; + semgrep.scan(&repo_path, &repo_id).await + } + .instrument(tracing::info_span!("stage_sast")) + .await + { Ok(output) => all_findings.extend(output.findings), Err(e) => tracing::warn!("[{repo_id}] Semgrep failed: {e}"), } @@ -147,8 +154,13 @@ impl PipelineOrchestrator { // Stage 2: SBOM Generation tracing::info!("[{repo_id}] Stage 2: SBOM Generation"); self.update_phase(scan_run_id, "sbom_generation").await; - let sbom_scanner = SbomScanner; - let mut sbom_entries = match sbom_scanner.scan(&repo_path, &repo_id).await { + let mut sbom_entries = match async { + let sbom_scanner = SbomScanner; + sbom_scanner.scan(&repo_path, &repo_id).await + } + .instrument(tracing::info_span!("stage_sbom_generation")) + .await + { Ok(output) => output.sbom_entries, Err(e) => { tracing::warn!("[{repo_id}] SBOM generation failed: {e}"); @@ -167,9 +179,13 @@ impl PipelineOrchestrator { k.expose_secret().to_string() }), ); - let cve_alerts = match cve_scanner - .scan_dependencies(&repo_id, &mut sbom_entries) - .await + let cve_alerts = match async { + cve_scanner + .scan_dependencies(&repo_id, &mut sbom_entries) + .await + } + .instrument(tracing::info_span!("stage_cve_scanning")) + .await { Ok(alerts) => alerts, Err(e) => { @@ -181,22 +197,36 @@ impl PipelineOrchestrator { // Stage 4: Pattern Scanning (GDPR + OAuth) tracing::info!("[{repo_id}] Stage 4: Pattern Scanning"); self.update_phase(scan_run_id, "pattern_scanning").await; - let gdpr = GdprPatternScanner::new(); - match gdpr.scan(&repo_path, &repo_id).await { - Ok(output) => all_findings.extend(output.findings), - Err(e) => tracing::warn!("[{repo_id}] GDPR pattern scan failed: {e}"), - } - let oauth = OAuthPatternScanner::new(); - match oauth.scan(&repo_path, &repo_id).await { - Ok(output) => all_findings.extend(output.findings), - Err(e) => tracing::warn!("[{repo_id}] OAuth pattern scan failed: {e}"), + { + let pattern_findings = async { + let mut findings = Vec::new(); + let gdpr = GdprPatternScanner::new(); + match gdpr.scan(&repo_path, &repo_id).await { + Ok(output) => findings.extend(output.findings), + Err(e) => tracing::warn!("[{repo_id}] GDPR pattern scan failed: {e}"), + } + let oauth = OAuthPatternScanner::new(); + match oauth.scan(&repo_path, &repo_id).await { + Ok(output) => findings.extend(output.findings), + Err(e) => tracing::warn!("[{repo_id}] OAuth pattern scan failed: {e}"), + } + findings + } + .instrument(tracing::info_span!("stage_pattern_scanning")) + .await; + all_findings.extend(pattern_findings); } // Stage 4a: Secret Detection (Gitleaks) tracing::info!("[{repo_id}] Stage 4a: Secret Detection"); self.update_phase(scan_run_id, "secret_detection").await; - let gitleaks = GitleaksScanner; - match gitleaks.scan(&repo_path, &repo_id).await { + match async { + let gitleaks = GitleaksScanner; + gitleaks.scan(&repo_path, &repo_id).await + } + .instrument(tracing::info_span!("stage_secret_detection")) + .await + { Ok(output) => all_findings.extend(output.findings), Err(e) => tracing::warn!("[{repo_id}] Gitleaks failed: {e}"), } @@ -204,8 +234,13 @@ impl PipelineOrchestrator { // Stage 4b: Lint Scanning tracing::info!("[{repo_id}] Stage 4b: Lint Scanning"); self.update_phase(scan_run_id, "lint_scanning").await; - let lint = LintScanner; - match lint.scan(&repo_path, &repo_id).await { + match async { + let lint = LintScanner; + lint.scan(&repo_path, &repo_id).await + } + .instrument(tracing::info_span!("stage_lint_scanning")) + .await + { Ok(output) => all_findings.extend(output.findings), Err(e) => tracing::warn!("[{repo_id}] Lint scanning failed: {e}"), } @@ -214,19 +249,26 @@ impl PipelineOrchestrator { if let Some(old_sha) = &repo.last_scanned_commit { tracing::info!("[{repo_id}] Stage 4c: LLM Code Review"); self.update_phase(scan_run_id, "code_review").await; - let reviewer = CodeReviewScanner::new(self.llm.clone()); - let review_output = reviewer - .review_diff(&repo_path, &repo_id, old_sha, ¤t_sha) - .await; + let review_output = async { + let reviewer = CodeReviewScanner::new(self.llm.clone()); + reviewer + .review_diff(&repo_path, &repo_id, old_sha, ¤t_sha) + .await + } + .instrument(tracing::info_span!("stage_code_review")) + .await; all_findings.extend(review_output.findings); } // Stage 4.5: Graph Building tracing::info!("[{repo_id}] Stage 4.5: Graph Building"); self.update_phase(scan_run_id, "graph_building").await; - let graph_context = match self - .build_code_graph(&repo_path, &repo_id, &all_findings) - .await + let graph_context = match async { + self.build_code_graph(&repo_path, &repo_id, &all_findings) + .await + } + .instrument(tracing::info_span!("stage_graph_building")) + .await { Ok(ctx) => Some(ctx), Err(e) => { diff --git a/compliance-agent/src/pipeline/sbom.rs b/compliance-agent/src/pipeline/sbom.rs index 1254a44..9839385 100644 --- a/compliance-agent/src/pipeline/sbom.rs +++ b/compliance-agent/src/pipeline/sbom.rs @@ -46,6 +46,7 @@ impl Scanner for SbomScanner { /// Generate missing lock files so Syft can resolve the full dependency tree. /// This handles repos that gitignore their lock files (common for Rust libraries). +#[tracing::instrument(skip_all)] async fn generate_lockfiles(repo_path: &Path) { // Cargo: generate Cargo.lock if Cargo.toml exists without it if repo_path.join("Cargo.toml").exists() && !repo_path.join("Cargo.lock").exists() { @@ -122,6 +123,7 @@ async fn generate_lockfiles(repo_path: &Path) { /// Enrich Cargo SBOM entries with license info from `cargo metadata`. /// Syft doesn't read license data from Cargo.lock, so we fill it in. +#[tracing::instrument(skip_all)] async fn enrich_cargo_licenses(repo_path: &Path, entries: &mut [SbomEntry]) { if !repo_path.join("Cargo.toml").exists() { return; @@ -182,6 +184,7 @@ async fn enrich_cargo_licenses(repo_path: &Path, entries: &mut [SbomEntry]) { } } +#[tracing::instrument(skip_all, fields(repo_id = %repo_id))] async fn run_syft(repo_path: &Path, repo_id: &str) -> Result, CoreError> { let output = tokio::process::Command::new("syft") .arg(repo_path) @@ -232,6 +235,7 @@ async fn run_syft(repo_path: &Path, repo_id: &str) -> Result, Cor Ok(entries) } +#[tracing::instrument(skip_all)] async fn run_cargo_audit(repo_path: &Path, _repo_id: &str) -> Result, CoreError> { let cargo_lock = repo_path.join("Cargo.lock"); if !cargo_lock.exists() {