feat: enhance tracing with field attributes and warn logging across all handlers

Add repo_id, finding_id, and filter fields to the tracing::instrument attributes for better trace correlation in SigNoz. Replace every silently swallowed error (Err(_) => Vec::new()) with tracing::warn! logging across the mod.rs, dast.rs, and graph.rs handlers. Add stage-level spans with .instrument() to the pipeline orchestrator for visibility into scan phases. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-10 21:56:16 +01:00
parent 67d6a937ae
commit 99983c51e3
8 changed files with 178 additions and 70 deletions
@@ -27,6 +27,7 @@ impl CveScanner {
        repo_id: &str,
        entries: &mut [SbomEntry],
    ) -> Result<Vec<CveAlert>, CoreError> {
+        tracing::info!("scanning {} SBOM entries for known CVEs", entries.len());
        let mut alerts = Vec::new();

        // Batch query OSV.dev
@@ -93,7 +94,10 @@ impl CveScanner {
                .json(&body)
                .send()
                .await
-                .map_err(|e| CoreError::Http(format!("OSV.dev request failed: {e}")))?;
+                .map_err(|e| {
+                    tracing::warn!("OSV.dev API call failed: {e}");
+                    CoreError::Http(format!("OSV.dev request failed: {e}"))
+                })?;

            if !resp.status().is_success() {
                let status = resp.status();
@@ -104,10 +108,10 @@ impl CveScanner {
                continue;
            }

-            let result: OsvBatchResponse = resp
-                .json()
-                .await
-                .map_err(|e| CoreError::Http(format!("Failed to parse OSV.dev response: {e}")))?;
+            let result: OsvBatchResponse = resp.json().await.map_err(|e| {
+                tracing::warn!("failed to parse OSV.dev response: {e}");
+                CoreError::Http(format!("Failed to parse OSV.dev response: {e}"))
+            })?;

            let chunk_vulns = result.results.into_iter().map(|r| {
                r.vulns
@@ -78,10 +78,12 @@ impl GitOps {
        }
    }

+    #[tracing::instrument(skip_all, fields(repo_name = %repo_name))]
    pub fn clone_or_fetch(&self, git_url: &str, repo_name: &str) -> Result<PathBuf, AgentError> {
        let repo_path = self.base_path.join(repo_name);

        if repo_path.exists() {
+            tracing::info!("fetching updates for existing repo");
            self.fetch(&repo_path)?;
        } else {
            std::fs::create_dir_all(&repo_path)?;
@@ -92,7 +94,9 @@ impl GitOps {
        Ok(repo_path)
    }

+    #[tracing::instrument(skip_all)]
    fn clone_repo(&self, git_url: &str, repo_path: &Path) -> Result<(), AgentError> {
+        tracing::info!("cloning repo from {}", git_url);
        let mut builder = git2::build::RepoBuilder::new();
        let fetch_opts = self.credentials.fetch_options();
        builder.fetch_options(fetch_opts);
@@ -1,6 +1,7 @@
 use std::sync::Arc;

 use mongodb::bson::doc;
+use tracing::Instrument;

 use compliance_core::models::*;
 use compliance_core::traits::Scanner;
@@ -50,7 +51,7 @@ impl PipelineOrchestrator {
        }
    }

-    #[tracing::instrument(skip_all)]
+    #[tracing::instrument(skip_all, fields(repo_id = %repo_id, trigger = ?trigger))]
    pub async fn run(&self, repo_id: &str, trigger: ScanTrigger) -> Result<(), AgentError> {
        // Look up the repository
        let repo = self
@@ -90,6 +91,7 @@ impl PipelineOrchestrator {
                    .await?;
            }
            Err(e) => {
+                tracing::error!(repo_id, error = %e, "Scan pipeline failed");
                self.db
                    .scan_runs()
                    .update_one(
@@ -109,7 +111,7 @@ impl PipelineOrchestrator {
        result.map(|_| ())
    }

-    #[tracing::instrument(skip_all)]
+    #[tracing::instrument(skip_all, fields(repo_id = repo.name.as_str()))]
    async fn run_pipeline(
        &self,
        repo: &TrackedRepository,
@@ -138,8 +140,13 @@ impl PipelineOrchestrator {
        // Stage 1: Semgrep SAST
        tracing::info!("[{repo_id}] Stage 1: Semgrep SAST");
        self.update_phase(scan_run_id, "sast").await;
-        let semgrep = SemgrepScanner;
-        match semgrep.scan(&repo_path, &repo_id).await {
+        match async {
+            let semgrep = SemgrepScanner;
+            semgrep.scan(&repo_path, &repo_id).await
+        }
+        .instrument(tracing::info_span!("stage_sast"))
+        .await
+        {
            Ok(output) => all_findings.extend(output.findings),
            Err(e) => tracing::warn!("[{repo_id}] Semgrep failed: {e}"),
        }
@@ -147,8 +154,13 @@ impl PipelineOrchestrator {
        // Stage 2: SBOM Generation
        tracing::info!("[{repo_id}] Stage 2: SBOM Generation");
        self.update_phase(scan_run_id, "sbom_generation").await;
-        let sbom_scanner = SbomScanner;
-        let mut sbom_entries = match sbom_scanner.scan(&repo_path, &repo_id).await {
+        let mut sbom_entries = match async {
+            let sbom_scanner = SbomScanner;
+            sbom_scanner.scan(&repo_path, &repo_id).await
+        }
+        .instrument(tracing::info_span!("stage_sbom_generation"))
+        .await
+        {
            Ok(output) => output.sbom_entries,
            Err(e) => {
                tracing::warn!("[{repo_id}] SBOM generation failed: {e}");
@@ -167,9 +179,13 @@ impl PipelineOrchestrator {
                k.expose_secret().to_string()
            }),
        );
-        let cve_alerts = match cve_scanner
-            .scan_dependencies(&repo_id, &mut sbom_entries)
-            .await
+        let cve_alerts = match async {
+            cve_scanner
+                .scan_dependencies(&repo_id, &mut sbom_entries)
+                .await
+        }
+        .instrument(tracing::info_span!("stage_cve_scanning"))
+        .await
        {
            Ok(alerts) => alerts,
            Err(e) => {
@@ -181,22 +197,36 @@ impl PipelineOrchestrator {
        // Stage 4: Pattern Scanning (GDPR + OAuth)
        tracing::info!("[{repo_id}] Stage 4: Pattern Scanning");
        self.update_phase(scan_run_id, "pattern_scanning").await;
-        let gdpr = GdprPatternScanner::new();
-        match gdpr.scan(&repo_path, &repo_id).await {
-            Ok(output) => all_findings.extend(output.findings),
-            Err(e) => tracing::warn!("[{repo_id}] GDPR pattern scan failed: {e}"),
-        }
-        let oauth = OAuthPatternScanner::new();
-        match oauth.scan(&repo_path, &repo_id).await {
-            Ok(output) => all_findings.extend(output.findings),
-            Err(e) => tracing::warn!("[{repo_id}] OAuth pattern scan failed: {e}"),
+        {
+            let pattern_findings = async {
+                let mut findings = Vec::new();
+                let gdpr = GdprPatternScanner::new();
+                match gdpr.scan(&repo_path, &repo_id).await {
+                    Ok(output) => findings.extend(output.findings),
+                    Err(e) => tracing::warn!("[{repo_id}] GDPR pattern scan failed: {e}"),
+                }
+                let oauth = OAuthPatternScanner::new();
+                match oauth.scan(&repo_path, &repo_id).await {
+                    Ok(output) => findings.extend(output.findings),
+                    Err(e) => tracing::warn!("[{repo_id}] OAuth pattern scan failed: {e}"),
+                }
+                findings
+            }
+            .instrument(tracing::info_span!("stage_pattern_scanning"))
+            .await;
+            all_findings.extend(pattern_findings);
        }

        // Stage 4a: Secret Detection (Gitleaks)
        tracing::info!("[{repo_id}] Stage 4a: Secret Detection");
        self.update_phase(scan_run_id, "secret_detection").await;
-        let gitleaks = GitleaksScanner;
-        match gitleaks.scan(&repo_path, &repo_id).await {
+        match async {
+            let gitleaks = GitleaksScanner;
+            gitleaks.scan(&repo_path, &repo_id).await
+        }
+        .instrument(tracing::info_span!("stage_secret_detection"))
+        .await
+        {
            Ok(output) => all_findings.extend(output.findings),
            Err(e) => tracing::warn!("[{repo_id}] Gitleaks failed: {e}"),
        }
@@ -204,8 +234,13 @@ impl PipelineOrchestrator {
        // Stage 4b: Lint Scanning
        tracing::info!("[{repo_id}] Stage 4b: Lint Scanning");
        self.update_phase(scan_run_id, "lint_scanning").await;
-        let lint = LintScanner;
-        match lint.scan(&repo_path, &repo_id).await {
+        match async {
+            let lint = LintScanner;
+            lint.scan(&repo_path, &repo_id).await
+        }
+        .instrument(tracing::info_span!("stage_lint_scanning"))
+        .await
+        {
            Ok(output) => all_findings.extend(output.findings),
            Err(e) => tracing::warn!("[{repo_id}] Lint scanning failed: {e}"),
        }
@@ -214,19 +249,26 @@ impl PipelineOrchestrator {
        if let Some(old_sha) = &repo.last_scanned_commit {
            tracing::info!("[{repo_id}] Stage 4c: LLM Code Review");
            self.update_phase(scan_run_id, "code_review").await;
-            let reviewer = CodeReviewScanner::new(self.llm.clone());
-            let review_output = reviewer
-                .review_diff(&repo_path, &repo_id, old_sha, &current_sha)
-                .await;
+            let review_output = async {
+                let reviewer = CodeReviewScanner::new(self.llm.clone());
+                reviewer
+                    .review_diff(&repo_path, &repo_id, old_sha, &current_sha)
+                    .await
+            }
+            .instrument(tracing::info_span!("stage_code_review"))
+            .await;
            all_findings.extend(review_output.findings);
        }

        // Stage 4.5: Graph Building
        tracing::info!("[{repo_id}] Stage 4.5: Graph Building");
        self.update_phase(scan_run_id, "graph_building").await;
-        let graph_context = match self
-            .build_code_graph(&repo_path, &repo_id, &all_findings)
-            .await
+        let graph_context = match async {
+            self.build_code_graph(&repo_path, &repo_id, &all_findings)
+                .await
+        }
+        .instrument(tracing::info_span!("stage_graph_building"))
+        .await
        {
            Ok(ctx) => Some(ctx),
            Err(e) => {
@@ -46,6 +46,7 @@ impl Scanner for SbomScanner {

 /// Generate missing lock files so Syft can resolve the full dependency tree.
 /// This handles repos that gitignore their lock files (common for Rust libraries).
+#[tracing::instrument(skip_all)]
 async fn generate_lockfiles(repo_path: &Path) {
    // Cargo: generate Cargo.lock if Cargo.toml exists without it
    if repo_path.join("Cargo.toml").exists() && !repo_path.join("Cargo.lock").exists() {
@@ -122,6 +123,7 @@ async fn generate_lockfiles(repo_path: &Path) {

 /// Enrich Cargo SBOM entries with license info from `cargo metadata`.
 /// Syft doesn't read license data from Cargo.lock, so we fill it in.
+#[tracing::instrument(skip_all)]
 async fn enrich_cargo_licenses(repo_path: &Path, entries: &mut [SbomEntry]) {
    if !repo_path.join("Cargo.toml").exists() {
        return;
@@ -182,6 +184,7 @@ async fn enrich_cargo_licenses(repo_path: &Path, entries: &mut [SbomEntry]) {
    }
 }

+#[tracing::instrument(skip_all, fields(repo_id = %repo_id))]
 async fn run_syft(repo_path: &Path, repo_id: &str) -> Result<Vec<SbomEntry>, CoreError> {
    let output = tokio::process::Command::new("syft")
        .arg(repo_path)
@@ -232,6 +235,7 @@ async fn run_syft(repo_path: &Path, repo_id: &str) -> Result<Vec<SbomEntry>, Cor
    Ok(entries)
 }

+#[tracing::instrument(skip_all)]
 async fn run_cargo_audit(repo_path: &Path, _repo_id: &str) -> Result<Vec<AuditVuln>, CoreError> {
    let cargo_lock = repo_path.join("Cargo.lock");
    if !cargo_lock.exists() {