fix: live progress + concurrency for embedding builds

The embedding build progress was only written to MongoDB after every batch completed (and the final flush + status update only happened at the very end), so the dashboard would show "0/N chunks (0%)" for the entire run, then jump straight to "complete." For a repo with 2k+ chunks this looked like the build was stuck. Three fixes: - pipeline: call update_build(Running, embedded_count, ...) after each batch so /api/v1/chat/:repo_id/status reflects real progress, and flush embeddings to Mongo every 200 records so a partial failure does not lose everything. - pipeline: drive batches with FuturesUnordered at concurrency=4 so litellm requests overlap instead of going strictly serial (112 sequential requests for a 2221-chunk repo were the wall-time floor). - llm client: give the reqwest client a 300s request timeout and 10s connect timeout. Previously LlmClient used reqwest::Client::new() with no timeout, so a hung embedding call would block the build indefinitely. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
fix: add HTTP timeout to reqwest client and CVE stage timeout (#79 )
2026-05-13 11:39:37 +02:00 · 2026-05-13 07:30:26 +00:00 · 2026-05-12 11:27:24 +00:00
10 changed files with 202 additions and 79 deletions
@@ -0,0 +1,10 @@
 [advisories]
 ignore = [
    # hickory-proto 0.25.x pulled in transitively via mongodb → hickory-resolver.
    # MongoDB 3.x has not yet released with hickory-resolver 0.26.x, so we cannot
    # upgrade past this without a mongodb release. Both are DNS-layer DoS vectors
    # requiring a MITM/controlled DNS server against MongoDB's hostname resolution —
    # not a realistic attack surface here. Revisit when mongodb bumps hickory.
    "RUSTSEC-2026-0118", # NSEC3 loop, no fix available upstream
    "RUSTSEC-2026-0119", # O(n²) name compression, fixed in hickory-proto >=0.26.1
 ]
@@ -3524,9 +3524,9 @@ checksum = "224484c5d09285a7b8cb0a0c117e847ebd14cb6e4470ecf68cdb89c503b0edb9"
 [[package]]
 name = "mongodb"
-version = "3.5.1"
+version = "3.6.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "803dd859e8afa084c255a8effd8000ff86f7c8076a50cd6d8c99e8f3496f75c2"
+checksum = "1ef2c933617431ad0246fb5b43c425ebdae18c7f7259c87de0726d93b0e7e91b"
 dependencies = [
 "base64",
 "bitflags",
@@ -3570,9 +3570,9 @@ dependencies = [
 [[package]]
 name = "mongodb-internal-macros"
-version = "3.5.1"
+version = "3.6.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a973ef3dd3dbc6f6e65bbdecfd9ec5e781b9e7493b0f369a7c62e35d8e5ae2c8"
+checksum = "9e5758dc828eb2d02ec30563cba365609d56ddd833190b192beaee2b475a7bb3"
 dependencies = [
 "macro_magic",
 "proc-macro2",
@@ -4699,9 +4699,9 @@ dependencies = [
 [[package]]
 name = "rustls-webpki"
-version = "0.103.10"
+version = "0.103.13"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "df33b2b81ac578cabaf06b89b0631153a3f416b0a886e8a7a1707fb51abbd1ef"
+checksum = "61c429a8649f110dddef65e2a5ad240f747e85f7758a6bccc7e5777bd33f756e"
 dependencies = [
 "ring",
 "rustls-pki-types",
@@ -35,11 +35,16 @@ impl ComplianceAgent {
            config.litellm_model.clone(),
            config.litellm_embed_model.clone(),
        ));
        let http = reqwest::Client::builder()
            .timeout(std::time::Duration::from_secs(30))
            .connect_timeout(std::time::Duration::from_secs(10))
            .build()
            .unwrap_or_default();
        Self {
            config,
            db,
            llm,
-            http: reqwest::Client::new(),
+            http,
            session_streams: Arc::new(DashMap::new()),
            session_pause: Arc::new(DashMap::new()),
            session_semaphore: Arc::new(Semaphore::new(DEFAULT_MAX_CONCURRENT_SESSIONS)),
@@ -19,12 +19,17 @@ impl LlmClient {
        model: String,
        embed_model: String,
    ) -> Self {
        let http = reqwest::Client::builder()
            .timeout(std::time::Duration::from_secs(300))
            .connect_timeout(std::time::Duration::from_secs(10))
            .build()
            .unwrap_or_default();
        Self {
            base_url,
            api_key,
            model,
            embed_model,
-            http: reqwest::Client::new(),
+            http,
        }
    }
@@ -19,26 +19,33 @@ impl Scanner for GitleaksScanner {
    #[tracing::instrument(skip_all)]
    async fn scan(&self, repo_path: &Path, repo_id: &str) -> Result<ScanOutput, CoreError> {
-        let output = tokio::process::Command::new("gitleaks")
+        let output = tokio::time::timeout(
-            .args([
+            std::time::Duration::from_secs(300),
-                "detect",
+            tokio::process::Command::new("gitleaks")
-                "--source",
+                .args([
-                ".",
+                    "detect",
-                "--report-format",
+                    "--source",
-                "json",
+                    ".",
-                "--report-path",
+                    "--report-format",
-                "/dev/stdout",
+                    "json",
-                "--no-banner",
+                    "--report-path",
-                "--exit-code",
+                    "/dev/stdout",
-                "0",
+                    "--no-banner",
-            ])
+                    "--exit-code",
-            .current_dir(repo_path)
+                    "0",
-            .output()
+                ])
-            .await
+                .current_dir(repo_path)
-            .map_err(|e| CoreError::Scanner {
+                .output(),
-                scanner: "gitleaks".to_string(),
+        )
-                source: Box::new(e),
+        .await
-            })?;
+        .map_err(|_| CoreError::Scanner {
            scanner: "gitleaks".to_string(),
            source: "timed out after 5 minutes".into(),
        })?
        .map_err(|e| CoreError::Scanner {
            scanner: "gitleaks".to_string(),
            source: Box::new(e),
        })?;
        if output.stdout.is_empty() {
            return Ok(ScanOutput::default());
@@ -174,19 +174,26 @@ impl PipelineOrchestrator {
                k.expose_secret().to_string()
            }),
        );
-        let cve_alerts = match async {
+        let cve_alerts = match tokio::time::timeout(
-            cve_scanner
+            std::time::Duration::from_secs(600),
-                .scan_dependencies(&repo_id, &mut sbom_entries)
+            async {
-                .await
+                cve_scanner
-        }
+                    .scan_dependencies(&repo_id, &mut sbom_entries)
-        .instrument(tracing::info_span!("stage_cve_scanning"))
+                    .await
            }
            .instrument(tracing::info_span!("stage_cve_scanning")),
        )
        .await
        {
-            Ok(alerts) => alerts,
+            Ok(Ok(alerts)) => alerts,
-            Err(e) => {
+            Ok(Err(e)) => {
                tracing::warn!("[{repo_id}] CVE scanning failed: {e}");
                Vec::new()
            }
            Err(_) => {
                tracing::warn!("[{repo_id}] CVE scanning timed out after 10 minutes");
                Vec::new()
            }
        };
        // Stage 4: Pattern Scanning (GDPR + OAuth)
@@ -5,20 +5,26 @@ use compliance_core::CoreError;
 #[tracing::instrument(skip_all, fields(repo_id = %repo_id))]
 pub(super) async fn run_syft(repo_path: &Path, repo_id: &str) -> Result<Vec<SbomEntry>, CoreError> {
-    let output = tokio::process::Command::new("syft")
+    let output = tokio::time::timeout(
-        .arg(repo_path)
+        std::time::Duration::from_secs(300),
-        .args(["-o", "cyclonedx-json"])
+        tokio::process::Command::new("syft")
-        // Enable remote license lookups for all ecosystems
+            .arg(repo_path)
-        .env("SYFT_GOLANG_SEARCH_REMOTE_LICENSES", "true")
+            .args(["-o", "cyclonedx-json"])
-        .env("SYFT_JAVASCRIPT_SEARCH_REMOTE_LICENSES", "true")
+            .env("SYFT_GOLANG_SEARCH_REMOTE_LICENSES", "true")
-        .env("SYFT_PYTHON_SEARCH_REMOTE_LICENSES", "true")
+            .env("SYFT_JAVASCRIPT_SEARCH_REMOTE_LICENSES", "true")
-        .env("SYFT_JAVA_USE_NETWORK", "true")
+            .env("SYFT_PYTHON_SEARCH_REMOTE_LICENSES", "true")
-        .output()
+            .env("SYFT_JAVA_USE_NETWORK", "true")
-        .await
+            .output(),
-        .map_err(|e| CoreError::Scanner {
+    )
-            scanner: "syft".to_string(),
+    .await
-            source: Box::new(e),
+    .map_err(|_| CoreError::Scanner {
-        })?;
+        scanner: "syft".to_string(),
        source: "timed out after 5 minutes".into(),
    })?
    .map_err(|e| CoreError::Scanner {
        scanner: "syft".to_string(),
        source: Box::new(e),
    })?;
    if !output.status.success() {
        let stderr = String::from_utf8_lossy(&output.stderr);
@@ -19,15 +19,30 @@ impl Scanner for SemgrepScanner {
    #[tracing::instrument(skip_all)]
    async fn scan(&self, repo_path: &Path, repo_id: &str) -> Result<ScanOutput, CoreError> {
-        let output = tokio::process::Command::new("semgrep")
+        let output = tokio::time::timeout(
-            .args(["--config=auto", "--json", "--quiet"])
+            std::time::Duration::from_secs(600),
-            .arg(repo_path)
+            tokio::process::Command::new("semgrep")
-            .output()
+                .args([
-            .await
+                    "--config=auto",
-            .map_err(|e| CoreError::Scanner {
+                    "--json",
-                scanner: "semgrep".to_string(),
+                    "--quiet",
-                source: Box::new(e),
+                    "--max-memory",
-            })?;
+                    "500",
                    "--jobs",
                    "1",
                ])
                .arg(repo_path)
                .output(),
        )
        .await
        .map_err(|_| CoreError::Scanner {
            scanner: "semgrep".to_string(),
            source: "timed out after 10 minutes".into(),
        })?
        .map_err(|e| CoreError::Scanner {
            scanner: "semgrep".to_string(),
            source: Box::new(e),
        })?;
        if !output.status.success() && output.stdout.is_empty() {
            let stderr = String::from_utf8_lossy(&output.stderr);
@@ -6,11 +6,16 @@ use compliance_core::models::embedding::{CodeEmbedding, EmbeddingBuildRun, Embed
 use compliance_core::models::graph::CodeNode;
 use compliance_graph::graph::chunking::extract_chunks;
 use compliance_graph::graph::embedding_store::EmbeddingStore;
 use futures_util::stream::{FuturesUnordered, StreamExt};
 use tracing::{error, info};
 use crate::error::AgentError;
 use crate::llm::LlmClient;
 const EMBED_BATCH_SIZE: usize = 20;
 const EMBED_CONCURRENCY: usize = 4;
 const EMBED_FLUSH_EVERY: usize = 200;
 /// RAG pipeline for building embeddings and performing retrieval
 pub struct RagPipeline {
    llm: Arc<LlmClient>,
@@ -77,25 +82,33 @@ impl RagPipeline {
            .await
            .map_err(|e| AgentError::Other(format!("Failed to delete old embeddings: {e}")))?;
-        // Step 3: Batch embed (small batches to stay within model limits)
+        // Step 3: Batch embed with bounded concurrency. Flush to Mongo and
-        let batch_size = 20;
+        // update progress periodically so the dashboard can show live status.
-        let mut all_embeddings = Vec::new();
+        let mut pending = Vec::with_capacity(EMBED_FLUSH_EVERY);
        let mut embedded_count = 0u32;
-        for batch_start in (0..chunks.len()).step_by(batch_size) {
+        // Build the list of batch indices to process.
-            let batch_end = (batch_start + batch_size).min(chunks.len());
+        let batches: Vec<(usize, usize)> = (0..chunks.len())
-            let batch_chunks = &chunks[batch_start..batch_end];
+            .step_by(EMBED_BATCH_SIZE)
            .map(|start| (start, (start + EMBED_BATCH_SIZE).min(chunks.len())))
            .collect();
-            // Prepare texts: context_header + content
+        let mut batch_iter = batches.into_iter();
-            let texts: Vec<String> = batch_chunks
+        let mut in_flight = FuturesUnordered::new();
                .iter()
                .map(|c| format!("{}\n{}", c.context_header, c.content))
                .collect();
-            match self.llm.embed(texts).await {
+        // Prime up to EMBED_CONCURRENCY batches.
-                Ok(vectors) => {
+        for _ in 0..EMBED_CONCURRENCY {
            if let Some((start, end)) = batch_iter.next() {
                in_flight.push(self.embed_batch(&chunks[start..end], start, end));
            }
        }
        while let Some(result) = in_flight.next().await {
            match result {
                Ok((start, end, vectors)) => {
                    let batch_chunks = &chunks[start..end];
                    for (chunk, embedding) in batch_chunks.iter().zip(vectors) {
-                        all_embeddings.push(CodeEmbedding {
+                        pending.push(CodeEmbedding {
                            id: None,
                            repo_id: repo_id.to_string(),
                            graph_build_id: graph_build_id.to_string(),
@@ -113,9 +126,45 @@ impl RagPipeline {
                        });
                    }
                    embedded_count += batch_chunks.len() as u32;
                    // Flush pending embeddings to Mongo periodically and update progress.
                    if pending.len() >= EMBED_FLUSH_EVERY {
                        self.embedding_store
                            .store_embeddings(&pending)
                            .await
                            .map_err(|e| {
                                AgentError::Other(format!("Failed to store embeddings: {e}"))
                            })?;
                        pending.clear();
                    }
                    // Always update the progress counter on the build doc — even if
                    // we haven't flushed embeddings yet — so the UI shows movement.
                    if let Err(e) = self
                        .embedding_store
                        .update_build(
                            repo_id,
                            graph_build_id,
                            EmbeddingBuildStatus::Running,
                            embedded_count,
                            None,
                        )
                        .await
                    {
                        error!("[{repo_id}] Failed to update build progress: {e}");
                    }
                    // Queue the next batch to keep concurrency saturated.
                    if let Some((s, e)) = batch_iter.next() {
                        in_flight.push(self.embed_batch(&chunks[s..e], s, e));
                    }
                }
                Err(e) => {
                    error!("[{repo_id}] Embedding batch failed: {e}");
                    // Flush whatever we have so partial progress isn't lost.
                    if !pending.is_empty() {
                        let _ = self.embedding_store.store_embeddings(&pending).await;
                    }
                    build.status = EmbeddingBuildStatus::Failed;
                    build.error_message = Some(e.to_string());
                    build.completed_at = Some(Utc::now());
@@ -134,11 +183,13 @@ impl RagPipeline {
            }
        }
-        // Step 4: Store all embeddings
+        // Step 4: Flush any remaining embeddings
-        self.embedding_store
+        if !pending.is_empty() {
-            .store_embeddings(&all_embeddings)
+            self.embedding_store
-            .await
+                .store_embeddings(&pending)
-            .map_err(|e| AgentError::Other(format!("Failed to store embeddings: {e}")))?;
+                .await
                .map_err(|e| AgentError::Other(format!("Failed to store embeddings: {e}")))?;
        }
        // Step 5: Update build status
        build.status = EmbeddingBuildStatus::Completed;
@@ -161,4 +212,21 @@ impl RagPipeline {
        );
        Ok(build)
    }
    /// Embed one batch of chunks. Returns the (start, end, vectors) tuple so
    /// out-of-order completion from `FuturesUnordered` can still be reconciled
    /// against the original chunk slice.
    async fn embed_batch(
        &self,
        batch_chunks: &[compliance_graph::graph::chunking::CodeChunk],
        start: usize,
        end: usize,
    ) -> Result<(usize, usize, Vec<Vec<f64>>), AgentError> {
        let texts: Vec<String> = batch_chunks
            .iter()
            .map(|c| format!("{}\n{}", c.context_header, c.content))
            .collect();
        let vectors = self.llm.embed(texts).await?;
        Ok((start, end, vectors))
    }
 }
@@ -32,7 +32,7 @@ pub fn AppShell() -> Element {
            // Not authenticated — redirect to Keycloak login
            rsx! {
                document::Script {
-                    dangerous_inner_html: "window.location.href = '/auth';"
+                    "window.location.href = '/auth';"
                }
            }
        }