Compare commits

..

3 Commits

Author SHA1 Message Date
Sharang Parnerkar 3edd1d50ac fix: resolve cargo audit failures
CI / Check (pull_request) Successful in 10m35s
CI / Detect Changes (pull_request) Has been skipped
CI / Deploy Agent (pull_request) Has been cancelled
CI / Deploy Dashboard (pull_request) Has been cancelled
CI / Deploy Docs (pull_request) Has been cancelled
CI / Deploy MCP (pull_request) Has been cancelled
- Update rustls-webpki 0.103.10 → 0.103.13 (fixes RUSTSEC-2026-0098,
  RUSTSEC-2026-0099, RUSTSEC-2026-0104)
- Update mongodb 3.5.1 → 3.6.0 (latest compatible 3.x)
- Add .cargo/audit.toml ignoring two hickory-proto advisories that cannot
  be fixed: mongodb 3.x pins hickory-resolver 0.25.x which pins
  hickory-proto 0.25.x. RUSTSEC-2026-0118 has no upstream fix at all;
  RUSTSEC-2026-0119 requires hickory-proto >=0.26.1 which mongodb does
  not yet support. Both are DNS-layer DoS vectors requiring control of
  the DNS server responding to MongoDB's hostname resolution.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-12 12:47:16 +02:00
Sharang Parnerkar 9ff3b9305c fix: restore syft remote license lookup env vars
CI / Check (pull_request) Failing after 5m50s
CI / Detect Changes (pull_request) Has been skipped
CI / Deploy Agent (pull_request) Has been skipped
CI / Deploy Dashboard (pull_request) Has been skipped
CI / Deploy Docs (pull_request) Has been skipped
CI / Deploy MCP (pull_request) Has been skipped
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-12 11:58:21 +02:00
Sharang Parnerkar e02266511a fix: add timeouts to scanners, cap semgrep memory, remove syft remote lookups, fix Script error
CI / Check (pull_request) Has been cancelled
CI / Detect Changes (pull_request) Has been cancelled
CI / Deploy Agent (pull_request) Has been cancelled
CI / Deploy Dashboard (pull_request) Has been cancelled
CI / Deploy Docs (pull_request) Has been cancelled
CI / Deploy MCP (pull_request) Has been cancelled
Semgrep was running unbounded with --config=auto (downloads all rules) and no memory cap,
making it likely to get OOM-killed in resource-constrained Orca containers. Syft had remote
license lookups enabled, which added network calls and memory overhead. Neither had timeouts,
so a hung process would stall the entire scan indefinitely and silently produce 0 results.

- semgrep: add --max-memory 500 --jobs 1 and a 10-minute timeout
- syft: remove remote license lookup env vars, add 5-minute timeout
- gitleaks: add 5-minute timeout
- dashboard: fix Script dangerous_inner_html -> text child (Dioxus 0.7 Script element
  requires a single text node child, not dangerous_inner_html — was spamming error logs)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-12 11:49:46 +02:00
4 changed files with 29 additions and 114 deletions
+1 -6
View File
@@ -35,16 +35,11 @@ impl ComplianceAgent {
config.litellm_model.clone(),
config.litellm_embed_model.clone(),
));
let http = reqwest::Client::builder()
.timeout(std::time::Duration::from_secs(30))
.connect_timeout(std::time::Duration::from_secs(10))
.build()
.unwrap_or_default();
Self {
config,
db,
llm,
http,
http: reqwest::Client::new(),
session_streams: Arc::new(DashMap::new()),
session_pause: Arc::new(DashMap::new()),
session_semaphore: Arc::new(Semaphore::new(DEFAULT_MAX_CONCURRENT_SESSIONS)),
+1 -6
View File
@@ -19,17 +19,12 @@ impl LlmClient {
model: String,
embed_model: String,
) -> Self {
let http = reqwest::Client::builder()
.timeout(std::time::Duration::from_secs(300))
.connect_timeout(std::time::Duration::from_secs(10))
.build()
.unwrap_or_default();
Self {
base_url,
api_key,
model,
embed_model,
http,
http: reqwest::Client::new(),
}
}
+4 -11
View File
@@ -174,26 +174,19 @@ impl PipelineOrchestrator {
k.expose_secret().to_string()
}),
);
let cve_alerts = match tokio::time::timeout(
std::time::Duration::from_secs(600),
async {
let cve_alerts = match async {
cve_scanner
.scan_dependencies(&repo_id, &mut sbom_entries)
.await
}
.instrument(tracing::info_span!("stage_cve_scanning")),
)
.instrument(tracing::info_span!("stage_cve_scanning"))
.await
{
Ok(Ok(alerts)) => alerts,
Ok(Err(e)) => {
Ok(alerts) => alerts,
Err(e) => {
tracing::warn!("[{repo_id}] CVE scanning failed: {e}");
Vec::new()
}
Err(_) => {
tracing::warn!("[{repo_id}] CVE scanning timed out after 10 minutes");
Vec::new()
}
};
// Stage 4: Pattern Scanning (GDPR + OAuth)
+16 -84
View File
@@ -6,16 +6,11 @@ use compliance_core::models::embedding::{CodeEmbedding, EmbeddingBuildRun, Embed
use compliance_core::models::graph::CodeNode;
use compliance_graph::graph::chunking::extract_chunks;
use compliance_graph::graph::embedding_store::EmbeddingStore;
use futures_util::stream::{FuturesUnordered, StreamExt};
use tracing::{error, info};
use crate::error::AgentError;
use crate::llm::LlmClient;
const EMBED_BATCH_SIZE: usize = 20;
const EMBED_CONCURRENCY: usize = 4;
const EMBED_FLUSH_EVERY: usize = 200;
/// RAG pipeline for building embeddings and performing retrieval
pub struct RagPipeline {
llm: Arc<LlmClient>,
@@ -82,33 +77,25 @@ impl RagPipeline {
.await
.map_err(|e| AgentError::Other(format!("Failed to delete old embeddings: {e}")))?;
// Step 3: Batch embed with bounded concurrency. Flush to Mongo and
// update progress periodically so the dashboard can show live status.
let mut pending = Vec::with_capacity(EMBED_FLUSH_EVERY);
// Step 3: Batch embed (small batches to stay within model limits)
let batch_size = 20;
let mut all_embeddings = Vec::new();
let mut embedded_count = 0u32;
// Build the list of batch indices to process.
let batches: Vec<(usize, usize)> = (0..chunks.len())
.step_by(EMBED_BATCH_SIZE)
.map(|start| (start, (start + EMBED_BATCH_SIZE).min(chunks.len())))
for batch_start in (0..chunks.len()).step_by(batch_size) {
let batch_end = (batch_start + batch_size).min(chunks.len());
let batch_chunks = &chunks[batch_start..batch_end];
// Prepare texts: context_header + content
let texts: Vec<String> = batch_chunks
.iter()
.map(|c| format!("{}\n{}", c.context_header, c.content))
.collect();
let mut batch_iter = batches.into_iter();
let mut in_flight = FuturesUnordered::new();
// Prime up to EMBED_CONCURRENCY batches.
for _ in 0..EMBED_CONCURRENCY {
if let Some((start, end)) = batch_iter.next() {
in_flight.push(self.embed_batch(&chunks[start..end], start, end));
}
}
while let Some(result) = in_flight.next().await {
match result {
Ok((start, end, vectors)) => {
let batch_chunks = &chunks[start..end];
match self.llm.embed(texts).await {
Ok(vectors) => {
for (chunk, embedding) in batch_chunks.iter().zip(vectors) {
pending.push(CodeEmbedding {
all_embeddings.push(CodeEmbedding {
id: None,
repo_id: repo_id.to_string(),
graph_build_id: graph_build_id.to_string(),
@@ -126,45 +113,9 @@ impl RagPipeline {
});
}
embedded_count += batch_chunks.len() as u32;
// Flush pending embeddings to Mongo periodically and update progress.
if pending.len() >= EMBED_FLUSH_EVERY {
self.embedding_store
.store_embeddings(&pending)
.await
.map_err(|e| {
AgentError::Other(format!("Failed to store embeddings: {e}"))
})?;
pending.clear();
}
// Always update the progress counter on the build doc — even if
// we haven't flushed embeddings yet — so the UI shows movement.
if let Err(e) = self
.embedding_store
.update_build(
repo_id,
graph_build_id,
EmbeddingBuildStatus::Running,
embedded_count,
None,
)
.await
{
error!("[{repo_id}] Failed to update build progress: {e}");
}
// Queue the next batch to keep concurrency saturated.
if let Some((s, e)) = batch_iter.next() {
in_flight.push(self.embed_batch(&chunks[s..e], s, e));
}
}
Err(e) => {
error!("[{repo_id}] Embedding batch failed: {e}");
// Flush whatever we have so partial progress isn't lost.
if !pending.is_empty() {
let _ = self.embedding_store.store_embeddings(&pending).await;
}
build.status = EmbeddingBuildStatus::Failed;
build.error_message = Some(e.to_string());
build.completed_at = Some(Utc::now());
@@ -183,13 +134,11 @@ impl RagPipeline {
}
}
// Step 4: Flush any remaining embeddings
if !pending.is_empty() {
// Step 4: Store all embeddings
self.embedding_store
.store_embeddings(&pending)
.store_embeddings(&all_embeddings)
.await
.map_err(|e| AgentError::Other(format!("Failed to store embeddings: {e}")))?;
}
// Step 5: Update build status
build.status = EmbeddingBuildStatus::Completed;
@@ -212,21 +161,4 @@ impl RagPipeline {
);
Ok(build)
}
/// Embed one batch of chunks. Returns the (start, end, vectors) tuple so
/// out-of-order completion from `FuturesUnordered` can still be reconciled
/// against the original chunk slice.
async fn embed_batch(
&self,
batch_chunks: &[compliance_graph::graph::chunking::CodeChunk],
start: usize,
end: usize,
) -> Result<(usize, usize, Vec<Vec<f64>>), AgentError> {
let texts: Vec<String> = batch_chunks
.iter()
.map(|c| format!("{}\n{}", c.context_header, c.content))
.collect();
let vectors = self.llm.embed(texts).await?;
Ok((start, end, vectors))
}
}