Compare commits


2 Commits

Author SHA1 Message Date
Sharang Parnerkar b96dda11fb fix: live progress + concurrency for embedding builds
CI / Check (pull_request) Successful in 10m34s
CI / Detect Changes (pull_request) Has been skipped
CI / Deploy Agent (pull_request) Has been skipped
CI / Deploy Dashboard (pull_request) Has been skipped
CI / Deploy Docs (pull_request) Has been skipped
CI / Deploy MCP (pull_request) Has been skipped
Embedding build progress was only written to MongoDB once, at the very
end of the run: embeddings accumulated in memory across batches, and the
flush + status update happened only after the last batch. So the
dashboard showed "0/N chunks (0%)" for the entire run, then jumped
straight to "complete." For a repo with 2k+ chunks this looked like the
build was stuck.

Three fixes:
- pipeline: call update_build(Running, embedded_count, ...) after each
  batch so /api/v1/chat/:repo_id/status reflects real progress, and
  flush embeddings to Mongo every 200 records so a partial failure does
  not lose everything.
- pipeline: drive batches with FuturesUnordered at concurrency=4 so
  litellm requests overlap instead of running strictly serially (112
  sequential requests for a 2221-chunk repo set the wall-time floor);
  see the sketch after this list.
- llm client: give the reqwest client a 300s request timeout and 10s
  connect timeout. Previously LlmClient used reqwest::Client::new()
  with no timeout, so a hung embedding call would block the build
  indefinitely.
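
For reference, the prime-then-refill pattern the second bullet describes
reduces to a small self-contained sketch. Everything below is
illustrative: `work` and `items` are hypothetical stand-ins for
`embed_batch` and the chunk batches in the real pipeline.

use futures_util::stream::{FuturesUnordered, StreamExt};

// Hypothetical stand-in for one llm.embed() call over a batch of chunks.
async fn work(item: u32) -> u32 {
    item * 2
}

// Prime up to `concurrency` futures, then queue the next item each time
// one completes, so the pool stays saturated until the iterator drains.
async fn run_bounded(items: Vec<u32>, concurrency: usize) {
    let mut iter = items.into_iter();
    let mut in_flight = FuturesUnordered::new();
    for _ in 0..concurrency {
        if let Some(item) = iter.next() {
            in_flight.push(work(item));
        }
    }
    while let Some(result) = in_flight.next().await {
        println!("finished: {result}");
        if let Some(item) = iter.next() {
            in_flight.push(work(item));
        }
    }
}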

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-13 11:39:37 +02:00
sharang e67a13535a fix: add HTTP timeout to reqwest client and CVE stage timeout (#79)
CI / Check (push) Has been skipped
CI / Detect Changes (push) Successful in 5s
CI / Deploy Agent (push) Successful in 8m26s
CI / Deploy Dashboard (push) Has been skipped
CI / Deploy Docs (push) Has been skipped
CI / Deploy MCP (push) Has been skipped
2026-05-13 07:30:26 +00:00
4 changed files with 114 additions and 29 deletions
+6 -1
@@ -35,11 +35,16 @@ impl ComplianceAgent {
             config.litellm_model.clone(),
             config.litellm_embed_model.clone(),
         ));
+        let http = reqwest::Client::builder()
+            .timeout(std::time::Duration::from_secs(30))
+            .connect_timeout(std::time::Duration::from_secs(10))
+            .build()
+            .unwrap_or_default();
         Self {
             config,
             db,
             llm,
-            http: reqwest::Client::new(),
+            http,
             session_streams: Arc::new(DashMap::new()),
             session_pause: Arc::new(DashMap::new()),
             session_semaphore: Arc::new(Semaphore::new(DEFAULT_MAX_CONCURRENT_SESSIONS)),
+6 -1
@@ -19,12 +19,17 @@ impl LlmClient {
         model: String,
         embed_model: String,
     ) -> Self {
+        let http = reqwest::Client::builder()
+            .timeout(std::time::Duration::from_secs(300))
+            .connect_timeout(std::time::Duration::from_secs(10))
+            .build()
+            .unwrap_or_default();
         Self {
             base_url,
             api_key,
             model,
             embed_model,
-            http: reqwest::Client::new(),
+            http,
         }
     }
+15 -8
@@ -174,19 +174,26 @@ impl PipelineOrchestrator {
                 k.expose_secret().to_string()
             }),
         );
-        let cve_alerts = match async {
-            cve_scanner
-                .scan_dependencies(&repo_id, &mut sbom_entries)
-                .await
-        }
-        .instrument(tracing::info_span!("stage_cve_scanning"))
+        let cve_alerts = match tokio::time::timeout(
+            std::time::Duration::from_secs(600),
+            async {
+                cve_scanner
+                    .scan_dependencies(&repo_id, &mut sbom_entries)
+                    .await
+            }
+            .instrument(tracing::info_span!("stage_cve_scanning")),
+        )
         .await
         {
-            Ok(alerts) => alerts,
-            Err(e) => {
+            Ok(Ok(alerts)) => alerts,
+            Ok(Err(e)) => {
                 tracing::warn!("[{repo_id}] CVE scanning failed: {e}");
                 Vec::new()
             }
+            Err(_) => {
+                tracing::warn!("[{repo_id}] CVE scanning timed out after 10 minutes");
+                Vec::new()
+            }
         };

         // Stage 4: Pattern Scanning (GDPR + OAuth)
+87 -19
@@ -6,11 +6,16 @@ use compliance_core::models::embedding::{CodeEmbedding, EmbeddingBuildRun, EmbeddingBuildStatus};
 use compliance_core::models::graph::CodeNode;
 use compliance_graph::graph::chunking::extract_chunks;
 use compliance_graph::graph::embedding_store::EmbeddingStore;
+use futures_util::stream::{FuturesUnordered, StreamExt};
 use tracing::{error, info};

 use crate::error::AgentError;
 use crate::llm::LlmClient;

+const EMBED_BATCH_SIZE: usize = 20;
+const EMBED_CONCURRENCY: usize = 4;
+const EMBED_FLUSH_EVERY: usize = 200;
+
 /// RAG pipeline for building embeddings and performing retrieval
 pub struct RagPipeline {
     llm: Arc<LlmClient>,
@@ -77,25 +82,33 @@ impl RagPipeline {
             .await
             .map_err(|e| AgentError::Other(format!("Failed to delete old embeddings: {e}")))?;

-        // Step 3: Batch embed (small batches to stay within model limits)
-        let batch_size = 20;
-        let mut all_embeddings = Vec::new();
+        // Step 3: Batch embed with bounded concurrency. Flush to Mongo and
+        // update progress periodically so the dashboard can show live status.
+        let mut pending = Vec::with_capacity(EMBED_FLUSH_EVERY);
         let mut embedded_count = 0u32;

-        for batch_start in (0..chunks.len()).step_by(batch_size) {
-            let batch_end = (batch_start + batch_size).min(chunks.len());
-            let batch_chunks = &chunks[batch_start..batch_end];
-
-            // Prepare texts: context_header + content
-            let texts: Vec<String> = batch_chunks
-                .iter()
-                .map(|c| format!("{}\n{}", c.context_header, c.content))
-                .collect();
-
-            match self.llm.embed(texts).await {
-                Ok(vectors) => {
+        // Build the list of batch indices to process.
+        let batches: Vec<(usize, usize)> = (0..chunks.len())
+            .step_by(EMBED_BATCH_SIZE)
+            .map(|start| (start, (start + EMBED_BATCH_SIZE).min(chunks.len())))
+            .collect();
+
+        let mut batch_iter = batches.into_iter();
+        let mut in_flight = FuturesUnordered::new();
+
+        // Prime up to EMBED_CONCURRENCY batches.
+        for _ in 0..EMBED_CONCURRENCY {
+            if let Some((start, end)) = batch_iter.next() {
+                in_flight.push(self.embed_batch(&chunks[start..end], start, end));
+            }
+        }
+
+        while let Some(result) = in_flight.next().await {
+            match result {
+                Ok((start, end, vectors)) => {
+                    let batch_chunks = &chunks[start..end];
                     for (chunk, embedding) in batch_chunks.iter().zip(vectors) {
-                        all_embeddings.push(CodeEmbedding {
+                        pending.push(CodeEmbedding {
                             id: None,
                             repo_id: repo_id.to_string(),
                             graph_build_id: graph_build_id.to_string(),
@@ -113,9 +126,45 @@ impl RagPipeline {
                         });
                     }
                     embedded_count += batch_chunks.len() as u32;
+
+                    // Flush pending embeddings to Mongo periodically and update progress.
+                    if pending.len() >= EMBED_FLUSH_EVERY {
+                        self.embedding_store
+                            .store_embeddings(&pending)
+                            .await
+                            .map_err(|e| {
+                                AgentError::Other(format!("Failed to store embeddings: {e}"))
+                            })?;
+                        pending.clear();
+                    }
+
+                    // Always update the progress counter on the build doc — even if
+                    // we haven't flushed embeddings yet — so the UI shows movement.
+                    if let Err(e) = self
+                        .embedding_store
+                        .update_build(
+                            repo_id,
+                            graph_build_id,
+                            EmbeddingBuildStatus::Running,
+                            embedded_count,
+                            None,
+                        )
+                        .await
+                    {
+                        error!("[{repo_id}] Failed to update build progress: {e}");
+                    }
+
+                    // Queue the next batch to keep concurrency saturated.
+                    if let Some((s, e)) = batch_iter.next() {
+                        in_flight.push(self.embed_batch(&chunks[s..e], s, e));
+                    }
                 }
                 Err(e) => {
                     error!("[{repo_id}] Embedding batch failed: {e}");
+                    // Flush whatever we have so partial progress isn't lost.
+                    if !pending.is_empty() {
+                        let _ = self.embedding_store.store_embeddings(&pending).await;
+                    }
                     build.status = EmbeddingBuildStatus::Failed;
                     build.error_message = Some(e.to_string());
                     build.completed_at = Some(Utc::now());
@@ -134,11 +183,13 @@ impl RagPipeline {
             }
         }

-        // Step 4: Store all embeddings
-        self.embedding_store
-            .store_embeddings(&all_embeddings)
-            .await
-            .map_err(|e| AgentError::Other(format!("Failed to store embeddings: {e}")))?;
+        // Step 4: Flush any remaining embeddings
+        if !pending.is_empty() {
+            self.embedding_store
+                .store_embeddings(&pending)
+                .await
+                .map_err(|e| AgentError::Other(format!("Failed to store embeddings: {e}")))?;
+        }

         // Step 5: Update build status
         build.status = EmbeddingBuildStatus::Completed;
@@ -161,4 +212,21 @@ impl RagPipeline {
         );
         Ok(build)
     }
+
+    /// Embed one batch of chunks. Returns the (start, end, vectors) tuple so
+    /// out-of-order completion from `FuturesUnordered` can still be reconciled
+    /// against the original chunk slice.
+    async fn embed_batch(
+        &self,
+        batch_chunks: &[compliance_graph::graph::chunking::CodeChunk],
+        start: usize,
+        end: usize,
+    ) -> Result<(usize, usize, Vec<Vec<f64>>), AgentError> {
+        let texts: Vec<String> = batch_chunks
+            .iter()
+            .map(|c| format!("{}\n{}", c.context_header, c.content))
+            .collect();
+        let vectors = self.llm.embed(texts).await?;
+        Ok((start, end, vectors))
+    }
 }
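
A usage note on the dashboard side: the per-batch update_build calls above
are what make polling worthwhile. A minimal client-side watcher might look
like the sketch below, assuming the /api/v1/chat/:repo_id/status endpoint
named in the commit message returns JSON with status, embedded_count, and
total_chunks fields; the field names and the "running" value are
assumptions, not taken from this diff.

use std::time::Duration;

// All field names here are assumptions about the status endpoint's JSON
// shape; only the endpoint path comes from the commit message above.
#[derive(serde::Deserialize)]
struct BuildStatus {
    status: String,
    embedded_count: u32,
    total_chunks: u32,
}

async fn watch_build(base: &str, repo_id: &str) -> Result<(), reqwest::Error> {
    let http = reqwest::Client::builder()
        .timeout(Duration::from_secs(10))
        .build()?;
    loop {
        let s: BuildStatus = http
            .get(format!("{base}/api/v1/chat/{repo_id}/status"))
            .send()
            .await?
            .json()
            .await?;
        println!("{}/{} chunks ({})", s.embedded_count, s.total_chunks, s.status);
        if s.status != "running" {
            break;
        }
        tokio::time::sleep(Duration::from_secs(2)).await;
    }
    Ok(())
}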