Add RAG embedding and AI chat feature

Implement end-to-end RAG pipeline: AST-aware code chunking, LiteLLM embedding generation, MongoDB vector storage with brute-force cosine similarity fallback for self-hosted instances, and a chat API with RAG-augmented responses. Add dedicated /chat/:repo_id dashboard page with embedding build controls, message history, and source reference cards.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-04 23:29:40 +01:00
parent db454867f3
commit 89c30a62dd
25 changed files with 1692 additions and 25 deletions
--- a/compliance-agent/src/api/handlers/chat.rs
+++ b/compliance-agent/src/api/handlers/chat.rs
@@ -0,0 +1,238 @@
+use std::sync::Arc;
+
+use axum::extract::{Extension, Path};
+use axum::http::StatusCode;
+use axum::Json;
+use mongodb::bson::doc;
+
+use compliance_core::models::chat::{ChatRequest, ChatResponse, SourceReference};
+use compliance_core::models::embedding::EmbeddingBuildRun;
+use compliance_graph::graph::embedding_store::EmbeddingStore;
+
+use crate::agent::ComplianceAgent;
+use crate::rag::pipeline::RagPipeline;
+
+use super::ApiResponse;
+
+/// Extractor alias for the shared `ComplianceAgent` handle that the handlers in
+/// this file receive through axum's `Extension`.
+type AgentExt = Extension<Arc<ComplianceAgent>>;
+
+/// POST /api/v1/chat/:repo_id — Answer a chat message using RAG code context
+///
+/// Flow: embed `req.message`, run a vector search scoped to `repo_id`, render
+/// the hits into a system prompt, append the caller-supplied history plus the
+/// new user message, and ask the LLM for a reply. The response carries the
+/// reply text together with one `SourceReference` per search hit.
+///
+/// # Errors
+///
+/// Responds with `500 Internal Server Error` when the embedding call, the
+/// vector search or the LLM call fails, or when the embedding response is
+/// empty. The underlying cause is logged, not returned to the client.
+pub async fn chat(
+    Extension(agent): AgentExt,
+    Path(repo_id): Path<String>,
+    Json(req): Json<ChatRequest>,
+) -> Result<Json<ApiResponse<ChatResponse>>, StatusCode> {
+    let rag = RagPipeline::new(agent.llm.clone(), agent.db.inner());
+
+    // 1. Turn the question into a single query vector.
+    let embedded = match agent.llm.embed(vec![req.message.clone()]).await {
+        Ok(vectors) => vectors,
+        Err(e) => {
+            tracing::error!("Failed to embed query: {e}");
+            return Err(StatusCode::INTERNAL_SERVER_ERROR);
+        }
+    };
+    let query_embedding = match embedded.into_iter().next() {
+        Some(vector) => vector,
+        None => {
+            tracing::error!("Empty embedding response");
+            return Err(StatusCode::INTERNAL_SERVER_ERROR);
+        }
+    };
+
+    // 2. Look up the closest chunks for this repository.
+    // NOTE(review): 8 and 0.5 are presumably the result limit and the minimum
+    // similarity score — confirm against `vector_search`.
+    let hits = match rag
+        .store()
+        .vector_search(&repo_id, query_embedding, 8, 0.5)
+        .await
+    {
+        Ok(found) => found,
+        Err(e) => {
+            tracing::error!("Vector search failed: {e}");
+            return Err(StatusCode::INTERNAL_SERVER_ERROR);
+        }
+    };
+
+    // 3. Render every hit twice: in full for the prompt, shortened for the client.
+    let mut context_parts = Vec::new();
+    let mut sources = Vec::new();
+
+    for (chunk, score) in &hits {
+        let rendered = format!(
+            "--- {} ({}, {}:L{}-L{}) ---\n{}",
+            chunk.qualified_name,
+            chunk.kind,
+            chunk.file_path,
+            chunk.start_line,
+            chunk.end_line,
+            chunk.content,
+        );
+        context_parts.push(rendered);
+
+        // The client only gets the first 10 lines of each chunk.
+        let mut snippet = String::new();
+        for (idx, line) in chunk.content.lines().take(10).enumerate() {
+            if idx > 0 {
+                snippet.push('\n');
+            }
+            snippet.push_str(line);
+        }
+
+        sources.push(SourceReference {
+            file_path: chunk.file_path.clone(),
+            qualified_name: chunk.qualified_name.clone(),
+            start_line: chunk.start_line,
+            end_line: chunk.end_line,
+            language: chunk.language.clone(),
+            snippet,
+            score: *score,
+        });
+    }
+
+    let code_context = match context_parts.as_slice() {
+        [] => "No relevant code context found.".to_string(),
+        parts => parts.join("\n\n"),
+    };
+
+    let system_prompt = format!(
+        "You are an expert code assistant for a software repository. \
+         Answer the user's question based on the code context below. \
+         Reference specific files and functions when relevant. \
+         If the context doesn't contain enough information, say so.\n\n\
+         ## Code Context\n\n{code_context}"
+    );
+
+    // 4. Conversation = system prompt, prior turns, then the new question.
+    // NOTE(review): history roles are forwarded exactly as the client sent them.
+    let mut messages: Vec<(String, String)> = vec![("system".to_string(), system_prompt)];
+    messages.extend(
+        req.history
+            .iter()
+            .map(|turn| (turn.role.clone(), turn.content.clone())),
+    );
+    messages.push(("user".to_string(), req.message));
+
+    // 5. Ask the model; 0.3 is passed as the optional second argument
+    // (presumably the sampling temperature — confirm against `chat_with_messages`).
+    let reply = match agent.llm.chat_with_messages(messages, Some(0.3)).await {
+        Ok(text) => text,
+        Err(e) => {
+            tracing::error!("LLM chat failed: {e}");
+            return Err(StatusCode::INTERNAL_SERVER_ERROR);
+        }
+    };
+
+    let body = ApiResponse {
+        data: ChatResponse {
+            message: reply,
+            sources,
+        },
+        total: None,
+        page: None,
+    };
+    Ok(Json(body))
+}
+
+/// POST /api/v1/chat/:repo_id/build-embeddings — Trigger embedding build
+///
+/// Validates `repo_id`, then runs the build in a detached background task and
+/// returns immediately. The response only confirms that a build was triggered;
+/// failures inside the task are logged and never reach the caller.
+///
+/// The background task loads the repository, its most recently started graph
+/// build and that repository's graph nodes, clones or fetches the git
+/// checkout, and hands everything to `RagPipeline::build_embeddings`.
+///
+/// # Errors
+///
+/// Responds with `400 Bad Request` when `repo_id` is not a valid ObjectId.
+pub async fn build_embeddings(
+    Extension(agent): AgentExt,
+    Path(repo_id): Path<String>,
+) -> Result<Json<serde_json::Value>, StatusCode> {
+    // Reject malformed ids up front instead of querying for `_id: null` and
+    // reporting success for a build that can never start.
+    let repo_oid = mongodb::bson::oid::ObjectId::parse_str(&repo_id).map_err(|e| {
+        tracing::warn!("Invalid repository id {repo_id}: {e}");
+        StatusCode::BAD_REQUEST
+    })?;
+
+    let agent_clone = (*agent).clone();
+    tokio::spawn(async move {
+        // Keep "not found" and "database error" apart so the log names the real cause.
+        let repo = match agent_clone
+            .db
+            .repositories()
+            .find_one(doc! { "_id": repo_oid })
+            .await
+        {
+            Ok(Some(r)) => r,
+            Ok(None) => {
+                tracing::error!("Repository {repo_id} not found for embedding build");
+                return;
+            }
+            Err(e) => {
+                tracing::error!("[{repo_id}] Failed to load repository: {e}");
+                return;
+            }
+        };
+
+        // Get latest graph build
+        let build = match agent_clone
+            .db
+            .graph_builds()
+            .find_one(doc! { "repo_id": &repo_id })
+            .sort(doc! { "started_at": -1 })
+            .await
+        {
+            Ok(Some(b)) => b,
+            Ok(None) => {
+                tracing::error!("[{repo_id}] No graph build found — build graph first");
+                return;
+            }
+            Err(e) => {
+                tracing::error!("[{repo_id}] Failed to load latest graph build: {e}");
+                return;
+            }
+        };
+
+        let graph_build_id = build
+            .id
+            .map(|id| id.to_hex())
+            .unwrap_or_else(|| "unknown".to_string());
+
+        // Get nodes. A cursor error aborts the build: continuing would embed a
+        // silently truncated node list.
+        let nodes: Vec<compliance_core::models::graph::CodeNode> = match agent_clone
+            .db
+            .graph_nodes()
+            .find(doc! { "repo_id": &repo_id })
+            .await
+        {
+            Ok(mut cursor) => {
+                use futures_util::StreamExt;
+                let mut items = Vec::new();
+                while let Some(item) = cursor.next().await {
+                    match item {
+                        Ok(node) => items.push(node),
+                        Err(e) => {
+                            tracing::error!("[{repo_id}] Failed to read graph node: {e}");
+                            return;
+                        }
+                    }
+                }
+                items
+            }
+            Err(e) => {
+                tracing::error!("[{repo_id}] Failed to fetch nodes: {e}");
+                return;
+            }
+        };
+
+        // NOTE(review): `clone_or_fetch` is called synchronously inside this async
+        // task. If it performs blocking git/network I/O it will stall a runtime
+        // worker; consider moving it to `tokio::task::spawn_blocking`.
+        let git_ops = crate::pipeline::git::GitOps::new(&agent_clone.config.git_clone_base_path);
+        let repo_path = match git_ops.clone_or_fetch(&repo.git_url, &repo.name) {
+            Ok(p) => p,
+            Err(e) => {
+                tracing::error!("Failed to clone repo for embedding build: {e}");
+                return;
+            }
+        };
+
+        let pipeline = RagPipeline::new(agent_clone.llm.clone(), agent_clone.db.inner());
+        match pipeline
+            .build_embeddings(&repo_id, &repo_path, &graph_build_id, &nodes)
+            .await
+        {
+            Ok(run) => {
+                tracing::info!(
+                    "[{repo_id}] Embedding build complete: {}/{} chunks",
+                    run.embedded_chunks,
+                    run.total_chunks
+                );
+            }
+            Err(e) => {
+                tracing::error!("[{repo_id}] Embedding build failed: {e}");
+            }
+        }
+    });
+
+    Ok(Json(
+        serde_json::json!({ "status": "embedding_build_triggered" }),
+    ))
+}
+
+/// GET /api/v1/chat/:repo_id/status — Report the latest embedding build run
+///
+/// Asks the `EmbeddingStore` for the latest build of `repo_id` and returns it
+/// as `data`. `data` is `None` when the store returns no run (presumably
+/// because none has been recorded for this repository yet).
+///
+/// # Errors
+///
+/// Responds with `500 Internal Server Error` when the store lookup fails; the
+/// cause is logged.
+pub async fn embedding_status(
+    Extension(agent): AgentExt,
+    Path(repo_id): Path<String>,
+) -> Result<Json<ApiResponse<Option<EmbeddingBuildRun>>>, StatusCode> {
+    let embeddings = EmbeddingStore::new(agent.db.inner());
+
+    let latest_run = match embeddings.get_latest_build(&repo_id).await {
+        Ok(run) => run,
+        Err(e) => {
+            tracing::error!("Failed to get embedding status: {e}");
+            return Err(StatusCode::INTERNAL_SERVER_ERROR);
+        }
+    };
+
+    let body = ApiResponse {
+        data: latest_run,
+        total: None,
+        page: None,
+    };
+    Ok(Json(body))
+}