Implement end-to-end RAG pipeline: AST-aware code chunking, LiteLLM embedding generation, MongoDB vector storage with brute-force cosine similarity fallback for self-hosted instances, and a chat API with RAG-augmented responses. Add dedicated /chat/:repo_id dashboard page with embedding build controls, message history, and source reference cards. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
239 lines
7.3 KiB
Rust
239 lines
7.3 KiB
Rust
use std::sync::Arc;
|
|
|
|
use axum::extract::{Extension, Path};
|
|
use axum::http::StatusCode;
|
|
use axum::Json;
|
|
use mongodb::bson::doc;
|
|
|
|
use compliance_core::models::chat::{ChatRequest, ChatResponse, SourceReference};
|
|
use compliance_core::models::embedding::EmbeddingBuildRun;
|
|
use compliance_graph::graph::embedding_store::EmbeddingStore;
|
|
|
|
use crate::agent::ComplianceAgent;
|
|
use crate::rag::pipeline::RagPipeline;
|
|
|
|
use super::ApiResponse;
|
|
|
|
/// Shared application agent, injected into every handler via axum `Extension`.
type AgentExt = Extension<Arc<ComplianceAgent>>;
|
|
|
|
/// POST /api/v1/chat/:repo_id — Send a chat message with RAG context
|
|
pub async fn chat(
|
|
Extension(agent): AgentExt,
|
|
Path(repo_id): Path<String>,
|
|
Json(req): Json<ChatRequest>,
|
|
) -> Result<Json<ApiResponse<ChatResponse>>, StatusCode> {
|
|
let pipeline = RagPipeline::new(agent.llm.clone(), agent.db.inner());
|
|
|
|
// Step 1: Embed the user's message
|
|
let query_vectors = agent
|
|
.llm
|
|
.embed(vec![req.message.clone()])
|
|
.await
|
|
.map_err(|e| {
|
|
tracing::error!("Failed to embed query: {e}");
|
|
StatusCode::INTERNAL_SERVER_ERROR
|
|
})?;
|
|
|
|
let query_embedding = query_vectors.into_iter().next().ok_or_else(|| {
|
|
tracing::error!("Empty embedding response");
|
|
StatusCode::INTERNAL_SERVER_ERROR
|
|
})?;
|
|
|
|
// Step 2: Vector search — retrieve top 8 chunks
|
|
let search_results = pipeline
|
|
.store()
|
|
.vector_search(&repo_id, query_embedding, 8, 0.5)
|
|
.await
|
|
.map_err(|e| {
|
|
tracing::error!("Vector search failed: {e}");
|
|
StatusCode::INTERNAL_SERVER_ERROR
|
|
})?;
|
|
|
|
// Step 3: Build system prompt with code context
|
|
let mut context_parts = Vec::new();
|
|
let mut sources = Vec::new();
|
|
|
|
for (embedding, score) in &search_results {
|
|
context_parts.push(format!(
|
|
"--- {} ({}, {}:L{}-L{}) ---\n{}",
|
|
embedding.qualified_name,
|
|
embedding.kind,
|
|
embedding.file_path,
|
|
embedding.start_line,
|
|
embedding.end_line,
|
|
embedding.content,
|
|
));
|
|
|
|
// Truncate snippet for the response
|
|
let snippet: String = embedding
|
|
.content
|
|
.lines()
|
|
.take(10)
|
|
.collect::<Vec<_>>()
|
|
.join("\n");
|
|
sources.push(SourceReference {
|
|
file_path: embedding.file_path.clone(),
|
|
qualified_name: embedding.qualified_name.clone(),
|
|
start_line: embedding.start_line,
|
|
end_line: embedding.end_line,
|
|
language: embedding.language.clone(),
|
|
snippet,
|
|
score: *score,
|
|
});
|
|
}
|
|
|
|
let code_context = if context_parts.is_empty() {
|
|
"No relevant code context found.".to_string()
|
|
} else {
|
|
context_parts.join("\n\n")
|
|
};
|
|
|
|
let system_prompt = format!(
|
|
"You are an expert code assistant for a software repository. \
|
|
Answer the user's question based on the code context below. \
|
|
Reference specific files and functions when relevant. \
|
|
If the context doesn't contain enough information, say so.\n\n\
|
|
## Code Context\n\n{code_context}"
|
|
);
|
|
|
|
// Step 4: Build messages array with history
|
|
let mut messages: Vec<(String, String)> = Vec::new();
|
|
messages.push(("system".to_string(), system_prompt));
|
|
|
|
for msg in &req.history {
|
|
messages.push((msg.role.clone(), msg.content.clone()));
|
|
}
|
|
messages.push(("user".to_string(), req.message));
|
|
|
|
// Step 5: Call LLM
|
|
let response_text = agent
|
|
.llm
|
|
.chat_with_messages(messages, Some(0.3))
|
|
.await
|
|
.map_err(|e| {
|
|
tracing::error!("LLM chat failed: {e}");
|
|
StatusCode::INTERNAL_SERVER_ERROR
|
|
})?;
|
|
|
|
Ok(Json(ApiResponse {
|
|
data: ChatResponse {
|
|
message: response_text,
|
|
sources,
|
|
},
|
|
total: None,
|
|
page: None,
|
|
}))
|
|
}
|
|
|
|
/// POST /api/v1/chat/:repo_id/build-embeddings — Trigger embedding build
|
|
pub async fn build_embeddings(
|
|
Extension(agent): AgentExt,
|
|
Path(repo_id): Path<String>,
|
|
) -> Result<Json<serde_json::Value>, StatusCode> {
|
|
let agent_clone = (*agent).clone();
|
|
tokio::spawn(async move {
|
|
let repo = match agent_clone
|
|
.db
|
|
.repositories()
|
|
.find_one(doc! { "_id": mongodb::bson::oid::ObjectId::parse_str(&repo_id).ok() })
|
|
.await
|
|
{
|
|
Ok(Some(r)) => r,
|
|
_ => {
|
|
tracing::error!("Repository {repo_id} not found for embedding build");
|
|
return;
|
|
}
|
|
};
|
|
|
|
// Get latest graph build
|
|
let build = match agent_clone
|
|
.db
|
|
.graph_builds()
|
|
.find_one(doc! { "repo_id": &repo_id })
|
|
.sort(doc! { "started_at": -1 })
|
|
.await
|
|
{
|
|
Ok(Some(b)) => b,
|
|
_ => {
|
|
tracing::error!("[{repo_id}] No graph build found — build graph first");
|
|
return;
|
|
}
|
|
};
|
|
|
|
let graph_build_id = build
|
|
.id
|
|
.map(|id| id.to_hex())
|
|
.unwrap_or_else(|| "unknown".to_string());
|
|
|
|
// Get nodes
|
|
let nodes: Vec<compliance_core::models::graph::CodeNode> = match agent_clone
|
|
.db
|
|
.graph_nodes()
|
|
.find(doc! { "repo_id": &repo_id })
|
|
.await
|
|
{
|
|
Ok(cursor) => {
|
|
use futures_util::StreamExt;
|
|
let mut items = Vec::new();
|
|
let mut cursor = cursor;
|
|
while let Some(Ok(item)) = cursor.next().await {
|
|
items.push(item);
|
|
}
|
|
items
|
|
}
|
|
Err(e) => {
|
|
tracing::error!("[{repo_id}] Failed to fetch nodes: {e}");
|
|
return;
|
|
}
|
|
};
|
|
|
|
let git_ops = crate::pipeline::git::GitOps::new(&agent_clone.config.git_clone_base_path);
|
|
let repo_path = match git_ops.clone_or_fetch(&repo.git_url, &repo.name) {
|
|
Ok(p) => p,
|
|
Err(e) => {
|
|
tracing::error!("Failed to clone repo for embedding build: {e}");
|
|
return;
|
|
}
|
|
};
|
|
|
|
let pipeline = RagPipeline::new(agent_clone.llm.clone(), agent_clone.db.inner());
|
|
match pipeline
|
|
.build_embeddings(&repo_id, &repo_path, &graph_build_id, &nodes)
|
|
.await
|
|
{
|
|
Ok(run) => {
|
|
tracing::info!(
|
|
"[{repo_id}] Embedding build complete: {}/{} chunks",
|
|
run.embedded_chunks,
|
|
run.total_chunks
|
|
);
|
|
}
|
|
Err(e) => {
|
|
tracing::error!("[{repo_id}] Embedding build failed: {e}");
|
|
}
|
|
}
|
|
});
|
|
|
|
Ok(Json(
|
|
serde_json::json!({ "status": "embedding_build_triggered" }),
|
|
))
|
|
}
|
|
|
|
/// GET /api/v1/chat/:repo_id/status — Get latest embedding build status
|
|
pub async fn embedding_status(
|
|
Extension(agent): AgentExt,
|
|
Path(repo_id): Path<String>,
|
|
) -> Result<Json<ApiResponse<Option<EmbeddingBuildRun>>>, StatusCode> {
|
|
let store = EmbeddingStore::new(agent.db.inner());
|
|
let build = store.get_latest_build(&repo_id).await.map_err(|e| {
|
|
tracing::error!("Failed to get embedding status: {e}");
|
|
StatusCode::INTERNAL_SERVER_ERROR
|
|
})?;
|
|
|
|
Ok(Json(ApiResponse {
|
|
data: build,
|
|
total: None,
|
|
page: None,
|
|
}))
|
|
}
|