compliance-scanner-agent/compliance-agent/src/api/handlers/help_chat.rs

use std::path::{Path, PathBuf};
use std::sync::OnceLock;

use axum::extract::Extension;
use axum::http::StatusCode;
use axum::Json;
use serde::{Deserialize, Serialize};
use walkdir::WalkDir;

use super::dto::{AgentExt, ApiResponse};

// ── DTOs ─────────────────────────────────────────────────────────────────────

/// One earlier turn of the conversation, as supplied by the client in
/// [`HelpChatRequest::history`].
#[derive(Debug, Deserialize)]
pub struct HelpChatMessage {
    /// Role label for this turn (presumably `user` or `assistant`). It is a
    /// free-form string: nothing is validated during deserialization.
    pub role: String,
    /// Text of the turn; paired with `role` when the prompt is assembled.
    pub content: String,
}

/// JSON request body accepted by the `help_chat` handler.
#[derive(Debug, Deserialize)]
pub struct HelpChatRequest {
    /// The new question; `help_chat` appends it as the final `user` message.
    pub message: String,
    /// Earlier turns of the conversation (presumably oldest first — the
    /// handler keeps the order it receives). Defaults to an empty list when
    /// the field is omitted from the request.
    #[serde(default)]
    pub history: Vec<HelpChatMessage>,
}

/// Payload returned by the `help_chat` handler inside `ApiResponse::data`.
#[derive(Debug, Serialize)]
pub struct HelpChatResponse {
    /// The reply text produced by the LLM call.
    pub message: String,
}

// ── Doc cache ────────────────────────────────────────────────────────────────

/// Process-wide cache of the concatenated documentation text. It is filled at
/// most once, by `doc_context`, and never refreshed afterwards.
static DOC_CONTEXT: OnceLock<String> = OnceLock::new();

/// Locate the nearest enclosing project root for `start`.
///
/// `start` itself and then each of its ancestors is examined in turn; the
/// first directory holding both a `README.md` file and a `docs/` subdirectory
/// is returned. Yields `None` when the ancestors run out without a match.
fn find_project_root(start: &Path) -> Option<PathBuf> {
    start
        .ancestors()
        .find(|dir| dir.join("README.md").is_file() && dir.join("docs").is_dir())
        .map(Path::to_path_buf)
}

/// Build the documentation context from `root`: the root `README.md` followed
/// by every Markdown (`.md`, case-insensitive) file under `root/docs`.
///
/// Each file is prefixed with an HTML comment naming its path relative to
/// `root`, and files are joined with a `---` separator. Directories named
/// `node_modules` are pruned from the walk, and entries are visited in
/// file-name order so the resulting string is stable across runs and
/// filesystems.
///
/// Loading is best-effort: files that cannot be read as UTF-8 text are
/// skipped (with a warning for anything under `docs/`), and an empty string is
/// returned when nothing could be loaded.
fn load_docs(root: &Path) -> String {
    let mut parts: Vec<String> = Vec::new();

    // Root README first. It may legitimately be absent (callers accept a
    // directory that only has `docs/`), so a failed read is not reported.
    if let Ok(content) = std::fs::read_to_string(root.join("README.md")) {
        parts.push(format!("<!-- file: README.md -->\n{content}"));
    }

    let walker = WalkDir::new(root.join("docs"))
        .follow_links(false)
        // Directory iteration order is filesystem-dependent; sort so the
        // generated context is deterministic.
        .sort_by(|a, b| a.file_name().cmp(b.file_name()))
        .into_iter()
        // `filter_entry` prunes whole subtrees, so checking the entry's own
        // name is sufficient. Inspecting every path component would also match
        // a `node_modules` segment inside `root` itself and drop everything.
        .filter_entry(|e| e.file_name() != "node_modules");

    for entry in walker {
        let entry = match entry {
            Ok(entry) => entry,
            Err(err) => {
                // A missing `docs/` directory shows up as a depth-0 error and
                // is a supported layout (README only); deeper failures mean
                // part of the documentation is being lost, so surface them.
                if err.depth() > 0 {
                    tracing::warn!("help_chat: skipping unreadable docs entry: {err}");
                }
                continue;
            }
        };

        let path = entry.path();
        // Cheap name check first; `is_file` costs a filesystem lookup.
        let is_markdown = path
            .extension()
            .is_some_and(|ext| ext.eq_ignore_ascii_case("md"));
        if !is_markdown || !path.is_file() {
            continue;
        }

        let rel = path.strip_prefix(root).unwrap_or(path);
        match std::fs::read_to_string(path) {
            Ok(content) => {
                parts.push(format!("<!-- file: {} -->\n{content}", rel.display()));
            }
            Err(err) => {
                tracing::warn!("help_chat: failed to read {}: {err}", path.display());
            }
        }
    }

    if parts.is_empty() {
        tracing::warn!(
            "help_chat: no documentation files found under {}",
            root.display()
        );
    } else {
        tracing::info!(
            "help_chat: loaded {} documentation file(s) from {}",
            parts.len(),
            root.display()
        );
    }

    parts.join("\n\n---\n\n")
}

/// Returns a reference to the cached doc context string, initialised on
/// first call via `OnceLock`.
///
/// The value is computed at most once per process; an empty string means no
/// documentation could be located.
///
/// Discovery order (first match wins):
/// 1. `HELP_DOCS_PATH` env var (explicit override; an empty value is treated
///    as unset)
/// 2. Walk up from the directory containing the running binary
/// 3. Walk up from the current working directory, then the working directory
///    itself if it contains a `README.md`
/// 4. Common deployment paths (`/app`, `/opt/compliance-scanner`,
///    `/srv/compliance-scanner`)
fn doc_context() -> &'static str {
    /// True when `dir` holds a root `README.md` or a `docs/` directory.
    fn has_docs(dir: &Path) -> bool {
        dir.join("README.md").is_file() || dir.join("docs").is_dir()
    }

    DOC_CONTEXT.get_or_init(|| {
        // 1. Explicit env var. `var_os` keeps non-Unicode paths usable, and an
        //    empty value is skipped: it would otherwise resolve relative to
        //    the working directory and could produce a misleading warning.
        if let Some(raw) = std::env::var_os("HELP_DOCS_PATH").filter(|v| !v.is_empty()) {
            let p = PathBuf::from(raw);
            if has_docs(&p) {
                tracing::info!(
                    "help_chat: loading docs from HELP_DOCS_PATH={}",
                    p.display()
                );
                return load_docs(&p);
            }
            tracing::warn!(
                "help_chat: HELP_DOCS_PATH={} has no README.md or docs/",
                p.display()
            );
        }

        // 2. Walk up from binary location (falls back to "." when the
        //    executable path is unavailable).
        let start = std::env::current_exe()
            .ok()
            .and_then(|p| p.parent().map(Path::to_path_buf))
            .unwrap_or_else(|| PathBuf::from("."));

        if let Some(root) = find_project_root(&start) {
            return load_docs(&root);
        }

        // 3. Current working directory: prefer a full project root above it,
        //    otherwise accept the directory itself when it has a README.
        if let Ok(cwd) = std::env::current_dir() {
            if let Some(root) = find_project_root(&cwd) {
                return load_docs(&root);
            }
            if cwd.join("README.md").is_file() {
                return load_docs(&cwd);
            }
        }

        // 4. Common Docker/deployment paths
        for candidate in ["/app", "/opt/compliance-scanner", "/srv/compliance-scanner"] {
            let p = PathBuf::from(candidate);
            if has_docs(&p) {
                tracing::info!("help_chat: found docs at {candidate}");
                return load_docs(&p);
            }
        }

        tracing::error!(
            "help_chat: could not locate project root; doc context will be empty. \
             Set HELP_DOCS_PATH to the directory containing README.md and docs/"
        );
        String::new()
    })
}

// ── Handler ──────────────────────────────────────────────────────────────────

/// POST /api/v1/help/chat — Answer questions about the compliance-scanner
/// using the project documentation as grounding context.
#[tracing::instrument(skip_all)]
pub async fn help_chat(
    Extension(agent): AgentExt,
    Json(req): Json<HelpChatRequest>,
) -> Result<Json<ApiResponse<HelpChatResponse>>, StatusCode> {
    let context = doc_context();

    let system_prompt = if context.is_empty() {
        "You are a helpful assistant for the Compliance Scanner project. \
         Answer questions about how to use and configure it. \
         No documentation was loaded at startup, so rely on your general knowledge."
            .to_string()
    } else {
        format!(
            "You are a helpful assistant for the Compliance Scanner project. \
             Answer questions about how to use, configure, and understand it \
             using the documentation below as your primary source of truth.\n\n\
             Rules:\n\
             - Prefer information from the provided docs over general knowledge\n\
             - Quote or reference the relevant doc section when it helps\n\
             - If the docs do not cover the topic, say so clearly\n\
             - Be concise — lead with the answer, then explain if needed\n\
             - Use markdown formatting for readability\n\n\
             ## Project Documentation\n\n{context}"
        )
    };

    let mut messages: Vec<(String, String)> = Vec::with_capacity(req.history.len() + 2);
    messages.push(("system".to_string(), system_prompt));

    for msg in &req.history {
        messages.push((msg.role.clone(), msg.content.clone()));
    }
    messages.push(("user".to_string(), req.message));

    let response_text = agent
        .llm
        .chat_with_messages(messages, Some(0.3))
        .await
        .map_err(|e| {
            tracing::error!("LLM help chat failed: {e}");
            StatusCode::INTERNAL_SERVER_ERROR
        })?;

    Ok(Json(ApiResponse {
        data: HelpChatResponse {
            message: response_text,
        },
        total: None,
        page: None,
    }))
}