// certifai/src/infrastructure/llm.rs

use dioxus::prelude::*;

#[cfg(feature = "server")]
mod inner {
    use serde::{Deserialize, Serialize};

    /// A single message in the OpenAI-compatible chat format used by Ollama.
    ///
    /// Serialize-only: values of this type are placed in the `messages` list
    /// of an `OllamaChatRequest` and sent as JSON; nothing in this file parses
    /// a `ChatMessage` back from a response.
    #[derive(Serialize)]
    pub(super) struct ChatMessage {
        /// Speaker of the message. The callers in this file either pass the
        /// literal `"user"` or forward the role string of a `FollowUpMessage`
        /// unchanged.
        pub role: String,
        /// The message text.
        pub content: String,
    }

    /// Request body for Ollama's OpenAI-compatible chat completions endpoint.
    ///
    /// Serialized to JSON and POSTed to `<base_url>/v1/chat/completions` by
    /// the server functions in this file.
    #[derive(Serialize)]
    pub(super) struct OllamaChatRequest {
        /// Identifier of the model that should answer the request.
        pub model: String,
        /// The conversation to complete, in the order it should be read.
        pub messages: Vec<ChatMessage>,
        /// Streaming flag. Both callers in this file set it to `false` to
        /// disable streaming so we get a single JSON response.
        pub stream: bool,
    }

    /// A single choice in the Ollama chat completions response.
    ///
    /// Only the `message` field is declared; any other fields present in the
    /// JSON object are ignored during deserialization (serde's default for
    /// derived structs).
    #[derive(Deserialize)]
    pub(super) struct ChatChoice {
        /// The assistant message carried by this choice.
        pub message: ChatResponseMessage,
    }

    /// The assistant message returned inside a choice.
    ///
    /// Deserialize-only counterpart of `ChatMessage`: just the text is kept.
    #[derive(Deserialize)]
    pub(super) struct ChatResponseMessage {
        /// The generated text that the server functions return to the caller.
        pub content: String,
    }

    /// Top-level response from Ollama's `/v1/chat/completions` endpoint.
    ///
    /// Only the `choices` array is deserialized.
    #[derive(Deserialize)]
    pub(super) struct OllamaChatResponse {
        /// Candidate completions. The callers in this file read only the
        /// first entry and report an error when the list is empty.
        pub choices: Vec<ChatChoice>,
    }

    /// Fetch a webpage and extract the readable text of its main article
    /// body, skipping navigation, headers, footers, and sidebars.
    ///
    /// Uses a tiered extraction strategy:
    /// 1. Collect `p`, `h1`-`h3` and `li` elements located inside `<article>`,
    ///    `<main>`, or `[role="main"]` containers. Each element is harvested
    ///    at most once, and elements nested inside an already harvested
    ///    element are skipped, so nested containers (e.g. `<main>` wrapping
    ///    `<article>`) or an `<li>` wrapping a `<p>` do not duplicate text.
    /// 2. If step 1 produced fewer than 200 bytes of text, discard it and
    ///    fall back to every `<p>` that is not inside `nav`, `header`,
    ///    `footer`, `aside`, `script` or `style`.
    ///
    /// # Arguments
    ///
    /// * `url` - The article URL to fetch
    ///
    /// # Returns
    ///
    /// The extracted fragments joined by blank lines, or `None` when the
    /// HTTP client cannot be built, the request fails or times out (10 s),
    /// the status is not a success, the body cannot be read, or fewer than
    /// 100 bytes of text were extracted.
    /// Text is capped at 8000 characters to stay within LLM context limits.
    pub(super) async fn fetch_article_text(url: &str) -> Option<String> {
        // NOTE(review): `url` reaches this function from the `article_url`
        // argument of the `summarize_article` server function, so the server
        // issues a GET to whatever address the caller names. Consider an
        // allow-list or private-address filtering if that endpoint is
        // reachable by untrusted clients.
        let client = reqwest::Client::builder()
            .timeout(std::time::Duration::from_secs(10))
            .build()
            .ok()?;

        let resp = client
            .get(url)
            .header("User-Agent", "CERTifAI/1.0 (Article Summarizer)")
            .send()
            .await
            .ok()?;

        if !resp.status().is_success() {
            return None;
        }

        let html = resp.text().await.ok()?;
        let document = scraper::Html::parse_document(&html);

        // Strategy 1: Extract from semantic article containers.
        // Most news sites wrap the main content in <article>, <main>,
        // or an element with role="main".
        let article_selector = scraper::Selector::parse("article, main, [role='main']").ok()?;
        let paragraph_sel = scraper::Selector::parse("p, h1, h2, h3, li").ok()?;

        let mut text_parts: Vec<String> = Vec::with_capacity(64);

        // Node ids of every element already visited by strategy 1. The key
        // type is inferred from `element.id()` so no extra crate needs naming.
        let mut harvested = std::collections::HashSet::new();

        for container in document.select(&article_selector) {
            for element in container.select(&paragraph_sel) {
                // `insert` returns false when a previous (outer) container
                // already yielded this exact element.
                let first_visit = harvested.insert(element.id());
                // The text of a harvested ancestor already includes this
                // element's text, so collecting it again would duplicate it.
                let inside_harvested = element
                    .ancestors()
                    .any(|ancestor| harvested.contains(&ancestor.id()));
                if first_visit && !inside_harvested {
                    collect_text_fragment(element, &mut text_parts);
                }
            }
        }

        // Strategy 2: If article containers yielded little text, fall back
        // to all <p> tags that are NOT inside nav/header/footer/aside.
        if joined_len(&text_parts) < 200 {
            text_parts.clear();
            let all_p = scraper::Selector::parse("p").ok()?;

            // Tags whose descendants should be excluded from extraction
            const EXCLUDED_TAGS: &[&str] = &["nav", "header", "footer", "aside", "script", "style"];

            for element in document.select(&all_p) {
                // Walk ancestors and skip if inside an excluded container.
                // Checks tag names directly to avoid ego_tree version issues.
                let inside_excluded = element.ancestors().any(|ancestor| {
                    ancestor
                        .value()
                        .as_element()
                        .is_some_and(|el| EXCLUDED_TAGS.contains(&el.name.local.as_ref()))
                });
                if !inside_excluded {
                    collect_text_fragment(element, &mut text_parts);
                }
            }
        }

        let full_text = text_parts.join("\n\n");
        // Too little text (measured in bytes) is treated as a failed
        // extraction so the caller can fall back to other content.
        if full_text.len() < 100 {
            return None;
        }

        // Cap at 8000 chars to stay within reasonable LLM context.
        // Counting `chars` keeps the cut on a character boundary.
        let truncated: String = full_text.chars().take(8000).collect();
        Some(truncated)
    }

    /// Extract the text of an HTML element and append it to `parts` if it
    /// meets a minimum length threshold.
    ///
    /// The element's text nodes are split on whitespace and re-joined with
    /// single spaces, so newlines and indentation coming from the page markup
    /// neither count toward the threshold nor end up in the collected text.
    ///
    /// # Arguments
    ///
    /// * `element` - The element whose descendant text is extracted
    /// * `parts` - Accumulator that receives the fragment when it is kept
    fn collect_text_fragment(element: scraper::ElementRef<'_>, parts: &mut Vec<String>) {
        // Minimum fragment size in bytes; shorter fragments are skipped
        // (nav items, buttons, etc.).
        const MIN_FRAGMENT_LEN: usize = 30;

        let text = element
            .text()
            .flat_map(str::split_whitespace)
            .collect::<Vec<_>>()
            .join(" ");

        if text.len() >= MIN_FRAGMENT_LEN {
            parts.push(text);
        }
    }

    /// Total size, in UTF-8 bytes, of all collected text parts.
    ///
    /// Separators that a later `join` would insert are not counted.
    fn joined_len(parts: &[String]) -> usize {
        let mut total = 0;
        for part in parts {
            total += part.len();
        }
        total
    }
}

/// Summarize an article with an Ollama chat model.
///
/// The full article text is fetched from `article_url` first; when that
/// yields nothing (the fetch helper returns `None`), the search `snippet`
/// is summarized instead. The text is embedded in a single `user` message
/// together with the summarization instructions and sent, non-streaming,
/// to `<base>/v1/chat/completions`.
///
/// # Arguments
///
/// * `snippet` - The search result snippet (fallback content)
/// * `article_url` - The original article URL to fetch full text from
/// * `ollama_url` - Base URL of the Ollama instance (e.g. "http://localhost:11434");
///   an empty string selects the URL configured in `ServerState`
/// * `model` - The Ollama model ID to use (e.g. "llama3.1:8b"); an empty
///   string selects the model configured in `ServerState`
///
/// # Returns
///
/// The content of the first choice returned by the model.
///
/// # Errors
///
/// Returns `ServerFnError` if the server state cannot be extracted, the
/// request cannot be sent, Ollama answers with a non-success status, the
/// response body cannot be parsed, or the response contains no choices.
#[post("/api/summarize")]
pub async fn summarize_article(
    snippet: String,
    article_url: String,
    ollama_url: String,
    model: String,
) -> Result<String, ServerFnError> {
    use inner::{fetch_article_text, ChatMessage, OllamaChatRequest, OllamaChatResponse};

    let state: crate::infrastructure::ServerState =
        dioxus_fullstack::FullstackContext::extract().await?;

    // An empty argument means "use the value configured on the server".
    let base_url = Some(ollama_url)
        .filter(|given| !given.is_empty())
        .unwrap_or_else(|| state.services.ollama_url.clone());
    let model = Some(model)
        .filter(|given| !given.is_empty())
        .unwrap_or_else(|| state.services.ollama_model.clone());

    // Prefer the full article; the snippet is only used when fetching or
    // extraction produced nothing.
    let article_text = match fetch_article_text(&article_url).await {
        Some(full_text) => full_text,
        None => snippet,
    };

    let prompt = format!(
        "You are a news summarizer. Summarize the following article text \
         in 2-3 concise paragraphs. Focus only on the key points and \
         implications. Do NOT comment on the source, the date, the URL, \
         the formatting, or whether the content seems complete or not. \
         Just summarize whatever content is provided.\n\n\
         {article_text}"
    );

    let request_body = OllamaChatRequest {
        model,
        messages: vec![ChatMessage {
            role: String::from("user"),
            content: prompt,
        }],
        stream: false,
    };

    // Tolerate a trailing slash on the configured base URL.
    let endpoint = format!("{}/v1/chat/completions", base_url.trim_end_matches('/'));

    let resp = reqwest::Client::new()
        .post(&endpoint)
        .header("content-type", "application/json")
        .json(&request_body)
        .send()
        .await
        .map_err(|e| ServerFnError::new(format!("Ollama request failed: {e}")))?;

    // Surface the upstream status and body so failures are diagnosable.
    let status = resp.status();
    if !status.is_success() {
        let body = resp.text().await.unwrap_or_default();
        return Err(ServerFnError::new(format!(
            "Ollama returned {status}: {body}"
        )));
    }

    let parsed: OllamaChatResponse = resp
        .json()
        .await
        .map_err(|e| ServerFnError::new(format!("Failed to parse Ollama response: {e}")))?;

    // Only the first choice is used.
    match parsed.choices.into_iter().next() {
        Some(first) => Ok(first.message.content),
        None => Err(ServerFnError::new("Empty response from Ollama")),
    }
}

/// A lightweight chat message for the follow-up conversation.
/// Uses simple String role ("system"/"user"/"assistant") for Ollama compatibility.
///
/// Unlike the request/response types in the server-only `inner` module, this
/// struct is not feature-gated and derives both `Serialize` and
/// `Deserialize`; it is the element type of the `messages` argument of
/// `chat_followup`.
#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct FollowUpMessage {
    /// Speaker of the message; forwarded to Ollama unchanged (no validation
    /// is performed in this file).
    pub role: String,
    /// The message text.
    pub content: String,
}

/// Send a follow-up question about an article using a local Ollama instance.
///
/// Accepts the full conversation history (system context + prior turns) and
/// returns the assistant's next response. The system message should contain
/// the article text and summary so the LLM has full context.
///
/// # Arguments
///
/// * `messages` - The conversation history including system context
/// * `ollama_url` - Base URL of the Ollama instance
/// * `model` - The Ollama model ID to use
///
/// # Returns
///
/// The assistant's response text, or a `ServerFnError` on failure
///
/// # Errors
///
/// Returns `ServerFnError` if the Ollama request fails or response parsing fails
#[post("/api/chat")]
pub async fn chat_followup(
    messages: Vec<FollowUpMessage>,
    ollama_url: String,
    model: String,
) -> Result<String, ServerFnError> {
    use inner::{ChatMessage, OllamaChatRequest, OllamaChatResponse};

    let state: crate::infrastructure::ServerState =
        dioxus_fullstack::FullstackContext::extract().await?;

    let base_url = if ollama_url.is_empty() {
        state.services.ollama_url.clone()
    } else {
        ollama_url
    };

    let model = if model.is_empty() {
        state.services.ollama_model.clone()
    } else {
        model
    };

    // Convert FollowUpMessage to inner ChatMessage for the request
    let chat_messages: Vec<ChatMessage> = messages
        .into_iter()
        .map(|m| ChatMessage {
            role: m.role,
            content: m.content,
        })
        .collect();

    let request_body = OllamaChatRequest {
        model,
        stream: false,
        messages: chat_messages,
    };

    let url = format!("{}/v1/chat/completions", base_url.trim_end_matches('/'));
    let client = reqwest::Client::new();
    let resp = client
        .post(&url)
        .header("content-type", "application/json")
        .json(&request_body)
        .send()
        .await
        .map_err(|e| ServerFnError::new(format!("Ollama request failed: {e}")))?;

    if !resp.status().is_success() {
        let status = resp.status();
        let body = resp.text().await.unwrap_or_default();
        return Err(ServerFnError::new(format!(
            "Ollama returned {status}: {body}"
        )));
    }

    let body: OllamaChatResponse = resp
        .json()
        .await
        .map_err(|e| ServerFnError::new(format!("Failed to parse Ollama response: {e}")))?;

    body.choices
        .first()
        .map(|choice| choice.message.content.clone())
        .ok_or_else(|| ServerFnError::new("Empty response from Ollama"))
}