feat(dashboard): added dashboard content and features (#7)

Co-authored-by: Sharang Parnerkar <parnerkarsharang@gmail.com> Reviewed-on: #7
2026-02-19 19:23:06 +00:00
parent a588be306a
commit 5399afd748
20 changed files with 3111 additions and 131 deletions
--- a/src/infrastructure/llm.rs
+++ b/src/infrastructure/llm.rs
@@ -0,0 +1,324 @@
+use dioxus::prelude::*;
+
+#[cfg(feature = "server")]
+mod inner {
+    use serde::{Deserialize, Serialize};
+
+    /// A single message in the OpenAI-compatible chat format used by Ollama.
+    ///
+    /// Serialized as one entry of the `messages` array of a chat request.
+    #[derive(Serialize)]
+    pub(super) struct ChatMessage {
+        /// Speaker role for this message, sent to the backend as-is.
+        pub role: String,
+        /// Text body of the message.
+        pub content: String,
+    }
+
+    /// Request body for Ollama's OpenAI-compatible chat completions endpoint.
+    ///
+    /// Only derives `Serialize`: it is sent as JSON and never parsed back.
+    #[derive(Serialize)]
+    pub(super) struct OllamaChatRequest {
+        /// Identifier of the model that should produce the completion.
+        pub model: String,
+        /// Conversation messages, serialized in the order given.
+        pub messages: Vec<ChatMessage>,
+        /// Streaming flag; the callers in this file set it to `false` so the
+        /// reply arrives as a single JSON response.
+        pub stream: bool,
+    }
+
+    /// A single choice in the Ollama chat completions response.
+    ///
+    /// Only the `message` field is declared here; no other field of a choice
+    /// is read by this module.
+    #[derive(Deserialize)]
+    pub(super) struct ChatChoice {
+        /// The assistant message carried by this choice.
+        pub message: ChatResponseMessage,
+    }
+
+    /// The assistant message returned inside a choice.
+    #[derive(Deserialize)]
+    pub(super) struct ChatResponseMessage {
+        /// Generated text. Declared as a plain `String`, so parsing fails if
+        /// the backend omits the field or sends `null`.
+        pub content: String,
+    }
+
+    /// Top-level response from Ollama's `/v1/chat/completions` endpoint.
+    #[derive(Deserialize)]
+    pub(super) struct OllamaChatResponse {
+        /// Completion choices; the server functions in this file read only
+        /// the first entry.
+        pub choices: Vec<ChatChoice>,
+    }
+
+    /// Fetch the full text content of a webpage by downloading its HTML
+    /// and extracting the main article body, skipping navigation, headers,
+    /// footers, and sidebars.
+    ///
+    /// Uses a tiered extraction strategy:
+    /// 1. Try content within `<article>`, `<main>`, or `[role="main"]`.
+    ///    Nested matches (for example `<main><article>…`, or a `<p>` inside
+    ///    an `<li>`) are visited only through their outermost match so the
+    ///    same text is never collected twice.
+    /// 2. If that yields fewer than 200 bytes, fall back to all `<p>` tags
+    ///    outside excluded containers.
+    ///
+    /// # Arguments
+    ///
+    /// * `url` - The article URL to fetch
+    ///
+    /// # Returns
+    ///
+    /// The extracted text, or `None` if the request fails, the status is not
+    /// a success, or less than 100 bytes of text could be extracted.
+    /// Text is capped at 8000 characters to stay within LLM context limits.
+    pub(super) async fn fetch_article_text(url: &str) -> Option<String> {
+        let client = reqwest::Client::builder()
+            .timeout(std::time::Duration::from_secs(10))
+            .build()
+            .ok()?;
+
+        let resp = client
+            .get(url)
+            .header("User-Agent", "CERTifAI/1.0 (Article Summarizer)")
+            .send()
+            .await
+            .ok()?;
+
+        if !resp.status().is_success() {
+            return None;
+        }
+
+        let html = resp.text().await.ok()?;
+        let document = scraper::Html::parse_document(&html);
+
+        // Strategy 1: Extract from semantic article containers.
+        // Most news sites wrap the main content in <article>, <main>,
+        // or an element with role="main".
+        let article_selector = scraper::Selector::parse("article, main, [role='main']").ok()?;
+        let paragraph_sel = scraper::Selector::parse("p, h1, h2, h3, li").ok()?;
+
+        let mut text_parts: Vec<String> = Vec::with_capacity(64);
+
+        for container in document.select(&article_selector) {
+            // A container nested inside another matching container is already
+            // covered by the outer one; visiting it again would duplicate text.
+            if has_matching_ancestor(container, &article_selector, None) {
+                continue;
+            }
+            for element in container.select(&paragraph_sel) {
+                // Likewise, a fragment inside another fragment (e.g. <p> in
+                // <li>) is already part of the outer fragment's text.
+                if has_matching_ancestor(element, &paragraph_sel, Some(container)) {
+                    continue;
+                }
+                collect_text_fragment(element, &mut text_parts);
+            }
+        }
+
+        // Strategy 2: If article containers yielded little text, fall back
+        // to all <p> tags that are NOT inside nav/header/footer/aside.
+        if joined_len(&text_parts) < 200 {
+            text_parts.clear();
+            let all_p = scraper::Selector::parse("p").ok()?;
+
+            // Tags whose descendants should be excluded from extraction
+            const EXCLUDED_TAGS: &[&str] = &["nav", "header", "footer", "aside", "script", "style"];
+
+            for element in document.select(&all_p) {
+                // Walk ancestors and skip if inside an excluded container.
+                let inside_excluded = element.ancestors().any(|ancestor| {
+                    ancestor
+                        .value()
+                        .as_element()
+                        .is_some_and(|el| EXCLUDED_TAGS.contains(&el.name.local.as_ref()))
+                });
+                if !inside_excluded {
+                    collect_text_fragment(element, &mut text_parts);
+                }
+            }
+        }
+
+        let full_text = text_parts.join("\n\n");
+        // Too little text usually means a paywall, consent wall, or JS-only page.
+        if full_text.len() < 100 {
+            return None;
+        }
+
+        // Cap at 8000 chars to stay within reasonable LLM context.
+        // Iterating by `char` keeps the cut on a UTF-8 boundary.
+        let truncated: String = full_text.chars().take(8000).collect();
+        Some(truncated)
+    }
+
+    /// Report whether `element` has an ancestor element matching `selector`,
+    /// looking no further up the tree than `boundary` (exclusive) when given.
+    fn has_matching_ancestor(
+        element: scraper::ElementRef<'_>,
+        selector: &scraper::Selector,
+        boundary: Option<scraper::ElementRef<'_>>,
+    ) -> bool {
+        element
+            .ancestors()
+            .take_while(|node| boundary.map_or(true, |limit| node.id() != limit.id()))
+            .filter_map(scraper::ElementRef::wrap)
+            .any(|ancestor| selector.matches(&ancestor))
+    }
+
+    /// Append the text of `element` to `parts` when it is long enough to be
+    /// real content.
+    ///
+    /// The element's text nodes are joined with single spaces and the result
+    /// is trimmed; fragments shorter than 30 bytes (nav items, buttons, etc.)
+    /// are dropped.
+    fn collect_text_fragment(element: scraper::ElementRef<'_>, parts: &mut Vec<String>) {
+        const MIN_FRAGMENT_LEN: usize = 30;
+
+        let pieces: Vec<&str> = element.text().collect();
+        let joined = pieces.join(" ");
+        let fragment = joined.trim();
+
+        if fragment.len() < MIN_FRAGMENT_LEN {
+            return;
+        }
+        parts.push(fragment.to_owned());
+    }
+
+    /// Total size in bytes of all collected text parts.
+    ///
+    /// Separators are not counted, and the lengths are UTF-8 byte lengths
+    /// rather than character counts.
+    fn joined_len(parts: &[String]) -> usize {
+        parts.iter().fold(0, |total, part| total + part.len())
+    }
+}
+
+/// Produce an LLM summary of an article via an Ollama instance.
+///
+/// The article page is downloaded and its main text extracted; when that
+/// yields nothing (paywall, timeout, non-success status, too little text),
+/// the search `snippet` is summarized instead. The text is sent as a single
+/// `user` message to `<ollama_url>/v1/chat/completions` with streaming off.
+///
+/// # Arguments
+///
+/// * `snippet` - The search result snippet (fallback content)
+/// * `article_url` - The original article URL to fetch full text from
+/// * `ollama_url` - Base URL of the Ollama instance (e.g. "http://localhost:11434");
+///   when empty, `OLLAMA_URL` or the localhost default is used
+/// * `model` - The Ollama model ID to use (e.g. "llama3.1:8b"); when empty,
+///   `OLLAMA_MODEL` or "llama3.1:8b" is used
+///
+/// # Returns
+///
+/// The content of the first choice returned by the model
+///
+/// # Errors
+///
+/// Returns `ServerFnError` if the request cannot be sent, Ollama answers with
+/// a non-success status, the body cannot be parsed, or no choice is returned
+#[server(endpoint = "/api/summarize")]
+pub async fn summarize_article(
+    snippet: String,
+    article_url: String,
+    ollama_url: String,
+    model: String,
+) -> Result<String, ServerFnError> {
+    dotenvy::dotenv().ok();
+    use inner::{fetch_article_text, ChatMessage, OllamaChatRequest, OllamaChatResponse};
+
+    // Explicit argument wins; otherwise env var; otherwise built-in default.
+    let base_url = Some(ollama_url)
+        .filter(|candidate| !candidate.is_empty())
+        .or_else(|| std::env::var("OLLAMA_URL").ok())
+        .unwrap_or_else(|| "http://localhost:11434".into());
+
+    let model = Some(model)
+        .filter(|candidate| !candidate.is_empty())
+        .or_else(|| std::env::var("OLLAMA_MODEL").ok())
+        .unwrap_or_else(|| "llama3.1:8b".into());
+
+    // NOTE(review): `article_url` comes from the caller and is fetched
+    // server-side as-is; confirm callers only pass search-result URLs.
+    let article_text = match fetch_article_text(&article_url).await {
+        Some(full_text) => full_text,
+        None => snippet,
+    };
+
+    let prompt = format!(
+        "You are a news summarizer. Summarize the following article text \
+         in 2-3 concise paragraphs. Focus only on the key points and \
+         implications. Do NOT comment on the source, the date, the URL, \
+         the formatting, or whether the content seems complete or not. \
+         Just summarize whatever content is provided.\n\n\
+         {article_text}"
+    );
+
+    let payload = OllamaChatRequest {
+        model,
+        stream: false,
+        messages: vec![ChatMessage {
+            role: "user".into(),
+            content: prompt,
+        }],
+    };
+
+    let endpoint = format!("{}/v1/chat/completions", base_url.trim_end_matches('/'));
+    let response = reqwest::Client::new()
+        .post(&endpoint)
+        .header("content-type", "application/json")
+        .json(&payload)
+        .send()
+        .await
+        .map_err(|e| ServerFnError::new(format!("Ollama request failed: {e}")))?;
+
+    let status = response.status();
+    if !status.is_success() {
+        // Include whatever body Ollama sent to make the failure diagnosable.
+        let body = response.text().await.unwrap_or_default();
+        return Err(ServerFnError::new(format!(
+            "Ollama returned {status}: {body}"
+        )));
+    }
+
+    let parsed: OllamaChatResponse = response
+        .json()
+        .await
+        .map_err(|e| ServerFnError::new(format!("Failed to parse Ollama response: {e}")))?;
+
+    match parsed.choices.into_iter().next() {
+        Some(choice) => Ok(choice.message.content),
+        None => Err(ServerFnError::new("Empty response from Ollama")),
+    }
+}
+
+/// A lightweight chat message for the follow-up conversation.
+/// Uses simple String role ("system"/"user"/"assistant") for Ollama compatibility.
+///
+/// Derives both `Serialize` and `Deserialize` because it travels between the
+/// client and the `chat_followup` server function.
+#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
+pub struct FollowUpMessage {
+    /// Role of the speaker; forwarded to the backend without validation.
+    pub role: String,
+    /// Text body of the message.
+    pub content: String,
+}
+
+/// Ask a follow-up question about an article via an Ollama instance.
+///
+/// The complete conversation (system context plus earlier turns) is forwarded
+/// unchanged to `<ollama_url>/v1/chat/completions` with streaming off, and
+/// the assistant's next reply is returned. The system message is expected to
+/// carry the article text and summary so the model has full context.
+///
+/// # Arguments
+///
+/// * `messages` - The conversation history including system context
+/// * `ollama_url` - Base URL of the Ollama instance; when empty, `OLLAMA_URL`
+///   or "http://localhost:11434" is used
+/// * `model` - The Ollama model ID to use; when empty, `OLLAMA_MODEL` or
+///   "llama3.1:8b" is used
+///
+/// # Returns
+///
+/// The content of the first choice returned by the model
+///
+/// # Errors
+///
+/// Returns `ServerFnError` if the request cannot be sent, Ollama answers with
+/// a non-success status, the body cannot be parsed, or no choice is returned
+#[server(endpoint = "/api/chat")]
+pub async fn chat_followup(
+    messages: Vec<FollowUpMessage>,
+    ollama_url: String,
+    model: String,
+) -> Result<String, ServerFnError> {
+    dotenvy::dotenv().ok();
+    use inner::{ChatMessage, OllamaChatRequest, OllamaChatResponse};
+
+    // Explicit argument wins; otherwise env var; otherwise built-in default.
+    let base_url = Some(ollama_url)
+        .filter(|candidate| !candidate.is_empty())
+        .or_else(|| std::env::var("OLLAMA_URL").ok())
+        .unwrap_or_else(|| "http://localhost:11434".into());
+
+    let model = Some(model)
+        .filter(|candidate| !candidate.is_empty())
+        .or_else(|| std::env::var("OLLAMA_MODEL").ok())
+        .unwrap_or_else(|| "llama3.1:8b".into());
+
+    // Re-wrap each public message in the private wire type, preserving order.
+    let mut wire_messages: Vec<ChatMessage> = Vec::with_capacity(messages.len());
+    for FollowUpMessage { role, content } in messages {
+        wire_messages.push(ChatMessage { role, content });
+    }
+
+    let payload = OllamaChatRequest {
+        model,
+        stream: false,
+        messages: wire_messages,
+    };
+
+    let endpoint = format!("{}/v1/chat/completions", base_url.trim_end_matches('/'));
+    let response = reqwest::Client::new()
+        .post(&endpoint)
+        .header("content-type", "application/json")
+        .json(&payload)
+        .send()
+        .await
+        .map_err(|e| ServerFnError::new(format!("Ollama request failed: {e}")))?;
+
+    let status = response.status();
+    if !status.is_success() {
+        // Include whatever body Ollama sent to make the failure diagnosable.
+        let body = response.text().await.unwrap_or_default();
+        return Err(ServerFnError::new(format!(
+            "Ollama returned {status}: {body}"
+        )));
+    }
+
+    let parsed: OllamaChatResponse = response
+        .json()
+        .await
+        .map_err(|e| ServerFnError::new(format!("Failed to parse Ollama response: {e}")))?;
+
+    match parsed.choices.into_iter().next() {
+        Some(choice) => Ok(choice.message.content),
+        None => Err(ServerFnError::new("Empty response from Ollama")),
+    }
+}
--- a/src/infrastructure/mod.rs
+++ b/src/infrastructure/mod.rs
@@ -1,10 +1,24 @@
-#![cfg(feature = "server")]
+// Server function modules (compiled for both web and server features;
+// the #[server] macro generates client stubs for the web target)
+pub mod llm;
+pub mod ollama;
+pub mod searxng;
+
+// Server-only modules (Axum handlers, state, etc.)
+#[cfg(feature = "server")]
 mod auth;
+#[cfg(feature = "server")]
 mod error;
+#[cfg(feature = "server")]
 mod server;
+#[cfg(feature = "server")]
 mod state;

+#[cfg(feature = "server")]
 pub use auth::*;
+#[cfg(feature = "server")]
 pub use error::*;
+#[cfg(feature = "server")]
 pub use server::*;
+#[cfg(feature = "server")]
 pub use state::*;
--- a/src/infrastructure/ollama.rs
+++ b/src/infrastructure/ollama.rs
@@ -0,0 +1,91 @@
+use dioxus::prelude::*;
+use serde::{Deserialize, Serialize};
+
+/// Status of a local Ollama instance, including connectivity and loaded models.
+///
+/// # Fields
+///
+/// * `online` - Whether the Ollama API responded successfully
+/// * `models` - List of model names currently available on the instance
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct OllamaStatus {
+    /// `true` when the tags endpoint answered with a success status.
+    pub online: bool,
+    /// Model names reported by the instance; empty when it is offline or
+    /// its response could not be parsed.
+    pub models: Vec<String>,
+}
+
+/// Response from Ollama's `GET /api/tags` endpoint.
+///
+/// Only compiled for the server build, where the response is parsed.
+#[cfg(feature = "server")]
+#[derive(Deserialize)]
+struct OllamaTagsResponse {
+    /// Model entries listed by the instance.
+    models: Vec<OllamaModel>,
+}
+
+/// A single model entry from Ollama's tags API.
+///
+/// Only the `name` field is declared; nothing else in an entry is read here.
+#[cfg(feature = "server")]
+#[derive(Deserialize)]
+struct OllamaModel {
+    /// Model name as reported by Ollama.
+    name: String,
+}
+
+/// Probe an Ollama instance and list the models it offers.
+///
+/// Issues `GET <ollama_url>/api/tags` with a 5 second timeout. The instance
+/// counts as online when that request returns a success status.
+///
+/// # Arguments
+///
+/// * `ollama_url` - Base URL of the Ollama instance (e.g. "http://localhost:11434");
+///   when empty, `OLLAMA_URL` or the localhost default is used
+///
+/// # Returns
+///
+/// * `online: true` with the model names when the endpoint answers and parses
+/// * `online: true` with no models when the answer cannot be parsed
+/// * `online: false` with no models when the request fails or the status is
+///   not a success
+///
+/// # Errors
+///
+/// Returns `ServerFnError` only if the HTTP client cannot be constructed;
+/// network and parse failures are reported through the returned status
+#[server(endpoint = "/api/ollama-status")]
+pub async fn get_ollama_status(ollama_url: String) -> Result<OllamaStatus, ServerFnError> {
+    dotenvy::dotenv().ok();
+
+    let base_url = Some(ollama_url)
+        .filter(|candidate| !candidate.is_empty())
+        .or_else(|| std::env::var("OLLAMA_URL").ok())
+        .unwrap_or_else(|| "http://localhost:11434".into());
+
+    let endpoint = format!("{}/api/tags", base_url.trim_end_matches('/'));
+
+    let client = reqwest::Client::builder()
+        .timeout(std::time::Duration::from_secs(5))
+        .build()
+        .map_err(|e| ServerFnError::new(format!("HTTP client error: {e}")))?;
+
+    // Transport errors and non-success statuses both mean "offline".
+    let reachable = client
+        .get(&endpoint)
+        .send()
+        .await
+        .ok()
+        .filter(|response| response.status().is_success());
+    let Some(response) = reachable else {
+        return Ok(OllamaStatus {
+            online: false,
+            models: Vec::new(),
+        });
+    };
+
+    // The instance answered, so it is online even if the body is unreadable.
+    let models = match response.json::<OllamaTagsResponse>().await {
+        Ok(tags) => tags.models.into_iter().map(|entry| entry.name).collect(),
+        Err(_) => Vec::new(),
+    };
+
+    Ok(OllamaStatus {
+        online: true,
+        models,
+    })
+}
--- a/src/infrastructure/searxng.rs
+++ b/src/infrastructure/searxng.rs
@@ -0,0 +1,285 @@
+use crate::models::NewsCard;
+use dioxus::prelude::*;
+
+// Server-side helpers and types are only needed for the server build.
+// The #[server] macro generates a client stub for the web build that
+// sends a network request instead of executing this function body.
+#[cfg(feature = "server")]
+mod inner {
+    use serde::Deserialize;
+    use std::collections::HashSet;
+
+    /// Individual result from the SearXNG search API.
+    #[derive(Debug, Deserialize)]
+    pub(super) struct SearxngResult {
+        /// Result headline.
+        pub title: String,
+        /// Link to the result page.
+        pub url: String,
+        /// Text snippet for the result, when the response includes one.
+        pub content: Option<String>,
+        /// Publication date string, read from the `publishedDate` JSON key.
+        #[serde(rename = "publishedDate")]
+        pub published_date: Option<String>,
+        /// Thumbnail image URL, when the response includes one.
+        pub thumbnail: Option<String>,
+        /// Relevance score assigned by SearXNG (higher = more relevant).
+        /// Defaults to `0.0` when the key is absent.
+        #[serde(default)]
+        pub score: f64,
+    }
+
+    /// Top-level response from the SearXNG search API.
+    ///
+    /// Only the `results` array is declared; nothing else is read here.
+    #[derive(Debug, Deserialize)]
+    pub(super) struct SearxngResponse {
+        /// Search hits in the order SearXNG returned them.
+        pub results: Vec<SearxngResult>,
+    }
+
+    /// Derive a short source label from a URL's host.
+    ///
+    /// A single leading "www." is removed for cleaner display.
+    ///
+    /// # Arguments
+    ///
+    /// * `url_str` - The full URL string
+    ///
+    /// # Returns
+    ///
+    /// The host without a leading "www.", or "Web" when the URL cannot be
+    /// parsed or has no host
+    pub(super) fn extract_source(url_str: &str) -> String {
+        let Ok(parsed) = url::Url::parse(url_str) else {
+            return "Web".into();
+        };
+
+        match parsed.host_str() {
+            Some(host) => host.strip_prefix("www.").unwrap_or(host).to_string(),
+            None => "Web".into(),
+        }
+    }
+
+    /// Deduplicate and rank search results for quality, similar to Perplexity.
+    ///
+    /// Applies the following steps in order:
+    /// 1. Drop results whose snippet is missing or shorter than 20 bytes
+    ///    after trimming (no snippet = low value)
+    /// 2. Sort by SearXNG relevance score (descending)
+    /// 3. Deduplicate by domain, keeping the first (highest-scored) result
+    ///    per domain
+    /// 4. Cap at `max_results`
+    ///
+    /// Results whose URL cannot be parsed all share the fallback domain
+    /// label returned by `extract_source`, so at most one of them survives.
+    ///
+    /// # Arguments
+    ///
+    /// * `results` - Raw search results from SearXNG
+    /// * `max_results` - Maximum number of results to return
+    ///
+    /// # Returns
+    ///
+    /// Filtered, deduplicated, and ranked results
+    pub(super) fn rank_and_deduplicate(
+        mut results: Vec<SearxngResult>,
+        max_results: usize,
+    ) -> Vec<SearxngResult> {
+        // Filter out results with no meaningful content
+        results.retain(|r| r.content.as_ref().is_some_and(|c| c.trim().len() >= 20));
+
+        // Sort by score descending so we keep the best result per domain.
+        // `total_cmp` is a total order even if a score is NaN, which
+        // `sort_by` requires; the sort is stable, so ties keep input order.
+        results.sort_by(|a, b| b.score.total_cmp(&a.score));
+
+        // Deduplicate by domain: `insert` returns false for a domain that
+        // was already seen, which drops every later result for it.
+        let mut seen_domains = HashSet::new();
+        results.retain(|r| seen_domains.insert(extract_source(&r.url)));
+
+        results.truncate(max_results);
+        results
+    }
+}
+
+/// Search for news using the SearXNG meta-search engine.
+///
+/// Uses Perplexity-style query enrichment and result ranking:
+/// - Queries the "news" and "general" categories for fresh, relevant results
+/// - Filters to the last month for recency
+/// - Deduplicates by domain for source diversity
+/// - Ranks by SearXNG relevance score
+/// - Filters out results without meaningful content
+///
+/// The SearXNG base URL is read from `SEARXNG_URL` (default
+/// "http://localhost:8888"); a trailing slash on it is ignored. The request
+/// is abandoned after 10 seconds.
+///
+/// # Arguments
+///
+/// * `query` - The search query string
+///
+/// # Returns
+///
+/// Up to 15 high-quality `NewsCard` results, or a `ServerFnError` on failure
+///
+/// # Errors
+///
+/// Returns `ServerFnError` if the HTTP client cannot be built, the SearXNG
+/// request fails or times out, the status is not a success, or response
+/// parsing fails
+#[server(endpoint = "/api/search")]
+pub async fn search_topic(query: String) -> Result<Vec<NewsCard>, ServerFnError> {
+    dotenvy::dotenv().ok();
+    use inner::{extract_source, rank_and_deduplicate, SearxngResponse};
+
+    let searxng_url =
+        std::env::var("SEARXNG_URL").unwrap_or_else(|_| "http://localhost:8888".into());
+    // Avoid "//search" when the configured URL ends with a slash.
+    let base_url = searxng_url.trim_end_matches('/');
+
+    // Enrich the query with "latest news" context for better results,
+    // similar to how Perplexity reformulates queries before searching.
+    let enriched_query = format!("{query} latest news");
+
+    // Build URL with query parameters using the url crate's encoder
+    // to avoid reqwest version conflicts between our dep and dioxus's.
+    // Key SearXNG params:
+    //   categories=news,general - prioritize news sources + supplement with general
+    //   time_range=month       - only recent results (last 30 days)
+    //   language=en            - English results
+    //   format=json            - machine-readable output
+    let encoded_query: String =
+        url::form_urlencoded::byte_serialize(enriched_query.as_bytes()).collect();
+    let search_url = format!(
+        "{base_url}/search?q={encoded_query}&format=json&language=en\
+         &categories=news,general&time_range=month"
+    );
+
+    // Bound the request so an unresponsive SearXNG cannot hang the caller.
+    let client = reqwest::Client::builder()
+        .timeout(std::time::Duration::from_secs(10))
+        .build()
+        .map_err(|e| ServerFnError::new(format!("HTTP client error: {e}")))?;
+
+    let resp = client
+        .get(&search_url)
+        .send()
+        .await
+        .map_err(|e| ServerFnError::new(format!("SearXNG request failed: {e}")))?;
+
+    if !resp.status().is_success() {
+        return Err(ServerFnError::new(format!(
+            "SearXNG returned status {}",
+            resp.status()
+        )));
+    }
+
+    let body: SearxngResponse = resp
+        .json()
+        .await
+        .map_err(|e| ServerFnError::new(format!("Failed to parse SearXNG response: {e}")))?;
+
+    // Apply Perplexity-style ranking: filter empties, deduplicate domains, sort by score
+    let ranked = rank_and_deduplicate(body.results, 15);
+
+    let cards: Vec<NewsCard> = ranked
+        .into_iter()
+        .map(|r| {
+            let content = r.content.unwrap_or_default();
+            // The summary is the first 200 characters of the snippet;
+            // borrowing `content` avoids cloning the whole string.
+            let summary: String = content.chars().take(200).collect();
+            NewsCard {
+                title: r.title,
+                source: extract_source(&r.url),
+                summary,
+                content,
+                category: query.clone(),
+                url: r.url,
+                thumbnail_url: r.thumbnail,
+                published_at: r.published_date.unwrap_or_else(|| "Recent".into()),
+            }
+        })
+        .collect();
+
+    Ok(cards)
+}
+
+/// Fetch trending topic keywords by running a broad news search and
+/// extracting the most frequent meaningful terms from result titles.
+///
+/// This approach works regardless of whether SearXNG has autocomplete
+/// configured, since it uses the standard search API.
+///
+/// The SearXNG base URL is read from `SEARXNG_URL` (default
+/// "http://localhost:8888"); a trailing slash on it is ignored. The request
+/// is abandoned after 5 seconds.
+///
+/// # Returns
+///
+/// Up to 8 trending keyword strings, most frequent first. Only words seen
+/// in at least two places are returned, and words with equal frequency are
+/// ordered alphabetically so repeated calls on the same results agree.
+///
+/// # Errors
+///
+/// Returns `ServerFnError` if the HTTP client cannot be built, the SearXNG
+/// search request fails, the status is not a success, or the response
+/// cannot be parsed
+#[server(endpoint = "/api/trending")]
+pub async fn get_trending_topics() -> Result<Vec<String>, ServerFnError> {
+    dotenvy::dotenv().ok();
+    use inner::SearxngResponse;
+    use std::collections::HashMap;
+
+    let searxng_url =
+        std::env::var("SEARXNG_URL").unwrap_or_else(|_| "http://localhost:8888".into());
+    // Avoid "//search" when the configured URL ends with a slash.
+    let base_url = searxng_url.trim_end_matches('/');
+
+    let encoded_query: String =
+        url::form_urlencoded::byte_serialize(b"trending technology AI").collect();
+    let search_url = format!(
+        "{base_url}/search?q={encoded_query}&format=json&language=en\
+         &categories=news&time_range=week"
+    );
+
+    let client = reqwest::Client::builder()
+        .timeout(std::time::Duration::from_secs(5))
+        .build()
+        .map_err(|e| ServerFnError::new(format!("HTTP client error: {e}")))?;
+
+    let resp = client
+        .get(&search_url)
+        .send()
+        .await
+        .map_err(|e| ServerFnError::new(format!("SearXNG trending search failed: {e}")))?;
+
+    if !resp.status().is_success() {
+        return Err(ServerFnError::new(format!(
+            "SearXNG trending search returned status {}",
+            resp.status()
+        )));
+    }
+
+    let body: SearxngResponse = resp
+        .json()
+        .await
+        .map_err(|e| ServerFnError::new(format!("Failed to parse trending response: {e}")))?;
+
+    // Common stop words to exclude from trending keywords
+    const STOP_WORDS: &[&str] = &[
+        "the", "a", "an", "and", "or", "but", "in", "on", "at", "to", "for", "of", "with", "by",
+        "from", "is", "are", "was", "were", "be", "been", "has", "have", "had", "do", "does",
+        "did", "will", "would", "could", "should", "may", "can", "not", "no", "it", "its", "this",
+        "that", "these", "how", "what", "why", "who", "when", "new", "says", "said", "about",
+        "after", "over", "into", "up", "out", "as", "all", "more", "than", "just", "now", "also",
+        "us", "we", "you", "your", "our", "if", "so", "like", "get", "make", "year", "years",
+        "one", "two",
+    ];
+
+    // Count word frequency across all result titles. Words are lowercased
+    // and must be at least 3 characters to filter out noise.
+    let mut word_counts: HashMap<String, u32> = HashMap::new();
+    for result in &body.results {
+        for word in result.title.split_whitespace() {
+            // Strip punctuation from edges, lowercase
+            let clean: String = word
+                .trim_matches(|c: char| !c.is_alphanumeric())
+                .to_lowercase();
+            // Count characters, not bytes, so short non-ASCII words are
+            // held to the same 3-character minimum as ASCII ones.
+            if clean.chars().count() >= 3 && !STOP_WORDS.contains(&clean.as_str()) {
+                *word_counts.entry(clean).or_insert(0) += 1;
+            }
+        }
+    }
+
+    // Sort by frequency descending. HashMap iteration order is arbitrary,
+    // so ties are broken alphabetically to keep the output deterministic.
+    let mut sorted: Vec<(String, u32)> = word_counts.into_iter().collect();
+    sorted.sort_unstable_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0)));
+
+    // Keep words seen at least twice, take the top 8, and capitalize the
+    // first letter for display.
+    let topics: Vec<String> = sorted
+        .into_iter()
+        .filter(|(_, count)| *count >= 2)
+        .take(8)
+        .map(|(word, _)| {
+            let mut chars = word.chars();
+            match chars.next() {
+                Some(first) => first.to_uppercase().chain(chars).collect::<String>(),
+                None => word,
+            }
+        })
+        .collect();
+
+    Ok(topics)
+}