use crate::models::NewsCard; use dioxus::prelude::*; // Server-side helpers and types are only needed for the server build. // The #[server] macro generates a client stub for the web build that // sends a network request instead of executing this function body. #[cfg(feature = "server")] mod inner { use serde::Deserialize; use std::collections::HashSet; /// Individual result from the SearXNG search API. #[derive(Debug, Deserialize)] pub(super) struct SearxngResult { pub title: String, pub url: String, pub content: Option, #[serde(rename = "publishedDate")] pub published_date: Option, pub thumbnail: Option, /// Relevance score assigned by SearXNG (higher = more relevant). #[serde(default)] pub score: f64, } /// Top-level response from the SearXNG search API. #[derive(Debug, Deserialize)] pub(super) struct SearxngResponse { pub results: Vec, } /// Extract the domain name from a URL to use as the source label. /// /// Strips common prefixes like "www." for cleaner display. /// /// # Arguments /// /// * `url_str` - The full URL string /// /// # Returns /// /// The domain host or a fallback "Web" string pub(super) fn extract_source(url_str: &str) -> String { url::Url::parse(url_str) .ok() .and_then(|u| u.host_str().map(String::from)) .map(|host| host.strip_prefix("www.").unwrap_or(&host).to_string()) .unwrap_or_else(|| "Web".into()) } /// Deduplicate and rank search results for quality, similar to Perplexity. /// /// Applies the following filters in order: /// 1. Remove results with empty content (no snippet = low value) /// 2. Deduplicate by domain (keep highest-scored result per domain) /// 3. Sort by SearXNG relevance score (descending) /// 4. Cap at `max_results` /// /// # Arguments /// /// * `results` - Raw search results from SearXNG /// * `max_results` - Maximum number of results to return /// /// # Returns /// /// Filtered, deduplicated, and ranked results pub(super) fn rank_and_deduplicate( mut results: Vec, max_results: usize, ) -> Vec { // Filter out results with no meaningful content results.retain(|r| r.content.as_ref().is_some_and(|c| c.trim().len() >= 20)); // Sort by score descending so we keep the best result per domain results.sort_by(|a, b| { b.score .partial_cmp(&a.score) .unwrap_or(std::cmp::Ordering::Equal) }); // Deduplicate by domain: keep only the first (highest-scored) per domain let mut seen_domains = HashSet::new(); results.retain(|r| { let domain = extract_source(&r.url); seen_domains.insert(domain) }); results.truncate(max_results); results } } /// Search for news using the SearXNG meta-search engine. /// /// Uses Perplexity-style query enrichment and result ranking: /// - Queries the "news" and "general" categories for fresh, relevant results /// - Filters to the last month for recency /// - Deduplicates by domain for source diversity /// - Ranks by SearXNG relevance score /// - Filters out results without meaningful content /// /// # Arguments /// /// * `query` - The search query string /// /// # Returns /// /// Up to 15 high-quality `NewsCard` results, or a `ServerFnError` on failure /// /// # Errors /// /// Returns `ServerFnError` if the SearXNG request fails or response parsing fails #[post("/api/search")] pub async fn search_topic(query: String) -> Result, ServerFnError> { use inner::{extract_source, rank_and_deduplicate, SearxngResponse}; let state: crate::infrastructure::ServerState = dioxus_fullstack::FullstackContext::extract().await?; let searxng_url = state.services.searxng_url.clone(); // Enrich the query with "latest news" context for better results, // similar to how Perplexity reformulates queries before searching. let enriched_query = format!("{query} latest news"); // Use POST with form-encoded body because SearXNG's default config // sets `method: "POST"` which rejects GET requests with 405. let search_url = format!("{searxng_url}/search"); let params = [ ("q", enriched_query.as_str()), ("format", "json"), ("language", "en"), ("categories", "news,general"), ("time_range", "month"), ]; let client = reqwest::Client::new(); let resp = client .post(&search_url) .form(¶ms) .send() .await .map_err(|e| ServerFnError::new(format!("SearXNG request failed: {e}")))?; if !resp.status().is_success() { return Err(ServerFnError::new(format!( "SearXNG returned status {}", resp.status() ))); } let body: SearxngResponse = resp .json() .await .map_err(|e| ServerFnError::new(format!("Failed to parse SearXNG response: {e}")))?; // Apply Perplexity-style ranking: filter empties, deduplicate domains, sort by score let ranked = rank_and_deduplicate(body.results, 15); let cards: Vec = ranked .into_iter() .map(|r| { let summary = r .content .clone() .unwrap_or_default() .chars() .take(200) .collect::(); let content = r.content.unwrap_or_default(); NewsCard { title: r.title, source: extract_source(&r.url), summary, content, category: query.clone(), url: r.url, thumbnail_url: r.thumbnail, published_at: r.published_date.unwrap_or_else(|| "Recent".into()), } }) .collect(); Ok(cards) } /// Fetch trending topic keywords by running a broad news search and /// extracting the most frequent meaningful terms from result titles. /// /// This approach works regardless of whether SearXNG has autocomplete /// configured, since it uses the standard search API. /// /// # Returns /// /// Up to 8 trending keyword strings, or a `ServerFnError` on failure /// /// # Errors /// /// Returns `ServerFnError` if the SearXNG search request fails #[get("/api/trending")] pub async fn get_trending_topics() -> Result, ServerFnError> { use inner::SearxngResponse; use std::collections::HashMap; let state: crate::infrastructure::ServerState = dioxus_fullstack::FullstackContext::extract().await?; let searxng_url = state.services.searxng_url.clone(); // Use POST to match SearXNG's default `method: "POST"` setting let search_url = format!("{searxng_url}/search"); let params = [ ("q", "trending technology AI"), ("format", "json"), ("language", "en"), ("categories", "news"), ("time_range", "week"), ]; let client = reqwest::Client::builder() .timeout(std::time::Duration::from_secs(5)) .build() .map_err(|e| ServerFnError::new(format!("HTTP client error: {e}")))?; let resp = client .post(&search_url) .form(¶ms) .send() .await .map_err(|e| ServerFnError::new(format!("SearXNG trending search failed: {e}")))?; if !resp.status().is_success() { return Err(ServerFnError::new(format!( "SearXNG trending search returned status {}", resp.status() ))); } let body: SearxngResponse = resp .json() .await .map_err(|e| ServerFnError::new(format!("Failed to parse trending response: {e}")))?; // Common stop words to exclude from trending keywords const STOP_WORDS: &[&str] = &[ "the", "a", "an", "and", "or", "but", "in", "on", "at", "to", "for", "of", "with", "by", "from", "is", "are", "was", "were", "be", "been", "has", "have", "had", "do", "does", "did", "will", "would", "could", "should", "may", "can", "not", "no", "it", "its", "this", "that", "these", "how", "what", "why", "who", "when", "new", "says", "said", "about", "after", "over", "into", "up", "out", "as", "all", "more", "than", "just", "now", "also", "us", "we", "you", "your", "our", "if", "so", "like", "get", "make", "year", "years", "one", "two", ]; // Count word frequency across all result titles. Words are lowercased // and must be at least 3 characters to filter out noise. let mut word_counts: HashMap = HashMap::new(); for result in &body.results { for word in result.title.split_whitespace() { // Strip punctuation from edges, lowercase let clean: String = word .trim_matches(|c: char| !c.is_alphanumeric()) .to_lowercase(); if clean.len() >= 3 && !STOP_WORDS.contains(&clean.as_str()) { *word_counts.entry(clean).or_insert(0) += 1; } } } // Sort by frequency descending, take top 8 let mut sorted: Vec<(String, u32)> = word_counts.into_iter().collect(); sorted.sort_by(|a, b| b.1.cmp(&a.1)); // Capitalize first letter for display let topics: Vec = sorted .into_iter() .filter(|(_, count)| *count >= 2) .take(8) .map(|(word, _)| { let mut chars = word.chars(); match chars.next() { Some(c) => c.to_uppercase().to_string() + chars.as_str(), None => word, } }) .collect(); Ok(topics) }