use crate::models::NewsCard; use dioxus::prelude::*; // Server-side helpers and types are only needed for the server build. // The #[server] macro generates a client stub for the web build that // sends a network request instead of executing this function body. #[cfg(feature = "server")] pub(crate) mod inner { use serde::Deserialize; use std::collections::HashSet; /// Individual result from the SearXNG search API. #[derive(Debug, Deserialize)] pub(crate) struct SearxngResult { pub title: String, pub url: String, pub content: Option, #[serde(rename = "publishedDate")] pub published_date: Option, pub thumbnail: Option, /// Relevance score assigned by SearXNG (higher = more relevant). #[serde(default)] pub score: f64, } /// Top-level response from the SearXNG search API. #[derive(Debug, Deserialize)] pub(crate) struct SearxngResponse { pub results: Vec, } /// Extract the domain name from a URL to use as the source label. /// /// Strips common prefixes like "www." for cleaner display. /// /// # Arguments /// /// * `url_str` - The full URL string /// /// # Returns /// /// The domain host or a fallback "Web" string pub(crate) fn extract_source(url_str: &str) -> String { url::Url::parse(url_str) .ok() .and_then(|u| u.host_str().map(String::from)) .map(|host| host.strip_prefix("www.").unwrap_or(&host).to_string()) .unwrap_or_else(|| "Web".into()) } /// Deduplicate and rank search results for quality, similar to Perplexity. /// /// Applies the following filters in order: /// 1. Remove results with empty content (no snippet = low value) /// 2. Deduplicate by domain (keep highest-scored result per domain) /// 3. Sort by SearXNG relevance score (descending) /// 4. Cap at `max_results` /// /// # Arguments /// /// * `results` - Raw search results from SearXNG /// * `max_results` - Maximum number of results to return /// /// # Returns /// /// Filtered, deduplicated, and ranked results pub(crate) fn rank_and_deduplicate( mut results: Vec, max_results: usize, ) -> Vec { // Filter out results with no meaningful content results.retain(|r| r.content.as_ref().is_some_and(|c| c.trim().len() >= 20)); // Sort by score descending so we keep the best result per domain results.sort_by(|a, b| { b.score .partial_cmp(&a.score) .unwrap_or(std::cmp::Ordering::Equal) }); // Deduplicate by domain: keep only the first (highest-scored) per domain let mut seen_domains = HashSet::new(); results.retain(|r| { let domain = extract_source(&r.url); seen_domains.insert(domain) }); results.truncate(max_results); results } } /// Search for news using the SearXNG meta-search engine. /// /// Uses Perplexity-style query enrichment and result ranking: /// - Queries the "news" and "general" categories for fresh, relevant results /// - Filters to the last month for recency /// - Deduplicates by domain for source diversity /// - Ranks by SearXNG relevance score /// - Filters out results without meaningful content /// /// # Arguments /// /// * `query` - The search query string /// /// # Returns /// /// Up to 15 high-quality `NewsCard` results, or a `ServerFnError` on failure /// /// # Errors /// /// Returns `ServerFnError` if the SearXNG request fails or response parsing fails #[post("/api/search")] pub async fn search_topic(query: String) -> Result, ServerFnError> { use inner::{extract_source, rank_and_deduplicate, SearxngResponse}; let state: crate::infrastructure::ServerState = dioxus_fullstack::FullstackContext::extract().await?; let searxng_url = state.services.searxng_url.clone(); // Enrich the query with "latest news" context for better results, // similar to how Perplexity reformulates queries before searching. let enriched_query = format!("{query} latest news"); // Use POST with form-encoded body because SearXNG's default config // sets `method: "POST"` which rejects GET requests with 405. let search_url = format!("{searxng_url}/search"); let params = [ ("q", enriched_query.as_str()), ("format", "json"), ("language", "en"), ("categories", "news,general"), ("time_range", "month"), ]; let client = reqwest::Client::new(); let resp = client .post(&search_url) .form(¶ms) .send() .await .map_err(|e| ServerFnError::new(format!("SearXNG request failed: {e}")))?; if !resp.status().is_success() { return Err(ServerFnError::new(format!( "SearXNG returned status {}", resp.status() ))); } let body: SearxngResponse = resp .json() .await .map_err(|e| ServerFnError::new(format!("Failed to parse SearXNG response: {e}")))?; // Apply Perplexity-style ranking: filter empties, deduplicate domains, sort by score let ranked = rank_and_deduplicate(body.results, 15); let cards: Vec = ranked .into_iter() .map(|r| { let summary = r .content .clone() .unwrap_or_default() .chars() .take(200) .collect::(); let content = r.content.unwrap_or_default(); NewsCard { title: r.title, source: extract_source(&r.url), summary, content, category: query.clone(), url: r.url, thumbnail_url: r.thumbnail, published_at: r.published_date.unwrap_or_else(|| "Recent".into()), } }) .collect(); Ok(cards) } /// Fetch trending topic keywords by running a broad news search and /// extracting the most frequent meaningful terms from result titles. /// /// This approach works regardless of whether SearXNG has autocomplete /// configured, since it uses the standard search API. /// /// # Returns /// /// Up to 8 trending keyword strings, or a `ServerFnError` on failure /// /// # Errors /// /// Returns `ServerFnError` if the SearXNG search request fails #[get("/api/trending")] pub async fn get_trending_topics() -> Result, ServerFnError> { use inner::SearxngResponse; use std::collections::HashMap; let state: crate::infrastructure::ServerState = dioxus_fullstack::FullstackContext::extract().await?; let searxng_url = state.services.searxng_url.clone(); // Use POST to match SearXNG's default `method: "POST"` setting let search_url = format!("{searxng_url}/search"); let params = [ ("q", "trending technology AI"), ("format", "json"), ("language", "en"), ("categories", "news"), ("time_range", "week"), ]; let client = reqwest::Client::builder() .timeout(std::time::Duration::from_secs(5)) .build() .map_err(|e| ServerFnError::new(format!("HTTP client error: {e}")))?; let resp = client .post(&search_url) .form(¶ms) .send() .await .map_err(|e| ServerFnError::new(format!("SearXNG trending search failed: {e}")))?; if !resp.status().is_success() { return Err(ServerFnError::new(format!( "SearXNG trending search returned status {}", resp.status() ))); } let body: SearxngResponse = resp .json() .await .map_err(|e| ServerFnError::new(format!("Failed to parse trending response: {e}")))?; // Common stop words to exclude from trending keywords const STOP_WORDS: &[&str] = &[ "the", "a", "an", "and", "or", "but", "in", "on", "at", "to", "for", "of", "with", "by", "from", "is", "are", "was", "were", "be", "been", "has", "have", "had", "do", "does", "did", "will", "would", "could", "should", "may", "can", "not", "no", "it", "its", "this", "that", "these", "how", "what", "why", "who", "when", "new", "says", "said", "about", "after", "over", "into", "up", "out", "as", "all", "more", "than", "just", "now", "also", "us", "we", "you", "your", "our", "if", "so", "like", "get", "make", "year", "years", "one", "two", ]; // Count word frequency across all result titles. Words are lowercased // and must be at least 3 characters to filter out noise. let mut word_counts: HashMap = HashMap::new(); for result in &body.results { for word in result.title.split_whitespace() { // Strip punctuation from edges, lowercase let clean: String = word .trim_matches(|c: char| !c.is_alphanumeric()) .to_lowercase(); if clean.len() >= 3 && !STOP_WORDS.contains(&clean.as_str()) { *word_counts.entry(clean).or_insert(0) += 1; } } } // Sort by frequency descending, take top 8 let mut sorted: Vec<(String, u32)> = word_counts.into_iter().collect(); sorted.sort_by(|a, b| b.1.cmp(&a.1)); // Capitalize first letter for display let topics: Vec = sorted .into_iter() .filter(|(_, count)| *count >= 2) .take(8) .map(|(word, _)| { let mut chars = word.chars(); match chars.next() { Some(c) => c.to_uppercase().to_string() + chars.as_str(), None => word, } }) .collect(); Ok(topics) } #[cfg(all(test, feature = "server"))] mod tests { #![allow(clippy::unwrap_used, clippy::expect_used)] use super::inner::*; use pretty_assertions::assert_eq; // ----------------------------------------------------------------------- // extract_source() // ----------------------------------------------------------------------- #[test] fn extract_source_strips_www() { assert_eq!( extract_source("https://www.example.com/page"), "example.com" ); } #[test] fn extract_source_returns_domain() { assert_eq!( extract_source("https://techcrunch.com/article"), "techcrunch.com" ); } #[test] fn extract_source_invalid_url_returns_web() { assert_eq!(extract_source("not-a-url"), "Web"); } #[test] fn extract_source_no_scheme_returns_web() { // url::Url::parse requires a scheme; bare domain fails assert_eq!(extract_source("example.com/path"), "Web"); } // ----------------------------------------------------------------------- // rank_and_deduplicate() // ----------------------------------------------------------------------- fn make_result(url: &str, content: &str, score: f64) -> SearxngResult { SearxngResult { title: "Title".into(), url: url.into(), content: if content.is_empty() { None } else { Some(content.into()) }, published_date: None, thumbnail: None, score, } } #[test] fn rank_filters_empty_content() { let results = vec![ make_result("https://a.com", "", 10.0), make_result( "https://b.com", "This is meaningful content that passes the length filter", 5.0, ), ]; let ranked = rank_and_deduplicate(results, 10); assert_eq!(ranked.len(), 1); assert_eq!(ranked[0].url, "https://b.com"); } #[test] fn rank_filters_short_content() { let results = vec![ make_result("https://a.com", "short", 10.0), make_result( "https://b.com", "This content is long enough to pass the 20-char filter threshold", 5.0, ), ]; let ranked = rank_and_deduplicate(results, 10); assert_eq!(ranked.len(), 1); } #[test] fn rank_deduplicates_by_domain_keeps_highest() { let results = vec![ make_result( "https://example.com/page1", "First result with enough content here for the filter", 3.0, ), make_result( "https://example.com/page2", "Second result with enough content here for the filter", 8.0, ), ]; let ranked = rank_and_deduplicate(results, 10); assert_eq!(ranked.len(), 1); // Should keep the highest-scored one (page2 with score 8.0) assert_eq!(ranked[0].url, "https://example.com/page2"); } #[test] fn rank_sorts_by_score_descending() { let results = vec![ make_result( "https://a.com/p", "Content A that is long enough to pass the filter check", 1.0, ), make_result( "https://b.com/p", "Content B that is long enough to pass the filter check", 5.0, ), make_result( "https://c.com/p", "Content C that is long enough to pass the filter check", 3.0, ), ]; let ranked = rank_and_deduplicate(results, 10); assert_eq!(ranked.len(), 3); assert!(ranked[0].score >= ranked[1].score); assert!(ranked[1].score >= ranked[2].score); } #[test] fn rank_truncates_to_max_results() { let results: Vec<_> = (0..20) .map(|i| { make_result( &format!("https://site{i}.com/page"), &format!("Content for site {i} that is long enough to pass the filter"), i as f64, ) }) .collect(); let ranked = rank_and_deduplicate(results, 5); assert_eq!(ranked.len(), 5); } #[test] fn rank_empty_input_returns_empty() { let ranked = rank_and_deduplicate(vec![], 10); assert!(ranked.is_empty()); } #[test] fn rank_all_filtered_returns_empty() { let results = vec![ make_result("https://a.com", "", 10.0), make_result("https://b.com", "too short", 5.0), ]; let ranked = rank_and_deduplicate(results, 10); assert!(ranked.is_empty()); } }