#[cfg(feature = "server")] use std::collections::HashMap; use dioxus::prelude::*; use serde::{Deserialize, Serialize}; use crate::models::LitellmUsageStats; #[cfg(feature = "server")] use crate::models::ModelUsage; /// Status of a LiteLLM proxy instance, including connectivity and available models. /// /// # Fields /// /// * `online` - Whether the LiteLLM API responded successfully /// * `models` - List of model IDs available through the proxy #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub struct LitellmStatus { pub online: bool, pub models: Vec, } /// Response from LiteLLM's `GET /v1/models` endpoint (OpenAI-compatible). #[cfg(feature = "server")] #[derive(Deserialize)] struct ModelsResponse { data: Vec, } /// A single model entry from the OpenAI-compatible models list. #[cfg(feature = "server")] #[derive(Deserialize)] struct ModelObject { id: String, } /// Check the status of a LiteLLM proxy by querying its models endpoint. /// /// Calls `GET /v1/models` to list available models and determine /// whether the instance is reachable. Sends the API key as a Bearer token /// if configured. /// /// # Arguments /// /// * `litellm_url` - Base URL of the LiteLLM proxy (e.g. "http://localhost:4000") /// /// # Returns /// /// A `LitellmStatus` with `online: true` and model IDs if reachable, /// or `online: false` with an empty model list on failure /// /// # Errors /// /// Returns `ServerFnError` only on serialization issues; network failures /// are caught and returned as `online: false` #[post("/api/litellm-status")] pub async fn get_litellm_status(litellm_url: String) -> Result { let state: crate::infrastructure::ServerState = dioxus_fullstack::FullstackContext::extract().await?; let base_url = if litellm_url.is_empty() { state.services.litellm_url.clone() } else { litellm_url }; let api_key = state.services.litellm_api_key.clone(); let url = format!("{}/v1/models", base_url.trim_end_matches('/')); let client = reqwest::Client::builder() .timeout(std::time::Duration::from_secs(5)) .build() .map_err(|e| ServerFnError::new(format!("HTTP client error: {e}")))?; let mut request = client.get(&url); if !api_key.is_empty() { request = request.header("Authorization", format!("Bearer {api_key}")); } let resp = match request.send().await { Ok(r) if r.status().is_success() => r, _ => { return Ok(LitellmStatus { online: false, models: Vec::new(), }); } }; let body: ModelsResponse = match resp.json().await { Ok(b) => b, Err(_) => { return Ok(LitellmStatus { online: true, models: Vec::new(), }); } }; let models = body.data.into_iter().map(|m| m.id).collect(); Ok(LitellmStatus { online: true, models, }) } /// Response from LiteLLM's `GET /global/activity` endpoint. /// /// Returns aggregate token counts and API request totals for a date range. /// Available on the free tier (no Enterprise license needed). #[cfg(feature = "server")] #[derive(Debug, Deserialize)] struct ActivityResponse { /// Total tokens across all models in the date range #[serde(default)] sum_total_tokens: u64, } /// Per-model entry from `GET /global/activity/model`. /// /// Each entry contains a model name and its aggregated token total. #[cfg(feature = "server")] #[derive(Debug, Deserialize)] struct ActivityModelEntry { /// Model identifier (may be empty for unattributed traffic) #[serde(default)] model: String, /// Sum of tokens used by this model in the date range #[serde(default)] sum_total_tokens: u64, } /// Per-model spend entry from `GET /global/spend/models`. /// /// Each entry maps a model name to its total spend in USD. #[cfg(feature = "server")] #[derive(Debug, Deserialize)] struct SpendModelEntry { /// Model identifier #[serde(default)] model: String, /// Total spend in USD #[serde(default)] total_spend: f64, } /// Merge per-model token counts and spend data into `ModelUsage` entries. /// /// Joins `activity_models` (tokens) and `spend_models` (spend) by model /// name using a HashMap for O(n + m) merge. Entries with empty model /// names are skipped. /// /// # Arguments /// /// * `activity_models` - Per-model token data from `/global/activity/model` /// * `spend_models` - Per-model spend data from `/global/spend/models` /// /// # Returns /// /// Merged list sorted by total tokens descending #[cfg(feature = "server")] fn merge_model_data( activity_models: Vec, spend_models: Vec, ) -> Vec { let mut model_map: HashMap = HashMap::new(); for entry in activity_models { if entry.model.is_empty() { continue; } model_map .entry(entry.model.clone()) .or_insert_with(|| ModelUsage { model: entry.model, ..Default::default() }) .total_tokens = entry.sum_total_tokens; } for entry in spend_models { if entry.model.is_empty() { continue; } model_map .entry(entry.model.clone()) .or_insert_with(|| ModelUsage { model: entry.model, ..Default::default() }) .spend = entry.total_spend; } let mut result: Vec = model_map.into_values().collect(); result.sort_by(|a, b| b.total_tokens.cmp(&a.total_tokens)); result } /// Fetch aggregated usage statistics from LiteLLM's free-tier APIs. /// /// Combines three endpoints to build a complete usage picture: /// - `GET /global/activity` - total token counts /// - `GET /global/activity/model` - per-model token breakdown /// - `GET /global/spend/models` - per-model spend in USD /// /// # Arguments /// /// * `start_date` - Start of the reporting period in `YYYY-MM-DD` format /// * `end_date` - End of the reporting period in `YYYY-MM-DD` format /// /// # Returns /// /// Aggregated usage stats; returns default (zeroed) stats on network /// failure or permission errors /// /// # Errors /// /// Returns `ServerFnError` only on HTTP client construction failure #[post("/api/litellm-usage")] pub async fn get_litellm_usage( start_date: String, end_date: String, ) -> Result { let state: crate::infrastructure::ServerState = dioxus_fullstack::FullstackContext::extract().await?; let base_url = &state.services.litellm_url; let api_key = &state.services.litellm_api_key; if base_url.is_empty() { return Ok(LitellmUsageStats::default()); } let base = base_url.trim_end_matches('/'); let date_params = format!("start_date={start_date}&end_date={end_date}"); let client = reqwest::Client::builder() .timeout(std::time::Duration::from_secs(10)) .build() .map_err(|e| ServerFnError::new(format!("HTTP client error: {e}")))?; // Helper closure to build an authenticated GET request let auth_get = |url: String| { let mut req = client.get(url); if !api_key.is_empty() { req = req.header("Authorization", format!("Bearer {api_key}")); } req }; // Fire all three requests concurrently to minimise latency let (activity_res, model_activity_res, model_spend_res) = tokio::join!( auth_get(format!("{base}/global/activity?{date_params}")).send(), auth_get(format!("{base}/global/activity/model?{date_params}")).send(), auth_get(format!("{base}/global/spend/models?{date_params}")).send(), ); // Parse total token count from /global/activity let total_tokens = match activity_res { Ok(r) if r.status().is_success() => r .json::() .await .map(|a| a.sum_total_tokens) .unwrap_or(0), _ => 0, }; // Parse per-model token breakdown from /global/activity/model let activity_models: Vec = match model_activity_res { Ok(r) if r.status().is_success() => r.json().await.unwrap_or_default(), _ => Vec::new(), }; // Parse per-model spend from /global/spend/models let spend_models: Vec = match model_spend_res { Ok(r) if r.status().is_success() => r.json().await.unwrap_or_default(), _ => Vec::new(), }; let total_spend: f64 = spend_models.iter().map(|m| m.total_spend).sum(); let model_breakdown = merge_model_data(activity_models, spend_models); Ok(LitellmUsageStats { total_spend, // Free-tier endpoints don't provide prompt/completion split; // total_tokens comes from /global/activity. total_prompt_tokens: 0, total_completion_tokens: 0, total_tokens, model_breakdown, }) } #[cfg(test)] mod tests { use super::*; #[test] fn merge_empty_inputs() { let result = merge_model_data(Vec::new(), Vec::new()); assert!(result.is_empty()); } #[test] fn merge_activity_only() { let activity = vec![ActivityModelEntry { model: "gpt-4".into(), sum_total_tokens: 1500, }]; let result = merge_model_data(activity, Vec::new()); assert_eq!(result.len(), 1); assert_eq!(result[0].model, "gpt-4"); assert_eq!(result[0].total_tokens, 1500); assert_eq!(result[0].spend, 0.0); } #[test] fn merge_spend_only() { let spend = vec![SpendModelEntry { model: "gpt-4".into(), total_spend: 2.5, }]; let result = merge_model_data(Vec::new(), spend); assert_eq!(result.len(), 1); assert_eq!(result[0].model, "gpt-4"); assert_eq!(result[0].spend, 2.5); assert_eq!(result[0].total_tokens, 0); } #[test] fn merge_joins_by_model_name() { let activity = vec![ ActivityModelEntry { model: "gpt-4".into(), sum_total_tokens: 5000, }, ActivityModelEntry { model: "claude-3".into(), sum_total_tokens: 3000, }, ]; let spend = vec![ SpendModelEntry { model: "gpt-4".into(), total_spend: 1.0, }, SpendModelEntry { model: "claude-3".into(), total_spend: 0.5, }, ]; let result = merge_model_data(activity, spend); assert_eq!(result.len(), 2); // Sorted by tokens descending: gpt-4 (5000) before claude-3 (3000) assert_eq!(result[0].model, "gpt-4"); assert_eq!(result[0].total_tokens, 5000); assert_eq!(result[0].spend, 1.0); assert_eq!(result[1].model, "claude-3"); assert_eq!(result[1].total_tokens, 3000); assert_eq!(result[1].spend, 0.5); } #[test] fn merge_skips_empty_model_names() { let activity = vec![ ActivityModelEntry { model: "".into(), sum_total_tokens: 100, }, ActivityModelEntry { model: "gpt-4".into(), sum_total_tokens: 500, }, ]; let spend = vec![SpendModelEntry { model: "".into(), total_spend: 0.01, }]; let result = merge_model_data(activity, spend); assert_eq!(result.len(), 1); assert_eq!(result[0].model, "gpt-4"); } #[test] fn merge_unmatched_models_appear_in_both_directions() { let activity = vec![ActivityModelEntry { model: "tokens-only".into(), sum_total_tokens: 1000, }]; let spend = vec![SpendModelEntry { model: "spend-only".into(), total_spend: 0.5, }]; let result = merge_model_data(activity, spend); assert_eq!(result.len(), 2); // tokens-only has 1000 tokens, spend-only has 0 tokens assert_eq!(result[0].model, "tokens-only"); assert_eq!(result[0].total_tokens, 1000); assert_eq!(result[1].model, "spend-only"); assert_eq!(result[1].spend, 0.5); } }