feat(org): add LiteLLM usage stats to organization dashboard

Replace mock token usage with real data from the LiteLLM free-tier APIs
(global/activity, global/activity/model, global/spend/models). Add a
per-model breakdown table, loading/error states, usage data models with
serde tests, and i18n keys for all five languages.
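
As a sketch of the direction (not the literal code in this commit): each
usage model pairs a serde Deserialize shape with a small fetch helper per
endpoint. The field names below are illustrative assumptions, not LiteLLM's
documented schema, and reqwest stands in for whatever HTTP client the app
actually uses.

use serde::Deserialize;

// Hypothetical shape for one day of /global/activity data; the commit's
// real models and field names may differ.
#[derive(Debug, Clone, PartialEq, Deserialize)]
pub struct DailyActivity {
    pub date: String,
    pub api_requests: u64,
    pub total_tokens: u64,
}

#[derive(Debug, Clone, PartialEq, Deserialize)]
pub struct ActivityResponse {
    pub daily_data: Vec<DailyActivity>,
    pub sum_api_requests: u64,
    pub sum_total_tokens: u64,
}

/// Fetch aggregate activity from the LiteLLM proxy, authenticating with
/// the proxy key as a Bearer token.
pub async fn get_global_activity(
    base_url: &str,
    api_key: &str,
) -> Result<ActivityResponse, reqwest::Error> {
    reqwest::Client::new()
        .get(format!("{base_url}/global/activity"))
        .bearer_auth(api_key)
        .send()
        .await?
        .error_for_status()?
        .json()
        .await
}

#[cfg(test)]
mod tests {
    use super::*;

    // The kind of round-trip test the commit message describes.
    #[test]
    fn deserializes_activity_response() {
        let json = r#"{
            "daily_data": [
                {"date": "2026-02-25", "api_requests": 42, "total_tokens": 1234}
            ],
            "sum_api_requests": 42,
            "sum_total_tokens": 1234
        }"#;
        let parsed: ActivityResponse = serde_json::from_str(json).unwrap();
        assert_eq!(parsed.daily_data[0].total_tokens, 1234);
    }
}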

Also included: replace Ollama with the LiteLLM proxy, and update the
config, docker-compose setup, and provider infrastructure to match.
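
The status shim referenced in the diff below could look roughly like this.
The LitellmStatus shape (an online flag plus a models list) is taken from
how the component uses it; probing /v1/models, the OpenAI-compatible
listing the proxy exposes, is an assumption about how the status check
works, not something confirmed by this commit.

use serde::Deserialize;

#[derive(Debug, Clone, PartialEq)]
pub struct LitellmStatus {
    pub online: bool,
    pub models: Vec<String>,
}

// Assumed /v1/models response shape; only the model ids are read.
#[derive(Deserialize)]
struct ModelList {
    data: Vec<ModelEntry>,
}

#[derive(Deserialize)]
struct ModelEntry {
    id: String,
}

/// Treat any successful /v1/models response as "online" and reuse the
/// listed model ids for the sidebar's model list. Callers fall back to
/// an offline status on error, matching the `unwrap_or` in the diff.
pub async fn get_litellm_status(base_url: String) -> Result<LitellmStatus, reqwest::Error> {
    let list: ModelList = reqwest::Client::new()
        .get(format!("{base_url}/v1/models"))
        .send()
        .await?
        .error_for_status()?
        .json()
        .await?;
    Ok(LitellmStatus {
        online: true,
        models: list.data.into_iter().map(|m| m.id).collect(),
    })
}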

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Author: Sharang Parnerkar
Date:   2026-02-26 18:23:46 +01:00
Parent: 0deaaca848
Commit: 0cb350e26e
28 files changed, 1077 insertions(+), 470 deletions(-)


@@ -1,9 +1,9 @@
 use dioxus::prelude::*;
 use crate::i18n::{t, Locale};
-use crate::infrastructure::ollama::{get_ollama_status, OllamaStatus};
+use crate::infrastructure::litellm::{get_litellm_status, LitellmStatus};
-/// Right sidebar for the dashboard, showing Ollama status, trending topics,
+/// Right sidebar for the dashboard, showing LiteLLM status, trending topics,
 /// and recent search history.
 ///
 /// Appears when no article card is selected. Disappears when the user opens
@@ -11,13 +11,13 @@ use crate::infrastructure::ollama::{get_ollama_status, OllamaStatus};
 ///
 /// # Props
 ///
-/// * `ollama_url` - Ollama instance URL for status polling
+/// * `litellm_url` - LiteLLM proxy URL for status polling
 /// * `trending` - Trending topic keywords extracted from recent news headlines
 /// * `recent_searches` - Recent search topics stored in localStorage
 /// * `on_topic_click` - Fires when a trending or recent topic is clicked
 #[component]
 pub fn DashboardSidebar(
-    ollama_url: String,
+    litellm_url: String,
     trending: Vec<String>,
     recent_searches: Vec<String>,
     on_topic_click: EventHandler<String>,
@@ -25,26 +25,26 @@ pub fn DashboardSidebar(
     let locale = use_context::<Signal<Locale>>();
     let l = *locale.read();
 
-    // Fetch Ollama status once on mount.
+    // Fetch LiteLLM status once on mount.
     // use_resource with no signal dependencies runs exactly once and
     // won't re-fire on parent re-renders (unlike use_effect).
-    let url = ollama_url.clone();
+    let url = litellm_url.clone();
     let status_resource = use_resource(move || {
         let u = url.clone();
         async move {
-            get_ollama_status(u).await.unwrap_or(OllamaStatus {
+            get_litellm_status(u).await.unwrap_or(LitellmStatus {
                 online: false,
                 models: Vec::new(),
             })
         }
     });
 
-    let current_status: OllamaStatus =
+    let current_status: LitellmStatus =
         status_resource
             .read()
             .as_ref()
             .cloned()
-            .unwrap_or(OllamaStatus {
+            .unwrap_or(LitellmStatus {
                 online: false,
                 models: Vec::new(),
             });
@@ -52,9 +52,9 @@ pub fn DashboardSidebar(
     rsx! {
         aside { class: "dashboard-sidebar",
-            // -- Ollama Status Section --
+            // -- LiteLLM Status Section --
             div { class: "sidebar-section",
-                h4 { class: "sidebar-section-title", "{t(l, \"dashboard.ollama_status\")}" }
+                h4 { class: "sidebar-section-title", "{t(l, \"dashboard.litellm_status\")}" }
                 div { class: "sidebar-status-row",
                     span { class: if current_status.online { "sidebar-status-dot sidebar-status-dot--online" } else { "sidebar-status-dot sidebar-status-dot--offline" } }
                     span { class: "sidebar-status-label",
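
Aside: the use_resource comment in the hunk above captures a Dioxus idiom
worth illustrating outside the diff. A minimal sketch, assuming the
Dioxus 0.5-era use_resource API and a hypothetical fetch_status stub:
because the closure reads no signals, the resource has no reactive
dependencies, so the future runs once on mount and parent re-renders do
not re-fire it.

use dioxus::prelude::*;

// Hypothetical stub standing in for get_litellm_status.
async fn fetch_status(url: String) -> bool {
    // A real implementation would perform an HTTP request here.
    !url.is_empty()
}

#[component]
fn StatusBadge(url: String) -> Element {
    // The closure clones a plain String and reads no signals, so
    // use_resource has no reactive dependencies: it runs exactly once
    // on mount and is not re-fired when the parent re-renders.
    let url_once = url.clone();
    let status = use_resource(move || {
        let u = url_once.clone();
        async move { fetch_status(u).await }
    });

    // None until the future resolves; treat that as offline.
    let online = status.read().as_ref().copied().unwrap_or(false);
    let label = if online { "online" } else { "offline" };
    rsx! {
        span { "{label}" }
    }
}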


@@ -112,12 +112,12 @@ pub fn mock_news() -> Vec<NewsCardModel> {
             published_at: "2026-02-16".into(),
         },
         NewsCardModel {
-            title: "Ollama Adds Multi-GPU Scheduling".into(),
-            source: "Ollama".into(),
-            summary: "Run large models across multiple GPUs with automatic sharding.".into(),
-            content: "Ollama now supports multi-GPU scheduling with automatic \
-                model sharding. Users can run models across multiple GPUs \
-                for improved inference performance."
+            title: "LiteLLM Adds Multi-Provider Routing".into(),
+            source: "LiteLLM".into(),
+            summary: "Route requests across multiple LLM providers with automatic fallback.".into(),
+            content: "LiteLLM now supports multi-provider routing with automatic \
+                fallback. Users can route requests across multiple providers \
+                for improved reliability and cost optimization."
                 .into(),
             category: "Infrastructure".into(),
             url: "#".into(),