From fe4f8e84ae517c40afd00a84c3f1b1cbd6558efe Mon Sep 17 00:00:00 2001
From: Sharang Parnerkar <sharang@meghsakha.com>
Date: Thu, 26 Feb 2026 17:52:47 +0000
Subject: [PATCH] feat: replaced ollama with litellm (#18)

Co-authored-by: Sharang Parnerkar <parnerkarsharang@gmail.com>
Reviewed-on: https://gitea.meghsakha.com/sharang/certifai/pulls/18
---
 .env.example                          |   9 +-
 Cargo.lock                            |   1 +
 Cargo.toml                            |   3 +-
 assets/i18n/de.json                   |  22 +-
 assets/i18n/en.json                   |  22 +-
 assets/i18n/es.json                   |  22 +-
 assets/i18n/fr.json                   |  22 +-
 assets/i18n/pt.json                   |  22 +-
 assets/tailwind.css                   | 245 ++++++----------
 docker-compose.yml                    |   3 +-
 librechat/librechat.yaml              |  18 +-
 src/components/dashboard_sidebar.rs   |  22 +-
 src/components/news_card.rs           |  12 +-
 src/infrastructure/chat.rs            |  56 ++--
 src/infrastructure/config.rs          |  57 ++--
 src/infrastructure/litellm.rs         | 403 ++++++++++++++++++++++++++
 src/infrastructure/llm.rs             | 102 ++++---
 src/infrastructure/mod.rs             |   2 +-
 src/infrastructure/ollama.rs          |  92 ------
 src/infrastructure/provider_client.rs |  25 +-
 src/infrastructure/server_state.rs    |   2 +-
 src/models/chat.rs                    |  14 +-
 src/models/organization.rs            | 114 ++++++++
 src/models/provider.rs                |  22 +-
 src/models/user.rs                    |  26 +-
 src/pages/dashboard.rs                |  42 +--
 src/pages/organization/dashboard.rs   | 181 +++++++++++-
 src/pages/providers.rs                |  46 +--
 28 files changed, 1107 insertions(+), 500 deletions(-)
 create mode 100644 src/infrastructure/litellm.rs
 delete mode 100644 src/infrastructure/ollama.rs

diff --git a/.env.example b/.env.example
index f5a8f34..a401835 100644
--- a/.env.example
+++ b/.env.example
@@ -34,10 +34,11 @@ MONGODB_DATABASE=certifai
 SEARXNG_URL=http://localhost:8888
 
 # ---------------------------------------------------------------------------
-# Ollama LLM instance [OPTIONAL - defaults shown]
+# LiteLLM proxy [OPTIONAL - defaults shown]
 # ---------------------------------------------------------------------------
-OLLAMA_URL=http://localhost:11434
-OLLAMA_MODEL=llama3.1:8b
+LITELLM_URL=http://localhost:4000
+LITELLM_MODEL=qwen3-32b
+LITELLM_API_KEY=
 
 # ---------------------------------------------------------------------------
 # LibreChat (external chat via SSO) [OPTIONAL - default: http://localhost:3080]
@@ -47,7 +48,7 @@ LIBRECHAT_URL=http://localhost:3080
 # ---------------------------------------------------------------------------
 # LLM Providers (comma-separated list) [OPTIONAL]
 # ---------------------------------------------------------------------------
-LLM_PROVIDERS=ollama
+LLM_PROVIDERS=litellm
 
 # ---------------------------------------------------------------------------
 # SMTP (transactional email) [OPTIONAL]
diff --git a/Cargo.lock b/Cargo.lock
index 9e9ff42..e7f4119 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -773,6 +773,7 @@ dependencies = [
  "dioxus-sdk",
  "dotenvy",
  "futures",
+ "js-sys",
  "maud",
  "mongodb",
  "petname",
diff --git a/Cargo.toml b/Cargo.toml
index 9de0a77..5a126a3 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -61,6 +61,7 @@ secrecy = { version = "0.10", default-features = false, optional = true }
 serde_json = { version = "1.0.133", default-features = false }
 maud = { version = "0.27", default-features = false }
 url = { version = "2.5.4", default-features = false, optional = true }
+js-sys = { version = "0.3", optional = true }
 wasm-bindgen = { version = "0.2", optional = true }
 web-sys = { version = "0.3", optional = true, features = [
     "Clipboard",
@@ -91,7 +92,7 @@ bytes = { version = "1", optional = true }
 
 [features]
 # default = ["web"]
-web = ["dioxus/web", "dep:reqwest", "dep:web-sys", "dep:wasm-bindgen"]
+web = ["dioxus/web", "dep:reqwest", "dep:web-sys", "dep:wasm-bindgen", "dep:js-sys"]
 server = [
     "dioxus/server",
     "dep:axum",
diff --git a/assets/i18n/de.json b/assets/i18n/de.json
index dade857..4f985ff 100644
--- a/assets/i18n/de.json
+++ b/assets/i18n/de.json
@@ -58,15 +58,15 @@
     "title": "Dashboard",
     "subtitle": "KI-Nachrichten und Neuigkeiten",
     "topic_placeholder": "Themenname...",
-    "ollama_settings": "Ollama-Einstellungen",
-    "settings_hint": "Leer lassen, um OLLAMA_URL / OLLAMA_MODEL aus .env zu verwenden",
-    "ollama_url": "Ollama-URL",
-    "ollama_url_placeholder": "Verwendet OLLAMA_URL aus .env",
+    "litellm_settings": "LiteLLM-Einstellungen",
+    "settings_hint": "Leer lassen, um LITELLM_URL / LITELLM_MODEL aus .env zu verwenden",
+    "litellm_url": "LiteLLM-URL",
+    "litellm_url_placeholder": "Verwendet LITELLM_URL aus .env",
     "model": "Modell",
-    "model_placeholder": "Verwendet OLLAMA_MODEL aus .env",
+    "model_placeholder": "Verwendet LITELLM_MODEL aus .env",
     "searching": "Suche laeuft...",
     "search_failed": "Suche fehlgeschlagen: {e}",
-    "ollama_status": "Ollama-Status",
+    "litellm_status": "LiteLLM-Status",
     "trending": "Im Trend",
     "recent_searches": "Letzte Suchen"
   },
@@ -144,6 +144,16 @@
     "email_address": "E-Mail-Adresse",
     "email_placeholder": "kollege@firma.de",
     "send_invite": "Einladung senden",
+    "total_spend": "Gesamtausgaben",
+    "total_tokens": "Tokens gesamt",
+    "model_usage": "Nutzung nach Modell",
+    "model": "Modell",
+    "tokens": "Tokens",
+    "spend": "Ausgaben",
+    "usage_unavailable": "Nutzungsdaten nicht verfuegbar",
+    "loading_usage": "Nutzungsdaten werden geladen...",
+    "prompt_tokens": "Prompt-Tokens",
+    "completion_tokens": "Antwort-Tokens",
     "pricing_title": "Preise",
     "pricing_subtitle": "Waehlen Sie den passenden Plan fuer Ihre Organisation"
   },
diff --git a/assets/i18n/en.json b/assets/i18n/en.json
index 666890e..038ca71 100644
--- a/assets/i18n/en.json
+++ b/assets/i18n/en.json
@@ -58,15 +58,15 @@
     "title": "Dashboard",
     "subtitle": "AI news and updates",
     "topic_placeholder": "Topic name...",
-    "ollama_settings": "Ollama Settings",
-    "settings_hint": "Leave empty to use OLLAMA_URL / OLLAMA_MODEL from .env",
-    "ollama_url": "Ollama URL",
-    "ollama_url_placeholder": "Uses OLLAMA_URL from .env",
+    "litellm_settings": "LiteLLM Settings",
+    "settings_hint": "Leave empty to use LITELLM_URL / LITELLM_MODEL from .env",
+    "litellm_url": "LiteLLM URL",
+    "litellm_url_placeholder": "Uses LITELLM_URL from .env",
     "model": "Model",
-    "model_placeholder": "Uses OLLAMA_MODEL from .env",
+    "model_placeholder": "Uses LITELLM_MODEL from .env",
     "searching": "Searching...",
     "search_failed": "Search failed: {e}",
-    "ollama_status": "Ollama Status",
+    "litellm_status": "LiteLLM Status",
     "trending": "Trending",
     "recent_searches": "Recent Searches"
   },
@@ -144,6 +144,16 @@
     "email_address": "Email Address",
     "email_placeholder": "colleague@company.com",
     "send_invite": "Send Invite",
+    "total_spend": "Total Spend",
+    "total_tokens": "Total Tokens",
+    "model_usage": "Usage by Model",
+    "model": "Model",
+    "tokens": "Tokens",
+    "spend": "Spend",
+    "usage_unavailable": "Usage data unavailable",
+    "loading_usage": "Loading usage data...",
+    "prompt_tokens": "Prompt Tokens",
+    "completion_tokens": "Completion Tokens",
     "pricing_title": "Pricing",
     "pricing_subtitle": "Choose the plan that fits your organization"
   },
diff --git a/assets/i18n/es.json b/assets/i18n/es.json
index ae356e9..07187e1 100644
--- a/assets/i18n/es.json
+++ b/assets/i18n/es.json
@@ -58,15 +58,15 @@
     "title": "Panel de control",
     "subtitle": "Noticias y actualizaciones de IA",
     "topic_placeholder": "Nombre del tema...",
-    "ollama_settings": "Configuracion de Ollama",
-    "settings_hint": "Dejar vacio para usar OLLAMA_URL / OLLAMA_MODEL del archivo .env",
-    "ollama_url": "URL de Ollama",
-    "ollama_url_placeholder": "Usa OLLAMA_URL del archivo .env",
+    "litellm_settings": "Configuracion de LiteLLM",
+    "settings_hint": "Dejar vacio para usar LITELLM_URL / LITELLM_MODEL del archivo .env",
+    "litellm_url": "URL de LiteLLM",
+    "litellm_url_placeholder": "Usa LITELLM_URL del archivo .env",
     "model": "Modelo",
-    "model_placeholder": "Usa OLLAMA_MODEL del archivo .env",
+    "model_placeholder": "Usa LITELLM_MODEL del archivo .env",
     "searching": "Buscando...",
     "search_failed": "La busqueda fallo: {e}",
-    "ollama_status": "Estado de Ollama",
+    "litellm_status": "Estado de LiteLLM",
     "trending": "Tendencias",
     "recent_searches": "Busquedas recientes"
   },
@@ -144,6 +144,16 @@
     "email_address": "Direccion de correo electronico",
     "email_placeholder": "colega@empresa.com",
     "send_invite": "Enviar invitacion",
+    "total_spend": "Gasto total",
+    "total_tokens": "Tokens totales",
+    "model_usage": "Uso por modelo",
+    "model": "Modelo",
+    "tokens": "Tokens",
+    "spend": "Gasto",
+    "usage_unavailable": "Datos de uso no disponibles",
+    "loading_usage": "Cargando datos de uso...",
+    "prompt_tokens": "Tokens de entrada",
+    "completion_tokens": "Tokens de respuesta",
     "pricing_title": "Precios",
     "pricing_subtitle": "Elija el plan que se adapte a su organizacion"
   },
diff --git a/assets/i18n/fr.json b/assets/i18n/fr.json
index 3c134a4..f58f9db 100644
--- a/assets/i18n/fr.json
+++ b/assets/i18n/fr.json
@@ -58,15 +58,15 @@
     "title": "Tableau de bord",
     "subtitle": "Actualites et mises a jour IA",
     "topic_placeholder": "Nom du sujet...",
-    "ollama_settings": "Parametres Ollama",
-    "settings_hint": "Laissez vide pour utiliser OLLAMA_URL / OLLAMA_MODEL du fichier .env",
-    "ollama_url": "URL Ollama",
-    "ollama_url_placeholder": "Utilise OLLAMA_URL du fichier .env",
+    "litellm_settings": "Parametres LiteLLM",
+    "settings_hint": "Laissez vide pour utiliser LITELLM_URL / LITELLM_MODEL du fichier .env",
+    "litellm_url": "URL LiteLLM",
+    "litellm_url_placeholder": "Utilise LITELLM_URL du fichier .env",
     "model": "Modele",
-    "model_placeholder": "Utilise OLLAMA_MODEL du fichier .env",
+    "model_placeholder": "Utilise LITELLM_MODEL du fichier .env",
     "searching": "Recherche en cours...",
     "search_failed": "Echec de la recherche : {e}",
-    "ollama_status": "Statut Ollama",
+    "litellm_status": "Statut LiteLLM",
     "trending": "Tendances",
     "recent_searches": "Recherches recentes"
   },
@@ -144,6 +144,16 @@
     "email_address": "Adresse e-mail",
     "email_placeholder": "collegue@entreprise.com",
     "send_invite": "Envoyer l'invitation",
+    "total_spend": "Depenses totales",
+    "total_tokens": "Tokens totaux",
+    "model_usage": "Utilisation par modele",
+    "model": "Modele",
+    "tokens": "Tokens",
+    "spend": "Depenses",
+    "usage_unavailable": "Donnees d'utilisation indisponibles",
+    "loading_usage": "Chargement des donnees d'utilisation...",
+    "prompt_tokens": "Tokens d'entree",
+    "completion_tokens": "Tokens de reponse",
     "pricing_title": "Tarifs",
     "pricing_subtitle": "Choisissez le plan adapte a votre organisation"
   },
diff --git a/assets/i18n/pt.json b/assets/i18n/pt.json
index 5eeb480..d930bf9 100644
--- a/assets/i18n/pt.json
+++ b/assets/i18n/pt.json
@@ -58,15 +58,15 @@
     "title": "Painel",
     "subtitle": "Noticias e atualizacoes de IA",
     "topic_placeholder": "Nome do topico...",
-    "ollama_settings": "Definicoes do Ollama",
-    "settings_hint": "Deixe vazio para usar OLLAMA_URL / OLLAMA_MODEL do .env",
-    "ollama_url": "URL do Ollama",
-    "ollama_url_placeholder": "Utiliza OLLAMA_URL do .env",
+    "litellm_settings": "Definicoes do LiteLLM",
+    "settings_hint": "Deixe vazio para usar LITELLM_URL / LITELLM_MODEL do .env",
+    "litellm_url": "URL do LiteLLM",
+    "litellm_url_placeholder": "Utiliza LITELLM_URL do .env",
     "model": "Modelo",
-    "model_placeholder": "Utiliza OLLAMA_MODEL do .env",
+    "model_placeholder": "Utiliza LITELLM_MODEL do .env",
     "searching": "A pesquisar...",
     "search_failed": "A pesquisa falhou: {e}",
-    "ollama_status": "Estado do Ollama",
+    "litellm_status": "Estado do LiteLLM",
     "trending": "Em destaque",
     "recent_searches": "Pesquisas recentes"
   },
@@ -144,6 +144,16 @@
     "email_address": "Endereco de Email",
     "email_placeholder": "colleague@company.com",
     "send_invite": "Enviar Convite",
+    "total_spend": "Gasto total",
+    "total_tokens": "Tokens totais",
+    "model_usage": "Uso por modelo",
+    "model": "Modelo",
+    "tokens": "Tokens",
+    "spend": "Gasto",
+    "usage_unavailable": "Dados de uso indisponiveis",
+    "loading_usage": "A carregar dados de uso...",
+    "prompt_tokens": "Tokens de entrada",
+    "completion_tokens": "Tokens de resposta",
     "pricing_title": "Precos",
     "pricing_subtitle": "Escolha o plano adequado a sua organizacao"
   },
diff --git a/assets/tailwind.css b/assets/tailwind.css
index c7c9fdd..89ec9cf 100644
--- a/assets/tailwind.css
+++ b/assets/tailwind.css
@@ -1,4 +1,4 @@
-/*! tailwindcss v4.2.0 | MIT License | https://tailwindcss.com */
+/*! tailwindcss v4.2.1 | MIT License | https://tailwindcss.com */
 @layer properties;
 @layer theme, base, components, utilities;
 @layer theme {
@@ -162,59 +162,6 @@
   }
 }
 @layer utilities {
-  .diff {
-    @layer daisyui.l1.l2.l3 {
-      position: relative;
-      display: grid;
-      width: 100%;
-      overflow: hidden;
-      webkit-user-select: none;
-      user-select: none;
-      grid-template-rows: 1fr 1.8rem 1fr;
-      direction: ltr;
-      container-type: inline-size;
-      grid-template-columns: auto 1fr;
-      &:focus-visible, &:has(.diff-item-1:focus-visible) {
-        outline-style: var(--tw-outline-style);
-        outline-width: 2px;
-        outline-offset: 1px;
-        outline-color: var(--color-base-content);
-      }
-      &:focus-visible {
-        outline-style: var(--tw-outline-style);
-        outline-width: 2px;
-        outline-offset: 1px;
-        outline-color: var(--color-base-content);
-        .diff-resizer {
-          min-width: 95cqi;
-          max-width: 95cqi;
-        }
-      }
-      &:has(.diff-item-1:focus-visible) {
-        outline-style: var(--tw-outline-style);
-        outline-width: 2px;
-        outline-offset: 1px;
-        .diff-resizer {
-          min-width: 5cqi;
-          max-width: 5cqi;
-        }
-      }
-      @supports (-webkit-overflow-scrolling: touch) and (overflow: -webkit-paged-x) {
-        &:focus {
-          .diff-resizer {
-            min-width: 5cqi;
-            max-width: 5cqi;
-          }
-        }
-        &:has(.diff-item-1:focus) {
-          .diff-resizer {
-            min-width: 95cqi;
-            max-width: 95cqi;
-          }
-        }
-      }
-    }
-  }
   .modal {
     @layer daisyui.l1.l2.l3 {
       pointer-events: none;
@@ -1110,31 +1057,98 @@
       }
     }
   }
-  .chat-bubble {
+  .range {
     @layer daisyui.l1.l2.l3 {
-      position: relative;
-      display: block;
-      width: fit-content;
-      border-radius: var(--radius-field);
-      background-color: var(--color-base-300);
-      padding-inline: calc(0.25rem * 4);
-      padding-block: calc(0.25rem * 2);
-      color: var(--color-base-content);
-      grid-row-end: 3;
-      min-height: 2rem;
-      min-width: 2.5rem;
-      max-width: 90%;
-      &:before {
-        position: absolute;
-        bottom: calc(0.25rem * 0);
-        height: calc(0.25rem * 3);
-        width: calc(0.25rem * 3);
-        background-color: inherit;
-        content: "";
-        mask-repeat: no-repeat;
-        mask-image: var(--mask-chat);
-        mask-position: 0px -1px;
-        mask-size: 0.8125rem;
+      appearance: none;
+      webkit-appearance: none;
+      --range-thumb: var(--color-base-100);
+      --range-thumb-size: calc(var(--size-selector, 0.25rem) * 6);
+      --range-progress: currentColor;
+      --range-fill: 1;
+      --range-p: 0.25rem;
+      --range-bg: currentColor;
+      @supports (color: color-mix(in lab, red, red)) {
+        --range-bg: color-mix(in oklab, currentColor 10%, #0000);
+      }
+      cursor: pointer;
+      overflow: hidden;
+      background-color: transparent;
+      vertical-align: middle;
+      width: clamp(3rem, 20rem, 100%);
+      --radius-selector-max: calc(
+      var(--radius-selector) + var(--radius-selector) + var(--radius-selector)
+    );
+      border-radius: calc(var(--radius-selector) + min(var(--range-p), var(--radius-selector-max)));
+      border: none;
+      height: var(--range-thumb-size);
+      [dir="rtl"] & {
+        --range-dir: -1;
+      }
+      &:focus {
+        outline: none;
+      }
+      &:focus-visible {
+        outline: 2px solid;
+        outline-offset: 2px;
+      }
+      &::-webkit-slider-runnable-track {
+        width: 100%;
+        background-color: var(--range-bg);
+        border-radius: var(--radius-selector);
+        height: calc(var(--range-thumb-size) * 0.5);
+      }
+      @media (forced-colors: active) {
+        &::-webkit-slider-runnable-track {
+          border: 1px solid;
+        }
+      }
+      @media (forced-colors: active) {
+        &::-moz-range-track {
+          border: 1px solid;
+        }
+      }
+      &::-webkit-slider-thumb {
+        position: relative;
+        box-sizing: border-box;
+        border-radius: calc(var(--radius-selector) + min(var(--range-p), var(--radius-selector-max)));
+        background-color: var(--range-thumb);
+        height: var(--range-thumb-size);
+        width: var(--range-thumb-size);
+        border: var(--range-p) solid;
+        appearance: none;
+        webkit-appearance: none;
+        top: 50%;
+        color: var(--range-progress);
+        transform: translateY(-50%);
+        box-shadow: 0 -1px oklch(0% 0 0 / calc(var(--depth) * 0.1)) inset, 0 8px 0 -4px oklch(100% 0 0 / calc(var(--depth) * 0.1)) inset, 0 1px currentColor, 0 0 0 2rem var(--range-thumb) inset, calc((var(--range-dir, 1) * -100cqw) - (var(--range-dir, 1) * var(--range-thumb-size) / 2)) 0 0 calc(100cqw * var(--range-fill));
+        @supports (color: color-mix(in lab, red, red)) {
+          box-shadow: 0 -1px oklch(0% 0 0 / calc(var(--depth) * 0.1)) inset, 0 8px 0 -4px oklch(100% 0 0 / calc(var(--depth) * 0.1)) inset, 0 1px color-mix(in oklab, currentColor calc(var(--depth) * 10%), #0000), 0 0 0 2rem var(--range-thumb) inset, calc((var(--range-dir, 1) * -100cqw) - (var(--range-dir, 1) * var(--range-thumb-size) / 2)) 0 0 calc(100cqw * var(--range-fill));
+        }
+      }
+      &::-moz-range-track {
+        width: 100%;
+        background-color: var(--range-bg);
+        border-radius: var(--radius-selector);
+        height: calc(var(--range-thumb-size) * 0.5);
+      }
+      &::-moz-range-thumb {
+        position: relative;
+        box-sizing: border-box;
+        border-radius: calc(var(--radius-selector) + min(var(--range-p), var(--radius-selector-max)));
+        background-color: currentColor;
+        height: var(--range-thumb-size);
+        width: var(--range-thumb-size);
+        border: var(--range-p) solid;
+        top: 50%;
+        color: var(--range-progress);
+        box-shadow: 0 -1px oklch(0% 0 0 / calc(var(--depth) * 0.1)) inset, 0 8px 0 -4px oklch(100% 0 0 / calc(var(--depth) * 0.1)) inset, 0 1px currentColor, 0 0 0 2rem var(--range-thumb) inset, calc((var(--range-dir, 1) * -100cqw) - (var(--range-dir, 1) * var(--range-thumb-size) / 2)) 0 0 calc(100cqw * var(--range-fill));
+        @supports (color: color-mix(in lab, red, red)) {
+          box-shadow: 0 -1px oklch(0% 0 0 / calc(var(--depth) * 0.1)) inset, 0 8px 0 -4px oklch(100% 0 0 / calc(var(--depth) * 0.1)) inset, 0 1px color-mix(in oklab, currentColor calc(var(--depth) * 10%), #0000), 0 0 0 2rem var(--range-thumb) inset, calc((var(--range-dir, 1) * -100cqw) - (var(--range-dir, 1) * var(--range-thumb-size) / 2)) 0 0 calc(100cqw * var(--range-fill));
+        }
+      }
+      &:disabled {
+        cursor: not-allowed;
+        opacity: 30%;
       }
     }
   }
@@ -1525,81 +1539,6 @@
       padding: calc(0.25rem * 4);
     }
   }
-  .textarea {
-    @layer daisyui.l1.l2.l3 {
-      border: var(--border) solid #0000;
-      min-height: calc(0.25rem * 20);
-      flex-shrink: 1;
-      appearance: none;
-      border-radius: var(--radius-field);
-      background-color: var(--color-base-100);
-      padding-block: calc(0.25rem * 2);
-      vertical-align: middle;
-      width: clamp(3rem, 20rem, 100%);
-      padding-inline-start: 0.75rem;
-      padding-inline-end: 0.75rem;
-      font-size: max(var(--font-size, 0.875rem), 0.875rem);
-      touch-action: manipulation;
-      border-color: var(--input-color);
-      box-shadow: 0 1px var(--input-color) inset, 0 -1px oklch(100% 0 0 / calc(var(--depth) * 0.1)) inset;
-      @supports (color: color-mix(in lab, red, red)) {
-        box-shadow: 0 1px color-mix(in oklab, var(--input-color) calc(var(--depth) * 10%), #0000) inset, 0 -1px oklch(100% 0 0 / calc(var(--depth) * 0.1)) inset;
-      }
-      --input-color: var(--color-base-content);
-      @supports (color: color-mix(in lab, red, red)) {
-        --input-color: color-mix(in oklab, var(--color-base-content) 20%, #0000);
-      }
-      textarea {
-        appearance: none;
-        background-color: transparent;
-        border: none;
-        &:focus, &:focus-within {
-          --tw-outline-style: none;
-          outline-style: none;
-          @media (forced-colors: active) {
-            outline: 2px solid transparent;
-            outline-offset: 2px;
-          }
-        }
-      }
-      &:focus, &:focus-within {
-        --input-color: var(--color-base-content);
-        box-shadow: 0 1px var(--input-color);
-        @supports (color: color-mix(in lab, red, red)) {
-          box-shadow: 0 1px color-mix(in oklab, var(--input-color) calc(var(--depth) * 10%), #0000);
-        }
-        outline: 2px solid var(--input-color);
-        outline-offset: 2px;
-        isolation: isolate;
-      }
-      @media (pointer: coarse) {
-        @supports (-webkit-touch-callout: none) {
-          &:focus, &:focus-within {
-            --font-size: 1rem;
-          }
-        }
-      }
-      &:has(> textarea[disabled]), &:is(:disabled, [disabled]) {
-        cursor: not-allowed;
-        border-color: var(--color-base-200);
-        background-color: var(--color-base-200);
-        color: var(--color-base-content);
-        @supports (color: color-mix(in lab, red, red)) {
-          color: color-mix(in oklab, var(--color-base-content) 40%, transparent);
-        }
-        &::placeholder {
-          color: var(--color-base-content);
-          @supports (color: color-mix(in lab, red, red)) {
-            color: color-mix(in oklab, var(--color-base-content) 20%, transparent);
-          }
-        }
-        box-shadow: none;
-      }
-      &:has(> textarea[disabled]) > textarea[disabled] {
-        cursor: not-allowed;
-      }
-    }
-  }
   .stack {
     @layer daisyui.l1.l2.l3 {
       display: inline-grid;
diff --git a/docker-compose.yml b/docker-compose.yml
index 3f7b1e3..58fafe9 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -55,6 +55,8 @@ services:
       mongo:
         condition: service_started
     environment:
+      # LiteLLM API key (used by librechat.yaml endpoint config)
+      LITELLM_API_KEY: ${LITELLM_API_KEY:-}
       # MongoDB (use localhost since we're on host network)
       MONGO_URI: mongodb://root:example@localhost:27017/librechat?authSource=admin
       DOMAIN_CLIENT: http://localhost:3080
@@ -70,7 +72,6 @@ services:
       OPENID_CALLBACK_URL: /oauth/openid/callback
       OPENID_SCOPE: openid profile email
       OPENID_BUTTON_LABEL: Login with CERTifAI
-      OPENID_AUTH_EXTRA_PARAMS: prompt=none
       # Disable local auth (SSO only)
       ALLOW_EMAIL_LOGIN: "false"
       ALLOW_REGISTRATION: "false"
diff --git a/librechat/librechat.yaml b/librechat/librechat.yaml
index 7ba5233..8c09fc5 100644
--- a/librechat/librechat.yaml
+++ b/librechat/librechat.yaml
@@ -1,5 +1,5 @@
 # CERTifAI LibreChat Configuration
-# Ollama backend for self-hosted LLM inference.
+# LiteLLM proxy for unified multi-provider LLM access.
 version: 1.2.8
 
 cache: true
@@ -19,22 +19,16 @@ interface:
 
 endpoints:
   custom:
-    - name: "Ollama"
-      apiKey: "ollama"
-      baseURL: "https://mac-mini-von-benjamin-2:11434/v1/"
+    - name: "LiteLLM"
+      apiKey: "${LITELLM_API_KEY}"
+      baseURL: "https://llm-dev.meghsakha.com/v1/"
       models:
         default:
-          - "llama3.1:8b"
-          - "qwen3:30b-a3b"
+          - "Qwen3-Coder-30B-A3B-Instruct"
         fetch: true
       titleConvo: true
       titleModel: "current_model"
       summarize: false
       summaryModel: "current_model"
       forcePrompt: false
-      modelDisplayLabel: "CERTifAI Ollama"
-      dropParams:
-        - stop
-        - user
-        - frequency_penalty
-        - presence_penalty
+      modelDisplayLabel: "CERTifAI LiteLLM"
diff --git a/src/components/dashboard_sidebar.rs b/src/components/dashboard_sidebar.rs
index 0623dfb..d89ab56 100644
--- a/src/components/dashboard_sidebar.rs
+++ b/src/components/dashboard_sidebar.rs
@@ -1,9 +1,9 @@
 use dioxus::prelude::*;
 
 use crate::i18n::{t, Locale};
-use crate::infrastructure::ollama::{get_ollama_status, OllamaStatus};
+use crate::infrastructure::litellm::{get_litellm_status, LitellmStatus};
 
-/// Right sidebar for the dashboard, showing Ollama status, trending topics,
+/// Right sidebar for the dashboard, showing LiteLLM status, trending topics,
 /// and recent search history.
 ///
 /// Appears when no article card is selected. Disappears when the user opens
@@ -11,13 +11,13 @@ use crate::infrastructure::ollama::{get_ollama_status, OllamaStatus};
 ///
 /// # Props
 ///
-/// * `ollama_url` - Ollama instance URL for status polling
+/// * `litellm_url` - LiteLLM proxy URL for status polling
 /// * `trending` - Trending topic keywords extracted from recent news headlines
 /// * `recent_searches` - Recent search topics stored in localStorage
 /// * `on_topic_click` - Fires when a trending or recent topic is clicked
 #[component]
 pub fn DashboardSidebar(
-    ollama_url: String,
+    litellm_url: String,
     trending: Vec<String>,
     recent_searches: Vec<String>,
     on_topic_click: EventHandler<String>,
@@ -25,26 +25,26 @@ pub fn DashboardSidebar(
     let locale = use_context::<Signal<Locale>>();
     let l = *locale.read();
 
-    // Fetch Ollama status once on mount.
+    // Fetch LiteLLM status once on mount.
     // use_resource with no signal dependencies runs exactly once and
     // won't re-fire on parent re-renders (unlike use_effect).
-    let url = ollama_url.clone();
+    let url = litellm_url.clone();
     let status_resource = use_resource(move || {
         let u = url.clone();
         async move {
-            get_ollama_status(u).await.unwrap_or(OllamaStatus {
+            get_litellm_status(u).await.unwrap_or(LitellmStatus {
                 online: false,
                 models: Vec::new(),
             })
         }
     });
 
-    let current_status: OllamaStatus =
+    let current_status: LitellmStatus =
         status_resource
             .read()
             .as_ref()
             .cloned()
-            .unwrap_or(OllamaStatus {
+            .unwrap_or(LitellmStatus {
                 online: false,
                 models: Vec::new(),
             });
@@ -52,9 +52,9 @@ pub fn DashboardSidebar(
     rsx! {
         aside { class: "dashboard-sidebar",
 
-            // -- Ollama Status Section --
+            // -- LiteLLM Status Section --
             div { class: "sidebar-section",
-                h4 { class: "sidebar-section-title", "{t(l, \"dashboard.ollama_status\")}" }
+                h4 { class: "sidebar-section-title", "{t(l, \"dashboard.litellm_status\")}" }
                 div { class: "sidebar-status-row",
                     span { class: if current_status.online { "sidebar-status-dot sidebar-status-dot--online" } else { "sidebar-status-dot sidebar-status-dot--offline" } }
                     span { class: "sidebar-status-label",
diff --git a/src/components/news_card.rs b/src/components/news_card.rs
index 3eeba56..1e328a2 100644
--- a/src/components/news_card.rs
+++ b/src/components/news_card.rs
@@ -112,12 +112,12 @@ pub fn mock_news() -> Vec<NewsCardModel> {
             published_at: "2026-02-16".into(),
         },
         NewsCardModel {
-            title: "Ollama Adds Multi-GPU Scheduling".into(),
-            source: "Ollama".into(),
-            summary: "Run large models across multiple GPUs with automatic sharding.".into(),
-            content: "Ollama now supports multi-GPU scheduling with automatic \
-                model sharding. Users can run models across multiple GPUs \
-                for improved inference performance."
+            title: "LiteLLM Adds Multi-Provider Routing".into(),
+            source: "LiteLLM".into(),
+            summary: "Route requests across multiple LLM providers with automatic fallback.".into(),
+            content: "LiteLLM now supports multi-provider routing with automatic \
+                fallback. Users can route requests across multiple providers \
+                for improved reliability and cost optimization."
                 .into(),
             category: "Infrastructure".into(),
             url: "#".into(),
diff --git a/src/infrastructure/chat.rs b/src/infrastructure/chat.rs
index 5b5e99a..983bdd8 100644
--- a/src/infrastructure/chat.rs
+++ b/src/infrastructure/chat.rs
@@ -134,7 +134,7 @@ pub async fn list_chat_sessions() -> Result<Vec<ChatSession>, ServerFnError> {
 ///
 /// * `title` - Display title for the session
 /// * `namespace` - Namespace string: `"General"` or `"News"`
-/// * `provider` - LLM provider name (e.g. "ollama")
+/// * `provider` - LLM provider name (e.g. "litellm")
 /// * `model` - Model ID (e.g. "llama3.1:8b")
 /// * `article_url` - Source article URL (only for `News` namespace, empty if none)
 ///
@@ -441,8 +441,8 @@ pub async fn chat_complete(
 
     // Resolve provider URL and model
     let (base_url, model) = resolve_provider_url(
-        &state.services.ollama_url,
-        &state.services.ollama_model,
+        &state.services.litellm_url,
+        &state.services.litellm_model,
         &session.provider,
         &session.model,
     );
@@ -485,22 +485,22 @@ pub async fn chat_complete(
         .ok_or_else(|| ServerFnError::new("empty LLM response"))
 }
 
-/// Resolve the base URL for a provider, falling back to Ollama defaults.
+/// Resolve the base URL for a provider, falling back to LiteLLM defaults.
 ///
 /// # Arguments
 ///
-/// * `ollama_url` - Default Ollama base URL from config
-/// * `ollama_model` - Default Ollama model from config
+/// * `litellm_url` - Default LiteLLM base URL from config
+/// * `litellm_model` - Default LiteLLM model from config
 /// * `provider` - Provider name (e.g. "openai", "anthropic", "huggingface")
-/// * `model` - Model ID (may be empty for Ollama default)
+/// * `model` - Model ID (may be empty for LiteLLM default)
 ///
 /// # Returns
 ///
 /// A `(base_url, model)` tuple resolved for the given provider.
 #[cfg(feature = "server")]
 pub(crate) fn resolve_provider_url(
-    ollama_url: &str,
-    ollama_model: &str,
+    litellm_url: &str,
+    litellm_model: &str,
     provider: &str,
     model: &str,
 ) -> (String, String) {
@@ -511,11 +511,11 @@ pub(crate) fn resolve_provider_url(
             format!("https://api-inference.huggingface.co/models/{}", model),
             model.to_string(),
         ),
-        // Default to Ollama
+        // Default to LiteLLM
         _ => (
-            ollama_url.to_string(),
+            litellm_url.to_string(),
             if model.is_empty() {
-                ollama_model.to_string()
+                litellm_model.to_string()
             } else {
                 model.to_string()
             },
@@ -595,7 +595,7 @@ mod tests {
                 "_id": oid,
                 "user_sub": "u",
                 "title": "t",
-                "provider": "ollama",
+                "provider": "litellm",
                 "model": "m",
                 "created_at": "c",
                 "updated_at": "u",
@@ -612,7 +612,7 @@ mod tests {
                 "user_sub": "u",
                 "title": "t",
                 "namespace": "News",
-                "provider": "ollama",
+                "provider": "litellm",
                 "model": "m",
                 "created_at": "c",
                 "updated_at": "u",
@@ -684,13 +684,13 @@ mod tests {
 
         // -- resolve_provider_url --
 
-        const TEST_OLLAMA_URL: &str = "http://localhost:11434";
-        const TEST_OLLAMA_MODEL: &str = "llama3.1:8b";
+        const TEST_LITELLM_URL: &str = "http://localhost:4000";
+        const TEST_LITELLM_MODEL: &str = "qwen3-32b";
 
         #[test]
         fn resolve_openai_returns_api_openai() {
             let (url, model) =
-                resolve_provider_url(TEST_OLLAMA_URL, TEST_OLLAMA_MODEL, "openai", "gpt-4o");
+                resolve_provider_url(TEST_LITELLM_URL, TEST_LITELLM_MODEL, "openai", "gpt-4o");
             assert_eq!(url, "https://api.openai.com");
             assert_eq!(model, "gpt-4o");
         }
@@ -698,8 +698,8 @@ mod tests {
         #[test]
         fn resolve_anthropic_returns_api_anthropic() {
             let (url, model) = resolve_provider_url(
-                TEST_OLLAMA_URL,
-                TEST_OLLAMA_MODEL,
+                TEST_LITELLM_URL,
+                TEST_LITELLM_MODEL,
                 "anthropic",
                 "claude-3-opus",
             );
@@ -710,8 +710,8 @@ mod tests {
         #[test]
         fn resolve_huggingface_returns_model_url() {
             let (url, model) = resolve_provider_url(
-                TEST_OLLAMA_URL,
-                TEST_OLLAMA_MODEL,
+                TEST_LITELLM_URL,
+                TEST_LITELLM_MODEL,
                 "huggingface",
                 "meta-llama/Llama-2-7b",
             );
@@ -723,19 +723,19 @@ mod tests {
         }
 
         #[test]
-        fn resolve_unknown_defaults_to_ollama() {
+        fn resolve_unknown_defaults_to_litellm() {
             let (url, model) =
-                resolve_provider_url(TEST_OLLAMA_URL, TEST_OLLAMA_MODEL, "ollama", "mistral:7b");
-            assert_eq!(url, TEST_OLLAMA_URL);
-            assert_eq!(model, "mistral:7b");
+                resolve_provider_url(TEST_LITELLM_URL, TEST_LITELLM_MODEL, "litellm", "gpt-4o");
+            assert_eq!(url, TEST_LITELLM_URL);
+            assert_eq!(model, "gpt-4o");
         }
 
         #[test]
         fn resolve_empty_model_falls_back_to_server_default() {
             let (url, model) =
-                resolve_provider_url(TEST_OLLAMA_URL, TEST_OLLAMA_MODEL, "ollama", "");
-            assert_eq!(url, TEST_OLLAMA_URL);
-            assert_eq!(model, TEST_OLLAMA_MODEL);
+                resolve_provider_url(TEST_LITELLM_URL, TEST_LITELLM_MODEL, "litellm", "");
+            assert_eq!(url, TEST_LITELLM_URL);
+            assert_eq!(model, TEST_LITELLM_MODEL);
         }
     }
 }
diff --git a/src/infrastructure/config.rs b/src/infrastructure/config.rs
index 23128fc..8f82d9e 100644
--- a/src/infrastructure/config.rs
+++ b/src/infrastructure/config.rs
@@ -141,13 +141,15 @@ impl SmtpConfig {
 // ServiceUrls
 // ---------------------------------------------------------------------------
 
-/// URLs and credentials for external services (Ollama, SearXNG, S3, etc.).
+/// URLs and credentials for external services (LiteLLM, SearXNG, S3, etc.).
 #[derive(Debug)]
 pub struct ServiceUrls {
-    /// Ollama LLM instance base URL.
-    pub ollama_url: String,
-    /// Default Ollama model to use.
-    pub ollama_model: String,
+    /// LiteLLM proxy base URL.
+    pub litellm_url: String,
+    /// Default LiteLLM model to use.
+    pub litellm_model: String,
+    /// LiteLLM API key for authenticated requests.
+    pub litellm_api_key: String,
     /// SearXNG meta-search engine base URL.
     pub searxng_url: String,
     /// LangChain service URL.
@@ -178,9 +180,10 @@ impl ServiceUrls {
     /// Currently infallible but returns `Result` for consistency.
     pub fn from_env() -> Result<Self, Error> {
         Ok(Self {
-            ollama_url: std::env::var("OLLAMA_URL")
-                .unwrap_or_else(|_| "http://localhost:11434".into()),
-            ollama_model: std::env::var("OLLAMA_MODEL").unwrap_or_else(|_| "llama3.1:8b".into()),
+            litellm_url: std::env::var("LITELLM_URL")
+                .unwrap_or_else(|_| "http://localhost:4000".into()),
+            litellm_model: std::env::var("LITELLM_MODEL").unwrap_or_else(|_| "qwen3-32b".into()),
+            litellm_api_key: optional_env("LITELLM_API_KEY"),
             searxng_url: std::env::var("SEARXNG_URL")
                 .unwrap_or_else(|_| "http://localhost:8888".into()),
             langchain_url: optional_env("LANGCHAIN_URL"),
@@ -231,7 +234,7 @@ impl StripeConfig {
 
 /// Comma-separated list of enabled LLM provider identifiers.
 ///
-/// For example: `LLM_PROVIDERS=ollama,openai,anthropic`
+/// For example: `LLM_PROVIDERS=litellm,openai,anthropic`
 #[derive(Debug)]
 pub struct LlmProvidersConfig {
     /// Parsed provider names.
@@ -331,36 +334,36 @@ mod tests {
     #[test]
     #[serial]
     fn llm_providers_single() {
-        std::env::set_var("LLM_PROVIDERS", "ollama");
+        std::env::set_var("LLM_PROVIDERS", "litellm");
         let cfg = LlmProvidersConfig::from_env().unwrap();
-        assert_eq!(cfg.providers, vec!["ollama"]);
+        assert_eq!(cfg.providers, vec!["litellm"]);
         std::env::remove_var("LLM_PROVIDERS");
     }
 
     #[test]
     #[serial]
     fn llm_providers_multiple() {
-        std::env::set_var("LLM_PROVIDERS", "ollama,openai,anthropic");
+        std::env::set_var("LLM_PROVIDERS", "litellm,openai,anthropic");
         let cfg = LlmProvidersConfig::from_env().unwrap();
-        assert_eq!(cfg.providers, vec!["ollama", "openai", "anthropic"]);
+        assert_eq!(cfg.providers, vec!["litellm", "openai", "anthropic"]);
         std::env::remove_var("LLM_PROVIDERS");
     }
 
     #[test]
     #[serial]
     fn llm_providers_trims_whitespace() {
-        std::env::set_var("LLM_PROVIDERS", " ollama , openai ");
+        std::env::set_var("LLM_PROVIDERS", " litellm , openai ");
         let cfg = LlmProvidersConfig::from_env().unwrap();
-        assert_eq!(cfg.providers, vec!["ollama", "openai"]);
+        assert_eq!(cfg.providers, vec!["litellm", "openai"]);
         std::env::remove_var("LLM_PROVIDERS");
     }
 
     #[test]
     #[serial]
     fn llm_providers_filters_empty_entries() {
-        std::env::set_var("LLM_PROVIDERS", "ollama,,openai,");
+        std::env::set_var("LLM_PROVIDERS", "litellm,,openai,");
         let cfg = LlmProvidersConfig::from_env().unwrap();
-        assert_eq!(cfg.providers, vec!["ollama", "openai"]);
+        assert_eq!(cfg.providers, vec!["litellm", "openai"]);
         std::env::remove_var("LLM_PROVIDERS");
     }
 
@@ -370,18 +373,18 @@ mod tests {
 
     #[test]
     #[serial]
-    fn service_urls_default_ollama_url() {
-        std::env::remove_var("OLLAMA_URL");
+    fn service_urls_default_litellm_url() {
+        std::env::remove_var("LITELLM_URL");
         let svc = ServiceUrls::from_env().unwrap();
-        assert_eq!(svc.ollama_url, "http://localhost:11434");
+        assert_eq!(svc.litellm_url, "http://localhost:4000");
     }
 
     #[test]
     #[serial]
-    fn service_urls_default_ollama_model() {
-        std::env::remove_var("OLLAMA_MODEL");
+    fn service_urls_default_litellm_model() {
+        std::env::remove_var("LITELLM_MODEL");
         let svc = ServiceUrls::from_env().unwrap();
-        assert_eq!(svc.ollama_model, "llama3.1:8b");
+        assert_eq!(svc.litellm_model, "qwen3-32b");
     }
 
     #[test]
@@ -394,11 +397,11 @@ mod tests {
 
     #[test]
     #[serial]
-    fn service_urls_custom_ollama_url() {
-        std::env::set_var("OLLAMA_URL", "http://gpu-host:11434");
+    fn service_urls_custom_litellm_url() {
+        std::env::set_var("LITELLM_URL", "http://litellm-host:4000");
         let svc = ServiceUrls::from_env().unwrap();
-        assert_eq!(svc.ollama_url, "http://gpu-host:11434");
-        std::env::remove_var("OLLAMA_URL");
+        assert_eq!(svc.litellm_url, "http://litellm-host:4000");
+        std::env::remove_var("LITELLM_URL");
     }
 
     #[test]
diff --git a/src/infrastructure/litellm.rs b/src/infrastructure/litellm.rs
new file mode 100644
index 0000000..af367b1
--- /dev/null
+++ b/src/infrastructure/litellm.rs
@@ -0,0 +1,403 @@
+#[cfg(feature = "server")]
+use std::collections::HashMap;
+
+use dioxus::prelude::*;
+use serde::{Deserialize, Serialize};
+
+use crate::models::LitellmUsageStats;
+#[cfg(feature = "server")]
+use crate::models::ModelUsage;
+
+/// Status of a LiteLLM proxy instance, including connectivity and available models.
+///
+/// # Fields
+///
+/// * `online` - Whether `GET /v1/models` answered with a success status
+/// * `models` - Model IDs listed by the proxy; empty when offline or unparsable
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct LitellmStatus {
+    pub online: bool,
+    pub models: Vec<String>,
+}
+
+/// Body of LiteLLM's OpenAI-compatible `GET /v1/models` response; only `data` is read.
+#[cfg(feature = "server")]
+#[derive(Deserialize)]
+struct ModelsResponse {
+    data: Vec<ModelObject>,
+}
+
+/// A single entry of the OpenAI-compatible models list; only its `id` is deserialized.
+#[cfg(feature = "server")]
+#[derive(Deserialize)]
+struct ModelObject {
+    id: String,
+}
+
+/// Check the status of a LiteLLM proxy by querying its models endpoint.
+///
+/// Calls `GET <litellm_url>/v1/models` to list available models and determine
+/// whether the instance is reachable. The configured API key is sent as a
+/// Bearer token only when the target is the server-configured proxy.
+///
+/// # Arguments
+///
+/// * `litellm_url` - Base URL of the proxy; empty selects the configured URL
+///
+/// # Returns
+///
+/// A `LitellmStatus` with `online: true` and model IDs if reachable,
+/// or `online: false` with an empty model list on failure
+///
+/// # Errors
+///
+/// Returns `ServerFnError` if server state extraction or HTTP client setup
+/// fails; network failures are caught and returned as `online: false`
+#[post("/api/litellm-status")]
+pub async fn get_litellm_status(litellm_url: String) -> Result<LitellmStatus, ServerFnError> {
+    let state: crate::infrastructure::ServerState =
+        dioxus_fullstack::FullstackContext::extract().await?;
+
+    let configured = state.services.litellm_url.trim_end_matches('/');
+    let base_url = if litellm_url.is_empty() {
+        configured
+    } else {
+        litellm_url.trim_end_matches('/')
+    };
+    let api_key = &state.services.litellm_api_key;
+    let url = format!("{base_url}/v1/models");
+
+    let client = reqwest::Client::builder()
+        .timeout(std::time::Duration::from_secs(5))
+        .build()
+        .map_err(|e| ServerFnError::new(format!("HTTP client error: {e}")))?;
+    // Never forward the server's API key to a caller-supplied host.
+    let mut request = client.get(&url);
+    if base_url == configured && !api_key.is_empty() {
+        request = request.header("Authorization", format!("Bearer {api_key}"));
+    }
+
+    let resp = match request.send().await {
+        Ok(r) if r.status().is_success() => r,
+        _ => {
+            return Ok(LitellmStatus {
+                online: false,
+                models: Vec::new(),
+            });
+        }
+    };
+
+    let body: ModelsResponse = match resp.json().await {
+        Ok(b) => b,
+        Err(_) => {
+            return Ok(LitellmStatus {
+                online: true,
+                models: Vec::new(),
+            });
+        }
+    };
+
+    let models = body.data.into_iter().map(|m| m.id).collect();
+
+    Ok(LitellmStatus {
+        online: true,
+        models,
+    })
+}
+
+/// Response from LiteLLM's `GET /global/activity` endpoint.
+///
+/// Only the aggregate token total is deserialized; other fields are ignored.
+/// Available on the free tier (no Enterprise license needed).
+#[cfg(feature = "server")]
+#[derive(Debug, Deserialize)]
+struct ActivityResponse {
+    /// Total tokens across all models in the date range (0 when absent)
+    #[serde(default)]
+    sum_total_tokens: u64,
+}
+
+/// Per-model entry from `GET /global/activity/model`.
+///
+/// Pairs a model name with its aggregated token total; absent fields use defaults.
+#[cfg(feature = "server")]
+#[derive(Debug, Deserialize)]
+struct ActivityModelEntry {
+    /// Model identifier (empty when absent; such entries are skipped when merging)
+    #[serde(default)]
+    model: String,
+    /// Sum of tokens used by this model in the date range (0 when absent)
+    #[serde(default)]
+    sum_total_tokens: u64,
+}
+
+/// Per-model spend entry from `GET /global/spend/models`.
+///
+/// Maps a model name to its total spend in USD; absent fields use defaults.
+#[cfg(feature = "server")]
+#[derive(Debug, Deserialize)]
+struct SpendModelEntry {
+    /// Model identifier (empty when absent; such entries are skipped when merging)
+    #[serde(default)]
+    model: String,
+    /// Total spend in USD (0.0 when absent)
+    #[serde(default)]
+    total_spend: f64,
+}
+
+/// Merge per-model token counts and spend data into `ModelUsage` entries.
+///
+/// Joins `activity_models` (tokens) and `spend_models` (spend) by model
+/// name using a HashMap for O(n + m) merge. Entries with empty model
+/// names are skipped.
+///
+/// # Arguments
+///
+/// * `activity_models` - Per-model token data from `/global/activity/model`
+/// * `spend_models` - Per-model spend data from `/global/spend/models`
+///
+/// # Returns
+///
+/// Merged list sorted by total tokens descending, ties broken by model name
+#[cfg(feature = "server")]
+fn merge_model_data(
+    activity_models: Vec<ActivityModelEntry>,
+    spend_models: Vec<SpendModelEntry>,
+) -> Vec<ModelUsage> {
+    let mut model_map: HashMap<String, ModelUsage> = HashMap::new();
+
+    for entry in activity_models {
+        if entry.model.is_empty() {
+            continue;
+        }
+        model_map
+            .entry(entry.model.clone())
+            .or_insert_with(|| ModelUsage {
+                model: entry.model,
+                ..Default::default()
+            })
+            .total_tokens = entry.sum_total_tokens;
+    }
+
+    for entry in spend_models {
+        if entry.model.is_empty() {
+            continue;
+        }
+        model_map
+            .entry(entry.model.clone())
+            .or_insert_with(|| ModelUsage {
+                model: entry.model,
+                ..Default::default()
+            })
+            .spend = entry.total_spend;
+    }
+
+    let mut result: Vec<ModelUsage> = model_map.into_values().collect();
+    result.sort_by(|a, b| b.total_tokens.cmp(&a.total_tokens).then_with(|| a.model.cmp(&b.model)));
+    result
+}
+
+/// Fetch aggregated usage statistics from LiteLLM's free-tier APIs.
+///
+/// Combines three endpoints to build a complete usage picture:
+/// - `GET /global/activity` - total token counts
+/// - `GET /global/activity/model` - per-model token breakdown
+/// - `GET /global/spend/models` - per-model spend in USD
+///
+/// # Arguments
+///
+/// * `start_date` - Period start, `YYYY-MM-DD`; other characters are stripped
+/// * `end_date` - Period end, `YYYY-MM-DD`; other characters are stripped
+///
+/// # Returns
+///
+/// Aggregated usage stats; returns default (zeroed) stats on network
+/// failure or permission errors
+///
+/// # Errors
+///
+/// Returns `ServerFnError` if state extraction or HTTP client construction fails
+#[post("/api/litellm-usage")]
+pub async fn get_litellm_usage(
+    start_date: String,
+    end_date: String,
+) -> Result<LitellmUsageStats, ServerFnError> {
+    let state: crate::infrastructure::ServerState =
+        dioxus_fullstack::FullstackContext::extract().await?;
+
+    let base_url = &state.services.litellm_url;
+    let api_key = &state.services.litellm_api_key;
+
+    if base_url.is_empty() {
+        return Ok(LitellmUsageStats::default());
+    }
+
+    let base = base_url.trim_end_matches('/');
+    // Dates are caller-supplied; keep only date characters so they cannot inject parameters.
+    let enc = |s: &str| s.chars().filter(|c| c.is_ascii_digit() || *c == '-').collect::<String>();
+    let date_params = format!("start_date={}&end_date={}", enc(&start_date), enc(&end_date));
+    let client = reqwest::Client::builder()
+        .timeout(std::time::Duration::from_secs(10))
+        .build()
+        .map_err(|e| ServerFnError::new(format!("HTTP client error: {e}")))?;
+    // Helper closure to build an authenticated GET request
+    let auth_get = |url: String| {
+        let mut req = client.get(url);
+        if !api_key.is_empty() {
+            req = req.header("Authorization", format!("Bearer {api_key}"));
+        }
+        req
+    };
+
+    // Fire all three requests concurrently to minimise latency
+    let (activity_res, model_activity_res, model_spend_res) = tokio::join!(
+        auth_get(format!("{base}/global/activity?{date_params}")).send(),
+        auth_get(format!("{base}/global/activity/model?{date_params}")).send(),
+        auth_get(format!("{base}/global/spend/models?{date_params}")).send(),
+    );
+
+    // Parse total token count from /global/activity
+    let total_tokens = match activity_res {
+        Ok(r) if r.status().is_success() => r
+            .json::<ActivityResponse>()
+            .await
+            .map(|a| a.sum_total_tokens)
+            .unwrap_or(0),
+        _ => 0,
+    };
+
+    // Parse per-model token breakdown from /global/activity/model
+    let activity_models: Vec<ActivityModelEntry> = match model_activity_res {
+        Ok(r) if r.status().is_success() => r.json().await.unwrap_or_default(),
+        _ => Vec::new(),
+    };
+
+    // Parse per-model spend from /global/spend/models
+    let spend_models: Vec<SpendModelEntry> = match model_spend_res {
+        Ok(r) if r.status().is_success() => r.json().await.unwrap_or_default(),
+        _ => Vec::new(),
+    };
+
+    let total_spend: f64 = spend_models.iter().map(|m| m.total_spend).sum();
+    let model_breakdown = merge_model_data(activity_models, spend_models);
+
+    Ok(LitellmUsageStats {
+        total_spend,
+        // Free-tier endpoints don't provide prompt/completion split;
+        // total_tokens comes from /global/activity.
+        total_prompt_tokens: 0,
+        total_completion_tokens: 0,
+        total_tokens,
+        model_breakdown,
+    })
+}
+
+#[cfg(all(test, feature = "server"))]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn merge_empty_inputs() {
+        let result = merge_model_data(Vec::new(), Vec::new());
+        assert!(result.is_empty());
+    }
+
+    #[test]
+    fn merge_activity_only() {
+        let activity = vec![ActivityModelEntry {
+            model: "gpt-4".into(),
+            sum_total_tokens: 1500,
+        }];
+        let result = merge_model_data(activity, Vec::new());
+        assert_eq!(result.len(), 1);
+        assert_eq!(result[0].model, "gpt-4");
+        assert_eq!(result[0].total_tokens, 1500);
+        assert_eq!(result[0].spend, 0.0);
+    }
+
+    #[test]
+    fn merge_spend_only() {
+        let spend = vec![SpendModelEntry {
+            model: "gpt-4".into(),
+            total_spend: 2.5,
+        }];
+        let result = merge_model_data(Vec::new(), spend);
+        assert_eq!(result.len(), 1);
+        assert_eq!(result[0].model, "gpt-4");
+        assert_eq!(result[0].spend, 2.5);
+        assert_eq!(result[0].total_tokens, 0);
+    }
+
+    #[test]
+    fn merge_joins_by_model_name() {
+        let activity = vec![
+            ActivityModelEntry {
+                model: "gpt-4".into(),
+                sum_total_tokens: 5000,
+            },
+            ActivityModelEntry {
+                model: "claude-3".into(),
+                sum_total_tokens: 3000,
+            },
+        ];
+        let spend = vec![
+            SpendModelEntry {
+                model: "gpt-4".into(),
+                total_spend: 1.0,
+            },
+            SpendModelEntry {
+                model: "claude-3".into(),
+                total_spend: 0.5,
+            },
+        ];
+        let result = merge_model_data(activity, spend);
+        assert_eq!(result.len(), 2);
+        // Sorted by tokens descending: gpt-4 (5000) before claude-3 (3000)
+        assert_eq!(result[0].model, "gpt-4");
+        assert_eq!(result[0].total_tokens, 5000);
+        assert_eq!(result[0].spend, 1.0);
+        assert_eq!(result[1].model, "claude-3");
+        assert_eq!(result[1].total_tokens, 3000);
+        assert_eq!(result[1].spend, 0.5);
+    }
+
+    #[test]
+    fn merge_skips_empty_model_names() {
+        let activity = vec![
+            ActivityModelEntry {
+                model: "".into(),
+                sum_total_tokens: 100,
+            },
+            ActivityModelEntry {
+                model: "gpt-4".into(),
+                sum_total_tokens: 500,
+            },
+        ];
+        let spend = vec![SpendModelEntry {
+            model: "".into(),
+            total_spend: 0.01,
+        }];
+        let result = merge_model_data(activity, spend);
+        assert_eq!(result.len(), 1);
+        assert_eq!(result[0].model, "gpt-4");
+    }
+
+    #[test]
+    fn merge_unmatched_models_appear_in_both_directions() {
+        let activity = vec![ActivityModelEntry {
+            model: "tokens-only".into(),
+            sum_total_tokens: 1000,
+        }];
+        let spend = vec![SpendModelEntry {
+            model: "spend-only".into(),
+            total_spend: 0.5,
+        }];
+        let result = merge_model_data(activity, spend);
+        assert_eq!(result.len(), 2);
+        // tokens-only has 1000 tokens, spend-only has 0 tokens
+        assert_eq!(result[0].model, "tokens-only");
+        assert_eq!(result[0].total_tokens, 1000);
+        assert_eq!(result[1].model, "spend-only");
+        assert_eq!(result[1].spend, 0.5);
+    }
+}
diff --git a/src/infrastructure/llm.rs b/src/infrastructure/llm.rs
index b68e2ab..76ece50 100644
--- a/src/infrastructure/llm.rs
+++ b/src/infrastructure/llm.rs
@@ -4,23 +4,23 @@ use dioxus::prelude::*;
 mod inner {
     use serde::{Deserialize, Serialize};
 
-    /// A single message in the OpenAI-compatible chat format used by Ollama.
+    /// A single message in the OpenAI-compatible chat format used by LiteLLM.
     #[derive(Serialize)]
     pub(super) struct ChatMessage {
         pub role: String,
         pub content: String,
     }
 
-    /// Request body for Ollama's OpenAI-compatible chat completions endpoint.
+    /// Request body for the OpenAI-compatible chat completions endpoint.
     #[derive(Serialize)]
-    pub(super) struct OllamaChatRequest {
+    pub(super) struct ChatCompletionRequest {
         pub model: String,
         pub messages: Vec<ChatMessage>,
         /// Disable streaming so we get a single JSON response.
         pub stream: bool,
     }
 
-    /// A single choice in the Ollama chat completions response.
+    /// A single choice in the chat completions response.
     #[derive(Deserialize)]
     pub(super) struct ChatChoice {
         pub message: ChatResponseMessage,
@@ -32,9 +32,9 @@ mod inner {
         pub content: String,
     }
 
-    /// Top-level response from Ollama's `/v1/chat/completions` endpoint.
+    /// Top-level response from the `/v1/chat/completions` endpoint.
     #[derive(Deserialize)]
-    pub(super) struct OllamaChatResponse {
+    pub(super) struct ChatCompletionResponse {
         pub choices: Vec<ChatChoice>,
     }
 
@@ -157,7 +157,7 @@ mod inner {
     }
 }
 
-/// Summarize an article using a local Ollama instance.
+/// Summarize an article using a LiteLLM proxy.
 ///
 /// First attempts to fetch the full article text from the provided URL.
 /// If that fails (paywall, timeout, etc.), falls back to the search snippet.
@@ -167,8 +167,8 @@ mod inner {
 ///
 /// * `snippet` - The search result snippet (fallback content)
 /// * `article_url` - The original article URL to fetch full text from
-/// * `ollama_url` - Base URL of the Ollama instance (e.g. "http://localhost:11434")
-/// * `model` - The Ollama model ID to use (e.g. "llama3.1:8b")
+/// * `litellm_url` - Base URL of the LiteLLM proxy (e.g. "http://localhost:4000")
+/// * `model` - The model ID to use (e.g. "qwen3-32b")
 ///
 /// # Returns
 ///
@@ -176,36 +176,38 @@ mod inner {
 ///
 /// # Errors
 ///
-/// Returns `ServerFnError` if the Ollama request fails or response parsing fails
+/// Returns `ServerFnError` if the LiteLLM request fails or response parsing fails
 #[post("/api/summarize")]
 pub async fn summarize_article(
     snippet: String,
     article_url: String,
-    ollama_url: String,
+    litellm_url: String,
     model: String,
 ) -> Result<String, ServerFnError> {
-    use inner::{fetch_article_text, ChatMessage, OllamaChatRequest, OllamaChatResponse};
+    use inner::{fetch_article_text, ChatCompletionRequest, ChatCompletionResponse, ChatMessage};
 
     let state: crate::infrastructure::ServerState =
         dioxus_fullstack::FullstackContext::extract().await?;
 
     // Use caller-provided values or fall back to ServerState config
-    let base_url = if ollama_url.is_empty() {
-        state.services.ollama_url.clone()
+    let base_url = if litellm_url.is_empty() {
+        state.services.litellm_url.clone()
     } else {
-        ollama_url
+        litellm_url
     };
 
     let model = if model.is_empty() {
-        state.services.ollama_model.clone()
+        state.services.litellm_model.clone()
     } else {
         model
     };
 
+    let api_key = state.services.litellm_api_key.clone();
+
     // Try to fetch the full article; fall back to the search snippet
     let article_text = fetch_article_text(&article_url).await.unwrap_or(snippet);
 
-    let request_body = OllamaChatRequest {
+    let request_body = ChatCompletionRequest {
         model,
         stream: false,
         messages: vec![ChatMessage {
@@ -223,42 +225,48 @@ pub async fn summarize_article(
 
     let url = format!("{}/v1/chat/completions", base_url.trim_end_matches('/'));
     let client = reqwest::Client::new();
-    let resp = client
+    let mut request = client
         .post(&url)
         .header("content-type", "application/json")
-        .json(&request_body)
+        .json(&request_body);
+
+    if !api_key.is_empty() {
+        request = request.header("Authorization", format!("Bearer {api_key}"));
+    }
+
+    let resp = request
         .send()
         .await
-        .map_err(|e| ServerFnError::new(format!("Ollama request failed: {e}")))?;
+        .map_err(|e| ServerFnError::new(format!("LiteLLM request failed: {e}")))?;
 
     if !resp.status().is_success() {
         let status = resp.status();
         let body = resp.text().await.unwrap_or_default();
         return Err(ServerFnError::new(format!(
-            "Ollama returned {status}: {body}"
+            "LiteLLM returned {status}: {body}"
         )));
     }
 
-    let body: OllamaChatResponse = resp
+    let body: ChatCompletionResponse = resp
         .json()
         .await
-        .map_err(|e| ServerFnError::new(format!("Failed to parse Ollama response: {e}")))?;
+        .map_err(|e| ServerFnError::new(format!("Failed to parse LiteLLM response: {e}")))?;
 
     body.choices
         .first()
         .map(|choice| choice.message.content.clone())
-        .ok_or_else(|| ServerFnError::new("Empty response from Ollama"))
+        .ok_or_else(|| ServerFnError::new("Empty response from LiteLLM"))
 }
 
 /// A lightweight chat message for the follow-up conversation.
-/// Uses simple String role ("system"/"user"/"assistant") for Ollama compatibility.
+/// Uses simple String role ("system"/"user"/"assistant") for OpenAI compatibility.
 #[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
 pub struct FollowUpMessage {
     pub role: String,
     pub content: String,
 }
 
-/// Send a follow-up question about an article using a local Ollama instance.
+/// Send a follow-up question about an article using a LiteLLM proxy.
 ///
 /// Accepts the full conversation history (system context + prior turns) and
 /// returns the assistant's next response. The system message should contain
@@ -267,8 +275,8 @@ pub struct FollowUpMessage {
 /// # Arguments
 ///
 /// * `messages` - The conversation history including system context
-/// * `ollama_url` - Base URL of the Ollama instance
-/// * `model` - The Ollama model ID to use
+/// * `litellm_url` - Base URL of the LiteLLM proxy
+/// * `model` - The model ID to use
 ///
 /// # Returns
 ///
@@ -276,30 +284,32 @@ pub struct FollowUpMessage {
 ///
 /// # Errors
 ///
-/// Returns `ServerFnError` if the Ollama request fails or response parsing fails
+/// Returns `ServerFnError` if the LiteLLM request fails or response parsing fails
 #[post("/api/chat")]
 pub async fn chat_followup(
     messages: Vec<FollowUpMessage>,
-    ollama_url: String,
+    litellm_url: String,
     model: String,
 ) -> Result<String, ServerFnError> {
-    use inner::{ChatMessage, OllamaChatRequest, OllamaChatResponse};
+    use inner::{ChatCompletionRequest, ChatCompletionResponse, ChatMessage};
 
     let state: crate::infrastructure::ServerState =
         dioxus_fullstack::FullstackContext::extract().await?;
 
-    let base_url = if ollama_url.is_empty() {
-        state.services.ollama_url.clone()
+    let base_url = if litellm_url.is_empty() {
+        state.services.litellm_url.clone()
     } else {
-        ollama_url
+        litellm_url
     };
 
     let model = if model.is_empty() {
-        state.services.ollama_model.clone()
+        state.services.litellm_model.clone()
     } else {
         model
     };
 
+    let api_key = state.services.litellm_api_key.clone();
+
     // Convert FollowUpMessage to inner ChatMessage for the request
     let chat_messages: Vec<ChatMessage> = messages
         .into_iter()
@@ -309,7 +319,7 @@ pub async fn chat_followup(
         })
         .collect();
 
-    let request_body = OllamaChatRequest {
+    let request_body = ChatCompletionRequest {
         model,
         stream: false,
         messages: chat_messages,
@@ -317,31 +327,37 @@ pub async fn chat_followup(
 
     let url = format!("{}/v1/chat/completions", base_url.trim_end_matches('/'));
     let client = reqwest::Client::new();
-    let resp = client
+    let mut request = client
         .post(&url)
         .header("content-type", "application/json")
-        .json(&request_body)
+        .json(&request_body);
+
+    if !api_key.is_empty() {
+        request = request.header("Authorization", format!("Bearer {api_key}"));
+    }
+
+    let resp = request
         .send()
         .await
-        .map_err(|e| ServerFnError::new(format!("Ollama request failed: {e}")))?;
+        .map_err(|e| ServerFnError::new(format!("LiteLLM request failed: {e}")))?;
 
     if !resp.status().is_success() {
         let status = resp.status();
         let body = resp.text().await.unwrap_or_default();
         return Err(ServerFnError::new(format!(
-            "Ollama returned {status}: {body}"
+            "LiteLLM returned {status}: {body}"
         )));
     }
 
-    let body: OllamaChatResponse = resp
+    let body: ChatCompletionResponse = resp
         .json()
         .await
-        .map_err(|e| ServerFnError::new(format!("Failed to parse Ollama response: {e}")))?;
+        .map_err(|e| ServerFnError::new(format!("Failed to parse LiteLLM response: {e}")))?;
 
     body.choices
         .first()
         .map(|choice| choice.message.content.clone())
-        .ok_or_else(|| ServerFnError::new("Empty response from Ollama"))
+        .ok_or_else(|| ServerFnError::new("Empty response from LiteLLM"))
 }
 
 #[cfg(test)]
diff --git a/src/infrastructure/mod.rs b/src/infrastructure/mod.rs
index c18bf52..cbb1341 100644
--- a/src/infrastructure/mod.rs
+++ b/src/infrastructure/mod.rs
@@ -3,8 +3,8 @@
 pub mod auth_check;
 pub mod chat;
 pub mod langgraph;
+pub mod litellm;
 pub mod llm;
-pub mod ollama;
 pub mod searxng;
 
 // Server-only modules (Axum handlers, state, configs, DB, etc.)
diff --git a/src/infrastructure/ollama.rs b/src/infrastructure/ollama.rs
deleted file mode 100644
index d09b03e..0000000
--- a/src/infrastructure/ollama.rs
+++ /dev/null
@@ -1,92 +0,0 @@
-use dioxus::prelude::*;
-use serde::{Deserialize, Serialize};
-
-/// Status of a local Ollama instance, including connectivity and loaded models.
-///
-/// # Fields
-///
-/// * `online` - Whether the Ollama API responded successfully
-/// * `models` - List of model names currently available on the instance
-#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
-pub struct OllamaStatus {
-    pub online: bool,
-    pub models: Vec<String>,
-}
-
-/// Response from Ollama's `GET /api/tags` endpoint.
-#[cfg(feature = "server")]
-#[derive(Deserialize)]
-struct OllamaTagsResponse {
-    models: Vec<OllamaModel>,
-}
-
-/// A single model entry from Ollama's tags API.
-#[cfg(feature = "server")]
-#[derive(Deserialize)]
-struct OllamaModel {
-    name: String,
-}
-
-/// Check the status of a local Ollama instance by querying its tags endpoint.
-///
-/// Calls `GET <ollama_url>/api/tags` to list available models and determine
-/// whether the instance is reachable.
-///
-/// # Arguments
-///
-/// * `ollama_url` - Base URL of the Ollama instance (e.g. "http://localhost:11434")
-///
-/// # Returns
-///
-/// An `OllamaStatus` with `online: true` and model names if reachable,
-/// or `online: false` with an empty model list on failure
-///
-/// # Errors
-///
-/// Returns `ServerFnError` only on serialization issues; network failures
-/// are caught and returned as `online: false`
-#[post("/api/ollama-status")]
-pub async fn get_ollama_status(ollama_url: String) -> Result<OllamaStatus, ServerFnError> {
-    let state: crate::infrastructure::ServerState =
-        dioxus_fullstack::FullstackContext::extract().await?;
-
-    let base_url = if ollama_url.is_empty() {
-        state.services.ollama_url.clone()
-    } else {
-        ollama_url
-    };
-
-    let url = format!("{}/api/tags", base_url.trim_end_matches('/'));
-
-    let client = reqwest::Client::builder()
-        .timeout(std::time::Duration::from_secs(5))
-        .build()
-        .map_err(|e| ServerFnError::new(format!("HTTP client error: {e}")))?;
-
-    let resp = match client.get(&url).send().await {
-        Ok(r) if r.status().is_success() => r,
-        _ => {
-            return Ok(OllamaStatus {
-                online: false,
-                models: Vec::new(),
-            });
-        }
-    };
-
-    let body: OllamaTagsResponse = match resp.json().await {
-        Ok(b) => b,
-        Err(_) => {
-            return Ok(OllamaStatus {
-                online: true,
-                models: Vec::new(),
-            });
-        }
-    };
-
-    let models = body.models.into_iter().map(|m| m.name).collect();
-
-    Ok(OllamaStatus {
-        online: true,
-        models,
-    })
-}
diff --git a/src/infrastructure/provider_client.rs b/src/infrastructure/provider_client.rs
index 804eba6..2d05023 100644
--- a/src/infrastructure/provider_client.rs
+++ b/src/infrastructure/provider_client.rs
@@ -1,6 +1,6 @@
 //! Unified LLM provider dispatch.
 //!
-//! Routes chat completion requests to Ollama, OpenAI, Anthropic, or
+//! Routes chat completion requests to LiteLLM, OpenAI, Anthropic, or
 //! HuggingFace based on the session's provider setting. All providers
 //! except Anthropic use the OpenAI-compatible chat completions format.
 
@@ -20,11 +20,11 @@ pub struct ProviderMessage {
 ///
 /// # Arguments
 ///
-/// * `state` - Server state (for default Ollama URL/model)
-/// * `provider` - Provider name (`"ollama"`, `"openai"`, `"anthropic"`, `"huggingface"`)
+/// * `state` - Server state (for default LiteLLM URL/model)
+/// * `provider` - Provider name (`"litellm"`, `"openai"`, `"anthropic"`, `"huggingface"`)
 /// * `model` - Model ID
 /// * `messages` - Conversation history
-/// * `api_key` - API key (required for non-Ollama providers)
+/// * `api_key` - API key (required for non-LiteLLM providers; LiteLLM uses server config)
 /// * `stream` - Whether to request streaming
 ///
 /// # Returns
@@ -123,11 +123,11 @@ pub async fn send_chat_request(
                 .send()
                 .await
         }
-        // Default: Ollama (OpenAI-compatible endpoint)
+        // Default: LiteLLM proxy (OpenAI-compatible endpoint)
         _ => {
-            let base_url = &state.services.ollama_url;
+            let base_url = &state.services.litellm_url;
             let resolved_model = if model.is_empty() {
-                &state.services.ollama_model
+                &state.services.litellm_model
             } else {
                 model
             };
@@ -137,12 +137,15 @@ pub async fn send_chat_request(
                 "messages": messages,
                 "stream": stream,
             });
-            client
+            let litellm_key = &state.services.litellm_api_key;
+            let mut request = client
                 .post(&url)
                 .header("content-type", "application/json")
-                .json(&body)
-                .send()
-                .await
+                .json(&body);
+            if !litellm_key.is_empty() {
+                request = request.header("Authorization", format!("Bearer {litellm_key}"));
+            }
+            request.send().await
         }
     }
 }
diff --git a/src/infrastructure/server_state.rs b/src/infrastructure/server_state.rs
index 2817791..ff45de1 100644
--- a/src/infrastructure/server_state.rs
+++ b/src/infrastructure/server_state.rs
@@ -45,7 +45,7 @@ pub struct ServerStateInner {
     pub keycloak: &'static KeycloakConfig,
     /// Outbound email settings.
     pub smtp: &'static SmtpConfig,
-    /// URLs for Ollama, SearXNG, LangChain, S3, etc.
+    /// URLs for LiteLLM, SearXNG, LangChain, S3, etc.
     pub services: &'static ServiceUrls,
     /// Stripe billing keys.
     pub stripe: &'static StripeConfig,
diff --git a/src/models/chat.rs b/src/models/chat.rs
index aa869de..6ff68d8 100644
--- a/src/models/chat.rs
+++ b/src/models/chat.rs
@@ -60,8 +60,8 @@ pub struct Attachment {
 /// * `user_sub` - Keycloak subject ID (session owner)
 /// * `title` - Display title (auto-generated or user-renamed)
 /// * `namespace` - Grouping for sidebar sections
-/// * `provider` - LLM provider used (e.g. "ollama", "openai")
-/// * `model` - Model ID used (e.g. "llama3.1:8b")
+/// * `provider` - LLM provider used (e.g. "litellm", "openai")
+/// * `model` - Model ID used (e.g. "qwen3-32b")
 /// * `created_at` - ISO 8601 creation timestamp
 /// * `updated_at` - ISO 8601 last-activity timestamp
 /// * `article_url` - Source article URL (for News namespace sessions)
@@ -171,8 +171,8 @@ mod tests {
             user_sub: "user-1".into(),
             title: "Test Chat".into(),
             namespace: ChatNamespace::General,
-            provider: "ollama".into(),
-            model: "llama3.1:8b".into(),
+            provider: "litellm".into(),
+            model: "qwen3-32b".into(),
             created_at: "2025-01-01T00:00:00Z".into(),
             updated_at: "2025-01-01T01:00:00Z".into(),
             article_url: None,
@@ -189,7 +189,7 @@ mod tests {
             "_id": "mongo-id",
             "user_sub": "u1",
             "title": "t",
-            "provider": "ollama",
+            "provider": "litellm",
             "model": "m",
             "created_at": "2025-01-01",
             "updated_at": "2025-01-01"
@@ -205,7 +205,7 @@ mod tests {
             user_sub: "u1".into(),
             title: "t".into(),
             namespace: ChatNamespace::default(),
-            provider: "ollama".into(),
+            provider: "litellm".into(),
             model: "m".into(),
             created_at: "2025-01-01".into(),
             updated_at: "2025-01-01".into(),
@@ -223,7 +223,7 @@ mod tests {
             user_sub: "u1".into(),
             title: "t".into(),
             namespace: ChatNamespace::default(),
-            provider: "ollama".into(),
+            provider: "litellm".into(),
             model: "m".into(),
             created_at: "2025-01-01".into(),
             updated_at: "2025-01-01".into(),
diff --git a/src/models/organization.rs b/src/models/organization.rs
index 0c6745d..a3c0fb7 100644
--- a/src/models/organization.rs
+++ b/src/models/organization.rs
@@ -83,6 +83,42 @@ pub struct BillingUsage {
     pub billing_cycle_end: String,
 }
 
+/// Aggregated token usage statistics from LiteLLM's spend tracking API.
+///
+/// # Fields
+///
+/// * `total_spend` - Total cost in USD across all models
+/// * `total_prompt_tokens` - Sum of prompt (input) tokens; may be 0 when LiteLLM omits the split
+/// * `total_completion_tokens` - Sum of completion (output) tokens; may be 0 when the split is omitted
+/// * `total_tokens` - Sum of all tokens (equals prompt + completion only when the split is reported)
+/// * `model_breakdown` - Per-model usage breakdown
+#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
+pub struct LitellmUsageStats {
+    pub total_spend: f64,
+    pub total_prompt_tokens: u64,
+    pub total_completion_tokens: u64,
+    pub total_tokens: u64,
+    pub model_breakdown: Vec<ModelUsage>,
+}
+
+/// Token and spend usage for a single LLM model.
+///
+/// # Fields
+///
+/// * `model` - Model identifier (e.g. "gpt-4", "claude-3-opus")
+/// * `spend` - Cost in USD for this model
+/// * `prompt_tokens` - Prompt (input) tokens consumed
+/// * `completion_tokens` - Completion (output) tokens generated
+/// * `total_tokens` - Total tokens (prompt + completion)
+#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
+pub struct ModelUsage {
+    pub model: String,
+    pub spend: f64,
+    pub prompt_tokens: u64,
+    pub completion_tokens: u64,
+    pub total_tokens: u64,
+}
+
 /// Organisation-level settings stored in MongoDB.
 ///
 /// These complement Keycloak's Organizations feature with
@@ -234,4 +270,82 @@ mod tests {
         assert_eq!(record.seats_used, 0);
         assert_eq!(record.tokens_used, 0);
     }
+
+    #[test]
+    fn litellm_usage_stats_default() {
+        let stats = LitellmUsageStats::default();
+        assert_eq!(stats.total_spend, 0.0);
+        assert_eq!(stats.total_prompt_tokens, 0);
+        assert_eq!(stats.total_completion_tokens, 0);
+        assert_eq!(stats.total_tokens, 0);
+        assert!(stats.model_breakdown.is_empty());
+    }
+
+    #[test]
+    fn litellm_usage_stats_serde_round_trip() {
+        let stats = LitellmUsageStats {
+            total_spend: 12.34,
+            total_prompt_tokens: 50_000,
+            total_completion_tokens: 25_000,
+            total_tokens: 75_000,
+            model_breakdown: vec![
+                ModelUsage {
+                    model: "gpt-4".into(),
+                    spend: 10.0,
+                    prompt_tokens: 40_000,
+                    completion_tokens: 20_000,
+                    total_tokens: 60_000,
+                },
+                ModelUsage {
+                    model: "claude-3-opus".into(),
+                    spend: 2.34,
+                    prompt_tokens: 10_000,
+                    completion_tokens: 5_000,
+                    total_tokens: 15_000,
+                },
+            ],
+        };
+        let json = serde_json::to_string(&stats).expect("serialize LitellmUsageStats");
+        let back: LitellmUsageStats =
+            serde_json::from_str(&json).expect("deserialize LitellmUsageStats");
+        assert_eq!(stats, back);
+    }
+
+    #[test]
+    fn model_usage_default() {
+        let usage = ModelUsage::default();
+        assert_eq!(usage.model, "");
+        assert_eq!(usage.spend, 0.0);
+        assert_eq!(usage.prompt_tokens, 0);
+        assert_eq!(usage.completion_tokens, 0);
+        assert_eq!(usage.total_tokens, 0);
+    }
+
+    #[test]
+    fn model_usage_serde_round_trip() {
+        let usage = ModelUsage {
+            model: "gpt-4-turbo".into(),
+            spend: 5.67,
+            prompt_tokens: 30_000,
+            completion_tokens: 15_000,
+            total_tokens: 45_000,
+        };
+        let json = serde_json::to_string(&usage).expect("serialize ModelUsage");
+        let back: ModelUsage = serde_json::from_str(&json).expect("deserialize ModelUsage");
+        assert_eq!(usage, back);
+    }
+
+    #[test]
+    fn litellm_usage_stats_empty_breakdown_round_trip() {
+        let stats = LitellmUsageStats {
+            total_spend: 0.0,
+            total_prompt_tokens: 0,
+            total_completion_tokens: 0,
+            total_tokens: 0,
+            model_breakdown: Vec::new(),
+        };
+        let json = serde_json::to_string(&stats).expect("serialize empty stats");
+        let back: LitellmUsageStats = serde_json::from_str(&json).expect("deserialize empty stats");
+        assert_eq!(stats, back);
+    }
 }
diff --git a/src/models/provider.rs b/src/models/provider.rs
index 48ee498..b4b68f0 100644
--- a/src/models/provider.rs
+++ b/src/models/provider.rs
@@ -3,8 +3,8 @@ use serde::{Deserialize, Serialize};
 /// Supported LLM provider backends.
 #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
 pub enum LlmProvider {
-    /// Self-hosted models via Ollama
-    Ollama,
+    /// LiteLLM proxy for unified model access
+    LiteLlm,
     /// Hugging Face Inference API
     HuggingFace,
     /// OpenAI-compatible endpoints
@@ -17,7 +17,7 @@ impl LlmProvider {
     /// Returns the display name for a provider.
     pub fn label(&self) -> &'static str {
         match self {
-            Self::Ollama => "Ollama",
+            Self::LiteLlm => "LiteLLM",
             Self::HuggingFace => "Hugging Face",
             Self::OpenAi => "OpenAI",
             Self::Anthropic => "Anthropic",
@@ -29,7 +29,7 @@ impl LlmProvider {
 ///
 /// # Fields
 ///
-/// * `id` - Unique model identifier (e.g. "llama3.1:8b")
+/// * `id` - Unique model identifier (e.g. "qwen3-32b")
 /// * `name` - Human-readable display name
 /// * `provider` - Which provider hosts this model
 /// * `context_window` - Maximum context length in tokens
@@ -79,8 +79,8 @@ mod tests {
     use pretty_assertions::assert_eq;
 
     #[test]
-    fn llm_provider_label_ollama() {
-        assert_eq!(LlmProvider::Ollama.label(), "Ollama");
+    fn llm_provider_label_litellm() {
+        assert_eq!(LlmProvider::LiteLlm.label(), "LiteLLM");
     }
 
     #[test]
@@ -101,7 +101,7 @@ mod tests {
     #[test]
     fn llm_provider_serde_round_trip() {
         for variant in [
-            LlmProvider::Ollama,
+            LlmProvider::LiteLlm,
             LlmProvider::HuggingFace,
             LlmProvider::OpenAi,
             LlmProvider::Anthropic,
@@ -117,10 +117,10 @@ mod tests {
     #[test]
     fn model_entry_serde_round_trip() {
         let entry = ModelEntry {
-            id: "llama3.1:8b".into(),
-            name: "Llama 3.1 8B".into(),
-            provider: LlmProvider::Ollama,
-            context_window: 8192,
+            id: "qwen3-32b".into(),
+            name: "Qwen3 32B".into(),
+            provider: LlmProvider::LiteLlm,
+            context_window: 32,
         };
         let json = serde_json::to_string(&entry).expect("serialize ModelEntry");
         let back: ModelEntry = serde_json::from_str(&json).expect("deserialize ModelEntry");
diff --git a/src/models/user.rs b/src/models/user.rs
index cbab583..4b7b615 100644
--- a/src/models/user.rs
+++ b/src/models/user.rs
@@ -35,12 +35,12 @@ pub struct AuthInfo {
 /// Per-user LLM provider configuration stored in MongoDB.
 ///
 /// Controls which provider and model the user's chat sessions default
-/// to, and stores API keys for non-Ollama providers.
+/// to, and stores API keys for non-LiteLLM providers.
 #[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
 pub struct UserProviderConfig {
-    /// Default provider name (e.g. "ollama", "openai")
+    /// Default provider name (e.g. "litellm", "openai")
     pub default_provider: String,
-    /// Default model ID (e.g. "llama3.1:8b", "gpt-4o")
+    /// Default model ID (e.g. "qwen3-32b", "gpt-4o")
     pub default_model: String,
     /// OpenAI API key (empty if not configured)
     #[serde(default, skip_serializing_if = "Option::is_none")]
@@ -51,8 +51,8 @@ pub struct UserProviderConfig {
     /// HuggingFace API key
     #[serde(default, skip_serializing_if = "Option::is_none")]
     pub huggingface_api_key: Option<String>,
-    /// Custom Ollama URL override (empty = use server default)
-    pub ollama_url_override: String,
+    /// Custom LiteLLM URL override (empty = use server default)
+    pub litellm_url_override: String,
 }
 
 /// Per-user preferences stored in MongoDB.
@@ -66,10 +66,10 @@ pub struct UserPreferences {
     pub org_id: String,
     /// User-selected news/search topics
     pub custom_topics: Vec<String>,
-    /// Per-user Ollama URL override (empty = use server default)
-    pub ollama_url_override: String,
-    /// Per-user Ollama model override (empty = use server default)
-    pub ollama_model_override: String,
+    /// Per-user LiteLLM URL override (empty = use server default)
+    pub litellm_url_override: String,
+    /// Per-user LiteLLM model override (empty = use server default)
+    pub litellm_model_override: String,
     /// Recently searched queries for quick access
     pub recent_searches: Vec<String>,
     /// LLM provider configuration
@@ -132,12 +132,12 @@ mod tests {
     #[test]
     fn user_provider_config_optional_keys_skip_none() {
         let cfg = UserProviderConfig {
-            default_provider: "ollama".into(),
-            default_model: "llama3.1:8b".into(),
+            default_provider: "litellm".into(),
+            default_model: "qwen3-32b".into(),
             openai_api_key: None,
             anthropic_api_key: None,
             huggingface_api_key: None,
-            ollama_url_override: String::new(),
+            litellm_url_override: String::new(),
         };
         let json = serde_json::to_string(&cfg).expect("serialize UserProviderConfig");
         assert!(!json.contains("openai_api_key"));
@@ -153,7 +153,7 @@ mod tests {
             openai_api_key: Some("sk-test".into()),
             anthropic_api_key: Some("ak-test".into()),
             huggingface_api_key: None,
-            ollama_url_override: "http://custom:11434".into(),
+            litellm_url_override: "http://custom:4000".into(),
         };
         let json = serde_json::to_string(&cfg).expect("serialize");
         let back: UserProviderConfig = serde_json::from_str(&json).expect("deserialize");
diff --git a/src/pages/dashboard.rs b/src/pages/dashboard.rs
index aedfbc4..3145473 100644
--- a/src/pages/dashboard.rs
+++ b/src/pages/dashboard.rs
@@ -25,8 +25,8 @@ const DEFAULT_TOPICS: &[&str] = &[
 ///
 /// State is persisted across sessions using localStorage:
 /// - `certifai_topics`: custom user-defined search topics
-/// - `certifai_ollama_url`: Ollama instance URL for summarization
-/// - `certifai_ollama_model`: Ollama model ID for summarization
+/// - `certifai_litellm_url`: LiteLLM proxy URL for summarization
+/// - `certifai_litellm_model`: LiteLLM model ID for summarization
 #[component]
 pub fn DashboardPage() -> Element {
     let locale = use_context::<Signal<Locale>>();
@@ -34,11 +34,11 @@ pub fn DashboardPage() -> Element {
 
     // Persistent state stored in localStorage
     let mut custom_topics = use_persistent("certifai_topics".to_string(), Vec::<String>::new);
-    // Default to empty so the server functions use OLLAMA_URL / OLLAMA_MODEL
+    // Default to empty so the server functions use LITELLM_URL / LITELLM_MODEL
     // from .env. Only stores a non-empty value when the user explicitly saves
     // an override via the Settings panel.
-    let mut ollama_url = use_persistent("certifai_ollama_url".to_string(), String::new);
-    let mut ollama_model = use_persistent("certifai_ollama_model".to_string(), String::new);
+    let mut litellm_url = use_persistent("certifai_litellm_url".to_string(), String::new);
+    let mut litellm_model = use_persistent("certifai_litellm_model".to_string(), String::new);
 
     // Reactive signals for UI state
     let mut active_topic = use_signal(|| "AI".to_string());
@@ -235,8 +235,8 @@ pub fn DashboardPage() -> Element {
                     onclick: move |_| {
                         let currently_shown = *show_settings.read();
                         if !currently_shown {
-                            settings_url.set(ollama_url.read().clone());
-                            settings_model.set(ollama_model.read().clone());
+                            settings_url.set(litellm_url.read().clone());
+                            settings_model.set(litellm_model.read().clone());
                         }
                         show_settings.set(!currently_shown);
                     },
@@ -247,16 +247,16 @@ pub fn DashboardPage() -> Element {
             // Settings panel (collapsible)
             if *show_settings.read() {
                 div { class: "settings-panel",
-                    h4 { class: "settings-panel-title", "{t(l, \"dashboard.ollama_settings\")}" }
+                    h4 { class: "settings-panel-title", "{t(l, \"dashboard.litellm_settings\")}" }
                     p { class: "settings-hint",
                         "{t(l, \"dashboard.settings_hint\")}"
                     }
                     div { class: "settings-field",
-                        label { "{t(l, \"dashboard.ollama_url\")}" }
+                        label { "{t(l, \"dashboard.litellm_url\")}" }
                         input {
                             class: "settings-input",
                             r#type: "text",
-                            placeholder: "{t(l, \"dashboard.ollama_url_placeholder\")}",
+                            placeholder: "{t(l, \"dashboard.litellm_url_placeholder\")}",
                             value: "{settings_url}",
                             oninput: move |e| settings_url.set(e.value()),
                         }
@@ -274,8 +274,8 @@ pub fn DashboardPage() -> Element {
                     button {
                         class: "btn btn-primary",
                         onclick: move |_| {
-                            *ollama_url.write() = settings_url.read().trim().to_string();
-                            *ollama_model.write() = settings_model.read().trim().to_string();
+                            *litellm_url.write() = settings_url.read().trim().to_string();
+                            *litellm_model.write() = settings_model.read().trim().to_string();
                             show_settings.set(false);
                         },
                         "{t(l, \"common.save\")}"
@@ -320,14 +320,14 @@ pub fn DashboardPage() -> Element {
                                             news_session_id.set(None);
 
 
-                                            let oll_url = ollama_url.read().clone();
-                                            let mdl = ollama_model.read().clone();
+                                            let ll_url = litellm_url.read().clone();
+                                            let mdl = litellm_model.read().clone();
                                             spawn(async move {
                                                 is_summarizing.set(true);
                                                 match crate::infrastructure::llm::summarize_article(
                                                         snippet.clone(),
                                                         article_url,
-                                                        oll_url,
+                                                        ll_url,
                                                         mdl,
                                                     )
                                                     .await
@@ -373,8 +373,8 @@ pub fn DashboardPage() -> Element {
                             chat_messages: chat_messages.read().clone(),
                             is_chatting: *is_chatting.read(),
                             on_chat_send: move |question: String| {
-                                let oll_url = ollama_url.read().clone();
-                                let mdl = ollama_model.read().clone();
+                                let ll_url = litellm_url.read().clone();
+                                let mdl = litellm_model.read().clone();
                                 let ctx = article_context.read().clone();
                                 // Capture article info for News session creation
                                 let card_title = selected_card
@@ -394,7 +394,7 @@ pub fn DashboardPage() -> Element {
                                     content: question.clone(),
                                 });
 
-                                // Build full message history for Ollama
+                                // Build full message history for LiteLLM
                                 let system_msg = format!(
                                     "You are a helpful assistant. The user is reading \
                                      a news article. Use the following context to answer \
@@ -422,7 +422,7 @@ pub fn DashboardPage() -> Element {
                                         match create_chat_session(
                                             card_title,
                                             "News".to_string(),
-                                            "ollama".to_string(),
+                                            "litellm".to_string(),
                                             mdl.clone(),
                                             card_url,
                                         )
@@ -458,7 +458,7 @@ pub fn DashboardPage() -> Element {
                                     }
 
                                     match crate::infrastructure::llm::chat_followup(
-                                        msgs, oll_url, mdl,
+                                        msgs, ll_url, mdl,
                                     )
                                     .await
                                     {
@@ -495,7 +495,7 @@ pub fn DashboardPage() -> Element {
                 // Right: sidebar (when no card selected)
                 if !has_selection {
                     DashboardSidebar {
-                        ollama_url: ollama_url.read().clone(),
+                        litellm_url: litellm_url.read().clone(),
                         trending: trending_topics.clone(),
                         recent_searches: recent_searches.read().clone(),
                         on_topic_click: move |topic: String| {
diff --git a/src/pages/organization/dashboard.rs b/src/pages/organization/dashboard.rs
index a0e369b..716c9a9 100644
--- a/src/pages/organization/dashboard.rs
+++ b/src/pages/organization/dashboard.rs
@@ -2,12 +2,14 @@ use dioxus::prelude::*;
 
 use crate::components::{MemberRow, PageHeader};
 use crate::i18n::{t, tw, Locale};
-use crate::models::{BillingUsage, MemberRole, OrgMember};
+use crate::infrastructure::litellm::get_litellm_usage;
+use crate::models::{BillingUsage, LitellmUsageStats, MemberRole, OrgMember};
 
 /// Organization dashboard with billing stats, member table, and invite modal.
 ///
-/// Shows current billing usage, a table of organization members
-/// with role management, and a button to invite new members.
+/// Shows current billing usage (fetched from LiteLLM), a per-model
+/// breakdown table, a table of organization members with role
+/// management, and a button to invite new members.
 #[component]
 pub fn OrgDashboardPage() -> Element {
     let locale = use_context::<Signal<Locale>>();
@@ -20,6 +22,20 @@ pub fn OrgDashboardPage() -> Element {
 
     let members_list = members.read().clone();
 
+    // Compute date range: 1st of current month to today
+    let (start_date, end_date) = current_month_range();
+
+    // Fetch real usage stats from LiteLLM via server function.
+    // use_resource memoises and won't re-fire on parent re-renders.
+    let usage_resource = use_resource(move || {
+        let start = start_date.clone();
+        let end = end_date.clone();
+        async move { get_litellm_usage(start, end).await }
+    });
+
+    // Clone out of Signal to avoid holding the borrow across rsx!
+    let usage_snapshot = usage_resource.read().clone();
+
     // Format token counts for display
     let tokens_display = format_tokens(usage.tokens_used);
     let tokens_limit_display = format_tokens(usage.tokens_limit);
@@ -30,26 +46,39 @@ pub fn OrgDashboardPage() -> Element {
                 title: t(l, "org.title"),
                 subtitle: t(l, "org.subtitle"),
                 actions: rsx! {
-                    button { class: "btn-primary", onclick: move |_| show_invite.set(true), {t(l, "org.invite_member")} }
+                    button {
+                        class: "btn-primary",
+                        onclick: move |_| show_invite.set(true),
+                        {t(l, "org.invite_member")}
+                    }
                 },
             }
 
             // Stats bar
             div { class: "org-stats-bar",
                 div { class: "org-stat",
-                    span { class: "org-stat-value", "{usage.seats_used}/{usage.seats_total}" }
+                    span { class: "org-stat-value",
+                        "{usage.seats_used}/{usage.seats_total}"
+                    }
                     span { class: "org-stat-label", {t(l, "org.seats_used")} }
                 }
                 div { class: "org-stat",
                     span { class: "org-stat-value", "{tokens_display}" }
-                    span { class: "org-stat-label", {tw(l, "org.of_tokens", &[("limit", &tokens_limit_display)])} }
+                    span { class: "org-stat-label",
+                        {tw(l, "org.of_tokens", &[("limit", &tokens_limit_display)])}
+                    }
                 }
                 div { class: "org-stat",
-                    span { class: "org-stat-value", "{usage.billing_cycle_end}" }
+                    span { class: "org-stat-value",
+                        "{usage.billing_cycle_end}"
+                    }
                     span { class: "org-stat-label", {t(l, "org.cycle_ends")} }
                 }
             }
 
+            // LiteLLM usage stats section
+            {render_usage_section(l, &usage_snapshot)}
+
             // Members table
             div { class: "org-table-wrapper",
                 table { class: "org-table",
@@ -114,6 +143,144 @@ pub fn OrgDashboardPage() -> Element {
     }
 }
 
+/// Render the LiteLLM usage stats section: a totals bar plus a per-model table.
+///
+/// `None` renders the loading message; `Err` and an all-empty `Ok` (zero total tokens and
+/// no model rows) both render the "usage unavailable" message; any other `Ok` renders stats.
+fn render_usage_section(
+    l: Locale,
+    snapshot: &Option<Result<LitellmUsageStats, ServerFnError>>,
+) -> Element {
+    match snapshot {
+        None => rsx! {
+            div { class: "org-usage-loading",
+                span { {t(l, "org.loading_usage")} }
+            }
+        },
+        Some(Err(_)) => rsx! {
+            div { class: "org-usage-unavailable",
+                span { {t(l, "org.usage_unavailable")} }
+            }
+        },
+        Some(Ok(stats)) if stats.total_tokens == 0 && stats.model_breakdown.is_empty() => {
+            rsx! {
+                div { class: "org-usage-unavailable",
+                    span { {t(l, "org.usage_unavailable")} }
+                }
+            }
+        }
+        Some(Ok(stats)) => {
+            let spend_display = format!("${:.2}", stats.total_spend);
+            let total_display = format_tokens(stats.total_tokens);
+            // Both counters at zero means "no split reported" (free-tier LiteLLM omits it)
+            let has_token_split =
+                stats.total_prompt_tokens > 0 || stats.total_completion_tokens > 0;
+
+            rsx! {
+                // Totals bar: spend and total tokens are always shown
+                div { class: "org-stats-bar",
+                    div { class: "org-stat",
+                        span { class: "org-stat-value", "{spend_display}" }
+                        span { class: "org-stat-label",
+                            {t(l, "org.total_spend")}
+                        }
+                    }
+                    div { class: "org-stat",
+                        span { class: "org-stat-value",
+                            "{total_display}"
+                        }
+                        span { class: "org-stat-label",
+                            {t(l, "org.total_tokens")}
+                        }
+                    }
+                    // Prompt and completion cells are emitted together, gated on has_token_split
+                    if has_token_split {
+                        div { class: "org-stat",
+                            span { class: "org-stat-value",
+                                {format_tokens(stats.total_prompt_tokens)}
+                            }
+                            span { class: "org-stat-label",
+                                {t(l, "org.prompt_tokens")}
+                            }
+                        }
+                        div { class: "org-stat",
+                            span { class: "org-stat-value",
+                                {format_tokens(stats.total_completion_tokens)}
+                            }
+                            span { class: "org-stat-label",
+                                {t(l, "org.completion_tokens")}
+                            }
+                        }
+                    }
+                }
+
+                // Per-model table (model, tokens, spend); heading and table are omitted when empty
+                if !stats.model_breakdown.is_empty() {
+                    h3 { class: "org-section-title",
+                        {t(l, "org.model_usage")}
+                    }
+                    div { class: "org-table-wrapper",
+                        table { class: "org-table",
+                            thead {
+                                tr {
+                                    th { {t(l, "org.model")} }
+                                    th { {t(l, "org.tokens")} }
+                                    th { {t(l, "org.spend")} }
+                                }
+                            }
+                            tbody {
+                                for model in &stats.model_breakdown {
+                                    tr { key: "{model.model}",
+                                        td { "{model.model}" }
+                                        td {
+                                            {format_tokens(model.total_tokens)}
+                                        }
+                                        td {
+                                            {format!(
+                                                "${:.2}", model.spend
+                                            )}
+                                        }
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+/// Compute the date range for the current billing month.
+///
+/// Returns `(start_date, end_date)` as `YYYY-MM-DD` strings where
+/// start_date is the 1st of the current month and end_date is today.
+///
+/// "Today" is evaluated in UTC on both targets so the client and SSR results agree:
+/// web uses the `js_sys::Date` UTC getters, server uses `chrono::Utc::now()`.
+fn current_month_range() -> (String, String) {
+    #[cfg(feature = "web")]
+    {
+        // Browser clock via js_sys::Date; UTC getters match the chrono::Utc server branch.
+        let now = js_sys::Date::new_0();
+        let year = now.get_utc_full_year();
+        // JS months are 0-indexed, so add 1 for calendar month
+        let month = now.get_utc_month() + 1;
+        let day = now.get_utc_date();
+        let start = format!("{year:04}-{month:02}-01");
+        let end = format!("{year:04}-{month:02}-{day:02}");
+        (start, end)
+    }
+    #[cfg(not(feature = "web"))]
+    {
+        use chrono::Datelike;
+        let today = chrono::Utc::now().date_naive();
+        let start = format!("{:04}-{:02}-01", today.year(), today.month());
+        let end = today.format("%Y-%m-%d").to_string();
+        (start, end)
+    }
+}
+
 /// Formats a token count into a human-readable string (e.g. "1.2M").
 fn format_tokens(count: u64) -> String {
     const M: u64 = 1_000_000;
diff --git a/src/pages/providers.rs b/src/pages/providers.rs
index 9a6e039..b13fa96 100644
--- a/src/pages/providers.rs
+++ b/src/pages/providers.rs
@@ -13,8 +13,8 @@ pub fn ProvidersPage() -> Element {
     let locale = use_context::<Signal<Locale>>();
     let l = *locale.read();
 
-    let mut selected_provider = use_signal(|| LlmProvider::Ollama);
-    let mut selected_model = use_signal(|| "llama3.1:8b".to_string());
+    let mut selected_provider = use_signal(|| LlmProvider::LiteLlm);
+    let mut selected_model = use_signal(|| "qwen3-32b".to_string());
     let mut selected_embedding = use_signal(|| "nomic-embed-text".to_string());
     let mut api_key = use_signal(String::new);
     let mut saved = use_signal(|| false);
@@ -59,12 +59,12 @@ pub fn ProvidersPage() -> Element {
                                     "Hugging Face" => LlmProvider::HuggingFace,
                                     "OpenAI" => LlmProvider::OpenAi,
                                     "Anthropic" => LlmProvider::Anthropic,
-                                    _ => LlmProvider::Ollama,
+                                    _ => LlmProvider::LiteLlm,
                                 };
                                 selected_provider.set(prov);
                                 saved.set(false);
                             },
-                            option { value: "Ollama", "Ollama" }
+                            option { value: "LiteLLM", "LiteLLM" }
                             option { value: "Hugging Face", "Hugging Face" }
                             option { value: "OpenAI", "OpenAI" }
                             option { value: "Anthropic", "Anthropic" }
@@ -156,23 +156,29 @@ pub fn ProvidersPage() -> Element {
 fn mock_models() -> Vec<ModelEntry> {
     vec![
         ModelEntry {
-            id: "llama3.1:8b".into(),
-            name: "Llama 3.1 8B".into(),
-            provider: LlmProvider::Ollama,
-            context_window: 128,
-        },
-        ModelEntry {
-            id: "llama3.1:70b".into(),
-            name: "Llama 3.1 70B".into(),
-            provider: LlmProvider::Ollama,
-            context_window: 128,
-        },
-        ModelEntry {
-            id: "mistral:7b".into(),
-            name: "Mistral 7B".into(),
-            provider: LlmProvider::Ollama,
+            id: "qwen3-32b".into(),
+            name: "Qwen3 32B".into(),
+            provider: LlmProvider::LiteLlm,
             context_window: 32,
         },
+        ModelEntry {
+            id: "llama-3.3-70b".into(),
+            name: "Llama 3.3 70B".into(),
+            provider: LlmProvider::LiteLlm,
+            context_window: 128,
+        },
+        ModelEntry {
+            id: "mistral-small-24b".into(),
+            name: "Mistral Small 24B".into(),
+            provider: LlmProvider::LiteLlm,
+            context_window: 32,
+        },
+        ModelEntry {
+            id: "deepseek-r1-70b".into(),
+            name: "DeepSeek R1 70B".into(),
+            provider: LlmProvider::LiteLlm,
+            context_window: 64,
+        },
         ModelEntry {
             id: "meta-llama/Llama-3.1-8B".into(),
             name: "Llama 3.1 8B".into(),
@@ -200,7 +206,7 @@ fn mock_embeddings() -> Vec<EmbeddingEntry> {
         EmbeddingEntry {
             id: "nomic-embed-text".into(),
             name: "Nomic Embed Text".into(),
-            provider: LlmProvider::Ollama,
+            provider: LlmProvider::LiteLlm,
             dimensions: 768,
         },
         EmbeddingEntry {