From fe4f8e84ae517c40afd00a84c3f1b1cbd6558efe Mon Sep 17 00:00:00 2001 From: Sharang Parnerkar Date: Thu, 26 Feb 2026 17:52:47 +0000 Subject: [PATCH] feat: replaced ollama with litellm (#18) Co-authored-by: Sharang Parnerkar Reviewed-on: https://gitea.meghsakha.com/sharang/certifai/pulls/18 --- .env.example | 9 +- Cargo.lock | 1 + Cargo.toml | 3 +- assets/i18n/de.json | 22 +- assets/i18n/en.json | 22 +- assets/i18n/es.json | 22 +- assets/i18n/fr.json | 22 +- assets/i18n/pt.json | 22 +- assets/tailwind.css | 245 ++++++---------- docker-compose.yml | 3 +- librechat/librechat.yaml | 18 +- src/components/dashboard_sidebar.rs | 22 +- src/components/news_card.rs | 12 +- src/infrastructure/chat.rs | 56 ++-- src/infrastructure/config.rs | 57 ++-- src/infrastructure/litellm.rs | 403 ++++++++++++++++++++++++++ src/infrastructure/llm.rs | 102 ++++--- src/infrastructure/mod.rs | 2 +- src/infrastructure/ollama.rs | 92 ------ src/infrastructure/provider_client.rs | 25 +- src/infrastructure/server_state.rs | 2 +- src/models/chat.rs | 14 +- src/models/organization.rs | 114 ++++++++ src/models/provider.rs | 22 +- src/models/user.rs | 26 +- src/pages/dashboard.rs | 42 +-- src/pages/organization/dashboard.rs | 181 +++++++++++- src/pages/providers.rs | 46 +-- 28 files changed, 1107 insertions(+), 500 deletions(-) create mode 100644 src/infrastructure/litellm.rs delete mode 100644 src/infrastructure/ollama.rs diff --git a/.env.example b/.env.example index f5a8f34..a401835 100644 --- a/.env.example +++ b/.env.example @@ -34,10 +34,11 @@ MONGODB_DATABASE=certifai SEARXNG_URL=http://localhost:8888 # --------------------------------------------------------------------------- -# Ollama LLM instance [OPTIONAL - defaults shown] +# LiteLLM proxy [OPTIONAL - defaults shown] # --------------------------------------------------------------------------- -OLLAMA_URL=http://localhost:11434 -OLLAMA_MODEL=llama3.1:8b +LITELLM_URL=http://localhost:4000 +LITELLM_MODEL=qwen3-32b +LITELLM_API_KEY= # --------------------------------------------------------------------------- # LibreChat (external chat via SSO) [OPTIONAL - default: http://localhost:3080] @@ -47,7 +48,7 @@ LIBRECHAT_URL=http://localhost:3080 # --------------------------------------------------------------------------- # LLM Providers (comma-separated list) [OPTIONAL] # --------------------------------------------------------------------------- -LLM_PROVIDERS=ollama +LLM_PROVIDERS=litellm # --------------------------------------------------------------------------- # SMTP (transactional email) [OPTIONAL] diff --git a/Cargo.lock b/Cargo.lock index 9e9ff42..e7f4119 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -773,6 +773,7 @@ dependencies = [ "dioxus-sdk", "dotenvy", "futures", + "js-sys", "maud", "mongodb", "petname", diff --git a/Cargo.toml b/Cargo.toml index 9de0a77..5a126a3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -61,6 +61,7 @@ secrecy = { version = "0.10", default-features = false, optional = true } serde_json = { version = "1.0.133", default-features = false } maud = { version = "0.27", default-features = false } url = { version = "2.5.4", default-features = false, optional = true } +js-sys = { version = "0.3", optional = true } wasm-bindgen = { version = "0.2", optional = true } web-sys = { version = "0.3", optional = true, features = [ "Clipboard", @@ -91,7 +92,7 @@ bytes = { version = "1", optional = true } [features] # default = ["web"] -web = ["dioxus/web", "dep:reqwest", "dep:web-sys", "dep:wasm-bindgen"] +web = ["dioxus/web", "dep:reqwest", "dep:web-sys", "dep:wasm-bindgen", "dep:js-sys"] server = [ "dioxus/server", "dep:axum", diff --git a/assets/i18n/de.json b/assets/i18n/de.json index dade857..4f985ff 100644 --- a/assets/i18n/de.json +++ b/assets/i18n/de.json @@ -58,15 +58,15 @@ "title": "Dashboard", "subtitle": "KI-Nachrichten und Neuigkeiten", "topic_placeholder": "Themenname...", - "ollama_settings": "Ollama-Einstellungen", - "settings_hint": "Leer lassen, um OLLAMA_URL / OLLAMA_MODEL aus .env zu verwenden", - "ollama_url": "Ollama-URL", - "ollama_url_placeholder": "Verwendet OLLAMA_URL aus .env", + "litellm_settings": "LiteLLM-Einstellungen", + "settings_hint": "Leer lassen, um LITELLM_URL / LITELLM_MODEL aus .env zu verwenden", + "litellm_url": "LiteLLM-URL", + "litellm_url_placeholder": "Verwendet LITELLM_URL aus .env", "model": "Modell", - "model_placeholder": "Verwendet OLLAMA_MODEL aus .env", + "model_placeholder": "Verwendet LITELLM_MODEL aus .env", "searching": "Suche laeuft...", "search_failed": "Suche fehlgeschlagen: {e}", - "ollama_status": "Ollama-Status", + "litellm_status": "LiteLLM-Status", "trending": "Im Trend", "recent_searches": "Letzte Suchen" }, @@ -144,6 +144,16 @@ "email_address": "E-Mail-Adresse", "email_placeholder": "kollege@firma.de", "send_invite": "Einladung senden", + "total_spend": "Gesamtausgaben", + "total_tokens": "Tokens gesamt", + "model_usage": "Nutzung nach Modell", + "model": "Modell", + "tokens": "Tokens", + "spend": "Ausgaben", + "usage_unavailable": "Nutzungsdaten nicht verfuegbar", + "loading_usage": "Nutzungsdaten werden geladen...", + "prompt_tokens": "Prompt-Tokens", + "completion_tokens": "Antwort-Tokens", "pricing_title": "Preise", "pricing_subtitle": "Waehlen Sie den passenden Plan fuer Ihre Organisation" }, diff --git a/assets/i18n/en.json b/assets/i18n/en.json index 666890e..038ca71 100644 --- a/assets/i18n/en.json +++ b/assets/i18n/en.json @@ -58,15 +58,15 @@ "title": "Dashboard", "subtitle": "AI news and updates", "topic_placeholder": "Topic name...", - "ollama_settings": "Ollama Settings", - "settings_hint": "Leave empty to use OLLAMA_URL / OLLAMA_MODEL from .env", - "ollama_url": "Ollama URL", - "ollama_url_placeholder": "Uses OLLAMA_URL from .env", + "litellm_settings": "LiteLLM Settings", + "settings_hint": "Leave empty to use LITELLM_URL / LITELLM_MODEL from .env", + "litellm_url": "LiteLLM URL", + "litellm_url_placeholder": "Uses LITELLM_URL from .env", "model": "Model", - "model_placeholder": "Uses OLLAMA_MODEL from .env", + "model_placeholder": "Uses LITELLM_MODEL from .env", "searching": "Searching...", "search_failed": "Search failed: {e}", - "ollama_status": "Ollama Status", + "litellm_status": "LiteLLM Status", "trending": "Trending", "recent_searches": "Recent Searches" }, @@ -144,6 +144,16 @@ "email_address": "Email Address", "email_placeholder": "colleague@company.com", "send_invite": "Send Invite", + "total_spend": "Total Spend", + "total_tokens": "Total Tokens", + "model_usage": "Usage by Model", + "model": "Model", + "tokens": "Tokens", + "spend": "Spend", + "usage_unavailable": "Usage data unavailable", + "loading_usage": "Loading usage data...", + "prompt_tokens": "Prompt Tokens", + "completion_tokens": "Completion Tokens", "pricing_title": "Pricing", "pricing_subtitle": "Choose the plan that fits your organization" }, diff --git a/assets/i18n/es.json b/assets/i18n/es.json index ae356e9..07187e1 100644 --- a/assets/i18n/es.json +++ b/assets/i18n/es.json @@ -58,15 +58,15 @@ "title": "Panel de control", "subtitle": "Noticias y actualizaciones de IA", "topic_placeholder": "Nombre del tema...", - "ollama_settings": "Configuracion de Ollama", - "settings_hint": "Dejar vacio para usar OLLAMA_URL / OLLAMA_MODEL del archivo .env", - "ollama_url": "URL de Ollama", - "ollama_url_placeholder": "Usa OLLAMA_URL del archivo .env", + "litellm_settings": "Configuracion de LiteLLM", + "settings_hint": "Dejar vacio para usar LITELLM_URL / LITELLM_MODEL del archivo .env", + "litellm_url": "URL de LiteLLM", + "litellm_url_placeholder": "Usa LITELLM_URL del archivo .env", "model": "Modelo", - "model_placeholder": "Usa OLLAMA_MODEL del archivo .env", + "model_placeholder": "Usa LITELLM_MODEL del archivo .env", "searching": "Buscando...", "search_failed": "La busqueda fallo: {e}", - "ollama_status": "Estado de Ollama", + "litellm_status": "Estado de LiteLLM", "trending": "Tendencias", "recent_searches": "Busquedas recientes" }, @@ -144,6 +144,16 @@ "email_address": "Direccion de correo electronico", "email_placeholder": "colega@empresa.com", "send_invite": "Enviar invitacion", + "total_spend": "Gasto total", + "total_tokens": "Tokens totales", + "model_usage": "Uso por modelo", + "model": "Modelo", + "tokens": "Tokens", + "spend": "Gasto", + "usage_unavailable": "Datos de uso no disponibles", + "loading_usage": "Cargando datos de uso...", + "prompt_tokens": "Tokens de entrada", + "completion_tokens": "Tokens de respuesta", "pricing_title": "Precios", "pricing_subtitle": "Elija el plan que se adapte a su organizacion" }, diff --git a/assets/i18n/fr.json b/assets/i18n/fr.json index 3c134a4..f58f9db 100644 --- a/assets/i18n/fr.json +++ b/assets/i18n/fr.json @@ -58,15 +58,15 @@ "title": "Tableau de bord", "subtitle": "Actualites et mises a jour IA", "topic_placeholder": "Nom du sujet...", - "ollama_settings": "Parametres Ollama", - "settings_hint": "Laissez vide pour utiliser OLLAMA_URL / OLLAMA_MODEL du fichier .env", - "ollama_url": "URL Ollama", - "ollama_url_placeholder": "Utilise OLLAMA_URL du fichier .env", + "litellm_settings": "Parametres LiteLLM", + "settings_hint": "Laissez vide pour utiliser LITELLM_URL / LITELLM_MODEL du fichier .env", + "litellm_url": "URL LiteLLM", + "litellm_url_placeholder": "Utilise LITELLM_URL du fichier .env", "model": "Modele", - "model_placeholder": "Utilise OLLAMA_MODEL du fichier .env", + "model_placeholder": "Utilise LITELLM_MODEL du fichier .env", "searching": "Recherche en cours...", "search_failed": "Echec de la recherche : {e}", - "ollama_status": "Statut Ollama", + "litellm_status": "Statut LiteLLM", "trending": "Tendances", "recent_searches": "Recherches recentes" }, @@ -144,6 +144,16 @@ "email_address": "Adresse e-mail", "email_placeholder": "collegue@entreprise.com", "send_invite": "Envoyer l'invitation", + "total_spend": "Depenses totales", + "total_tokens": "Tokens totaux", + "model_usage": "Utilisation par modele", + "model": "Modele", + "tokens": "Tokens", + "spend": "Depenses", + "usage_unavailable": "Donnees d'utilisation indisponibles", + "loading_usage": "Chargement des donnees d'utilisation...", + "prompt_tokens": "Tokens d'entree", + "completion_tokens": "Tokens de reponse", "pricing_title": "Tarifs", "pricing_subtitle": "Choisissez le plan adapte a votre organisation" }, diff --git a/assets/i18n/pt.json b/assets/i18n/pt.json index 5eeb480..d930bf9 100644 --- a/assets/i18n/pt.json +++ b/assets/i18n/pt.json @@ -58,15 +58,15 @@ "title": "Painel", "subtitle": "Noticias e atualizacoes de IA", "topic_placeholder": "Nome do topico...", - "ollama_settings": "Definicoes do Ollama", - "settings_hint": "Deixe vazio para usar OLLAMA_URL / OLLAMA_MODEL do .env", - "ollama_url": "URL do Ollama", - "ollama_url_placeholder": "Utiliza OLLAMA_URL do .env", + "litellm_settings": "Definicoes do LiteLLM", + "settings_hint": "Deixe vazio para usar LITELLM_URL / LITELLM_MODEL do .env", + "litellm_url": "URL do LiteLLM", + "litellm_url_placeholder": "Utiliza LITELLM_URL do .env", "model": "Modelo", - "model_placeholder": "Utiliza OLLAMA_MODEL do .env", + "model_placeholder": "Utiliza LITELLM_MODEL do .env", "searching": "A pesquisar...", "search_failed": "A pesquisa falhou: {e}", - "ollama_status": "Estado do Ollama", + "litellm_status": "Estado do LiteLLM", "trending": "Em destaque", "recent_searches": "Pesquisas recentes" }, @@ -144,6 +144,16 @@ "email_address": "Endereco de Email", "email_placeholder": "colleague@company.com", "send_invite": "Enviar Convite", + "total_spend": "Gasto total", + "total_tokens": "Tokens totais", + "model_usage": "Uso por modelo", + "model": "Modelo", + "tokens": "Tokens", + "spend": "Gasto", + "usage_unavailable": "Dados de uso indisponiveis", + "loading_usage": "Carregando dados de uso...", + "prompt_tokens": "Tokens de entrada", + "completion_tokens": "Tokens de resposta", "pricing_title": "Precos", "pricing_subtitle": "Escolha o plano adequado a sua organizacao" }, diff --git a/assets/tailwind.css b/assets/tailwind.css index c7c9fdd..89ec9cf 100644 --- a/assets/tailwind.css +++ b/assets/tailwind.css @@ -1,4 +1,4 @@ -/*! tailwindcss v4.2.0 | MIT License | https://tailwindcss.com */ +/*! tailwindcss v4.2.1 | MIT License | https://tailwindcss.com */ @layer properties; @layer theme, base, components, utilities; @layer theme { @@ -162,59 +162,6 @@ } } @layer utilities { - .diff { - @layer daisyui.l1.l2.l3 { - position: relative; - display: grid; - width: 100%; - overflow: hidden; - webkit-user-select: none; - user-select: none; - grid-template-rows: 1fr 1.8rem 1fr; - direction: ltr; - container-type: inline-size; - grid-template-columns: auto 1fr; - &:focus-visible, &:has(.diff-item-1:focus-visible) { - outline-style: var(--tw-outline-style); - outline-width: 2px; - outline-offset: 1px; - outline-color: var(--color-base-content); - } - &:focus-visible { - outline-style: var(--tw-outline-style); - outline-width: 2px; - outline-offset: 1px; - outline-color: var(--color-base-content); - .diff-resizer { - min-width: 95cqi; - max-width: 95cqi; - } - } - &:has(.diff-item-1:focus-visible) { - outline-style: var(--tw-outline-style); - outline-width: 2px; - outline-offset: 1px; - .diff-resizer { - min-width: 5cqi; - max-width: 5cqi; - } - } - @supports (-webkit-overflow-scrolling: touch) and (overflow: -webkit-paged-x) { - &:focus { - .diff-resizer { - min-width: 5cqi; - max-width: 5cqi; - } - } - &:has(.diff-item-1:focus) { - .diff-resizer { - min-width: 95cqi; - max-width: 95cqi; - } - } - } - } - } .modal { @layer daisyui.l1.l2.l3 { pointer-events: none; @@ -1110,31 +1057,98 @@ } } } - .chat-bubble { + .range { @layer daisyui.l1.l2.l3 { - position: relative; - display: block; - width: fit-content; - border-radius: var(--radius-field); - background-color: var(--color-base-300); - padding-inline: calc(0.25rem * 4); - padding-block: calc(0.25rem * 2); - color: var(--color-base-content); - grid-row-end: 3; - min-height: 2rem; - min-width: 2.5rem; - max-width: 90%; - &:before { - position: absolute; - bottom: calc(0.25rem * 0); - height: calc(0.25rem * 3); - width: calc(0.25rem * 3); - background-color: inherit; - content: ""; - mask-repeat: no-repeat; - mask-image: var(--mask-chat); - mask-position: 0px -1px; - mask-size: 0.8125rem; + appearance: none; + webkit-appearance: none; + --range-thumb: var(--color-base-100); + --range-thumb-size: calc(var(--size-selector, 0.25rem) * 6); + --range-progress: currentColor; + --range-fill: 1; + --range-p: 0.25rem; + --range-bg: currentColor; + @supports (color: color-mix(in lab, red, red)) { + --range-bg: color-mix(in oklab, currentColor 10%, #0000); + } + cursor: pointer; + overflow: hidden; + background-color: transparent; + vertical-align: middle; + width: clamp(3rem, 20rem, 100%); + --radius-selector-max: calc( + var(--radius-selector) + var(--radius-selector) + var(--radius-selector) + ); + border-radius: calc(var(--radius-selector) + min(var(--range-p), var(--radius-selector-max))); + border: none; + height: var(--range-thumb-size); + [dir="rtl"] & { + --range-dir: -1; + } + &:focus { + outline: none; + } + &:focus-visible { + outline: 2px solid; + outline-offset: 2px; + } + &::-webkit-slider-runnable-track { + width: 100%; + background-color: var(--range-bg); + border-radius: var(--radius-selector); + height: calc(var(--range-thumb-size) * 0.5); + } + @media (forced-colors: active) { + &::-webkit-slider-runnable-track { + border: 1px solid; + } + } + @media (forced-colors: active) { + &::-moz-range-track { + border: 1px solid; + } + } + &::-webkit-slider-thumb { + position: relative; + box-sizing: border-box; + border-radius: calc(var(--radius-selector) + min(var(--range-p), var(--radius-selector-max))); + background-color: var(--range-thumb); + height: var(--range-thumb-size); + width: var(--range-thumb-size); + border: var(--range-p) solid; + appearance: none; + webkit-appearance: none; + top: 50%; + color: var(--range-progress); + transform: translateY(-50%); + box-shadow: 0 -1px oklch(0% 0 0 / calc(var(--depth) * 0.1)) inset, 0 8px 0 -4px oklch(100% 0 0 / calc(var(--depth) * 0.1)) inset, 0 1px currentColor, 0 0 0 2rem var(--range-thumb) inset, calc((var(--range-dir, 1) * -100cqw) - (var(--range-dir, 1) * var(--range-thumb-size) / 2)) 0 0 calc(100cqw * var(--range-fill)); + @supports (color: color-mix(in lab, red, red)) { + box-shadow: 0 -1px oklch(0% 0 0 / calc(var(--depth) * 0.1)) inset, 0 8px 0 -4px oklch(100% 0 0 / calc(var(--depth) * 0.1)) inset, 0 1px color-mix(in oklab, currentColor calc(var(--depth) * 10%), #0000), 0 0 0 2rem var(--range-thumb) inset, calc((var(--range-dir, 1) * -100cqw) - (var(--range-dir, 1) * var(--range-thumb-size) / 2)) 0 0 calc(100cqw * var(--range-fill)); + } + } + &::-moz-range-track { + width: 100%; + background-color: var(--range-bg); + border-radius: var(--radius-selector); + height: calc(var(--range-thumb-size) * 0.5); + } + &::-moz-range-thumb { + position: relative; + box-sizing: border-box; + border-radius: calc(var(--radius-selector) + min(var(--range-p), var(--radius-selector-max))); + background-color: currentColor; + height: var(--range-thumb-size); + width: var(--range-thumb-size); + border: var(--range-p) solid; + top: 50%; + color: var(--range-progress); + box-shadow: 0 -1px oklch(0% 0 0 / calc(var(--depth) * 0.1)) inset, 0 8px 0 -4px oklch(100% 0 0 / calc(var(--depth) * 0.1)) inset, 0 1px currentColor, 0 0 0 2rem var(--range-thumb) inset, calc((var(--range-dir, 1) * -100cqw) - (var(--range-dir, 1) * var(--range-thumb-size) / 2)) 0 0 calc(100cqw * var(--range-fill)); + @supports (color: color-mix(in lab, red, red)) { + box-shadow: 0 -1px oklch(0% 0 0 / calc(var(--depth) * 0.1)) inset, 0 8px 0 -4px oklch(100% 0 0 / calc(var(--depth) * 0.1)) inset, 0 1px color-mix(in oklab, currentColor calc(var(--depth) * 10%), #0000), 0 0 0 2rem var(--range-thumb) inset, calc((var(--range-dir, 1) * -100cqw) - (var(--range-dir, 1) * var(--range-thumb-size) / 2)) 0 0 calc(100cqw * var(--range-fill)); + } + } + &:disabled { + cursor: not-allowed; + opacity: 30%; } } } @@ -1525,81 +1539,6 @@ padding: calc(0.25rem * 4); } } - .textarea { - @layer daisyui.l1.l2.l3 { - border: var(--border) solid #0000; - min-height: calc(0.25rem * 20); - flex-shrink: 1; - appearance: none; - border-radius: var(--radius-field); - background-color: var(--color-base-100); - padding-block: calc(0.25rem * 2); - vertical-align: middle; - width: clamp(3rem, 20rem, 100%); - padding-inline-start: 0.75rem; - padding-inline-end: 0.75rem; - font-size: max(var(--font-size, 0.875rem), 0.875rem); - touch-action: manipulation; - border-color: var(--input-color); - box-shadow: 0 1px var(--input-color) inset, 0 -1px oklch(100% 0 0 / calc(var(--depth) * 0.1)) inset; - @supports (color: color-mix(in lab, red, red)) { - box-shadow: 0 1px color-mix(in oklab, var(--input-color) calc(var(--depth) * 10%), #0000) inset, 0 -1px oklch(100% 0 0 / calc(var(--depth) * 0.1)) inset; - } - --input-color: var(--color-base-content); - @supports (color: color-mix(in lab, red, red)) { - --input-color: color-mix(in oklab, var(--color-base-content) 20%, #0000); - } - textarea { - appearance: none; - background-color: transparent; - border: none; - &:focus, &:focus-within { - --tw-outline-style: none; - outline-style: none; - @media (forced-colors: active) { - outline: 2px solid transparent; - outline-offset: 2px; - } - } - } - &:focus, &:focus-within { - --input-color: var(--color-base-content); - box-shadow: 0 1px var(--input-color); - @supports (color: color-mix(in lab, red, red)) { - box-shadow: 0 1px color-mix(in oklab, var(--input-color) calc(var(--depth) * 10%), #0000); - } - outline: 2px solid var(--input-color); - outline-offset: 2px; - isolation: isolate; - } - @media (pointer: coarse) { - @supports (-webkit-touch-callout: none) { - &:focus, &:focus-within { - --font-size: 1rem; - } - } - } - &:has(> textarea[disabled]), &:is(:disabled, [disabled]) { - cursor: not-allowed; - border-color: var(--color-base-200); - background-color: var(--color-base-200); - color: var(--color-base-content); - @supports (color: color-mix(in lab, red, red)) { - color: color-mix(in oklab, var(--color-base-content) 40%, transparent); - } - &::placeholder { - color: var(--color-base-content); - @supports (color: color-mix(in lab, red, red)) { - color: color-mix(in oklab, var(--color-base-content) 20%, transparent); - } - } - box-shadow: none; - } - &:has(> textarea[disabled]) > textarea[disabled] { - cursor: not-allowed; - } - } - } .stack { @layer daisyui.l1.l2.l3 { display: inline-grid; diff --git a/docker-compose.yml b/docker-compose.yml index 3f7b1e3..58fafe9 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -55,6 +55,8 @@ services: mongo: condition: service_started environment: + # LiteLLM API key (used by librechat.yaml endpoint config) + LITELLM_API_KEY: ${LITELLM_API_KEY:-} # MongoDB (use localhost since we're on host network) MONGO_URI: mongodb://root:example@localhost:27017/librechat?authSource=admin DOMAIN_CLIENT: http://localhost:3080 @@ -70,7 +72,6 @@ services: OPENID_CALLBACK_URL: /oauth/openid/callback OPENID_SCOPE: openid profile email OPENID_BUTTON_LABEL: Login with CERTifAI - OPENID_AUTH_EXTRA_PARAMS: prompt=none # Disable local auth (SSO only) ALLOW_EMAIL_LOGIN: "false" ALLOW_REGISTRATION: "false" diff --git a/librechat/librechat.yaml b/librechat/librechat.yaml index 7ba5233..8c09fc5 100644 --- a/librechat/librechat.yaml +++ b/librechat/librechat.yaml @@ -1,5 +1,5 @@ # CERTifAI LibreChat Configuration -# Ollama backend for self-hosted LLM inference. +# LiteLLM proxy for unified multi-provider LLM access. version: 1.2.8 cache: true @@ -19,22 +19,16 @@ interface: endpoints: custom: - - name: "Ollama" - apiKey: "ollama" - baseURL: "https://mac-mini-von-benjamin-2:11434/v1/" + - name: "LiteLLM" + apiKey: "${LITELLM_API_KEY}" + baseURL: "https://llm-dev.meghsakha.com/v1/" models: default: - - "llama3.1:8b" - - "qwen3:30b-a3b" + - "Qwen3-Coder-30B-A3B-Instruct" fetch: true titleConvo: true titleModel: "current_model" summarize: false summaryModel: "current_model" forcePrompt: false - modelDisplayLabel: "CERTifAI Ollama" - dropParams: - - stop - - user - - frequency_penalty - - presence_penalty + modelDisplayLabel: "CERTifAI LiteLLM" diff --git a/src/components/dashboard_sidebar.rs b/src/components/dashboard_sidebar.rs index 0623dfb..d89ab56 100644 --- a/src/components/dashboard_sidebar.rs +++ b/src/components/dashboard_sidebar.rs @@ -1,9 +1,9 @@ use dioxus::prelude::*; use crate::i18n::{t, Locale}; -use crate::infrastructure::ollama::{get_ollama_status, OllamaStatus}; +use crate::infrastructure::litellm::{get_litellm_status, LitellmStatus}; -/// Right sidebar for the dashboard, showing Ollama status, trending topics, +/// Right sidebar for the dashboard, showing LiteLLM status, trending topics, /// and recent search history. /// /// Appears when no article card is selected. Disappears when the user opens @@ -11,13 +11,13 @@ use crate::infrastructure::ollama::{get_ollama_status, OllamaStatus}; /// /// # Props /// -/// * `ollama_url` - Ollama instance URL for status polling +/// * `litellm_url` - LiteLLM proxy URL for status polling /// * `trending` - Trending topic keywords extracted from recent news headlines /// * `recent_searches` - Recent search topics stored in localStorage /// * `on_topic_click` - Fires when a trending or recent topic is clicked #[component] pub fn DashboardSidebar( - ollama_url: String, + litellm_url: String, trending: Vec, recent_searches: Vec, on_topic_click: EventHandler, @@ -25,26 +25,26 @@ pub fn DashboardSidebar( let locale = use_context::>(); let l = *locale.read(); - // Fetch Ollama status once on mount. + // Fetch LiteLLM status once on mount. // use_resource with no signal dependencies runs exactly once and // won't re-fire on parent re-renders (unlike use_effect). - let url = ollama_url.clone(); + let url = litellm_url.clone(); let status_resource = use_resource(move || { let u = url.clone(); async move { - get_ollama_status(u).await.unwrap_or(OllamaStatus { + get_litellm_status(u).await.unwrap_or(LitellmStatus { online: false, models: Vec::new(), }) } }); - let current_status: OllamaStatus = + let current_status: LitellmStatus = status_resource .read() .as_ref() .cloned() - .unwrap_or(OllamaStatus { + .unwrap_or(LitellmStatus { online: false, models: Vec::new(), }); @@ -52,9 +52,9 @@ pub fn DashboardSidebar( rsx! { aside { class: "dashboard-sidebar", - // -- Ollama Status Section -- + // -- LiteLLM Status Section -- div { class: "sidebar-section", - h4 { class: "sidebar-section-title", "{t(l, \"dashboard.ollama_status\")}" } + h4 { class: "sidebar-section-title", "{t(l, \"dashboard.litellm_status\")}" } div { class: "sidebar-status-row", span { class: if current_status.online { "sidebar-status-dot sidebar-status-dot--online" } else { "sidebar-status-dot sidebar-status-dot--offline" } } span { class: "sidebar-status-label", diff --git a/src/components/news_card.rs b/src/components/news_card.rs index 3eeba56..1e328a2 100644 --- a/src/components/news_card.rs +++ b/src/components/news_card.rs @@ -112,12 +112,12 @@ pub fn mock_news() -> Vec { published_at: "2026-02-16".into(), }, NewsCardModel { - title: "Ollama Adds Multi-GPU Scheduling".into(), - source: "Ollama".into(), - summary: "Run large models across multiple GPUs with automatic sharding.".into(), - content: "Ollama now supports multi-GPU scheduling with automatic \ - model sharding. Users can run models across multiple GPUs \ - for improved inference performance." + title: "LiteLLM Adds Multi-Provider Routing".into(), + source: "LiteLLM".into(), + summary: "Route requests across multiple LLM providers with automatic fallback.".into(), + content: "LiteLLM now supports multi-provider routing with automatic \ + fallback. Users can route requests across multiple providers \ + for improved reliability and cost optimization." .into(), category: "Infrastructure".into(), url: "#".into(), diff --git a/src/infrastructure/chat.rs b/src/infrastructure/chat.rs index 5b5e99a..983bdd8 100644 --- a/src/infrastructure/chat.rs +++ b/src/infrastructure/chat.rs @@ -134,7 +134,7 @@ pub async fn list_chat_sessions() -> Result, ServerFnError> { /// /// * `title` - Display title for the session /// * `namespace` - Namespace string: `"General"` or `"News"` -/// * `provider` - LLM provider name (e.g. "ollama") +/// * `provider` - LLM provider name (e.g. "litellm") /// * `model` - Model ID (e.g. "llama3.1:8b") /// * `article_url` - Source article URL (only for `News` namespace, empty if none) /// @@ -441,8 +441,8 @@ pub async fn chat_complete( // Resolve provider URL and model let (base_url, model) = resolve_provider_url( - &state.services.ollama_url, - &state.services.ollama_model, + &state.services.litellm_url, + &state.services.litellm_model, &session.provider, &session.model, ); @@ -485,22 +485,22 @@ pub async fn chat_complete( .ok_or_else(|| ServerFnError::new("empty LLM response")) } -/// Resolve the base URL for a provider, falling back to Ollama defaults. +/// Resolve the base URL for a provider, falling back to LiteLLM defaults. /// /// # Arguments /// -/// * `ollama_url` - Default Ollama base URL from config -/// * `ollama_model` - Default Ollama model from config +/// * `litellm_url` - Default LiteLLM base URL from config +/// * `litellm_model` - Default LiteLLM model from config /// * `provider` - Provider name (e.g. "openai", "anthropic", "huggingface") -/// * `model` - Model ID (may be empty for Ollama default) +/// * `model` - Model ID (may be empty for LiteLLM default) /// /// # Returns /// /// A `(base_url, model)` tuple resolved for the given provider. #[cfg(feature = "server")] pub(crate) fn resolve_provider_url( - ollama_url: &str, - ollama_model: &str, + litellm_url: &str, + litellm_model: &str, provider: &str, model: &str, ) -> (String, String) { @@ -511,11 +511,11 @@ pub(crate) fn resolve_provider_url( format!("https://api-inference.huggingface.co/models/{}", model), model.to_string(), ), - // Default to Ollama + // Default to LiteLLM _ => ( - ollama_url.to_string(), + litellm_url.to_string(), if model.is_empty() { - ollama_model.to_string() + litellm_model.to_string() } else { model.to_string() }, @@ -595,7 +595,7 @@ mod tests { "_id": oid, "user_sub": "u", "title": "t", - "provider": "ollama", + "provider": "litellm", "model": "m", "created_at": "c", "updated_at": "u", @@ -612,7 +612,7 @@ mod tests { "user_sub": "u", "title": "t", "namespace": "News", - "provider": "ollama", + "provider": "litellm", "model": "m", "created_at": "c", "updated_at": "u", @@ -684,13 +684,13 @@ mod tests { // -- resolve_provider_url -- - const TEST_OLLAMA_URL: &str = "http://localhost:11434"; - const TEST_OLLAMA_MODEL: &str = "llama3.1:8b"; + const TEST_LITELLM_URL: &str = "http://localhost:4000"; + const TEST_LITELLM_MODEL: &str = "qwen3-32b"; #[test] fn resolve_openai_returns_api_openai() { let (url, model) = - resolve_provider_url(TEST_OLLAMA_URL, TEST_OLLAMA_MODEL, "openai", "gpt-4o"); + resolve_provider_url(TEST_LITELLM_URL, TEST_LITELLM_MODEL, "openai", "gpt-4o"); assert_eq!(url, "https://api.openai.com"); assert_eq!(model, "gpt-4o"); } @@ -698,8 +698,8 @@ mod tests { #[test] fn resolve_anthropic_returns_api_anthropic() { let (url, model) = resolve_provider_url( - TEST_OLLAMA_URL, - TEST_OLLAMA_MODEL, + TEST_LITELLM_URL, + TEST_LITELLM_MODEL, "anthropic", "claude-3-opus", ); @@ -710,8 +710,8 @@ mod tests { #[test] fn resolve_huggingface_returns_model_url() { let (url, model) = resolve_provider_url( - TEST_OLLAMA_URL, - TEST_OLLAMA_MODEL, + TEST_LITELLM_URL, + TEST_LITELLM_MODEL, "huggingface", "meta-llama/Llama-2-7b", ); @@ -723,19 +723,19 @@ mod tests { } #[test] - fn resolve_unknown_defaults_to_ollama() { + fn resolve_unknown_defaults_to_litellm() { let (url, model) = - resolve_provider_url(TEST_OLLAMA_URL, TEST_OLLAMA_MODEL, "ollama", "mistral:7b"); - assert_eq!(url, TEST_OLLAMA_URL); - assert_eq!(model, "mistral:7b"); + resolve_provider_url(TEST_LITELLM_URL, TEST_LITELLM_MODEL, "litellm", "qwen3-32b"); + assert_eq!(url, TEST_LITELLM_URL); + assert_eq!(model, "qwen3-32b"); } #[test] fn resolve_empty_model_falls_back_to_server_default() { let (url, model) = - resolve_provider_url(TEST_OLLAMA_URL, TEST_OLLAMA_MODEL, "ollama", ""); - assert_eq!(url, TEST_OLLAMA_URL); - assert_eq!(model, TEST_OLLAMA_MODEL); + resolve_provider_url(TEST_LITELLM_URL, TEST_LITELLM_MODEL, "litellm", ""); + assert_eq!(url, TEST_LITELLM_URL); + assert_eq!(model, TEST_LITELLM_MODEL); } } } diff --git a/src/infrastructure/config.rs b/src/infrastructure/config.rs index 23128fc..8f82d9e 100644 --- a/src/infrastructure/config.rs +++ b/src/infrastructure/config.rs @@ -141,13 +141,15 @@ impl SmtpConfig { // ServiceUrls // --------------------------------------------------------------------------- -/// URLs and credentials for external services (Ollama, SearXNG, S3, etc.). +/// URLs and credentials for external services (LiteLLM, SearXNG, S3, etc.). #[derive(Debug)] pub struct ServiceUrls { - /// Ollama LLM instance base URL. - pub ollama_url: String, - /// Default Ollama model to use. - pub ollama_model: String, + /// LiteLLM proxy base URL. + pub litellm_url: String, + /// Default LiteLLM model to use. + pub litellm_model: String, + /// LiteLLM API key for authenticated requests. + pub litellm_api_key: String, /// SearXNG meta-search engine base URL. pub searxng_url: String, /// LangChain service URL. @@ -178,9 +180,10 @@ impl ServiceUrls { /// Currently infallible but returns `Result` for consistency. pub fn from_env() -> Result { Ok(Self { - ollama_url: std::env::var("OLLAMA_URL") - .unwrap_or_else(|_| "http://localhost:11434".into()), - ollama_model: std::env::var("OLLAMA_MODEL").unwrap_or_else(|_| "llama3.1:8b".into()), + litellm_url: std::env::var("LITELLM_URL") + .unwrap_or_else(|_| "http://localhost:4000".into()), + litellm_model: std::env::var("LITELLM_MODEL").unwrap_or_else(|_| "qwen3-32b".into()), + litellm_api_key: optional_env("LITELLM_API_KEY"), searxng_url: std::env::var("SEARXNG_URL") .unwrap_or_else(|_| "http://localhost:8888".into()), langchain_url: optional_env("LANGCHAIN_URL"), @@ -231,7 +234,7 @@ impl StripeConfig { /// Comma-separated list of enabled LLM provider identifiers. /// -/// For example: `LLM_PROVIDERS=ollama,openai,anthropic` +/// For example: `LLM_PROVIDERS=litellm,openai,anthropic` #[derive(Debug)] pub struct LlmProvidersConfig { /// Parsed provider names. @@ -331,36 +334,36 @@ mod tests { #[test] #[serial] fn llm_providers_single() { - std::env::set_var("LLM_PROVIDERS", "ollama"); + std::env::set_var("LLM_PROVIDERS", "litellm"); let cfg = LlmProvidersConfig::from_env().unwrap(); - assert_eq!(cfg.providers, vec!["ollama"]); + assert_eq!(cfg.providers, vec!["litellm"]); std::env::remove_var("LLM_PROVIDERS"); } #[test] #[serial] fn llm_providers_multiple() { - std::env::set_var("LLM_PROVIDERS", "ollama,openai,anthropic"); + std::env::set_var("LLM_PROVIDERS", "litellm,openai,anthropic"); let cfg = LlmProvidersConfig::from_env().unwrap(); - assert_eq!(cfg.providers, vec!["ollama", "openai", "anthropic"]); + assert_eq!(cfg.providers, vec!["litellm", "openai", "anthropic"]); std::env::remove_var("LLM_PROVIDERS"); } #[test] #[serial] fn llm_providers_trims_whitespace() { - std::env::set_var("LLM_PROVIDERS", " ollama , openai "); + std::env::set_var("LLM_PROVIDERS", " litellm , openai "); let cfg = LlmProvidersConfig::from_env().unwrap(); - assert_eq!(cfg.providers, vec!["ollama", "openai"]); + assert_eq!(cfg.providers, vec!["litellm", "openai"]); std::env::remove_var("LLM_PROVIDERS"); } #[test] #[serial] fn llm_providers_filters_empty_entries() { - std::env::set_var("LLM_PROVIDERS", "ollama,,openai,"); + std::env::set_var("LLM_PROVIDERS", "litellm,,openai,"); let cfg = LlmProvidersConfig::from_env().unwrap(); - assert_eq!(cfg.providers, vec!["ollama", "openai"]); + assert_eq!(cfg.providers, vec!["litellm", "openai"]); std::env::remove_var("LLM_PROVIDERS"); } @@ -370,18 +373,18 @@ mod tests { #[test] #[serial] - fn service_urls_default_ollama_url() { - std::env::remove_var("OLLAMA_URL"); + fn service_urls_default_litellm_url() { + std::env::remove_var("LITELLM_URL"); let svc = ServiceUrls::from_env().unwrap(); - assert_eq!(svc.ollama_url, "http://localhost:11434"); + assert_eq!(svc.litellm_url, "http://localhost:4000"); } #[test] #[serial] - fn service_urls_default_ollama_model() { - std::env::remove_var("OLLAMA_MODEL"); + fn service_urls_default_litellm_model() { + std::env::remove_var("LITELLM_MODEL"); let svc = ServiceUrls::from_env().unwrap(); - assert_eq!(svc.ollama_model, "llama3.1:8b"); + assert_eq!(svc.litellm_model, "qwen3-32b"); } #[test] @@ -394,11 +397,11 @@ mod tests { #[test] #[serial] - fn service_urls_custom_ollama_url() { - std::env::set_var("OLLAMA_URL", "http://gpu-host:11434"); + fn service_urls_custom_litellm_url() { + std::env::set_var("LITELLM_URL", "http://litellm-host:4000"); let svc = ServiceUrls::from_env().unwrap(); - assert_eq!(svc.ollama_url, "http://gpu-host:11434"); - std::env::remove_var("OLLAMA_URL"); + assert_eq!(svc.litellm_url, "http://litellm-host:4000"); + std::env::remove_var("LITELLM_URL"); } #[test] diff --git a/src/infrastructure/litellm.rs b/src/infrastructure/litellm.rs new file mode 100644 index 0000000..af367b1 --- /dev/null +++ b/src/infrastructure/litellm.rs @@ -0,0 +1,403 @@ +#[cfg(feature = "server")] +use std::collections::HashMap; + +use dioxus::prelude::*; +use serde::{Deserialize, Serialize}; + +use crate::models::LitellmUsageStats; +#[cfg(feature = "server")] +use crate::models::ModelUsage; + +/// Status of a LiteLLM proxy instance, including connectivity and available models. +/// +/// # Fields +/// +/// * `online` - Whether the LiteLLM API responded successfully +/// * `models` - List of model IDs available through the proxy +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct LitellmStatus { + pub online: bool, + pub models: Vec, +} + +/// Response from LiteLLM's `GET /v1/models` endpoint (OpenAI-compatible). +#[cfg(feature = "server")] +#[derive(Deserialize)] +struct ModelsResponse { + data: Vec, +} + +/// A single model entry from the OpenAI-compatible models list. +#[cfg(feature = "server")] +#[derive(Deserialize)] +struct ModelObject { + id: String, +} + +/// Check the status of a LiteLLM proxy by querying its models endpoint. +/// +/// Calls `GET /v1/models` to list available models and determine +/// whether the instance is reachable. Sends the API key as a Bearer token +/// if configured. +/// +/// # Arguments +/// +/// * `litellm_url` - Base URL of the LiteLLM proxy (e.g. "http://localhost:4000") +/// +/// # Returns +/// +/// A `LitellmStatus` with `online: true` and model IDs if reachable, +/// or `online: false` with an empty model list on failure +/// +/// # Errors +/// +/// Returns `ServerFnError` only on serialization issues; network failures +/// are caught and returned as `online: false` +#[post("/api/litellm-status")] +pub async fn get_litellm_status(litellm_url: String) -> Result { + let state: crate::infrastructure::ServerState = + dioxus_fullstack::FullstackContext::extract().await?; + + let base_url = if litellm_url.is_empty() { + state.services.litellm_url.clone() + } else { + litellm_url + }; + + let api_key = state.services.litellm_api_key.clone(); + let url = format!("{}/v1/models", base_url.trim_end_matches('/')); + + let client = reqwest::Client::builder() + .timeout(std::time::Duration::from_secs(5)) + .build() + .map_err(|e| ServerFnError::new(format!("HTTP client error: {e}")))?; + + let mut request = client.get(&url); + if !api_key.is_empty() { + request = request.header("Authorization", format!("Bearer {api_key}")); + } + + let resp = match request.send().await { + Ok(r) if r.status().is_success() => r, + _ => { + return Ok(LitellmStatus { + online: false, + models: Vec::new(), + }); + } + }; + + let body: ModelsResponse = match resp.json().await { + Ok(b) => b, + Err(_) => { + return Ok(LitellmStatus { + online: true, + models: Vec::new(), + }); + } + }; + + let models = body.data.into_iter().map(|m| m.id).collect(); + + Ok(LitellmStatus { + online: true, + models, + }) +} + +/// Response from LiteLLM's `GET /global/activity` endpoint. +/// +/// Returns aggregate token counts and API request totals for a date range. +/// Available on the free tier (no Enterprise license needed). +#[cfg(feature = "server")] +#[derive(Debug, Deserialize)] +struct ActivityResponse { + /// Total tokens across all models in the date range + #[serde(default)] + sum_total_tokens: u64, +} + +/// Per-model entry from `GET /global/activity/model`. +/// +/// Each entry contains a model name and its aggregated token total. +#[cfg(feature = "server")] +#[derive(Debug, Deserialize)] +struct ActivityModelEntry { + /// Model identifier (may be empty for unattributed traffic) + #[serde(default)] + model: String, + /// Sum of tokens used by this model in the date range + #[serde(default)] + sum_total_tokens: u64, +} + +/// Per-model spend entry from `GET /global/spend/models`. +/// +/// Each entry maps a model name to its total spend in USD. +#[cfg(feature = "server")] +#[derive(Debug, Deserialize)] +struct SpendModelEntry { + /// Model identifier + #[serde(default)] + model: String, + /// Total spend in USD + #[serde(default)] + total_spend: f64, +} + +/// Merge per-model token counts and spend data into `ModelUsage` entries. +/// +/// Joins `activity_models` (tokens) and `spend_models` (spend) by model +/// name using a HashMap for O(n + m) merge. Entries with empty model +/// names are skipped. +/// +/// # Arguments +/// +/// * `activity_models` - Per-model token data from `/global/activity/model` +/// * `spend_models` - Per-model spend data from `/global/spend/models` +/// +/// # Returns +/// +/// Merged list sorted by total tokens descending +#[cfg(feature = "server")] +fn merge_model_data( + activity_models: Vec, + spend_models: Vec, +) -> Vec { + let mut model_map: HashMap = HashMap::new(); + + for entry in activity_models { + if entry.model.is_empty() { + continue; + } + model_map + .entry(entry.model.clone()) + .or_insert_with(|| ModelUsage { + model: entry.model, + ..Default::default() + }) + .total_tokens = entry.sum_total_tokens; + } + + for entry in spend_models { + if entry.model.is_empty() { + continue; + } + model_map + .entry(entry.model.clone()) + .or_insert_with(|| ModelUsage { + model: entry.model, + ..Default::default() + }) + .spend = entry.total_spend; + } + + let mut result: Vec = model_map.into_values().collect(); + result.sort_by(|a, b| b.total_tokens.cmp(&a.total_tokens)); + result +} + +/// Fetch aggregated usage statistics from LiteLLM's free-tier APIs. +/// +/// Combines three endpoints to build a complete usage picture: +/// - `GET /global/activity` - total token counts +/// - `GET /global/activity/model` - per-model token breakdown +/// - `GET /global/spend/models` - per-model spend in USD +/// +/// # Arguments +/// +/// * `start_date` - Start of the reporting period in `YYYY-MM-DD` format +/// * `end_date` - End of the reporting period in `YYYY-MM-DD` format +/// +/// # Returns +/// +/// Aggregated usage stats; returns default (zeroed) stats on network +/// failure or permission errors +/// +/// # Errors +/// +/// Returns `ServerFnError` only on HTTP client construction failure +#[post("/api/litellm-usage")] +pub async fn get_litellm_usage( + start_date: String, + end_date: String, +) -> Result { + let state: crate::infrastructure::ServerState = + dioxus_fullstack::FullstackContext::extract().await?; + + let base_url = &state.services.litellm_url; + let api_key = &state.services.litellm_api_key; + + if base_url.is_empty() { + return Ok(LitellmUsageStats::default()); + } + + let base = base_url.trim_end_matches('/'); + let date_params = format!("start_date={start_date}&end_date={end_date}"); + + let client = reqwest::Client::builder() + .timeout(std::time::Duration::from_secs(10)) + .build() + .map_err(|e| ServerFnError::new(format!("HTTP client error: {e}")))?; + + // Helper closure to build an authenticated GET request + let auth_get = |url: String| { + let mut req = client.get(url); + if !api_key.is_empty() { + req = req.header("Authorization", format!("Bearer {api_key}")); + } + req + }; + + // Fire all three requests concurrently to minimise latency + let (activity_res, model_activity_res, model_spend_res) = tokio::join!( + auth_get(format!("{base}/global/activity?{date_params}")).send(), + auth_get(format!("{base}/global/activity/model?{date_params}")).send(), + auth_get(format!("{base}/global/spend/models?{date_params}")).send(), + ); + + // Parse total token count from /global/activity + let total_tokens = match activity_res { + Ok(r) if r.status().is_success() => r + .json::() + .await + .map(|a| a.sum_total_tokens) + .unwrap_or(0), + _ => 0, + }; + + // Parse per-model token breakdown from /global/activity/model + let activity_models: Vec = match model_activity_res { + Ok(r) if r.status().is_success() => r.json().await.unwrap_or_default(), + _ => Vec::new(), + }; + + // Parse per-model spend from /global/spend/models + let spend_models: Vec = match model_spend_res { + Ok(r) if r.status().is_success() => r.json().await.unwrap_or_default(), + _ => Vec::new(), + }; + + let total_spend: f64 = spend_models.iter().map(|m| m.total_spend).sum(); + let model_breakdown = merge_model_data(activity_models, spend_models); + + Ok(LitellmUsageStats { + total_spend, + // Free-tier endpoints don't provide prompt/completion split; + // total_tokens comes from /global/activity. + total_prompt_tokens: 0, + total_completion_tokens: 0, + total_tokens, + model_breakdown, + }) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn merge_empty_inputs() { + let result = merge_model_data(Vec::new(), Vec::new()); + assert!(result.is_empty()); + } + + #[test] + fn merge_activity_only() { + let activity = vec![ActivityModelEntry { + model: "gpt-4".into(), + sum_total_tokens: 1500, + }]; + let result = merge_model_data(activity, Vec::new()); + assert_eq!(result.len(), 1); + assert_eq!(result[0].model, "gpt-4"); + assert_eq!(result[0].total_tokens, 1500); + assert_eq!(result[0].spend, 0.0); + } + + #[test] + fn merge_spend_only() { + let spend = vec![SpendModelEntry { + model: "gpt-4".into(), + total_spend: 2.5, + }]; + let result = merge_model_data(Vec::new(), spend); + assert_eq!(result.len(), 1); + assert_eq!(result[0].model, "gpt-4"); + assert_eq!(result[0].spend, 2.5); + assert_eq!(result[0].total_tokens, 0); + } + + #[test] + fn merge_joins_by_model_name() { + let activity = vec![ + ActivityModelEntry { + model: "gpt-4".into(), + sum_total_tokens: 5000, + }, + ActivityModelEntry { + model: "claude-3".into(), + sum_total_tokens: 3000, + }, + ]; + let spend = vec![ + SpendModelEntry { + model: "gpt-4".into(), + total_spend: 1.0, + }, + SpendModelEntry { + model: "claude-3".into(), + total_spend: 0.5, + }, + ]; + let result = merge_model_data(activity, spend); + assert_eq!(result.len(), 2); + // Sorted by tokens descending: gpt-4 (5000) before claude-3 (3000) + assert_eq!(result[0].model, "gpt-4"); + assert_eq!(result[0].total_tokens, 5000); + assert_eq!(result[0].spend, 1.0); + assert_eq!(result[1].model, "claude-3"); + assert_eq!(result[1].total_tokens, 3000); + assert_eq!(result[1].spend, 0.5); + } + + #[test] + fn merge_skips_empty_model_names() { + let activity = vec![ + ActivityModelEntry { + model: "".into(), + sum_total_tokens: 100, + }, + ActivityModelEntry { + model: "gpt-4".into(), + sum_total_tokens: 500, + }, + ]; + let spend = vec![SpendModelEntry { + model: "".into(), + total_spend: 0.01, + }]; + let result = merge_model_data(activity, spend); + assert_eq!(result.len(), 1); + assert_eq!(result[0].model, "gpt-4"); + } + + #[test] + fn merge_unmatched_models_appear_in_both_directions() { + let activity = vec![ActivityModelEntry { + model: "tokens-only".into(), + sum_total_tokens: 1000, + }]; + let spend = vec![SpendModelEntry { + model: "spend-only".into(), + total_spend: 0.5, + }]; + let result = merge_model_data(activity, spend); + assert_eq!(result.len(), 2); + // tokens-only has 1000 tokens, spend-only has 0 tokens + assert_eq!(result[0].model, "tokens-only"); + assert_eq!(result[0].total_tokens, 1000); + assert_eq!(result[1].model, "spend-only"); + assert_eq!(result[1].spend, 0.5); + } +} diff --git a/src/infrastructure/llm.rs b/src/infrastructure/llm.rs index b68e2ab..76ece50 100644 --- a/src/infrastructure/llm.rs +++ b/src/infrastructure/llm.rs @@ -4,23 +4,23 @@ use dioxus::prelude::*; mod inner { use serde::{Deserialize, Serialize}; - /// A single message in the OpenAI-compatible chat format used by Ollama. + /// A single message in the OpenAI-compatible chat format used by LiteLLM. #[derive(Serialize)] pub(super) struct ChatMessage { pub role: String, pub content: String, } - /// Request body for Ollama's OpenAI-compatible chat completions endpoint. + /// Request body for the OpenAI-compatible chat completions endpoint. #[derive(Serialize)] - pub(super) struct OllamaChatRequest { + pub(super) struct ChatCompletionRequest { pub model: String, pub messages: Vec, /// Disable streaming so we get a single JSON response. pub stream: bool, } - /// A single choice in the Ollama chat completions response. + /// A single choice in the chat completions response. #[derive(Deserialize)] pub(super) struct ChatChoice { pub message: ChatResponseMessage, @@ -32,9 +32,9 @@ mod inner { pub content: String, } - /// Top-level response from Ollama's `/v1/chat/completions` endpoint. + /// Top-level response from the `/v1/chat/completions` endpoint. #[derive(Deserialize)] - pub(super) struct OllamaChatResponse { + pub(super) struct ChatCompletionResponse { pub choices: Vec, } @@ -157,7 +157,7 @@ mod inner { } } -/// Summarize an article using a local Ollama instance. +/// Summarize an article using a LiteLLM proxy. /// /// First attempts to fetch the full article text from the provided URL. /// If that fails (paywall, timeout, etc.), falls back to the search snippet. @@ -167,8 +167,8 @@ mod inner { /// /// * `snippet` - The search result snippet (fallback content) /// * `article_url` - The original article URL to fetch full text from -/// * `ollama_url` - Base URL of the Ollama instance (e.g. "http://localhost:11434") -/// * `model` - The Ollama model ID to use (e.g. "llama3.1:8b") +/// * `litellm_url` - Base URL of the LiteLLM proxy (e.g. "http://localhost:4000") +/// * `model` - The model ID to use (e.g. "qwen3-32b") /// /// # Returns /// @@ -176,36 +176,38 @@ mod inner { /// /// # Errors /// -/// Returns `ServerFnError` if the Ollama request fails or response parsing fails +/// Returns `ServerFnError` if the LiteLLM request fails or response parsing fails #[post("/api/summarize")] pub async fn summarize_article( snippet: String, article_url: String, - ollama_url: String, + litellm_url: String, model: String, ) -> Result { - use inner::{fetch_article_text, ChatMessage, OllamaChatRequest, OllamaChatResponse}; + use inner::{fetch_article_text, ChatCompletionRequest, ChatCompletionResponse, ChatMessage}; let state: crate::infrastructure::ServerState = dioxus_fullstack::FullstackContext::extract().await?; // Use caller-provided values or fall back to ServerState config - let base_url = if ollama_url.is_empty() { - state.services.ollama_url.clone() + let base_url = if litellm_url.is_empty() { + state.services.litellm_url.clone() } else { - ollama_url + litellm_url }; let model = if model.is_empty() { - state.services.ollama_model.clone() + state.services.litellm_model.clone() } else { model }; + let api_key = state.services.litellm_api_key.clone(); + // Try to fetch the full article; fall back to the search snippet let article_text = fetch_article_text(&article_url).await.unwrap_or(snippet); - let request_body = OllamaChatRequest { + let request_body = ChatCompletionRequest { model, stream: false, messages: vec![ChatMessage { @@ -223,42 +225,48 @@ pub async fn summarize_article( let url = format!("{}/v1/chat/completions", base_url.trim_end_matches('/')); let client = reqwest::Client::new(); - let resp = client + let mut request = client .post(&url) .header("content-type", "application/json") - .json(&request_body) + .json(&request_body); + + if !api_key.is_empty() { + request = request.header("Authorization", format!("Bearer {api_key}")); + } + + let resp = request .send() .await - .map_err(|e| ServerFnError::new(format!("Ollama request failed: {e}")))?; + .map_err(|e| ServerFnError::new(format!("LiteLLM request failed: {e}")))?; if !resp.status().is_success() { let status = resp.status(); let body = resp.text().await.unwrap_or_default(); return Err(ServerFnError::new(format!( - "Ollama returned {status}: {body}" + "LiteLLM returned {status}: {body}" ))); } - let body: OllamaChatResponse = resp + let body: ChatCompletionResponse = resp .json() .await - .map_err(|e| ServerFnError::new(format!("Failed to parse Ollama response: {e}")))?; + .map_err(|e| ServerFnError::new(format!("Failed to parse LiteLLM response: {e}")))?; body.choices .first() .map(|choice| choice.message.content.clone()) - .ok_or_else(|| ServerFnError::new("Empty response from Ollama")) + .ok_or_else(|| ServerFnError::new("Empty response from LiteLLM")) } /// A lightweight chat message for the follow-up conversation. -/// Uses simple String role ("system"/"user"/"assistant") for Ollama compatibility. +/// Uses simple String role ("system"/"user"/"assistant") for OpenAI compatibility. #[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)] pub struct FollowUpMessage { pub role: String, pub content: String, } -/// Send a follow-up question about an article using a local Ollama instance. +/// Send a follow-up question about an article using a LiteLLM proxy. /// /// Accepts the full conversation history (system context + prior turns) and /// returns the assistant's next response. The system message should contain @@ -267,8 +275,8 @@ pub struct FollowUpMessage { /// # Arguments /// /// * `messages` - The conversation history including system context -/// * `ollama_url` - Base URL of the Ollama instance -/// * `model` - The Ollama model ID to use +/// * `litellm_url` - Base URL of the LiteLLM proxy +/// * `model` - The model ID to use /// /// # Returns /// @@ -276,30 +284,32 @@ pub struct FollowUpMessage { /// /// # Errors /// -/// Returns `ServerFnError` if the Ollama request fails or response parsing fails +/// Returns `ServerFnError` if the LiteLLM request fails or response parsing fails #[post("/api/chat")] pub async fn chat_followup( messages: Vec, - ollama_url: String, + litellm_url: String, model: String, ) -> Result { - use inner::{ChatMessage, OllamaChatRequest, OllamaChatResponse}; + use inner::{ChatCompletionRequest, ChatCompletionResponse, ChatMessage}; let state: crate::infrastructure::ServerState = dioxus_fullstack::FullstackContext::extract().await?; - let base_url = if ollama_url.is_empty() { - state.services.ollama_url.clone() + let base_url = if litellm_url.is_empty() { + state.services.litellm_url.clone() } else { - ollama_url + litellm_url }; let model = if model.is_empty() { - state.services.ollama_model.clone() + state.services.litellm_model.clone() } else { model }; + let api_key = state.services.litellm_api_key.clone(); + // Convert FollowUpMessage to inner ChatMessage for the request let chat_messages: Vec = messages .into_iter() @@ -309,7 +319,7 @@ pub async fn chat_followup( }) .collect(); - let request_body = OllamaChatRequest { + let request_body = ChatCompletionRequest { model, stream: false, messages: chat_messages, @@ -317,31 +327,37 @@ pub async fn chat_followup( let url = format!("{}/v1/chat/completions", base_url.trim_end_matches('/')); let client = reqwest::Client::new(); - let resp = client + let mut request = client .post(&url) .header("content-type", "application/json") - .json(&request_body) + .json(&request_body); + + if !api_key.is_empty() { + request = request.header("Authorization", format!("Bearer {api_key}")); + } + + let resp = request .send() .await - .map_err(|e| ServerFnError::new(format!("Ollama request failed: {e}")))?; + .map_err(|e| ServerFnError::new(format!("LiteLLM request failed: {e}")))?; if !resp.status().is_success() { let status = resp.status(); let body = resp.text().await.unwrap_or_default(); return Err(ServerFnError::new(format!( - "Ollama returned {status}: {body}" + "LiteLLM returned {status}: {body}" ))); } - let body: OllamaChatResponse = resp + let body: ChatCompletionResponse = resp .json() .await - .map_err(|e| ServerFnError::new(format!("Failed to parse Ollama response: {e}")))?; + .map_err(|e| ServerFnError::new(format!("Failed to parse LiteLLM response: {e}")))?; body.choices .first() .map(|choice| choice.message.content.clone()) - .ok_or_else(|| ServerFnError::new("Empty response from Ollama")) + .ok_or_else(|| ServerFnError::new("Empty response from LiteLLM")) } #[cfg(test)] diff --git a/src/infrastructure/mod.rs b/src/infrastructure/mod.rs index c18bf52..cbb1341 100644 --- a/src/infrastructure/mod.rs +++ b/src/infrastructure/mod.rs @@ -3,8 +3,8 @@ pub mod auth_check; pub mod chat; pub mod langgraph; +pub mod litellm; pub mod llm; -pub mod ollama; pub mod searxng; // Server-only modules (Axum handlers, state, configs, DB, etc.) diff --git a/src/infrastructure/ollama.rs b/src/infrastructure/ollama.rs deleted file mode 100644 index d09b03e..0000000 --- a/src/infrastructure/ollama.rs +++ /dev/null @@ -1,92 +0,0 @@ -use dioxus::prelude::*; -use serde::{Deserialize, Serialize}; - -/// Status of a local Ollama instance, including connectivity and loaded models. -/// -/// # Fields -/// -/// * `online` - Whether the Ollama API responded successfully -/// * `models` - List of model names currently available on the instance -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct OllamaStatus { - pub online: bool, - pub models: Vec, -} - -/// Response from Ollama's `GET /api/tags` endpoint. -#[cfg(feature = "server")] -#[derive(Deserialize)] -struct OllamaTagsResponse { - models: Vec, -} - -/// A single model entry from Ollama's tags API. -#[cfg(feature = "server")] -#[derive(Deserialize)] -struct OllamaModel { - name: String, -} - -/// Check the status of a local Ollama instance by querying its tags endpoint. -/// -/// Calls `GET /api/tags` to list available models and determine -/// whether the instance is reachable. -/// -/// # Arguments -/// -/// * `ollama_url` - Base URL of the Ollama instance (e.g. "http://localhost:11434") -/// -/// # Returns -/// -/// An `OllamaStatus` with `online: true` and model names if reachable, -/// or `online: false` with an empty model list on failure -/// -/// # Errors -/// -/// Returns `ServerFnError` only on serialization issues; network failures -/// are caught and returned as `online: false` -#[post("/api/ollama-status")] -pub async fn get_ollama_status(ollama_url: String) -> Result { - let state: crate::infrastructure::ServerState = - dioxus_fullstack::FullstackContext::extract().await?; - - let base_url = if ollama_url.is_empty() { - state.services.ollama_url.clone() - } else { - ollama_url - }; - - let url = format!("{}/api/tags", base_url.trim_end_matches('/')); - - let client = reqwest::Client::builder() - .timeout(std::time::Duration::from_secs(5)) - .build() - .map_err(|e| ServerFnError::new(format!("HTTP client error: {e}")))?; - - let resp = match client.get(&url).send().await { - Ok(r) if r.status().is_success() => r, - _ => { - return Ok(OllamaStatus { - online: false, - models: Vec::new(), - }); - } - }; - - let body: OllamaTagsResponse = match resp.json().await { - Ok(b) => b, - Err(_) => { - return Ok(OllamaStatus { - online: true, - models: Vec::new(), - }); - } - }; - - let models = body.models.into_iter().map(|m| m.name).collect(); - - Ok(OllamaStatus { - online: true, - models, - }) -} diff --git a/src/infrastructure/provider_client.rs b/src/infrastructure/provider_client.rs index 804eba6..2d05023 100644 --- a/src/infrastructure/provider_client.rs +++ b/src/infrastructure/provider_client.rs @@ -1,6 +1,6 @@ //! Unified LLM provider dispatch. //! -//! Routes chat completion requests to Ollama, OpenAI, Anthropic, or +//! Routes chat completion requests to LiteLLM, OpenAI, Anthropic, or //! HuggingFace based on the session's provider setting. All providers //! except Anthropic use the OpenAI-compatible chat completions format. @@ -20,11 +20,11 @@ pub struct ProviderMessage { /// /// # Arguments /// -/// * `state` - Server state (for default Ollama URL/model) -/// * `provider` - Provider name (`"ollama"`, `"openai"`, `"anthropic"`, `"huggingface"`) +/// * `state` - Server state (for default LiteLLM URL/model) +/// * `provider` - Provider name (`"litellm"`, `"openai"`, `"anthropic"`, `"huggingface"`) /// * `model` - Model ID /// * `messages` - Conversation history -/// * `api_key` - API key (required for non-Ollama providers) +/// * `api_key` - API key (required for non-LiteLLM providers; LiteLLM uses server config) /// * `stream` - Whether to request streaming /// /// # Returns @@ -123,11 +123,11 @@ pub async fn send_chat_request( .send() .await } - // Default: Ollama (OpenAI-compatible endpoint) + // Default: LiteLLM proxy (OpenAI-compatible endpoint) _ => { - let base_url = &state.services.ollama_url; + let base_url = &state.services.litellm_url; let resolved_model = if model.is_empty() { - &state.services.ollama_model + &state.services.litellm_model } else { model }; @@ -137,12 +137,15 @@ pub async fn send_chat_request( "messages": messages, "stream": stream, }); - client + let litellm_key = &state.services.litellm_api_key; + let mut request = client .post(&url) .header("content-type", "application/json") - .json(&body) - .send() - .await + .json(&body); + if !litellm_key.is_empty() { + request = request.header("Authorization", format!("Bearer {litellm_key}")); + } + request.send().await } } } diff --git a/src/infrastructure/server_state.rs b/src/infrastructure/server_state.rs index 2817791..ff45de1 100644 --- a/src/infrastructure/server_state.rs +++ b/src/infrastructure/server_state.rs @@ -45,7 +45,7 @@ pub struct ServerStateInner { pub keycloak: &'static KeycloakConfig, /// Outbound email settings. pub smtp: &'static SmtpConfig, - /// URLs for Ollama, SearXNG, LangChain, S3, etc. + /// URLs for LiteLLM, SearXNG, LangChain, S3, etc. pub services: &'static ServiceUrls, /// Stripe billing keys. pub stripe: &'static StripeConfig, diff --git a/src/models/chat.rs b/src/models/chat.rs index aa869de..6ff68d8 100644 --- a/src/models/chat.rs +++ b/src/models/chat.rs @@ -60,8 +60,8 @@ pub struct Attachment { /// * `user_sub` - Keycloak subject ID (session owner) /// * `title` - Display title (auto-generated or user-renamed) /// * `namespace` - Grouping for sidebar sections -/// * `provider` - LLM provider used (e.g. "ollama", "openai") -/// * `model` - Model ID used (e.g. "llama3.1:8b") +/// * `provider` - LLM provider used (e.g. "litellm", "openai") +/// * `model` - Model ID used (e.g. "qwen3-32b") /// * `created_at` - ISO 8601 creation timestamp /// * `updated_at` - ISO 8601 last-activity timestamp /// * `article_url` - Source article URL (for News namespace sessions) @@ -171,8 +171,8 @@ mod tests { user_sub: "user-1".into(), title: "Test Chat".into(), namespace: ChatNamespace::General, - provider: "ollama".into(), - model: "llama3.1:8b".into(), + provider: "litellm".into(), + model: "qwen3-32b".into(), created_at: "2025-01-01T00:00:00Z".into(), updated_at: "2025-01-01T01:00:00Z".into(), article_url: None, @@ -189,7 +189,7 @@ mod tests { "_id": "mongo-id", "user_sub": "u1", "title": "t", - "provider": "ollama", + "provider": "litellm", "model": "m", "created_at": "2025-01-01", "updated_at": "2025-01-01" @@ -205,7 +205,7 @@ mod tests { user_sub: "u1".into(), title: "t".into(), namespace: ChatNamespace::default(), - provider: "ollama".into(), + provider: "litellm".into(), model: "m".into(), created_at: "2025-01-01".into(), updated_at: "2025-01-01".into(), @@ -223,7 +223,7 @@ mod tests { user_sub: "u1".into(), title: "t".into(), namespace: ChatNamespace::default(), - provider: "ollama".into(), + provider: "litellm".into(), model: "m".into(), created_at: "2025-01-01".into(), updated_at: "2025-01-01".into(), diff --git a/src/models/organization.rs b/src/models/organization.rs index 0c6745d..a3c0fb7 100644 --- a/src/models/organization.rs +++ b/src/models/organization.rs @@ -83,6 +83,42 @@ pub struct BillingUsage { pub billing_cycle_end: String, } +/// Aggregated token usage statistics from LiteLLM's spend tracking API. +/// +/// # Fields +/// +/// * `total_spend` - Total cost in USD across all models +/// * `total_prompt_tokens` - Sum of prompt (input) tokens +/// * `total_completion_tokens` - Sum of completion (output) tokens +/// * `total_tokens` - Sum of all tokens (prompt + completion) +/// * `model_breakdown` - Per-model usage breakdown +#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)] +pub struct LitellmUsageStats { + pub total_spend: f64, + pub total_prompt_tokens: u64, + pub total_completion_tokens: u64, + pub total_tokens: u64, + pub model_breakdown: Vec, +} + +/// Token and spend usage for a single LLM model. +/// +/// # Fields +/// +/// * `model` - Model identifier (e.g. "gpt-4", "claude-3-opus") +/// * `spend` - Cost in USD for this model +/// * `prompt_tokens` - Prompt (input) tokens consumed +/// * `completion_tokens` - Completion (output) tokens generated +/// * `total_tokens` - Total tokens (prompt + completion) +#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)] +pub struct ModelUsage { + pub model: String, + pub spend: f64, + pub prompt_tokens: u64, + pub completion_tokens: u64, + pub total_tokens: u64, +} + /// Organisation-level settings stored in MongoDB. /// /// These complement Keycloak's Organizations feature with @@ -234,4 +270,82 @@ mod tests { assert_eq!(record.seats_used, 0); assert_eq!(record.tokens_used, 0); } + + #[test] + fn litellm_usage_stats_default() { + let stats = LitellmUsageStats::default(); + assert_eq!(stats.total_spend, 0.0); + assert_eq!(stats.total_prompt_tokens, 0); + assert_eq!(stats.total_completion_tokens, 0); + assert_eq!(stats.total_tokens, 0); + assert!(stats.model_breakdown.is_empty()); + } + + #[test] + fn litellm_usage_stats_serde_round_trip() { + let stats = LitellmUsageStats { + total_spend: 12.34, + total_prompt_tokens: 50_000, + total_completion_tokens: 25_000, + total_tokens: 75_000, + model_breakdown: vec![ + ModelUsage { + model: "gpt-4".into(), + spend: 10.0, + prompt_tokens: 40_000, + completion_tokens: 20_000, + total_tokens: 60_000, + }, + ModelUsage { + model: "claude-3-opus".into(), + spend: 2.34, + prompt_tokens: 10_000, + completion_tokens: 5_000, + total_tokens: 15_000, + }, + ], + }; + let json = serde_json::to_string(&stats).expect("serialize LitellmUsageStats"); + let back: LitellmUsageStats = + serde_json::from_str(&json).expect("deserialize LitellmUsageStats"); + assert_eq!(stats, back); + } + + #[test] + fn model_usage_default() { + let usage = ModelUsage::default(); + assert_eq!(usage.model, ""); + assert_eq!(usage.spend, 0.0); + assert_eq!(usage.prompt_tokens, 0); + assert_eq!(usage.completion_tokens, 0); + assert_eq!(usage.total_tokens, 0); + } + + #[test] + fn model_usage_serde_round_trip() { + let usage = ModelUsage { + model: "gpt-4-turbo".into(), + spend: 5.67, + prompt_tokens: 30_000, + completion_tokens: 15_000, + total_tokens: 45_000, + }; + let json = serde_json::to_string(&usage).expect("serialize ModelUsage"); + let back: ModelUsage = serde_json::from_str(&json).expect("deserialize ModelUsage"); + assert_eq!(usage, back); + } + + #[test] + fn litellm_usage_stats_empty_breakdown_round_trip() { + let stats = LitellmUsageStats { + total_spend: 0.0, + total_prompt_tokens: 0, + total_completion_tokens: 0, + total_tokens: 0, + model_breakdown: Vec::new(), + }; + let json = serde_json::to_string(&stats).expect("serialize empty stats"); + let back: LitellmUsageStats = serde_json::from_str(&json).expect("deserialize empty stats"); + assert_eq!(stats, back); + } } diff --git a/src/models/provider.rs b/src/models/provider.rs index 48ee498..b4b68f0 100644 --- a/src/models/provider.rs +++ b/src/models/provider.rs @@ -3,8 +3,8 @@ use serde::{Deserialize, Serialize}; /// Supported LLM provider backends. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub enum LlmProvider { - /// Self-hosted models via Ollama - Ollama, + /// LiteLLM proxy for unified model access + LiteLlm, /// Hugging Face Inference API HuggingFace, /// OpenAI-compatible endpoints @@ -17,7 +17,7 @@ impl LlmProvider { /// Returns the display name for a provider. pub fn label(&self) -> &'static str { match self { - Self::Ollama => "Ollama", + Self::LiteLlm => "LiteLLM", Self::HuggingFace => "Hugging Face", Self::OpenAi => "OpenAI", Self::Anthropic => "Anthropic", @@ -29,7 +29,7 @@ impl LlmProvider { /// /// # Fields /// -/// * `id` - Unique model identifier (e.g. "llama3.1:8b") +/// * `id` - Unique model identifier (e.g. "qwen3-32b") /// * `name` - Human-readable display name /// * `provider` - Which provider hosts this model /// * `context_window` - Maximum context length in tokens @@ -79,8 +79,8 @@ mod tests { use pretty_assertions::assert_eq; #[test] - fn llm_provider_label_ollama() { - assert_eq!(LlmProvider::Ollama.label(), "Ollama"); + fn llm_provider_label_litellm() { + assert_eq!(LlmProvider::LiteLlm.label(), "LiteLLM"); } #[test] @@ -101,7 +101,7 @@ mod tests { #[test] fn llm_provider_serde_round_trip() { for variant in [ - LlmProvider::Ollama, + LlmProvider::LiteLlm, LlmProvider::HuggingFace, LlmProvider::OpenAi, LlmProvider::Anthropic, @@ -117,10 +117,10 @@ mod tests { #[test] fn model_entry_serde_round_trip() { let entry = ModelEntry { - id: "llama3.1:8b".into(), - name: "Llama 3.1 8B".into(), - provider: LlmProvider::Ollama, - context_window: 8192, + id: "qwen3-32b".into(), + name: "Qwen3 32B".into(), + provider: LlmProvider::LiteLlm, + context_window: 32, }; let json = serde_json::to_string(&entry).expect("serialize ModelEntry"); let back: ModelEntry = serde_json::from_str(&json).expect("deserialize ModelEntry"); diff --git a/src/models/user.rs b/src/models/user.rs index cbab583..4b7b615 100644 --- a/src/models/user.rs +++ b/src/models/user.rs @@ -35,12 +35,12 @@ pub struct AuthInfo { /// Per-user LLM provider configuration stored in MongoDB. /// /// Controls which provider and model the user's chat sessions default -/// to, and stores API keys for non-Ollama providers. +/// to, and stores API keys for non-LiteLLM providers. #[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)] pub struct UserProviderConfig { - /// Default provider name (e.g. "ollama", "openai") + /// Default provider name (e.g. "litellm", "openai") pub default_provider: String, - /// Default model ID (e.g. "llama3.1:8b", "gpt-4o") + /// Default model ID (e.g. "qwen3-32b", "gpt-4o") pub default_model: String, /// OpenAI API key (empty if not configured) #[serde(default, skip_serializing_if = "Option::is_none")] @@ -51,8 +51,8 @@ pub struct UserProviderConfig { /// HuggingFace API key #[serde(default, skip_serializing_if = "Option::is_none")] pub huggingface_api_key: Option, - /// Custom Ollama URL override (empty = use server default) - pub ollama_url_override: String, + /// Custom LiteLLM URL override (empty = use server default) + pub litellm_url_override: String, } /// Per-user preferences stored in MongoDB. @@ -66,10 +66,10 @@ pub struct UserPreferences { pub org_id: String, /// User-selected news/search topics pub custom_topics: Vec, - /// Per-user Ollama URL override (empty = use server default) - pub ollama_url_override: String, - /// Per-user Ollama model override (empty = use server default) - pub ollama_model_override: String, + /// Per-user LiteLLM URL override (empty = use server default) + pub litellm_url_override: String, + /// Per-user LiteLLM model override (empty = use server default) + pub litellm_model_override: String, /// Recently searched queries for quick access pub recent_searches: Vec, /// LLM provider configuration @@ -132,12 +132,12 @@ mod tests { #[test] fn user_provider_config_optional_keys_skip_none() { let cfg = UserProviderConfig { - default_provider: "ollama".into(), - default_model: "llama3.1:8b".into(), + default_provider: "litellm".into(), + default_model: "qwen3-32b".into(), openai_api_key: None, anthropic_api_key: None, huggingface_api_key: None, - ollama_url_override: String::new(), + litellm_url_override: String::new(), }; let json = serde_json::to_string(&cfg).expect("serialize UserProviderConfig"); assert!(!json.contains("openai_api_key")); @@ -153,7 +153,7 @@ mod tests { openai_api_key: Some("sk-test".into()), anthropic_api_key: Some("ak-test".into()), huggingface_api_key: None, - ollama_url_override: "http://custom:11434".into(), + litellm_url_override: "http://custom:4000".into(), }; let json = serde_json::to_string(&cfg).expect("serialize"); let back: UserProviderConfig = serde_json::from_str(&json).expect("deserialize"); diff --git a/src/pages/dashboard.rs b/src/pages/dashboard.rs index aedfbc4..3145473 100644 --- a/src/pages/dashboard.rs +++ b/src/pages/dashboard.rs @@ -25,8 +25,8 @@ const DEFAULT_TOPICS: &[&str] = &[ /// /// State is persisted across sessions using localStorage: /// - `certifai_topics`: custom user-defined search topics -/// - `certifai_ollama_url`: Ollama instance URL for summarization -/// - `certifai_ollama_model`: Ollama model ID for summarization +/// - `certifai_litellm_url`: LiteLLM proxy URL for summarization +/// - `certifai_litellm_model`: LiteLLM model ID for summarization #[component] pub fn DashboardPage() -> Element { let locale = use_context::>(); @@ -34,11 +34,11 @@ pub fn DashboardPage() -> Element { // Persistent state stored in localStorage let mut custom_topics = use_persistent("certifai_topics".to_string(), Vec::::new); - // Default to empty so the server functions use OLLAMA_URL / OLLAMA_MODEL + // Default to empty so the server functions use LITELLM_URL / LITELLM_MODEL // from .env. Only stores a non-empty value when the user explicitly saves // an override via the Settings panel. - let mut ollama_url = use_persistent("certifai_ollama_url".to_string(), String::new); - let mut ollama_model = use_persistent("certifai_ollama_model".to_string(), String::new); + let mut litellm_url = use_persistent("certifai_litellm_url".to_string(), String::new); + let mut litellm_model = use_persistent("certifai_litellm_model".to_string(), String::new); // Reactive signals for UI state let mut active_topic = use_signal(|| "AI".to_string()); @@ -235,8 +235,8 @@ pub fn DashboardPage() -> Element { onclick: move |_| { let currently_shown = *show_settings.read(); if !currently_shown { - settings_url.set(ollama_url.read().clone()); - settings_model.set(ollama_model.read().clone()); + settings_url.set(litellm_url.read().clone()); + settings_model.set(litellm_model.read().clone()); } show_settings.set(!currently_shown); }, @@ -247,16 +247,16 @@ pub fn DashboardPage() -> Element { // Settings panel (collapsible) if *show_settings.read() { div { class: "settings-panel", - h4 { class: "settings-panel-title", "{t(l, \"dashboard.ollama_settings\")}" } + h4 { class: "settings-panel-title", "{t(l, \"dashboard.litellm_settings\")}" } p { class: "settings-hint", "{t(l, \"dashboard.settings_hint\")}" } div { class: "settings-field", - label { "{t(l, \"dashboard.ollama_url\")}" } + label { "{t(l, \"dashboard.litellm_url\")}" } input { class: "settings-input", r#type: "text", - placeholder: "{t(l, \"dashboard.ollama_url_placeholder\")}", + placeholder: "{t(l, \"dashboard.litellm_url_placeholder\")}", value: "{settings_url}", oninput: move |e| settings_url.set(e.value()), } @@ -274,8 +274,8 @@ pub fn DashboardPage() -> Element { button { class: "btn btn-primary", onclick: move |_| { - *ollama_url.write() = settings_url.read().trim().to_string(); - *ollama_model.write() = settings_model.read().trim().to_string(); + *litellm_url.write() = settings_url.read().trim().to_string(); + *litellm_model.write() = settings_model.read().trim().to_string(); show_settings.set(false); }, "{t(l, \"common.save\")}" @@ -320,14 +320,14 @@ pub fn DashboardPage() -> Element { news_session_id.set(None); - let oll_url = ollama_url.read().clone(); - let mdl = ollama_model.read().clone(); + let ll_url = litellm_url.read().clone(); + let mdl = litellm_model.read().clone(); spawn(async move { is_summarizing.set(true); match crate::infrastructure::llm::summarize_article( snippet.clone(), article_url, - oll_url, + ll_url, mdl, ) .await @@ -373,8 +373,8 @@ pub fn DashboardPage() -> Element { chat_messages: chat_messages.read().clone(), is_chatting: *is_chatting.read(), on_chat_send: move |question: String| { - let oll_url = ollama_url.read().clone(); - let mdl = ollama_model.read().clone(); + let ll_url = litellm_url.read().clone(); + let mdl = litellm_model.read().clone(); let ctx = article_context.read().clone(); // Capture article info for News session creation let card_title = selected_card @@ -394,7 +394,7 @@ pub fn DashboardPage() -> Element { content: question.clone(), }); - // Build full message history for Ollama + // Build full message history for LiteLLM let system_msg = format!( "You are a helpful assistant. The user is reading \ a news article. Use the following context to answer \ @@ -422,7 +422,7 @@ pub fn DashboardPage() -> Element { match create_chat_session( card_title, "News".to_string(), - "ollama".to_string(), + "litellm".to_string(), mdl.clone(), card_url, ) @@ -458,7 +458,7 @@ pub fn DashboardPage() -> Element { } match crate::infrastructure::llm::chat_followup( - msgs, oll_url, mdl, + msgs, ll_url, mdl, ) .await { @@ -495,7 +495,7 @@ pub fn DashboardPage() -> Element { // Right: sidebar (when no card selected) if !has_selection { DashboardSidebar { - ollama_url: ollama_url.read().clone(), + litellm_url: litellm_url.read().clone(), trending: trending_topics.clone(), recent_searches: recent_searches.read().clone(), on_topic_click: move |topic: String| { diff --git a/src/pages/organization/dashboard.rs b/src/pages/organization/dashboard.rs index a0e369b..716c9a9 100644 --- a/src/pages/organization/dashboard.rs +++ b/src/pages/organization/dashboard.rs @@ -2,12 +2,14 @@ use dioxus::prelude::*; use crate::components::{MemberRow, PageHeader}; use crate::i18n::{t, tw, Locale}; -use crate::models::{BillingUsage, MemberRole, OrgMember}; +use crate::infrastructure::litellm::get_litellm_usage; +use crate::models::{BillingUsage, LitellmUsageStats, MemberRole, OrgMember}; /// Organization dashboard with billing stats, member table, and invite modal. /// -/// Shows current billing usage, a table of organization members -/// with role management, and a button to invite new members. +/// Shows current billing usage (fetched from LiteLLM), a per-model +/// breakdown table, a table of organization members with role +/// management, and a button to invite new members. #[component] pub fn OrgDashboardPage() -> Element { let locale = use_context::>(); @@ -20,6 +22,20 @@ pub fn OrgDashboardPage() -> Element { let members_list = members.read().clone(); + // Compute date range: 1st of current month to today + let (start_date, end_date) = current_month_range(); + + // Fetch real usage stats from LiteLLM via server function. + // use_resource memoises and won't re-fire on parent re-renders. + let usage_resource = use_resource(move || { + let start = start_date.clone(); + let end = end_date.clone(); + async move { get_litellm_usage(start, end).await } + }); + + // Clone out of Signal to avoid holding the borrow across rsx! + let usage_snapshot = usage_resource.read().clone(); + // Format token counts for display let tokens_display = format_tokens(usage.tokens_used); let tokens_limit_display = format_tokens(usage.tokens_limit); @@ -30,26 +46,39 @@ pub fn OrgDashboardPage() -> Element { title: t(l, "org.title"), subtitle: t(l, "org.subtitle"), actions: rsx! { - button { class: "btn-primary", onclick: move |_| show_invite.set(true), {t(l, "org.invite_member")} } + button { + class: "btn-primary", + onclick: move |_| show_invite.set(true), + {t(l, "org.invite_member")} + } }, } // Stats bar div { class: "org-stats-bar", div { class: "org-stat", - span { class: "org-stat-value", "{usage.seats_used}/{usage.seats_total}" } + span { class: "org-stat-value", + "{usage.seats_used}/{usage.seats_total}" + } span { class: "org-stat-label", {t(l, "org.seats_used")} } } div { class: "org-stat", span { class: "org-stat-value", "{tokens_display}" } - span { class: "org-stat-label", {tw(l, "org.of_tokens", &[("limit", &tokens_limit_display)])} } + span { class: "org-stat-label", + {tw(l, "org.of_tokens", &[("limit", &tokens_limit_display)])} + } } div { class: "org-stat", - span { class: "org-stat-value", "{usage.billing_cycle_end}" } + span { class: "org-stat-value", + "{usage.billing_cycle_end}" + } span { class: "org-stat-label", {t(l, "org.cycle_ends")} } } } + // LiteLLM usage stats section + {render_usage_section(l, &usage_snapshot)} + // Members table div { class: "org-table-wrapper", table { class: "org-table", @@ -114,6 +143,144 @@ pub fn OrgDashboardPage() -> Element { } } +/// Render the LiteLLM usage stats section: totals bar + per-model table. +/// +/// Shows a loading state while the resource is pending, an error/empty +/// message on failure, and the full breakdown on success. +fn render_usage_section( + l: Locale, + snapshot: &Option>, +) -> Element { + match snapshot { + None => rsx! { + div { class: "org-usage-loading", + span { {t(l, "org.loading_usage")} } + } + }, + Some(Err(_)) => rsx! { + div { class: "org-usage-unavailable", + span { {t(l, "org.usage_unavailable")} } + } + }, + Some(Ok(stats)) if stats.total_tokens == 0 && stats.model_breakdown.is_empty() => { + rsx! { + div { class: "org-usage-unavailable", + span { {t(l, "org.usage_unavailable")} } + } + } + } + Some(Ok(stats)) => { + let spend_display = format!("${:.2}", stats.total_spend); + let total_display = format_tokens(stats.total_tokens); + // Free-tier LiteLLM doesn't provide prompt/completion split + let has_token_split = + stats.total_prompt_tokens > 0 || stats.total_completion_tokens > 0; + + rsx! { + // Usage totals bar + div { class: "org-stats-bar", + div { class: "org-stat", + span { class: "org-stat-value", "{spend_display}" } + span { class: "org-stat-label", + {t(l, "org.total_spend")} + } + } + div { class: "org-stat", + span { class: "org-stat-value", + "{total_display}" + } + span { class: "org-stat-label", + {t(l, "org.total_tokens")} + } + } + // Only show prompt/completion split when available + if has_token_split { + div { class: "org-stat", + span { class: "org-stat-value", + {format_tokens(stats.total_prompt_tokens)} + } + span { class: "org-stat-label", + {t(l, "org.prompt_tokens")} + } + } + div { class: "org-stat", + span { class: "org-stat-value", + {format_tokens(stats.total_completion_tokens)} + } + span { class: "org-stat-label", + {t(l, "org.completion_tokens")} + } + } + } + } + + // Per-model breakdown table + if !stats.model_breakdown.is_empty() { + h3 { class: "org-section-title", + {t(l, "org.model_usage")} + } + div { class: "org-table-wrapper", + table { class: "org-table", + thead { + tr { + th { {t(l, "org.model")} } + th { {t(l, "org.tokens")} } + th { {t(l, "org.spend")} } + } + } + tbody { + for model in &stats.model_breakdown { + tr { key: "{model.model}", + td { "{model.model}" } + td { + {format_tokens(model.total_tokens)} + } + td { + {format!( + "${:.2}", model.spend + )} + } + } + } + } + } + } + } + } + } + } +} + +/// Compute the date range for the current billing month. +/// +/// Returns `(start_date, end_date)` as `YYYY-MM-DD` strings where +/// start_date is the 1st of the current month and end_date is today. +/// +/// On the web target this uses `js_sys::Date` to read the browser clock. +/// On the server target (SSR) it falls back to `chrono::Utc::now()`. +fn current_month_range() -> (String, String) { + #[cfg(feature = "web")] + { + // js_sys::Date accesses the browser's local clock in WASM. + let now = js_sys::Date::new_0(); + let year = now.get_full_year(); + // JS months are 0-indexed, so add 1 for calendar month + let month = now.get_month() + 1; + let day = now.get_date(); + let start = format!("{year:04}-{month:02}-01"); + let end = format!("{year:04}-{month:02}-{day:02}"); + (start, end) + } + #[cfg(not(feature = "web"))] + { + use chrono::Datelike; + let today = chrono::Utc::now().date_naive(); + let start = format!("{:04}-{:02}-01", today.year(), today.month()); + let end = today.format("%Y-%m-%d").to_string(); + (start, end) + } +} + /// Formats a token count into a human-readable string (e.g. "1.2M"). fn format_tokens(count: u64) -> String { const M: u64 = 1_000_000; diff --git a/src/pages/providers.rs b/src/pages/providers.rs index 9a6e039..b13fa96 100644 --- a/src/pages/providers.rs +++ b/src/pages/providers.rs @@ -13,8 +13,8 @@ pub fn ProvidersPage() -> Element { let locale = use_context::>(); let l = *locale.read(); - let mut selected_provider = use_signal(|| LlmProvider::Ollama); - let mut selected_model = use_signal(|| "llama3.1:8b".to_string()); + let mut selected_provider = use_signal(|| LlmProvider::LiteLlm); + let mut selected_model = use_signal(|| "qwen3-32b".to_string()); let mut selected_embedding = use_signal(|| "nomic-embed-text".to_string()); let mut api_key = use_signal(String::new); let mut saved = use_signal(|| false); @@ -59,12 +59,12 @@ pub fn ProvidersPage() -> Element { "Hugging Face" => LlmProvider::HuggingFace, "OpenAI" => LlmProvider::OpenAi, "Anthropic" => LlmProvider::Anthropic, - _ => LlmProvider::Ollama, + _ => LlmProvider::LiteLlm, }; selected_provider.set(prov); saved.set(false); }, - option { value: "Ollama", "Ollama" } + option { value: "LiteLLM", "LiteLLM" } option { value: "Hugging Face", "Hugging Face" } option { value: "OpenAI", "OpenAI" } option { value: "Anthropic", "Anthropic" } @@ -156,23 +156,29 @@ pub fn ProvidersPage() -> Element { fn mock_models() -> Vec { vec![ ModelEntry { - id: "llama3.1:8b".into(), - name: "Llama 3.1 8B".into(), - provider: LlmProvider::Ollama, - context_window: 128, - }, - ModelEntry { - id: "llama3.1:70b".into(), - name: "Llama 3.1 70B".into(), - provider: LlmProvider::Ollama, - context_window: 128, - }, - ModelEntry { - id: "mistral:7b".into(), - name: "Mistral 7B".into(), - provider: LlmProvider::Ollama, + id: "qwen3-32b".into(), + name: "Qwen3 32B".into(), + provider: LlmProvider::LiteLlm, context_window: 32, }, + ModelEntry { + id: "llama-3.3-70b".into(), + name: "Llama 3.3 70B".into(), + provider: LlmProvider::LiteLlm, + context_window: 128, + }, + ModelEntry { + id: "mistral-small-24b".into(), + name: "Mistral Small 24B".into(), + provider: LlmProvider::LiteLlm, + context_window: 32, + }, + ModelEntry { + id: "deepseek-r1-70b".into(), + name: "DeepSeek R1 70B".into(), + provider: LlmProvider::LiteLlm, + context_window: 64, + }, ModelEntry { id: "meta-llama/Llama-3.1-8B".into(), name: "Llama 3.1 8B".into(), @@ -200,7 +206,7 @@ fn mock_embeddings() -> Vec { EmbeddingEntry { id: "nomic-embed-text".into(), name: "Nomic Embed Text".into(), - provider: LlmProvider::Ollama, + provider: LlmProvider::LiteLlm, dimensions: 768, }, EmbeddingEntry {