feat(chat): added chat interface and connection to ollama (#10)

Co-authored-by: Sharang Parnerkar <parnerkarsharang@gmail.com> Reviewed-on: #10
2026-02-20 19:40:25 +00:00
parent 4acb4558b7
commit 50237f5377
28 changed files with 3148 additions and 196 deletions
--- a/src/infrastructure/chat.rs
+++ b/src/infrastructure/chat.rs
@@ -0,0 +1,507 @@
+//! Chat CRUD server functions for session and message persistence.
+//!
+//! Each function extracts the user's `sub` from the tower-sessions session
+//! to scope all queries to the authenticated user. The `ServerState` provides
+//! access to the MongoDB [`Database`](super::database::Database).
+
+use crate::models::{ChatMessage, ChatSession};
+use dioxus::prelude::*;
+
+/// Convert a raw BSON document to a `ChatSession`, extracting `_id` as a hex string.
+#[cfg(feature = "server")]
+pub(crate) fn doc_to_chat_session(doc: &mongodb::bson::Document) -> ChatSession {
+    use crate::models::ChatNamespace;
+
+    let id = doc
+        .get_object_id("_id")
+        .map(|oid| oid.to_hex())
+        .unwrap_or_default();
+    let namespace = match doc.get_str("namespace").unwrap_or("General") {
+        "News" => ChatNamespace::News,
+        _ => ChatNamespace::General,
+    };
+    let article_url = doc
+        .get_str("article_url")
+        .ok()
+        .map(String::from)
+        .filter(|s| !s.is_empty());
+
+    ChatSession {
+        id,
+        user_sub: doc.get_str("user_sub").unwrap_or_default().to_string(),
+        title: doc.get_str("title").unwrap_or_default().to_string(),
+        namespace,
+        provider: doc.get_str("provider").unwrap_or_default().to_string(),
+        model: doc.get_str("model").unwrap_or_default().to_string(),
+        created_at: doc.get_str("created_at").unwrap_or_default().to_string(),
+        updated_at: doc.get_str("updated_at").unwrap_or_default().to_string(),
+        article_url,
+    }
+}
+
+/// Convert a raw BSON document to a `ChatMessage`, extracting `_id` as a hex string.
+#[cfg(feature = "server")]
+pub(crate) fn doc_to_chat_message(doc: &mongodb::bson::Document) -> ChatMessage {
+    use crate::models::ChatRole;
+
+    let id = doc
+        .get_object_id("_id")
+        .map(|oid| oid.to_hex())
+        .unwrap_or_default();
+    let role = match doc.get_str("role").unwrap_or("User") {
+        "Assistant" => ChatRole::Assistant,
+        "System" => ChatRole::System,
+        _ => ChatRole::User,
+    };
+    ChatMessage {
+        id,
+        session_id: doc.get_str("session_id").unwrap_or_default().to_string(),
+        role,
+        content: doc.get_str("content").unwrap_or_default().to_string(),
+        attachments: Vec::new(),
+        timestamp: doc.get_str("timestamp").unwrap_or_default().to_string(),
+    }
+}
+
+/// Helper: extract the authenticated user's `sub` from the session.
+///
+/// # Errors
+///
+/// Returns `ServerFnError` if the session is missing or unreadable.
+#[cfg(feature = "server")]
+async fn require_user_sub() -> Result<String, ServerFnError> {
+    use crate::infrastructure::auth::LOGGED_IN_USER_SESS_KEY;
+    use crate::infrastructure::state::UserStateInner;
+    use dioxus_fullstack::FullstackContext;
+
+    let session: tower_sessions::Session = FullstackContext::extract().await?;
+    let user: UserStateInner = session
+        .get(LOGGED_IN_USER_SESS_KEY)
+        .await
+        .map_err(|e| ServerFnError::new(format!("session read failed: {e}")))?
+        .ok_or_else(|| ServerFnError::new("not authenticated"))?;
+    Ok(user.sub)
+}
+
+/// Helper: extract the [`ServerState`] from the request context.
+#[cfg(feature = "server")]
+async fn require_state() -> Result<crate::infrastructure::ServerState, ServerFnError> {
+    dioxus_fullstack::FullstackContext::extract().await
+}
+
+/// List all chat sessions for the authenticated user, ordered by
+/// `updated_at` descending (most recent first).
+///
+/// # Errors
+///
+/// Returns `ServerFnError` if authentication or the database query fails.
+#[server(endpoint = "list-chat-sessions")]
+pub async fn list_chat_sessions() -> Result<Vec<ChatSession>, ServerFnError> {
+    use mongodb::bson::doc;
+    use mongodb::options::FindOptions;
+
+    let user_sub = require_user_sub().await?;
+    let state = require_state().await?;
+
+    let opts = FindOptions::builder()
+        .sort(doc! { "updated_at": -1 })
+        .build();
+
+    let mut cursor = state
+        .db
+        .raw_collection("chat_sessions")
+        .find(doc! { "user_sub": &user_sub })
+        .with_options(opts)
+        .await
+        .map_err(|e| ServerFnError::new(format!("db error: {e}")))?;
+
+    let mut sessions = Vec::new();
+    use futures::TryStreamExt;
+    while let Some(raw_doc) = cursor
+        .try_next()
+        .await
+        .map_err(|e| ServerFnError::new(format!("cursor error: {e}")))?
+    {
+        sessions.push(doc_to_chat_session(&raw_doc));
+    }
+
+    Ok(sessions)
+}
+
+/// Create a new chat session and return it with the MongoDB-generated ID.
+///
+/// # Arguments
+///
+/// * `title` - Display title for the session
+/// * `namespace` - Namespace string: `"General"` or `"News"`
+/// * `provider` - LLM provider name (e.g. "ollama")
+/// * `model` - Model ID (e.g. "llama3.1:8b")
+/// * `article_url` - Source article URL (only for `News` namespace, empty if none)
+///
+/// # Errors
+///
+/// Returns `ServerFnError` if authentication or the insert fails.
+#[server(endpoint = "create-chat-session")]
+pub async fn create_chat_session(
+    title: String,
+    namespace: String,
+    provider: String,
+    model: String,
+    article_url: String,
+) -> Result<ChatSession, ServerFnError> {
+    use crate::models::ChatNamespace;
+
+    let user_sub = require_user_sub().await?;
+    let state = require_state().await?;
+
+    let ns = if namespace == "News" {
+        ChatNamespace::News
+    } else {
+        ChatNamespace::General
+    };
+
+    let url = if article_url.is_empty() {
+        None
+    } else {
+        Some(article_url)
+    };
+
+    let now = chrono::Utc::now().to_rfc3339();
+    let session = ChatSession {
+        id: String::new(), // MongoDB will generate _id
+        user_sub,
+        title,
+        namespace: ns,
+        provider,
+        model,
+        created_at: now.clone(),
+        updated_at: now,
+        article_url: url,
+    };
+
+    let result = state
+        .db
+        .chat_sessions()
+        .insert_one(&session)
+        .await
+        .map_err(|e| ServerFnError::new(format!("insert failed: {e}")))?;
+
+    // Return the session with the generated ID
+    let id = result
+        .inserted_id
+        .as_object_id()
+        .map(|oid| oid.to_hex())
+        .unwrap_or_default();
+
+    Ok(ChatSession { id, ..session })
+}
+
+/// Rename a chat session.
+///
+/// # Arguments
+///
+/// * `session_id` - The MongoDB document ID of the session
+/// * `new_title` - The new title to set
+///
+/// # Errors
+///
+/// Returns `ServerFnError` if authentication, the session is not found,
+/// or the update fails.
+#[server(endpoint = "rename-chat-session")]
+pub async fn rename_chat_session(
+    session_id: String,
+    new_title: String,
+) -> Result<(), ServerFnError> {
+    use mongodb::bson::{doc, oid::ObjectId};
+
+    let user_sub = require_user_sub().await?;
+    let state = require_state().await?;
+
+    let oid = ObjectId::parse_str(&session_id)
+        .map_err(|e| ServerFnError::new(format!("invalid session id: {e}")))?;
+
+    let result = state
+        .db
+        .chat_sessions()
+        .update_one(
+            doc! { "_id": oid, "user_sub": &user_sub },
+            doc! { "$set": { "title": &new_title, "updated_at": chrono::Utc::now().to_rfc3339() } },
+        )
+        .await
+        .map_err(|e| ServerFnError::new(format!("update failed: {e}")))?;
+
+    if result.matched_count == 0 {
+        return Err(ServerFnError::new("session not found or not owned by user"));
+    }
+
+    Ok(())
+}
+
+/// Delete a chat session and all its messages.
+///
+/// # Arguments
+///
+/// * `session_id` - The MongoDB document ID of the session
+///
+/// # Errors
+///
+/// Returns `ServerFnError` if authentication or the delete fails.
+#[server(endpoint = "delete-chat-session")]
+pub async fn delete_chat_session(session_id: String) -> Result<(), ServerFnError> {
+    use mongodb::bson::{doc, oid::ObjectId};
+
+    let user_sub = require_user_sub().await?;
+    let state = require_state().await?;
+
+    let oid = ObjectId::parse_str(&session_id)
+        .map_err(|e| ServerFnError::new(format!("invalid session id: {e}")))?;
+
+    // Delete the session (scoped to user)
+    state
+        .db
+        .chat_sessions()
+        .delete_one(doc! { "_id": oid, "user_sub": &user_sub })
+        .await
+        .map_err(|e| ServerFnError::new(format!("delete session failed: {e}")))?;
+
+    // Delete all messages belonging to this session
+    state
+        .db
+        .chat_messages()
+        .delete_many(doc! { "session_id": &session_id })
+        .await
+        .map_err(|e| ServerFnError::new(format!("delete messages failed: {e}")))?;
+
+    Ok(())
+}
+
+/// Load all messages for a chat session, ordered by timestamp ascending.
+///
+/// # Arguments
+///
+/// * `session_id` - The MongoDB document ID of the session
+///
+/// # Errors
+///
+/// Returns `ServerFnError` if authentication or the query fails.
+#[server(endpoint = "list-chat-messages")]
+pub async fn list_chat_messages(session_id: String) -> Result<Vec<ChatMessage>, ServerFnError> {
+    use mongodb::bson::doc;
+    use mongodb::options::FindOptions;
+
+    // Verify the user owns this session
+    let user_sub = require_user_sub().await?;
+    let state = require_state().await?;
+
+    // Verify the user owns this session using ObjectId for _id matching
+    use mongodb::bson::oid::ObjectId;
+    let session_oid = ObjectId::parse_str(&session_id)
+        .map_err(|e| ServerFnError::new(format!("invalid session id: {e}")))?;
+
+    let session_exists = state
+        .db
+        .raw_collection("chat_sessions")
+        .count_documents(doc! { "_id": session_oid, "user_sub": &user_sub })
+        .await
+        .map_err(|e| ServerFnError::new(format!("db error: {e}")))?;
+
+    if session_exists == 0 {
+        return Err(ServerFnError::new("session not found or not owned by user"));
+    }
+
+    let opts = FindOptions::builder().sort(doc! { "timestamp": 1 }).build();
+
+    let mut cursor = state
+        .db
+        .raw_collection("chat_messages")
+        .find(doc! { "session_id": &session_id })
+        .with_options(opts)
+        .await
+        .map_err(|e| ServerFnError::new(format!("db error: {e}")))?;
+
+    let mut messages = Vec::new();
+    use futures::TryStreamExt;
+    while let Some(raw_doc) = cursor
+        .try_next()
+        .await
+        .map_err(|e| ServerFnError::new(format!("cursor error: {e}")))?
+    {
+        messages.push(doc_to_chat_message(&raw_doc));
+    }
+
+    Ok(messages)
+}
+
+/// Persist a single chat message and return it with the MongoDB-generated ID.
+///
+/// Also updates the parent session's `updated_at` timestamp.
+///
+/// # Arguments
+///
+/// * `session_id` - The session this message belongs to
+/// * `role` - Message role string: `"user"`, `"assistant"`, or `"system"`
+/// * `content` - Message text content
+///
+/// # Errors
+///
+/// Returns `ServerFnError` if authentication or the insert fails.
+#[server(endpoint = "save-chat-message")]
+pub async fn save_chat_message(
+    session_id: String,
+    role: String,
+    content: String,
+) -> Result<ChatMessage, ServerFnError> {
+    use crate::models::ChatRole;
+    use mongodb::bson::{doc, oid::ObjectId};
+
+    let _user_sub = require_user_sub().await?;
+    let state = require_state().await?;
+
+    let chat_role = match role.as_str() {
+        "assistant" => ChatRole::Assistant,
+        "system" => ChatRole::System,
+        _ => ChatRole::User,
+    };
+
+    let now = chrono::Utc::now().to_rfc3339();
+    let message = ChatMessage {
+        id: String::new(),
+        session_id: session_id.clone(),
+        role: chat_role,
+        content,
+        attachments: Vec::new(),
+        timestamp: now.clone(),
+    };
+
+    let result = state
+        .db
+        .chat_messages()
+        .insert_one(&message)
+        .await
+        .map_err(|e| ServerFnError::new(format!("insert failed: {e}")))?;
+
+    let id = result
+        .inserted_id
+        .as_object_id()
+        .map(|oid| oid.to_hex())
+        .unwrap_or_default();
+
+    // Update session's updated_at timestamp
+    if let Ok(session_oid) = ObjectId::parse_str(&session_id) {
+        let _ = state
+            .db
+            .chat_sessions()
+            .update_one(
+                doc! { "_id": session_oid },
+                doc! { "$set": { "updated_at": &now } },
+            )
+            .await;
+    }
+
+    Ok(ChatMessage { id, ..message })
+}
+
+/// Non-streaming chat completion (fallback for article panel).
+///
+/// Sends the full conversation history to the configured LLM provider
+/// and returns the complete response. Used where SSE streaming is not
+/// needed (e.g. dashboard article follow-up panel).
+///
+/// # Arguments
+///
+/// * `session_id` - The chat session ID (loads provider/model config)
+/// * `messages_json` - Conversation history as JSON string:
+///   `[{"role":"user","content":"..."},...]`
+///
+/// # Errors
+///
+/// Returns `ServerFnError` if the LLM request fails.
+#[server(endpoint = "chat-complete")]
+pub async fn chat_complete(
+    session_id: String,
+    messages_json: String,
+) -> Result<String, ServerFnError> {
+    use mongodb::bson::{doc, oid::ObjectId};
+
+    let _user_sub = require_user_sub().await?;
+    let state = require_state().await?;
+
+    // Load the session to get provider/model
+    let session_oid = ObjectId::parse_str(&session_id)
+        .map_err(|e| ServerFnError::new(format!("invalid session id: {e}")))?;
+
+    let session_doc = state
+        .db
+        .raw_collection("chat_sessions")
+        .find_one(doc! { "_id": session_oid })
+        .await
+        .map_err(|e| ServerFnError::new(format!("db error: {e}")))?
+        .ok_or_else(|| ServerFnError::new("session not found"))?;
+    let session = doc_to_chat_session(&session_doc);
+
+    // Resolve provider URL and model
+    let (base_url, model) = resolve_provider_url(&state, &session.provider, &session.model);
+
+    // Parse messages from JSON
+    let chat_msgs: Vec<serde_json::Value> = serde_json::from_str(&messages_json)
+        .map_err(|e| ServerFnError::new(format!("invalid messages JSON: {e}")))?;
+
+    let body = serde_json::json!({
+        "model": model,
+        "messages": chat_msgs,
+        "stream": false,
+    });
+
+    let client = reqwest::Client::new();
+    let url = format!("{}/v1/chat/completions", base_url.trim_end_matches('/'));
+
+    let resp = client
+        .post(&url)
+        .header("content-type", "application/json")
+        .json(&body)
+        .send()
+        .await
+        .map_err(|e| ServerFnError::new(format!("LLM request failed: {e}")))?;
+
+    if !resp.status().is_success() {
+        let status = resp.status();
+        let text = resp.text().await.unwrap_or_default();
+        return Err(ServerFnError::new(format!("LLM returned {status}: {text}")));
+    }
+
+    let json: serde_json::Value = resp
+        .json()
+        .await
+        .map_err(|e| ServerFnError::new(format!("parse error: {e}")))?;
+
+    json["choices"][0]["message"]["content"]
+        .as_str()
+        .map(String::from)
+        .ok_or_else(|| ServerFnError::new("empty LLM response"))
+}
+
+/// Resolve the base URL for a provider, falling back to server defaults.
+#[cfg(feature = "server")]
+fn resolve_provider_url(
+    state: &crate::infrastructure::ServerState,
+    provider: &str,
+    model: &str,
+) -> (String, String) {
+    match provider {
+        "openai" => ("https://api.openai.com".to_string(), model.to_string()),
+        "anthropic" => ("https://api.anthropic.com".to_string(), model.to_string()),
+        "huggingface" => (
+            format!("https://api-inference.huggingface.co/models/{}", model),
+            model.to_string(),
+        ),
+        // Default to Ollama
+        _ => (
+            state.services.ollama_url.clone(),
+            if model.is_empty() {
+                state.services.ollama_model.clone()
+            } else {
+                model.to_string()
+            },
+        ),
+    }
+}
--- a/src/infrastructure/chat_stream.rs
+++ b/src/infrastructure/chat_stream.rs
@@ -0,0 +1,266 @@
+//! SSE streaming endpoint for chat completions.
+//!
+//! Exposes `GET /api/chat/stream?session_id=<id>` which:
+//! 1. Authenticates the user via tower-sessions
+//! 2. Loads the session and its messages from MongoDB
+//! 3. Streams LLM tokens as SSE events to the frontend
+//! 4. Persists the complete assistant message on finish
+
+use axum::{
+    extract::Query,
+    response::{
+        sse::{Event, KeepAlive, Sse},
+        IntoResponse, Response,
+    },
+    Extension,
+};
+use futures::stream::Stream;
+use reqwest::StatusCode;
+use serde::Deserialize;
+use tower_sessions::Session;
+
+use super::{
+    auth::LOGGED_IN_USER_SESS_KEY,
+    chat::{doc_to_chat_message, doc_to_chat_session},
+    provider_client::{send_chat_request, ProviderMessage},
+    server_state::ServerState,
+    state::UserStateInner,
+};
+use crate::models::{ChatMessage, ChatRole};
+
+/// Query parameters for the SSE stream endpoint.
+#[derive(Deserialize)]
+pub struct StreamQuery {
+    session_id: String,
+}
+
+/// SSE streaming handler for chat completions.
+///
+/// Reads the session's provider/model config, loads conversation history,
+/// sends to the LLM with `stream: true`, and forwards tokens as SSE events.
+///
+/// # SSE Event Format
+///
+/// - `data: {"token": "..."}` -- partial token
+/// - `data: {"done": true, "message_id": "..."}` -- stream complete
+/// - `data: {"error": "..."}` -- on failure
+pub async fn chat_stream_handler(
+    session: Session,
+    Extension(state): Extension<ServerState>,
+    Query(params): Query<StreamQuery>,
+) -> Response {
+    // Authenticate
+    let user_state: Option<UserStateInner> = match session.get(LOGGED_IN_USER_SESS_KEY).await {
+        Ok(u) => u,
+        Err(_) => return (StatusCode::UNAUTHORIZED, "session error").into_response(),
+    };
+    let user = match user_state {
+        Some(u) => u,
+        None => return (StatusCode::UNAUTHORIZED, "not authenticated").into_response(),
+    };
+
+    // Load session from MongoDB (raw document to handle ObjectId -> String)
+    let chat_session = {
+        use mongodb::bson::{doc, oid::ObjectId};
+        let oid = match ObjectId::parse_str(&params.session_id) {
+            Ok(o) => o,
+            Err(_) => return (StatusCode::BAD_REQUEST, "invalid session_id").into_response(),
+        };
+        match state
+            .db
+            .raw_collection("chat_sessions")
+            .find_one(doc! { "_id": oid, "user_sub": &user.sub })
+            .await
+        {
+            Ok(Some(doc)) => doc_to_chat_session(&doc),
+            Ok(None) => return (StatusCode::NOT_FOUND, "session not found").into_response(),
+            Err(e) => {
+                tracing::error!("db error loading session: {e}");
+                return (StatusCode::INTERNAL_SERVER_ERROR, "db error").into_response();
+            }
+        }
+    };
+
+    // Load messages (raw documents to handle ObjectId -> String)
+    let messages = {
+        use mongodb::bson::doc;
+        use mongodb::options::FindOptions;
+
+        let opts = FindOptions::builder().sort(doc! { "timestamp": 1 }).build();
+
+        match state
+            .db
+            .raw_collection("chat_messages")
+            .find(doc! { "session_id": &params.session_id })
+            .with_options(opts)
+            .await
+        {
+            Ok(mut cursor) => {
+                use futures::TryStreamExt;
+                let mut msgs = Vec::new();
+                while let Some(doc) = TryStreamExt::try_next(&mut cursor).await.unwrap_or(None) {
+                    msgs.push(doc_to_chat_message(&doc));
+                }
+                msgs
+            }
+            Err(e) => {
+                tracing::error!("db error loading messages: {e}");
+                return (StatusCode::INTERNAL_SERVER_ERROR, "db error").into_response();
+            }
+        }
+    };
+
+    // Convert to provider format
+    let provider_msgs: Vec<ProviderMessage> = messages
+        .iter()
+        .map(|m| ProviderMessage {
+            role: match m.role {
+                ChatRole::User => "user".to_string(),
+                ChatRole::Assistant => "assistant".to_string(),
+                ChatRole::System => "system".to_string(),
+            },
+            content: m.content.clone(),
+        })
+        .collect();
+
+    let provider = chat_session.provider.clone();
+    let model = chat_session.model.clone();
+    let session_id = params.session_id.clone();
+
+    // TODO: Load user's API key from preferences for non-Ollama providers.
+    // For now, Ollama (no key needed) is the default path.
+    let api_key: Option<String> = None;
+
+    // Send streaming request to LLM
+    let llm_resp = match send_chat_request(
+        &state,
+        &provider,
+        &model,
+        &provider_msgs,
+        api_key.as_deref(),
+        true,
+    )
+    .await
+    {
+        Ok(r) => r,
+        Err(e) => {
+            tracing::error!("LLM request failed: {e}");
+            return (StatusCode::BAD_GATEWAY, "LLM request failed").into_response();
+        }
+    };
+
+    if !llm_resp.status().is_success() {
+        let status = llm_resp.status();
+        let body = llm_resp.text().await.unwrap_or_default();
+        tracing::error!("LLM returned {status}: {body}");
+        return (StatusCode::BAD_GATEWAY, format!("LLM error: {status}")).into_response();
+    }
+
+    // Stream the response bytes as SSE events
+    let byte_stream = llm_resp.bytes_stream();
+    let state_clone = state.clone();
+
+    let sse_stream = build_sse_stream(byte_stream, state_clone, session_id, provider.clone());
+
+    Sse::new(sse_stream)
+        .keep_alive(KeepAlive::default())
+        .into_response()
+}
+
+/// Build an SSE stream that parses OpenAI-compatible streaming chunks
+/// and emits token events. On completion, persists the full message.
+fn build_sse_stream(
+    byte_stream: impl Stream<Item = Result<bytes::Bytes, reqwest::Error>> + Send + 'static,
+    state: ServerState,
+    session_id: String,
+    _provider: String,
+) -> impl Stream<Item = Result<Event, std::convert::Infallible>> + Send + 'static {
+    // Use an async stream to process chunks
+    async_stream::stream! {
+        use futures::StreamExt;
+
+        let mut full_content = String::new();
+        let mut buffer = String::new();
+
+        // Pin the byte stream for iteration
+        let mut stream = std::pin::pin!(byte_stream);
+
+        while let Some(chunk_result) = StreamExt::next(&mut stream).await {
+            let chunk = match chunk_result {
+                Ok(bytes) => bytes,
+                Err(e) => {
+                    let err_json = serde_json::json!({ "error": e.to_string() });
+                    yield Ok(Event::default().data(err_json.to_string()));
+                    break;
+                }
+            };
+
+            let text = String::from_utf8_lossy(&chunk);
+            buffer.push_str(&text);
+
+            // Process complete SSE lines from the buffer.
+            // OpenAI streaming format: `data: {...}\n\n`
+            while let Some(line_end) = buffer.find('\n') {
+                let line = buffer[..line_end].trim().to_string();
+                buffer = buffer[line_end + 1..].to_string();
+
+                if line.is_empty() || line == "data: [DONE]" {
+                    continue;
+                }
+
+                if let Some(json_str) = line.strip_prefix("data: ") {
+                    if let Ok(parsed) = serde_json::from_str::<serde_json::Value>(json_str) {
+                        // Extract token from OpenAI delta format
+                        if let Some(token) = parsed["choices"][0]["delta"]["content"].as_str() {
+                            full_content.push_str(token);
+                            let event_data = serde_json::json!({ "token": token });
+                            yield Ok(Event::default().data(event_data.to_string()));
+                        }
+                    }
+                }
+            }
+        }
+
+        // Persist the complete assistant message
+        if !full_content.is_empty() {
+            let now = chrono::Utc::now().to_rfc3339();
+            let message = ChatMessage {
+                id: String::new(),
+                session_id: session_id.clone(),
+                role: ChatRole::Assistant,
+                content: full_content,
+                attachments: Vec::new(),
+                timestamp: now.clone(),
+            };
+
+            let msg_id = match state.db.chat_messages().insert_one(&message).await {
+                Ok(result) => result
+                    .inserted_id
+                    .as_object_id()
+                    .map(|oid| oid.to_hex())
+                    .unwrap_or_default(),
+                Err(e) => {
+                    tracing::error!("failed to persist assistant message: {e}");
+                    String::new()
+                }
+            };
+
+            // Update session timestamp
+            if let Ok(session_oid) =
+                mongodb::bson::oid::ObjectId::parse_str(&session_id)
+            {
+                let _ = state
+                    .db
+                    .chat_sessions()
+                    .update_one(
+                        mongodb::bson::doc! { "_id": session_oid },
+                        mongodb::bson::doc! { "$set": { "updated_at": &now } },
+                    )
+                    .await;
+            }
+
+            let done_data = serde_json::json!({ "done": true, "message_id": msg_id });
+            yield Ok(Event::default().data(done_data.to_string()));
+        }
+    }
+}
--- a/src/infrastructure/database.rs
+++ b/src/infrastructure/database.rs
@@ -3,7 +3,7 @@
 use mongodb::{bson::doc, Client, Collection};

 use super::Error;
-use crate::models::{OrgBillingRecord, OrgSettings, UserPreferences};
+use crate::models::{ChatMessage, ChatSession, OrgBillingRecord, OrgSettings, UserPreferences};

 /// Thin wrapper around [`mongodb::Database`] that provides typed
 /// collection accessors for the application's domain models.
@@ -49,4 +49,20 @@ impl Database {
    pub fn org_billing(&self) -> Collection<OrgBillingRecord> {
        self.inner.collection("org_billing")
    }
+
+    /// Collection for persisted chat sessions (sidebar listing).
+    pub fn chat_sessions(&self) -> Collection<ChatSession> {
+        self.inner.collection("chat_sessions")
+    }
+
+    /// Collection for individual chat messages within sessions.
+    pub fn chat_messages(&self) -> Collection<ChatMessage> {
+        self.inner.collection("chat_messages")
+    }
+
+    /// Raw BSON document collection for queries that need manual
+    /// `_id` → `String` conversion (avoids `ObjectId` deserialization issues).
+    pub fn raw_collection(&self, name: &str) -> Collection<mongodb::bson::Document> {
+        self.inner.collection(name)
+    }
 }
--- a/src/infrastructure/mod.rs
+++ b/src/infrastructure/mod.rs
@@ -1,6 +1,7 @@
 // Server function modules (compiled for both web and server features;
 // the #[server] macro generates client stubs for the web target)
 pub mod auth_check;
+pub mod chat;
 pub mod llm;
 pub mod ollama;
 pub mod searxng;
@@ -11,12 +12,16 @@ mod auth;
 #[cfg(feature = "server")]
 mod auth_middleware;
 #[cfg(feature = "server")]
+mod chat_stream;
+#[cfg(feature = "server")]
 pub mod config;
 #[cfg(feature = "server")]
 pub mod database;
 #[cfg(feature = "server")]
 mod error;
 #[cfg(feature = "server")]
+pub mod provider_client;
+#[cfg(feature = "server")]
 mod server;
 #[cfg(feature = "server")]
 pub mod server_state;
@@ -28,6 +33,8 @@ pub use auth::*;
 #[cfg(feature = "server")]
 pub use auth_middleware::*;
 #[cfg(feature = "server")]
+pub use chat_stream::*;
+#[cfg(feature = "server")]
 pub use error::*;
 #[cfg(feature = "server")]
 pub use server::*;
--- a/src/infrastructure/provider_client.rs
+++ b/src/infrastructure/provider_client.rs
@@ -0,0 +1,148 @@
+//! Unified LLM provider dispatch.
+//!
+//! Routes chat completion requests to Ollama, OpenAI, Anthropic, or
+//! HuggingFace based on the session's provider setting. All providers
+//! except Anthropic use the OpenAI-compatible chat completions format.
+
+use reqwest::Client;
+use serde::{Deserialize, Serialize};
+
+use super::server_state::ServerState;
+
+/// OpenAI-compatible chat message used for request bodies.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ProviderMessage {
+    pub role: String,
+    pub content: String,
+}
+
+/// Send a chat completion request to the configured provider.
+///
+/// # Arguments
+///
+/// * `state` - Server state (for default Ollama URL/model)
+/// * `provider` - Provider name (`"ollama"`, `"openai"`, `"anthropic"`, `"huggingface"`)
+/// * `model` - Model ID
+/// * `messages` - Conversation history
+/// * `api_key` - API key (required for non-Ollama providers)
+/// * `stream` - Whether to request streaming
+///
+/// # Returns
+///
+/// The raw `reqwest::Response` for the caller to consume (streaming or not).
+///
+/// # Errors
+///
+/// Returns an error if the HTTP request fails.
+pub async fn send_chat_request(
+    state: &ServerState,
+    provider: &str,
+    model: &str,
+    messages: &[ProviderMessage],
+    api_key: Option<&str>,
+    stream: bool,
+) -> Result<reqwest::Response, reqwest::Error> {
+    let client = Client::new();
+
+    match provider {
+        "openai" => {
+            let body = serde_json::json!({
+                "model": model,
+                "messages": messages,
+                "stream": stream,
+            });
+            client
+                .post("https://api.openai.com/v1/chat/completions")
+                .header("content-type", "application/json")
+                .header(
+                    "Authorization",
+                    format!("Bearer {}", api_key.unwrap_or_default()),
+                )
+                .json(&body)
+                .send()
+                .await
+        }
+        "anthropic" => {
+            // Anthropic uses a different API format -- translate.
+            // Extract system message separately, convert roles.
+            let system_msg: String = messages
+                .iter()
+                .filter(|m| m.role == "system")
+                .map(|m| m.content.clone())
+                .collect::<Vec<_>>()
+                .join("\n");
+
+            let anthropic_msgs: Vec<serde_json::Value> = messages
+                .iter()
+                .filter(|m| m.role != "system")
+                .map(|m| {
+                    serde_json::json!({
+                        "role": m.role,
+                        "content": m.content,
+                    })
+                })
+                .collect();
+
+            let mut body = serde_json::json!({
+                "model": model,
+                "messages": anthropic_msgs,
+                "max_tokens": 4096,
+                "stream": stream,
+            });
+            if !system_msg.is_empty() {
+                body["system"] = serde_json::Value::String(system_msg);
+            }
+
+            client
+                .post("https://api.anthropic.com/v1/messages")
+                .header("content-type", "application/json")
+                .header("x-api-key", api_key.unwrap_or_default())
+                .header("anthropic-version", "2023-06-01")
+                .json(&body)
+                .send()
+                .await
+        }
+        "huggingface" => {
+            let url = format!(
+                "https://api-inference.huggingface.co/models/{}/v1/chat/completions",
+                model
+            );
+            let body = serde_json::json!({
+                "model": model,
+                "messages": messages,
+                "stream": stream,
+            });
+            client
+                .post(&url)
+                .header("content-type", "application/json")
+                .header(
+                    "Authorization",
+                    format!("Bearer {}", api_key.unwrap_or_default()),
+                )
+                .json(&body)
+                .send()
+                .await
+        }
+        // Default: Ollama (OpenAI-compatible endpoint)
+        _ => {
+            let base_url = &state.services.ollama_url;
+            let resolved_model = if model.is_empty() {
+                &state.services.ollama_model
+            } else {
+                model
+            };
+            let url = format!("{}/v1/chat/completions", base_url.trim_end_matches('/'));
+            let body = serde_json::json!({
+                "model": resolved_model,
+                "messages": messages,
+                "stream": stream,
+            });
+            client
+                .post(&url)
+                .header("content-type", "application/json")
+                .json(&body)
+                .send()
+                .await
+        }
+    }
+}
--- a/src/infrastructure/server.rs
+++ b/src/infrastructure/server.rs
@@ -6,7 +6,7 @@ use time::Duration;
 use tower_sessions::{cookie::Key, MemoryStore, SessionManagerLayer};

 use crate::infrastructure::{
-    auth_callback, auth_login,
+    auth_callback, auth_login, chat_stream_handler,
    config::{KeycloakConfig, LlmProvidersConfig, ServiceUrls, SmtpConfig, StripeConfig},
    database::Database,
    logout, require_auth,
@@ -82,6 +82,7 @@ pub fn server_start(app: fn() -> Element) -> Result<(), super::Error> {
            .route("/auth", get(auth_login))
            .route("/auth/callback", get(auth_callback))
            .route("/logout", get(logout))
+            .route("/api/chat/stream", get(chat_stream_handler))
            .serve_dioxus_application(ServeConfig::new(), app)
            .layer(Extension(PendingOAuthStore::default()))
            .layer(Extension(server_state))