feat(dashboard): proactively refresh expired Keycloak tokens

The dashboard stored a refresh_token in the session at login (auth.rs) but never used it. Once the access_token's 5-minute lifespan ran out, every subsequent agent call failed with 401 ExpiredSignature. The UI showed "unable to load X" until the user logged out and back in. Fix: before attaching the bearer, decode the JWT's `exp` claim and proactively refresh via the stored refresh_token if the token is expired or within REFRESH_SKEW_SECS (30s) of expiry. Updates the session with the new access_token (and rotated refresh_token if KC sends one). Refresh failures fall through with the stale token so the agent's 401 surfaces to the UI rather than failing the request at the dashboard layer. Why "proactive" instead of "retry on 401" - Saves a wasted round-trip on every agent call once the token has aged past 5 min. - Doesn't require cloning RequestBuilder bodies for retry. - Same end state — fresh token reaches the agent. Test plan - cargo test -p compliance-dashboard --features server --no-default-features infrastructure::agent_client::tests — 5 pass: * expired JWT → refresh * near-expiry within skew window → refresh * fresh JWT → no refresh * malformed/empty JWT → refresh (defensive) * JWT without exp claim → refresh (defensive) - Manual after deploy: dashboard works past the 5-min token lifespan without manual re-login. Note - The refresh code addresses the ExpiredSignature failure mode. The separate "JWT is missing tenant_id claim" 401 is a Keycloak realm config issue (the user logging in lacks the M7.1 attributes that the protocol mappers consume) and is fixed by realm/attribute config, not by this PR. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-17 21:38:06 +02:00
8 changed files with 5 additions and 167 deletions
@@ -1,115 +0,0 @@
 //! Cross-tenant admin endpoints (`/api/v1/admin/*`).
 //!
 //! Operator-only. Auth is a **static bearer token** (`ADMIN_API_TOKEN`
 //! env on the agent) — explicitly NOT a Keycloak JWT, because the
 //! whole point of these endpoints is to operate ACROSS tenants. A
 //! customer JWT (which always carries a single tenant_id) has no
 //! business mounting them.
 //!
 //! Routes are only registered when `ADMIN_API_TOKEN` is set. With no
 //! token, the endpoints don't exist at all (404), which is a stronger
 //! guarantee than "401 if you guess the path".
 //!
 //! Operations:
 //! - `GET    /api/v1/admin/tenants`              — list tenant DBs
 //! - `DELETE /api/v1/admin/tenants/{tenant_id}`  — GDPR delete
 //!
 //! Tenant ids in URLs are passed as-is to `DatabasePool::drop_tenant`,
 //! which sanitises them the same way it does for creation. Listing
 //! returns the raw DB names from `list_tenant_db_names` — operators
 //! can reverse-derive the tenant_id from the prefix.
 use axum::extract::{Extension, Path, Request};
 use axum::http::{header, StatusCode};
 use axum::middleware::Next;
 use axum::response::{IntoResponse, Response};
 use axum::Json;
 use secrecy::ExposeSecret;
 use serde::Serialize;
 use super::dto::AgentExt;
 /// JSON response body returned by the tenant-database listing handler
 /// (`list_tenant_dbs`).
 #[derive(Serialize)]
 pub struct ListTenantDbsResponse {
    /// Database names exactly as returned by the pool's
    /// `list_tenant_db_names` call; no post-processing is applied here.
    pub tenant_db_names: Vec<String>,
 }
 /// Handler for listing tenant databases.
 ///
 /// Asks the agent's database pool for the tenant DB names and wraps them
 /// in a [`ListTenantDbsResponse`].
 ///
 /// # Errors
 ///
 /// Returns `500 Internal Server Error` when the pool lookup fails; the
 /// underlying error is logged, not exposed to the caller.
 #[tracing::instrument(skip_all)]
 pub async fn list_tenant_dbs(
    Extension(agent): AgentExt,
 ) -> Result<Json<ListTenantDbsResponse>, StatusCode> {
    match agent.db_pool.list_tenant_db_names().await {
        Ok(tenant_db_names) => Ok(Json(ListTenantDbsResponse { tenant_db_names })),
        Err(e) => {
            tracing::error!("admin: list_tenant_db_names failed: {e}");
            Err(StatusCode::INTERNAL_SERVER_ERROR)
        }
    }
 }
 /// Handler for dropping a single tenant's database.
 ///
 /// The `tenant_id` path segment is forwarded unchanged to the pool's
 /// `drop_tenant`. On success the body is `{"status": "dropped"}`.
 ///
 /// # Errors
 ///
 /// Returns `500 Internal Server Error` when the drop fails; the underlying
 /// error is logged, not exposed to the caller.
 #[tracing::instrument(skip_all, fields(tenant_id = %tenant_id))]
 pub async fn drop_tenant_db(
    Extension(agent): AgentExt,
    Path(tenant_id): Path<String>,
 ) -> Result<Json<serde_json::Value>, StatusCode> {
    // Guard clause: bail out with a 500 as soon as the pool reports a failure.
    if let Err(e) = agent.db_pool.drop_tenant(&tenant_id).await {
        tracing::error!("admin: drop_tenant failed: {e}");
        return Err(StatusCode::INTERNAL_SERVER_ERROR);
    }
    let body = serde_json::json!({ "status": "dropped" });
    Ok(Json(body))
 }
 /// Compares two tokens without stopping at the first differing byte.
 ///
 /// Inputs of different length are rejected up front. For equal-length
 /// inputs every byte pair is XOR-ed and the results are OR-ed into a single
 /// accumulator, so the amount of work does not depend on where (or whether)
 /// the tokens differ.
 fn tokens_eq(a: &str, b: &str) -> bool {
    a.len() == b.len()
        && a.bytes()
            .zip(b.bytes())
            // `fold` visits every pair; no early exit on a mismatch.
            .fold(0u8, |acc, (lhs, rhs)| acc | (lhs ^ rhs))
            == 0
 }
 /// Middleware enforcing the static `ADMIN_API_TOKEN`.
 ///
 /// Flow:
 /// 1. If no admin token is configured, respond `404 admin disabled`.
 /// 2. Read the `Authorization` header and extract the credentials that
 ///    follow the `Bearer` scheme. The scheme name is matched
 ///    case-insensitively (HTTP auth schemes are case-insensitive), so
 ///    `bearer <token>` is accepted just like `Bearer <token>`.
 /// 3. A missing, non-UTF-8, non-bearer or empty credential yields
 ///    `401 Missing bearer token`.
 /// 4. A credential that does not match the configured token (compared via
 ///    `tokens_eq`) yields `401 Invalid admin token`.
 /// 5. Otherwise the request is forwarded to the next handler.
 pub async fn require_admin_token(
    Extension(agent): AgentExt,
    request: Request,
    next: Next,
 ) -> Response {
    let Some(expected) = agent.config.admin_api_token.as_ref() else {
        // Belt-and-braces — if the routes were somehow mounted without
        // a token configured, refuse rather than no-op-pass.
        return (StatusCode::NOT_FOUND, "admin disabled").into_response();
    };
    let presented = request
        .headers()
        .get(header::AUTHORIZATION)
        .and_then(|v| v.to_str().ok())
        .and_then(|s| {
            // Split "<scheme> <credentials>" at the first space and compare
            // the scheme without regard to ASCII case.
            let (scheme, credentials) = s.split_once(' ')?;
            scheme.eq_ignore_ascii_case("Bearer").then_some(credentials)
        })
        .map(str::trim);
    let Some(presented) = presented.filter(|s| !s.is_empty()) else {
        return (StatusCode::UNAUTHORIZED, "Missing bearer token").into_response();
    };
    if !tokens_eq(presented, expected.expose_secret()) {
        return (StatusCode::UNAUTHORIZED, "Invalid admin token").into_response();
    }
    next.run(request).await
 }
 #[cfg(test)]
 mod tests {
    use super::*;
    /// Table-driven check of `tokens_eq`: equal strings, a same-length
    /// mismatch, differing lengths, and the empty-string edge cases.
    #[test]
    fn tokens_eq_basic() {
        let cases = [
            ("abc", "abc", true),
            ("abc", "abd", false),
            ("abc", "abcd", false),
            ("", "x", false),
            ("", "", true),
        ];
        for (lhs, rhs, expected) in cases {
            assert_eq!(tokens_eq(lhs, rhs), expected);
        }
    }
 }
@@ -1,4 +1,3 @@
 pub mod admin;
 pub mod chat;
 pub mod dast;
 pub mod dto;
@@ -4,8 +4,7 @@ use axum::extract::Request;
 use axum::http::HeaderValue;
 use axum::middleware::Next;
 use axum::response::Response;
-use axum::routing::{delete, get};
+use axum::{middleware, Extension};
 use axum::{middleware, Extension, Router};
 use tokio::sync::RwLock;
 use tower_http::cors::CorsLayer;
 use tower_http::set_header::SetResponseHeaderLayer;
@@ -15,7 +14,6 @@ use compliance_core::auth::{require_jwt_auth, require_tenant_status, JwksState};
 use compliance_core::{TenantContext, TenantStatus};
 use crate::agent::ComplianceAgent;
 use crate::api::handlers;
 use crate::api::routes;
 use crate::error::AgentError;
@@ -52,28 +50,7 @@ pub async fn inject_dev_tenant(mut request: Request, next: Next) -> Response {
 }
 pub async fn start_api_server(agent: ComplianceAgent, port: u16) -> Result<(), AgentError> {
    // Admin sub-router. Routes are only mounted when ADMIN_API_TOKEN is
    // configured — without it, the paths don't exist at all (404 rather
    // than 401), so an operator who hasn't opted in can't fingerprint
    // the surface area.
    let admin_router: Router = if agent.config.admin_api_token.is_some() {
        tracing::info!("Admin API enabled — /api/v1/admin/* mounted behind ADMIN_API_TOKEN bearer");
        Router::new()
            .route(
                "/api/v1/admin/tenants",
                get(handlers::admin::list_tenant_dbs),
            )
            .route(
                "/api/v1/admin/tenants/{tenant_id}",
                delete(handlers::admin::drop_tenant_db),
            )
            .layer(middleware::from_fn(handlers::admin::require_admin_token))
    } else {
        Router::new()
    };
    let mut app = routes::build_router()
        .merge(admin_router)
        .layer(Extension(Arc::new(agent.clone())))
        .layer(CorsLayer::permissive())
        .layer(TraceLayer::new_for_http())
@@ -59,7 +59,5 @@ pub fn load_config() -> Result<AgentConfig, AgentError> {
            .unwrap_or(true),
        pentest_imap_username: env_var_opt("PENTEST_IMAP_USERNAME"),
        pentest_imap_password: env_secret_opt("PENTEST_IMAP_PASSWORD"),
        admin_api_token: env_secret_opt("ADMIN_API_TOKEN"),
        tenant_registry_url: env_var_opt("TENANT_REGISTRY_URL"),
    })
 }
@@ -339,8 +339,6 @@ mod tests {
            pentest_imap_tls: true,
            pentest_imap_username: None,
            pentest_imap_password: None,
            admin_api_token: None,
            tenant_registry_url: None,
        }
    }
@@ -66,8 +66,6 @@ impl TestServer {
            pentest_imap_tls: false,
            pentest_imap_username: None,
            pentest_imap_password: None,
            admin_api_token: None,
            tenant_registry_url: None,
        };
        let agent = ComplianceAgent::new(config, db_pool);
@@ -63,24 +63,16 @@ struct Claims {
 const PUBLIC_ENDPOINTS: &[&str] = &["/api/v1/health"];
 /// Path prefixes that bypass JWT validation. The admin sub-router
 /// (`/api/v1/admin/*`) has its own static-bearer middleware and must
 /// not be routed through the customer-JWT path — a Keycloak token
 /// always carries a single tenant_id and would semantically conflict
 /// with cross-tenant admin operations.
 const PUBLIC_PREFIXES: &[&str] = &["/api/v1/admin/"];
 /// Middleware that validates Bearer JWT tokens against Keycloak's JWKS
 /// and attaches a `TenantContext` extension on success.
 ///
-/// Skips validation for the health endpoint and any path under one of
+/// Skips validation for the health endpoint.
-/// the [`PUBLIC_PREFIXES`]. If `JwksState` is not present (Keycloak
+/// If `JwksState` is not present (Keycloak not configured), requests
-/// not configured), requests pass through and downstream code must
+/// pass through and downstream code must handle the missing context.
 /// handle the missing context.
 pub async fn require_jwt_auth(mut request: Request, next: Next) -> Response {
    let path = request.uri().path();
-    if PUBLIC_ENDPOINTS.contains(&path) || PUBLIC_PREFIXES.iter().any(|p| path.starts_with(p)) {
+    if PUBLIC_ENDPOINTS.contains(&path) {
        return next.run(request).await;
    }
@@ -37,15 +37,6 @@ pub struct AgentConfig {
    pub pentest_imap_tls: bool,
    pub pentest_imap_username: Option<String>,
    pub pentest_imap_password: Option<SecretString>,
    /// Static bearer for the cross-tenant admin endpoints under
    /// `/api/v1/admin/*`. When `None`, those endpoints are not
    /// mounted at all (defense-in-depth: ops endpoints never reach
    /// any auth path if no operator has explicitly opted in).
    pub admin_api_token: Option<SecretString>,
    /// Live tenant-registry URL the scheduler consults for the list
    /// of tenants to iterate. When `None` or unreachable, scheduler
    /// falls back to `SCHEDULER_TENANT_IDS` env (M7.2-C).
    pub tenant_registry_url: Option<String>,
 }
 #[derive(Clone, Debug, Serialize, Deserialize)]