Compare commits

..

1 Commits

Author SHA1 Message Date
Sharang Parnerkar bec47f8c7d feat(dashboard): proactively refresh expired Keycloak tokens
CI / Check (pull_request) Successful in 8m7s
CI / Detect Changes (pull_request) Has been skipped
CI / Deploy Agent (pull_request) Has been skipped
CI / Deploy Dashboard (pull_request) Has been skipped
CI / Deploy Docs (pull_request) Has been skipped
CI / Deploy MCP (pull_request) Has been skipped
The dashboard stored a refresh_token in the session at login (auth.rs)
but never used it. Once the access_token's 5-minute lifespan ran out,
every subsequent agent call failed with 401 ExpiredSignature. The UI
showed "unable to load X" until the user logged out and back in.

Fix: before attaching the bearer, decode the JWT's `exp` claim and
proactively refresh via the stored refresh_token if the token is
expired or within REFRESH_SKEW_SECS (30s) of expiry. Updates the
session with the new access_token (and rotated refresh_token if KC
sends one). Refresh failures fall through with the stale token so the
agent's 401 surfaces to the UI rather than failing the request at the
dashboard layer.

Why "proactive" instead of "retry on 401"
- Saves a wasted round-trip on every agent call once the token has
  aged past 5 min.
- Doesn't require cloning RequestBuilder bodies for retry.
- Same end state — fresh token reaches the agent.

Test plan
- cargo test -p compliance-dashboard --features server
  --no-default-features infrastructure::agent_client::tests — 5 pass:
    * expired JWT → refresh
    * near-expiry within skew window → refresh
    * fresh JWT → no refresh
    * malformed/empty JWT → refresh (defensive)
    * JWT without exp claim → refresh (defensive)
- Manual after deploy: dashboard works past the 5-min token lifespan
  without manual re-login.

Note
- The refresh code addresses the ExpiredSignature failure mode. The
  separate "JWT is missing tenant_id claim" 401 is a Keycloak realm
  config issue (the user logging in lacks the M7.1 attributes that
  the protocol mappers consume) and is fixed by realm/attribute
  config, not by this PR.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-17 21:38:06 +02:00
8 changed files with 5 additions and 167 deletions
-115
View File
@@ -1,115 +0,0 @@
//! Cross-tenant admin endpoints (`/api/v1/admin/*`).
//!
//! Operator-only. Auth is a **static bearer token** (`ADMIN_API_TOKEN`
//! env on the agent) — explicitly NOT a Keycloak JWT, because the
//! whole point of these endpoints is to operate ACROSS tenants. A
//! customer JWT (which always carries a single tenant_id) has no
//! business mounting them.
//!
//! Routes are only registered when `ADMIN_API_TOKEN` is set. With no
//! token, the endpoints don't exist at all (404), which is a stronger
//! guarantee than "401 if you guess the path".
//!
//! Operations:
//! - `GET /api/v1/admin/tenants` — list tenant DBs
//! - `DELETE /api/v1/admin/tenants/{tenant_id}` — GDPR delete
//!
//! Tenant ids in URLs are passed as-is to `DatabasePool::drop_tenant`,
//! which sanitises them the same way it does for creation. Listing
//! returns the raw DB names from `list_tenant_db_names` — operators
//! can reverse-derive the tenant_id from the prefix.
use axum::extract::{Extension, Path, Request};
use axum::http::{header, StatusCode};
use axum::middleware::Next;
use axum::response::{IntoResponse, Response};
use axum::Json;
use secrecy::ExposeSecret;
use serde::Serialize;
use super::dto::AgentExt;
/// JSON response body returned by [`list_tenant_dbs`].
#[derive(Serialize)]
pub struct ListTenantDbsResponse {
/// Tenant database names exactly as reported by
/// `DatabasePool::list_tenant_db_names` (raw DB names, not tenant ids).
pub tenant_db_names: Vec<String>,
}
/// `GET /api/v1/admin/tenants` — list the tenant database names.
///
/// Asks the agent's database pool for the tenant DB names and wraps them
/// in a [`ListTenantDbsResponse`].
///
/// # Errors
///
/// Returns `500 Internal Server Error` when the pool lookup fails; the
/// underlying error is logged and not exposed to the caller.
#[tracing::instrument(skip_all)]
pub async fn list_tenant_dbs(
    Extension(agent): AgentExt,
) -> Result<Json<ListTenantDbsResponse>, StatusCode> {
    match agent.db_pool.list_tenant_db_names().await {
        Ok(tenant_db_names) => Ok(Json(ListTenantDbsResponse { tenant_db_names })),
        Err(e) => {
            tracing::error!("admin: list_tenant_db_names failed: {e}");
            Err(StatusCode::INTERNAL_SERVER_ERROR)
        }
    }
}
/// `DELETE /api/v1/admin/tenants/{tenant_id}` — drop one tenant's database.
///
/// The `tenant_id` path segment is handed unchanged to
/// `DatabasePool::drop_tenant`. On success the body is
/// `{"status": "dropped"}`.
///
/// # Errors
///
/// Returns `500 Internal Server Error` when the drop fails; the
/// underlying error is logged and not exposed to the caller.
#[tracing::instrument(skip_all, fields(tenant_id = %tenant_id))]
pub async fn drop_tenant_db(
    Extension(agent): AgentExt,
    Path(tenant_id): Path<String>,
) -> Result<Json<serde_json::Value>, StatusCode> {
    if let Err(e) = agent.db_pool.drop_tenant(&tenant_id).await {
        tracing::error!("admin: drop_tenant failed: {e}");
        return Err(StatusCode::INTERNAL_SERVER_ERROR);
    }

    let body = serde_json::json!({ "status": "dropped" });
    Ok(Json(body))
}
/// Compares the configured admin token with the presented bearer without
/// stopping at the first differing byte.
///
/// For inputs of equal length every byte pair is XOR-ed and OR-ed into a
/// single accumulator, so the amount of work does not depend on where a
/// mismatch occurs. Inputs of different length are rejected up front,
/// before any bytes are compared.
///
/// Returns `true` only when both strings are byte-for-byte identical.
fn tokens_eq(a: &str, b: &str) -> bool {
    a.len() == b.len()
        && a
            .bytes()
            .zip(b.bytes())
            .fold(0u8, |acc, (lhs, rhs)| acc | (lhs ^ rhs))
            == 0
}
/// Axum middleware that gates a request on the static `ADMIN_API_TOKEN`.
///
/// Outcomes, in order of evaluation:
/// - no admin token configured → `404` with body `admin disabled`;
/// - no `Authorization: Bearer …` header, or an empty token after
///   trimming → `401` with body `Missing bearer token`;
/// - token does not match the configured one → `401` with body
///   `Invalid admin token`;
/// - otherwise the request is passed on to `next`.
pub async fn require_admin_token(
    Extension(agent): AgentExt,
    request: Request,
    next: Next,
) -> Response {
    let expected = match agent.config.admin_api_token.as_ref() {
        Some(token) => token,
        // Defensive: even if this middleware ends up mounted while no
        // token is configured, reject instead of letting requests pass.
        None => return (StatusCode::NOT_FOUND, "admin disabled").into_response(),
    };

    // Extract the bearer value; a header that is absent, not valid text,
    // lacking the "Bearer " prefix, or blank all count as "missing".
    let bearer = request
        .headers()
        .get(header::AUTHORIZATION)
        .and_then(|value| value.to_str().ok())
        .and_then(|raw| raw.strip_prefix("Bearer "))
        .map(str::trim)
        .filter(|token| !token.is_empty());

    let Some(token) = bearer else {
        return (StatusCode::UNAUTHORIZED, "Missing bearer token").into_response();
    };

    if tokens_eq(token, expected.expose_secret()) {
        next.run(request).await
    } else {
        (StatusCode::UNAUTHORIZED, "Invalid admin token").into_response()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Table of `(left, right, expected)` cases for `tokens_eq`: equal
    /// strings, a same-length mismatch, length mismatches, and the
    /// empty-string edge cases.
    #[test]
    fn tokens_eq_basic() {
        let cases = [
            ("abc", "abc", true),
            ("abc", "abd", false),
            ("abc", "abcd", false),
            ("", "x", false),
            ("", "", true),
        ];

        for (left, right, expected) in cases {
            assert!(tokens_eq(left, right) == expected);
        }
    }
}
-1
View File
@@ -1,4 +1,3 @@
pub mod admin;
pub mod chat; pub mod chat;
pub mod dast; pub mod dast;
pub mod dto; pub mod dto;
+1 -24
View File
@@ -4,8 +4,7 @@ use axum::extract::Request;
use axum::http::HeaderValue; use axum::http::HeaderValue;
use axum::middleware::Next; use axum::middleware::Next;
use axum::response::Response; use axum::response::Response;
use axum::routing::{delete, get}; use axum::{middleware, Extension};
use axum::{middleware, Extension, Router};
use tokio::sync::RwLock; use tokio::sync::RwLock;
use tower_http::cors::CorsLayer; use tower_http::cors::CorsLayer;
use tower_http::set_header::SetResponseHeaderLayer; use tower_http::set_header::SetResponseHeaderLayer;
@@ -15,7 +14,6 @@ use compliance_core::auth::{require_jwt_auth, require_tenant_status, JwksState};
use compliance_core::{TenantContext, TenantStatus}; use compliance_core::{TenantContext, TenantStatus};
use crate::agent::ComplianceAgent; use crate::agent::ComplianceAgent;
use crate::api::handlers;
use crate::api::routes; use crate::api::routes;
use crate::error::AgentError; use crate::error::AgentError;
@@ -52,28 +50,7 @@ pub async fn inject_dev_tenant(mut request: Request, next: Next) -> Response {
} }
pub async fn start_api_server(agent: ComplianceAgent, port: u16) -> Result<(), AgentError> { pub async fn start_api_server(agent: ComplianceAgent, port: u16) -> Result<(), AgentError> {
// Admin sub-router. Routes are only mounted when ADMIN_API_TOKEN is
// configured — without it, the paths don't exist at all (404 rather
// than 401), so an operator who hasn't opted in can't fingerprint
// the surface area.
let admin_router: Router = if agent.config.admin_api_token.is_some() {
tracing::info!("Admin API enabled — /api/v1/admin/* mounted behind ADMIN_API_TOKEN bearer");
Router::new()
.route(
"/api/v1/admin/tenants",
get(handlers::admin::list_tenant_dbs),
)
.route(
"/api/v1/admin/tenants/{tenant_id}",
delete(handlers::admin::drop_tenant_db),
)
.layer(middleware::from_fn(handlers::admin::require_admin_token))
} else {
Router::new()
};
let mut app = routes::build_router() let mut app = routes::build_router()
.merge(admin_router)
.layer(Extension(Arc::new(agent.clone()))) .layer(Extension(Arc::new(agent.clone())))
.layer(CorsLayer::permissive()) .layer(CorsLayer::permissive())
.layer(TraceLayer::new_for_http()) .layer(TraceLayer::new_for_http())
-2
View File
@@ -59,7 +59,5 @@ pub fn load_config() -> Result<AgentConfig, AgentError> {
.unwrap_or(true), .unwrap_or(true),
pentest_imap_username: env_var_opt("PENTEST_IMAP_USERNAME"), pentest_imap_username: env_var_opt("PENTEST_IMAP_USERNAME"),
pentest_imap_password: env_secret_opt("PENTEST_IMAP_PASSWORD"), pentest_imap_password: env_secret_opt("PENTEST_IMAP_PASSWORD"),
admin_api_token: env_secret_opt("ADMIN_API_TOKEN"),
tenant_registry_url: env_var_opt("TENANT_REGISTRY_URL"),
}) })
} }
-2
View File
@@ -339,8 +339,6 @@ mod tests {
pentest_imap_tls: true, pentest_imap_tls: true,
pentest_imap_username: None, pentest_imap_username: None,
pentest_imap_password: None, pentest_imap_password: None,
admin_api_token: None,
tenant_registry_url: None,
} }
} }
-2
View File
@@ -66,8 +66,6 @@ impl TestServer {
pentest_imap_tls: false, pentest_imap_tls: false,
pentest_imap_username: None, pentest_imap_username: None,
pentest_imap_password: None, pentest_imap_password: None,
admin_api_token: None,
tenant_registry_url: None,
}; };
let agent = ComplianceAgent::new(config, db_pool); let agent = ComplianceAgent::new(config, db_pool);
+4 -12
View File
@@ -63,24 +63,16 @@ struct Claims {
const PUBLIC_ENDPOINTS: &[&str] = &["/api/v1/health"]; const PUBLIC_ENDPOINTS: &[&str] = &["/api/v1/health"];
/// Path prefixes that bypass JWT validation. The admin sub-router
/// (`/api/v1/admin/*`) has its own static-bearer middleware and must
/// not be routed through the customer-JWT path — a Keycloak token
/// always carries a single tenant_id and would semantically conflict
/// with cross-tenant admin operations.
const PUBLIC_PREFIXES: &[&str] = &["/api/v1/admin/"];
/// Middleware that validates Bearer JWT tokens against Keycloak's JWKS /// Middleware that validates Bearer JWT tokens against Keycloak's JWKS
/// and attaches a `TenantContext` extension on success. /// and attaches a `TenantContext` extension on success.
/// ///
/// Skips validation for the health endpoint and any path under one of /// Skips validation for the health endpoint.
/// the [`PUBLIC_PREFIXES`]. If `JwksState` is not present (Keycloak /// If `JwksState` is not present (Keycloak not configured), requests
/// not configured), requests pass through and downstream code must /// pass through and downstream code must handle the missing context.
/// handle the missing context.
pub async fn require_jwt_auth(mut request: Request, next: Next) -> Response { pub async fn require_jwt_auth(mut request: Request, next: Next) -> Response {
let path = request.uri().path(); let path = request.uri().path();
if PUBLIC_ENDPOINTS.contains(&path) || PUBLIC_PREFIXES.iter().any(|p| path.starts_with(p)) { if PUBLIC_ENDPOINTS.contains(&path) {
return next.run(request).await; return next.run(request).await;
} }
-9
View File
@@ -37,15 +37,6 @@ pub struct AgentConfig {
pub pentest_imap_tls: bool, pub pentest_imap_tls: bool,
pub pentest_imap_username: Option<String>, pub pentest_imap_username: Option<String>,
pub pentest_imap_password: Option<SecretString>, pub pentest_imap_password: Option<SecretString>,
/// Static bearer for the cross-tenant admin endpoints under
/// `/api/v1/admin/*`. When `None`, those endpoints are not
/// mounted at all (defense-in-depth: ops endpoints never reach
/// any auth path if no operator has explicitly opted in).
pub admin_api_token: Option<SecretString>,
/// Live tenant-registry URL the scheduler consults for the list
/// of tenants to iterate. When `None` or unreachable, scheduler
/// falls back to `SCHEDULER_TENANT_IDS` env (M7.2-C).
pub tenant_registry_url: Option<String>,
} }
#[derive(Clone, Debug, Serialize, Deserialize)] #[derive(Clone, Debug, Serialize, Deserialize)]