From 003835764e298fbb66616fdc4df2efc8ee835d7f Mon Sep 17 00:00:00 2001 From: Sharang Parnerkar <30073382+mighty840@users.noreply.github.com> Date: Wed, 17 Jun 2026 13:16:46 +0200 Subject: [PATCH] fixup(m7.2-A): validate db_prefix at connect, bump hash to 16 bytes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses review feedback on the hash-fallback path. The original `debug_assert!(hashed.len() <= MAX_DB_NAME_LEN)` was a runtime hack that vanished in release builds. With an 8-byte hash truncation (~2^32 birthday-collision resistance), two tenant_ids hashing to the same suffix would silently share a database — no panic, no rollback, just cross-tenant data leak. Not acceptable for a regulated-industry product. Changes: - Bump hash truncation 8 → 16 bytes (32 hex chars). 2^64 birthday resistance — collision-impossible at our scale. - Add MAX_PREFIX_LEN (= 30) and validate db_prefix.len() at `DatabasePool::connect`. The runtime hash-fallback arithmetic is now provably within Mongo's 63-byte cap; drop the debug_assert!. - New test `connect_rejects_overlong_db_prefix` exercises the inclusive bound (30 passes, 31 fails). - Existing hash-fallback test now asserts a 32-char hex suffix + basic distinctness for two different inputs. Co-Authored-By: Claude Opus 4.7 --- compliance-agent/src/database.rs | 44 +++++++++++++++++----- compliance-agent/tests/tenant_isolation.rs | 36 ++++++++++++++++++ 2 files changed, 71 insertions(+), 9 deletions(-) diff --git a/compliance-agent/src/database.rs b/compliance-agent/src/database.rs index 6fc6f9c..0d5e4f6 100644 --- a/compliance-agent/src/database.rs +++ b/compliance-agent/src/database.rs @@ -15,6 +15,16 @@ use crate::error::AgentError; /// on Linux, 63 on Windows; we target the conservative limit). const MAX_DB_NAME_LEN: usize = 63; +/// Hex length of the SHA-256 truncation used for the hash fallback +/// tenant DB name (16 bytes → 32 hex chars). 16 bytes gives ~2^64 +/// birthday-collision resistance — at our 10s-100s tenant scale this +/// is effectively impossible to hit. +const HASH_HEX_LEN: usize = 32; + +/// Largest `db_prefix` that still guarantees the hash-fallback name +/// fits in the 63-byte cap: `prefix + "_" + 32 hex chars`. +const MAX_PREFIX_LEN: usize = MAX_DB_NAME_LEN - 1 - HASH_HEX_LEN; + /// Per-tenant Mongo connection broker (M7.2 isolation model). /// /// Holds one [`Client`] and hands out [`Database`] handles physically @@ -36,7 +46,19 @@ pub struct DatabasePool { impl DatabasePool { /// Connect to the cluster and prepare to hand out tenant databases /// named `_`. + /// + /// Validates `db_prefix.len() <= MAX_PREFIX_LEN` so the + /// hash-fallback path is provably within Mongo's 63-byte db-name + /// cap. Refuses to construct a pool that could ever produce an + /// over-long name. pub async fn connect(uri: &str, db_prefix: &str) -> Result { + if db_prefix.len() > MAX_PREFIX_LEN { + return Err(AgentError::Other(format!( + "db_prefix '{db_prefix}' is {} chars; max is {MAX_PREFIX_LEN} so the \ + hash-fallback tenant DB name fits Mongo's {MAX_DB_NAME_LEN}-byte cap", + db_prefix.len() + ))); + } let client = Client::with_uri_str(uri).await?; client .database("admin") @@ -79,10 +101,17 @@ impl DatabasePool { /// Compute the Mongo database name for a tenant. Public for tests /// and tenant offboarding (`pool.client().database(name).drop()`). /// - /// Format: `_` if it fits in 63 chars, - /// otherwise `_`. The hash - /// fallback is collision-resistant in practice (2^64 keyspace) - /// while keeping the name bounded. + /// Format: `_` if it fits the 63-byte + /// cap, else `_`. The + /// `db_prefix` length invariant established at [`Self::connect`] + /// guarantees the hash-fallback name always fits — no runtime + /// assertion needed. + /// + /// Collision resistance: the hash fallback is a 16-byte SHA-256 + /// truncation, which gives ~2^64 birthday-collision resistance. At + /// our 10s–100s tenant scale the probability of two tenant_ids + /// colliding is effectively zero. (8-byte truncation would have + /// been ~2^32 — too close for comfort on a regulated product.) pub fn tenant_db_name(&self, tenant_id: &str) -> String { let sanitized = sanitize_tenant_id(tenant_id); let natural = format!("{}_{}", self.db_prefix, sanitized); @@ -92,11 +121,8 @@ impl DatabasePool { let mut hasher = Sha256::new(); hasher.update(tenant_id.as_bytes()); let digest = hasher.finalize(); - // 16 hex chars = 8 bytes = 64-bit truncation. - let suffix = hex::encode(&digest[..8]); - let hashed = format!("{}_{}", self.db_prefix, suffix); - debug_assert!(hashed.len() <= MAX_DB_NAME_LEN); - hashed + let suffix = hex::encode(&digest[..HASH_HEX_LEN / 2]); + format!("{}_{}", self.db_prefix, suffix) } } diff --git a/compliance-agent/tests/tenant_isolation.rs b/compliance-agent/tests/tenant_isolation.rs index 1c23f94..a12b57d 100644 --- a/compliance-agent/tests/tenant_isolation.rs +++ b/compliance-agent/tests/tenant_isolation.rs @@ -172,9 +172,45 @@ async fn tenant_db_name_falls_back_to_hash_when_too_long() { let name = pool.tenant_db_name(&huge); assert!(name.len() <= 63, "hashed name should fit: {name}"); assert!(name.starts_with("m72a_long_")); + // The hash suffix is 32 hex chars (16-byte SHA-256 truncation). + let suffix = name.trim_start_matches("m72a_long_"); + assert_eq!( + suffix.len(), + 32, + "expected 32-hex suffix (16-byte hash), got {suffix:?}" + ); + assert!(suffix.chars().all(|c| c.is_ascii_hexdigit())); // Stable: same input → same output. assert_eq!(name, pool.tenant_db_name(&huge)); + + // Different inputs → different outputs (collision check on a tiny + // sample — full birthday-resistance is a proof not a test). + let huge2 = "y".repeat(100); + assert_ne!(pool.tenant_db_name(&huge), pool.tenant_db_name(&huge2)); +} + +#[tokio::test] +async fn connect_rejects_overlong_db_prefix() { + let uri = std::env::var("TEST_MONGODB_URI") + .unwrap_or_else(|_| "mongodb://root:example@localhost:27017/?authSource=admin".into()); + + // MAX_PREFIX_LEN is 30 (= 63 - 1 - 32). A 31-char prefix MUST be + // rejected at construction so the hash-fallback path can never + // produce an over-long db name at runtime. + let too_long = "a".repeat(31); + let err = DatabasePool::connect(&uri, &too_long).await.unwrap_err(); + let msg = format!("{err}"); + assert!( + msg.contains("max is 30") || msg.contains(&too_long), + "error should explain the cap: {msg}" + ); + + // Exactly 30 chars is the inclusive bound — must succeed. + let just_right = "a".repeat(30); + let _ = DatabasePool::connect(&uri, &just_right) + .await + .expect("30-char prefix should be accepted"); } /// Short UUID slug for keeping test prefixes well under Mongo's 63-byte