From 08c4ec4cff4ceaf7d649fa51464acdb7f54ad98b Mon Sep 17 00:00:00 2001 From: Sharang Parnerkar <30073382+mighty840@users.noreply.github.com> Date: Wed, 17 Jun 2026 15:05:27 +0200 Subject: [PATCH] feat(m7.2-D): drop transitional agent.db, add admin helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Final slice of M7.2. Removes the transitional single-database handle that M7.2-A introduced alongside the pool, so the compliance-agent now has a single source of truth for storage: every code path obtains a tenant-scoped Database from `agent.db_pool.for_tenant_id(...)` or `for_tenant(&ctx)`. There is no shared "default" database anywhere. Changes - ComplianceAgent: `db: Database` field removed. ComplianceAgent::new now takes only `(config, db_pool)`. Verified by an earlier grep during M7.2-C that no remaining call site reads `agent.db`. - main.rs: stops constructing the legacy Database. Only the pool is built at startup. - TestServer: same — drops Database::connect/ensure_indexes, builds only the pool. cleanup() now drops every `<db_name>_*` per-tenant database (no longer touches a bare `<db_name>`). - DatabasePool::list_tenant_db_names() — lists Mongo databases matching the pool's prefix. For admin endpoints + scheduler tenant enumeration in a future M7.3 (this PR keeps SCHEDULER_TENANT_IDS env config — registry integration is a separate concern). - DatabasePool::drop_tenant(&str) — idempotent tenant offboarding. Drops the per-tenant database and evicts the in-memory `ensured` marker so a later re-provision re-runs ensure_indexes. 
Test plan - cargo fmt --all clean - cargo clippy --workspace --exclude compliance-dashboard -- -D warnings clean - cargo test -p compliance-core --lib — 7 pass - cargo test -p compliance-agent --lib — 228 pass - cargo test -p compliance-agent --test tenant_isolation — 6 pass including new `admin_helpers_list_and_drop_tenant_dbs` - cargo test -p compliance-agent --test tenant_status_middleware — 6 pass M7.2 closeout state after this lands - M7.1 (auth + status) — done - M7.2-A (pool) — done - M7.2-B (handlers) — done - M7.2-C (background paths) — done - M7.2-D (legacy db removed, admin helpers) — done (this PR) - Future M7.3: scheduler pulls tenants from tenant-registry instead of SCHEDULER_TENANT_IDS env; cross-tenant admin HTTP endpoints built on list_tenant_db_names / drop_tenant. Co-Authored-By: Claude Opus 4.7 --- compliance-agent/src/agent.rs | 14 ++--- compliance-agent/src/database.rs | 37 +++++++++++++ compliance-agent/src/main.rs | 11 ++-- compliance-agent/tests/common/mod.rs | 16 ++---- compliance-agent/tests/tenant_isolation.rs | 64 ++++++++++++++++++++++ 5 files changed, 115 insertions(+), 27 deletions(-) diff --git a/compliance-agent/src/agent.rs b/compliance-agent/src/agent.rs index 6ce21f1..5327fec 100644 --- a/compliance-agent/src/agent.rs +++ b/compliance-agent/src/agent.rs @@ -6,7 +6,7 @@ use tokio::sync::{broadcast, watch, Semaphore}; use compliance_core::models::pentest::PentestEvent; use compliance_core::AgentConfig; -use crate::database::{Database, DatabasePool}; +use crate::database::DatabasePool; use crate::llm::LlmClient; use crate::pipeline::orchestrator::PipelineOrchestrator; @@ -16,12 +16,9 @@ const DEFAULT_MAX_CONCURRENT_SESSIONS: usize = 5; #[derive(Clone)] pub struct ComplianceAgent { pub config: AgentConfig, - /// Transitional single-database handle. Used by handlers that have - /// not yet been migrated to `db_pool.for_tenant(&ctx)` (M7.2-B/C). - /// Will be removed once every call site is tenant-scoped (M7.2-D). 
- pub db: Database, - /// Per-tenant Mongo broker introduced in M7.2-A. Handlers should - /// prefer this and obtain a tenant-scoped [`Database`] from it. + /// Per-tenant Mongo broker. Every code path must obtain a + /// tenant-scoped [`crate::database::Database`] from this pool — + /// there is no single shared database any more. pub db_pool: DatabasePool, pub llm: Arc, pub http: reqwest::Client, @@ -34,7 +31,7 @@ pub struct ComplianceAgent { } impl ComplianceAgent { - pub fn new(config: AgentConfig, db: Database, db_pool: DatabasePool) -> Self { + pub fn new(config: AgentConfig, db_pool: DatabasePool) -> Self { let llm = Arc::new(LlmClient::new( config.litellm_url.clone(), config.litellm_api_key.clone(), @@ -48,7 +45,6 @@ impl ComplianceAgent { .unwrap_or_default(); Self { config, - db, db_pool, llm, http, diff --git a/compliance-agent/src/database.rs b/compliance-agent/src/database.rs index 11d17fa..5532b18 100644 --- a/compliance-agent/src/database.rs +++ b/compliance-agent/src/database.rs @@ -140,6 +140,43 @@ impl DatabasePool { pub fn client(&self) -> &Client { &self.client } + + /// List every Mongo database currently belonging to this pool, + /// identified by the `_` prefix. The result is the raw + /// database names — opening one for offboarding/cleanup goes + /// through [`Self::client`]. + /// + /// Note: hashed-fallback names (very long tenant_ids) lose the + /// original tenant_id at the cluster level — we know a database + /// exists for *some* tenant but not which one. In practice + /// tenant_ids are UUIDs (36 chars) and never hit the fallback, + /// so this is a theoretical concern, not an operational one. + pub async fn list_tenant_db_names(&self) -> Result, AgentError> { + let prefix = format!("{}_", self.db_prefix); + let names = self.client.list_database_names().await?; + Ok(names + .into_iter() + .filter(|n| n.starts_with(&prefix)) + .collect()) + } + + /// Drop the database for a specific tenant. 
Used by GDPR delete + /// and tenant offboarding. Idempotent — dropping a non-existent + /// database is a no-op at the driver level. + /// + /// Also evicts the tenant from the in-memory `ensured` set so a + /// later re-provision triggers fresh `ensure_indexes`. + pub async fn drop_tenant(&self, tenant_id: &str) -> Result<(), AgentError> { + let db_name = self.tenant_db_name(tenant_id); + self.client.database(&db_name).drop().await?; + self.ensured.remove(tenant_id); + tracing::info!( + tenant_id = %tenant_id, + db_name = %db_name, + "Dropped tenant database" + ); + Ok(()) + } } /// Mongo database names disallow `/`, `\`, `.`, `"`, `$`, ` `, and NUL. diff --git a/compliance-agent/src/main.rs b/compliance-agent/src/main.rs index 1270b22..110634f 100644 --- a/compliance-agent/src/main.rs +++ b/compliance-agent/src/main.rs @@ -25,16 +25,13 @@ async fn main() -> Result<(), Box> { } tracing::info!("Connecting to MongoDB..."); - let db = database::Database::connect(&config.mongodb_uri, &config.mongodb_database).await?; - db.ensure_indexes().await?; - - // M7.2-A: per-tenant pool. Uses `mongodb_database` as the db-name - // prefix so tenant databases land as `_` next to - // the legacy single-tenant database. + // Per-tenant pool only — the agent has no shared "default" database + // after M7.2-D. `mongodb_database` is now the db-name prefix used + // for tenant databases (`_`). 
let db_pool = database::DatabasePool::connect(&config.mongodb_uri, &config.mongodb_database).await?; - let agent = agent::ComplianceAgent::new(config.clone(), db.clone(), db_pool); + let agent = agent::ComplianceAgent::new(config.clone(), db_pool); tracing::info!("Starting scheduler..."); let scheduler_agent = agent.clone(); diff --git a/compliance-agent/tests/common/mod.rs b/compliance-agent/tests/common/mod.rs index cd1d307..5857203 100644 --- a/compliance-agent/tests/common/mod.rs +++ b/compliance-agent/tests/common/mod.rs @@ -7,7 +7,7 @@ use std::sync::Arc; use compliance_agent::agent::ComplianceAgent; use compliance_agent::api; -use compliance_agent::database::{Database, DatabasePool}; +use compliance_agent::database::DatabasePool; use compliance_core::AgentConfig; use secrecy::SecretString; @@ -28,11 +28,6 @@ impl TestServer { // Unique database name per test run to avoid collisions let db_name = format!("test_{}", uuid::Uuid::new_v4().simple()); - let db = Database::connect(&mongodb_uri, &db_name) - .await - .expect("Failed to connect to MongoDB — is it running?"); - db.ensure_indexes().await.expect("Failed to create indexes"); - let db_pool = DatabasePool::connect(&mongodb_uri, &db_name) .await .expect("Failed to build DatabasePool"); @@ -73,7 +68,7 @@ impl TestServer { pentest_imap_password: None, }; - let agent = ComplianceAgent::new(config, db, db_pool); + let agent = ComplianceAgent::new(config, db_pool); // Build the router with the agent extension. After M7.2-B every // handler takes a TenantCtx extractor; without KC in the test @@ -164,12 +159,11 @@ impl TestServer { &self.db_name } - /// Drop the test database on cleanup. Post-M7.2-B the actual data - /// lives in `_` per-tenant databases; list those - /// off the cluster and drop them too. + /// Drop every per-tenant database belonging to this test run. + /// Post-M7.2-D the agent never opens a `db_name` directly — + /// data lives only in `_` per-tenant databases. 
pub async fn cleanup(&self) { if let Ok(client) = mongodb::Client::with_uri_str(&self.mongodb_uri).await { - client.database(&self.db_name).drop().await.ok(); if let Ok(names) = client.list_database_names().await { let prefix = format!("{}_", self.db_name); for name in names { diff --git a/compliance-agent/tests/tenant_isolation.rs b/compliance-agent/tests/tenant_isolation.rs index a12b57d..c204f7b 100644 --- a/compliance-agent/tests/tenant_isolation.rs +++ b/compliance-agent/tests/tenant_isolation.rs @@ -158,6 +158,70 @@ async fn tenant_db_name_sanitizes_unsafe_characters() { } } +#[tokio::test] +async fn admin_helpers_list_and_drop_tenant_dbs() { + let uri = std::env::var("TEST_MONGODB_URI") + .unwrap_or_else(|_| "mongodb://root:example@localhost:27017/?authSource=admin".into()); + let prefix = format!("m72d_{}", short_id()); + let pool = DatabasePool::connect(&uri, &prefix).await.expect("connect"); + + let acme = ctx("00000000-0000-0000-0000-00000000acme", "acme"); + let globex = ctx("00000000-0000-0000-0000-0000globex000", "globex"); + + // Provision two tenants and write a doc into each so the databases + // actually materialize on the cluster (Mongo lazily creates DBs). 
+ let acme_db = pool.for_tenant(&acme).await.expect("acme db"); + let globex_db = pool.for_tenant(&globex).await.expect("globex db"); + acme_db + .repositories() + .insert_one(fixture_repo("acme-app", "git@example.com:acme/app.git")) + .await + .expect("insert acme"); + globex_db + .repositories() + .insert_one(fixture_repo("globex-app", "git@example.com:globex/app.git")) + .await + .expect("insert globex"); + + // list_tenant_db_names sees both, filtered by prefix + let names = pool.list_tenant_db_names().await.expect("list tenants"); + let acme_name = pool.tenant_db_name(&acme.tenant_id); + let globex_name = pool.tenant_db_name(&globex.tenant_id); + assert!( + names.contains(&acme_name), + "expected {acme_name} in {names:?}" + ); + assert!( + names.contains(&globex_name), + "expected {globex_name} in {names:?}" + ); + for name in &names { + assert!(name.starts_with(&format!("{prefix}_"))); + } + + // drop_tenant removes acme's DB + pool.drop_tenant(&acme.tenant_id) + .await + .expect("drop acme tenant"); + let after = pool + .list_tenant_db_names() + .await + .expect("list tenants after drop"); + assert!( + !after.contains(&acme_name), + "acme should be gone after drop, got {after:?}" + ); + assert!( + after.contains(&globex_name), + "globex should still be present, got {after:?}" + ); + + // Cleanup remaining + pool.drop_tenant(&globex.tenant_id) + .await + .expect("drop globex tenant"); +} + #[tokio::test] async fn tenant_db_name_falls_back_to_hash_when_too_long() { let uri = std::env::var("TEST_MONGODB_URI")