compliance-scanner-agent/compliance-agent/tests/tenant_isolation.rs

//! M7.2-A — `DatabasePool` isolation proof.
//!
//! Two `TenantContext`s, two databases, one client. Insert on A, query
//! on B → empty. Insert on B, query on A → only A's docs. Proves that
//! the per-tenant database split actually isolates at the driver level
//! and not at "we hope we filter."
//!
//! Requires MongoDB. Set `TEST_MONGODB_URI` to override the default
//! `mongodb://root:example@localhost:27017/?authSource=admin`.

#![allow(clippy::expect_used, clippy::unwrap_used)]

use compliance_agent::database::DatabasePool;
use compliance_core::models::TrackedRepository;
use compliance_core::{OrgRole, TenantContext, TenantStatus};
use mongodb::bson::doc;

/// Builds a `TenantContext` fixture for the given tenant id and slug.
///
/// Every other field is a fixed placeholder: a single `ItAdmin` role, the
/// `compliance-scanner` product, the `starter` plan, `Active` status and
/// user `u-1` without a display name.
fn ctx(tenant_id: &str, slug: &str) -> TenantContext {
    let org_roles = vec![OrgRole::ItAdmin];
    let products = vec![String::from("compliance-scanner")];

    TenantContext {
        tenant_id: tenant_id.to_owned(),
        tenant_slug: slug.to_owned(),
        org_roles,
        products,
        plan: String::from("starter"),
        status: TenantStatus::Active,
        user_id: String::from("u-1"),
        user_name: None,
    }
}

/// Builds a minimal `TrackedRepository` fixture with the given name and git URL.
///
/// The repository tracks `main`, has webhooks disabled, zero findings, no id
/// and no optional tracker/auth configuration. `created_at` and `updated_at`
/// share one timestamp so a freshly built fixture never looks "modified".
fn fixture_repo(name: &str, git_url: &str) -> TrackedRepository {
    // Read the clock once: two separate `Utc::now()` calls would yield
    // slightly different instants for created/updated.
    let now = chrono::Utc::now();

    TrackedRepository {
        id: None,
        name: name.to_string(),
        git_url: git_url.to_string(),
        default_branch: "main".to_string(),
        local_path: None,
        scan_schedule: None,
        webhook_enabled: false,
        webhook_secret: None,
        tracker_type: None,
        tracker_owner: None,
        tracker_repo: None,
        tracker_token: None,
        auth_token: None,
        auth_username: None,
        last_scanned_commit: None,
        findings_count: 0,
        created_at: now,
        updated_at: now,
    }
}

/// Proves that `DatabasePool` hands out per-tenant database handles that
/// cannot see each other's documents.
///
/// Sequence: insert on acme → globex must still be empty; insert on globex →
/// each tenant sees exactly its own repository. No filter document is used in
/// any query, so a pass demonstrates isolation at the database handle.
///
/// Requires a reachable MongoDB; `TEST_MONGODB_URI` overrides the default URI.
#[tokio::test]
async fn pool_isolates_tenants_at_driver_level() {
    let uri = std::env::var("TEST_MONGODB_URI")
        .unwrap_or_else(|_| "mongodb://root:example@localhost:27017/?authSource=admin".into());
    // Unique per run so parallel test invocations don't collide. Kept
    // short because Mongo caps db names at 63 bytes (prefix + tenant_id).
    let prefix = format!("m72a_{}", short_id());

    let pool = DatabasePool::connect(&uri, &prefix)
        .await
        .expect("Failed to connect to MongoDB — is it running?");

    let acme = ctx("00000000-0000-0000-0000-00000000acme", "acme");
    let globex = ctx("00000000-0000-0000-0000-0000globex000", "globex");

    let acme_db = pool.for_tenant(&acme).await.expect("acme db");
    let globex_db = pool.for_tenant(&globex).await.expect("globex db");

    // Step 1 — insert on acme only, then read through globex's handle.
    acme_db
        .repositories()
        .insert_one(fixture_repo("acme-app", "git@example.com:acme/app.git"))
        .await
        .expect("insert acme");

    // "Insert on A, query on B → empty": globex has written nothing yet, so
    // anything visible here leaked across the tenant boundary.
    let globex_before = collect(&globex_db).await;
    assert!(
        globex_before.is_empty(),
        "globex must not see acme's repo before writing its own, got {:?}",
        globex_before
            .iter()
            .map(|repo| repo.name.as_str())
            .collect::<Vec<_>>()
    );

    // Step 2 — insert on globex, then read through both handles.
    globex_db
        .repositories()
        .insert_one(fixture_repo(
            "globex-platform",
            "git@example.com:globex/platform.git",
        ))
        .await
        .expect("insert globex");

    // The point of the whole exercise: acme can ONLY see acme's repo
    // and globex can ONLY see globex's, with no filter doc anywhere
    // because the isolation is at the database handle, not in the query.
    let acme_seen = collect(&acme_db).await;
    let globex_seen = collect(&globex_db).await;

    assert_eq!(acme_seen.len(), 1, "acme should see exactly its own repo");
    assert_eq!(acme_seen[0].name, "acme-app");
    assert_eq!(
        globex_seen.len(),
        1,
        "globex should see exactly its own repo"
    );
    assert_eq!(globex_seen[0].name, "globex-platform");

    // Sanity: the two databases really are different by name.
    let acme_db_name = pool.tenant_db_name(&acme.tenant_id);
    let globex_db_name = pool.tenant_db_name(&globex.tenant_id);
    assert_ne!(acme_db_name, globex_db_name);
    assert!(acme_db_name.starts_with(&prefix));

    // Cleanup — drop both per-tenant databases.
    pool.client()
        .database(&acme_db_name)
        .drop()
        .await
        .expect("drop acme");
    pool.client()
        .database(&globex_db_name)
        .drop()
        .await
        .expect("drop globex");
}

/// Calling `for_tenant` repeatedly for the same tenant must keep succeeding.
///
/// Requires a reachable MongoDB; `TEST_MONGODB_URI` overrides the default URI.
#[tokio::test]
async fn for_tenant_is_idempotent_index_creation() {
    let uri = match std::env::var("TEST_MONGODB_URI") {
        Ok(from_env) => from_env,
        Err(_) => String::from("mongodb://root:example@localhost:27017/?authSource=admin"),
    };
    let run_prefix = format!("m72a_{}", short_id());
    let pool = DatabasePool::connect(&uri, &run_prefix)
        .await
        .expect("connect");

    let acme = ctx("00000000-0000-0000-0000-00000000acme", "acme");

    // Every call after the first must not fail (ensure_indexes already ran,
    // in-memory marker is set, Mongo's createIndex is idempotent by name
    // anyway). The label doubles as the panic message for that attempt.
    for attempt in ["first call", "second call", "third call"] {
        let _ = pool.for_tenant(&acme).await.expect(attempt);
    }

    // Cleanup
    let tenant_db = pool.tenant_db_name(&acme.tenant_id);
    pool.client()
        .database(&tenant_db)
        .drop()
        .await
        .expect("drop");
}

/// `tenant_db_name` must not let characters Mongo forbids in database names
/// through into the derived name.
#[tokio::test]
async fn tenant_db_name_sanitizes_unsafe_characters() {
    let uri = match std::env::var("TEST_MONGODB_URI") {
        Ok(from_env) => from_env,
        Err(_) => String::from("mongodb://root:example@localhost:27017/?authSource=admin"),
    };
    let pool = DatabasePool::connect(&uri, "m72a_sanitize")
        .await
        .expect("connect");

    // Mongo db names cannot contain `/ \ . " $ <space> NUL`. The pool
    // must rewrite these without exploding on connect.
    let sanitized = pool.tenant_db_name("te/n.a\\nt$id\" with spaces");

    // Report the first forbidden character that survived, if any.
    let survivor = ['/', '\\', '.', '"', '$', ' ']
        .iter()
        .copied()
        .find(|forbidden| sanitized.contains(*forbidden));
    if let Some(c) = survivor {
        panic!("sanitized db name still contains {c:?}: {sanitized}");
    }
}

/// Exercises the admin helpers: `list_tenant_db_names` must report every
/// materialized tenant database under this pool's prefix, and `drop_tenant`
/// must remove exactly the requested tenant's database.
///
/// Requires a reachable MongoDB; `TEST_MONGODB_URI` overrides the default URI.
#[tokio::test]
async fn admin_helpers_list_and_drop_tenant_dbs() {
    let uri = std::env::var("TEST_MONGODB_URI")
        .unwrap_or_else(|_| "mongodb://root:example@localhost:27017/?authSource=admin".into());
    // Unique per run so concurrent invocations never list each other's DBs.
    let prefix = format!("m72d_{}", short_id());
    let pool = DatabasePool::connect(&uri, &prefix).await.expect("connect");

    let acme = ctx("00000000-0000-0000-0000-00000000acme", "acme");
    let globex = ctx("00000000-0000-0000-0000-0000globex000", "globex");

    // Provision two tenants and write a doc into each so the databases
    // actually materialize on the cluster (Mongo lazily creates DBs).
    let acme_db = pool.for_tenant(&acme).await.expect("acme db");
    let globex_db = pool.for_tenant(&globex).await.expect("globex db");
    acme_db
        .repositories()
        .insert_one(fixture_repo("acme-app", "git@example.com:acme/app.git"))
        .await
        .expect("insert acme");
    globex_db
        .repositories()
        .insert_one(fixture_repo("globex-app", "git@example.com:globex/app.git"))
        .await
        .expect("insert globex");

    // list_tenant_db_names sees both, filtered by prefix
    let names = pool.list_tenant_db_names().await.expect("list tenants");
    let acme_name = pool.tenant_db_name(&acme.tenant_id);
    let globex_name = pool.tenant_db_name(&globex.tenant_id);
    assert!(
        names.contains(&acme_name),
        "expected {acme_name} in {names:?}"
    );
    assert!(
        names.contains(&globex_name),
        "expected {globex_name} in {names:?}"
    );

    // Build the expected prefix once instead of on every iteration, and say
    // which name broke the filter when the assertion fails.
    let expected_prefix = format!("{prefix}_");
    for name in &names {
        assert!(
            name.starts_with(&expected_prefix),
            "listed db {name} does not start with {expected_prefix}, got {names:?}"
        );
    }

    // drop_tenant removes acme's DB
    pool.drop_tenant(&acme.tenant_id)
        .await
        .expect("drop acme tenant");
    let after = pool
        .list_tenant_db_names()
        .await
        .expect("list tenants after drop");
    assert!(
        !after.contains(&acme_name),
        "acme should be gone after drop, got {after:?}"
    );
    assert!(
        after.contains(&globex_name),
        "globex should still be present, got {after:?}"
    );

    // Cleanup remaining
    pool.drop_tenant(&globex.tenant_id)
        .await
        .expect("drop globex tenant");
}

/// An over-long tenant id must be replaced by a fixed-width hex hash so the
/// derived database name stays within Mongo's 63-byte cap, and the mapping
/// must be stable for equal inputs and distinct for different ones.
#[tokio::test]
async fn tenant_db_name_falls_back_to_hash_when_too_long() {
    let uri = std::env::var("TEST_MONGODB_URI")
        .unwrap_or_else(|_| "mongodb://root:example@localhost:27017/?authSource=admin".into());
    let db_prefix = "m72a_long";
    let pool = DatabasePool::connect(&uri, db_prefix)
        .await
        .expect("connect");

    // 100-byte tenant_id would overflow the 63-byte db-name cap with
    // any reasonable prefix. The pool must hash it down.
    let huge = "x".repeat(100);
    let name = pool.tenant_db_name(&huge);
    assert!(name.len() <= 63, "hashed name should fit: {name}");

    // `strip_prefix` removes the prefix exactly once and reports its absence;
    // `trim_start_matches` would strip repeated occurrences and silently
    // return the whole name when the prefix is missing.
    let name_prefix = format!("{db_prefix}_");
    let suffix = name
        .strip_prefix(name_prefix.as_str())
        .unwrap_or_else(|| panic!("hashed name should start with {name_prefix:?}, got {name:?}"));

    // The hash suffix is 32 hex chars (16-byte SHA-256 truncation).
    assert_eq!(
        suffix.len(),
        32,
        "expected 32-hex suffix (16-byte hash), got {suffix:?}"
    );
    assert!(suffix.chars().all(|c| c.is_ascii_hexdigit()));

    // Stable: same input → same output.
    assert_eq!(name, pool.tenant_db_name(&huge));

    // Different inputs → different outputs (collision check on a tiny
    // sample — full birthday-resistance is a proof not a test).
    let huge2 = "y".repeat(100);
    assert_ne!(pool.tenant_db_name(&huge), pool.tenant_db_name(&huge2));
}

/// `DatabasePool::connect` must refuse a 31-character prefix and accept a
/// 30-character one.
#[tokio::test]
async fn connect_rejects_overlong_db_prefix() {
    let uri = match std::env::var("TEST_MONGODB_URI") {
        Ok(from_env) => from_env,
        Err(_) => String::from("mongodb://root:example@localhost:27017/?authSource=admin"),
    };

    // MAX_PREFIX_LEN is 30 (= 63 - 1 - 32). A 31-char prefix MUST be
    // rejected at construction so the hash-fallback path can never
    // produce an over-long db name at runtime.
    let oversized = "a".repeat(31);
    let rejection = DatabasePool::connect(&uri, &oversized)
        .await
        .unwrap_err();
    let rendered = rejection.to_string();
    // Either wording is accepted: the numeric cap, or an echo of the prefix.
    let explains_cap = rendered.contains("max is 30") || rendered.contains(&oversized);
    assert!(explains_cap, "error should explain the cap: {rendered}");

    // Exactly 30 chars is the inclusive bound — must succeed.
    let at_limit = "a".repeat(30);
    let _ = DatabasePool::connect(&uri, &at_limit)
        .await
        .expect("30-char prefix should be accepted");
}

/// Returns the first 8 characters of a fresh random (v4) UUID in its
/// hyphen-free "simple" form, keeping test prefixes well under Mongo's
/// 63-byte db-name cap.
fn short_id() -> String {
    let mut id = uuid::Uuid::new_v4().simple().to_string();
    // Shorten in place rather than slicing and allocating a second string.
    id.truncate(8);
    id
}

/// Reads every document from the tenant database's `repositories` collection
/// using an empty filter and returns them in cursor order.
///
/// Panics if the query, a cursor advance, or a deserialization fails.
async fn collect(db: &compliance_agent::database::Database) -> Vec<TrackedRepository> {
    let mut cursor = db
        .repositories()
        .find(doc! {})
        .await
        .expect("find repositories");

    let mut repos: Vec<TrackedRepository> = Vec::new();
    loop {
        // `advance` yields `false` once the cursor is exhausted.
        let has_next = cursor.advance().await.expect("advance");
        if !has_next {
            break repos;
        }
        let repo = cursor.deserialize_current().expect("deserialize");
        repos.push(repo);
    }
}