From 5b07d38907c6ddeb8e7aa5b41adc68bd30e87866 Mon Sep 17 00:00:00 2001 From: Sharang Parnerkar <30073382+mighty840@users.noreply.github.com> Date: Mon, 30 Mar 2026 11:01:19 +0200 Subject: [PATCH 1/3] feat: add E2E test suite with nightly CI, fix dashboard Dockerfile MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit E2E Tests: - 17 integration tests covering: health, repos CRUD, findings lifecycle, cascade delete (SAST + DAST + pentest), DAST targets, stats overview - TestServer harness: spins up agent API on random port with isolated MongoDB database per test, auto-cleanup - Added lib.rs to expose agent internals for integration tests - Nightly CI workflow with MongoDB service container (3 AM UTC) Tests verify: - Repository add/list/delete + duplicate rejection + invalid ID handling - Finding creation, filtering by severity/repo, status updates, bulk updates - Cascade delete: repo deletion removes all DAST targets, pentest sessions, attack chain nodes, DAST findings, SAST findings, and SBOM entries - DAST target CRUD and empty finding list - Stats overview accuracy with zero and populated data Also: - Fix Dockerfile.dashboard: bump dioxus-cli 0.7.3 → 0.7.4 (compile fix) - Fix clippy: allow new_without_default for pattern scanners Co-Authored-By: Claude Opus 4.6 (1M context) --- .gitea/workflows/nightly.yml | 52 +++++ Dockerfile.dashboard | 2 +- compliance-agent/Cargo.toml | 11 + compliance-agent/src/lib.rs | 16 ++ compliance-agent/src/main.rs | 15 +- compliance-agent/src/pipeline/patterns.rs | 2 + compliance-agent/tests/common/mod.rs | 166 ++++++++++++- compliance-agent/tests/e2e.rs | 7 + .../tests/integration/api/cascade_delete.rs | 221 ++++++++++++++++++ .../tests/integration/api/dast.rs | 48 ++++ .../tests/integration/api/findings.rs | 144 ++++++++++++ .../tests/integration/api/health.rs | 29 +++ compliance-agent/tests/integration/api/mod.rs | 6 + .../tests/integration/api/repositories.rs | 110 +++++++++ 
.../tests/integration/api/stats.rs | 111 +++++++++ compliance-agent/tests/integration/mod.rs | 11 +- 16 files changed, 931 insertions(+), 20 deletions(-) create mode 100644 .gitea/workflows/nightly.yml create mode 100644 compliance-agent/src/lib.rs create mode 100644 compliance-agent/tests/e2e.rs create mode 100644 compliance-agent/tests/integration/api/cascade_delete.rs create mode 100644 compliance-agent/tests/integration/api/dast.rs create mode 100644 compliance-agent/tests/integration/api/findings.rs create mode 100644 compliance-agent/tests/integration/api/health.rs create mode 100644 compliance-agent/tests/integration/api/mod.rs create mode 100644 compliance-agent/tests/integration/api/repositories.rs create mode 100644 compliance-agent/tests/integration/api/stats.rs diff --git a/.gitea/workflows/nightly.yml b/.gitea/workflows/nightly.yml new file mode 100644 index 0000000..65bd5ef --- /dev/null +++ b/.gitea/workflows/nightly.yml @@ -0,0 +1,52 @@ +name: Nightly E2E Tests + +on: + schedule: + - cron: '0 3 * * *' # 3 AM UTC daily + workflow_dispatch: # Allow manual trigger + +env: + CARGO_TERM_COLOR: always + RUSTFLAGS: "-D warnings" + RUSTC_WRAPPER: /usr/local/bin/sccache + SCCACHE_DIR: /tmp/sccache + TEST_MONGODB_URI: "mongodb://root:example@mongo:27017/?authSource=admin" + +concurrency: + group: nightly-e2e + cancel-in-progress: true + +jobs: + e2e: + name: E2E Tests + runs-on: docker + container: + image: rust:1.94-bookworm + services: + mongo: + image: mongo:7 + env: + MONGO_INITDB_ROOT_USERNAME: root + MONGO_INITDB_ROOT_PASSWORD: example + steps: + - name: Checkout + run: | + git init + git remote add origin "${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}.git" + git fetch --depth=1 origin "${GITHUB_SHA:-refs/heads/main}" + git checkout FETCH_HEAD + + - name: Install sccache + run: | + curl -fsSL https://github.com/mozilla/sccache/releases/download/v0.9.1/sccache-v0.9.1-x86_64-unknown-linux-musl.tar.gz \ + | tar xz --strip-components=1 -C /usr/local/bin/ 
sccache-v0.9.1-x86_64-unknown-linux-musl/sccache + chmod +x /usr/local/bin/sccache + env: + RUSTC_WRAPPER: "" + + - name: Run E2E tests + run: cargo test -p compliance-agent --test e2e -- --test-threads=4 + + - name: Show sccache stats + run: sccache --show-stats + if: always() diff --git a/Dockerfile.dashboard b/Dockerfile.dashboard index 0be53ce..b535505 100644 --- a/Dockerfile.dashboard +++ b/Dockerfile.dashboard @@ -1,6 +1,6 @@ FROM rust:1.94-bookworm AS builder -RUN cargo install dioxus-cli --version 0.7.3 +RUN cargo install dioxus-cli --version 0.7.4 ARG DOCS_URL=/docs diff --git a/compliance-agent/Cargo.toml b/compliance-agent/Cargo.toml index b47dd2e..5d1f8b0 100644 --- a/compliance-agent/Cargo.toml +++ b/compliance-agent/Cargo.toml @@ -42,3 +42,14 @@ tokio-tungstenite = { version = "0.26", features = ["rustls-tls-webpki-roots"] } futures-core = "0.3" dashmap = { workspace = true } tokio-stream = { workspace = true } + +[dev-dependencies] +compliance-core = { workspace = true, features = ["mongodb"] } +reqwest = { workspace = true } +serde_json = { workspace = true } +tokio = { workspace = true } +mongodb = { workspace = true } +uuid = { workspace = true } +secrecy = { workspace = true } +axum = "0.8" +tower-http = { version = "0.6", features = ["cors"] } diff --git a/compliance-agent/src/lib.rs b/compliance-agent/src/lib.rs new file mode 100644 index 0000000..788fec7 --- /dev/null +++ b/compliance-agent/src/lib.rs @@ -0,0 +1,16 @@ +// Library entrypoint — re-exports for integration tests and the binary. 
+ +pub mod agent; +pub mod api; +pub mod config; +pub mod database; +pub mod error; +pub mod llm; +pub mod pentest; +pub mod pipeline; +pub mod rag; +pub mod scheduler; +pub mod ssh; +#[allow(dead_code)] +pub mod trackers; +pub mod webhooks; diff --git a/compliance-agent/src/main.rs b/compliance-agent/src/main.rs index bb54769..7f36343 100644 --- a/compliance-agent/src/main.rs +++ b/compliance-agent/src/main.rs @@ -1,17 +1,4 @@ -mod agent; -mod api; -pub(crate) mod config; -mod database; -mod error; -mod llm; -mod pentest; -mod pipeline; -mod rag; -mod scheduler; -mod ssh; -#[allow(dead_code)] -mod trackers; -mod webhooks; +use compliance_agent::{agent, api, config, database, scheduler, ssh, webhooks}; #[tokio::main] async fn main() -> Result<(), Box> { diff --git a/compliance-agent/src/pipeline/patterns.rs b/compliance-agent/src/pipeline/patterns.rs index bd25eec..b3ca394 100644 --- a/compliance-agent/src/pipeline/patterns.rs +++ b/compliance-agent/src/pipeline/patterns.rs @@ -33,6 +33,7 @@ struct PatternRule { file_extensions: Vec, } +#[allow(clippy::new_without_default)] impl GdprPatternScanner { pub fn new() -> Self { let patterns = vec![ @@ -98,6 +99,7 @@ impl Scanner for GdprPatternScanner { } } +#[allow(clippy::new_without_default)] impl OAuthPatternScanner { pub fn new() -> Self { let patterns = vec![ diff --git a/compliance-agent/tests/common/mod.rs b/compliance-agent/tests/common/mod.rs index b51bdd6..1cbe46f 100644 --- a/compliance-agent/tests/common/mod.rs +++ b/compliance-agent/tests/common/mod.rs @@ -1,3 +1,165 @@ -// Shared test helpers for compliance-agent integration tests. +// Shared test harness for E2E / integration tests. // -// Add database mocks, fixtures, and test utilities here. +// Spins up the agent API server on a random port with an isolated test +// database. Each test gets a fresh database that is dropped on cleanup. 
+ +use std::sync::Arc; + +use compliance_agent::agent::ComplianceAgent; +use compliance_agent::api; +use compliance_agent::database::Database; +use compliance_core::AgentConfig; +use secrecy::SecretString; + +/// A running test server with a unique database. +pub struct TestServer { + pub base_url: String, + pub client: reqwest::Client, + db_name: String, + mongodb_uri: String, +} + +impl TestServer { + /// Start an agent API server on a random port with an isolated database. + pub async fn start() -> Self { + let mongodb_uri = std::env::var("TEST_MONGODB_URI") + .unwrap_or_else(|_| "mongodb://root:example@localhost:27017/?authSource=admin".into()); + + // Unique database name per test run to avoid collisions + let db_name = format!("test_{}", uuid::Uuid::new_v4().simple()); + + let db = Database::connect(&mongodb_uri, &db_name) + .await + .expect("Failed to connect to MongoDB — is it running?"); + db.ensure_indexes().await.expect("Failed to create indexes"); + + let config = AgentConfig { + mongodb_uri: mongodb_uri.clone(), + mongodb_database: db_name.clone(), + litellm_url: std::env::var("TEST_LITELLM_URL") + .unwrap_or_else(|_| "http://localhost:4000".into()), + litellm_api_key: SecretString::from(String::new()), + litellm_model: "gpt-4o".into(), + litellm_embed_model: "text-embedding-3-small".into(), + agent_port: 0, // not used — we bind ourselves + scan_schedule: String::new(), + cve_monitor_schedule: String::new(), + git_clone_base_path: "/tmp/compliance-scanner-tests/repos".into(), + ssh_key_path: "/tmp/compliance-scanner-tests/ssh/id_ed25519".into(), + github_token: None, + github_webhook_secret: None, + gitlab_url: None, + gitlab_token: None, + gitlab_webhook_secret: None, + jira_url: None, + jira_email: None, + jira_api_token: None, + jira_project_key: None, + searxng_url: None, + nvd_api_key: None, + keycloak_url: None, + keycloak_realm: None, + keycloak_admin_username: None, + keycloak_admin_password: None, + pentest_verification_email: None, + 
pentest_imap_host: None, + pentest_imap_port: None, + pentest_imap_tls: false, + pentest_imap_username: None, + pentest_imap_password: None, + }; + + let agent = ComplianceAgent::new(config, db); + + // Build the router with the agent extension + let app = api::routes::build_router() + .layer(axum::extract::Extension(Arc::new(agent))) + .layer(tower_http::cors::CorsLayer::permissive()); + + // Bind to port 0 to get a random available port + let listener = tokio::net::TcpListener::bind("127.0.0.1:0") + .await + .expect("Failed to bind test server"); + let port = listener.local_addr().expect("no local addr").port(); + + tokio::spawn(async move { + axum::serve(listener, app).await.ok(); + }); + + let base_url = format!("http://127.0.0.1:{port}"); + let client = reqwest::Client::builder() + .timeout(std::time::Duration::from_secs(30)) + .build() + .expect("Failed to build HTTP client"); + + // Wait for server to be ready + for _ in 0..50 { + if client + .get(format!("{base_url}/api/v1/health")) + .send() + .await + .is_ok() + { + break; + } + tokio::time::sleep(std::time::Duration::from_millis(50)).await; + } + + Self { + base_url, + client, + db_name, + mongodb_uri, + } + } + + /// GET helper + pub async fn get(&self, path: &str) -> reqwest::Response { + self.client + .get(format!("{}{path}", self.base_url)) + .send() + .await + .expect("GET request failed") + } + + /// POST helper with JSON body + pub async fn post(&self, path: &str, body: &serde_json::Value) -> reqwest::Response { + self.client + .post(format!("{}{path}", self.base_url)) + .json(body) + .send() + .await + .expect("POST request failed") + } + + /// PATCH helper with JSON body + pub async fn patch(&self, path: &str, body: &serde_json::Value) -> reqwest::Response { + self.client + .patch(format!("{}{path}", self.base_url)) + .json(body) + .send() + .await + .expect("PATCH request failed") + } + + /// DELETE helper + pub async fn delete(&self, path: &str) -> reqwest::Response { + self.client + 
.delete(format!("{}{path}", self.base_url)) + .send() + .await + .expect("DELETE request failed") + } + + /// Get the unique database name for direct MongoDB access in tests. + pub fn db_name(&self) -> &str { + &self.db_name + } + + /// Drop the test database on cleanup + pub async fn cleanup(&self) { + if let Ok(client) = mongodb::Client::with_uri_str(&self.mongodb_uri).await { + client.database(&self.db_name).drop().await.ok(); + } + } +} diff --git a/compliance-agent/tests/e2e.rs b/compliance-agent/tests/e2e.rs new file mode 100644 index 0000000..577acdf --- /dev/null +++ b/compliance-agent/tests/e2e.rs @@ -0,0 +1,7 @@ +// E2E test entry point. +// +// Run with: cargo test -p compliance-agent --test e2e +// Requires: MongoDB running (set TEST_MONGODB_URI if not default) + +mod common; +mod integration; diff --git a/compliance-agent/tests/integration/api/cascade_delete.rs b/compliance-agent/tests/integration/api/cascade_delete.rs new file mode 100644 index 0000000..0ba9cc2 --- /dev/null +++ b/compliance-agent/tests/integration/api/cascade_delete.rs @@ -0,0 +1,221 @@ +use crate::common::TestServer; +use serde_json::json; + +/// Insert a DAST target directly into MongoDB linked to a repo. +async fn insert_dast_target(server: &TestServer, repo_id: &str, name: &str) -> String { + let mongodb_uri = std::env::var("TEST_MONGODB_URI") + .unwrap_or_else(|_| "mongodb://root:example@localhost:27017/?authSource=admin".into()); + let client = mongodb::Client::with_uri_str(&mongodb_uri).await.unwrap(); + let db = client.database(&server.db_name()); + + let result = db + .collection::("dast_targets") + .insert_one(mongodb::bson::doc! 
{ + "name": name, + "base_url": format!("https://{name}.example.com"), + "target_type": "webapp", + "repo_id": repo_id, + "rate_limit": 10, + "allow_destructive": false, + "created_at": mongodb::bson::DateTime::now(), + }) + .await + .unwrap(); + + result.inserted_id.as_object_id().unwrap().to_hex() +} + +/// Insert a pentest session linked to a target. +async fn insert_pentest_session(server: &TestServer, target_id: &str, repo_id: &str) -> String { + let mongodb_uri = std::env::var("TEST_MONGODB_URI") + .unwrap_or_else(|_| "mongodb://root:example@localhost:27017/?authSource=admin".into()); + let client = mongodb::Client::with_uri_str(&mongodb_uri).await.unwrap(); + let db = client.database(&server.db_name()); + + let result = db + .collection::("pentest_sessions") + .insert_one(mongodb::bson::doc! { + "target_id": target_id, + "repo_id": repo_id, + "strategy": "comprehensive", + "status": "completed", + "findings_count": 1_i32, + "exploitable_count": 0_i32, + "created_at": mongodb::bson::DateTime::now(), + }) + .await + .unwrap(); + + result.inserted_id.as_object_id().unwrap().to_hex() +} + +/// Insert an attack chain node linked to a session. +async fn insert_attack_node(server: &TestServer, session_id: &str) { + let mongodb_uri = std::env::var("TEST_MONGODB_URI") + .unwrap_or_else(|_| "mongodb://root:example@localhost:27017/?authSource=admin".into()); + let client = mongodb::Client::with_uri_str(&mongodb_uri).await.unwrap(); + let db = client.database(&server.db_name()); + + db.collection::("attack_chain_nodes") + .insert_one(mongodb::bson::doc! { + "session_id": session_id, + "node_id": "node-1", + "tool_name": "recon", + "status": "completed", + "created_at": mongodb::bson::DateTime::now(), + }) + .await + .unwrap(); +} + +/// Insert a DAST finding linked to a target. 
+async fn insert_dast_finding(server: &TestServer, target_id: &str, session_id: &str) { + let mongodb_uri = std::env::var("TEST_MONGODB_URI") + .unwrap_or_else(|_| "mongodb://root:example@localhost:27017/?authSource=admin".into()); + let client = mongodb::Client::with_uri_str(&mongodb_uri).await.unwrap(); + let db = client.database(&server.db_name()); + + db.collection::("dast_findings") + .insert_one(mongodb::bson::doc! { + "scan_run_id": "run-1", + "target_id": target_id, + "vuln_type": "xss", + "title": "Reflected XSS", + "description": "XSS in search param", + "severity": "high", + "endpoint": "https://example.com/search", + "method": "GET", + "exploitable": true, + "evidence": [], + "session_id": session_id, + "created_at": mongodb::bson::DateTime::now(), + }) + .await + .unwrap(); +} + +/// Helper to count documents in a collection +async fn count_docs(server: &TestServer, collection: &str) -> u64 { + let mongodb_uri = std::env::var("TEST_MONGODB_URI") + .unwrap_or_else(|_| "mongodb://root:example@localhost:27017/?authSource=admin".into()); + let client = mongodb::Client::with_uri_str(&mongodb_uri).await.unwrap(); + let db = client.database(&server.db_name()); + db.collection::(collection) + .count_documents(mongodb::bson::doc! 
{}) + .await + .unwrap() +} + +#[tokio::test] +async fn delete_repo_cascades_to_dast_and_pentest_data() { + let server = TestServer::start().await; + + // Create a repo + let resp = server + .post( + "/api/v1/repositories", + &json!({ + "name": "cascade-test", + "git_url": "https://github.com/example/cascade-test.git", + }), + ) + .await; + let body: serde_json::Value = resp.json().await.unwrap(); + let repo_id = body["data"]["id"].as_str().unwrap().to_string(); + + // Insert DAST target linked to repo + let target_id = insert_dast_target(&server, &repo_id, "cascade-target").await; + + // Insert pentest session linked to target + let session_id = insert_pentest_session(&server, &target_id, &repo_id).await; + + // Insert downstream data + insert_attack_node(&server, &session_id).await; + insert_dast_finding(&server, &target_id, &session_id).await; + + // Verify data exists + assert_eq!(count_docs(&server, "dast_targets").await, 1); + assert_eq!(count_docs(&server, "pentest_sessions").await, 1); + assert_eq!(count_docs(&server, "attack_chain_nodes").await, 1); + assert_eq!(count_docs(&server, "dast_findings").await, 1); + + // Delete the repo + let resp = server + .delete(&format!("/api/v1/repositories/{repo_id}")) + .await; + assert_eq!(resp.status(), 200); + + // All downstream data should be gone + assert_eq!(count_docs(&server, "dast_targets").await, 0); + assert_eq!(count_docs(&server, "pentest_sessions").await, 0); + assert_eq!(count_docs(&server, "attack_chain_nodes").await, 0); + assert_eq!(count_docs(&server, "dast_findings").await, 0); + + server.cleanup().await; +} + +#[tokio::test] +async fn delete_repo_cascades_sast_findings_and_sbom() { + let server = TestServer::start().await; + + // Create a repo + let resp = server + .post( + "/api/v1/repositories", + &json!({ + "name": "sast-cascade", + "git_url": "https://github.com/example/sast-cascade.git", + }), + ) + .await; + let body: serde_json::Value = resp.json().await.unwrap(); + let repo_id = 
body["data"]["id"].as_str().unwrap().to_string(); + + // Insert SAST finding and SBOM entry + let mongodb_uri = std::env::var("TEST_MONGODB_URI") + .unwrap_or_else(|_| "mongodb://root:example@localhost:27017/?authSource=admin".into()); + let client = mongodb::Client::with_uri_str(&mongodb_uri).await.unwrap(); + let db = client.database(&server.db_name()); + let now = mongodb::bson::DateTime::now(); + + db.collection::("findings") + .insert_one(mongodb::bson::doc! { + "repo_id": &repo_id, + "fingerprint": "fp-test-1", + "scanner": "semgrep", + "scan_type": "sast", + "title": "SQL Injection", + "description": "desc", + "severity": "critical", + "status": "open", + "created_at": now, + "updated_at": now, + }) + .await + .unwrap(); + + db.collection::("sbom_entries") + .insert_one(mongodb::bson::doc! { + "repo_id": &repo_id, + "name": "lodash", + "version": "4.17.20", + "package_manager": "npm", + "known_vulnerabilities": [], + }) + .await + .unwrap(); + + assert_eq!(count_docs(&server, "findings").await, 1); + assert_eq!(count_docs(&server, "sbom_entries").await, 1); + + // Delete repo + server + .delete(&format!("/api/v1/repositories/{repo_id}")) + .await; + + // Both should be gone + assert_eq!(count_docs(&server, "findings").await, 0); + assert_eq!(count_docs(&server, "sbom_entries").await, 0); + + server.cleanup().await; +} diff --git a/compliance-agent/tests/integration/api/dast.rs b/compliance-agent/tests/integration/api/dast.rs new file mode 100644 index 0000000..67d13b7 --- /dev/null +++ b/compliance-agent/tests/integration/api/dast.rs @@ -0,0 +1,48 @@ +use crate::common::TestServer; +use serde_json::json; + +#[tokio::test] +async fn add_and_list_dast_targets() { + let server = TestServer::start().await; + + // Initially empty + let resp = server.get("/api/v1/dast/targets").await; + assert_eq!(resp.status(), 200); + let body: serde_json::Value = resp.json().await.unwrap(); + assert_eq!(body["data"].as_array().unwrap().len(), 0); + + // Add a target + let resp 
= server + .post( + "/api/v1/dast/targets", + &json!({ + "name": "test-app", + "base_url": "https://test-app.example.com", + "target_type": "webapp", + }), + ) + .await; + assert_eq!(resp.status(), 200); + + // List should return 1 + let resp = server.get("/api/v1/dast/targets").await; + let body: serde_json::Value = resp.json().await.unwrap(); + let targets = body["data"].as_array().unwrap(); + assert_eq!(targets.len(), 1); + assert_eq!(targets[0]["name"], "test-app"); + assert_eq!(targets[0]["base_url"], "https://test-app.example.com"); + + server.cleanup().await; +} + +#[tokio::test] +async fn list_dast_findings_empty() { + let server = TestServer::start().await; + + let resp = server.get("/api/v1/dast/findings").await; + assert_eq!(resp.status(), 200); + let body: serde_json::Value = resp.json().await.unwrap(); + assert_eq!(body["data"].as_array().unwrap().len(), 0); + + server.cleanup().await; +} diff --git a/compliance-agent/tests/integration/api/findings.rs b/compliance-agent/tests/integration/api/findings.rs new file mode 100644 index 0000000..ad747a9 --- /dev/null +++ b/compliance-agent/tests/integration/api/findings.rs @@ -0,0 +1,144 @@ +use crate::common::TestServer; +use serde_json::json; + +/// Helper: insert a finding directly via MongoDB for testing query endpoints. +async fn insert_finding(server: &TestServer, repo_id: &str, title: &str, severity: &str) { + // We insert via the agent's DB by posting to the internal test path. + // Since there's no direct "create finding" API, we use MongoDB directly. 
+ let mongodb_uri = std::env::var("TEST_MONGODB_URI") + .unwrap_or_else(|_| "mongodb://root:example@localhost:27017/?authSource=admin".into()); + + // Extract the database name from the server's unique DB + // We'll use the agent's internal DB through the stats endpoint to verify + let client = mongodb::Client::with_uri_str(&mongodb_uri).await.unwrap(); + + // Get the DB name from the test server by parsing the health response + // For now, we use a direct insert approach + let db = client.database(&server.db_name()); + + let now = mongodb::bson::DateTime::now(); + db.collection::("findings") + .insert_one(mongodb::bson::doc! { + "repo_id": repo_id, + "fingerprint": format!("fp-{title}-{severity}"), + "scanner": "test-scanner", + "scan_type": "sast", + "title": title, + "description": format!("Test finding: {title}"), + "severity": severity, + "status": "open", + "created_at": now, + "updated_at": now, + }) + .await + .unwrap(); +} + +#[tokio::test] +async fn list_findings_empty() { + let server = TestServer::start().await; + + let resp = server.get("/api/v1/findings").await; + assert_eq!(resp.status(), 200); + + let body: serde_json::Value = resp.json().await.unwrap(); + assert_eq!(body["data"].as_array().unwrap().len(), 0); + assert_eq!(body["total"], 0); + + server.cleanup().await; +} + +#[tokio::test] +async fn list_findings_with_data() { + let server = TestServer::start().await; + + insert_finding(&server, "repo1", "SQL Injection", "critical").await; + insert_finding(&server, "repo1", "XSS", "high").await; + insert_finding(&server, "repo2", "Info Leak", "low").await; + + let resp = server.get("/api/v1/findings").await; + assert_eq!(resp.status(), 200); + let body: serde_json::Value = resp.json().await.unwrap(); + assert_eq!(body["total"], 3); + + // Filter by severity + let resp = server.get("/api/v1/findings?severity=critical").await; + let body: serde_json::Value = resp.json().await.unwrap(); + assert_eq!(body["total"], 1); + 
assert_eq!(body["data"][0]["title"], "SQL Injection"); + + // Filter by repo + let resp = server.get("/api/v1/findings?repo_id=repo1").await; + let body: serde_json::Value = resp.json().await.unwrap(); + assert_eq!(body["total"], 2); + + server.cleanup().await; +} + +#[tokio::test] +async fn update_finding_status() { + let server = TestServer::start().await; + + insert_finding(&server, "repo1", "Test Bug", "medium").await; + + // Get the finding ID + let resp = server.get("/api/v1/findings").await; + let body: serde_json::Value = resp.json().await.unwrap(); + let finding_id = body["data"][0]["_id"]["$oid"].as_str().unwrap(); + + // Update status to resolved + let resp = server + .patch( + &format!("/api/v1/findings/{finding_id}/status"), + &json!({ "status": "resolved" }), + ) + .await; + assert_eq!(resp.status(), 200); + + // Verify it's updated + let resp = server.get(&format!("/api/v1/findings/{finding_id}")).await; + assert_eq!(resp.status(), 200); + let body: serde_json::Value = resp.json().await.unwrap(); + assert_eq!(body["data"]["status"], "resolved"); + + server.cleanup().await; +} + +#[tokio::test] +async fn bulk_update_finding_status() { + let server = TestServer::start().await; + + insert_finding(&server, "repo1", "Bug A", "high").await; + insert_finding(&server, "repo1", "Bug B", "high").await; + + // Get both finding IDs + let resp = server.get("/api/v1/findings").await; + let body: serde_json::Value = resp.json().await.unwrap(); + let ids: Vec = body["data"] + .as_array() + .unwrap() + .iter() + .map(|f| f["_id"]["$oid"].as_str().unwrap().to_string()) + .collect(); + + // Bulk update + let resp = server + .patch( + "/api/v1/findings/bulk-status", + &json!({ + "ids": ids, + "status": "false_positive" + }), + ) + .await; + assert_eq!(resp.status(), 200); + + // Verify both are updated + for id in &ids { + let resp = server.get(&format!("/api/v1/findings/{id}")).await; + let body: serde_json::Value = resp.json().await.unwrap(); + 
assert_eq!(body["data"]["status"], "false_positive"); + } + + server.cleanup().await; +} diff --git a/compliance-agent/tests/integration/api/health.rs b/compliance-agent/tests/integration/api/health.rs new file mode 100644 index 0000000..97bd07b --- /dev/null +++ b/compliance-agent/tests/integration/api/health.rs @@ -0,0 +1,29 @@ +use crate::common::TestServer; + +#[tokio::test] +async fn health_endpoint_returns_ok() { + let server = TestServer::start().await; + + let resp = server.get("/api/v1/health").await; + assert_eq!(resp.status(), 200); + + let body: serde_json::Value = resp.json().await.unwrap(); + assert_eq!(body["status"], "ok"); + + server.cleanup().await; +} + +#[tokio::test] +async fn stats_overview_returns_zeroes_on_empty_db() { + let server = TestServer::start().await; + + let resp = server.get("/api/v1/stats/overview").await; + assert_eq!(resp.status(), 200); + + let body: serde_json::Value = resp.json().await.unwrap(); + let data = &body["data"]; + assert_eq!(data["repositories"], 0); + assert_eq!(data["total_findings"], 0); + + server.cleanup().await; +} diff --git a/compliance-agent/tests/integration/api/mod.rs b/compliance-agent/tests/integration/api/mod.rs new file mode 100644 index 0000000..82b0fe1 --- /dev/null +++ b/compliance-agent/tests/integration/api/mod.rs @@ -0,0 +1,6 @@ +mod cascade_delete; +mod dast; +mod findings; +mod health; +mod repositories; +mod stats; diff --git a/compliance-agent/tests/integration/api/repositories.rs b/compliance-agent/tests/integration/api/repositories.rs new file mode 100644 index 0000000..7cf476f --- /dev/null +++ b/compliance-agent/tests/integration/api/repositories.rs @@ -0,0 +1,110 @@ +use crate::common::TestServer; +use serde_json::json; + +#[tokio::test] +async fn add_and_list_repository() { + let server = TestServer::start().await; + + // Initially empty + let resp = server.get("/api/v1/repositories").await; + assert_eq!(resp.status(), 200); + let body: serde_json::Value = resp.json().await.unwrap(); 
+ assert_eq!(body["data"].as_array().unwrap().len(), 0); + + // Add a repository + let resp = server + .post( + "/api/v1/repositories", + &json!({ + "name": "test-repo", + "git_url": "https://github.com/example/test-repo.git", + }), + ) + .await; + assert_eq!(resp.status(), 200); + let body: serde_json::Value = resp.json().await.unwrap(); + let repo_id = body["data"]["id"].as_str().unwrap().to_string(); + assert!(!repo_id.is_empty()); + + // List should now return 1 + let resp = server.get("/api/v1/repositories").await; + let body: serde_json::Value = resp.json().await.unwrap(); + let repos = body["data"].as_array().unwrap(); + assert_eq!(repos.len(), 1); + assert_eq!(repos[0]["name"], "test-repo"); + + server.cleanup().await; +} + +#[tokio::test] +async fn add_duplicate_repository_fails() { + let server = TestServer::start().await; + + let payload = json!({ + "name": "dup-repo", + "git_url": "https://github.com/example/dup-repo.git", + }); + + // First add succeeds + let resp = server.post("/api/v1/repositories", &payload).await; + assert_eq!(resp.status(), 200); + + // Second add with same git_url should fail (unique index) + let resp = server.post("/api/v1/repositories", &payload).await; + assert_ne!(resp.status(), 200); + + server.cleanup().await; +} + +#[tokio::test] +async fn delete_repository() { + let server = TestServer::start().await; + + // Add a repo + let resp = server + .post( + "/api/v1/repositories", + &json!({ + "name": "to-delete", + "git_url": "https://github.com/example/to-delete.git", + }), + ) + .await; + let body: serde_json::Value = resp.json().await.unwrap(); + let repo_id = body["data"]["id"].as_str().unwrap(); + + // Delete it + let resp = server + .delete(&format!("/api/v1/repositories/{repo_id}")) + .await; + assert_eq!(resp.status(), 200); + + // List should be empty again + let resp = server.get("/api/v1/repositories").await; + let body: serde_json::Value = resp.json().await.unwrap(); + 
assert_eq!(body["data"].as_array().unwrap().len(), 0); + + server.cleanup().await; +} + +#[tokio::test] +async fn delete_nonexistent_repository_returns_404() { + let server = TestServer::start().await; + + let resp = server + .delete("/api/v1/repositories/000000000000000000000000") + .await; + assert_eq!(resp.status(), 404); + + server.cleanup().await; +} + +#[tokio::test] +async fn delete_invalid_id_returns_400() { + let server = TestServer::start().await; + + let resp = server.delete("/api/v1/repositories/not-a-valid-id").await; + assert_eq!(resp.status(), 400); + + server.cleanup().await; +} diff --git a/compliance-agent/tests/integration/api/stats.rs b/compliance-agent/tests/integration/api/stats.rs new file mode 100644 index 0000000..969dc3f --- /dev/null +++ b/compliance-agent/tests/integration/api/stats.rs @@ -0,0 +1,111 @@ +use crate::common::TestServer; +use serde_json::json; + +#[tokio::test] +async fn stats_overview_reflects_inserted_data() { + let server = TestServer::start().await; + + // Add a repo + server + .post( + "/api/v1/repositories", + &json!({ + "name": "stats-repo", + "git_url": "https://github.com/example/stats-repo.git", + }), + ) + .await; + + // Insert findings directly + let mongodb_uri = std::env::var("TEST_MONGODB_URI") + .unwrap_or_else(|_| "mongodb://root:example@localhost:27017/?authSource=admin".into()); + let client = mongodb::Client::with_uri_str(&mongodb_uri).await.unwrap(); + let db = client.database(&server.db_name()); + let now = mongodb::bson::DateTime::now(); + + for (title, severity) in [ + ("Critical Bug", "critical"), + ("High Bug", "high"), + ("Medium Bug", "medium"), + ("Low Bug", "low"), + ] { + db.collection::("findings") + .insert_one(mongodb::bson::doc! 
{ + "repo_id": "test-repo-id", + "fingerprint": format!("fp-{title}"), + "scanner": "test", + "scan_type": "sast", + "title": title, + "description": "desc", + "severity": severity, + "status": "open", + "created_at": now, + "updated_at": now, + }) + .await + .unwrap(); + } + + let resp = server.get("/api/v1/stats/overview").await; + assert_eq!(resp.status(), 200); + + let body: serde_json::Value = resp.json().await.unwrap(); + let data = &body["data"]; + assert_eq!(data["repositories"], 1); + assert_eq!(data["total_findings"], 4); + assert_eq!(data["critical"], 1); + assert_eq!(data["high"], 1); + + server.cleanup().await; +} + +#[tokio::test] +async fn stats_update_after_finding_status_change() { + let server = TestServer::start().await; + + // Insert a finding + let mongodb_uri = std::env::var("TEST_MONGODB_URI") + .unwrap_or_else(|_| "mongodb://root:example@localhost:27017/?authSource=admin".into()); + let client = mongodb::Client::with_uri_str(&mongodb_uri).await.unwrap(); + let db = client.database(&server.db_name()); + let now = mongodb::bson::DateTime::now(); + + let result = db + .collection::("findings") + .insert_one(mongodb::bson::doc! 
{ + "repo_id": "repo-1", + "fingerprint": "fp-stats-test", + "scanner": "test", + "scan_type": "sast", + "title": "Stats Test Finding", + "description": "desc", + "severity": "high", + "status": "open", + "created_at": now, + "updated_at": now, + }) + .await + .unwrap(); + let finding_id = result.inserted_id.as_object_id().unwrap().to_hex(); + + // Stats should show 1 finding + let resp = server.get("/api/v1/stats/overview").await; + let body: serde_json::Value = resp.json().await.unwrap(); + assert_eq!(body["data"]["total_findings"], 1); + + // Mark it as resolved + server + .patch( + &format!("/api/v1/findings/{finding_id}/status"), + &json!({ "status": "resolved" }), + ) + .await; + + // The finding still exists (status changed, not deleted) + let resp = server.get("/api/v1/stats/overview").await; + let body: serde_json::Value = resp.json().await.unwrap(); + // total_findings counts all findings regardless of status + assert_eq!(body["data"]["total_findings"], 1); + + server.cleanup().await; +} diff --git a/compliance-agent/tests/integration/mod.rs b/compliance-agent/tests/integration/mod.rs index bcdefd1..1baec19 100644 --- a/compliance-agent/tests/integration/mod.rs +++ b/compliance-agent/tests/integration/mod.rs @@ -1,4 +1,9 @@ -// Integration tests for the compliance-agent crate. +// E2E / Integration tests for the compliance-agent API. // -// Add tests that exercise the full pipeline, API handlers, -// and cross-module interactions here. +// These tests require a running MongoDB instance. Set TEST_MONGODB_URI +// if it's not at the default `mongodb://root:example@localhost:27017`. 
+// +// Run with: cargo test -p compliance-agent --test e2e +// Or nightly: (via CI with MongoDB service container) + +mod api; -- 2.49.1 From 08a1ee2f0025858125343b6d35852a7689cd5f2a Mon Sep 17 00:00:00 2001 From: Sharang Parnerkar <30073382+mighty840@users.noreply.github.com> Date: Mon, 30 Mar 2026 11:12:38 +0200 Subject: [PATCH 2/3] fix: synthesise Contains edges and improve cross-file resolution in code graph MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The code graph produced disconnected "islands" because: 1. No Contains edges were created between File/Module nodes and their children (functions, classes, structs), leaving file nodes isolated 2. Cross-file call resolution was too strict — calls like `crate::config::load` failed to resolve to `src/config.rs::load` Fix: - After resolving explicit parser edges, synthesise Contains edges by walking each node's qualified-name hierarchy and linking to the closest ancestor that exists in the node map - Improve edge resolution with module-path matching: strip Rust prefixes (crate::, super::, self::) and try progressively shorter suffix matches for cross-file calls Adds 4 new tests covering Contains edge synthesis, dedup with existing edges, cross-file module path resolution, and parent qname lookup. Co-Authored-By: Claude Opus 4.6 (1M context) --- compliance-graph/src/graph/engine.rs | 208 ++++++++++++++++++++++++--- 1 file changed, 192 insertions(+), 16 deletions(-) diff --git a/compliance-graph/src/graph/engine.rs b/compliance-graph/src/graph/engine.rs index 431a54f..c44c2c8 100644 --- a/compliance-graph/src/graph/engine.rs +++ b/compliance-graph/src/graph/engine.rs @@ -15,6 +15,30 @@ use crate::parsers::registry::ParserRegistry; use super::community::detect_communities; use super::impact::ImpactAnalyzer; +/// Walk up the qualified-name hierarchy to find the closest ancestor +/// that exists in the node map. +/// +/// For `"src/main.rs::config::load"` this tries: +/// 1. 
`"src/main.rs::config"` (trim last `::` segment) +/// 2. `"src/main.rs"` (trim again) +/// +/// Returns the first match found, or `None` if no ancestor exists in the map (the node is a top-level root). +fn find_parent_qname(qname: &str, node_map: &HashMap<String, NodeIndex>) -> Option<String> { + let mut current = qname.to_string(); + loop { + // Try stripping the last "::" segment + if let Some(pos) = current.rfind("::") { + current.truncate(pos); + if node_map.contains_key(&current) { + return Some(current); + } + continue; + } + // No more "::" — this is a top-level node (file), no parent + return None; + } +} + /// The main graph engine that builds and manages code knowledge graphs pub struct GraphEngine { parser_registry: ParserRegistry, @@ -89,7 +113,12 @@ impl GraphEngine { Ok((code_graph, build_run)) } - /// Build petgraph from parsed output, resolving edges to node indices + /// Build petgraph from parsed output, resolving edges to node indices. + /// + /// After resolving the explicit edges from parsers, we synthesise + /// `Contains` edges so that every node is reachable from its parent + /// file or module. This eliminates disconnected "islands" that + /// otherwise appear when files share no direct call/import edges. fn build_petgraph(&self, parse_output: ParseOutput) -> Result<CodeGraph> { let mut graph = DiGraph::new(); let mut node_map: HashMap<String, NodeIndex> = HashMap::new(); @@ -102,15 +131,13 @@ impl GraphEngine { node_map.insert(node.qualified_name.clone(), idx); } - // Resolve and add edges — rewrite target to the resolved qualified name - // so the persisted edge references match node qualified_names.
+ // Resolve and add explicit edges from parsers let mut resolved_edges = Vec::new(); for mut edge in parse_output.edges { let source_idx = node_map.get(&edge.source); let resolved = self.resolve_edge_target(&edge.target, &node_map); if let (Some(&src), Some(tgt)) = (source_idx, resolved) { - // Update target to the resolved qualified name let resolved_name = node_map .iter() .find(|(_, &idx)| idx == tgt) @@ -121,7 +148,48 @@ impl GraphEngine { graph.add_edge(src, tgt, edge.kind.clone()); resolved_edges.push(edge); } - // Skip unresolved edges (cross-file, external deps) — conservative approach + } + + // Synthesise Contains edges: connect each node to its closest + // parent in the qualified-name hierarchy. + // + // For "src/main.rs::config::load", the parent chain is: + // "src/main.rs::config" → "src/main.rs" + // + // We walk up the qualified name (splitting on "::") and link to + // the first ancestor that exists in the node map. + let repo_id = nodes.first().map(|n| n.repo_id.as_str()).unwrap_or(""); + let build_id = nodes + .first() + .map(|n| n.graph_build_id.as_str()) + .unwrap_or(""); + + let qualified_names: Vec<String> = nodes.iter().map(|n| n.qualified_name.clone()).collect(); + let file_paths: HashMap<String, String> = nodes + .iter() + .map(|n| (n.qualified_name.clone(), n.file_path.clone())) + .collect(); + + for qname in &qualified_names { + if let Some(parent_qname) = find_parent_qname(qname, &node_map) { + let child_idx = node_map[qname]; + let parent_idx = node_map[&parent_qname]; + + // Avoid duplicate edges + if !graph.contains_edge(parent_idx, child_idx) { + graph.add_edge(parent_idx, child_idx, CodeEdgeKind::Contains); + resolved_edges.push(CodeEdge { + id: None, + repo_id: repo_id.to_string(), + graph_build_id: build_id.to_string(), + source: parent_qname, + target: qname.clone(), + kind: CodeEdgeKind::Contains, + file_path: file_paths.get(qname).cloned().unwrap_or_default(), + line_number: None, + }); + } + } } Ok(CodeGraph { @@ -132,33 +200,62 @@ impl
GraphEngine { }) } - /// Try to resolve an edge target to a known node + /// Try to resolve an edge target to a known node. + /// + /// Resolution strategies (in order): + /// 1. Direct qualified-name match + /// 2. Suffix match: "foo" matches "src/main.rs::mod::foo" + /// 3. Module-path match: "config::load" matches "src/config.rs::load" + /// 4. Self-method: "self.method" matches "::method" fn resolve_edge_target( &self, target: &str, node_map: &HashMap<String, NodeIndex>, ) -> Option<NodeIndex> { - // Direct match + // 1. Direct match if let Some(idx) = node_map.get(target) { return Some(*idx); } - // Try matching just the function/type name (intra-file resolution) + // 2. Suffix match: "foo" → "path/file.rs::foo" + let suffix_pattern = format!("::{target}"); + let dot_pattern = format!(".{target}"); for (qualified, idx) in node_map { - // Match "foo" to "path/file.rs::foo" or "path/file.rs::Type::foo" - if qualified.ends_with(&format!("::{target}")) - || qualified.ends_with(&format!(".{target}")) - { + if qualified.ends_with(&suffix_pattern) || qualified.ends_with(&dot_pattern) { return Some(*idx); } } - // Try matching method calls like "self.method" -> look for "::method" + // 3. Module-path match: "config::load" → try matching the last N + // segments of the target against node qualified names. + // This handles cross-file calls like `crate::config::load` or + // `super::handlers::process` where the prefix differs. + if target.contains("::") { + // Strip common Rust path prefixes + let stripped = target + .strip_prefix("crate::") + .or_else(|| target.strip_prefix("super::")) + .or_else(|| target.strip_prefix("self::")) + .unwrap_or(target); + + let segments: Vec<&str> = stripped.split("::").collect(); + // Try matching progressively shorter suffixes + for start in 0..segments.len() { + let suffix = segments[start..].join("::"); + let pattern = format!("::{suffix}"); + for (qualified, idx) in node_map { + if qualified.ends_with(&pattern) { + return Some(*idx); + } + } + } + } + + // 4.
Self-method: "self.method" → "::method" if let Some(method_name) = target.strip_prefix("self.") { + let pattern = format!("::{method_name}"); for (qualified, idx) in node_map { - if qualified.ends_with(&format!("::{method_name}")) - || qualified.ends_with(&format!(".{method_name}")) - { + if qualified.ends_with(&pattern) { return Some(*idx); } } @@ -353,4 +450,83 @@ mod tests { assert!(code_graph.node_map.contains_key("a::c")); assert!(code_graph.node_map.contains_key("a::d")); } + + #[test] + fn test_contains_edges_synthesised() { + let engine = GraphEngine::new(1000); + let mut output = ParseOutput::default(); + // File → Module → Function hierarchy + output.nodes.push(make_node("src/main.rs")); + output.nodes.push(make_node("src/main.rs::config")); + output.nodes.push(make_node("src/main.rs::config::load")); + + let code_graph = engine.build_petgraph(output).unwrap(); + + // Should have 2 Contains edges: + // src/main.rs → src/main.rs::config + // src/main.rs::config → src/main.rs::config::load + let contains_edges: Vec<_> = code_graph + .edges + .iter() + .filter(|e| matches!(e.kind, CodeEdgeKind::Contains)) + .collect(); + assert_eq!(contains_edges.len(), 2, "expected 2 Contains edges"); + + let sources: Vec<&str> = contains_edges.iter().map(|e| e.source.as_str()).collect(); + assert!(sources.contains(&"src/main.rs")); + assert!(sources.contains(&"src/main.rs::config")); + } + + #[test] + fn test_contains_edges_no_duplicates_with_existing_edges() { + let engine = GraphEngine::new(1000); + let mut output = ParseOutput::default(); + output.nodes.push(make_node("src/main.rs")); + output.nodes.push(make_node("src/main.rs::foo")); + + // Explicit Calls edge (foo calls itself? 
just for testing) + output.edges.push(CodeEdge { + id: None, + repo_id: "test".to_string(), + graph_build_id: "build1".to_string(), + source: "src/main.rs::foo".to_string(), + target: "src/main.rs::foo".to_string(), + kind: CodeEdgeKind::Calls, + file_path: "src/main.rs".to_string(), + line_number: Some(1), + }); + + let code_graph = engine.build_petgraph(output).unwrap(); + + // 1 Calls + 1 Contains = 2 edges total + assert_eq!(code_graph.edges.len(), 2); + } + + #[test] + fn test_cross_file_resolution_with_module_path() { + let engine = GraphEngine::new(1000); + let node_map = build_test_node_map(&["src/config.rs::load_config", "src/main.rs::main"]); + // "crate::config::load_config" should resolve to "src/config.rs::load_config" + let result = engine.resolve_edge_target("crate::config::load_config", &node_map); + assert!(result.is_some(), "cross-file crate:: path should resolve"); + } + + #[test] + fn test_find_parent_qname() { + let node_map = build_test_node_map(&[ + "src/main.rs", + "src/main.rs::config", + "src/main.rs::config::load", + ]); + + assert_eq!( + find_parent_qname("src/main.rs::config::load", &node_map), + Some("src/main.rs::config".to_string()) + ); + assert_eq!( + find_parent_qname("src/main.rs::config", &node_map), + Some("src/main.rs".to_string()) + ); + assert_eq!(find_parent_qname("src/main.rs", &node_map), None); + } } -- 2.49.1 From d418f8386f2524d29c51d10a78bc3e2a1ddad87e Mon Sep 17 00:00:00 2001 From: Sharang Parnerkar <30073382+mighty840@users.noreply.github.com> Date: Mon, 30 Mar 2026 11:51:39 +0200 Subject: [PATCH 3/3] fix: exclude E2E tests from regular CI (no MongoDB available) The E2E tests require MongoDB and only run in the nightly workflow. Use --lib flag to run only unit tests in the regular CI check job. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- .gitea/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitea/workflows/ci.yml b/.gitea/workflows/ci.yml index bfb37d9..673e175 100644 --- a/.gitea/workflows/ci.yml +++ b/.gitea/workflows/ci.yml @@ -70,7 +70,7 @@ jobs: # Tests (reuses compilation artifacts from clippy) - name: Tests (core + agent) - run: cargo test -p compliance-core -p compliance-agent + run: cargo test -p compliance-core -p compliance-agent --lib - name: Tests (dashboard server) run: cargo test -p compliance-dashboard --features server --no-default-features - name: Tests (dashboard web) -- 2.49.1