Files
compliance-scanner-agent/compliance-agent/src/database.rs
Sharang Parnerkar 89c30a62dd
Some checks failed
CI / Security Audit (push) Has been cancelled
CI / Tests (push) Has been cancelled
CI / Clippy (push) Has been cancelled
CI / Format (push) Failing after 3s
Add RAG embedding and AI chat feature
Implement end-to-end RAG pipeline: AST-aware code chunking, LiteLLM
embedding generation, MongoDB vector storage with brute-force cosine
similarity fallback for self-hosted instances, and a chat API with
RAG-augmented responses. Add dedicated /chat/:repo_id dashboard page
with embedding build controls, message history, and source reference
cards.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-04 23:29:40 +01:00

248 lines
7.6 KiB
Rust

use mongodb::bson::doc;
use mongodb::options::IndexOptions;
use mongodb::{Client, Collection, IndexModel};
use compliance_core::models::*;
use crate::error::AgentError;
/// Handle to the agent's MongoDB database.
///
/// Wraps the driver's `mongodb::Database` and exposes strongly-typed
/// accessors for each collection the compliance agent uses.
/// NOTE(review): `Clone` is derived on the assumption that the driver
/// handle is cheap to clone — confirm against the mongodb crate version.
#[derive(Clone, Debug)]
pub struct Database {
    // Underlying driver handle; exposed read-only via `inner()`.
    inner: mongodb::Database,
}
impl Database {
pub async fn connect(uri: &str, db_name: &str) -> Result<Self, AgentError> {
let client = Client::with_uri_str(uri).await?;
let db = client.database(db_name);
db.run_command(doc! { "ping": 1 }).await?;
tracing::info!("Connected to MongoDB database '{db_name}'");
Ok(Self { inner: db })
}
pub async fn ensure_indexes(&self) -> Result<(), AgentError> {
// repositories: unique git_url
self.repositories()
.create_index(
IndexModel::builder()
.keys(doc! { "git_url": 1 })
.options(IndexOptions::builder().unique(true).build())
.build(),
)
.await?;
// findings: unique fingerprint
self.findings()
.create_index(
IndexModel::builder()
.keys(doc! { "fingerprint": 1 })
.options(IndexOptions::builder().unique(true).build())
.build(),
)
.await?;
// findings: repo_id + severity compound
self.findings()
.create_index(
IndexModel::builder()
.keys(doc! { "repo_id": 1, "severity": 1 })
.build(),
)
.await?;
// scan_runs: repo_id + started_at descending
self.scan_runs()
.create_index(
IndexModel::builder()
.keys(doc! { "repo_id": 1, "started_at": -1 })
.build(),
)
.await?;
// sbom_entries: compound
self.sbom_entries()
.create_index(
IndexModel::builder()
.keys(doc! { "repo_id": 1, "name": 1, "version": 1 })
.build(),
)
.await?;
// cve_alerts: unique cve_id + repo_id
self.cve_alerts()
.create_index(
IndexModel::builder()
.keys(doc! { "cve_id": 1, "repo_id": 1 })
.options(IndexOptions::builder().unique(true).build())
.build(),
)
.await?;
// tracker_issues: unique finding_id
self.tracker_issues()
.create_index(
IndexModel::builder()
.keys(doc! { "finding_id": 1 })
.options(IndexOptions::builder().unique(true).build())
.build(),
)
.await?;
// graph_nodes: compound (repo_id, graph_build_id)
self.graph_nodes()
.create_index(
IndexModel::builder()
.keys(doc! { "repo_id": 1, "graph_build_id": 1 })
.build(),
)
.await?;
// graph_edges: compound (repo_id, graph_build_id)
self.graph_edges()
.create_index(
IndexModel::builder()
.keys(doc! { "repo_id": 1, "graph_build_id": 1 })
.build(),
)
.await?;
// graph_builds: compound (repo_id, started_at DESC)
self.graph_builds()
.create_index(
IndexModel::builder()
.keys(doc! { "repo_id": 1, "started_at": -1 })
.build(),
)
.await?;
// impact_analyses: unique (repo_id, finding_id)
self.impact_analyses()
.create_index(
IndexModel::builder()
.keys(doc! { "repo_id": 1, "finding_id": 1 })
.options(IndexOptions::builder().unique(true).build())
.build(),
)
.await?;
// dast_targets: index on repo_id
self.dast_targets()
.create_index(IndexModel::builder().keys(doc! { "repo_id": 1 }).build())
.await?;
// dast_scan_runs: compound (target_id, started_at DESC)
self.dast_scan_runs()
.create_index(
IndexModel::builder()
.keys(doc! { "target_id": 1, "started_at": -1 })
.build(),
)
.await?;
// dast_findings: compound (scan_run_id, vuln_type)
self.dast_findings()
.create_index(
IndexModel::builder()
.keys(doc! { "scan_run_id": 1, "vuln_type": 1 })
.build(),
)
.await?;
// code_embeddings: compound (repo_id, graph_build_id)
self.code_embeddings()
.create_index(
IndexModel::builder()
.keys(doc! { "repo_id": 1, "graph_build_id": 1 })
.build(),
)
.await?;
// embedding_builds: compound (repo_id, started_at DESC)
self.embedding_builds()
.create_index(
IndexModel::builder()
.keys(doc! { "repo_id": 1, "started_at": -1 })
.build(),
)
.await?;
tracing::info!("Database indexes ensured");
Ok(())
}
pub fn repositories(&self) -> Collection<TrackedRepository> {
self.inner.collection("repositories")
}
pub fn findings(&self) -> Collection<Finding> {
self.inner.collection("findings")
}
pub fn scan_runs(&self) -> Collection<ScanRun> {
self.inner.collection("scan_runs")
}
pub fn sbom_entries(&self) -> Collection<SbomEntry> {
self.inner.collection("sbom_entries")
}
pub fn cve_alerts(&self) -> Collection<CveAlert> {
self.inner.collection("cve_alerts")
}
pub fn tracker_issues(&self) -> Collection<TrackerIssue> {
self.inner.collection("tracker_issues")
}
// Graph collections
pub fn graph_nodes(&self) -> Collection<compliance_core::models::graph::CodeNode> {
self.inner.collection("graph_nodes")
}
pub fn graph_edges(&self) -> Collection<compliance_core::models::graph::CodeEdge> {
self.inner.collection("graph_edges")
}
pub fn graph_builds(&self) -> Collection<compliance_core::models::graph::GraphBuildRun> {
self.inner.collection("graph_builds")
}
pub fn impact_analyses(&self) -> Collection<compliance_core::models::graph::ImpactAnalysis> {
self.inner.collection("impact_analyses")
}
// DAST collections
pub fn dast_targets(&self) -> Collection<DastTarget> {
self.inner.collection("dast_targets")
}
pub fn dast_scan_runs(&self) -> Collection<DastScanRun> {
self.inner.collection("dast_scan_runs")
}
pub fn dast_findings(&self) -> Collection<DastFinding> {
self.inner.collection("dast_findings")
}
// Embedding collections
pub fn code_embeddings(&self) -> Collection<compliance_core::models::embedding::CodeEmbedding> {
self.inner.collection("code_embeddings")
}
pub fn embedding_builds(
&self,
) -> Collection<compliance_core::models::embedding::EmbeddingBuildRun> {
self.inner.collection("embedding_builds")
}
#[allow(dead_code)]
pub fn raw_collection(&self, name: &str) -> Collection<mongodb::bson::Document> {
self.inner.collection(name)
}
/// Get the raw MongoDB database handle (for graph persistence)
pub fn inner(&self) -> &mongodb::Database {
&self.inner
}
}