Files
compliance-scanner-agent/compliance-graph/src/graph/community.rs
Sharang Parnerkar 42cabf0582
All checks were successful
CI / Format (push) Successful in 2s
CI / Clippy (push) Successful in 2m56s
CI / Security Audit (push) Successful in 1m25s
CI / Tests (push) Successful in 3m57s
feat: rag-embedding-ai-chat (#1)
Co-authored-by: Sharang Parnerkar <parnerkarsharang@gmail.com>
Reviewed-on: #1
2026-03-06 21:54:15 +00:00

256 lines
8.4 KiB
Rust

use std::collections::HashMap;
use petgraph::graph::NodeIndex;
use petgraph::visit::EdgeRef;
use tracing::info;
use super::engine::CodeGraph;
/// Run the local-move phase of Louvain community detection on the code graph
/// and return the number of communities found.
///
/// The graph is treated as undirected with every edge weighted 1.0: both the
/// outgoing and the incoming edges of a node count towards its degree and
/// towards its links to neighbouring communities.
///
/// This function only reads `code_graph`; it does not store any assignment on
/// the nodes. Use [`apply_communities`] to write community ids into the nodes.
///
/// Returns `0` for an empty graph and the node count when the graph has no
/// edges (every node is its own community).
pub fn detect_communities(code_graph: &CodeGraph) -> u32 {
    let graph = &code_graph.graph;
    let node_count = graph.node_count();
    if node_count == 0 {
        return 0;
    }

    // Initialize: each node starts in its own community, identified by its index.
    let mut community: HashMap<NodeIndex, u32> = graph
        .node_indices()
        .map(|idx| (idx, idx.index() as u32))
        .collect();

    // Total edge weight (all edges weigh 1.0).
    let total_edges = graph.edge_count() as f64;
    if total_edges == 0.0 {
        // All nodes are isolated, each is its own community.
        return node_count as u32;
    }
    let m2 = 2.0 * total_edges;

    // Undirected degree: outgoing + incoming edges. Counting only outgoing
    // edges would make the degrees sum to |E| instead of 2|E| (= m2) and
    // would disagree with the per-community edge counts computed below.
    let degree: HashMap<NodeIndex, f64> = graph
        .node_indices()
        .map(|idx| {
            let outgoing = graph.edges(idx).count();
            let incoming = graph
                .edges_directed(idx, petgraph::Direction::Incoming)
                .count();
            (idx, (outgoing + incoming) as f64)
        })
        .collect();

    // Sum of member degrees per community. Built once and updated on every
    // move instead of being rebuilt from scratch for each visited node.
    let mut comm_totals: HashMap<u32, f64> = HashMap::new();
    for (n, &c) in &community {
        *comm_totals.entry(c).or_insert(0.0) += degree[n];
    }

    // Louvain phase 1: local moves, repeated until a full sweep changes nothing
    // or the iteration cap is reached.
    let mut improved = true;
    let mut iterations = 0;
    let max_iterations = 50;
    while improved && iterations < max_iterations {
        improved = false;
        iterations += 1;
        for node in graph.node_indices() {
            let current_comm = community[&node];
            let node_deg = degree[&node];

            // Number of edges from this node into each neighbouring community.
            // A BTreeMap keeps candidates ordered by community id so that ties
            // in gain are broken the same way on every run.
            let mut comm_edges: std::collections::BTreeMap<u32, f64> =
                std::collections::BTreeMap::new();
            for edge in graph.edges(node) {
                let neighbor_comm = community[&edge.target()];
                *comm_edges.entry(neighbor_comm).or_insert(0.0) += 1.0;
            }
            // Also count incoming edges (undirected treatment).
            for edge in graph.edges_directed(node, petgraph::Direction::Incoming) {
                let neighbor_comm = community[&edge.source()];
                *comm_edges.entry(neighbor_comm).or_insert(0.0) += 1.0;
            }

            let current_total = comm_totals.get(&current_comm).copied().unwrap_or(0.0);
            let edges_to_current = comm_edges.get(&current_comm).copied().unwrap_or(0.0);
            // Modularity contribution lost by removing the node from its community.
            let remove_cost = edges_to_current - (current_total - node_deg) * node_deg / m2;

            // Pick the neighbouring community with the largest strictly positive gain.
            let mut best_comm = current_comm;
            let mut best_gain = 0.0;
            for (&candidate_comm, &edges_to_candidate) in &comm_edges {
                if candidate_comm == current_comm {
                    continue;
                }
                let candidate_total = comm_totals.get(&candidate_comm).copied().unwrap_or(0.0);
                // Modularity contribution gained by adding the node to the candidate.
                let add_gain = edges_to_candidate - candidate_total * node_deg / m2;
                let gain = add_gain - remove_cost;
                if gain > best_gain {
                    best_gain = gain;
                    best_comm = candidate_comm;
                }
            }

            if best_comm != current_comm {
                community.insert(node, best_comm);
                // Keep the per-community degree sums in step with the move.
                *comm_totals.entry(current_comm).or_insert(0.0) -= node_deg;
                *comm_totals.entry(best_comm).or_insert(0.0) += node_deg;
                improved = true;
            }
        }
    }

    // Only the count is returned, so counting distinct ids is enough; no
    // renumbering is needed here.
    let num_communities = community
        .values()
        .copied()
        .collect::<std::collections::HashSet<u32>>()
        .len() as u32;

    info!(
        communities = num_communities,
        iterations, "Community detection complete"
    );

    // NOTE: the per-node assignment computed here is local to this function
    // and is discarded on return.
    num_communities
}
/// Detect communities and apply the assignments back to the code nodes.
///
/// Thin public entry point that delegates to
/// `detect_communities_with_assignment`, which sets `community_id` on the
/// entries of `code_graph.nodes` that have a `graph_index`.
///
/// Returns the number of communities reported by that function.
pub fn apply_communities(code_graph: &mut CodeGraph) -> u32 {
detect_communities_with_assignment(code_graph)
}
/// Detect communities and write the assignments into the nodes.
///
/// Runs the local-move phase of Louvain on `code_graph.graph`, treating it as
/// undirected with every edge weighted 1.0, then stores a contiguous community
/// id (`0..count`) in `community_id` of every entry of `code_graph.nodes` that
/// has a `graph_index`.
///
/// Ids are assigned deterministically: ties between equally good candidate
/// communities go to the lowest community id, and final ids are numbered in
/// node-index order, so the same graph yields the same ids on every run.
///
/// Returns the number of communities: `0` for an empty graph, and the node
/// count when the graph has no edges (each node then gets its own
/// `graph_index` as community id).
fn detect_communities_with_assignment(code_graph: &mut CodeGraph) -> u32 {
    let graph = &code_graph.graph;
    let node_count = graph.node_count();
    if node_count == 0 {
        return 0;
    }

    // Each node starts in its own community, identified by its index.
    let mut community: HashMap<NodeIndex, u32> = graph
        .node_indices()
        .map(|idx| (idx, idx.index() as u32))
        .collect();

    let total_edges = graph.edge_count() as f64;
    if total_edges == 0.0 {
        // No edges: every node is isolated and forms its own community.
        for node in &mut code_graph.nodes {
            if let Some(gi) = node.graph_index {
                node.community_id = Some(gi);
            }
        }
        return node_count as u32;
    }
    let m2 = 2.0 * total_edges;

    // Undirected degree: outgoing + incoming edges, so degrees sum to m2.
    let degree: HashMap<NodeIndex, f64> = graph
        .node_indices()
        .map(|idx| {
            let outgoing = graph.edges(idx).count();
            let incoming = graph
                .edges_directed(idx, petgraph::Direction::Incoming)
                .count();
            (idx, (outgoing + incoming) as f64)
        })
        .collect();

    // Sum of member degrees per community. Built once and updated on every
    // move instead of being rebuilt from scratch for each visited node.
    let mut comm_totals: HashMap<u32, f64> = HashMap::new();
    for (n, &c) in &community {
        *comm_totals.entry(c).or_insert(0.0) += degree[n];
    }

    // Local moves, repeated until a full sweep changes nothing or the
    // iteration cap is reached.
    let mut improved = true;
    let mut iterations = 0;
    let max_iterations = 50;
    while improved && iterations < max_iterations {
        improved = false;
        iterations += 1;
        for node in graph.node_indices() {
            let current_comm = community[&node];
            let node_deg = degree[&node];

            // Number of edges from this node into each neighbouring community
            // (both directions). Ordered by community id so that ties in gain
            // are broken the same way on every run.
            let mut comm_edges: std::collections::BTreeMap<u32, f64> =
                std::collections::BTreeMap::new();
            for edge in graph.edges(node) {
                let neighbor_comm = community[&edge.target()];
                *comm_edges.entry(neighbor_comm).or_insert(0.0) += 1.0;
            }
            for edge in graph.edges_directed(node, petgraph::Direction::Incoming) {
                let neighbor_comm = community[&edge.source()];
                *comm_edges.entry(neighbor_comm).or_insert(0.0) += 1.0;
            }

            let current_total = comm_totals.get(&current_comm).copied().unwrap_or(0.0);
            let edges_to_current = comm_edges.get(&current_comm).copied().unwrap_or(0.0);
            // Modularity contribution lost by removing the node from its community.
            let remove_cost = edges_to_current - (current_total - node_deg) * node_deg / m2;

            // Pick the neighbouring community with the largest strictly positive gain.
            let mut best_comm = current_comm;
            let mut best_gain = 0.0;
            for (&candidate_comm, &edges_to_candidate) in &comm_edges {
                if candidate_comm == current_comm {
                    continue;
                }
                let candidate_total = comm_totals.get(&candidate_comm).copied().unwrap_or(0.0);
                // Modularity contribution gained by adding the node to the candidate.
                let add_gain = edges_to_candidate - candidate_total * node_deg / m2;
                let gain = add_gain - remove_cost;
                if gain > best_gain {
                    best_gain = gain;
                    best_comm = candidate_comm;
                }
            }

            if best_comm != current_comm {
                community.insert(node, best_comm);
                // Keep the per-community degree sums in step with the move.
                *comm_totals.entry(current_comm).or_insert(0.0) -= node_deg;
                *comm_totals.entry(best_comm).or_insert(0.0) += node_deg;
                improved = true;
            }
        }
    }

    // Renumber communities to contiguous ids, walking nodes in index order so
    // the numbering does not depend on hash-map iteration order.
    let mut comm_remap: HashMap<u32, u32> = HashMap::new();
    let mut next_id: u32 = 0;
    for idx in graph.node_indices() {
        if let std::collections::hash_map::Entry::Vacant(slot) = comm_remap.entry(community[&idx])
        {
            slot.insert(next_id);
            next_id += 1;
        }
    }

    // Apply to nodes: entries without a graph index are left untouched.
    for node in &mut code_graph.nodes {
        if let Some(gi) = node.graph_index {
            let idx = NodeIndex::new(gi as usize);
            if let Some(&comm) = community.get(&idx) {
                let remapped = comm_remap.get(&comm).copied().unwrap_or(comm);
                node.community_id = Some(remapped);
            }
        }
    }

    next_id
}