Co-authored-by: Sharang Parnerkar <parnerkarsharang@gmail.com> Reviewed-on: #1
256 lines
8.4 KiB
Rust
256 lines
8.4 KiB
Rust
use std::collections::HashMap;
|
|
|
|
use petgraph::graph::NodeIndex;
|
|
use petgraph::visit::EdgeRef;
|
|
use tracing::info;
|
|
|
|
use super::engine::CodeGraph;
|
|
|
|
/// Run Louvain community detection on the code graph.
|
|
/// Returns the number of communities detected.
|
|
/// Mutates node community_id in place.
|
|
pub fn detect_communities(code_graph: &CodeGraph) -> u32 {
|
|
let graph = &code_graph.graph;
|
|
let node_count = graph.node_count();
|
|
|
|
if node_count == 0 {
|
|
return 0;
|
|
}
|
|
|
|
// Initialize: each node in its own community
|
|
let mut community: HashMap<NodeIndex, u32> = HashMap::new();
|
|
for idx in graph.node_indices() {
|
|
community.insert(idx, idx.index() as u32);
|
|
}
|
|
|
|
// Compute total edge weight (all edges weight 1.0)
|
|
let total_edges = graph.edge_count() as f64;
|
|
if total_edges == 0.0 {
|
|
// All nodes are isolated, each is its own community
|
|
return node_count as u32;
|
|
}
|
|
|
|
let m2 = 2.0 * total_edges;
|
|
|
|
// Pre-compute node degrees
|
|
let mut degree: HashMap<NodeIndex, f64> = HashMap::new();
|
|
for idx in graph.node_indices() {
|
|
let d = graph.edges(idx).count() as f64;
|
|
degree.insert(idx, d);
|
|
}
|
|
|
|
// Louvain phase 1: local moves
|
|
let mut improved = true;
|
|
let mut iterations = 0;
|
|
let max_iterations = 50;
|
|
|
|
while improved && iterations < max_iterations {
|
|
improved = false;
|
|
iterations += 1;
|
|
|
|
for node in graph.node_indices() {
|
|
let current_comm = community[&node];
|
|
let node_deg = degree[&node];
|
|
|
|
// Compute edges to each neighboring community
|
|
let mut comm_edges: HashMap<u32, f64> = HashMap::new();
|
|
for edge in graph.edges(node) {
|
|
let neighbor = edge.target();
|
|
let neighbor_comm = community[&neighbor];
|
|
*comm_edges.entry(neighbor_comm).or_insert(0.0) += 1.0;
|
|
}
|
|
// Also check incoming edges (undirected treatment)
|
|
for edge in graph.edges_directed(node, petgraph::Direction::Incoming) {
|
|
let neighbor = edge.source();
|
|
let neighbor_comm = community[&neighbor];
|
|
*comm_edges.entry(neighbor_comm).or_insert(0.0) += 1.0;
|
|
}
|
|
|
|
// Compute community totals (sum of degrees in each community)
|
|
let mut comm_totals: HashMap<u32, f64> = HashMap::new();
|
|
for (n, &c) in &community {
|
|
*comm_totals.entry(c).or_insert(0.0) += degree[n];
|
|
}
|
|
|
|
// Find best community
|
|
let current_total = comm_totals.get(¤t_comm).copied().unwrap_or(0.0);
|
|
let edges_to_current = comm_edges.get(¤t_comm).copied().unwrap_or(0.0);
|
|
|
|
// Modularity gain from removing node from current community
|
|
let remove_cost = edges_to_current - (current_total - node_deg) * node_deg / m2;
|
|
|
|
let mut best_comm = current_comm;
|
|
let mut best_gain = 0.0;
|
|
|
|
for (&candidate_comm, &edges_to_candidate) in &comm_edges {
|
|
if candidate_comm == current_comm {
|
|
continue;
|
|
}
|
|
let candidate_total = comm_totals.get(&candidate_comm).copied().unwrap_or(0.0);
|
|
|
|
// Modularity gain from adding node to candidate community
|
|
let add_gain = edges_to_candidate - candidate_total * node_deg / m2;
|
|
let gain = add_gain - remove_cost;
|
|
|
|
if gain > best_gain {
|
|
best_gain = gain;
|
|
best_comm = candidate_comm;
|
|
}
|
|
}
|
|
|
|
if best_comm != current_comm {
|
|
community.insert(node, best_comm);
|
|
improved = true;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Renumber communities to be contiguous
|
|
let mut comm_remap: HashMap<u32, u32> = HashMap::new();
|
|
let mut next_id: u32 = 0;
|
|
for &c in community.values() {
|
|
if let std::collections::hash_map::Entry::Vacant(e) = comm_remap.entry(c) {
|
|
e.insert(next_id);
|
|
next_id += 1;
|
|
}
|
|
}
|
|
|
|
// Apply to community map
|
|
for c in community.values_mut() {
|
|
if let Some(&new_id) = comm_remap.get(c) {
|
|
*c = new_id;
|
|
}
|
|
}
|
|
|
|
let num_communities = next_id;
|
|
info!(
|
|
communities = num_communities,
|
|
iterations, "Community detection complete"
|
|
);
|
|
|
|
// NOTE: community IDs are stored in the HashMap but need to be applied
|
|
// back to the CodeGraph nodes by the caller (engine) if needed for persistence.
|
|
// For now we return the count; the full assignment is available via the map.
|
|
|
|
num_communities
|
|
}
|
|
|
|
/// Apply community assignments back to code nodes
|
|
pub fn apply_communities(code_graph: &mut CodeGraph) -> u32 {
|
|
detect_communities_with_assignment(code_graph)
|
|
}
|
|
|
|
/// Detect communities and write assignments into the nodes
|
|
fn detect_communities_with_assignment(code_graph: &mut CodeGraph) -> u32 {
|
|
let graph = &code_graph.graph;
|
|
let node_count = graph.node_count();
|
|
|
|
if node_count == 0 {
|
|
return 0;
|
|
}
|
|
|
|
let mut community: HashMap<NodeIndex, u32> = HashMap::new();
|
|
for idx in graph.node_indices() {
|
|
community.insert(idx, idx.index() as u32);
|
|
}
|
|
|
|
let total_edges = graph.edge_count() as f64;
|
|
if total_edges == 0.0 {
|
|
for node in &mut code_graph.nodes {
|
|
if let Some(gi) = node.graph_index {
|
|
node.community_id = Some(gi);
|
|
}
|
|
}
|
|
return node_count as u32;
|
|
}
|
|
|
|
let m2 = 2.0 * total_edges;
|
|
|
|
let mut degree: HashMap<NodeIndex, f64> = HashMap::new();
|
|
for idx in graph.node_indices() {
|
|
let d = (graph.edges(idx).count()
|
|
+ graph
|
|
.edges_directed(idx, petgraph::Direction::Incoming)
|
|
.count()) as f64;
|
|
degree.insert(idx, d);
|
|
}
|
|
|
|
let mut improved = true;
|
|
let mut iterations = 0;
|
|
let max_iterations = 50;
|
|
|
|
while improved && iterations < max_iterations {
|
|
improved = false;
|
|
iterations += 1;
|
|
|
|
for node in graph.node_indices() {
|
|
let current_comm = community[&node];
|
|
let node_deg = degree[&node];
|
|
|
|
let mut comm_edges: HashMap<u32, f64> = HashMap::new();
|
|
for edge in graph.edges(node) {
|
|
let neighbor_comm = community[&edge.target()];
|
|
*comm_edges.entry(neighbor_comm).or_insert(0.0) += 1.0;
|
|
}
|
|
for edge in graph.edges_directed(node, petgraph::Direction::Incoming) {
|
|
let neighbor_comm = community[&edge.source()];
|
|
*comm_edges.entry(neighbor_comm).or_insert(0.0) += 1.0;
|
|
}
|
|
|
|
let mut comm_totals: HashMap<u32, f64> = HashMap::new();
|
|
for (n, &c) in &community {
|
|
*comm_totals.entry(c).or_insert(0.0) += degree[n];
|
|
}
|
|
|
|
let current_total = comm_totals.get(¤t_comm).copied().unwrap_or(0.0);
|
|
let edges_to_current = comm_edges.get(¤t_comm).copied().unwrap_or(0.0);
|
|
let remove_cost = edges_to_current - (current_total - node_deg) * node_deg / m2;
|
|
|
|
let mut best_comm = current_comm;
|
|
let mut best_gain = 0.0;
|
|
|
|
for (&candidate_comm, &edges_to_candidate) in &comm_edges {
|
|
if candidate_comm == current_comm {
|
|
continue;
|
|
}
|
|
let candidate_total = comm_totals.get(&candidate_comm).copied().unwrap_or(0.0);
|
|
let add_gain = edges_to_candidate - candidate_total * node_deg / m2;
|
|
let gain = add_gain - remove_cost;
|
|
|
|
if gain > best_gain {
|
|
best_gain = gain;
|
|
best_comm = candidate_comm;
|
|
}
|
|
}
|
|
|
|
if best_comm != current_comm {
|
|
community.insert(node, best_comm);
|
|
improved = true;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Renumber
|
|
let mut comm_remap: HashMap<u32, u32> = HashMap::new();
|
|
let mut next_id: u32 = 0;
|
|
for &c in community.values() {
|
|
if let std::collections::hash_map::Entry::Vacant(e) = comm_remap.entry(c) {
|
|
e.insert(next_id);
|
|
next_id += 1;
|
|
}
|
|
}
|
|
|
|
// Apply to nodes
|
|
for node in &mut code_graph.nodes {
|
|
if let Some(gi) = node.graph_index {
|
|
let idx = NodeIndex::new(gi as usize);
|
|
if let Some(&comm) = community.get(&idx) {
|
|
let remapped = comm_remap.get(&comm).copied().unwrap_or(comm);
|
|
node.community_id = Some(remapped);
|
|
}
|
|
}
|
|
}
|
|
|
|
next_id
|
|
}
|