Co-authored-by: Sharang Parnerkar <parnerkarsharang@gmail.com> Reviewed-on: #1
175 lines
5.6 KiB
Rust
175 lines
5.6 KiB
Rust
use std::collections::HashMap;
|
|
use std::path::Path;
|
|
|
|
use chrono::Utc;
|
|
use compliance_core::error::CoreError;
|
|
use compliance_core::models::graph::{
|
|
CodeEdge, CodeEdgeKind, CodeNode, GraphBuildRun, GraphBuildStatus,
|
|
};
|
|
use compliance_core::traits::graph_builder::ParseOutput;
|
|
use petgraph::graph::{DiGraph, NodeIndex};
|
|
use tracing::info;
|
|
|
|
use crate::parsers::registry::ParserRegistry;
|
|
|
|
use super::community::detect_communities;
|
|
use super::impact::ImpactAnalyzer;
|
|
|
|
/// The main graph engine that builds and manages code knowledge graphs
|
|
pub struct GraphEngine {
|
|
parser_registry: ParserRegistry,
|
|
max_nodes: u32,
|
|
}
|
|
|
|
/// In-memory representation of a built code graph
|
|
pub struct CodeGraph {
|
|
pub graph: DiGraph<String, CodeEdgeKind>,
|
|
pub node_map: HashMap<String, NodeIndex>,
|
|
pub nodes: Vec<CodeNode>,
|
|
pub edges: Vec<CodeEdge>,
|
|
}
|
|
|
|
impl GraphEngine {
|
|
pub fn new(max_nodes: u32) -> Self {
|
|
Self {
|
|
parser_registry: ParserRegistry::new(),
|
|
max_nodes,
|
|
}
|
|
}
|
|
|
|
/// Build a code graph from a repository directory
|
|
pub fn build_graph(
|
|
&self,
|
|
repo_path: &Path,
|
|
repo_id: &str,
|
|
graph_build_id: &str,
|
|
) -> Result<(CodeGraph, GraphBuildRun), CoreError> {
|
|
let mut build_run = GraphBuildRun::new(repo_id.to_string());
|
|
|
|
info!(repo_id, path = %repo_path.display(), "Starting graph build");
|
|
|
|
// Phase 1: Parse all files
|
|
let parse_output = self.parser_registry.parse_directory(
|
|
repo_path,
|
|
repo_id,
|
|
graph_build_id,
|
|
self.max_nodes,
|
|
)?;
|
|
|
|
// Phase 2: Build petgraph
|
|
let code_graph = self.build_petgraph(parse_output)?;
|
|
|
|
// Phase 3: Run community detection
|
|
let community_count = detect_communities(&code_graph);
|
|
|
|
// Collect language stats
|
|
let mut languages: Vec<String> = code_graph
|
|
.nodes
|
|
.iter()
|
|
.map(|n| n.language.clone())
|
|
.collect::<std::collections::HashSet<_>>()
|
|
.into_iter()
|
|
.collect();
|
|
languages.sort();
|
|
|
|
build_run.node_count = code_graph.nodes.len() as u32;
|
|
build_run.edge_count = code_graph.edges.len() as u32;
|
|
build_run.community_count = community_count;
|
|
build_run.languages_parsed = languages;
|
|
build_run.status = GraphBuildStatus::Completed;
|
|
build_run.completed_at = Some(Utc::now());
|
|
|
|
info!(
|
|
nodes = build_run.node_count,
|
|
edges = build_run.edge_count,
|
|
communities = build_run.community_count,
|
|
"Graph build complete"
|
|
);
|
|
|
|
Ok((code_graph, build_run))
|
|
}
|
|
|
|
/// Build petgraph from parsed output, resolving edges to node indices
|
|
fn build_petgraph(&self, parse_output: ParseOutput) -> Result<CodeGraph, CoreError> {
|
|
let mut graph = DiGraph::new();
|
|
let mut node_map: HashMap<String, NodeIndex> = HashMap::new();
|
|
let mut nodes = parse_output.nodes;
|
|
|
|
// Add all nodes to the graph
|
|
for node in &mut nodes {
|
|
let idx = graph.add_node(node.qualified_name.clone());
|
|
node.graph_index = Some(idx.index() as u32);
|
|
node_map.insert(node.qualified_name.clone(), idx);
|
|
}
|
|
|
|
// Resolve and add edges — rewrite target to the resolved qualified name
|
|
// so the persisted edge references match node qualified_names.
|
|
let mut resolved_edges = Vec::new();
|
|
for mut edge in parse_output.edges {
|
|
let source_idx = node_map.get(&edge.source);
|
|
let resolved = self.resolve_edge_target(&edge.target, &node_map);
|
|
|
|
if let (Some(&src), Some(tgt)) = (source_idx, resolved) {
|
|
// Update target to the resolved qualified name
|
|
let resolved_name = node_map
|
|
.iter()
|
|
.find(|(_, &idx)| idx == tgt)
|
|
.map(|(name, _)| name.clone());
|
|
if let Some(name) = resolved_name {
|
|
edge.target = name;
|
|
}
|
|
graph.add_edge(src, tgt, edge.kind.clone());
|
|
resolved_edges.push(edge);
|
|
}
|
|
// Skip unresolved edges (cross-file, external deps) — conservative approach
|
|
}
|
|
|
|
Ok(CodeGraph {
|
|
graph,
|
|
node_map,
|
|
nodes,
|
|
edges: resolved_edges,
|
|
})
|
|
}
|
|
|
|
/// Try to resolve an edge target to a known node
|
|
fn resolve_edge_target(
|
|
&self,
|
|
target: &str,
|
|
node_map: &HashMap<String, NodeIndex>,
|
|
) -> Option<NodeIndex> {
|
|
// Direct match
|
|
if let Some(idx) = node_map.get(target) {
|
|
return Some(*idx);
|
|
}
|
|
|
|
// Try matching just the function/type name (intra-file resolution)
|
|
for (qualified, idx) in node_map {
|
|
// Match "foo" to "path/file.rs::foo" or "path/file.rs::Type::foo"
|
|
if qualified.ends_with(&format!("::{target}"))
|
|
|| qualified.ends_with(&format!(".{target}"))
|
|
{
|
|
return Some(*idx);
|
|
}
|
|
}
|
|
|
|
// Try matching method calls like "self.method" -> look for "::method"
|
|
if let Some(method_name) = target.strip_prefix("self.") {
|
|
for (qualified, idx) in node_map {
|
|
if qualified.ends_with(&format!("::{method_name}"))
|
|
|| qualified.ends_with(&format!(".{method_name}"))
|
|
{
|
|
return Some(*idx);
|
|
}
|
|
}
|
|
}
|
|
|
|
None
|
|
}
|
|
|
|
/// Get the impact analyzer for a built graph
|
|
pub fn impact_analyzer(code_graph: &CodeGraph) -> ImpactAnalyzer<'_> {
|
|
ImpactAnalyzer::new(code_graph)
|
|
}
|
|
}
|