fix: synthesise Contains edges and improve cross-file resolution in code graph
Some checks failed
CI / Check (pull_request) Failing after 12m5s
CI / Detect Changes (pull_request) Has been skipped
CI / Deploy Agent (pull_request) Has been skipped
CI / Deploy Dashboard (pull_request) Has been skipped
CI / Deploy Docs (pull_request) Has been skipped
CI / Deploy MCP (pull_request) Has been skipped
Some checks failed
CI / Check (pull_request) Failing after 12m5s
CI / Detect Changes (pull_request) Has been skipped
CI / Deploy Agent (pull_request) Has been skipped
CI / Deploy Dashboard (pull_request) Has been skipped
CI / Deploy Docs (pull_request) Has been skipped
CI / Deploy MCP (pull_request) Has been skipped
The code graph produced disconnected "islands" because: 1. No Contains edges were created between File/Module nodes and their children (functions, classes, structs), leaving file nodes isolated 2. Cross-file call resolution was too strict — calls like `crate::config::load` failed to resolve to `src/config.rs::load` Fix: - After resolving explicit parser edges, synthesise Contains edges by walking each node's qualified-name hierarchy and linking to the closest ancestor that exists in the node map - Improve edge resolution with module-path matching: strip Rust prefixes (crate::, super::, self::) and try progressively shorter suffix matches for cross-file calls Adds 4 new tests covering Contains edge synthesis, dedup with existing edges, cross-file module path resolution, and parent qname lookup. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -15,6 +15,30 @@ use crate::parsers::registry::ParserRegistry;
|
||||
use super::community::detect_communities;
|
||||
use super::impact::ImpactAnalyzer;
|
||||
|
||||
/// Walk up the qualified-name hierarchy to find the closest ancestor
|
||||
/// that exists in the node map.
|
||||
///
|
||||
/// For `"src/main.rs::config::load"` this tries:
|
||||
/// 1. `"src/main.rs::config"` (trim last `::` segment)
|
||||
/// 2. `"src/main.rs"` (trim again)
|
||||
///
|
||||
/// Returns the first match found, or `None` if the node is a root.
|
||||
fn find_parent_qname(qname: &str, node_map: &HashMap<String, NodeIndex>) -> Option<String> {
|
||||
let mut current = qname.to_string();
|
||||
loop {
|
||||
// Try stripping the last "::" segment
|
||||
if let Some(pos) = current.rfind("::") {
|
||||
current.truncate(pos);
|
||||
if node_map.contains_key(¤t) {
|
||||
return Some(current);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
// No more "::" — this is a top-level node (file), no parent
|
||||
return None;
|
||||
}
|
||||
}
|
||||
|
||||
/// The main graph engine that builds and manages code knowledge graphs
|
||||
pub struct GraphEngine {
|
||||
parser_registry: ParserRegistry,
|
||||
@@ -89,7 +113,12 @@ impl GraphEngine {
|
||||
Ok((code_graph, build_run))
|
||||
}
|
||||
|
||||
/// Build petgraph from parsed output, resolving edges to node indices
|
||||
/// Build petgraph from parsed output, resolving edges to node indices.
|
||||
///
|
||||
/// After resolving the explicit edges from parsers, we synthesise
|
||||
/// `Contains` edges so that every node is reachable from its parent
|
||||
/// file or module. This eliminates disconnected "islands" that
|
||||
/// otherwise appear when files share no direct call/import edges.
|
||||
fn build_petgraph(&self, parse_output: ParseOutput) -> Result<CodeGraph, CoreError> {
|
||||
let mut graph = DiGraph::new();
|
||||
let mut node_map: HashMap<String, NodeIndex> = HashMap::new();
|
||||
@@ -102,15 +131,13 @@ impl GraphEngine {
|
||||
node_map.insert(node.qualified_name.clone(), idx);
|
||||
}
|
||||
|
||||
// Resolve and add edges — rewrite target to the resolved qualified name
|
||||
// so the persisted edge references match node qualified_names.
|
||||
// Resolve and add explicit edges from parsers
|
||||
let mut resolved_edges = Vec::new();
|
||||
for mut edge in parse_output.edges {
|
||||
let source_idx = node_map.get(&edge.source);
|
||||
let resolved = self.resolve_edge_target(&edge.target, &node_map);
|
||||
|
||||
if let (Some(&src), Some(tgt)) = (source_idx, resolved) {
|
||||
// Update target to the resolved qualified name
|
||||
let resolved_name = node_map
|
||||
.iter()
|
||||
.find(|(_, &idx)| idx == tgt)
|
||||
@@ -121,7 +148,48 @@ impl GraphEngine {
|
||||
graph.add_edge(src, tgt, edge.kind.clone());
|
||||
resolved_edges.push(edge);
|
||||
}
|
||||
// Skip unresolved edges (cross-file, external deps) — conservative approach
|
||||
}
|
||||
|
||||
// Synthesise Contains edges: connect each node to its closest
|
||||
// parent in the qualified-name hierarchy.
|
||||
//
|
||||
// For "src/main.rs::config::load", the parent chain is:
|
||||
// "src/main.rs::config" → "src/main.rs"
|
||||
//
|
||||
// We walk up the qualified name (splitting on "::") and link to
|
||||
// the first ancestor that exists in the node map.
|
||||
let repo_id = nodes.first().map(|n| n.repo_id.as_str()).unwrap_or("");
|
||||
let build_id = nodes
|
||||
.first()
|
||||
.map(|n| n.graph_build_id.as_str())
|
||||
.unwrap_or("");
|
||||
|
||||
let qualified_names: Vec<String> = nodes.iter().map(|n| n.qualified_name.clone()).collect();
|
||||
let file_paths: HashMap<String, String> = nodes
|
||||
.iter()
|
||||
.map(|n| (n.qualified_name.clone(), n.file_path.clone()))
|
||||
.collect();
|
||||
|
||||
for qname in &qualified_names {
|
||||
if let Some(parent_qname) = find_parent_qname(qname, &node_map) {
|
||||
let child_idx = node_map[qname];
|
||||
let parent_idx = node_map[&parent_qname];
|
||||
|
||||
// Avoid duplicate edges
|
||||
if !graph.contains_edge(parent_idx, child_idx) {
|
||||
graph.add_edge(parent_idx, child_idx, CodeEdgeKind::Contains);
|
||||
resolved_edges.push(CodeEdge {
|
||||
id: None,
|
||||
repo_id: repo_id.to_string(),
|
||||
graph_build_id: build_id.to_string(),
|
||||
source: parent_qname,
|
||||
target: qname.clone(),
|
||||
kind: CodeEdgeKind::Contains,
|
||||
file_path: file_paths.get(qname).cloned().unwrap_or_default(),
|
||||
line_number: None,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(CodeGraph {
|
||||
@@ -132,33 +200,62 @@ impl GraphEngine {
|
||||
})
|
||||
}
|
||||
|
||||
/// Try to resolve an edge target to a known node
|
||||
/// Try to resolve an edge target to a known node.
|
||||
///
|
||||
/// Resolution strategies (in order):
|
||||
/// 1. Direct qualified-name match
|
||||
/// 2. Suffix match: "foo" matches "src/main.rs::mod::foo"
|
||||
/// 3. Module-path match: "config::load" matches "src/config.rs::load"
|
||||
/// 4. Self-method: "self.method" matches "::method"
|
||||
fn resolve_edge_target(
|
||||
&self,
|
||||
target: &str,
|
||||
node_map: &HashMap<String, NodeIndex>,
|
||||
) -> Option<NodeIndex> {
|
||||
// Direct match
|
||||
// 1. Direct match
|
||||
if let Some(idx) = node_map.get(target) {
|
||||
return Some(*idx);
|
||||
}
|
||||
|
||||
// Try matching just the function/type name (intra-file resolution)
|
||||
// 2. Suffix match: "foo" → "path/file.rs::foo"
|
||||
let suffix_pattern = format!("::{target}");
|
||||
let dot_pattern = format!(".{target}");
|
||||
for (qualified, idx) in node_map {
|
||||
// Match "foo" to "path/file.rs::foo" or "path/file.rs::Type::foo"
|
||||
if qualified.ends_with(&format!("::{target}"))
|
||||
|| qualified.ends_with(&format!(".{target}"))
|
||||
{
|
||||
if qualified.ends_with(&suffix_pattern) || qualified.ends_with(&dot_pattern) {
|
||||
return Some(*idx);
|
||||
}
|
||||
}
|
||||
|
||||
// Try matching method calls like "self.method" -> look for "::method"
|
||||
// 3. Module-path match: "config::load" → try matching the last N
|
||||
// segments of the target against node qualified names.
|
||||
// This handles cross-file calls like `crate::config::load` or
|
||||
// `super::handlers::process` where the prefix differs.
|
||||
if target.contains("::") {
|
||||
// Strip common Rust path prefixes
|
||||
let stripped = target
|
||||
.strip_prefix("crate::")
|
||||
.or_else(|| target.strip_prefix("super::"))
|
||||
.or_else(|| target.strip_prefix("self::"))
|
||||
.unwrap_or(target);
|
||||
|
||||
let segments: Vec<&str> = stripped.split("::").collect();
|
||||
// Try matching progressively shorter suffixes
|
||||
for start in 0..segments.len() {
|
||||
let suffix = segments[start..].join("::");
|
||||
let pattern = format!("::{suffix}");
|
||||
for (qualified, idx) in node_map {
|
||||
if qualified.ends_with(&pattern) {
|
||||
return Some(*idx);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 4. Self-method: "self.method" → "::method"
|
||||
if let Some(method_name) = target.strip_prefix("self.") {
|
||||
let pattern = format!("::{method_name}");
|
||||
for (qualified, idx) in node_map {
|
||||
if qualified.ends_with(&format!("::{method_name}"))
|
||||
|| qualified.ends_with(&format!(".{method_name}"))
|
||||
{
|
||||
if qualified.ends_with(&pattern) {
|
||||
return Some(*idx);
|
||||
}
|
||||
}
|
||||
@@ -353,4 +450,83 @@ mod tests {
|
||||
assert!(code_graph.node_map.contains_key("a::c"));
|
||||
assert!(code_graph.node_map.contains_key("a::d"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_contains_edges_synthesised() {
|
||||
let engine = GraphEngine::new(1000);
|
||||
let mut output = ParseOutput::default();
|
||||
// File → Module → Function hierarchy
|
||||
output.nodes.push(make_node("src/main.rs"));
|
||||
output.nodes.push(make_node("src/main.rs::config"));
|
||||
output.nodes.push(make_node("src/main.rs::config::load"));
|
||||
|
||||
let code_graph = engine.build_petgraph(output).unwrap();
|
||||
|
||||
// Should have 2 Contains edges:
|
||||
// src/main.rs → src/main.rs::config
|
||||
// src/main.rs::config → src/main.rs::config::load
|
||||
let contains_edges: Vec<_> = code_graph
|
||||
.edges
|
||||
.iter()
|
||||
.filter(|e| matches!(e.kind, CodeEdgeKind::Contains))
|
||||
.collect();
|
||||
assert_eq!(contains_edges.len(), 2, "expected 2 Contains edges");
|
||||
|
||||
let sources: Vec<&str> = contains_edges.iter().map(|e| e.source.as_str()).collect();
|
||||
assert!(sources.contains(&"src/main.rs"));
|
||||
assert!(sources.contains(&"src/main.rs::config"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_contains_edges_no_duplicates_with_existing_edges() {
|
||||
let engine = GraphEngine::new(1000);
|
||||
let mut output = ParseOutput::default();
|
||||
output.nodes.push(make_node("src/main.rs"));
|
||||
output.nodes.push(make_node("src/main.rs::foo"));
|
||||
|
||||
// Explicit Calls edge (foo calls itself? just for testing)
|
||||
output.edges.push(CodeEdge {
|
||||
id: None,
|
||||
repo_id: "test".to_string(),
|
||||
graph_build_id: "build1".to_string(),
|
||||
source: "src/main.rs::foo".to_string(),
|
||||
target: "src/main.rs::foo".to_string(),
|
||||
kind: CodeEdgeKind::Calls,
|
||||
file_path: "src/main.rs".to_string(),
|
||||
line_number: Some(1),
|
||||
});
|
||||
|
||||
let code_graph = engine.build_petgraph(output).unwrap();
|
||||
|
||||
// 1 Calls + 1 Contains = 2 edges total
|
||||
assert_eq!(code_graph.edges.len(), 2);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_cross_file_resolution_with_module_path() {
|
||||
let engine = GraphEngine::new(1000);
|
||||
let node_map = build_test_node_map(&["src/config.rs::load_config", "src/main.rs::main"]);
|
||||
// "crate::config::load_config" should resolve to "src/config.rs::load_config"
|
||||
let result = engine.resolve_edge_target("crate::config::load_config", &node_map);
|
||||
assert!(result.is_some(), "cross-file crate:: path should resolve");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_find_parent_qname() {
|
||||
let node_map = build_test_node_map(&[
|
||||
"src/main.rs",
|
||||
"src/main.rs::config",
|
||||
"src/main.rs::config::load",
|
||||
]);
|
||||
|
||||
assert_eq!(
|
||||
find_parent_qname("src/main.rs::config::load", &node_map),
|
||||
Some("src/main.rs::config".to_string())
|
||||
);
|
||||
assert_eq!(
|
||||
find_parent_qname("src/main.rs::config", &node_map),
|
||||
Some("src/main.rs".to_string())
|
||||
);
|
||||
assert_eq!(find_parent_qname("src/main.rs", &node_map), None);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user