fix: synthesise Contains edges and improve cross-file resolution in code graph
Some checks failed
CI / Check (pull_request) Failing after 12m5s
CI / Detect Changes (pull_request) Has been skipped
CI / Deploy Agent (pull_request) Has been skipped
CI / Deploy Dashboard (pull_request) Has been skipped
CI / Deploy Docs (pull_request) Has been skipped
CI / Deploy MCP (pull_request) Has been skipped

The code graph produced disconnected "islands" because:
1. No Contains edges were created between File/Module nodes and their
   children (functions, classes, structs), leaving file nodes isolated
2. Cross-file call resolution was too strict — calls like
   `crate::config::load` failed to resolve to `src/config.rs::load`

Fix:
- After resolving explicit parser edges, synthesise Contains edges by
  walking each node's qualified-name hierarchy and linking to the
  closest ancestor that exists in the node map
- Improve edge resolution with module-path matching: strip Rust
  prefixes (crate::, super::, self::) and try progressively shorter
  suffix matches for cross-file calls

Adds 4 new tests covering Contains edge synthesis, dedup with existing
edges, cross-file module path resolution, and parent qname lookup.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Sharang Parnerkar
2026-03-30 11:12:38 +02:00
parent 5b07d38907
commit 08a1ee2f00

View File

@@ -15,6 +15,30 @@ use crate::parsers::registry::ParserRegistry;
use super::community::detect_communities;
use super::impact::ImpactAnalyzer;
/// Walk up the qualified-name hierarchy to find the closest ancestor
/// that exists in the node map.
///
/// For `"src/main.rs::config::load"` this tries:
/// 1. `"src/main.rs::config"` (trim last `::` segment)
/// 2. `"src/main.rs"` (trim again)
///
/// Returns the first match found, or `None` if the node is a root.
fn find_parent_qname(qname: &str, node_map: &HashMap<String, NodeIndex>) -> Option<String> {
let mut current = qname.to_string();
loop {
// Try stripping the last "::" segment
if let Some(pos) = current.rfind("::") {
current.truncate(pos);
if node_map.contains_key(&current) {
return Some(current);
}
continue;
}
// No more "::" — this is a top-level node (file), no parent
return None;
}
}
/// The main graph engine that builds and manages code knowledge graphs
pub struct GraphEngine {
parser_registry: ParserRegistry,
@@ -89,7 +113,12 @@ impl GraphEngine {
Ok((code_graph, build_run))
}
/// Build petgraph from parsed output, resolving edges to node indices
/// Build petgraph from parsed output, resolving edges to node indices.
///
/// After resolving the explicit edges from parsers, we synthesise
/// `Contains` edges so that every node is reachable from its parent
/// file or module. This eliminates disconnected "islands" that
/// otherwise appear when files share no direct call/import edges.
fn build_petgraph(&self, parse_output: ParseOutput) -> Result<CodeGraph, CoreError> {
let mut graph = DiGraph::new();
let mut node_map: HashMap<String, NodeIndex> = HashMap::new();
@@ -102,15 +131,13 @@ impl GraphEngine {
node_map.insert(node.qualified_name.clone(), idx);
}
// Resolve and add edges — rewrite target to the resolved qualified name
// so the persisted edge references match node qualified_names.
// Resolve and add explicit edges from parsers
let mut resolved_edges = Vec::new();
for mut edge in parse_output.edges {
let source_idx = node_map.get(&edge.source);
let resolved = self.resolve_edge_target(&edge.target, &node_map);
if let (Some(&src), Some(tgt)) = (source_idx, resolved) {
// Update target to the resolved qualified name
let resolved_name = node_map
.iter()
.find(|(_, &idx)| idx == tgt)
@@ -121,7 +148,48 @@ impl GraphEngine {
graph.add_edge(src, tgt, edge.kind.clone());
resolved_edges.push(edge);
}
// Skip unresolved edges (cross-file, external deps) — conservative approach
}
// Synthesise Contains edges: connect each node to its closest
// parent in the qualified-name hierarchy.
//
// For "src/main.rs::config::load", the parent chain is:
// "src/main.rs::config" → "src/main.rs"
//
// We walk up the qualified name (splitting on "::") and link to
// the first ancestor that exists in the node map.
let repo_id = nodes.first().map(|n| n.repo_id.as_str()).unwrap_or("");
let build_id = nodes
.first()
.map(|n| n.graph_build_id.as_str())
.unwrap_or("");
let qualified_names: Vec<String> = nodes.iter().map(|n| n.qualified_name.clone()).collect();
let file_paths: HashMap<String, String> = nodes
.iter()
.map(|n| (n.qualified_name.clone(), n.file_path.clone()))
.collect();
for qname in &qualified_names {
if let Some(parent_qname) = find_parent_qname(qname, &node_map) {
let child_idx = node_map[qname];
let parent_idx = node_map[&parent_qname];
// Avoid duplicate edges
if !graph.contains_edge(parent_idx, child_idx) {
graph.add_edge(parent_idx, child_idx, CodeEdgeKind::Contains);
resolved_edges.push(CodeEdge {
id: None,
repo_id: repo_id.to_string(),
graph_build_id: build_id.to_string(),
source: parent_qname,
target: qname.clone(),
kind: CodeEdgeKind::Contains,
file_path: file_paths.get(qname).cloned().unwrap_or_default(),
line_number: None,
});
}
}
}
Ok(CodeGraph {
@@ -132,33 +200,62 @@ impl GraphEngine {
})
}
/// Try to resolve an edge target to a known node
/// Try to resolve an edge target to a known node.
///
/// Resolution strategies (in order):
/// 1. Direct qualified-name match
/// 2. Suffix match: "foo" matches "src/main.rs::mod::foo"
/// 3. Module-path match: "config::load" matches "src/config.rs::load"
/// 4. Self-method: "self.method" matches "::method"
fn resolve_edge_target(
&self,
target: &str,
node_map: &HashMap<String, NodeIndex>,
) -> Option<NodeIndex> {
// Direct match
// 1. Direct match
if let Some(idx) = node_map.get(target) {
return Some(*idx);
}
// Try matching just the function/type name (intra-file resolution)
// 2. Suffix match: "foo" → "path/file.rs::foo"
let suffix_pattern = format!("::{target}");
let dot_pattern = format!(".{target}");
for (qualified, idx) in node_map {
// Match "foo" to "path/file.rs::foo" or "path/file.rs::Type::foo"
if qualified.ends_with(&format!("::{target}"))
|| qualified.ends_with(&format!(".{target}"))
{
if qualified.ends_with(&suffix_pattern) || qualified.ends_with(&dot_pattern) {
return Some(*idx);
}
}
// Try matching method calls like "self.method" -> look for "::method"
// 3. Module-path match: "config::load" → try matching the last N
// segments of the target against node qualified names.
// This handles cross-file calls like `crate::config::load` or
// `super::handlers::process` where the prefix differs.
if target.contains("::") {
// Strip common Rust path prefixes
let stripped = target
.strip_prefix("crate::")
.or_else(|| target.strip_prefix("super::"))
.or_else(|| target.strip_prefix("self::"))
.unwrap_or(target);
let segments: Vec<&str> = stripped.split("::").collect();
// Try matching progressively shorter suffixes
for start in 0..segments.len() {
let suffix = segments[start..].join("::");
let pattern = format!("::{suffix}");
for (qualified, idx) in node_map {
if qualified.ends_with(&pattern) {
return Some(*idx);
}
}
}
}
// 4. Self-method: "self.method" → "::method"
if let Some(method_name) = target.strip_prefix("self.") {
let pattern = format!("::{method_name}");
for (qualified, idx) in node_map {
if qualified.ends_with(&format!("::{method_name}"))
|| qualified.ends_with(&format!(".{method_name}"))
{
if qualified.ends_with(&pattern) {
return Some(*idx);
}
}
@@ -353,4 +450,83 @@ mod tests {
assert!(code_graph.node_map.contains_key("a::c"));
assert!(code_graph.node_map.contains_key("a::d"));
}
#[test]
fn test_contains_edges_synthesised() {
let engine = GraphEngine::new(1000);
let mut output = ParseOutput::default();
// File → Module → Function hierarchy
output.nodes.push(make_node("src/main.rs"));
output.nodes.push(make_node("src/main.rs::config"));
output.nodes.push(make_node("src/main.rs::config::load"));
let code_graph = engine.build_petgraph(output).unwrap();
// Should have 2 Contains edges:
// src/main.rs → src/main.rs::config
// src/main.rs::config → src/main.rs::config::load
let contains_edges: Vec<_> = code_graph
.edges
.iter()
.filter(|e| matches!(e.kind, CodeEdgeKind::Contains))
.collect();
assert_eq!(contains_edges.len(), 2, "expected 2 Contains edges");
let sources: Vec<&str> = contains_edges.iter().map(|e| e.source.as_str()).collect();
assert!(sources.contains(&"src/main.rs"));
assert!(sources.contains(&"src/main.rs::config"));
}
#[test]
fn test_contains_edges_no_duplicates_with_existing_edges() {
let engine = GraphEngine::new(1000);
let mut output = ParseOutput::default();
output.nodes.push(make_node("src/main.rs"));
output.nodes.push(make_node("src/main.rs::foo"));
// Explicit Calls edge (foo calls itself? just for testing)
output.edges.push(CodeEdge {
id: None,
repo_id: "test".to_string(),
graph_build_id: "build1".to_string(),
source: "src/main.rs::foo".to_string(),
target: "src/main.rs::foo".to_string(),
kind: CodeEdgeKind::Calls,
file_path: "src/main.rs".to_string(),
line_number: Some(1),
});
let code_graph = engine.build_petgraph(output).unwrap();
// 1 Calls + 1 Contains = 2 edges total
assert_eq!(code_graph.edges.len(), 2);
}
#[test]
fn test_cross_file_resolution_with_module_path() {
let engine = GraphEngine::new(1000);
let node_map = build_test_node_map(&["src/config.rs::load_config", "src/main.rs::main"]);
// "crate::config::load_config" should resolve to "src/config.rs::load_config"
let result = engine.resolve_edge_target("crate::config::load_config", &node_map);
assert!(result.is_some(), "cross-file crate:: path should resolve");
}
#[test]
fn test_find_parent_qname() {
let node_map = build_test_node_map(&[
"src/main.rs",
"src/main.rs::config",
"src/main.rs::config::load",
]);
assert_eq!(
find_parent_qname("src/main.rs::config::load", &node_map),
Some("src/main.rs::config".to_string())
);
assert_eq!(
find_parent_qname("src/main.rs::config", &node_map),
Some("src/main.rs".to_string())
);
assert_eq!(find_parent_qname("src/main.rs", &node_map), None);
}
}