From 08a1ee2f0025858125343b6d35852a7689cd5f2a Mon Sep 17 00:00:00 2001 From: Sharang Parnerkar <30073382+mighty840@users.noreply.github.com> Date: Mon, 30 Mar 2026 11:12:38 +0200 Subject: [PATCH] fix: synthesise Contains edges and improve cross-file resolution in code graph MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The code graph produced disconnected "islands" because: 1. No Contains edges were created between File/Module nodes and their children (functions, classes, structs), leaving file nodes isolated 2. Cross-file call resolution was too strict — calls like `crate::config::load` failed to resolve to `src/config.rs::load` Fix: - After resolving explicit parser edges, synthesise Contains edges by walking each node's qualified-name hierarchy and linking to the closest ancestor that exists in the node map - Improve edge resolution with module-path matching: strip Rust prefixes (crate::, super::, self::) and try progressively shorter suffix matches for cross-file calls Adds 4 new tests covering Contains edge synthesis, dedup with existing edges, cross-file module path resolution, and parent qname lookup. Co-Authored-By: Claude Opus 4.6 (1M context) --- compliance-graph/src/graph/engine.rs | 208 ++++++++++++++++++++++++--- 1 file changed, 192 insertions(+), 16 deletions(-) diff --git a/compliance-graph/src/graph/engine.rs b/compliance-graph/src/graph/engine.rs index 431a54f..c44c2c8 100644 --- a/compliance-graph/src/graph/engine.rs +++ b/compliance-graph/src/graph/engine.rs @@ -15,6 +15,30 @@ use crate::parsers::registry::ParserRegistry; use super::community::detect_communities; use super::impact::ImpactAnalyzer; +/// Walk up the qualified-name hierarchy to find the closest ancestor +/// that exists in the node map. +/// +/// For `"src/main.rs::config::load"` this tries: +/// 1. `"src/main.rs::config"` (trim last `::` segment) +/// 2. `"src/main.rs"` (trim again) +/// +/// Returns the first match found, or `None` if the node is a root. +fn find_parent_qname(qname: &str, node_map: &HashMap) -> Option { + let mut current = qname.to_string(); + loop { + // Try stripping the last "::" segment + if let Some(pos) = current.rfind("::") { + current.truncate(pos); + if node_map.contains_key(¤t) { + return Some(current); + } + continue; + } + // No more "::" — this is a top-level node (file), no parent + return None; + } +} + /// The main graph engine that builds and manages code knowledge graphs pub struct GraphEngine { parser_registry: ParserRegistry, @@ -89,7 +113,12 @@ impl GraphEngine { Ok((code_graph, build_run)) } - /// Build petgraph from parsed output, resolving edges to node indices + /// Build petgraph from parsed output, resolving edges to node indices. + /// + /// After resolving the explicit edges from parsers, we synthesise + /// `Contains` edges so that every node is reachable from its parent + /// file or module. This eliminates disconnected "islands" that + /// otherwise appear when files share no direct call/import edges. fn build_petgraph(&self, parse_output: ParseOutput) -> Result { let mut graph = DiGraph::new(); let mut node_map: HashMap = HashMap::new(); @@ -102,15 +131,13 @@ impl GraphEngine { node_map.insert(node.qualified_name.clone(), idx); } - // Resolve and add edges — rewrite target to the resolved qualified name - // so the persisted edge references match node qualified_names. + // Resolve and add explicit edges from parsers let mut resolved_edges = Vec::new(); for mut edge in parse_output.edges { let source_idx = node_map.get(&edge.source); let resolved = self.resolve_edge_target(&edge.target, &node_map); if let (Some(&src), Some(tgt)) = (source_idx, resolved) { - // Update target to the resolved qualified name let resolved_name = node_map .iter() .find(|(_, &idx)| idx == tgt) @@ -121,7 +148,48 @@ impl GraphEngine { graph.add_edge(src, tgt, edge.kind.clone()); resolved_edges.push(edge); } - // Skip unresolved edges (cross-file, external deps) — conservative approach + } + + // Synthesise Contains edges: connect each node to its closest + // parent in the qualified-name hierarchy. + // + // For "src/main.rs::config::load", the parent chain is: + // "src/main.rs::config" → "src/main.rs" + // + // We walk up the qualified name (splitting on "::") and link to + // the first ancestor that exists in the node map. + let repo_id = nodes.first().map(|n| n.repo_id.as_str()).unwrap_or(""); + let build_id = nodes + .first() + .map(|n| n.graph_build_id.as_str()) + .unwrap_or(""); + + let qualified_names: Vec = nodes.iter().map(|n| n.qualified_name.clone()).collect(); + let file_paths: HashMap = nodes + .iter() + .map(|n| (n.qualified_name.clone(), n.file_path.clone())) + .collect(); + + for qname in &qualified_names { + if let Some(parent_qname) = find_parent_qname(qname, &node_map) { + let child_idx = node_map[qname]; + let parent_idx = node_map[&parent_qname]; + + // Avoid duplicate edges + if !graph.contains_edge(parent_idx, child_idx) { + graph.add_edge(parent_idx, child_idx, CodeEdgeKind::Contains); + resolved_edges.push(CodeEdge { + id: None, + repo_id: repo_id.to_string(), + graph_build_id: build_id.to_string(), + source: parent_qname, + target: qname.clone(), + kind: CodeEdgeKind::Contains, + file_path: file_paths.get(qname).cloned().unwrap_or_default(), + line_number: None, + }); + } + } } Ok(CodeGraph { @@ -132,33 +200,62 @@ impl GraphEngine { }) } - /// Try to resolve an edge target to a known node + /// Try to resolve an edge target to a known node. + /// + /// Resolution strategies (in order): + /// 1. Direct qualified-name match + /// 2. Suffix match: "foo" matches "src/main.rs::mod::foo" + /// 3. Module-path match: "config::load" matches "src/config.rs::load" + /// 4. Self-method: "self.method" matches "::method" fn resolve_edge_target( &self, target: &str, node_map: &HashMap, ) -> Option { - // Direct match + // 1. Direct match if let Some(idx) = node_map.get(target) { return Some(*idx); } - // Try matching just the function/type name (intra-file resolution) + // 2. Suffix match: "foo" → "path/file.rs::foo" + let suffix_pattern = format!("::{target}"); + let dot_pattern = format!(".{target}"); for (qualified, idx) in node_map { - // Match "foo" to "path/file.rs::foo" or "path/file.rs::Type::foo" - if qualified.ends_with(&format!("::{target}")) - || qualified.ends_with(&format!(".{target}")) - { + if qualified.ends_with(&suffix_pattern) || qualified.ends_with(&dot_pattern) { return Some(*idx); } } - // Try matching method calls like "self.method" -> look for "::method" + // 3. Module-path match: "config::load" → try matching the last N + // segments of the target against node qualified names. + // This handles cross-file calls like `crate::config::load` or + // `super::handlers::process` where the prefix differs. + if target.contains("::") { + // Strip common Rust path prefixes + let stripped = target + .strip_prefix("crate::") + .or_else(|| target.strip_prefix("super::")) + .or_else(|| target.strip_prefix("self::")) + .unwrap_or(target); + + let segments: Vec<&str> = stripped.split("::").collect(); + // Try matching progressively shorter suffixes + for start in 0..segments.len() { + let suffix = segments[start..].join("::"); + let pattern = format!("::{suffix}"); + for (qualified, idx) in node_map { + if qualified.ends_with(&pattern) { + return Some(*idx); + } + } + } + } + + // 4. Self-method: "self.method" → "::method" if let Some(method_name) = target.strip_prefix("self.") { + let pattern = format!("::{method_name}"); for (qualified, idx) in node_map { - if qualified.ends_with(&format!("::{method_name}")) - || qualified.ends_with(&format!(".{method_name}")) - { + if qualified.ends_with(&pattern) { return Some(*idx); } } @@ -353,4 +450,83 @@ mod tests { assert!(code_graph.node_map.contains_key("a::c")); assert!(code_graph.node_map.contains_key("a::d")); } + + #[test] + fn test_contains_edges_synthesised() { + let engine = GraphEngine::new(1000); + let mut output = ParseOutput::default(); + // File → Module → Function hierarchy + output.nodes.push(make_node("src/main.rs")); + output.nodes.push(make_node("src/main.rs::config")); + output.nodes.push(make_node("src/main.rs::config::load")); + + let code_graph = engine.build_petgraph(output).unwrap(); + + // Should have 2 Contains edges: + // src/main.rs → src/main.rs::config + // src/main.rs::config → src/main.rs::config::load + let contains_edges: Vec<_> = code_graph + .edges + .iter() + .filter(|e| matches!(e.kind, CodeEdgeKind::Contains)) + .collect(); + assert_eq!(contains_edges.len(), 2, "expected 2 Contains edges"); + + let sources: Vec<&str> = contains_edges.iter().map(|e| e.source.as_str()).collect(); + assert!(sources.contains(&"src/main.rs")); + assert!(sources.contains(&"src/main.rs::config")); + } + + #[test] + fn test_contains_edges_no_duplicates_with_existing_edges() { + let engine = GraphEngine::new(1000); + let mut output = ParseOutput::default(); + output.nodes.push(make_node("src/main.rs")); + output.nodes.push(make_node("src/main.rs::foo")); + + // Explicit Calls edge (foo calls itself? just for testing) + output.edges.push(CodeEdge { + id: None, + repo_id: "test".to_string(), + graph_build_id: "build1".to_string(), + source: "src/main.rs::foo".to_string(), + target: "src/main.rs::foo".to_string(), + kind: CodeEdgeKind::Calls, + file_path: "src/main.rs".to_string(), + line_number: Some(1), + }); + + let code_graph = engine.build_petgraph(output).unwrap(); + + // 1 Calls + 1 Contains = 2 edges total + assert_eq!(code_graph.edges.len(), 2); + } + + #[test] + fn test_cross_file_resolution_with_module_path() { + let engine = GraphEngine::new(1000); + let node_map = build_test_node_map(&["src/config.rs::load_config", "src/main.rs::main"]); + // "crate::config::load_config" should resolve to "src/config.rs::load_config" + let result = engine.resolve_edge_target("crate::config::load_config", &node_map); + assert!(result.is_some(), "cross-file crate:: path should resolve"); + } + + #[test] + fn test_find_parent_qname() { + let node_map = build_test_node_map(&[ + "src/main.rs", + "src/main.rs::config", + "src/main.rs::config::load", + ]); + + assert_eq!( + find_parent_qname("src/main.rs::config::load", &node_map), + Some("src/main.rs::config".to_string()) + ); + assert_eq!( + find_parent_qname("src/main.rs::config", &node_map), + Some("src/main.rs".to_string()) + ); + assert_eq!(find_parent_qname("src/main.rs", &node_map), None); + } }