Co-authored-by: Sharang Parnerkar <parnerkarsharang@gmail.com> Reviewed-on: #1
428 lines
15 KiB
Rust
428 lines
15 KiB
Rust
use std::path::Path;
|
|
|
|
use compliance_core::error::CoreError;
|
|
use compliance_core::models::graph::{CodeEdge, CodeEdgeKind, CodeNode, CodeNodeKind};
|
|
use compliance_core::traits::graph_builder::{LanguageParser, ParseOutput};
|
|
use tree_sitter::{Node, Parser};
|
|
|
|
/// Rust implementation of `LanguageParser`.
///
/// A stateless unit struct: each `parse_file` call builds its own tree-sitter
/// `Parser`, so nothing is cached between calls.
pub struct RustParser;
|
|
|
|
impl Default for RustParser {
|
|
fn default() -> Self {
|
|
Self::new()
|
|
}
|
|
}
|
|
|
|
impl RustParser {
|
|
pub fn new() -> Self {
|
|
Self
|
|
}
|
|
|
|
fn walk_tree(
|
|
&self,
|
|
node: Node<'_>,
|
|
source: &str,
|
|
file_path: &str,
|
|
repo_id: &str,
|
|
graph_build_id: &str,
|
|
parent_qualified: Option<&str>,
|
|
output: &mut ParseOutput,
|
|
) {
|
|
match node.kind() {
|
|
"function_item" | "function_signature_item" => {
|
|
if let Some(name_node) = node.child_by_field_name("name") {
|
|
let name = &source[name_node.byte_range()];
|
|
let qualified = match parent_qualified {
|
|
Some(p) => format!("{p}::{name}"),
|
|
None => format!("{file_path}::{name}"),
|
|
};
|
|
|
|
let is_entry = name == "main"
|
|
|| self.has_attribute(&node, source, "test")
|
|
|| self.has_attribute(&node, source, "tokio::main")
|
|
|| self.has_pub_visibility(&node, source);
|
|
|
|
output.nodes.push(CodeNode {
|
|
id: None,
|
|
repo_id: repo_id.to_string(),
|
|
graph_build_id: graph_build_id.to_string(),
|
|
qualified_name: qualified.clone(),
|
|
name: name.to_string(),
|
|
kind: CodeNodeKind::Function,
|
|
file_path: file_path.to_string(),
|
|
start_line: node.start_position().row as u32 + 1,
|
|
end_line: node.end_position().row as u32 + 1,
|
|
language: "rust".to_string(),
|
|
community_id: None,
|
|
is_entry_point: is_entry,
|
|
graph_index: None,
|
|
});
|
|
|
|
// Extract function calls within the body
|
|
if let Some(body) = node.child_by_field_name("body") {
|
|
self.extract_calls(
|
|
body,
|
|
source,
|
|
file_path,
|
|
repo_id,
|
|
graph_build_id,
|
|
&qualified,
|
|
output,
|
|
);
|
|
}
|
|
}
|
|
}
|
|
"struct_item" => {
|
|
if let Some(name_node) = node.child_by_field_name("name") {
|
|
let name = &source[name_node.byte_range()];
|
|
let qualified = match parent_qualified {
|
|
Some(p) => format!("{p}::{name}"),
|
|
None => format!("{file_path}::{name}"),
|
|
};
|
|
|
|
output.nodes.push(CodeNode {
|
|
id: None,
|
|
repo_id: repo_id.to_string(),
|
|
graph_build_id: graph_build_id.to_string(),
|
|
qualified_name: qualified,
|
|
name: name.to_string(),
|
|
kind: CodeNodeKind::Struct,
|
|
file_path: file_path.to_string(),
|
|
start_line: node.start_position().row as u32 + 1,
|
|
end_line: node.end_position().row as u32 + 1,
|
|
language: "rust".to_string(),
|
|
community_id: None,
|
|
is_entry_point: false,
|
|
graph_index: None,
|
|
});
|
|
}
|
|
}
|
|
"enum_item" => {
|
|
if let Some(name_node) = node.child_by_field_name("name") {
|
|
let name = &source[name_node.byte_range()];
|
|
let qualified = match parent_qualified {
|
|
Some(p) => format!("{p}::{name}"),
|
|
None => format!("{file_path}::{name}"),
|
|
};
|
|
|
|
output.nodes.push(CodeNode {
|
|
id: None,
|
|
repo_id: repo_id.to_string(),
|
|
graph_build_id: graph_build_id.to_string(),
|
|
qualified_name: qualified,
|
|
name: name.to_string(),
|
|
kind: CodeNodeKind::Enum,
|
|
file_path: file_path.to_string(),
|
|
start_line: node.start_position().row as u32 + 1,
|
|
end_line: node.end_position().row as u32 + 1,
|
|
language: "rust".to_string(),
|
|
community_id: None,
|
|
is_entry_point: false,
|
|
graph_index: None,
|
|
});
|
|
}
|
|
}
|
|
"trait_item" => {
|
|
if let Some(name_node) = node.child_by_field_name("name") {
|
|
let name = &source[name_node.byte_range()];
|
|
let qualified = match parent_qualified {
|
|
Some(p) => format!("{p}::{name}"),
|
|
None => format!("{file_path}::{name}"),
|
|
};
|
|
|
|
output.nodes.push(CodeNode {
|
|
id: None,
|
|
repo_id: repo_id.to_string(),
|
|
graph_build_id: graph_build_id.to_string(),
|
|
qualified_name: qualified.clone(),
|
|
name: name.to_string(),
|
|
kind: CodeNodeKind::Trait,
|
|
file_path: file_path.to_string(),
|
|
start_line: node.start_position().row as u32 + 1,
|
|
end_line: node.end_position().row as u32 + 1,
|
|
language: "rust".to_string(),
|
|
community_id: None,
|
|
is_entry_point: false,
|
|
graph_index: None,
|
|
});
|
|
|
|
// Parse methods inside the trait
|
|
self.walk_children(
|
|
node,
|
|
source,
|
|
file_path,
|
|
repo_id,
|
|
graph_build_id,
|
|
Some(&qualified),
|
|
output,
|
|
);
|
|
return; // Don't walk children again
|
|
}
|
|
}
|
|
"impl_item" => {
|
|
// Extract impl target type for qualified naming
|
|
let impl_name = self.extract_impl_type(&node, source);
|
|
let qualified = match parent_qualified {
|
|
Some(p) => format!("{p}::{impl_name}"),
|
|
None => format!("{file_path}::{impl_name}"),
|
|
};
|
|
|
|
// Check for trait impl (impl Trait for Type)
|
|
if let Some(trait_node) = node.child_by_field_name("trait") {
|
|
let trait_name = &source[trait_node.byte_range()];
|
|
output.edges.push(CodeEdge {
|
|
id: None,
|
|
repo_id: repo_id.to_string(),
|
|
graph_build_id: graph_build_id.to_string(),
|
|
source: qualified.clone(),
|
|
target: trait_name.to_string(),
|
|
kind: CodeEdgeKind::Implements,
|
|
file_path: file_path.to_string(),
|
|
line_number: Some(node.start_position().row as u32 + 1),
|
|
});
|
|
}
|
|
|
|
// Walk methods inside impl block
|
|
self.walk_children(
|
|
node,
|
|
source,
|
|
file_path,
|
|
repo_id,
|
|
graph_build_id,
|
|
Some(&qualified),
|
|
output,
|
|
);
|
|
return;
|
|
}
|
|
"use_declaration" => {
|
|
let use_text = &source[node.byte_range()];
|
|
// Extract the imported path
|
|
if let Some(path) = self.extract_use_path(use_text) {
|
|
output.edges.push(CodeEdge {
|
|
id: None,
|
|
repo_id: repo_id.to_string(),
|
|
graph_build_id: graph_build_id.to_string(),
|
|
source: parent_qualified.unwrap_or(file_path).to_string(),
|
|
target: path,
|
|
kind: CodeEdgeKind::Imports,
|
|
file_path: file_path.to_string(),
|
|
line_number: Some(node.start_position().row as u32 + 1),
|
|
});
|
|
}
|
|
}
|
|
"mod_item" => {
|
|
if let Some(name_node) = node.child_by_field_name("name") {
|
|
let name = &source[name_node.byte_range()];
|
|
let qualified = match parent_qualified {
|
|
Some(p) => format!("{p}::{name}"),
|
|
None => format!("{file_path}::{name}"),
|
|
};
|
|
|
|
output.nodes.push(CodeNode {
|
|
id: None,
|
|
repo_id: repo_id.to_string(),
|
|
graph_build_id: graph_build_id.to_string(),
|
|
qualified_name: qualified.clone(),
|
|
name: name.to_string(),
|
|
kind: CodeNodeKind::Module,
|
|
file_path: file_path.to_string(),
|
|
start_line: node.start_position().row as u32 + 1,
|
|
end_line: node.end_position().row as u32 + 1,
|
|
language: "rust".to_string(),
|
|
community_id: None,
|
|
is_entry_point: false,
|
|
graph_index: None,
|
|
});
|
|
|
|
// If it has a body (inline module), walk it
|
|
if let Some(body) = node.child_by_field_name("body") {
|
|
self.walk_children(
|
|
body,
|
|
source,
|
|
file_path,
|
|
repo_id,
|
|
graph_build_id,
|
|
Some(&qualified),
|
|
output,
|
|
);
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
_ => {}
|
|
}
|
|
|
|
// Default: walk children
|
|
self.walk_children(
|
|
node,
|
|
source,
|
|
file_path,
|
|
repo_id,
|
|
graph_build_id,
|
|
parent_qualified,
|
|
output,
|
|
);
|
|
}
|
|
|
|
fn walk_children(
|
|
&self,
|
|
node: Node<'_>,
|
|
source: &str,
|
|
file_path: &str,
|
|
repo_id: &str,
|
|
graph_build_id: &str,
|
|
parent_qualified: Option<&str>,
|
|
output: &mut ParseOutput,
|
|
) {
|
|
let mut cursor = node.walk();
|
|
for child in node.children(&mut cursor) {
|
|
self.walk_tree(
|
|
child,
|
|
source,
|
|
file_path,
|
|
repo_id,
|
|
graph_build_id,
|
|
parent_qualified,
|
|
output,
|
|
);
|
|
}
|
|
}
|
|
|
|
fn extract_calls(
|
|
&self,
|
|
node: Node<'_>,
|
|
source: &str,
|
|
file_path: &str,
|
|
repo_id: &str,
|
|
graph_build_id: &str,
|
|
caller_qualified: &str,
|
|
output: &mut ParseOutput,
|
|
) {
|
|
if node.kind() == "call_expression" {
|
|
if let Some(func_node) = node.child_by_field_name("function") {
|
|
let callee = &source[func_node.byte_range()];
|
|
output.edges.push(CodeEdge {
|
|
id: None,
|
|
repo_id: repo_id.to_string(),
|
|
graph_build_id: graph_build_id.to_string(),
|
|
source: caller_qualified.to_string(),
|
|
target: callee.to_string(),
|
|
kind: CodeEdgeKind::Calls,
|
|
file_path: file_path.to_string(),
|
|
line_number: Some(node.start_position().row as u32 + 1),
|
|
});
|
|
}
|
|
}
|
|
|
|
let mut cursor = node.walk();
|
|
for child in node.children(&mut cursor) {
|
|
self.extract_calls(
|
|
child,
|
|
source,
|
|
file_path,
|
|
repo_id,
|
|
graph_build_id,
|
|
caller_qualified,
|
|
output,
|
|
);
|
|
}
|
|
}
|
|
|
|
/// Returns `true` when one of the attributes written directly above `node`
/// contains `attr_name`.
///
/// Scans backwards over the run of preceding `attribute_item` / `attribute`
/// siblings, so stacked attributes such as `#[tokio::test]` followed by
/// `#[ignore]` are all considered. Comment siblings inside that run are
/// skipped; any other sibling kind ends the scan.
///
/// NOTE(review): the check is a substring match on the attribute's source
/// text, so `"test"` also matches e.g. `#[cfg(test)]`. Confirm whether that
/// is intended.
fn has_attribute(&self, node: &Node<'_>, source: &str, attr_name: &str) -> bool {
    let mut sibling = node.prev_sibling();
    while let Some(prev) = sibling {
        match prev.kind() {
            "attribute_item" | "attribute" => {
                if source[prev.byte_range()].contains(attr_name) {
                    return true;
                }
            }
            // Comments may sit between an attribute and the item.
            "line_comment" | "block_comment" => {}
            // Anything else belongs to a different item.
            _ => break,
        }
        sibling = prev.prev_sibling();
    }
    false
}
|
|
|
|
/// Returns `true` when the first `visibility_modifier` child of `node` is
/// exactly `pub`.
///
/// Restricted forms such as `pub(crate)` compare unequal and yield `false`,
/// as does the absence of any visibility modifier.
fn has_pub_visibility(&self, node: &Node<'_>, source: &str) -> bool {
    let mut child_cursor = node.walk();
    // Bound to a local so the iterator's borrow of the cursor ends here.
    let visibility = node
        .children(&mut child_cursor)
        .find(|candidate| candidate.kind() == "visibility_modifier");

    match visibility {
        Some(modifier) => &source[modifier.byte_range()] == "pub",
        None => false,
    }
}
|
|
|
|
/// Returns the source text of the `type` field of `node`, or the literal
/// `"unknown"` when the node has no such field.
fn extract_impl_type(&self, node: &Node<'_>, source: &str) -> String {
    match node.child_by_field_name("type") {
        Some(target_type) => source[target_type.byte_range()].to_string(),
        None => "unknown".to_string(),
    }
}
|
|
|
|
fn extract_use_path(&self, use_text: &str) -> Option<String> {
|
|
// "use foo::bar::baz;" -> "foo::bar::baz"
|
|
let trimmed = use_text.strip_prefix("use ")?.trim_end_matches(';').trim();
|
|
Some(trimmed.to_string())
|
|
}
|
|
}
|
|
|
|
impl LanguageParser for RustParser {
|
|
fn language(&self) -> &str {
|
|
"rust"
|
|
}
|
|
|
|
fn extensions(&self) -> &[&str] {
|
|
&["rs"]
|
|
}
|
|
|
|
fn parse_file(
|
|
&self,
|
|
file_path: &Path,
|
|
source: &str,
|
|
repo_id: &str,
|
|
graph_build_id: &str,
|
|
) -> Result<ParseOutput, CoreError> {
|
|
let mut parser = Parser::new();
|
|
let language = tree_sitter_rust::LANGUAGE;
|
|
parser
|
|
.set_language(&language.into())
|
|
.map_err(|e| CoreError::Graph(format!("Failed to set Rust language: {e}")))?;
|
|
|
|
let tree = parser
|
|
.parse(source, None)
|
|
.ok_or_else(|| CoreError::Graph("Failed to parse Rust file".to_string()))?;
|
|
|
|
let file_path_str = file_path.to_string_lossy().to_string();
|
|
let mut output = ParseOutput::default();
|
|
|
|
// Add file node
|
|
output.nodes.push(CodeNode {
|
|
id: None,
|
|
repo_id: repo_id.to_string(),
|
|
graph_build_id: graph_build_id.to_string(),
|
|
qualified_name: file_path_str.clone(),
|
|
name: file_path
|
|
.file_name()
|
|
.map(|n| n.to_string_lossy().to_string())
|
|
.unwrap_or_default(),
|
|
kind: CodeNodeKind::File,
|
|
file_path: file_path_str.clone(),
|
|
start_line: 1,
|
|
end_line: source.lines().count() as u32,
|
|
language: "rust".to_string(),
|
|
community_id: None,
|
|
is_entry_point: false,
|
|
graph_index: None,
|
|
});
|
|
|
|
self.walk_tree(
|
|
tree.root_node(),
|
|
source,
|
|
&file_path_str,
|
|
repo_id,
|
|
graph_build_id,
|
|
None,
|
|
&mut output,
|
|
);
|
|
|
|
Ok(output)
|
|
}
|
|
}
|