Files
compliance-scanner-agent/compliance-graph/src/parsers/javascript.rs
Sharang Parnerkar 42cabf0582
All checks were successful
CI / Format (push) Successful in 2s
CI / Clippy (push) Successful in 2m56s
CI / Security Audit (push) Successful in 1m25s
CI / Tests (push) Successful in 3m57s
feat: rag-embedding-ai-chat (#1)
Co-authored-by: Sharang Parnerkar <parnerkarsharang@gmail.com>
Reviewed-on: #1
2026-03-06 21:54:15 +00:00

419 lines
15 KiB
Rust

use std::path::Path;
use compliance_core::error::CoreError;
use compliance_core::models::graph::{CodeEdge, CodeEdgeKind, CodeNode, CodeNodeKind};
use compliance_core::traits::graph_builder::{LanguageParser, ParseOutput};
use tree_sitter::{Node, Parser};
/// Stateless parser that turns JavaScript source files into code-graph nodes and edges.
///
/// The type carries no data; a tree-sitter parser is created for each call to
/// `parse_file`, so a single value can be copied and reused freely.
#[derive(Debug, Clone, Copy, Default)]
pub struct JavaScriptParser;
impl JavaScriptParser {
pub fn new() -> Self {
Self
}
fn walk_tree(
&self,
node: Node<'_>,
source: &str,
file_path: &str,
repo_id: &str,
graph_build_id: &str,
parent_qualified: Option<&str>,
output: &mut ParseOutput,
) {
match node.kind() {
"function_declaration" => {
if let Some(name_node) = node.child_by_field_name("name") {
let name = &source[name_node.byte_range()];
let qualified = match parent_qualified {
Some(p) => format!("{p}.{name}"),
None => format!("{file_path}::{name}"),
};
let is_entry = self.is_exported_function(&node, source);
output.nodes.push(CodeNode {
id: None,
repo_id: repo_id.to_string(),
graph_build_id: graph_build_id.to_string(),
qualified_name: qualified.clone(),
name: name.to_string(),
kind: CodeNodeKind::Function,
file_path: file_path.to_string(),
start_line: node.start_position().row as u32 + 1,
end_line: node.end_position().row as u32 + 1,
language: "javascript".to_string(),
community_id: None,
is_entry_point: is_entry,
graph_index: None,
});
if let Some(body) = node.child_by_field_name("body") {
self.extract_calls(
body,
source,
file_path,
repo_id,
graph_build_id,
&qualified,
output,
);
}
}
}
"class_declaration" => {
if let Some(name_node) = node.child_by_field_name("name") {
let name = &source[name_node.byte_range()];
let qualified = match parent_qualified {
Some(p) => format!("{p}.{name}"),
None => format!("{file_path}::{name}"),
};
output.nodes.push(CodeNode {
id: None,
repo_id: repo_id.to_string(),
graph_build_id: graph_build_id.to_string(),
qualified_name: qualified.clone(),
name: name.to_string(),
kind: CodeNodeKind::Class,
file_path: file_path.to_string(),
start_line: node.start_position().row as u32 + 1,
end_line: node.end_position().row as u32 + 1,
language: "javascript".to_string(),
community_id: None,
is_entry_point: false,
graph_index: None,
});
// Extract superclass
if let Some(heritage) = node.child_by_field_name("superclass") {
let base_name = &source[heritage.byte_range()];
output.edges.push(CodeEdge {
id: None,
repo_id: repo_id.to_string(),
graph_build_id: graph_build_id.to_string(),
source: qualified.clone(),
target: base_name.to_string(),
kind: CodeEdgeKind::Inherits,
file_path: file_path.to_string(),
line_number: Some(node.start_position().row as u32 + 1),
});
}
if let Some(body) = node.child_by_field_name("body") {
self.walk_children(
body,
source,
file_path,
repo_id,
graph_build_id,
Some(&qualified),
output,
);
}
return;
}
}
"method_definition" => {
if let Some(name_node) = node.child_by_field_name("name") {
let name = &source[name_node.byte_range()];
let qualified = match parent_qualified {
Some(p) => format!("{p}.{name}"),
None => format!("{file_path}::{name}"),
};
output.nodes.push(CodeNode {
id: None,
repo_id: repo_id.to_string(),
graph_build_id: graph_build_id.to_string(),
qualified_name: qualified.clone(),
name: name.to_string(),
kind: CodeNodeKind::Method,
file_path: file_path.to_string(),
start_line: node.start_position().row as u32 + 1,
end_line: node.end_position().row as u32 + 1,
language: "javascript".to_string(),
community_id: None,
is_entry_point: false,
graph_index: None,
});
if let Some(body) = node.child_by_field_name("body") {
self.extract_calls(
body,
source,
file_path,
repo_id,
graph_build_id,
&qualified,
output,
);
}
}
}
// Arrow functions assigned to variables: const foo = () => {}
"lexical_declaration" | "variable_declaration" => {
self.extract_arrow_functions(
node,
source,
file_path,
repo_id,
graph_build_id,
parent_qualified,
output,
);
}
"import_statement" => {
let text = &source[node.byte_range()];
if let Some(module) = self.extract_import_source(text) {
output.edges.push(CodeEdge {
id: None,
repo_id: repo_id.to_string(),
graph_build_id: graph_build_id.to_string(),
source: parent_qualified.unwrap_or(file_path).to_string(),
target: module,
kind: CodeEdgeKind::Imports,
file_path: file_path.to_string(),
line_number: Some(node.start_position().row as u32 + 1),
});
}
}
_ => {}
}
self.walk_children(
node,
source,
file_path,
repo_id,
graph_build_id,
parent_qualified,
output,
);
}
/// Runs `walk_tree` on every direct child of `node`, in document order, passing
/// all other arguments through unchanged.
fn walk_children(
    &self,
    node: Node<'_>,
    source: &str,
    file_path: &str,
    repo_id: &str,
    graph_build_id: &str,
    parent_qualified: Option<&str>,
    output: &mut ParseOutput,
) {
    let mut child_cursor = node.walk();
    node.children(&mut child_cursor).for_each(|descendant| {
        self.walk_tree(
            descendant,
            source,
            file_path,
            repo_id,
            graph_build_id,
            parent_qualified,
            output,
        );
    });
}
/// Records a `Calls` edge from `caller_qualified` to every call expression found
/// in `node` or any of its descendants.
///
/// The edge target is the raw source text of the callee expression (for example
/// `foo` or `obj.method`); no name resolution happens here.
fn extract_calls(
    &self,
    node: Node<'_>,
    source: &str,
    file_path: &str,
    repo_id: &str,
    graph_build_id: &str,
    caller_qualified: &str,
    output: &mut ParseOutput,
) {
    // Depth-first walk with an explicit stack. Children are pushed in reverse so
    // nodes are popped in document (pre-order) order, which keeps the edge order
    // the same as a recursive descent.
    let mut pending = vec![node];
    while let Some(current) = pending.pop() {
        let callee_node = if current.kind() == "call_expression" {
            current.child_by_field_name("function")
        } else {
            None
        };
        if let Some(callee_node) = callee_node {
            let callee_text = &source[callee_node.byte_range()];
            output.edges.push(CodeEdge {
                id: None,
                repo_id: repo_id.to_string(),
                graph_build_id: graph_build_id.to_string(),
                source: caller_qualified.to_string(),
                target: callee_text.to_string(),
                kind: CodeEdgeKind::Calls,
                file_path: file_path.to_string(),
                line_number: Some(current.start_position().row as u32 + 1),
            });
        }
        let mut child_cursor = current.walk();
        let children: Vec<_> = current.children(&mut child_cursor).collect();
        pending.extend(children.into_iter().rev());
    }
}
/// Registers functions bound to variables, e.g. `const f = () => {}`,
/// `let g = function () {}` or `var h = function* () {}`.
///
/// `node` is a `lexical_declaration` or `variable_declaration`. For every
/// declarator whose value is a function-like expression, a `Function` node is
/// pushed to `output` and the function body is scanned for calls. The function is
/// flagged as an entry point when the declaration is exported, using the same
/// check as for function declarations.
fn extract_arrow_functions(
    &self,
    node: Node<'_>,
    source: &str,
    file_path: &str,
    repo_id: &str,
    graph_build_id: &str,
    parent_qualified: Option<&str>,
    output: &mut ParseOutput,
) {
    let mut cursor = node.walk();
    for declarator in node.children(&mut cursor) {
        if declarator.kind() != "variable_declarator" {
            continue;
        }
        let (name_node, value_node) = match (
            declarator.child_by_field_name("name"),
            declarator.child_by_field_name("value"),
        ) {
            (Some(name_node), Some(value_node)) => (name_node, value_node),
            _ => continue,
        };
        // Current tree-sitter-javascript grammars call function expressions
        // `function_expression`; `function` is kept for older grammar versions.
        let is_function_value = matches!(
            value_node.kind(),
            "arrow_function" | "function_expression" | "function" | "generator_function"
        );
        if !is_function_value {
            continue;
        }
        let name = &source[name_node.byte_range()];
        let qualified = match parent_qualified {
            Some(parent) => format!("{parent}.{name}"),
            None => format!("{file_path}::{name}"),
        };
        output.nodes.push(CodeNode {
            id: None,
            repo_id: repo_id.to_string(),
            graph_build_id: graph_build_id.to_string(),
            qualified_name: qualified.clone(),
            name: name.to_string(),
            kind: CodeNodeKind::Function,
            file_path: file_path.to_string(),
            // tree-sitter rows are zero-based; the graph stores one-based lines.
            start_line: declarator.start_position().row as u32 + 1,
            end_line: declarator.end_position().row as u32 + 1,
            language: "javascript".to_string(),
            community_id: None,
            // `export const f = () => {}` wraps the whole declaration, so the
            // export check is made on the declaration node, not the declarator.
            is_entry_point: self.is_exported_function(&node, source),
            graph_index: None,
        });
        if let Some(body) = value_node.child_by_field_name("body") {
            self.extract_calls(
                body,
                source,
                file_path,
                repo_id,
                graph_build_id,
                &qualified,
                output,
            );
        }
    }
}
/// Heuristically decides whether `node` is exported from its module.
///
/// Returns `true` when the direct parent is an `export_statement`, or when the
/// text of the immediately preceding sibling contains `module.exports` or
/// `exports.` (CommonJS style). The second check is purely textual.
fn is_exported_function(&self, node: &Node<'_>, source: &str) -> bool {
    let wrapped_in_export =
        matches!(node.parent(), Some(parent) if parent.kind() == "export_statement");
    // Check for module.exports patterns
    wrapped_in_export
        || match node.prev_sibling() {
            Some(previous) => {
                let previous_text = &source[previous.byte_range()];
                previous_text.contains("module.exports") || previous_text.contains("exports.")
            }
            None => false,
        }
}
/// Extracts the module specifier from the text of an `import` statement.
///
/// The specifier is the first string literal that is not inside braces. That
/// covers `import x from 'm'`, `import 'm'`, `import x from"m"` and multi-line
/// forms, and it is not confused by:
/// * a binding literally named `from` (`import { of, from } from 'rxjs'`),
/// * string-named bindings (`import { "a b" as c } from 'm'`),
/// * import attributes (`import d from './d.json' with { type: 'json' }`).
///
/// Returns `None` when there is no string literal outside braces, when a quote
/// is unterminated, or when the specifier is empty. Escaped quotes inside the
/// specifier are not handled.
fn extract_import_source(&self, import_text: &str) -> Option<String> {
    let mut brace_depth = 0usize;
    let mut rest = import_text;
    while let Some(pos) = rest.find(|c: char| matches!(c, '{' | '}' | '\'' | '"')) {
        // All four searched characters are single-byte ASCII, so byte indexing
        // and `pos + 1` stay on character boundaries.
        let found = rest.as_bytes()[pos] as char;
        let after = &rest[pos + 1..];
        match found {
            '{' => {
                brace_depth += 1;
                rest = after;
            }
            '}' => {
                brace_depth = brace_depth.saturating_sub(1);
                rest = after;
            }
            quote => {
                let end = after.find(quote)?;
                if brace_depth == 0 {
                    let module = after[..end].trim();
                    return (!module.is_empty()).then(|| module.to_string());
                }
                // A string inside `{ ... }` is a binding name or an attribute
                // value, not the module; skip past its closing quote.
                rest = &after[end + 1..];
            }
        }
    }
    None
}
}
impl LanguageParser for JavaScriptParser {
    /// Language identifier reported for this parser.
    fn language(&self) -> &str {
        "javascript"
    }

    /// File extensions (without the leading dot) handled by this parser.
    fn extensions(&self) -> &[&str] {
        &["js", "jsx", "mjs", "cjs"]
    }

    /// Parses one JavaScript file into graph nodes and edges.
    ///
    /// The output always starts with a `File` node for `file_path`; definitions,
    /// calls, imports and inheritance edges found by walking the syntax tree follow.
    ///
    /// # Errors
    ///
    /// Returns `CoreError::Graph` when the JavaScript grammar cannot be loaded into
    /// the tree-sitter parser or when tree-sitter produces no syntax tree.
    fn parse_file(
        &self,
        file_path: &Path,
        source: &str,
        repo_id: &str,
        graph_build_id: &str,
    ) -> Result<ParseOutput, CoreError> {
        let mut parser = Parser::new();
        let language = tree_sitter_javascript::LANGUAGE;
        parser
            .set_language(&language.into())
            .map_err(|e| CoreError::Graph(format!("Failed to set JavaScript language: {e}")))?;
        let tree = parser
            .parse(source, None)
            .ok_or_else(|| CoreError::Graph("Failed to parse JavaScript file".to_string()))?;

        let file_path_str = file_path.to_string_lossy().to_string();
        // Saturate instead of truncating, and never report an end line below the
        // start line: an empty file still spans line 1.
        let line_count = u32::try_from(source.lines().count())
            .unwrap_or(u32::MAX)
            .max(1);

        let mut output = ParseOutput::default();
        output.nodes.push(CodeNode {
            id: None,
            repo_id: repo_id.to_string(),
            graph_build_id: graph_build_id.to_string(),
            qualified_name: file_path_str.clone(),
            name: file_path
                .file_name()
                .map(|n| n.to_string_lossy().to_string())
                .unwrap_or_default(),
            kind: CodeNodeKind::File,
            file_path: file_path_str.clone(),
            start_line: 1,
            end_line: line_count,
            language: "javascript".to_string(),
            community_id: None,
            is_entry_point: false,
            graph_index: None,
        });
        // Top-level walk: there is no enclosing definition yet, hence `None`.
        self.walk_tree(
            tree.root_node(),
            source,
            &file_path_str,
            repo_id,
            graph_build_id,
            None,
            &mut output,
        );
        Ok(output)
    }
}