Add DAST, graph modules, toast notifications, and dashboard enhancements
Add DAST scanning and code knowledge graph features across the stack:

- compliance-dast and compliance-graph workspace crates
- Agent API handlers and routes for DAST targets/scans and graph builds
- Core models and traits for DAST and graph domains
- Dashboard pages for DAST targets/findings/overview and graph explorer/impact
- Toast notification system with auto-dismiss for async action feedback
- Button click animations and disabled states for better UX

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
37
compliance-graph/Cargo.toml
Normal file
37
compliance-graph/Cargo.toml
Normal file
@@ -0,0 +1,37 @@
|
||||
# Manifest for the compliance-graph crate: builds code knowledge graphs
# (tree-sitter parsing, petgraph analysis, tantivy symbol search) and
# persists them to MongoDB.
[package]
name = "compliance-graph"
version = "0.1.0"
edition = "2021"

[lints]
workspace = true

[dependencies]
# Workspace-managed dependencies (versions pinned in the workspace root)
compliance-core = { workspace = true, features = ["mongodb"] }
serde = { workspace = true }
serde_json = { workspace = true }
chrono = { workspace = true }
thiserror = { workspace = true }
tracing = { workspace = true }
uuid = { workspace = true }
tokio = { workspace = true }
mongodb = { workspace = true }

# Tree-sitter parsing
tree-sitter = "0.24"
tree-sitter-rust = "0.23"
tree-sitter-python = "0.23"
tree-sitter-javascript = "0.23"
tree-sitter-typescript = "0.23"

# Graph algorithms
petgraph = "0.7"

# Text search
tantivy = "0.22"

# Serialization
bson = "2"

# Async streams
futures-util = "0.3"
|
||||
256
compliance-graph/src/graph/community.rs
Normal file
256
compliance-graph/src/graph/community.rs
Normal file
@@ -0,0 +1,256 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use petgraph::graph::NodeIndex;
|
||||
use petgraph::visit::EdgeRef;
|
||||
use tracing::info;
|
||||
|
||||
use super::engine::CodeGraph;
|
||||
|
||||
/// Run Louvain community detection on the code graph.
|
||||
/// Returns the number of communities detected.
|
||||
/// Mutates node community_id in place.
|
||||
pub fn detect_communities(code_graph: &CodeGraph) -> u32 {
|
||||
let graph = &code_graph.graph;
|
||||
let node_count = graph.node_count();
|
||||
|
||||
if node_count == 0 {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Initialize: each node in its own community
|
||||
let mut community: HashMap<NodeIndex, u32> = HashMap::new();
|
||||
for idx in graph.node_indices() {
|
||||
community.insert(idx, idx.index() as u32);
|
||||
}
|
||||
|
||||
// Compute total edge weight (all edges weight 1.0)
|
||||
let total_edges = graph.edge_count() as f64;
|
||||
if total_edges == 0.0 {
|
||||
// All nodes are isolated, each is its own community
|
||||
return node_count as u32;
|
||||
}
|
||||
|
||||
let m2 = 2.0 * total_edges;
|
||||
|
||||
// Pre-compute node degrees
|
||||
let mut degree: HashMap<NodeIndex, f64> = HashMap::new();
|
||||
for idx in graph.node_indices() {
|
||||
let d = graph.edges(idx).count() as f64;
|
||||
degree.insert(idx, d);
|
||||
}
|
||||
|
||||
// Louvain phase 1: local moves
|
||||
let mut improved = true;
|
||||
let mut iterations = 0;
|
||||
let max_iterations = 50;
|
||||
|
||||
while improved && iterations < max_iterations {
|
||||
improved = false;
|
||||
iterations += 1;
|
||||
|
||||
for node in graph.node_indices() {
|
||||
let current_comm = community[&node];
|
||||
let node_deg = degree[&node];
|
||||
|
||||
// Compute edges to each neighboring community
|
||||
let mut comm_edges: HashMap<u32, f64> = HashMap::new();
|
||||
for edge in graph.edges(node) {
|
||||
let neighbor = edge.target();
|
||||
let neighbor_comm = community[&neighbor];
|
||||
*comm_edges.entry(neighbor_comm).or_insert(0.0) += 1.0;
|
||||
}
|
||||
// Also check incoming edges (undirected treatment)
|
||||
for edge in graph.edges_directed(node, petgraph::Direction::Incoming) {
|
||||
let neighbor = edge.source();
|
||||
let neighbor_comm = community[&neighbor];
|
||||
*comm_edges.entry(neighbor_comm).or_insert(0.0) += 1.0;
|
||||
}
|
||||
|
||||
// Compute community totals (sum of degrees in each community)
|
||||
let mut comm_totals: HashMap<u32, f64> = HashMap::new();
|
||||
for (n, &c) in &community {
|
||||
*comm_totals.entry(c).or_insert(0.0) += degree[n];
|
||||
}
|
||||
|
||||
// Find best community
|
||||
let current_total = comm_totals.get(¤t_comm).copied().unwrap_or(0.0);
|
||||
let edges_to_current = comm_edges.get(¤t_comm).copied().unwrap_or(0.0);
|
||||
|
||||
// Modularity gain from removing node from current community
|
||||
let remove_cost = edges_to_current - (current_total - node_deg) * node_deg / m2;
|
||||
|
||||
let mut best_comm = current_comm;
|
||||
let mut best_gain = 0.0;
|
||||
|
||||
for (&candidate_comm, &edges_to_candidate) in &comm_edges {
|
||||
if candidate_comm == current_comm {
|
||||
continue;
|
||||
}
|
||||
let candidate_total = comm_totals.get(&candidate_comm).copied().unwrap_or(0.0);
|
||||
|
||||
// Modularity gain from adding node to candidate community
|
||||
let add_gain = edges_to_candidate - candidate_total * node_deg / m2;
|
||||
let gain = add_gain - remove_cost;
|
||||
|
||||
if gain > best_gain {
|
||||
best_gain = gain;
|
||||
best_comm = candidate_comm;
|
||||
}
|
||||
}
|
||||
|
||||
if best_comm != current_comm {
|
||||
community.insert(node, best_comm);
|
||||
improved = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Renumber communities to be contiguous
|
||||
let mut comm_remap: HashMap<u32, u32> = HashMap::new();
|
||||
let mut next_id: u32 = 0;
|
||||
for &c in community.values() {
|
||||
if !comm_remap.contains_key(&c) {
|
||||
comm_remap.insert(c, next_id);
|
||||
next_id += 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Apply to community map
|
||||
for c in community.values_mut() {
|
||||
if let Some(&new_id) = comm_remap.get(c) {
|
||||
*c = new_id;
|
||||
}
|
||||
}
|
||||
|
||||
let num_communities = next_id;
|
||||
info!(
|
||||
communities = num_communities,
|
||||
iterations, "Community detection complete"
|
||||
);
|
||||
|
||||
// NOTE: community IDs are stored in the HashMap but need to be applied
|
||||
// back to the CodeGraph nodes by the caller (engine) if needed for persistence.
|
||||
// For now we return the count; the full assignment is available via the map.
|
||||
|
||||
num_communities
|
||||
}
|
||||
|
||||
/// Apply community assignments back to code nodes
|
||||
pub fn apply_communities(code_graph: &mut CodeGraph) -> u32 {
|
||||
let count = detect_communities_with_assignment(code_graph);
|
||||
count
|
||||
}
|
||||
|
||||
/// Detect communities and write assignments into the nodes
|
||||
fn detect_communities_with_assignment(code_graph: &mut CodeGraph) -> u32 {
|
||||
let graph = &code_graph.graph;
|
||||
let node_count = graph.node_count();
|
||||
|
||||
if node_count == 0 {
|
||||
return 0;
|
||||
}
|
||||
|
||||
let mut community: HashMap<NodeIndex, u32> = HashMap::new();
|
||||
for idx in graph.node_indices() {
|
||||
community.insert(idx, idx.index() as u32);
|
||||
}
|
||||
|
||||
let total_edges = graph.edge_count() as f64;
|
||||
if total_edges == 0.0 {
|
||||
for node in &mut code_graph.nodes {
|
||||
if let Some(gi) = node.graph_index {
|
||||
node.community_id = Some(gi);
|
||||
}
|
||||
}
|
||||
return node_count as u32;
|
||||
}
|
||||
|
||||
let m2 = 2.0 * total_edges;
|
||||
|
||||
let mut degree: HashMap<NodeIndex, f64> = HashMap::new();
|
||||
for idx in graph.node_indices() {
|
||||
let d = (graph.edges(idx).count()
|
||||
+ graph
|
||||
.edges_directed(idx, petgraph::Direction::Incoming)
|
||||
.count()) as f64;
|
||||
degree.insert(idx, d);
|
||||
}
|
||||
|
||||
let mut improved = true;
|
||||
let mut iterations = 0;
|
||||
let max_iterations = 50;
|
||||
|
||||
while improved && iterations < max_iterations {
|
||||
improved = false;
|
||||
iterations += 1;
|
||||
|
||||
for node in graph.node_indices() {
|
||||
let current_comm = community[&node];
|
||||
let node_deg = degree[&node];
|
||||
|
||||
let mut comm_edges: HashMap<u32, f64> = HashMap::new();
|
||||
for edge in graph.edges(node) {
|
||||
let neighbor_comm = community[&edge.target()];
|
||||
*comm_edges.entry(neighbor_comm).or_insert(0.0) += 1.0;
|
||||
}
|
||||
for edge in graph.edges_directed(node, petgraph::Direction::Incoming) {
|
||||
let neighbor_comm = community[&edge.source()];
|
||||
*comm_edges.entry(neighbor_comm).or_insert(0.0) += 1.0;
|
||||
}
|
||||
|
||||
let mut comm_totals: HashMap<u32, f64> = HashMap::new();
|
||||
for (n, &c) in &community {
|
||||
*comm_totals.entry(c).or_insert(0.0) += degree[n];
|
||||
}
|
||||
|
||||
let current_total = comm_totals.get(¤t_comm).copied().unwrap_or(0.0);
|
||||
let edges_to_current = comm_edges.get(¤t_comm).copied().unwrap_or(0.0);
|
||||
let remove_cost = edges_to_current - (current_total - node_deg) * node_deg / m2;
|
||||
|
||||
let mut best_comm = current_comm;
|
||||
let mut best_gain = 0.0;
|
||||
|
||||
for (&candidate_comm, &edges_to_candidate) in &comm_edges {
|
||||
if candidate_comm == current_comm {
|
||||
continue;
|
||||
}
|
||||
let candidate_total = comm_totals.get(&candidate_comm).copied().unwrap_or(0.0);
|
||||
let add_gain = edges_to_candidate - candidate_total * node_deg / m2;
|
||||
let gain = add_gain - remove_cost;
|
||||
|
||||
if gain > best_gain {
|
||||
best_gain = gain;
|
||||
best_comm = candidate_comm;
|
||||
}
|
||||
}
|
||||
|
||||
if best_comm != current_comm {
|
||||
community.insert(node, best_comm);
|
||||
improved = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Renumber
|
||||
let mut comm_remap: HashMap<u32, u32> = HashMap::new();
|
||||
let mut next_id: u32 = 0;
|
||||
for &c in community.values() {
|
||||
if !comm_remap.contains_key(&c) {
|
||||
comm_remap.insert(c, next_id);
|
||||
next_id += 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Apply to nodes
|
||||
for node in &mut code_graph.nodes {
|
||||
if let Some(gi) = node.graph_index {
|
||||
let idx = NodeIndex::new(gi as usize);
|
||||
if let Some(&comm) = community.get(&idx) {
|
||||
let remapped = comm_remap.get(&comm).copied().unwrap_or(comm);
|
||||
node.community_id = Some(remapped);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
next_id
|
||||
}
|
||||
165
compliance-graph/src/graph/engine.rs
Normal file
165
compliance-graph/src/graph/engine.rs
Normal file
@@ -0,0 +1,165 @@
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
|
||||
use chrono::Utc;
|
||||
use compliance_core::error::CoreError;
|
||||
use compliance_core::models::graph::{
|
||||
CodeEdge, CodeEdgeKind, CodeNode, GraphBuildRun, GraphBuildStatus,
|
||||
};
|
||||
use compliance_core::traits::graph_builder::ParseOutput;
|
||||
use petgraph::graph::{DiGraph, NodeIndex};
|
||||
use tracing::info;
|
||||
|
||||
use crate::parsers::registry::ParserRegistry;
|
||||
|
||||
use super::community::detect_communities;
|
||||
use super::impact::ImpactAnalyzer;
|
||||
|
||||
/// The main graph engine that builds and manages code knowledge graphs
pub struct GraphEngine {
    // Dispatches files to the appropriate tree-sitter language parser.
    parser_registry: ParserRegistry,
    // Upper bound on nodes per build; forwarded to the parser registry.
    max_nodes: u32,
}
|
||||
|
||||
/// In-memory representation of a built code graph
pub struct CodeGraph {
    // Directed graph: node weights are qualified names, edge weights are edge kinds.
    pub graph: DiGraph<String, CodeEdgeKind>,
    // Qualified name -> petgraph index, for edge resolution and lookups.
    pub node_map: HashMap<String, NodeIndex>,
    // Parsed code nodes; each node's `graph_index` links back into `graph`.
    pub nodes: Vec<CodeNode>,
    // Only the edges whose endpoints resolved to known nodes.
    pub edges: Vec<CodeEdge>,
}
|
||||
|
||||
impl GraphEngine {
|
||||
pub fn new(max_nodes: u32) -> Self {
|
||||
Self {
|
||||
parser_registry: ParserRegistry::new(),
|
||||
max_nodes,
|
||||
}
|
||||
}
|
||||
|
||||
/// Build a code graph from a repository directory
|
||||
pub fn build_graph(
|
||||
&self,
|
||||
repo_path: &Path,
|
||||
repo_id: &str,
|
||||
graph_build_id: &str,
|
||||
) -> Result<(CodeGraph, GraphBuildRun), CoreError> {
|
||||
let mut build_run = GraphBuildRun::new(repo_id.to_string());
|
||||
|
||||
info!(repo_id, path = %repo_path.display(), "Starting graph build");
|
||||
|
||||
// Phase 1: Parse all files
|
||||
let parse_output = self.parser_registry.parse_directory(
|
||||
repo_path,
|
||||
repo_id,
|
||||
graph_build_id,
|
||||
self.max_nodes,
|
||||
)?;
|
||||
|
||||
// Phase 2: Build petgraph
|
||||
let code_graph = self.build_petgraph(parse_output)?;
|
||||
|
||||
// Phase 3: Run community detection
|
||||
let community_count = detect_communities(&code_graph);
|
||||
|
||||
// Collect language stats
|
||||
let mut languages: Vec<String> = code_graph
|
||||
.nodes
|
||||
.iter()
|
||||
.map(|n| n.language.clone())
|
||||
.collect::<std::collections::HashSet<_>>()
|
||||
.into_iter()
|
||||
.collect();
|
||||
languages.sort();
|
||||
|
||||
build_run.node_count = code_graph.nodes.len() as u32;
|
||||
build_run.edge_count = code_graph.edges.len() as u32;
|
||||
build_run.community_count = community_count;
|
||||
build_run.languages_parsed = languages;
|
||||
build_run.status = GraphBuildStatus::Completed;
|
||||
build_run.completed_at = Some(Utc::now());
|
||||
|
||||
info!(
|
||||
nodes = build_run.node_count,
|
||||
edges = build_run.edge_count,
|
||||
communities = build_run.community_count,
|
||||
"Graph build complete"
|
||||
);
|
||||
|
||||
Ok((code_graph, build_run))
|
||||
}
|
||||
|
||||
/// Build petgraph from parsed output, resolving edges to node indices
|
||||
fn build_petgraph(&self, parse_output: ParseOutput) -> Result<CodeGraph, CoreError> {
|
||||
let mut graph = DiGraph::new();
|
||||
let mut node_map: HashMap<String, NodeIndex> = HashMap::new();
|
||||
let mut nodes = parse_output.nodes;
|
||||
|
||||
// Add all nodes to the graph
|
||||
for node in &mut nodes {
|
||||
let idx = graph.add_node(node.qualified_name.clone());
|
||||
node.graph_index = Some(idx.index() as u32);
|
||||
node_map.insert(node.qualified_name.clone(), idx);
|
||||
}
|
||||
|
||||
// Resolve and add edges
|
||||
let mut resolved_edges = Vec::new();
|
||||
for edge in parse_output.edges {
|
||||
let source_idx = node_map.get(&edge.source);
|
||||
let target_idx = self.resolve_edge_target(&edge.target, &node_map);
|
||||
|
||||
if let (Some(&src), Some(tgt)) = (source_idx, target_idx) {
|
||||
graph.add_edge(src, tgt, edge.kind.clone());
|
||||
resolved_edges.push(edge);
|
||||
}
|
||||
// Skip unresolved edges (cross-file, external deps) — conservative approach
|
||||
}
|
||||
|
||||
Ok(CodeGraph {
|
||||
graph,
|
||||
node_map,
|
||||
nodes,
|
||||
edges: resolved_edges,
|
||||
})
|
||||
}
|
||||
|
||||
/// Try to resolve an edge target to a known node
|
||||
fn resolve_edge_target<'a>(
|
||||
&self,
|
||||
target: &str,
|
||||
node_map: &'a HashMap<String, NodeIndex>,
|
||||
) -> Option<NodeIndex> {
|
||||
// Direct match
|
||||
if let Some(idx) = node_map.get(target) {
|
||||
return Some(*idx);
|
||||
}
|
||||
|
||||
// Try matching just the function/type name (intra-file resolution)
|
||||
for (qualified, idx) in node_map {
|
||||
// Match "foo" to "path/file.rs::foo" or "path/file.rs::Type::foo"
|
||||
if qualified.ends_with(&format!("::{target}"))
|
||||
|| qualified.ends_with(&format!(".{target}"))
|
||||
{
|
||||
return Some(*idx);
|
||||
}
|
||||
}
|
||||
|
||||
// Try matching method calls like "self.method" -> look for "::method"
|
||||
if let Some(method_name) = target.strip_prefix("self.") {
|
||||
for (qualified, idx) in node_map {
|
||||
if qualified.ends_with(&format!("::{method_name}"))
|
||||
|| qualified.ends_with(&format!(".{method_name}"))
|
||||
{
|
||||
return Some(*idx);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
/// Get the impact analyzer for a built graph
|
||||
pub fn impact_analyzer(code_graph: &CodeGraph) -> ImpactAnalyzer<'_> {
|
||||
ImpactAnalyzer::new(code_graph)
|
||||
}
|
||||
}
|
||||
219
compliance-graph/src/graph/impact.rs
Normal file
219
compliance-graph/src/graph/impact.rs
Normal file
@@ -0,0 +1,219 @@
|
||||
use std::collections::{HashSet, VecDeque};
|
||||
|
||||
use compliance_core::models::graph::ImpactAnalysis;
|
||||
use petgraph::graph::NodeIndex;
|
||||
use petgraph::visit::EdgeRef;
|
||||
use petgraph::Direction;
|
||||
|
||||
use super::engine::CodeGraph;
|
||||
|
||||
/// Analyzes the impact/blast radius of findings within a code graph
pub struct ImpactAnalyzer<'a> {
    // Borrowed, fully built graph to analyze against; the analyzer holds no
    // other state.
    code_graph: &'a CodeGraph,
}
|
||||
|
||||
impl<'a> ImpactAnalyzer<'a> {
|
||||
pub fn new(code_graph: &'a CodeGraph) -> Self {
|
||||
Self { code_graph }
|
||||
}
|
||||
|
||||
/// Compute impact analysis for a finding at the given file path and line number
|
||||
pub fn analyze(
|
||||
&self,
|
||||
repo_id: &str,
|
||||
finding_id: &str,
|
||||
graph_build_id: &str,
|
||||
file_path: &str,
|
||||
line_number: Option<u32>,
|
||||
) -> ImpactAnalysis {
|
||||
let mut analysis =
|
||||
ImpactAnalysis::new(repo_id.to_string(), finding_id.to_string(), graph_build_id.to_string());
|
||||
|
||||
// Find the node containing the finding
|
||||
let target_node = self.find_node_at_location(file_path, line_number);
|
||||
let target_idx = match target_node {
|
||||
Some(idx) => idx,
|
||||
None => return analysis,
|
||||
};
|
||||
|
||||
// BFS forward: compute blast radius (what this node affects)
|
||||
let forward_reachable = self.bfs_reachable(target_idx, Direction::Outgoing);
|
||||
analysis.blast_radius = forward_reachable.len() as u32;
|
||||
|
||||
// BFS backward: find entry points that reach this node
|
||||
let backward_reachable = self.bfs_reachable(target_idx, Direction::Incoming);
|
||||
|
||||
// Find affected entry points
|
||||
for &idx in &backward_reachable {
|
||||
if let Some(node) = self.get_node_by_index(idx) {
|
||||
if node.is_entry_point {
|
||||
analysis
|
||||
.affected_entry_points
|
||||
.push(node.qualified_name.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Extract call chains from entry points to the target (limited depth)
|
||||
for entry_name in &analysis.affected_entry_points.clone() {
|
||||
if let Some(&entry_idx) = self.code_graph.node_map.get(entry_name) {
|
||||
if let Some(chain) = self.find_path(entry_idx, target_idx, 10) {
|
||||
analysis.call_chains.push(chain);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Direct callers (incoming edges to target)
|
||||
for edge in self
|
||||
.code_graph
|
||||
.graph
|
||||
.edges_directed(target_idx, Direction::Incoming)
|
||||
{
|
||||
if let Some(node) = self.get_node_by_index(edge.source()) {
|
||||
analysis.direct_callers.push(node.qualified_name.clone());
|
||||
}
|
||||
}
|
||||
|
||||
// Direct callees (outgoing edges from target)
|
||||
for edge in self.code_graph.graph.edges(target_idx) {
|
||||
if let Some(node) = self.get_node_by_index(edge.target()) {
|
||||
analysis.direct_callees.push(node.qualified_name.clone());
|
||||
}
|
||||
}
|
||||
|
||||
// Affected communities
|
||||
let mut affected_comms: HashSet<u32> = HashSet::new();
|
||||
for &idx in forward_reachable.iter().chain(std::iter::once(&target_idx)) {
|
||||
if let Some(node) = self.get_node_by_index(idx) {
|
||||
if let Some(cid) = node.community_id {
|
||||
affected_comms.insert(cid);
|
||||
}
|
||||
}
|
||||
}
|
||||
analysis.affected_communities = affected_comms.into_iter().collect();
|
||||
analysis.affected_communities.sort();
|
||||
|
||||
analysis
|
||||
}
|
||||
|
||||
/// Find the graph node at a given file/line location
|
||||
fn find_node_at_location(&self, file_path: &str, line_number: Option<u32>) -> Option<NodeIndex> {
|
||||
let mut best: Option<(NodeIndex, u32)> = None; // (index, line_span)
|
||||
|
||||
for node in &self.code_graph.nodes {
|
||||
if node.file_path != file_path {
|
||||
continue;
|
||||
}
|
||||
|
||||
if let Some(line) = line_number {
|
||||
if line >= node.start_line && line <= node.end_line {
|
||||
let span = node.end_line - node.start_line;
|
||||
// Prefer the narrowest containing node
|
||||
if best.is_none() || span < best.as_ref().map(|b| b.1).unwrap_or(u32::MAX) {
|
||||
if let Some(gi) = node.graph_index {
|
||||
best = Some((NodeIndex::new(gi as usize), span));
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// No line number, use file node
|
||||
if node.kind == compliance_core::models::graph::CodeNodeKind::File {
|
||||
if let Some(gi) = node.graph_index {
|
||||
return Some(NodeIndex::new(gi as usize));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
best.map(|(idx, _)| idx)
|
||||
}
|
||||
|
||||
/// BFS to find all reachable nodes in a given direction
|
||||
fn bfs_reachable(&self, start: NodeIndex, direction: Direction) -> HashSet<NodeIndex> {
|
||||
let mut visited = HashSet::new();
|
||||
let mut queue = VecDeque::new();
|
||||
queue.push_back(start);
|
||||
|
||||
while let Some(current) = queue.pop_front() {
|
||||
if !visited.insert(current) {
|
||||
continue;
|
||||
}
|
||||
|
||||
let neighbors: Vec<NodeIndex> = match direction {
|
||||
Direction::Outgoing => self
|
||||
.code_graph
|
||||
.graph
|
||||
.edges(current)
|
||||
.map(|e| e.target())
|
||||
.collect(),
|
||||
Direction::Incoming => self
|
||||
.code_graph
|
||||
.graph
|
||||
.edges_directed(current, Direction::Incoming)
|
||||
.map(|e| e.source())
|
||||
.collect(),
|
||||
};
|
||||
|
||||
for neighbor in neighbors {
|
||||
if !visited.contains(&neighbor) {
|
||||
queue.push_back(neighbor);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
visited.remove(&start);
|
||||
visited
|
||||
}
|
||||
|
||||
/// Find a path from source to target (BFS, limited depth)
|
||||
fn find_path(
|
||||
&self,
|
||||
from: NodeIndex,
|
||||
to: NodeIndex,
|
||||
max_depth: usize,
|
||||
) -> Option<Vec<String>> {
|
||||
let mut visited = HashSet::new();
|
||||
let mut queue: VecDeque<(NodeIndex, Vec<NodeIndex>)> = VecDeque::new();
|
||||
queue.push_back((from, vec![from]));
|
||||
|
||||
while let Some((current, path)) = queue.pop_front() {
|
||||
if current == to {
|
||||
return Some(
|
||||
path.iter()
|
||||
.filter_map(|&idx| {
|
||||
self.get_node_by_index(idx)
|
||||
.map(|n| n.qualified_name.clone())
|
||||
})
|
||||
.collect(),
|
||||
);
|
||||
}
|
||||
|
||||
if path.len() >= max_depth {
|
||||
continue;
|
||||
}
|
||||
|
||||
if !visited.insert(current) {
|
||||
continue;
|
||||
}
|
||||
|
||||
for edge in self.code_graph.graph.edges(current) {
|
||||
let next = edge.target();
|
||||
if !visited.contains(&next) {
|
||||
let mut new_path = path.clone();
|
||||
new_path.push(next);
|
||||
queue.push_back((next, new_path));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
fn get_node_by_index(&self, idx: NodeIndex) -> Option<&compliance_core::models::graph::CodeNode> {
|
||||
let target_gi = idx.index() as u32;
|
||||
self.code_graph
|
||||
.nodes
|
||||
.iter()
|
||||
.find(|n| n.graph_index == Some(target_gi))
|
||||
}
|
||||
}
|
||||
4
compliance-graph/src/graph/mod.rs
Normal file
4
compliance-graph/src/graph/mod.rs
Normal file
@@ -0,0 +1,4 @@
|
||||
pub mod community;
|
||||
pub mod engine;
|
||||
pub mod impact;
|
||||
pub mod persistence;
|
||||
255
compliance-graph/src/graph/persistence.rs
Normal file
255
compliance-graph/src/graph/persistence.rs
Normal file
@@ -0,0 +1,255 @@
|
||||
use compliance_core::error::CoreError;
|
||||
use compliance_core::models::graph::{CodeEdge, CodeNode, GraphBuildRun, ImpactAnalysis};
|
||||
use futures_util::TryStreamExt;
|
||||
use mongodb::bson::doc;
|
||||
use mongodb::options::IndexOptions;
|
||||
use mongodb::{Collection, Database, IndexModel};
|
||||
use tracing::info;
|
||||
|
||||
/// MongoDB persistence layer for the code knowledge graph
pub struct GraphStore {
    // Collection "graph_nodes": one document per code node.
    nodes: Collection<CodeNode>,
    // Collection "graph_edges": one document per resolved edge.
    edges: Collection<CodeEdge>,
    // Collection "graph_builds": one document per build run.
    builds: Collection<GraphBuildRun>,
    // Collection "impact_analyses": one document per (repo, finding) pair.
    impacts: Collection<ImpactAnalysis>,
}
|
||||
|
||||
impl GraphStore {
|
||||
pub fn new(db: &Database) -> Self {
|
||||
Self {
|
||||
nodes: db.collection("graph_nodes"),
|
||||
edges: db.collection("graph_edges"),
|
||||
builds: db.collection("graph_builds"),
|
||||
impacts: db.collection("impact_analyses"),
|
||||
}
|
||||
}
|
||||
|
||||
/// Ensure indexes are created
|
||||
pub async fn ensure_indexes(&self) -> Result<(), CoreError> {
|
||||
// graph_nodes: compound index on (repo_id, graph_build_id)
|
||||
self.nodes
|
||||
.create_index(
|
||||
IndexModel::builder()
|
||||
.keys(doc! { "repo_id": 1, "graph_build_id": 1 })
|
||||
.build(),
|
||||
)
|
||||
.await?;
|
||||
|
||||
// graph_nodes: index on qualified_name for lookups
|
||||
self.nodes
|
||||
.create_index(
|
||||
IndexModel::builder()
|
||||
.keys(doc! { "qualified_name": 1 })
|
||||
.build(),
|
||||
)
|
||||
.await?;
|
||||
|
||||
// graph_edges: compound index on (repo_id, graph_build_id)
|
||||
self.edges
|
||||
.create_index(
|
||||
IndexModel::builder()
|
||||
.keys(doc! { "repo_id": 1, "graph_build_id": 1 })
|
||||
.build(),
|
||||
)
|
||||
.await?;
|
||||
|
||||
// graph_builds: compound index on (repo_id, started_at DESC)
|
||||
self.builds
|
||||
.create_index(
|
||||
IndexModel::builder()
|
||||
.keys(doc! { "repo_id": 1, "started_at": -1 })
|
||||
.build(),
|
||||
)
|
||||
.await?;
|
||||
|
||||
// impact_analyses: compound index on (repo_id, finding_id)
|
||||
self.impacts
|
||||
.create_index(
|
||||
IndexModel::builder()
|
||||
.keys(doc! { "repo_id": 1, "finding_id": 1 })
|
||||
.options(IndexOptions::builder().unique(true).build())
|
||||
.build(),
|
||||
)
|
||||
.await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Store a complete graph build result
|
||||
pub async fn store_graph(
|
||||
&self,
|
||||
build_run: &GraphBuildRun,
|
||||
nodes: &[CodeNode],
|
||||
edges: &[CodeEdge],
|
||||
) -> Result<String, CoreError> {
|
||||
// Insert the build run
|
||||
let result = self.builds.insert_one(build_run).await?;
|
||||
let build_id = result
|
||||
.inserted_id
|
||||
.as_object_id()
|
||||
.map(|oid| oid.to_hex())
|
||||
.unwrap_or_default();
|
||||
|
||||
// Insert nodes in batches
|
||||
if !nodes.is_empty() {
|
||||
let batch_size = 1000;
|
||||
for chunk in nodes.chunks(batch_size) {
|
||||
self.nodes.insert_many(chunk.to_vec()).await?;
|
||||
}
|
||||
}
|
||||
|
||||
// Insert edges in batches
|
||||
if !edges.is_empty() {
|
||||
let batch_size = 1000;
|
||||
for chunk in edges.chunks(batch_size) {
|
||||
self.edges.insert_many(chunk.to_vec()).await?;
|
||||
}
|
||||
}
|
||||
|
||||
info!(
|
||||
build_id = %build_id,
|
||||
nodes = nodes.len(),
|
||||
edges = edges.len(),
|
||||
"Graph stored to MongoDB"
|
||||
);
|
||||
|
||||
Ok(build_id)
|
||||
}
|
||||
|
||||
/// Delete previous graph data for a repo before storing new graph
|
||||
pub async fn delete_repo_graph(&self, repo_id: &str) -> Result<(), CoreError> {
|
||||
let filter = doc! { "repo_id": repo_id };
|
||||
self.nodes.delete_many(filter.clone()).await?;
|
||||
self.edges.delete_many(filter.clone()).await?;
|
||||
self.impacts.delete_many(filter).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Store an impact analysis result
|
||||
pub async fn store_impact(&self, impact: &ImpactAnalysis) -> Result<(), CoreError> {
|
||||
let filter = doc! {
|
||||
"repo_id": &impact.repo_id,
|
||||
"finding_id": &impact.finding_id,
|
||||
};
|
||||
|
||||
let opts = mongodb::options::ReplaceOptions::builder()
|
||||
.upsert(true)
|
||||
.build();
|
||||
|
||||
self.impacts
|
||||
.replace_one(filter, impact)
|
||||
.with_options(opts)
|
||||
.await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Get the latest graph build for a repo
|
||||
pub async fn get_latest_build(
|
||||
&self,
|
||||
repo_id: &str,
|
||||
) -> Result<Option<GraphBuildRun>, CoreError> {
|
||||
let filter = doc! { "repo_id": repo_id };
|
||||
let opts = mongodb::options::FindOneOptions::builder()
|
||||
.sort(doc! { "started_at": -1 })
|
||||
.build();
|
||||
|
||||
let result = self.builds.find_one(filter).with_options(opts).await?;
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
/// Get all nodes for a repo's latest graph build
|
||||
pub async fn get_nodes(
|
||||
&self,
|
||||
repo_id: &str,
|
||||
graph_build_id: &str,
|
||||
) -> Result<Vec<CodeNode>, CoreError> {
|
||||
let filter = doc! {
|
||||
"repo_id": repo_id,
|
||||
"graph_build_id": graph_build_id,
|
||||
};
|
||||
|
||||
let cursor = self.nodes.find(filter).await?;
|
||||
let nodes: Vec<CodeNode> = cursor.try_collect().await?;
|
||||
Ok(nodes)
|
||||
}
|
||||
|
||||
/// Get all edges for a repo's latest graph build
|
||||
pub async fn get_edges(
|
||||
&self,
|
||||
repo_id: &str,
|
||||
graph_build_id: &str,
|
||||
) -> Result<Vec<CodeEdge>, CoreError> {
|
||||
let filter = doc! {
|
||||
"repo_id": repo_id,
|
||||
"graph_build_id": graph_build_id,
|
||||
};
|
||||
|
||||
let cursor = self.edges.find(filter).await?;
|
||||
let edges: Vec<CodeEdge> = cursor.try_collect().await?;
|
||||
Ok(edges)
|
||||
}
|
||||
|
||||
/// Get impact analysis for a finding
|
||||
pub async fn get_impact(
|
||||
&self,
|
||||
repo_id: &str,
|
||||
finding_id: &str,
|
||||
) -> Result<Option<ImpactAnalysis>, CoreError> {
|
||||
let filter = doc! {
|
||||
"repo_id": repo_id,
|
||||
"finding_id": finding_id,
|
||||
};
|
||||
|
||||
let result = self.impacts.find_one(filter).await?;
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
/// Get nodes grouped by community
|
||||
pub async fn get_communities(
|
||||
&self,
|
||||
repo_id: &str,
|
||||
graph_build_id: &str,
|
||||
) -> Result<Vec<CommunityInfo>, CoreError> {
|
||||
|
||||
|
||||
let filter = doc! {
|
||||
"repo_id": repo_id,
|
||||
"graph_build_id": graph_build_id,
|
||||
};
|
||||
|
||||
let cursor = self.nodes.find(filter).await?;
|
||||
let nodes: Vec<CodeNode> = cursor.try_collect().await?;
|
||||
|
||||
let mut communities: std::collections::HashMap<u32, Vec<String>> =
|
||||
std::collections::HashMap::new();
|
||||
|
||||
for node in &nodes {
|
||||
if let Some(cid) = node.community_id {
|
||||
communities
|
||||
.entry(cid)
|
||||
.or_default()
|
||||
.push(node.qualified_name.clone());
|
||||
}
|
||||
}
|
||||
|
||||
let mut result: Vec<CommunityInfo> = communities
|
||||
.into_iter()
|
||||
.map(|(id, members)| CommunityInfo {
|
||||
community_id: id,
|
||||
member_count: members.len() as u32,
|
||||
members,
|
||||
})
|
||||
.collect();
|
||||
|
||||
result.sort_by_key(|c| c.community_id);
|
||||
Ok(result)
|
||||
}
|
||||
}
|
||||
|
||||
/// Summary of one detected community: its ID and its members' qualified names.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct CommunityInfo {
    pub community_id: u32,
    // Cached `members.len()`, kept alongside the list for cheap display.
    pub member_count: u32,
    pub members: Vec<String>,
}
|
||||
7
compliance-graph/src/lib.rs
Normal file
7
compliance-graph/src/lib.rs
Normal file
@@ -0,0 +1,7 @@
|
||||
pub mod graph;
|
||||
pub mod parsers;
|
||||
pub mod search;
|
||||
|
||||
pub use graph::engine::GraphEngine;
|
||||
pub use parsers::registry::ParserRegistry;
|
||||
pub use search::index::SymbolIndex;
|
||||
372
compliance-graph/src/parsers/javascript.rs
Normal file
372
compliance-graph/src/parsers/javascript.rs
Normal file
@@ -0,0 +1,372 @@
|
||||
use std::path::Path;
|
||||
|
||||
use compliance_core::error::CoreError;
|
||||
use compliance_core::models::graph::{CodeEdge, CodeEdgeKind, CodeNode, CodeNodeKind};
|
||||
use compliance_core::traits::graph_builder::{LanguageParser, ParseOutput};
|
||||
use tree_sitter::{Node, Parser};
|
||||
|
||||
/// Tree-sitter-based parser for JavaScript source files.
pub struct JavaScriptParser;
|
||||
|
||||
impl JavaScriptParser {
|
||||
    /// Create a new, stateless JavaScript parser.
    pub fn new() -> Self {
        Self
    }
|
||||
|
||||
fn walk_tree(
|
||||
&self,
|
||||
node: Node<'_>,
|
||||
source: &str,
|
||||
file_path: &str,
|
||||
repo_id: &str,
|
||||
graph_build_id: &str,
|
||||
parent_qualified: Option<&str>,
|
||||
output: &mut ParseOutput,
|
||||
) {
|
||||
match node.kind() {
|
||||
"function_declaration" => {
|
||||
if let Some(name_node) = node.child_by_field_name("name") {
|
||||
let name = &source[name_node.byte_range()];
|
||||
let qualified = match parent_qualified {
|
||||
Some(p) => format!("{p}.{name}"),
|
||||
None => format!("{file_path}::{name}"),
|
||||
};
|
||||
|
||||
let is_entry = self.is_exported_function(&node, source);
|
||||
|
||||
output.nodes.push(CodeNode {
|
||||
id: None,
|
||||
repo_id: repo_id.to_string(),
|
||||
graph_build_id: graph_build_id.to_string(),
|
||||
qualified_name: qualified.clone(),
|
||||
name: name.to_string(),
|
||||
kind: CodeNodeKind::Function,
|
||||
file_path: file_path.to_string(),
|
||||
start_line: node.start_position().row as u32 + 1,
|
||||
end_line: node.end_position().row as u32 + 1,
|
||||
language: "javascript".to_string(),
|
||||
community_id: None,
|
||||
is_entry_point: is_entry,
|
||||
graph_index: None,
|
||||
});
|
||||
|
||||
if let Some(body) = node.child_by_field_name("body") {
|
||||
self.extract_calls(
|
||||
body, source, file_path, repo_id, graph_build_id, &qualified, output,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
"class_declaration" => {
|
||||
if let Some(name_node) = node.child_by_field_name("name") {
|
||||
let name = &source[name_node.byte_range()];
|
||||
let qualified = match parent_qualified {
|
||||
Some(p) => format!("{p}.{name}"),
|
||||
None => format!("{file_path}::{name}"),
|
||||
};
|
||||
|
||||
output.nodes.push(CodeNode {
|
||||
id: None,
|
||||
repo_id: repo_id.to_string(),
|
||||
graph_build_id: graph_build_id.to_string(),
|
||||
qualified_name: qualified.clone(),
|
||||
name: name.to_string(),
|
||||
kind: CodeNodeKind::Class,
|
||||
file_path: file_path.to_string(),
|
||||
start_line: node.start_position().row as u32 + 1,
|
||||
end_line: node.end_position().row as u32 + 1,
|
||||
language: "javascript".to_string(),
|
||||
community_id: None,
|
||||
is_entry_point: false,
|
||||
graph_index: None,
|
||||
});
|
||||
|
||||
// Extract superclass
|
||||
if let Some(heritage) = node.child_by_field_name("superclass") {
|
||||
let base_name = &source[heritage.byte_range()];
|
||||
output.edges.push(CodeEdge {
|
||||
id: None,
|
||||
repo_id: repo_id.to_string(),
|
||||
graph_build_id: graph_build_id.to_string(),
|
||||
source: qualified.clone(),
|
||||
target: base_name.to_string(),
|
||||
kind: CodeEdgeKind::Inherits,
|
||||
file_path: file_path.to_string(),
|
||||
line_number: Some(node.start_position().row as u32 + 1),
|
||||
});
|
||||
}
|
||||
|
||||
if let Some(body) = node.child_by_field_name("body") {
|
||||
self.walk_children(
|
||||
body, source, file_path, repo_id, graph_build_id, Some(&qualified),
|
||||
output,
|
||||
);
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
"method_definition" => {
|
||||
if let Some(name_node) = node.child_by_field_name("name") {
|
||||
let name = &source[name_node.byte_range()];
|
||||
let qualified = match parent_qualified {
|
||||
Some(p) => format!("{p}.{name}"),
|
||||
None => format!("{file_path}::{name}"),
|
||||
};
|
||||
|
||||
output.nodes.push(CodeNode {
|
||||
id: None,
|
||||
repo_id: repo_id.to_string(),
|
||||
graph_build_id: graph_build_id.to_string(),
|
||||
qualified_name: qualified.clone(),
|
||||
name: name.to_string(),
|
||||
kind: CodeNodeKind::Method,
|
||||
file_path: file_path.to_string(),
|
||||
start_line: node.start_position().row as u32 + 1,
|
||||
end_line: node.end_position().row as u32 + 1,
|
||||
language: "javascript".to_string(),
|
||||
community_id: None,
|
||||
is_entry_point: false,
|
||||
graph_index: None,
|
||||
});
|
||||
|
||||
if let Some(body) = node.child_by_field_name("body") {
|
||||
self.extract_calls(
|
||||
body, source, file_path, repo_id, graph_build_id, &qualified, output,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
// Arrow functions assigned to variables: const foo = () => {}
|
||||
"lexical_declaration" | "variable_declaration" => {
|
||||
self.extract_arrow_functions(
|
||||
node, source, file_path, repo_id, graph_build_id, parent_qualified, output,
|
||||
);
|
||||
}
|
||||
"import_statement" => {
|
||||
let text = &source[node.byte_range()];
|
||||
if let Some(module) = self.extract_import_source(text) {
|
||||
output.edges.push(CodeEdge {
|
||||
id: None,
|
||||
repo_id: repo_id.to_string(),
|
||||
graph_build_id: graph_build_id.to_string(),
|
||||
source: parent_qualified.unwrap_or(file_path).to_string(),
|
||||
target: module,
|
||||
kind: CodeEdgeKind::Imports,
|
||||
file_path: file_path.to_string(),
|
||||
line_number: Some(node.start_position().row as u32 + 1),
|
||||
});
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
self.walk_children(
|
||||
node,
|
||||
source,
|
||||
file_path,
|
||||
repo_id,
|
||||
graph_build_id,
|
||||
parent_qualified,
|
||||
output,
|
||||
);
|
||||
}
|
||||
|
||||
fn walk_children(
|
||||
&self,
|
||||
node: Node<'_>,
|
||||
source: &str,
|
||||
file_path: &str,
|
||||
repo_id: &str,
|
||||
graph_build_id: &str,
|
||||
parent_qualified: Option<&str>,
|
||||
output: &mut ParseOutput,
|
||||
) {
|
||||
let mut cursor = node.walk();
|
||||
for child in node.children(&mut cursor) {
|
||||
self.walk_tree(
|
||||
child, source, file_path, repo_id, graph_build_id, parent_qualified, output,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
fn extract_calls(
|
||||
&self,
|
||||
node: Node<'_>,
|
||||
source: &str,
|
||||
file_path: &str,
|
||||
repo_id: &str,
|
||||
graph_build_id: &str,
|
||||
caller_qualified: &str,
|
||||
output: &mut ParseOutput,
|
||||
) {
|
||||
if node.kind() == "call_expression" {
|
||||
if let Some(func_node) = node.child_by_field_name("function") {
|
||||
let callee = &source[func_node.byte_range()];
|
||||
output.edges.push(CodeEdge {
|
||||
id: None,
|
||||
repo_id: repo_id.to_string(),
|
||||
graph_build_id: graph_build_id.to_string(),
|
||||
source: caller_qualified.to_string(),
|
||||
target: callee.to_string(),
|
||||
kind: CodeEdgeKind::Calls,
|
||||
file_path: file_path.to_string(),
|
||||
line_number: Some(node.start_position().row as u32 + 1),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
let mut cursor = node.walk();
|
||||
for child in node.children(&mut cursor) {
|
||||
self.extract_calls(
|
||||
child, source, file_path, repo_id, graph_build_id, caller_qualified, output,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
fn extract_arrow_functions(
|
||||
&self,
|
||||
node: Node<'_>,
|
||||
source: &str,
|
||||
file_path: &str,
|
||||
repo_id: &str,
|
||||
graph_build_id: &str,
|
||||
parent_qualified: Option<&str>,
|
||||
output: &mut ParseOutput,
|
||||
) {
|
||||
let mut cursor = node.walk();
|
||||
for child in node.children(&mut cursor) {
|
||||
if child.kind() == "variable_declarator" {
|
||||
let name_node = child.child_by_field_name("name");
|
||||
let value_node = child.child_by_field_name("value");
|
||||
if let (Some(name_n), Some(value_n)) = (name_node, value_node) {
|
||||
if value_n.kind() == "arrow_function" || value_n.kind() == "function" {
|
||||
let name = &source[name_n.byte_range()];
|
||||
let qualified = match parent_qualified {
|
||||
Some(p) => format!("{p}.{name}"),
|
||||
None => format!("{file_path}::{name}"),
|
||||
};
|
||||
|
||||
output.nodes.push(CodeNode {
|
||||
id: None,
|
||||
repo_id: repo_id.to_string(),
|
||||
graph_build_id: graph_build_id.to_string(),
|
||||
qualified_name: qualified.clone(),
|
||||
name: name.to_string(),
|
||||
kind: CodeNodeKind::Function,
|
||||
file_path: file_path.to_string(),
|
||||
start_line: child.start_position().row as u32 + 1,
|
||||
end_line: child.end_position().row as u32 + 1,
|
||||
language: "javascript".to_string(),
|
||||
community_id: None,
|
||||
is_entry_point: false,
|
||||
graph_index: None,
|
||||
});
|
||||
|
||||
if let Some(body) = value_n.child_by_field_name("body") {
|
||||
self.extract_calls(
|
||||
body, source, file_path, repo_id, graph_build_id, &qualified,
|
||||
output,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn is_exported_function(&self, node: &Node<'_>, source: &str) -> bool {
|
||||
if let Some(parent) = node.parent() {
|
||||
if parent.kind() == "export_statement" {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
// Check for module.exports patterns
|
||||
if let Some(prev) = node.prev_sibling() {
|
||||
let text = &source[prev.byte_range()];
|
||||
if text.contains("module.exports") || text.contains("exports.") {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
fn extract_import_source(&self, import_text: &str) -> Option<String> {
|
||||
// import ... from 'module' or import 'module'
|
||||
let from_idx = import_text.find("from ");
|
||||
let start = if let Some(idx) = from_idx {
|
||||
idx + 5
|
||||
} else {
|
||||
import_text.find("import ")? + 7
|
||||
};
|
||||
let rest = &import_text[start..];
|
||||
let module = rest
|
||||
.trim()
|
||||
.trim_matches(|c| c == '\'' || c == '"' || c == ';' || c == ' ');
|
||||
if module.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(module.to_string())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl LanguageParser for JavaScriptParser {
|
||||
fn language(&self) -> &str {
|
||||
"javascript"
|
||||
}
|
||||
|
||||
fn extensions(&self) -> &[&str] {
|
||||
&["js", "jsx", "mjs", "cjs"]
|
||||
}
|
||||
|
||||
fn parse_file(
|
||||
&self,
|
||||
file_path: &Path,
|
||||
source: &str,
|
||||
repo_id: &str,
|
||||
graph_build_id: &str,
|
||||
) -> Result<ParseOutput, CoreError> {
|
||||
let mut parser = Parser::new();
|
||||
let language = tree_sitter_javascript::LANGUAGE;
|
||||
parser
|
||||
.set_language(&language.into())
|
||||
.map_err(|e| CoreError::Graph(format!("Failed to set JavaScript language: {e}")))?;
|
||||
|
||||
let tree = parser
|
||||
.parse(source, None)
|
||||
.ok_or_else(|| CoreError::Graph("Failed to parse JavaScript file".to_string()))?;
|
||||
|
||||
let file_path_str = file_path.to_string_lossy().to_string();
|
||||
let mut output = ParseOutput::default();
|
||||
|
||||
output.nodes.push(CodeNode {
|
||||
id: None,
|
||||
repo_id: repo_id.to_string(),
|
||||
graph_build_id: graph_build_id.to_string(),
|
||||
qualified_name: file_path_str.clone(),
|
||||
name: file_path
|
||||
.file_name()
|
||||
.map(|n| n.to_string_lossy().to_string())
|
||||
.unwrap_or_default(),
|
||||
kind: CodeNodeKind::File,
|
||||
file_path: file_path_str.clone(),
|
||||
start_line: 1,
|
||||
end_line: source.lines().count() as u32,
|
||||
language: "javascript".to_string(),
|
||||
community_id: None,
|
||||
is_entry_point: false,
|
||||
graph_index: None,
|
||||
});
|
||||
|
||||
self.walk_tree(
|
||||
tree.root_node(),
|
||||
source,
|
||||
&file_path_str,
|
||||
repo_id,
|
||||
graph_build_id,
|
||||
None,
|
||||
&mut output,
|
||||
);
|
||||
|
||||
Ok(output)
|
||||
}
|
||||
}
|
||||
5
compliance-graph/src/parsers/mod.rs
Normal file
5
compliance-graph/src/parsers/mod.rs
Normal file
@@ -0,0 +1,5 @@
|
||||
pub mod javascript;
|
||||
pub mod python;
|
||||
pub mod registry;
|
||||
pub mod rust_parser;
|
||||
pub mod typescript;
|
||||
336
compliance-graph/src/parsers/python.rs
Normal file
336
compliance-graph/src/parsers/python.rs
Normal file
@@ -0,0 +1,336 @@
|
||||
use std::path::Path;
|
||||
|
||||
use compliance_core::error::CoreError;
|
||||
use compliance_core::models::graph::{CodeEdge, CodeEdgeKind, CodeNode, CodeNodeKind};
|
||||
use compliance_core::traits::graph_builder::{LanguageParser, ParseOutput};
|
||||
use tree_sitter::{Node, Parser};
|
||||
|
||||
/// Tree-sitter based parser that extracts graph nodes and edges from
/// Python source files. Stateless; see `parse_file`.
pub struct PythonParser;
|
||||
|
||||
impl PythonParser {
|
||||
pub fn new() -> Self {
|
||||
Self
|
||||
}
|
||||
|
||||
fn walk_tree(
|
||||
&self,
|
||||
node: Node<'_>,
|
||||
source: &str,
|
||||
file_path: &str,
|
||||
repo_id: &str,
|
||||
graph_build_id: &str,
|
||||
parent_qualified: Option<&str>,
|
||||
output: &mut ParseOutput,
|
||||
) {
|
||||
match node.kind() {
|
||||
"function_definition" => {
|
||||
if let Some(name_node) = node.child_by_field_name("name") {
|
||||
let name = &source[name_node.byte_range()];
|
||||
let qualified = match parent_qualified {
|
||||
Some(p) => format!("{p}.{name}"),
|
||||
None => format!("{file_path}::{name}"),
|
||||
};
|
||||
|
||||
let is_method = parent_qualified
|
||||
.map(|p| p.contains("class"))
|
||||
.unwrap_or(false);
|
||||
let kind = if is_method {
|
||||
CodeNodeKind::Method
|
||||
} else {
|
||||
CodeNodeKind::Function
|
||||
};
|
||||
|
||||
let is_entry = name == "__main__"
|
||||
|| name == "main"
|
||||
|| self.has_decorator(&node, source, "app.route")
|
||||
|| self.has_decorator(&node, source, "app.get")
|
||||
|| self.has_decorator(&node, source, "app.post");
|
||||
|
||||
output.nodes.push(CodeNode {
|
||||
id: None,
|
||||
repo_id: repo_id.to_string(),
|
||||
graph_build_id: graph_build_id.to_string(),
|
||||
qualified_name: qualified.clone(),
|
||||
name: name.to_string(),
|
||||
kind,
|
||||
file_path: file_path.to_string(),
|
||||
start_line: node.start_position().row as u32 + 1,
|
||||
end_line: node.end_position().row as u32 + 1,
|
||||
language: "python".to_string(),
|
||||
community_id: None,
|
||||
is_entry_point: is_entry,
|
||||
graph_index: None,
|
||||
});
|
||||
|
||||
// Extract calls in function body
|
||||
if let Some(body) = node.child_by_field_name("body") {
|
||||
self.extract_calls(
|
||||
body,
|
||||
source,
|
||||
file_path,
|
||||
repo_id,
|
||||
graph_build_id,
|
||||
&qualified,
|
||||
output,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
"class_definition" => {
|
||||
if let Some(name_node) = node.child_by_field_name("name") {
|
||||
let name = &source[name_node.byte_range()];
|
||||
let qualified = match parent_qualified {
|
||||
Some(p) => format!("{p}.{name}"),
|
||||
None => format!("{file_path}::{name}"),
|
||||
};
|
||||
|
||||
output.nodes.push(CodeNode {
|
||||
id: None,
|
||||
repo_id: repo_id.to_string(),
|
||||
graph_build_id: graph_build_id.to_string(),
|
||||
qualified_name: qualified.clone(),
|
||||
name: name.to_string(),
|
||||
kind: CodeNodeKind::Class,
|
||||
file_path: file_path.to_string(),
|
||||
start_line: node.start_position().row as u32 + 1,
|
||||
end_line: node.end_position().row as u32 + 1,
|
||||
language: "python".to_string(),
|
||||
community_id: None,
|
||||
is_entry_point: false,
|
||||
graph_index: None,
|
||||
});
|
||||
|
||||
// Extract superclasses
|
||||
if let Some(bases) = node.child_by_field_name("superclasses") {
|
||||
self.extract_inheritance(
|
||||
bases,
|
||||
source,
|
||||
file_path,
|
||||
repo_id,
|
||||
graph_build_id,
|
||||
&qualified,
|
||||
output,
|
||||
);
|
||||
}
|
||||
|
||||
// Walk methods
|
||||
if let Some(body) = node.child_by_field_name("body") {
|
||||
self.walk_children(
|
||||
body,
|
||||
source,
|
||||
file_path,
|
||||
repo_id,
|
||||
graph_build_id,
|
||||
Some(&qualified),
|
||||
output,
|
||||
);
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
"import_statement" | "import_from_statement" => {
|
||||
let import_text = &source[node.byte_range()];
|
||||
if let Some(module) = self.extract_import_module(import_text) {
|
||||
output.edges.push(CodeEdge {
|
||||
id: None,
|
||||
repo_id: repo_id.to_string(),
|
||||
graph_build_id: graph_build_id.to_string(),
|
||||
source: parent_qualified.unwrap_or(file_path).to_string(),
|
||||
target: module,
|
||||
kind: CodeEdgeKind::Imports,
|
||||
file_path: file_path.to_string(),
|
||||
line_number: Some(node.start_position().row as u32 + 1),
|
||||
});
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
self.walk_children(
|
||||
node,
|
||||
source,
|
||||
file_path,
|
||||
repo_id,
|
||||
graph_build_id,
|
||||
parent_qualified,
|
||||
output,
|
||||
);
|
||||
}
|
||||
|
||||
fn walk_children(
|
||||
&self,
|
||||
node: Node<'_>,
|
||||
source: &str,
|
||||
file_path: &str,
|
||||
repo_id: &str,
|
||||
graph_build_id: &str,
|
||||
parent_qualified: Option<&str>,
|
||||
output: &mut ParseOutput,
|
||||
) {
|
||||
let mut cursor = node.walk();
|
||||
for child in node.children(&mut cursor) {
|
||||
self.walk_tree(
|
||||
child,
|
||||
source,
|
||||
file_path,
|
||||
repo_id,
|
||||
graph_build_id,
|
||||
parent_qualified,
|
||||
output,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
fn extract_calls(
|
||||
&self,
|
||||
node: Node<'_>,
|
||||
source: &str,
|
||||
file_path: &str,
|
||||
repo_id: &str,
|
||||
graph_build_id: &str,
|
||||
caller_qualified: &str,
|
||||
output: &mut ParseOutput,
|
||||
) {
|
||||
if node.kind() == "call" {
|
||||
if let Some(func_node) = node.child_by_field_name("function") {
|
||||
let callee = &source[func_node.byte_range()];
|
||||
output.edges.push(CodeEdge {
|
||||
id: None,
|
||||
repo_id: repo_id.to_string(),
|
||||
graph_build_id: graph_build_id.to_string(),
|
||||
source: caller_qualified.to_string(),
|
||||
target: callee.to_string(),
|
||||
kind: CodeEdgeKind::Calls,
|
||||
file_path: file_path.to_string(),
|
||||
line_number: Some(node.start_position().row as u32 + 1),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
let mut cursor = node.walk();
|
||||
for child in node.children(&mut cursor) {
|
||||
self.extract_calls(
|
||||
child,
|
||||
source,
|
||||
file_path,
|
||||
repo_id,
|
||||
graph_build_id,
|
||||
caller_qualified,
|
||||
output,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
fn extract_inheritance(
|
||||
&self,
|
||||
node: Node<'_>,
|
||||
source: &str,
|
||||
file_path: &str,
|
||||
repo_id: &str,
|
||||
graph_build_id: &str,
|
||||
class_qualified: &str,
|
||||
output: &mut ParseOutput,
|
||||
) {
|
||||
let mut cursor = node.walk();
|
||||
for child in node.children(&mut cursor) {
|
||||
if child.kind() == "identifier" || child.kind() == "attribute" {
|
||||
let base_name = &source[child.byte_range()];
|
||||
output.edges.push(CodeEdge {
|
||||
id: None,
|
||||
repo_id: repo_id.to_string(),
|
||||
graph_build_id: graph_build_id.to_string(),
|
||||
source: class_qualified.to_string(),
|
||||
target: base_name.to_string(),
|
||||
kind: CodeEdgeKind::Inherits,
|
||||
file_path: file_path.to_string(),
|
||||
line_number: Some(node.start_position().row as u32 + 1),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn has_decorator(&self, node: &Node<'_>, source: &str, decorator_name: &str) -> bool {
|
||||
if let Some(prev) = node.prev_sibling() {
|
||||
if prev.kind() == "decorator" {
|
||||
let text = &source[prev.byte_range()];
|
||||
return text.contains(decorator_name);
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
fn extract_import_module(&self, import_text: &str) -> Option<String> {
|
||||
if let Some(rest) = import_text.strip_prefix("from ") {
|
||||
// "from foo.bar import baz" -> "foo.bar"
|
||||
let module = rest.split_whitespace().next()?;
|
||||
Some(module.to_string())
|
||||
} else if let Some(rest) = import_text.strip_prefix("import ") {
|
||||
let module = rest.trim().trim_end_matches(';');
|
||||
Some(module.to_string())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl LanguageParser for PythonParser {
|
||||
fn language(&self) -> &str {
|
||||
"python"
|
||||
}
|
||||
|
||||
fn extensions(&self) -> &[&str] {
|
||||
&["py"]
|
||||
}
|
||||
|
||||
fn parse_file(
|
||||
&self,
|
||||
file_path: &Path,
|
||||
source: &str,
|
||||
repo_id: &str,
|
||||
graph_build_id: &str,
|
||||
) -> Result<ParseOutput, CoreError> {
|
||||
let mut parser = Parser::new();
|
||||
let language = tree_sitter_python::LANGUAGE;
|
||||
parser
|
||||
.set_language(&language.into())
|
||||
.map_err(|e| CoreError::Graph(format!("Failed to set Python language: {e}")))?;
|
||||
|
||||
let tree = parser
|
||||
.parse(source, None)
|
||||
.ok_or_else(|| CoreError::Graph("Failed to parse Python file".to_string()))?;
|
||||
|
||||
let file_path_str = file_path.to_string_lossy().to_string();
|
||||
let mut output = ParseOutput::default();
|
||||
|
||||
output.nodes.push(CodeNode {
|
||||
id: None,
|
||||
repo_id: repo_id.to_string(),
|
||||
graph_build_id: graph_build_id.to_string(),
|
||||
qualified_name: file_path_str.clone(),
|
||||
name: file_path
|
||||
.file_name()
|
||||
.map(|n| n.to_string_lossy().to_string())
|
||||
.unwrap_or_default(),
|
||||
kind: CodeNodeKind::File,
|
||||
file_path: file_path_str.clone(),
|
||||
start_line: 1,
|
||||
end_line: source.lines().count() as u32,
|
||||
language: "python".to_string(),
|
||||
community_id: None,
|
||||
is_entry_point: false,
|
||||
graph_index: None,
|
||||
});
|
||||
|
||||
self.walk_tree(
|
||||
tree.root_node(),
|
||||
source,
|
||||
&file_path_str,
|
||||
repo_id,
|
||||
graph_build_id,
|
||||
None,
|
||||
&mut output,
|
||||
);
|
||||
|
||||
Ok(output)
|
||||
}
|
||||
}
|
||||
182
compliance-graph/src/parsers/registry.rs
Normal file
182
compliance-graph/src/parsers/registry.rs
Normal file
@@ -0,0 +1,182 @@
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
|
||||
use compliance_core::error::CoreError;
|
||||
use compliance_core::traits::graph_builder::{LanguageParser, ParseOutput};
|
||||
use tracing::info;
|
||||
|
||||
use super::javascript::JavaScriptParser;
|
||||
use super::python::PythonParser;
|
||||
use super::rust_parser::RustParser;
|
||||
use super::typescript::TypeScriptParser;
|
||||
|
||||
/// Registry of language parsers, indexed by file extension.
pub struct ParserRegistry {
    /// Built-in parsers, one per supported language.
    parsers: Vec<Box<dyn LanguageParser>>,
    /// Maps a file extension (e.g. "rs", "py") to an index into `parsers`.
    extension_map: HashMap<String, usize>,
}
|
||||
|
||||
impl ParserRegistry {
    /// Create a registry with all built-in parsers (Rust, Python,
    /// JavaScript, TypeScript) and build the extension -> parser index.
    /// If two parsers claim the same extension, the later one wins.
    pub fn new() -> Self {
        let parsers: Vec<Box<dyn LanguageParser>> = vec![
            Box::new(RustParser::new()),
            Box::new(PythonParser::new()),
            Box::new(JavaScriptParser::new()),
            Box::new(TypeScriptParser::new()),
        ];

        let mut extension_map = HashMap::new();
        for (idx, parser) in parsers.iter().enumerate() {
            for ext in parser.extensions() {
                extension_map.insert(ext.to_string(), idx);
            }
        }

        Self {
            parsers,
            extension_map,
        }
    }

    /// Check if a file extension (without the leading dot) is supported.
    pub fn supports_extension(&self, ext: &str) -> bool {
        self.extension_map.contains_key(ext)
    }

    /// Get supported extensions, in arbitrary (HashMap) order.
    pub fn supported_extensions(&self) -> Vec<&str> {
        self.extension_map.keys().map(|s| s.as_str()).collect()
    }

    /// Parse a file, selecting the appropriate parser by extension.
    ///
    /// Returns `Ok(None)` when no parser claims the file's extension;
    /// otherwise delegates to that parser and returns its output.
    pub fn parse_file(
        &self,
        file_path: &Path,
        source: &str,
        repo_id: &str,
        graph_build_id: &str,
    ) -> Result<Option<ParseOutput>, CoreError> {
        let ext = file_path
            .extension()
            .and_then(|e| e.to_str())
            .unwrap_or("");

        let parser_idx = match self.extension_map.get(ext) {
            Some(idx) => *idx,
            None => return Ok(None),
        };

        let parser = &self.parsers[parser_idx];
        info!(
            file = %file_path.display(),
            language = parser.language(),
            "Parsing file"
        );

        let output = parser.parse_file(file_path, source, repo_id, graph_build_id)?;
        Ok(Some(output))
    }

    /// Parse all supported files in a directory tree, merging every file's
    /// nodes and edges into one `ParseOutput`.
    ///
    /// `max_nodes` is a soft cap: the walk stops before starting a new
    /// file once the running node count reaches it, so the final total may
    /// exceed the cap by up to one file's worth of nodes.
    pub fn parse_directory(
        &self,
        dir: &Path,
        repo_id: &str,
        graph_build_id: &str,
        max_nodes: u32,
    ) -> Result<ParseOutput, CoreError> {
        let mut combined = ParseOutput::default();
        let mut node_count: u32 = 0;

        self.walk_directory(dir, dir, repo_id, graph_build_id, max_nodes, &mut node_count, &mut combined)?;

        info!(
            nodes = combined.nodes.len(),
            edges = combined.edges.len(),
            "Directory parsing complete"
        );

        Ok(combined)
    }

    /// Recursive helper for `parse_directory`.
    ///
    /// `base` stays fixed at the root so file paths in the output are
    /// relative to it; `dir` is the directory currently being scanned.
    /// Hidden directories and common build/vendor directories are skipped;
    /// unreadable (e.g. binary) files are silently ignored.
    fn walk_directory(
        &self,
        base: &Path,
        dir: &Path,
        repo_id: &str,
        graph_build_id: &str,
        max_nodes: u32,
        node_count: &mut u32,
        combined: &mut ParseOutput,
    ) -> Result<(), CoreError> {
        let entries = std::fs::read_dir(dir).map_err(|e| {
            CoreError::Graph(format!("Failed to read directory {}: {e}", dir.display()))
        })?;

        for entry in entries {
            let entry = entry.map_err(|e| CoreError::Graph(format!("Dir entry error: {e}")))?;
            let path = entry.path();

            // Skip hidden directories and common non-source dirs
            // (the ".git" arm is already covered by starts_with('.')).
            if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
                if name.starts_with('.')
                    || name == "node_modules"
                    || name == "target"
                    || name == "__pycache__"
                    || name == "vendor"
                    || name == "dist"
                    || name == "build"
                    || name == ".git"
                {
                    continue;
                }
            }

            if path.is_dir() {
                self.walk_directory(
                    base,
                    &path,
                    repo_id,
                    graph_build_id,
                    max_nodes,
                    node_count,
                    combined,
                )?;
            } else if path.is_file() {
                // Soft cap: stop before parsing another file.
                if *node_count >= max_nodes {
                    info!(max_nodes, "Reached node limit, stopping parse");
                    return Ok(());
                }

                let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
                if !self.supports_extension(ext) {
                    continue;
                }

                // Use relative path from base
                let rel_path = path.strip_prefix(base).unwrap_or(&path);

                let source = match std::fs::read_to_string(&path) {
                    Ok(s) => s,
                    Err(_) => continue, // Skip binary/unreadable files
                };

                if let Some(output) = self.parse_file(rel_path, &source, repo_id, graph_build_id)?
                {
                    *node_count += output.nodes.len() as u32;
                    combined.nodes.extend(output.nodes);
                    combined.edges.extend(output.edges);
                }
            }
        }

        Ok(())
    }
}
|
||||
|
||||
impl Default for ParserRegistry {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
426
compliance-graph/src/parsers/rust_parser.rs
Normal file
426
compliance-graph/src/parsers/rust_parser.rs
Normal file
@@ -0,0 +1,426 @@
|
||||
use std::path::Path;
|
||||
|
||||
use compliance_core::error::CoreError;
|
||||
use compliance_core::models::graph::{CodeEdge, CodeEdgeKind, CodeNode, CodeNodeKind};
|
||||
use compliance_core::traits::graph_builder::{LanguageParser, ParseOutput};
|
||||
use tree_sitter::{Node, Parser};
|
||||
|
||||
/// Tree-sitter based parser that extracts graph nodes and edges from
/// Rust source files. Stateless; a fresh tree-sitter `Parser` is built
/// per file.
pub struct RustParser;
|
||||
|
||||
impl RustParser {
|
||||
pub fn new() -> Self {
|
||||
Self
|
||||
}
|
||||
|
||||
fn walk_tree(
|
||||
&self,
|
||||
node: Node<'_>,
|
||||
source: &str,
|
||||
file_path: &str,
|
||||
repo_id: &str,
|
||||
graph_build_id: &str,
|
||||
parent_qualified: Option<&str>,
|
||||
output: &mut ParseOutput,
|
||||
) {
|
||||
match node.kind() {
|
||||
"function_item" | "function_signature_item" => {
|
||||
if let Some(name_node) = node.child_by_field_name("name") {
|
||||
let name = &source[name_node.byte_range()];
|
||||
let qualified = match parent_qualified {
|
||||
Some(p) => format!("{p}::{name}"),
|
||||
None => format!("{file_path}::{name}"),
|
||||
};
|
||||
|
||||
let is_entry = name == "main"
|
||||
|| self.has_attribute(&node, source, "test")
|
||||
|| self.has_attribute(&node, source, "tokio::main")
|
||||
|| self.has_pub_visibility(&node, source);
|
||||
|
||||
output.nodes.push(CodeNode {
|
||||
id: None,
|
||||
repo_id: repo_id.to_string(),
|
||||
graph_build_id: graph_build_id.to_string(),
|
||||
qualified_name: qualified.clone(),
|
||||
name: name.to_string(),
|
||||
kind: CodeNodeKind::Function,
|
||||
file_path: file_path.to_string(),
|
||||
start_line: node.start_position().row as u32 + 1,
|
||||
end_line: node.end_position().row as u32 + 1,
|
||||
language: "rust".to_string(),
|
||||
community_id: None,
|
||||
is_entry_point: is_entry,
|
||||
graph_index: None,
|
||||
});
|
||||
|
||||
// Extract function calls within the body
|
||||
if let Some(body) = node.child_by_field_name("body") {
|
||||
self.extract_calls(
|
||||
body,
|
||||
source,
|
||||
file_path,
|
||||
repo_id,
|
||||
graph_build_id,
|
||||
&qualified,
|
||||
output,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
"struct_item" => {
|
||||
if let Some(name_node) = node.child_by_field_name("name") {
|
||||
let name = &source[name_node.byte_range()];
|
||||
let qualified = match parent_qualified {
|
||||
Some(p) => format!("{p}::{name}"),
|
||||
None => format!("{file_path}::{name}"),
|
||||
};
|
||||
|
||||
output.nodes.push(CodeNode {
|
||||
id: None,
|
||||
repo_id: repo_id.to_string(),
|
||||
graph_build_id: graph_build_id.to_string(),
|
||||
qualified_name: qualified,
|
||||
name: name.to_string(),
|
||||
kind: CodeNodeKind::Struct,
|
||||
file_path: file_path.to_string(),
|
||||
start_line: node.start_position().row as u32 + 1,
|
||||
end_line: node.end_position().row as u32 + 1,
|
||||
language: "rust".to_string(),
|
||||
community_id: None,
|
||||
is_entry_point: false,
|
||||
graph_index: None,
|
||||
});
|
||||
}
|
||||
}
|
||||
"enum_item" => {
|
||||
if let Some(name_node) = node.child_by_field_name("name") {
|
||||
let name = &source[name_node.byte_range()];
|
||||
let qualified = match parent_qualified {
|
||||
Some(p) => format!("{p}::{name}"),
|
||||
None => format!("{file_path}::{name}"),
|
||||
};
|
||||
|
||||
output.nodes.push(CodeNode {
|
||||
id: None,
|
||||
repo_id: repo_id.to_string(),
|
||||
graph_build_id: graph_build_id.to_string(),
|
||||
qualified_name: qualified,
|
||||
name: name.to_string(),
|
||||
kind: CodeNodeKind::Enum,
|
||||
file_path: file_path.to_string(),
|
||||
start_line: node.start_position().row as u32 + 1,
|
||||
end_line: node.end_position().row as u32 + 1,
|
||||
language: "rust".to_string(),
|
||||
community_id: None,
|
||||
is_entry_point: false,
|
||||
graph_index: None,
|
||||
});
|
||||
}
|
||||
}
|
||||
"trait_item" => {
|
||||
if let Some(name_node) = node.child_by_field_name("name") {
|
||||
let name = &source[name_node.byte_range()];
|
||||
let qualified = match parent_qualified {
|
||||
Some(p) => format!("{p}::{name}"),
|
||||
None => format!("{file_path}::{name}"),
|
||||
};
|
||||
|
||||
output.nodes.push(CodeNode {
|
||||
id: None,
|
||||
repo_id: repo_id.to_string(),
|
||||
graph_build_id: graph_build_id.to_string(),
|
||||
qualified_name: qualified.clone(),
|
||||
name: name.to_string(),
|
||||
kind: CodeNodeKind::Trait,
|
||||
file_path: file_path.to_string(),
|
||||
start_line: node.start_position().row as u32 + 1,
|
||||
end_line: node.end_position().row as u32 + 1,
|
||||
language: "rust".to_string(),
|
||||
community_id: None,
|
||||
is_entry_point: false,
|
||||
graph_index: None,
|
||||
});
|
||||
|
||||
// Parse methods inside the trait
|
||||
self.walk_children(
|
||||
node,
|
||||
source,
|
||||
file_path,
|
||||
repo_id,
|
||||
graph_build_id,
|
||||
Some(&qualified),
|
||||
output,
|
||||
);
|
||||
return; // Don't walk children again
|
||||
}
|
||||
}
|
||||
"impl_item" => {
|
||||
// Extract impl target type for qualified naming
|
||||
let impl_name = self.extract_impl_type(&node, source);
|
||||
let qualified = match parent_qualified {
|
||||
Some(p) => format!("{p}::{impl_name}"),
|
||||
None => format!("{file_path}::{impl_name}"),
|
||||
};
|
||||
|
||||
// Check for trait impl (impl Trait for Type)
|
||||
if let Some(trait_node) = node.child_by_field_name("trait") {
|
||||
let trait_name = &source[trait_node.byte_range()];
|
||||
output.edges.push(CodeEdge {
|
||||
id: None,
|
||||
repo_id: repo_id.to_string(),
|
||||
graph_build_id: graph_build_id.to_string(),
|
||||
source: qualified.clone(),
|
||||
target: trait_name.to_string(),
|
||||
kind: CodeEdgeKind::Implements,
|
||||
file_path: file_path.to_string(),
|
||||
line_number: Some(node.start_position().row as u32 + 1),
|
||||
});
|
||||
}
|
||||
|
||||
// Walk methods inside impl block
|
||||
self.walk_children(
|
||||
node,
|
||||
source,
|
||||
file_path,
|
||||
repo_id,
|
||||
graph_build_id,
|
||||
Some(&qualified),
|
||||
output,
|
||||
);
|
||||
return;
|
||||
}
|
||||
"use_declaration" => {
|
||||
let use_text = &source[node.byte_range()];
|
||||
// Extract the imported path
|
||||
if let Some(path) = self.extract_use_path(use_text) {
|
||||
output.edges.push(CodeEdge {
|
||||
id: None,
|
||||
repo_id: repo_id.to_string(),
|
||||
graph_build_id: graph_build_id.to_string(),
|
||||
source: parent_qualified
|
||||
.unwrap_or(file_path)
|
||||
.to_string(),
|
||||
target: path,
|
||||
kind: CodeEdgeKind::Imports,
|
||||
file_path: file_path.to_string(),
|
||||
line_number: Some(node.start_position().row as u32 + 1),
|
||||
});
|
||||
}
|
||||
}
|
||||
"mod_item" => {
|
||||
if let Some(name_node) = node.child_by_field_name("name") {
|
||||
let name = &source[name_node.byte_range()];
|
||||
let qualified = match parent_qualified {
|
||||
Some(p) => format!("{p}::{name}"),
|
||||
None => format!("{file_path}::{name}"),
|
||||
};
|
||||
|
||||
output.nodes.push(CodeNode {
|
||||
id: None,
|
||||
repo_id: repo_id.to_string(),
|
||||
graph_build_id: graph_build_id.to_string(),
|
||||
qualified_name: qualified.clone(),
|
||||
name: name.to_string(),
|
||||
kind: CodeNodeKind::Module,
|
||||
file_path: file_path.to_string(),
|
||||
start_line: node.start_position().row as u32 + 1,
|
||||
end_line: node.end_position().row as u32 + 1,
|
||||
language: "rust".to_string(),
|
||||
community_id: None,
|
||||
is_entry_point: false,
|
||||
graph_index: None,
|
||||
});
|
||||
|
||||
// If it has a body (inline module), walk it
|
||||
if let Some(body) = node.child_by_field_name("body") {
|
||||
self.walk_children(
|
||||
body,
|
||||
source,
|
||||
file_path,
|
||||
repo_id,
|
||||
graph_build_id,
|
||||
Some(&qualified),
|
||||
output,
|
||||
);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
// Default: walk children
|
||||
self.walk_children(
|
||||
node,
|
||||
source,
|
||||
file_path,
|
||||
repo_id,
|
||||
graph_build_id,
|
||||
parent_qualified,
|
||||
output,
|
||||
);
|
||||
}
|
||||
|
||||
fn walk_children(
|
||||
&self,
|
||||
node: Node<'_>,
|
||||
source: &str,
|
||||
file_path: &str,
|
||||
repo_id: &str,
|
||||
graph_build_id: &str,
|
||||
parent_qualified: Option<&str>,
|
||||
output: &mut ParseOutput,
|
||||
) {
|
||||
let mut cursor = node.walk();
|
||||
for child in node.children(&mut cursor) {
|
||||
self.walk_tree(
|
||||
child,
|
||||
source,
|
||||
file_path,
|
||||
repo_id,
|
||||
graph_build_id,
|
||||
parent_qualified,
|
||||
output,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
fn extract_calls(
|
||||
&self,
|
||||
node: Node<'_>,
|
||||
source: &str,
|
||||
file_path: &str,
|
||||
repo_id: &str,
|
||||
graph_build_id: &str,
|
||||
caller_qualified: &str,
|
||||
output: &mut ParseOutput,
|
||||
) {
|
||||
if node.kind() == "call_expression" {
|
||||
if let Some(func_node) = node.child_by_field_name("function") {
|
||||
let callee = &source[func_node.byte_range()];
|
||||
output.edges.push(CodeEdge {
|
||||
id: None,
|
||||
repo_id: repo_id.to_string(),
|
||||
graph_build_id: graph_build_id.to_string(),
|
||||
source: caller_qualified.to_string(),
|
||||
target: callee.to_string(),
|
||||
kind: CodeEdgeKind::Calls,
|
||||
file_path: file_path.to_string(),
|
||||
line_number: Some(node.start_position().row as u32 + 1),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
let mut cursor = node.walk();
|
||||
for child in node.children(&mut cursor) {
|
||||
self.extract_calls(
|
||||
child,
|
||||
source,
|
||||
file_path,
|
||||
repo_id,
|
||||
graph_build_id,
|
||||
caller_qualified,
|
||||
output,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
fn has_attribute(&self, node: &Node<'_>, source: &str, attr_name: &str) -> bool {
|
||||
if let Some(prev) = node.prev_sibling() {
|
||||
if prev.kind() == "attribute_item" || prev.kind() == "attribute" {
|
||||
let text = &source[prev.byte_range()];
|
||||
return text.contains(attr_name);
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
fn has_pub_visibility(&self, node: &Node<'_>, source: &str) -> bool {
|
||||
let mut cursor = node.walk();
|
||||
for child in node.children(&mut cursor) {
|
||||
if child.kind() == "visibility_modifier" {
|
||||
let text = &source[child.byte_range()];
|
||||
return text == "pub";
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
fn extract_impl_type(&self, node: &Node<'_>, source: &str) -> String {
|
||||
if let Some(type_node) = node.child_by_field_name("type") {
|
||||
return source[type_node.byte_range()].to_string();
|
||||
}
|
||||
"unknown".to_string()
|
||||
}
|
||||
|
||||
fn extract_use_path(&self, use_text: &str) -> Option<String> {
|
||||
// "use foo::bar::baz;" -> "foo::bar::baz"
|
||||
let trimmed = use_text
|
||||
.strip_prefix("use ")?
|
||||
.trim_end_matches(';')
|
||||
.trim();
|
||||
Some(trimmed.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
impl LanguageParser for RustParser {
|
||||
fn language(&self) -> &str {
|
||||
"rust"
|
||||
}
|
||||
|
||||
fn extensions(&self) -> &[&str] {
|
||||
&["rs"]
|
||||
}
|
||||
|
||||
fn parse_file(
|
||||
&self,
|
||||
file_path: &Path,
|
||||
source: &str,
|
||||
repo_id: &str,
|
||||
graph_build_id: &str,
|
||||
) -> Result<ParseOutput, CoreError> {
|
||||
let mut parser = Parser::new();
|
||||
let language = tree_sitter_rust::LANGUAGE;
|
||||
parser
|
||||
.set_language(&language.into())
|
||||
.map_err(|e| CoreError::Graph(format!("Failed to set Rust language: {e}")))?;
|
||||
|
||||
let tree = parser
|
||||
.parse(source, None)
|
||||
.ok_or_else(|| CoreError::Graph("Failed to parse Rust file".to_string()))?;
|
||||
|
||||
let file_path_str = file_path.to_string_lossy().to_string();
|
||||
let mut output = ParseOutput::default();
|
||||
|
||||
// Add file node
|
||||
output.nodes.push(CodeNode {
|
||||
id: None,
|
||||
repo_id: repo_id.to_string(),
|
||||
graph_build_id: graph_build_id.to_string(),
|
||||
qualified_name: file_path_str.clone(),
|
||||
name: file_path
|
||||
.file_name()
|
||||
.map(|n| n.to_string_lossy().to_string())
|
||||
.unwrap_or_default(),
|
||||
kind: CodeNodeKind::File,
|
||||
file_path: file_path_str.clone(),
|
||||
start_line: 1,
|
||||
end_line: source.lines().count() as u32,
|
||||
language: "rust".to_string(),
|
||||
community_id: None,
|
||||
is_entry_point: false,
|
||||
graph_index: None,
|
||||
});
|
||||
|
||||
self.walk_tree(
|
||||
tree.root_node(),
|
||||
source,
|
||||
&file_path_str,
|
||||
repo_id,
|
||||
graph_build_id,
|
||||
None,
|
||||
&mut output,
|
||||
);
|
||||
|
||||
Ok(output)
|
||||
}
|
||||
}
|
||||
419
compliance-graph/src/parsers/typescript.rs
Normal file
419
compliance-graph/src/parsers/typescript.rs
Normal file
@@ -0,0 +1,419 @@
|
||||
use std::path::Path;
|
||||
|
||||
use compliance_core::error::CoreError;
|
||||
use compliance_core::models::graph::{CodeEdge, CodeEdgeKind, CodeNode, CodeNodeKind};
|
||||
use compliance_core::traits::graph_builder::{LanguageParser, ParseOutput};
|
||||
use tree_sitter::{Node, Parser};
|
||||
|
||||
pub struct TypeScriptParser;
|
||||
|
||||
impl TypeScriptParser {
|
||||
pub fn new() -> Self {
|
||||
Self
|
||||
}
|
||||
|
||||
    /// Recursively walk the TypeScript syntax tree, emitting nodes for
    /// functions, classes, interfaces, methods/fields and import edges.
    ///
    /// `parent_qualified` carries the enclosing declaration's qualified
    /// name (e.g. the class name for methods); when `None`, names are
    /// qualified by `file_path` instead. Arms that fully walk their own
    /// body (`class_declaration`) `return` early to avoid a second
    /// traversal by the trailing `walk_children` call.
    fn walk_tree(
        &self,
        node: Node<'_>,
        source: &str,
        file_path: &str,
        repo_id: &str,
        graph_build_id: &str,
        parent_qualified: Option<&str>,
        output: &mut ParseOutput,
    ) {
        match node.kind() {
            // Top-level / nested `function foo() {}` declarations.
            "function_declaration" => {
                if let Some(name_node) = node.child_by_field_name("name") {
                    let name = &source[name_node.byte_range()];
                    let qualified = match parent_qualified {
                        Some(p) => format!("{p}.{name}"),
                        None => format!("{file_path}::{name}"),
                    };

                    output.nodes.push(CodeNode {
                        id: None,
                        repo_id: repo_id.to_string(),
                        graph_build_id: graph_build_id.to_string(),
                        qualified_name: qualified.clone(),
                        name: name.to_string(),
                        kind: CodeNodeKind::Function,
                        file_path: file_path.to_string(),
                        start_line: node.start_position().row as u32 + 1,
                        end_line: node.end_position().row as u32 + 1,
                        language: "typescript".to_string(),
                        community_id: None,
                        // Exported functions are treated as entry points.
                        is_entry_point: self.is_exported(&node),
                        graph_index: None,
                    });

                    if let Some(body) = node.child_by_field_name("body") {
                        self.extract_calls(
                            body, source, file_path, repo_id, graph_build_id, &qualified, output,
                        );
                    }
                }
            }
            "class_declaration" => {
                if let Some(name_node) = node.child_by_field_name("name") {
                    let name = &source[name_node.byte_range()];
                    let qualified = match parent_qualified {
                        Some(p) => format!("{p}.{name}"),
                        None => format!("{file_path}::{name}"),
                    };

                    output.nodes.push(CodeNode {
                        id: None,
                        repo_id: repo_id.to_string(),
                        graph_build_id: graph_build_id.to_string(),
                        qualified_name: qualified.clone(),
                        name: name.to_string(),
                        kind: CodeNodeKind::Class,
                        file_path: file_path.to_string(),
                        start_line: node.start_position().row as u32 + 1,
                        end_line: node.end_position().row as u32 + 1,
                        language: "typescript".to_string(),
                        community_id: None,
                        is_entry_point: false,
                        graph_index: None,
                    });

                    // Heritage clause (extends/implements) -> Inherits /
                    // Implements edges.
                    self.extract_heritage(
                        &node, source, file_path, repo_id, graph_build_id, &qualified, output,
                    );

                    // Walk the class body with this class as the qualified
                    // parent so methods get "Class.method" names.
                    if let Some(body) = node.child_by_field_name("body") {
                        self.walk_children(
                            body, source, file_path, repo_id, graph_build_id, Some(&qualified),
                            output,
                        );
                    }
                    // Body already walked above; skip the trailing walk.
                    return;
                }
            }
            "interface_declaration" => {
                if let Some(name_node) = node.child_by_field_name("name") {
                    let name = &source[name_node.byte_range()];
                    let qualified = match parent_qualified {
                        Some(p) => format!("{p}.{name}"),
                        None => format!("{file_path}::{name}"),
                    };

                    output.nodes.push(CodeNode {
                        id: None,
                        repo_id: repo_id.to_string(),
                        graph_build_id: graph_build_id.to_string(),
                        qualified_name: qualified.clone(),
                        name: name.to_string(),
                        kind: CodeNodeKind::Interface,
                        file_path: file_path.to_string(),
                        start_line: node.start_position().row as u32 + 1,
                        end_line: node.end_position().row as u32 + 1,
                        language: "typescript".to_string(),
                        community_id: None,
                        is_entry_point: false,
                        graph_index: None,
                    });
                }
            }
            // Class members. NOTE(review): newer tree-sitter JS/TS grammars
            // may emit "field_definition" rather than
            // "public_field_definition" — confirm against the pinned 0.23
            // grammar if fields appear to be missing.
            "method_definition" | "public_field_definition" => {
                if let Some(name_node) = node.child_by_field_name("name") {
                    let name = &source[name_node.byte_range()];
                    let qualified = match parent_qualified {
                        Some(p) => format!("{p}.{name}"),
                        None => format!("{file_path}::{name}"),
                    };

                    output.nodes.push(CodeNode {
                        id: None,
                        repo_id: repo_id.to_string(),
                        graph_build_id: graph_build_id.to_string(),
                        qualified_name: qualified.clone(),
                        name: name.to_string(),
                        kind: CodeNodeKind::Method,
                        file_path: file_path.to_string(),
                        start_line: node.start_position().row as u32 + 1,
                        end_line: node.end_position().row as u32 + 1,
                        language: "typescript".to_string(),
                        community_id: None,
                        is_entry_point: false,
                        graph_index: None,
                    });

                    if let Some(body) = node.child_by_field_name("body") {
                        self.extract_calls(
                            body, source, file_path, repo_id, graph_build_id, &qualified, output,
                        );
                    }
                }
            }
            // `const f = () => …` style bindings.
            "lexical_declaration" | "variable_declaration" => {
                self.extract_arrow_functions(
                    node, source, file_path, repo_id, graph_build_id, parent_qualified, output,
                );
            }
            // `import … from "module"` -> Imports edge from the current
            // scope (or the file) to the module specifier.
            "import_statement" => {
                let text = &source[node.byte_range()];
                if let Some(module) = self.extract_import_source(text) {
                    output.edges.push(CodeEdge {
                        id: None,
                        repo_id: repo_id.to_string(),
                        graph_build_id: graph_build_id.to_string(),
                        source: parent_qualified.unwrap_or(file_path).to_string(),
                        target: module,
                        kind: CodeEdgeKind::Imports,
                        file_path: file_path.to_string(),
                        line_number: Some(node.start_position().row as u32 + 1),
                    });
                }
            }
            _ => {}
        }

        // Default: continue the traversal into child nodes.
        self.walk_children(
            node, source, file_path, repo_id, graph_build_id, parent_qualified, output,
        );
    }
|
||||
|
||||
fn walk_children(
|
||||
&self,
|
||||
node: Node<'_>,
|
||||
source: &str,
|
||||
file_path: &str,
|
||||
repo_id: &str,
|
||||
graph_build_id: &str,
|
||||
parent_qualified: Option<&str>,
|
||||
output: &mut ParseOutput,
|
||||
) {
|
||||
let mut cursor = node.walk();
|
||||
for child in node.children(&mut cursor) {
|
||||
self.walk_tree(
|
||||
child, source, file_path, repo_id, graph_build_id, parent_qualified, output,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
fn extract_calls(
|
||||
&self,
|
||||
node: Node<'_>,
|
||||
source: &str,
|
||||
file_path: &str,
|
||||
repo_id: &str,
|
||||
graph_build_id: &str,
|
||||
caller_qualified: &str,
|
||||
output: &mut ParseOutput,
|
||||
) {
|
||||
if node.kind() == "call_expression" {
|
||||
if let Some(func_node) = node.child_by_field_name("function") {
|
||||
let callee = &source[func_node.byte_range()];
|
||||
output.edges.push(CodeEdge {
|
||||
id: None,
|
||||
repo_id: repo_id.to_string(),
|
||||
graph_build_id: graph_build_id.to_string(),
|
||||
source: caller_qualified.to_string(),
|
||||
target: callee.to_string(),
|
||||
kind: CodeEdgeKind::Calls,
|
||||
file_path: file_path.to_string(),
|
||||
line_number: Some(node.start_position().row as u32 + 1),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
let mut cursor = node.walk();
|
||||
for child in node.children(&mut cursor) {
|
||||
self.extract_calls(
|
||||
child, source, file_path, repo_id, graph_build_id, caller_qualified, output,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
fn extract_arrow_functions(
|
||||
&self,
|
||||
node: Node<'_>,
|
||||
source: &str,
|
||||
file_path: &str,
|
||||
repo_id: &str,
|
||||
graph_build_id: &str,
|
||||
parent_qualified: Option<&str>,
|
||||
output: &mut ParseOutput,
|
||||
) {
|
||||
let mut cursor = node.walk();
|
||||
for child in node.children(&mut cursor) {
|
||||
if child.kind() == "variable_declarator" {
|
||||
let name_node = child.child_by_field_name("name");
|
||||
let value_node = child.child_by_field_name("value");
|
||||
if let (Some(name_n), Some(value_n)) = (name_node, value_node) {
|
||||
if value_n.kind() == "arrow_function" || value_n.kind() == "function" {
|
||||
let name = &source[name_n.byte_range()];
|
||||
let qualified = match parent_qualified {
|
||||
Some(p) => format!("{p}.{name}"),
|
||||
None => format!("{file_path}::{name}"),
|
||||
};
|
||||
|
||||
output.nodes.push(CodeNode {
|
||||
id: None,
|
||||
repo_id: repo_id.to_string(),
|
||||
graph_build_id: graph_build_id.to_string(),
|
||||
qualified_name: qualified.clone(),
|
||||
name: name.to_string(),
|
||||
kind: CodeNodeKind::Function,
|
||||
file_path: file_path.to_string(),
|
||||
start_line: child.start_position().row as u32 + 1,
|
||||
end_line: child.end_position().row as u32 + 1,
|
||||
language: "typescript".to_string(),
|
||||
community_id: None,
|
||||
is_entry_point: false,
|
||||
graph_index: None,
|
||||
});
|
||||
|
||||
if let Some(body) = value_n.child_by_field_name("body") {
|
||||
self.extract_calls(
|
||||
body, source, file_path, repo_id, graph_build_id, &qualified,
|
||||
output,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
    /// Emit `Inherits` / `Implements` edges for a class's heritage clause
    /// by string-matching the clause text (e.g.
    /// `extends Base implements IFoo, IBar`).
    ///
    /// NOTE(review): this is text-based, not AST-based. Interface lists
    /// containing generic arguments with commas (`implements Map<K, V>`)
    /// will be split incorrectly, and a generic base (`extends Base<T>`)
    /// keeps its type arguments in the edge target — confirm whether the
    /// downstream resolver tolerates that.
    fn extract_heritage(
        &self,
        node: &Node<'_>,
        source: &str,
        file_path: &str,
        repo_id: &str,
        graph_build_id: &str,
        class_qualified: &str,
        output: &mut ParseOutput,
    ) {
        let mut cursor = node.walk();
        for child in node.children(&mut cursor) {
            if child.kind() == "class_heritage" {
                let text = &source[child.byte_range()];
                // "extends Base implements IFoo, IBar"
                if let Some(rest) = text.strip_prefix("extends ") {
                    // Base class = first whitespace-delimited token after
                    // `extends`.
                    let base = rest.split_whitespace().next().unwrap_or(rest);
                    output.edges.push(CodeEdge {
                        id: None,
                        repo_id: repo_id.to_string(),
                        graph_build_id: graph_build_id.to_string(),
                        source: class_qualified.to_string(),
                        target: base.trim_matches(',').to_string(),
                        kind: CodeEdgeKind::Inherits,
                        file_path: file_path.to_string(),
                        line_number: Some(child.start_position().row as u32 + 1),
                    });
                }
                if text.contains("implements ") {
                    // Everything after `implements ` is a comma-separated
                    // interface list; one Implements edge per entry.
                    if let Some(impl_part) = text.split("implements ").nth(1) {
                        for iface in impl_part.split(',') {
                            let iface = iface.trim();
                            if !iface.is_empty() {
                                output.edges.push(CodeEdge {
                                    id: None,
                                    repo_id: repo_id.to_string(),
                                    graph_build_id: graph_build_id.to_string(),
                                    source: class_qualified.to_string(),
                                    target: iface.to_string(),
                                    kind: CodeEdgeKind::Implements,
                                    file_path: file_path.to_string(),
                                    line_number: Some(child.start_position().row as u32 + 1),
                                });
                            }
                        }
                    }
                }
            }
        }
    }
|
||||
|
||||
fn is_exported(&self, node: &Node<'_>) -> bool {
|
||||
if let Some(parent) = node.parent() {
|
||||
return parent.kind() == "export_statement";
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
fn extract_import_source(&self, import_text: &str) -> Option<String> {
|
||||
let from_idx = import_text.find("from ");
|
||||
let start = if let Some(idx) = from_idx {
|
||||
idx + 5
|
||||
} else {
|
||||
import_text.find("import ")? + 7
|
||||
};
|
||||
let rest = &import_text[start..];
|
||||
let module = rest
|
||||
.trim()
|
||||
.trim_matches(|c| c == '\'' || c == '"' || c == ';' || c == ' ');
|
||||
if module.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(module.to_string())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl LanguageParser for TypeScriptParser {
|
||||
fn language(&self) -> &str {
|
||||
"typescript"
|
||||
}
|
||||
|
||||
fn extensions(&self) -> &[&str] {
|
||||
&["ts", "tsx"]
|
||||
}
|
||||
|
||||
fn parse_file(
|
||||
&self,
|
||||
file_path: &Path,
|
||||
source: &str,
|
||||
repo_id: &str,
|
||||
graph_build_id: &str,
|
||||
) -> Result<ParseOutput, CoreError> {
|
||||
let mut parser = Parser::new();
|
||||
let language = tree_sitter_typescript::LANGUAGE_TYPESCRIPT;
|
||||
parser
|
||||
.set_language(&language.into())
|
||||
.map_err(|e| CoreError::Graph(format!("Failed to set TypeScript language: {e}")))?;
|
||||
|
||||
let tree = parser
|
||||
.parse(source, None)
|
||||
.ok_or_else(|| CoreError::Graph("Failed to parse TypeScript file".to_string()))?;
|
||||
|
||||
let file_path_str = file_path.to_string_lossy().to_string();
|
||||
let mut output = ParseOutput::default();
|
||||
|
||||
output.nodes.push(CodeNode {
|
||||
id: None,
|
||||
repo_id: repo_id.to_string(),
|
||||
graph_build_id: graph_build_id.to_string(),
|
||||
qualified_name: file_path_str.clone(),
|
||||
name: file_path
|
||||
.file_name()
|
||||
.map(|n| n.to_string_lossy().to_string())
|
||||
.unwrap_or_default(),
|
||||
kind: CodeNodeKind::File,
|
||||
file_path: file_path_str.clone(),
|
||||
start_line: 1,
|
||||
end_line: source.lines().count() as u32,
|
||||
language: "typescript".to_string(),
|
||||
community_id: None,
|
||||
is_entry_point: false,
|
||||
graph_index: None,
|
||||
});
|
||||
|
||||
self.walk_tree(
|
||||
tree.root_node(),
|
||||
source,
|
||||
&file_path_str,
|
||||
repo_id,
|
||||
graph_build_id,
|
||||
None,
|
||||
&mut output,
|
||||
);
|
||||
|
||||
Ok(output)
|
||||
}
|
||||
}
|
||||
128
compliance-graph/src/search/index.rs
Normal file
128
compliance-graph/src/search/index.rs
Normal file
@@ -0,0 +1,128 @@
|
||||
use compliance_core::error::CoreError;
|
||||
use compliance_core::models::graph::CodeNode;
|
||||
use tantivy::collector::TopDocs;
|
||||
use tantivy::query::QueryParser;
|
||||
use tantivy::schema::{Schema, Value, STORED, TEXT};
|
||||
use tantivy::{doc, Index, IndexWriter, ReloadPolicy};
|
||||
use tracing::info;
|
||||
|
||||
/// BM25 text search index over code symbols
|
||||
pub struct SymbolIndex {
|
||||
index: Index,
|
||||
#[allow(dead_code)]
|
||||
schema: Schema,
|
||||
qualified_name_field: tantivy::schema::Field,
|
||||
name_field: tantivy::schema::Field,
|
||||
kind_field: tantivy::schema::Field,
|
||||
file_path_field: tantivy::schema::Field,
|
||||
language_field: tantivy::schema::Field,
|
||||
}
|
||||
|
||||
/// A single hit returned by `SymbolIndex::search`.
#[derive(Debug, Clone, serde::Serialize)]
pub struct SearchResult {
    // Fully-qualified symbol name as produced by the language parsers.
    pub qualified_name: String,
    // Bare symbol name (function/class/method identifier).
    pub name: String,
    // Node kind as a string (stringified CodeNodeKind).
    pub kind: String,
    // Path of the file the symbol was found in.
    pub file_path: String,
    // Source language of the symbol (e.g. "rust", "typescript").
    pub language: String,
    // BM25 relevance score assigned by tantivy.
    pub score: f32,
}
|
||||
|
||||
impl SymbolIndex {
|
||||
/// Create a new in-memory symbol index
|
||||
pub fn new() -> Result<Self, CoreError> {
|
||||
let mut schema_builder = Schema::builder();
|
||||
let qualified_name_field = schema_builder.add_text_field("qualified_name", TEXT | STORED);
|
||||
let name_field = schema_builder.add_text_field("name", TEXT | STORED);
|
||||
let kind_field = schema_builder.add_text_field("kind", TEXT | STORED);
|
||||
let file_path_field = schema_builder.add_text_field("file_path", TEXT | STORED);
|
||||
let language_field = schema_builder.add_text_field("language", TEXT | STORED);
|
||||
let schema = schema_builder.build();
|
||||
|
||||
let index = Index::create_in_ram(schema.clone());
|
||||
|
||||
Ok(Self {
|
||||
index,
|
||||
schema,
|
||||
qualified_name_field,
|
||||
name_field,
|
||||
kind_field,
|
||||
file_path_field,
|
||||
language_field,
|
||||
})
|
||||
}
|
||||
|
||||
/// Index a set of code nodes
|
||||
pub fn index_nodes(&self, nodes: &[CodeNode]) -> Result<(), CoreError> {
|
||||
let mut writer: IndexWriter = self
|
||||
.index
|
||||
.writer(50_000_000)
|
||||
.map_err(|e| CoreError::Graph(format!("Failed to create index writer: {e}")))?;
|
||||
|
||||
for node in nodes {
|
||||
writer
|
||||
.add_document(doc!(
|
||||
self.qualified_name_field => node.qualified_name.as_str(),
|
||||
self.name_field => node.name.as_str(),
|
||||
self.kind_field => node.kind.to_string(),
|
||||
self.file_path_field => node.file_path.as_str(),
|
||||
self.language_field => node.language.as_str(),
|
||||
))
|
||||
.map_err(|e| CoreError::Graph(format!("Failed to add document: {e}")))?;
|
||||
}
|
||||
|
||||
writer
|
||||
.commit()
|
||||
.map_err(|e| CoreError::Graph(format!("Failed to commit index: {e}")))?;
|
||||
|
||||
info!(nodes = nodes.len(), "Symbol index built");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Search for symbols matching a query
|
||||
pub fn search(&self, query_str: &str, limit: usize) -> Result<Vec<SearchResult>, CoreError> {
|
||||
let reader = self
|
||||
.index
|
||||
.reader_builder()
|
||||
.reload_policy(ReloadPolicy::Manual)
|
||||
.try_into()
|
||||
.map_err(|e| CoreError::Graph(format!("Failed to create reader: {e}")))?;
|
||||
|
||||
let searcher = reader.searcher();
|
||||
let query_parser =
|
||||
QueryParser::for_index(&self.index, vec![self.name_field, self.qualified_name_field]);
|
||||
|
||||
let query = query_parser
|
||||
.parse_query(query_str)
|
||||
.map_err(|e| CoreError::Graph(format!("Failed to parse query: {e}")))?;
|
||||
|
||||
let top_docs = searcher
|
||||
.search(&query, &TopDocs::with_limit(limit))
|
||||
.map_err(|e| CoreError::Graph(format!("Search failed: {e}")))?;
|
||||
|
||||
let mut results = Vec::new();
|
||||
for (score, doc_address) in top_docs {
|
||||
let doc: tantivy::TantivyDocument = searcher
|
||||
.doc(doc_address)
|
||||
.map_err(|e| CoreError::Graph(format!("Failed to retrieve doc: {e}")))?;
|
||||
|
||||
let get_field = |field: tantivy::schema::Field| -> String {
|
||||
doc.get_first(field)
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("")
|
||||
.to_string()
|
||||
};
|
||||
|
||||
results.push(SearchResult {
|
||||
qualified_name: get_field(self.qualified_name_field),
|
||||
name: get_field(self.name_field),
|
||||
kind: get_field(self.kind_field),
|
||||
file_path: get_field(self.file_path_field),
|
||||
language: get_field(self.language_field),
|
||||
score,
|
||||
});
|
||||
}
|
||||
|
||||
Ok(results)
|
||||
}
|
||||
}
|
||||
1
compliance-graph/src/search/mod.rs
Normal file
1
compliance-graph/src/search/mod.rs
Normal file
@@ -0,0 +1 @@
|
||||
/// Tantivy-backed BM25 text search over indexed code symbols.
pub mod index;
|
||||
Reference in New Issue
Block a user