Add DAST, graph modules, toast notifications, and dashboard enhancements

Add DAST scanning and code knowledge graph features across the stack:
- compliance-dast and compliance-graph workspace crates
- Agent API handlers and routes for DAST targets/scans and graph builds
- Core models and traits for DAST and graph domains
- Dashboard pages for DAST targets/findings/overview and graph explorer/impact
- Toast notification system with auto-dismiss for async action feedback
- Button click animations and disabled states for better UX

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Sharang Parnerkar
2026-03-04 13:53:50 +01:00
parent 03ee69834d
commit cea8f59e10
69 changed files with 8745 additions and 54 deletions

View File

@@ -0,0 +1,37 @@
[package]
name = "compliance-graph"
version = "0.1.0"
edition = "2021"
# Inherit the shared lint configuration from the workspace root.
[lints]
workspace = true
[dependencies]
# Workspace crates — mongodb feature enables BSON-serializable core models.
compliance-core = { workspace = true, features = ["mongodb"] }
serde = { workspace = true }
serde_json = { workspace = true }
chrono = { workspace = true }
thiserror = { workspace = true }
tracing = { workspace = true }
uuid = { workspace = true }
tokio = { workspace = true }
mongodb = { workspace = true }
# Tree-sitter parsing (one grammar crate per supported language)
tree-sitter = "0.24"
tree-sitter-rust = "0.23"
tree-sitter-python = "0.23"
tree-sitter-javascript = "0.23"
tree-sitter-typescript = "0.23"
# Graph algorithms
petgraph = "0.7"
# Text search
tantivy = "0.22"
# Serialization
bson = "2"
# Async streams
futures-util = "0.3"

View File

@@ -0,0 +1,256 @@
use std::collections::HashMap;
use petgraph::graph::NodeIndex;
use petgraph::visit::EdgeRef;
use tracing::info;
use super::engine::CodeGraph;
/// Run Louvain community detection on the code graph.
/// Returns the number of communities detected.
///
/// Edges are treated as undirected: neighbour contributions are gathered
/// from both edge directions, so node degree must likewise count both
/// outgoing and incoming edges (this also keeps the computation consistent
/// with `detect_communities_with_assignment`).
///
/// NOTE: community IDs are computed in a local map; callers that need the
/// assignments persisted on the nodes should use `apply_communities`.
pub fn detect_communities(code_graph: &CodeGraph) -> u32 {
    let graph = &code_graph.graph;
    let node_count = graph.node_count();
    if node_count == 0 {
        return 0;
    }
    // Initialize: each node in its own community (id = petgraph index).
    let mut community: HashMap<NodeIndex, u32> = HashMap::new();
    for idx in graph.node_indices() {
        community.insert(idx, idx.index() as u32);
    }
    // Total edge weight; every edge carries weight 1.0.
    let total_edges = graph.edge_count() as f64;
    if total_edges == 0.0 {
        // All nodes are isolated, each is its own community.
        return node_count as u32;
    }
    let m2 = 2.0 * total_edges;
    // Pre-compute undirected node degrees (outgoing + incoming), matching
    // the undirected neighbour treatment below. The previous version only
    // counted outgoing edges here, which skewed the modularity terms.
    let mut degree: HashMap<NodeIndex, f64> = HashMap::new();
    for idx in graph.node_indices() {
        let d = (graph.edges(idx).count()
            + graph
                .edges_directed(idx, petgraph::Direction::Incoming)
                .count()) as f64;
        degree.insert(idx, d);
    }
    // Community totals (sum of member degrees), maintained incrementally on
    // every move. Recomputing them for each node made every sweep O(n^2).
    let mut comm_totals: HashMap<u32, f64> = HashMap::new();
    for (n, &c) in &community {
        *comm_totals.entry(c).or_insert(0.0) += degree[n];
    }
    // Louvain phase 1: greedy local moves until no move improves modularity
    // (capped at 50 sweeps as a safety valve).
    let mut improved = true;
    let mut iterations = 0;
    let max_iterations = 50;
    while improved && iterations < max_iterations {
        improved = false;
        iterations += 1;
        for node in graph.node_indices() {
            let current_comm = community[&node];
            let node_deg = degree[&node];
            // Edge weight from `node` into each neighbouring community,
            // summed over both edge directions (undirected treatment).
            let mut comm_edges: HashMap<u32, f64> = HashMap::new();
            for edge in graph.edges(node) {
                let neighbor_comm = community[&edge.target()];
                *comm_edges.entry(neighbor_comm).or_insert(0.0) += 1.0;
            }
            for edge in graph.edges_directed(node, petgraph::Direction::Incoming) {
                let neighbor_comm = community[&edge.source()];
                *comm_edges.entry(neighbor_comm).or_insert(0.0) += 1.0;
            }
            let current_total = comm_totals.get(&current_comm).copied().unwrap_or(0.0);
            let edges_to_current = comm_edges.get(&current_comm).copied().unwrap_or(0.0);
            // Modularity gain from removing node from its current community.
            let remove_cost = edges_to_current - (current_total - node_deg) * node_deg / m2;
            let mut best_comm = current_comm;
            let mut best_gain = 0.0;
            for (&candidate_comm, &edges_to_candidate) in &comm_edges {
                if candidate_comm == current_comm {
                    continue;
                }
                let candidate_total = comm_totals.get(&candidate_comm).copied().unwrap_or(0.0);
                // Modularity gain from adding node to candidate community.
                let add_gain = edges_to_candidate - candidate_total * node_deg / m2;
                let gain = add_gain - remove_cost;
                if gain > best_gain {
                    best_gain = gain;
                    best_comm = candidate_comm;
                }
            }
            if best_comm != current_comm {
                // Move the node and keep the community totals in sync.
                *comm_totals.entry(current_comm).or_insert(0.0) -= node_deg;
                *comm_totals.entry(best_comm).or_insert(0.0) += node_deg;
                community.insert(node, best_comm);
                improved = true;
            }
        }
    }
    // Renumber communities to be contiguous, starting at 0.
    let mut comm_remap: HashMap<u32, u32> = HashMap::new();
    let mut next_id: u32 = 0;
    for &c in community.values() {
        if !comm_remap.contains_key(&c) {
            comm_remap.insert(c, next_id);
            next_id += 1;
        }
    }
    // Apply the renumbering to the community map.
    for c in community.values_mut() {
        if let Some(&new_id) = comm_remap.get(c) {
            *c = new_id;
        }
    }
    let num_communities = next_id;
    info!(
        communities = num_communities,
        iterations, "Community detection complete"
    );
    // The assignments live only in the local map; `apply_communities`
    // writes them into the CodeGraph nodes when persistence is needed.
    num_communities
}
/// Apply community assignments back to code nodes.
///
/// Thin wrapper around `detect_communities_with_assignment`; returns the
/// number of communities detected. (The intermediate `let count = …;
/// count` was a clippy `let_and_return`.)
pub fn apply_communities(code_graph: &mut CodeGraph) -> u32 {
    detect_communities_with_assignment(code_graph)
}
/// Detect communities and write assignments into the nodes
///
/// Same Louvain phase-1 local-move pass as `detect_communities`, but after
/// renumbering, the community ids are written back into
/// `code_graph.nodes[*].community_id` (matched through each node's
/// `graph_index`).
fn detect_communities_with_assignment(code_graph: &mut CodeGraph) -> u32 {
    let graph = &code_graph.graph;
    let node_count = graph.node_count();
    if node_count == 0 {
        return 0;
    }
    // Start with every node in its own community (id = petgraph index).
    let mut community: HashMap<NodeIndex, u32> = HashMap::new();
    for idx in graph.node_indices() {
        community.insert(idx, idx.index() as u32);
    }
    let total_edges = graph.edge_count() as f64;
    if total_edges == 0.0 {
        // No edges: every node stays its own community. graph_index doubles
        // as the community id (indices are already contiguous).
        for node in &mut code_graph.nodes {
            if let Some(gi) = node.graph_index {
                node.community_id = Some(gi);
            }
        }
        return node_count as u32;
    }
    let m2 = 2.0 * total_edges;
    // Undirected degree: outgoing plus incoming edge counts.
    let mut degree: HashMap<NodeIndex, f64> = HashMap::new();
    for idx in graph.node_indices() {
        let d = (graph.edges(idx).count()
            + graph
                .edges_directed(idx, petgraph::Direction::Incoming)
                .count()) as f64;
        degree.insert(idx, d);
    }
    // Louvain phase 1: greedy local moves, capped at 50 sweeps.
    let mut improved = true;
    let mut iterations = 0;
    let max_iterations = 50;
    while improved && iterations < max_iterations {
        improved = false;
        iterations += 1;
        for node in graph.node_indices() {
            let current_comm = community[&node];
            let node_deg = degree[&node];
            // Edge weight from `node` into each neighbouring community
            // (both directions — undirected treatment).
            let mut comm_edges: HashMap<u32, f64> = HashMap::new();
            for edge in graph.edges(node) {
                let neighbor_comm = community[&edge.target()];
                *comm_edges.entry(neighbor_comm).or_insert(0.0) += 1.0;
            }
            for edge in graph.edges_directed(node, petgraph::Direction::Incoming) {
                let neighbor_comm = community[&edge.source()];
                *comm_edges.entry(neighbor_comm).or_insert(0.0) += 1.0;
            }
            // Sum of member degrees per community, recomputed for each node
            // so moves made earlier in this sweep are reflected.
            let mut comm_totals: HashMap<u32, f64> = HashMap::new();
            for (n, &c) in &community {
                *comm_totals.entry(c).or_insert(0.0) += degree[n];
            }
            let current_total = comm_totals.get(&current_comm).copied().unwrap_or(0.0);
            let edges_to_current = comm_edges.get(&current_comm).copied().unwrap_or(0.0);
            // Modularity gain of removing `node` from its current community.
            let remove_cost = edges_to_current - (current_total - node_deg) * node_deg / m2;
            let mut best_comm = current_comm;
            let mut best_gain = 0.0;
            for (&candidate_comm, &edges_to_candidate) in &comm_edges {
                if candidate_comm == current_comm {
                    continue;
                }
                let candidate_total = comm_totals.get(&candidate_comm).copied().unwrap_or(0.0);
                // Modularity gain of joining the candidate community.
                let add_gain = edges_to_candidate - candidate_total * node_deg / m2;
                let gain = add_gain - remove_cost;
                if gain > best_gain {
                    best_gain = gain;
                    best_comm = candidate_comm;
                }
            }
            if best_comm != current_comm {
                community.insert(node, best_comm);
                improved = true;
            }
        }
    }
    // Renumber community ids to be contiguous, starting at 0.
    let mut comm_remap: HashMap<u32, u32> = HashMap::new();
    let mut next_id: u32 = 0;
    for &c in community.values() {
        if !comm_remap.contains_key(&c) {
            comm_remap.insert(c, next_id);
            next_id += 1;
        }
    }
    // Apply to nodes: look up each node's petgraph index in the community
    // map and store the renumbered id.
    for node in &mut code_graph.nodes {
        if let Some(gi) = node.graph_index {
            let idx = NodeIndex::new(gi as usize);
            if let Some(&comm) = community.get(&idx) {
                let remapped = comm_remap.get(&comm).copied().unwrap_or(comm);
                node.community_id = Some(remapped);
            }
        }
    }
    next_id
}

View File

@@ -0,0 +1,165 @@
use std::collections::HashMap;
use std::path::Path;
use chrono::Utc;
use compliance_core::error::CoreError;
use compliance_core::models::graph::{
CodeEdge, CodeEdgeKind, CodeNode, GraphBuildRun, GraphBuildStatus,
};
use compliance_core::traits::graph_builder::ParseOutput;
use petgraph::graph::{DiGraph, NodeIndex};
use tracing::info;
use crate::parsers::registry::ParserRegistry;
use super::community::detect_communities;
use super::impact::ImpactAnalyzer;
/// The main graph engine that builds and manages code knowledge graphs
pub struct GraphEngine {
    // Dispatches source files to per-language tree-sitter parsers.
    parser_registry: ParserRegistry,
    // Passed through to parse_directory as a cap; presumably limits nodes
    // parsed per build — TODO confirm against ParserRegistry.
    max_nodes: u32,
}
/// In-memory representation of a built code graph
pub struct CodeGraph {
    // Petgraph digraph: node weight = qualified name, edge weight = edge kind.
    pub graph: DiGraph<String, CodeEdgeKind>,
    // qualified name -> petgraph index, used for edge-target resolution.
    pub node_map: HashMap<String, NodeIndex>,
    // Parsed code entities; `graph_index` links each to its petgraph node.
    pub nodes: Vec<CodeNode>,
    // Only edges whose endpoints resolved to known nodes.
    pub edges: Vec<CodeEdge>,
}
impl GraphEngine {
pub fn new(max_nodes: u32) -> Self {
Self {
parser_registry: ParserRegistry::new(),
max_nodes,
}
}
/// Build a code graph from a repository directory
pub fn build_graph(
&self,
repo_path: &Path,
repo_id: &str,
graph_build_id: &str,
) -> Result<(CodeGraph, GraphBuildRun), CoreError> {
let mut build_run = GraphBuildRun::new(repo_id.to_string());
info!(repo_id, path = %repo_path.display(), "Starting graph build");
// Phase 1: Parse all files
let parse_output = self.parser_registry.parse_directory(
repo_path,
repo_id,
graph_build_id,
self.max_nodes,
)?;
// Phase 2: Build petgraph
let code_graph = self.build_petgraph(parse_output)?;
// Phase 3: Run community detection
let community_count = detect_communities(&code_graph);
// Collect language stats
let mut languages: Vec<String> = code_graph
.nodes
.iter()
.map(|n| n.language.clone())
.collect::<std::collections::HashSet<_>>()
.into_iter()
.collect();
languages.sort();
build_run.node_count = code_graph.nodes.len() as u32;
build_run.edge_count = code_graph.edges.len() as u32;
build_run.community_count = community_count;
build_run.languages_parsed = languages;
build_run.status = GraphBuildStatus::Completed;
build_run.completed_at = Some(Utc::now());
info!(
nodes = build_run.node_count,
edges = build_run.edge_count,
communities = build_run.community_count,
"Graph build complete"
);
Ok((code_graph, build_run))
}
/// Build petgraph from parsed output, resolving edges to node indices
fn build_petgraph(&self, parse_output: ParseOutput) -> Result<CodeGraph, CoreError> {
let mut graph = DiGraph::new();
let mut node_map: HashMap<String, NodeIndex> = HashMap::new();
let mut nodes = parse_output.nodes;
// Add all nodes to the graph
for node in &mut nodes {
let idx = graph.add_node(node.qualified_name.clone());
node.graph_index = Some(idx.index() as u32);
node_map.insert(node.qualified_name.clone(), idx);
}
// Resolve and add edges
let mut resolved_edges = Vec::new();
for edge in parse_output.edges {
let source_idx = node_map.get(&edge.source);
let target_idx = self.resolve_edge_target(&edge.target, &node_map);
if let (Some(&src), Some(tgt)) = (source_idx, target_idx) {
graph.add_edge(src, tgt, edge.kind.clone());
resolved_edges.push(edge);
}
// Skip unresolved edges (cross-file, external deps) — conservative approach
}
Ok(CodeGraph {
graph,
node_map,
nodes,
edges: resolved_edges,
})
}
/// Try to resolve an edge target to a known node
fn resolve_edge_target<'a>(
&self,
target: &str,
node_map: &'a HashMap<String, NodeIndex>,
) -> Option<NodeIndex> {
// Direct match
if let Some(idx) = node_map.get(target) {
return Some(*idx);
}
// Try matching just the function/type name (intra-file resolution)
for (qualified, idx) in node_map {
// Match "foo" to "path/file.rs::foo" or "path/file.rs::Type::foo"
if qualified.ends_with(&format!("::{target}"))
|| qualified.ends_with(&format!(".{target}"))
{
return Some(*idx);
}
}
// Try matching method calls like "self.method" -> look for "::method"
if let Some(method_name) = target.strip_prefix("self.") {
for (qualified, idx) in node_map {
if qualified.ends_with(&format!("::{method_name}"))
|| qualified.ends_with(&format!(".{method_name}"))
{
return Some(*idx);
}
}
}
None
}
/// Get the impact analyzer for a built graph
pub fn impact_analyzer(code_graph: &CodeGraph) -> ImpactAnalyzer<'_> {
ImpactAnalyzer::new(code_graph)
}
}

View File

@@ -0,0 +1,219 @@
use std::collections::{HashSet, VecDeque};
use compliance_core::models::graph::ImpactAnalysis;
use petgraph::graph::NodeIndex;
use petgraph::visit::EdgeRef;
use petgraph::Direction;
use super::engine::CodeGraph;
/// Analyzes the impact/blast radius of findings within a code graph
pub struct ImpactAnalyzer<'a> {
    // Borrowed, fully built graph; the analyzer never mutates it.
    code_graph: &'a CodeGraph,
}
impl<'a> ImpactAnalyzer<'a> {
    /// Wrap a built code graph for impact queries.
    pub fn new(code_graph: &'a CodeGraph) -> Self {
        Self { code_graph }
    }
    /// Compute impact analysis for a finding at the given file path and line number.
    ///
    /// Populates, on the returned `ImpactAnalysis`:
    /// - `blast_radius`: count of nodes reachable *from* the finding's node
    ///   (the start node itself is excluded by `bfs_reachable`),
    /// - `affected_entry_points`: entry-point nodes that can *reach* it,
    /// - `call_chains`: one path per entry point (depth-limited to 10),
    /// - `direct_callers` / `direct_callees`: immediate graph neighbours,
    /// - `affected_communities`: sorted community ids touched by the blast
    ///   radius plus the finding's own node.
    ///
    /// Returns an empty analysis when no node matches the location.
    pub fn analyze(
        &self,
        repo_id: &str,
        finding_id: &str,
        graph_build_id: &str,
        file_path: &str,
        line_number: Option<u32>,
    ) -> ImpactAnalysis {
        let mut analysis =
            ImpactAnalysis::new(repo_id.to_string(), finding_id.to_string(), graph_build_id.to_string());
        // Find the node containing the finding; bail out with the empty
        // analysis if the location maps to nothing in the graph.
        let target_idx = match self.find_node_at_location(file_path, line_number) {
            Some(idx) => idx,
            None => return analysis,
        };
        // BFS forward: compute blast radius (what this node affects).
        let forward_reachable = self.bfs_reachable(target_idx, Direction::Outgoing);
        analysis.blast_radius = forward_reachable.len() as u32;
        // BFS backward: find nodes that can reach this node.
        let backward_reachable = self.bfs_reachable(target_idx, Direction::Incoming);
        // Keep only the entry points among them.
        for &idx in &backward_reachable {
            if let Some(node) = self.get_node_by_index(idx) {
                if node.is_entry_point {
                    analysis
                        .affected_entry_points
                        .push(node.qualified_name.clone());
                }
            }
        }
        // Extract one depth-limited call chain per entry point. Chains are
        // collected into a local Vec first so we can iterate
        // `affected_entry_points` by reference without cloning it just to
        // satisfy the borrow checker (the original cloned the whole Vec).
        let mut chains = Vec::new();
        for entry_name in &analysis.affected_entry_points {
            if let Some(&entry_idx) = self.code_graph.node_map.get(entry_name) {
                if let Some(chain) = self.find_path(entry_idx, target_idx, 10) {
                    chains.push(chain);
                }
            }
        }
        analysis.call_chains.extend(chains);
        // Direct callers (incoming edges to target).
        for edge in self
            .code_graph
            .graph
            .edges_directed(target_idx, Direction::Incoming)
        {
            if let Some(node) = self.get_node_by_index(edge.source()) {
                analysis.direct_callers.push(node.qualified_name.clone());
            }
        }
        // Direct callees (outgoing edges from target).
        for edge in self.code_graph.graph.edges(target_idx) {
            if let Some(node) = self.get_node_by_index(edge.target()) {
                analysis.direct_callees.push(node.qualified_name.clone());
            }
        }
        // Affected communities: everything in the blast radius plus the
        // target node itself.
        let mut affected_comms: HashSet<u32> = HashSet::new();
        for &idx in forward_reachable.iter().chain(std::iter::once(&target_idx)) {
            if let Some(node) = self.get_node_by_index(idx) {
                if let Some(cid) = node.community_id {
                    affected_comms.insert(cid);
                }
            }
        }
        analysis.affected_communities = affected_comms.into_iter().collect();
        analysis.affected_communities.sort();
        analysis
    }
    /// Find the graph node at a given file/line location.
    ///
    /// With a line number, picks the narrowest node whose [start_line,
    /// end_line] span contains it; without one, falls back to the file node.
    fn find_node_at_location(&self, file_path: &str, line_number: Option<u32>) -> Option<NodeIndex> {
        let mut best: Option<(NodeIndex, u32)> = None; // (index, line span)
        for node in &self.code_graph.nodes {
            if node.file_path != file_path {
                continue;
            }
            if let Some(line) = line_number {
                if line >= node.start_line && line <= node.end_line {
                    let span = node.end_line - node.start_line;
                    // Prefer the narrowest containing node.
                    if best.map_or(true, |(_, best_span)| span < best_span) {
                        if let Some(gi) = node.graph_index {
                            best = Some((NodeIndex::new(gi as usize), span));
                        }
                    }
                }
            } else {
                // No line number: use the synthetic file node.
                if node.kind == compliance_core::models::graph::CodeNodeKind::File {
                    if let Some(gi) = node.graph_index {
                        return Some(NodeIndex::new(gi as usize));
                    }
                }
            }
        }
        best.map(|(idx, _)| idx)
    }
    /// BFS to find all nodes reachable from `start` in the given direction.
    /// The start node itself is removed from the result.
    fn bfs_reachable(&self, start: NodeIndex, direction: Direction) -> HashSet<NodeIndex> {
        let mut visited = HashSet::new();
        let mut queue = VecDeque::new();
        queue.push_back(start);
        while let Some(current) = queue.pop_front() {
            // `insert` returns false if already visited — skip duplicates.
            if !visited.insert(current) {
                continue;
            }
            let neighbors: Vec<NodeIndex> = match direction {
                Direction::Outgoing => self
                    .code_graph
                    .graph
                    .edges(current)
                    .map(|e| e.target())
                    .collect(),
                Direction::Incoming => self
                    .code_graph
                    .graph
                    .edges_directed(current, Direction::Incoming)
                    .map(|e| e.source())
                    .collect(),
            };
            for neighbor in neighbors {
                if !visited.contains(&neighbor) {
                    queue.push_back(neighbor);
                }
            }
        }
        visited.remove(&start);
        visited
    }
    /// Find a path from `from` to `to` (BFS, limited to `max_depth` nodes).
    /// Returns the path as qualified names, or `None` if unreachable within
    /// the depth limit.
    fn find_path(
        &self,
        from: NodeIndex,
        to: NodeIndex,
        max_depth: usize,
    ) -> Option<Vec<String>> {
        let mut visited = HashSet::new();
        let mut queue: VecDeque<(NodeIndex, Vec<NodeIndex>)> = VecDeque::new();
        queue.push_back((from, vec![from]));
        while let Some((current, path)) = queue.pop_front() {
            if current == to {
                // Translate node indices to qualified names, dropping any
                // index with no backing CodeNode.
                return Some(
                    path.iter()
                        .filter_map(|&idx| {
                            self.get_node_by_index(idx)
                                .map(|n| n.qualified_name.clone())
                        })
                        .collect(),
                );
            }
            if path.len() >= max_depth {
                continue;
            }
            if !visited.insert(current) {
                continue;
            }
            for edge in self.code_graph.graph.edges(current) {
                let next = edge.target();
                if !visited.contains(&next) {
                    let mut new_path = path.clone();
                    new_path.push(next);
                    queue.push_back((next, new_path));
                }
            }
        }
        None
    }
    /// Map a petgraph index back to its CodeNode via `graph_index`.
    /// Linear scan over the node list.
    fn get_node_by_index(&self, idx: NodeIndex) -> Option<&compliance_core::models::graph::CodeNode> {
        let target_gi = idx.index() as u32;
        self.code_graph
            .nodes
            .iter()
            .find(|n| n.graph_index == Some(target_gi))
    }
}

View File

@@ -0,0 +1,4 @@
//! Code knowledge-graph domain: detection, construction, analysis, storage.
pub mod community; // Louvain community detection over the code graph
pub mod engine; // Graph construction from parsed sources
pub mod impact; // Blast-radius / call-chain impact analysis
pub mod persistence; // MongoDB persistence for nodes, edges, builds, impacts

View File

@@ -0,0 +1,255 @@
use compliance_core::error::CoreError;
use compliance_core::models::graph::{CodeEdge, CodeNode, GraphBuildRun, ImpactAnalysis};
use futures_util::TryStreamExt;
use mongodb::bson::doc;
use mongodb::options::IndexOptions;
use mongodb::{Collection, Database, IndexModel};
use tracing::info;
/// MongoDB persistence layer for the code knowledge graph
pub struct GraphStore {
    // Code entities per build (functions, classes, files, ...).
    nodes: Collection<CodeNode>,
    // Resolved relationships per build (calls, imports, ...).
    edges: Collection<CodeEdge>,
    // Build-run history; indexed (repo_id, started_at DESC) by ensure_indexes.
    builds: Collection<GraphBuildRun>,
    // Cached impact analyses, unique per (repo_id, finding_id).
    impacts: Collection<ImpactAnalysis>,
}
impl GraphStore {
    /// Batch size for bulk node/edge inserts (was duplicated as a local
    /// `batch_size` in `store_graph`).
    const BATCH_SIZE: usize = 1000;

    /// Create a store bound to the graph collections of `db`.
    pub fn new(db: &Database) -> Self {
        Self {
            nodes: db.collection("graph_nodes"),
            edges: db.collection("graph_edges"),
            builds: db.collection("graph_builds"),
            impacts: db.collection("impact_analyses"),
        }
    }
    /// Ensure indexes are created (idempotent; safe to call on startup).
    ///
    /// # Errors
    /// Propagates the driver error if any index creation fails.
    pub async fn ensure_indexes(&self) -> Result<(), CoreError> {
        // graph_nodes: compound index on (repo_id, graph_build_id)
        self.nodes
            .create_index(
                IndexModel::builder()
                    .keys(doc! { "repo_id": 1, "graph_build_id": 1 })
                    .build(),
            )
            .await?;
        // graph_nodes: index on qualified_name for lookups
        self.nodes
            .create_index(
                IndexModel::builder()
                    .keys(doc! { "qualified_name": 1 })
                    .build(),
            )
            .await?;
        // graph_edges: compound index on (repo_id, graph_build_id)
        self.edges
            .create_index(
                IndexModel::builder()
                    .keys(doc! { "repo_id": 1, "graph_build_id": 1 })
                    .build(),
            )
            .await?;
        // graph_builds: compound index on (repo_id, started_at DESC) so
        // get_latest_build's sort is index-backed.
        self.builds
            .create_index(
                IndexModel::builder()
                    .keys(doc! { "repo_id": 1, "started_at": -1 })
                    .build(),
            )
            .await?;
        // impact_analyses: unique (repo_id, finding_id) — one cached
        // analysis per finding, upserted by store_impact.
        self.impacts
            .create_index(
                IndexModel::builder()
                    .keys(doc! { "repo_id": 1, "finding_id": 1 })
                    .options(IndexOptions::builder().unique(true).build())
                    .build(),
            )
            .await?;
        Ok(())
    }
    /// Store a complete graph build result.
    ///
    /// Inserts the build run first (so nodes/edges always refer to an
    /// existing build document), then nodes and edges in batches of
    /// `Self::BATCH_SIZE`. Returns the hex ObjectId of the build run, or an
    /// empty string if the driver returned a non-ObjectId inserted id.
    pub async fn store_graph(
        &self,
        build_run: &GraphBuildRun,
        nodes: &[CodeNode],
        edges: &[CodeEdge],
    ) -> Result<String, CoreError> {
        let result = self.builds.insert_one(build_run).await?;
        let build_id = result
            .inserted_id
            .as_object_id()
            .map(|oid| oid.to_hex())
            .unwrap_or_default();
        // `insert_many` accepts an iterator of borrows (the same Borrow-based
        // API already used by `insert_one(build_run)` above), so chunks are
        // passed directly instead of cloning each batch via `to_vec()`.
        // `chunks` on an empty slice yields nothing, so empty inputs are
        // skipped without an explicit is_empty check.
        for chunk in nodes.chunks(Self::BATCH_SIZE) {
            self.nodes.insert_many(chunk).await?;
        }
        for chunk in edges.chunks(Self::BATCH_SIZE) {
            self.edges.insert_many(chunk).await?;
        }
        info!(
            build_id = %build_id,
            nodes = nodes.len(),
            edges = edges.len(),
            "Graph stored to MongoDB"
        );
        Ok(build_id)
    }
    /// Delete previous graph data for a repo before storing new graph.
    ///
    /// Removes nodes, edges, and cached impact analyses for `repo_id`.
    /// NOTE(review): build-run documents are left in place, presumably as
    /// history — confirm that retention is intended.
    pub async fn delete_repo_graph(&self, repo_id: &str) -> Result<(), CoreError> {
        let filter = doc! { "repo_id": repo_id };
        self.nodes.delete_many(filter.clone()).await?;
        self.edges.delete_many(filter.clone()).await?;
        self.impacts.delete_many(filter).await?;
        Ok(())
    }
    /// Store an impact analysis result (upsert keyed on repo_id + finding_id).
    pub async fn store_impact(&self, impact: &ImpactAnalysis) -> Result<(), CoreError> {
        let filter = doc! {
            "repo_id": &impact.repo_id,
            "finding_id": &impact.finding_id,
        };
        let opts = mongodb::options::ReplaceOptions::builder()
            .upsert(true)
            .build();
        self.impacts
            .replace_one(filter, impact)
            .with_options(opts)
            .await?;
        Ok(())
    }
    /// Get the latest graph build for a repo (most recent `started_at`).
    pub async fn get_latest_build(
        &self,
        repo_id: &str,
    ) -> Result<Option<GraphBuildRun>, CoreError> {
        let filter = doc! { "repo_id": repo_id };
        let opts = mongodb::options::FindOneOptions::builder()
            .sort(doc! { "started_at": -1 })
            .build();
        let result = self.builds.find_one(filter).with_options(opts).await?;
        Ok(result)
    }
    /// Get all nodes for a given repo and graph build.
    pub async fn get_nodes(
        &self,
        repo_id: &str,
        graph_build_id: &str,
    ) -> Result<Vec<CodeNode>, CoreError> {
        let filter = doc! {
            "repo_id": repo_id,
            "graph_build_id": graph_build_id,
        };
        let cursor = self.nodes.find(filter).await?;
        let nodes: Vec<CodeNode> = cursor.try_collect().await?;
        Ok(nodes)
    }
    /// Get all edges for a given repo and graph build.
    pub async fn get_edges(
        &self,
        repo_id: &str,
        graph_build_id: &str,
    ) -> Result<Vec<CodeEdge>, CoreError> {
        let filter = doc! {
            "repo_id": repo_id,
            "graph_build_id": graph_build_id,
        };
        let cursor = self.edges.find(filter).await?;
        let edges: Vec<CodeEdge> = cursor.try_collect().await?;
        Ok(edges)
    }
    /// Get the cached impact analysis for a finding, if any.
    pub async fn get_impact(
        &self,
        repo_id: &str,
        finding_id: &str,
    ) -> Result<Option<ImpactAnalysis>, CoreError> {
        let filter = doc! {
            "repo_id": repo_id,
            "finding_id": finding_id,
        };
        let result = self.impacts.find_one(filter).await?;
        Ok(result)
    }
    /// Get nodes grouped by community, sorted by community id.
    /// Nodes without a `community_id` are skipped.
    pub async fn get_communities(
        &self,
        repo_id: &str,
        graph_build_id: &str,
    ) -> Result<Vec<CommunityInfo>, CoreError> {
        let filter = doc! {
            "repo_id": repo_id,
            "graph_build_id": graph_build_id,
        };
        let cursor = self.nodes.find(filter).await?;
        let nodes: Vec<CodeNode> = cursor.try_collect().await?;
        // Bucket qualified names by community id.
        let mut communities: std::collections::HashMap<u32, Vec<String>> =
            std::collections::HashMap::new();
        for node in &nodes {
            if let Some(cid) = node.community_id {
                communities
                    .entry(cid)
                    .or_default()
                    .push(node.qualified_name.clone());
            }
        }
        let mut result: Vec<CommunityInfo> = communities
            .into_iter()
            .map(|(id, members)| CommunityInfo {
                community_id: id,
                member_count: members.len() as u32,
                members,
            })
            .collect();
        result.sort_by_key(|c| c.community_id);
        Ok(result)
    }
}
/// Summary of one detected community: its id and member qualified names.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct CommunityInfo {
    /// Contiguous community id assigned during detection.
    pub community_id: u32,
    /// Number of members (equals `members.len()`, precomputed for display).
    pub member_count: u32,
    /// Qualified names of the nodes in this community.
    pub members: Vec<String>,
}

View File

@@ -0,0 +1,7 @@
//! compliance-graph: code knowledge-graph construction, analysis, and search.
pub mod graph;
pub mod parsers;
pub mod search;
// Convenience re-exports of the crate's primary entry points.
pub use graph::engine::GraphEngine;
pub use parsers::registry::ParserRegistry;
pub use search::index::SymbolIndex;

View File

@@ -0,0 +1,372 @@
use std::path::Path;
use compliance_core::error::CoreError;
use compliance_core::models::graph::{CodeEdge, CodeEdgeKind, CodeNode, CodeNodeKind};
use compliance_core::traits::graph_builder::{LanguageParser, ParseOutput};
use tree_sitter::{Node, Parser};
/// Tree-sitter based parser that extracts code-graph nodes (functions,
/// classes, methods) and edges (calls, inheritance, imports) from
/// JavaScript sources.
pub struct JavaScriptParser;
impl JavaScriptParser {
    pub fn new() -> Self {
        Self
    }
    /// Recursive walk over the tree-sitter AST, emitting nodes/edges into
    /// `output`.
    ///
    /// `parent_qualified` is the enclosing class's qualified name (set when
    /// descending into a class body); members are named `Parent.name`,
    /// top-level items `file_path::name`.
    ///
    /// NOTE(review): after handling a `function_declaration` or
    /// `method_definition` this falls through to `walk_children` with the
    /// *unchanged* `parent_qualified`, so declarations nested inside a
    /// function body are qualified under the file/class rather than under
    /// the outer function — confirm that is intended.
    fn walk_tree(
        &self,
        node: Node<'_>,
        source: &str,
        file_path: &str,
        repo_id: &str,
        graph_build_id: &str,
        parent_qualified: Option<&str>,
        output: &mut ParseOutput,
    ) {
        match node.kind() {
            "function_declaration" => {
                if let Some(name_node) = node.child_by_field_name("name") {
                    let name = &source[name_node.byte_range()];
                    let qualified = match parent_qualified {
                        Some(p) => format!("{p}.{name}"),
                        None => format!("{file_path}::{name}"),
                    };
                    // Exported functions are treated as entry points.
                    let is_entry = self.is_exported_function(&node, source);
                    output.nodes.push(CodeNode {
                        id: None,
                        repo_id: repo_id.to_string(),
                        graph_build_id: graph_build_id.to_string(),
                        qualified_name: qualified.clone(),
                        name: name.to_string(),
                        kind: CodeNodeKind::Function,
                        file_path: file_path.to_string(),
                        // tree-sitter rows are 0-based; stored lines 1-based.
                        start_line: node.start_position().row as u32 + 1,
                        end_line: node.end_position().row as u32 + 1,
                        language: "javascript".to_string(),
                        community_id: None,
                        is_entry_point: is_entry,
                        graph_index: None,
                    });
                    // Record call edges found anywhere inside the body.
                    if let Some(body) = node.child_by_field_name("body") {
                        self.extract_calls(
                            body, source, file_path, repo_id, graph_build_id, &qualified, output,
                        );
                    }
                }
            }
            "class_declaration" => {
                if let Some(name_node) = node.child_by_field_name("name") {
                    let name = &source[name_node.byte_range()];
                    let qualified = match parent_qualified {
                        Some(p) => format!("{p}.{name}"),
                        None => format!("{file_path}::{name}"),
                    };
                    output.nodes.push(CodeNode {
                        id: None,
                        repo_id: repo_id.to_string(),
                        graph_build_id: graph_build_id.to_string(),
                        qualified_name: qualified.clone(),
                        name: name.to_string(),
                        kind: CodeNodeKind::Class,
                        file_path: file_path.to_string(),
                        start_line: node.start_position().row as u32 + 1,
                        end_line: node.end_position().row as u32 + 1,
                        language: "javascript".to_string(),
                        community_id: None,
                        is_entry_point: false,
                        graph_index: None,
                    });
                    // Extract superclass (e.g. `class A extends B`) as an
                    // Inherits edge; the target is resolved later.
                    if let Some(heritage) = node.child_by_field_name("superclass") {
                        let base_name = &source[heritage.byte_range()];
                        output.edges.push(CodeEdge {
                            id: None,
                            repo_id: repo_id.to_string(),
                            graph_build_id: graph_build_id.to_string(),
                            source: qualified.clone(),
                            target: base_name.to_string(),
                            kind: CodeEdgeKind::Inherits,
                            file_path: file_path.to_string(),
                            line_number: Some(node.start_position().row as u32 + 1),
                        });
                    }
                    // Walk the class body with this class as the qualifier so
                    // methods become `Class.method`.
                    if let Some(body) = node.child_by_field_name("body") {
                        self.walk_children(
                            body, source, file_path, repo_id, graph_build_id, Some(&qualified),
                            output,
                        );
                    }
                    // Early return: the class body was already walked above,
                    // so skip the generic walk_children at the bottom.
                    return;
                }
            }
            "method_definition" => {
                if let Some(name_node) = node.child_by_field_name("name") {
                    let name = &source[name_node.byte_range()];
                    let qualified = match parent_qualified {
                        Some(p) => format!("{p}.{name}"),
                        None => format!("{file_path}::{name}"),
                    };
                    output.nodes.push(CodeNode {
                        id: None,
                        repo_id: repo_id.to_string(),
                        graph_build_id: graph_build_id.to_string(),
                        qualified_name: qualified.clone(),
                        name: name.to_string(),
                        kind: CodeNodeKind::Method,
                        file_path: file_path.to_string(),
                        start_line: node.start_position().row as u32 + 1,
                        end_line: node.end_position().row as u32 + 1,
                        language: "javascript".to_string(),
                        community_id: None,
                        is_entry_point: false,
                        graph_index: None,
                    });
                    if let Some(body) = node.child_by_field_name("body") {
                        self.extract_calls(
                            body, source, file_path, repo_id, graph_build_id, &qualified, output,
                        );
                    }
                }
            }
            // Arrow functions assigned to variables: const foo = () => {}
            "lexical_declaration" | "variable_declaration" => {
                self.extract_arrow_functions(
                    node, source, file_path, repo_id, graph_build_id, parent_qualified, output,
                );
            }
            "import_statement" => {
                // Record an Imports edge from the enclosing scope (or the
                // file itself) to the imported module specifier.
                let text = &source[node.byte_range()];
                if let Some(module) = self.extract_import_source(text) {
                    output.edges.push(CodeEdge {
                        id: None,
                        repo_id: repo_id.to_string(),
                        graph_build_id: graph_build_id.to_string(),
                        source: parent_qualified.unwrap_or(file_path).to_string(),
                        target: module,
                        kind: CodeEdgeKind::Imports,
                        file_path: file_path.to_string(),
                        line_number: Some(node.start_position().row as u32 + 1),
                    });
                }
            }
            _ => {}
        }
        // Continue descending (classes returned early above).
        self.walk_children(
            node,
            source,
            file_path,
            repo_id,
            graph_build_id,
            parent_qualified,
            output,
        );
    }
    /// Visit every direct child of `node` with `walk_tree`.
    fn walk_children(
        &self,
        node: Node<'_>,
        source: &str,
        file_path: &str,
        repo_id: &str,
        graph_build_id: &str,
        parent_qualified: Option<&str>,
        output: &mut ParseOutput,
    ) {
        let mut cursor = node.walk();
        for child in node.children(&mut cursor) {
            self.walk_tree(
                child, source, file_path, repo_id, graph_build_id, parent_qualified, output,
            );
        }
    }
    /// Recursively record a Calls edge for every `call_expression` under
    /// `node`, attributed to `caller_qualified`. The callee text is the raw
    /// callee expression (e.g. `foo`, `obj.method`); resolution happens later.
    fn extract_calls(
        &self,
        node: Node<'_>,
        source: &str,
        file_path: &str,
        repo_id: &str,
        graph_build_id: &str,
        caller_qualified: &str,
        output: &mut ParseOutput,
    ) {
        if node.kind() == "call_expression" {
            if let Some(func_node) = node.child_by_field_name("function") {
                let callee = &source[func_node.byte_range()];
                output.edges.push(CodeEdge {
                    id: None,
                    repo_id: repo_id.to_string(),
                    graph_build_id: graph_build_id.to_string(),
                    source: caller_qualified.to_string(),
                    target: callee.to_string(),
                    kind: CodeEdgeKind::Calls,
                    file_path: file_path.to_string(),
                    line_number: Some(node.start_position().row as u32 + 1),
                });
            }
        }
        let mut cursor = node.walk();
        for child in node.children(&mut cursor) {
            self.extract_calls(
                child, source, file_path, repo_id, graph_build_id, caller_qualified, output,
            );
        }
    }
    /// Emit Function nodes for arrow/function expressions bound in a
    /// variable declaration (`const foo = () => {}`), including call edges
    /// from their bodies. The declarator's span is used for line numbers.
    fn extract_arrow_functions(
        &self,
        node: Node<'_>,
        source: &str,
        file_path: &str,
        repo_id: &str,
        graph_build_id: &str,
        parent_qualified: Option<&str>,
        output: &mut ParseOutput,
    ) {
        let mut cursor = node.walk();
        for child in node.children(&mut cursor) {
            if child.kind() == "variable_declarator" {
                let name_node = child.child_by_field_name("name");
                let value_node = child.child_by_field_name("value");
                if let (Some(name_n), Some(value_n)) = (name_node, value_node) {
                    if value_n.kind() == "arrow_function" || value_n.kind() == "function" {
                        let name = &source[name_n.byte_range()];
                        let qualified = match parent_qualified {
                            Some(p) => format!("{p}.{name}"),
                            None => format!("{file_path}::{name}"),
                        };
                        output.nodes.push(CodeNode {
                            id: None,
                            repo_id: repo_id.to_string(),
                            graph_build_id: graph_build_id.to_string(),
                            qualified_name: qualified.clone(),
                            name: name.to_string(),
                            kind: CodeNodeKind::Function,
                            file_path: file_path.to_string(),
                            start_line: child.start_position().row as u32 + 1,
                            end_line: child.end_position().row as u32 + 1,
                            language: "javascript".to_string(),
                            community_id: None,
                            is_entry_point: false,
                            graph_index: None,
                        });
                        if let Some(body) = value_n.child_by_field_name("body") {
                            self.extract_calls(
                                body, source, file_path, repo_id, graph_build_id, &qualified,
                                output,
                            );
                        }
                    }
                }
            }
        }
    }
    /// Heuristic export check: true when the function's parent node is an
    /// `export_statement`, or when the preceding sibling's text mentions
    /// `module.exports` / `exports.`.
    ///
    /// NOTE(review): the prev_sibling text check is a loose heuristic and
    /// can both miss and misfire — confirm the false-positive rate is
    /// acceptable for entry-point detection.
    fn is_exported_function(&self, node: &Node<'_>, source: &str) -> bool {
        if let Some(parent) = node.parent() {
            if parent.kind() == "export_statement" {
                return true;
            }
        }
        // Check for module.exports patterns
        if let Some(prev) = node.prev_sibling() {
            let text = &source[prev.byte_range()];
            if text.contains("module.exports") || text.contains("exports.") {
                return true;
            }
        }
        false
    }
    /// Extract the module specifier from an import statement's raw text.
    /// Handles both `import ... from 'module'` and side-effect
    /// `import 'module'` forms; quotes/semicolons/spaces are stripped from
    /// both ends. Returns `None` if nothing remains.
    fn extract_import_source(&self, import_text: &str) -> Option<String> {
        // import ... from 'module' or import 'module'
        let from_idx = import_text.find("from ");
        let start = if let Some(idx) = from_idx {
            // Skip past "from " (5 chars).
            idx + 5
        } else {
            // Skip past "import " (7 chars).
            import_text.find("import ")? + 7
        };
        let rest = &import_text[start..];
        let module = rest
            .trim()
            .trim_matches(|c| c == '\'' || c == '"' || c == ';' || c == ' ');
        if module.is_empty() {
            None
        } else {
            Some(module.to_string())
        }
    }
}
impl LanguageParser for JavaScriptParser {
    fn language(&self) -> &str {
        "javascript"
    }
    fn extensions(&self) -> &[&str] {
        &["js", "jsx", "mjs", "cjs"]
    }
    /// Parse one JavaScript file into graph nodes and edges.
    ///
    /// Emits a synthetic File node spanning the whole source, then walks the
    /// tree-sitter AST to collect declarations, calls, and imports.
    fn parse_file(
        &self,
        file_path: &Path,
        source: &str,
        repo_id: &str,
        graph_build_id: &str,
    ) -> Result<ParseOutput, CoreError> {
        // Configure a tree-sitter parser for the JavaScript grammar.
        let mut ts_parser = Parser::new();
        ts_parser
            .set_language(&tree_sitter_javascript::LANGUAGE.into())
            .map_err(|e| CoreError::Graph(format!("Failed to set JavaScript language: {e}")))?;
        let tree = match ts_parser.parse(source, None) {
            Some(t) => t,
            None => return Err(CoreError::Graph("Failed to parse JavaScript file".to_string())),
        };
        let path_str = file_path.to_string_lossy().to_string();
        let file_name = file_path
            .file_name()
            .map(|n| n.to_string_lossy().to_string())
            .unwrap_or_default();
        let mut output = ParseOutput::default();
        // File node: qualified name is the path itself, lines 1..=line count.
        output.nodes.push(CodeNode {
            id: None,
            repo_id: repo_id.to_string(),
            graph_build_id: graph_build_id.to_string(),
            qualified_name: path_str.clone(),
            name: file_name,
            kind: CodeNodeKind::File,
            file_path: path_str.clone(),
            start_line: 1,
            end_line: source.lines().count() as u32,
            language: "javascript".to_string(),
            community_id: None,
            is_entry_point: false,
            graph_index: None,
        });
        // Recursively extract declarations, calls, and imports.
        self.walk_tree(
            tree.root_node(),
            source,
            &path_str,
            repo_id,
            graph_build_id,
            None,
            &mut output,
        );
        Ok(output)
    }
}

View File

@@ -0,0 +1,5 @@
// Per-language tree-sitter parser implementations, plus the registry that
// dispatches to them by file extension.
pub mod javascript;
pub mod python;
pub mod registry;
pub mod rust_parser;
pub mod typescript;

View File

@@ -0,0 +1,336 @@
use std::path::Path;
use compliance_core::error::CoreError;
use compliance_core::models::graph::{CodeEdge, CodeEdgeKind, CodeNode, CodeNodeKind};
use compliance_core::traits::graph_builder::{LanguageParser, ParseOutput};
use tree_sitter::{Node, Parser};
/// Tree-sitter based parser that extracts code-graph nodes and edges from
/// Python source files.
pub struct PythonParser;
impl PythonParser {
    pub fn new() -> Self {
        Self
    }

    /// Recursively walk the syntax tree, emitting graph nodes for functions
    /// and classes plus edges for imports, calls, and inheritance.
    ///
    /// `parent_qualified` carries the qualified name of the enclosing class
    /// (if any) so members get dotted names like `file.py::Cls.method`.
    fn walk_tree(
        &self,
        node: Node<'_>,
        source: &str,
        file_path: &str,
        repo_id: &str,
        graph_build_id: &str,
        parent_qualified: Option<&str>,
        output: &mut ParseOutput,
    ) {
        match node.kind() {
            "function_definition" => {
                if let Some(name_node) = node.child_by_field_name("name") {
                    let name = &source[name_node.byte_range()];
                    let qualified = match parent_qualified {
                        Some(p) => format!("{p}.{name}"),
                        None => format!("{file_path}::{name}"),
                    };
                    // A def is a method iff its nearest enclosing definition
                    // in the syntax tree is a class. (Previously this
                    // string-matched "class" against the parent's qualified
                    // name, which almost never holds, so methods were
                    // misclassified as plain functions.)
                    let kind = if self.is_method_context(&node) {
                        CodeNodeKind::Method
                    } else {
                        CodeNodeKind::Function
                    };
                    // Entry points: script mains and common Flask/FastAPI
                    // style route handlers.
                    let is_entry = name == "__main__"
                        || name == "main"
                        || self.has_decorator(&node, source, "app.route")
                        || self.has_decorator(&node, source, "app.get")
                        || self.has_decorator(&node, source, "app.post");
                    output.nodes.push(CodeNode {
                        id: None,
                        repo_id: repo_id.to_string(),
                        graph_build_id: graph_build_id.to_string(),
                        qualified_name: qualified.clone(),
                        name: name.to_string(),
                        kind,
                        file_path: file_path.to_string(),
                        start_line: node.start_position().row as u32 + 1,
                        end_line: node.end_position().row as u32 + 1,
                        language: "python".to_string(),
                        community_id: None,
                        is_entry_point: is_entry,
                        graph_index: None,
                    });
                    // Extract calls in function body
                    if let Some(body) = node.child_by_field_name("body") {
                        self.extract_calls(
                            body,
                            source,
                            file_path,
                            repo_id,
                            graph_build_id,
                            &qualified,
                            output,
                        );
                    }
                }
            }
            "class_definition" => {
                if let Some(name_node) = node.child_by_field_name("name") {
                    let name = &source[name_node.byte_range()];
                    let qualified = match parent_qualified {
                        Some(p) => format!("{p}.{name}"),
                        None => format!("{file_path}::{name}"),
                    };
                    output.nodes.push(CodeNode {
                        id: None,
                        repo_id: repo_id.to_string(),
                        graph_build_id: graph_build_id.to_string(),
                        qualified_name: qualified.clone(),
                        name: name.to_string(),
                        kind: CodeNodeKind::Class,
                        file_path: file_path.to_string(),
                        start_line: node.start_position().row as u32 + 1,
                        end_line: node.end_position().row as u32 + 1,
                        language: "python".to_string(),
                        community_id: None,
                        is_entry_point: false,
                        graph_index: None,
                    });
                    // Extract superclasses as Inherits edges
                    if let Some(bases) = node.child_by_field_name("superclasses") {
                        self.extract_inheritance(
                            bases,
                            source,
                            file_path,
                            repo_id,
                            graph_build_id,
                            &qualified,
                            output,
                        );
                    }
                    // Walk methods with this class as the parent scope
                    if let Some(body) = node.child_by_field_name("body") {
                        self.walk_children(
                            body,
                            source,
                            file_path,
                            repo_id,
                            graph_build_id,
                            Some(&qualified),
                            output,
                        );
                    }
                    // Children were walked above; don't walk them twice.
                    return;
                }
            }
            "import_statement" | "import_from_statement" => {
                let import_text = &source[node.byte_range()];
                if let Some(module) = self.extract_import_module(import_text) {
                    output.edges.push(CodeEdge {
                        id: None,
                        repo_id: repo_id.to_string(),
                        graph_build_id: graph_build_id.to_string(),
                        source: parent_qualified.unwrap_or(file_path).to_string(),
                        target: module,
                        kind: CodeEdgeKind::Imports,
                        file_path: file_path.to_string(),
                        line_number: Some(node.start_position().row as u32 + 1),
                    });
                }
            }
            _ => {}
        }
        self.walk_children(
            node,
            source,
            file_path,
            repo_id,
            graph_build_id,
            parent_qualified,
            output,
        );
    }

    /// True when `node` (a `function_definition`) is lexically inside a
    /// class body, i.e. its nearest enclosing definition is a class. A
    /// nested `def` inside another function is a plain function again.
    fn is_method_context(&self, node: &Node<'_>) -> bool {
        let mut current = node.parent();
        while let Some(ancestor) = current {
            match ancestor.kind() {
                "class_definition" => return true,
                "function_definition" => return false,
                _ => {}
            }
            current = ancestor.parent();
        }
        false
    }

    /// Walk every child of `node` through [`Self::walk_tree`].
    fn walk_children(
        &self,
        node: Node<'_>,
        source: &str,
        file_path: &str,
        repo_id: &str,
        graph_build_id: &str,
        parent_qualified: Option<&str>,
        output: &mut ParseOutput,
    ) {
        let mut cursor = node.walk();
        for child in node.children(&mut cursor) {
            self.walk_tree(
                child,
                source,
                file_path,
                repo_id,
                graph_build_id,
                parent_qualified,
                output,
            );
        }
    }

    /// Emit a `Calls` edge for every call expression under `node`.
    /// The target is the raw callee text (resolution happens later).
    fn extract_calls(
        &self,
        node: Node<'_>,
        source: &str,
        file_path: &str,
        repo_id: &str,
        graph_build_id: &str,
        caller_qualified: &str,
        output: &mut ParseOutput,
    ) {
        if node.kind() == "call" {
            if let Some(func_node) = node.child_by_field_name("function") {
                let callee = &source[func_node.byte_range()];
                output.edges.push(CodeEdge {
                    id: None,
                    repo_id: repo_id.to_string(),
                    graph_build_id: graph_build_id.to_string(),
                    source: caller_qualified.to_string(),
                    target: callee.to_string(),
                    kind: CodeEdgeKind::Calls,
                    file_path: file_path.to_string(),
                    line_number: Some(node.start_position().row as u32 + 1),
                });
            }
        }
        let mut cursor = node.walk();
        for child in node.children(&mut cursor) {
            self.extract_calls(
                child,
                source,
                file_path,
                repo_id,
                graph_build_id,
                caller_qualified,
                output,
            );
        }
    }

    /// Emit an `Inherits` edge for each base class in a superclass list.
    fn extract_inheritance(
        &self,
        node: Node<'_>,
        source: &str,
        file_path: &str,
        repo_id: &str,
        graph_build_id: &str,
        class_qualified: &str,
        output: &mut ParseOutput,
    ) {
        let mut cursor = node.walk();
        for child in node.children(&mut cursor) {
            if child.kind() == "identifier" || child.kind() == "attribute" {
                let base_name = &source[child.byte_range()];
                output.edges.push(CodeEdge {
                    id: None,
                    repo_id: repo_id.to_string(),
                    graph_build_id: graph_build_id.to_string(),
                    source: class_qualified.to_string(),
                    target: base_name.to_string(),
                    kind: CodeEdgeKind::Inherits,
                    file_path: file_path.to_string(),
                    line_number: Some(node.start_position().row as u32 + 1),
                });
            }
        }
    }

    /// True when the definition is preceded by a decorator whose source
    /// text contains `decorator_name`. Scans the whole run of stacked
    /// decorators, not just the closest one.
    fn has_decorator(&self, node: &Node<'_>, source: &str, decorator_name: &str) -> bool {
        let mut prev = node.prev_sibling();
        while let Some(sibling) = prev {
            if sibling.kind() != "decorator" {
                break;
            }
            if source[sibling.byte_range()].contains(decorator_name) {
                return true;
            }
            prev = sibling.prev_sibling();
        }
        false
    }

    /// Extract the module path from an import statement's text.
    ///
    /// `from foo.bar import baz` -> `foo.bar`; `import foo.bar` -> `foo.bar`.
    /// `as` aliases and comma-separated lists resolve to the first module
    /// name (`import a as x, b` -> `a`).
    fn extract_import_module(&self, import_text: &str) -> Option<String> {
        if let Some(rest) = import_text.strip_prefix("from ") {
            let module = rest.split_whitespace().next()?;
            Some(module.to_string())
        } else if let Some(rest) = import_text.strip_prefix("import ") {
            let module = rest.split(',').next()?.split_whitespace().next()?;
            Some(module.to_string())
        } else {
            None
        }
    }
}
impl LanguageParser for PythonParser {
    fn language(&self) -> &str {
        "python"
    }

    fn extensions(&self) -> &[&str] {
        &["py"]
    }

    /// Parse one Python source file into graph nodes and edges.
    ///
    /// Emits a single `File` node covering the whole file, then walks the
    /// tree-sitter syntax tree to collect functions, classes, imports and
    /// inheritance edges.
    fn parse_file(
        &self,
        file_path: &Path,
        source: &str,
        repo_id: &str,
        graph_build_id: &str,
    ) -> Result<ParseOutput, CoreError> {
        let mut ts_parser = Parser::new();
        ts_parser
            .set_language(&tree_sitter_python::LANGUAGE.into())
            .map_err(|e| CoreError::Graph(format!("Failed to set Python language: {e}")))?;
        let parsed = ts_parser
            .parse(source, None)
            .ok_or_else(|| CoreError::Graph("Failed to parse Python file".to_string()))?;

        let path_str = file_path.to_string_lossy().to_string();
        let file_name = file_path
            .file_name()
            .map(|n| n.to_string_lossy().to_string())
            .unwrap_or_default();

        let mut result = ParseOutput::default();
        // Root node representing the file itself.
        result.nodes.push(CodeNode {
            id: None,
            repo_id: repo_id.to_string(),
            graph_build_id: graph_build_id.to_string(),
            qualified_name: path_str.clone(),
            name: file_name,
            kind: CodeNodeKind::File,
            file_path: path_str.clone(),
            start_line: 1,
            end_line: source.lines().count() as u32,
            language: "python".to_string(),
            community_id: None,
            is_entry_point: false,
            graph_index: None,
        });
        self.walk_tree(
            parsed.root_node(),
            source,
            &path_str,
            repo_id,
            graph_build_id,
            None,
            &mut result,
        );
        Ok(result)
    }
}

View File

@@ -0,0 +1,182 @@
use std::collections::HashMap;
use std::path::Path;
use compliance_core::error::CoreError;
use compliance_core::traits::graph_builder::{LanguageParser, ParseOutput};
use tracing::info;
use super::javascript::JavaScriptParser;
use super::python::PythonParser;
use super::rust_parser::RustParser;
use super::typescript::TypeScriptParser;
/// Registry of language parsers, indexed by file extension
pub struct ParserRegistry {
    // All registered parsers; positions are referenced by `extension_map`.
    parsers: Vec<Box<dyn LanguageParser>>,
    // Maps a file extension (without the dot) to an index into `parsers`.
    extension_map: HashMap<String, usize>,
}
impl ParserRegistry {
/// Create a registry with all built-in parsers
pub fn new() -> Self {
let parsers: Vec<Box<dyn LanguageParser>> = vec![
Box::new(RustParser::new()),
Box::new(PythonParser::new()),
Box::new(JavaScriptParser::new()),
Box::new(TypeScriptParser::new()),
];
let mut extension_map = HashMap::new();
for (idx, parser) in parsers.iter().enumerate() {
for ext in parser.extensions() {
extension_map.insert(ext.to_string(), idx);
}
}
Self {
parsers,
extension_map,
}
}
/// Check if a file extension is supported
pub fn supports_extension(&self, ext: &str) -> bool {
self.extension_map.contains_key(ext)
}
/// Get supported extensions
pub fn supported_extensions(&self) -> Vec<&str> {
self.extension_map.keys().map(|s| s.as_str()).collect()
}
/// Parse a file, selecting the appropriate parser by extension
pub fn parse_file(
&self,
file_path: &Path,
source: &str,
repo_id: &str,
graph_build_id: &str,
) -> Result<Option<ParseOutput>, CoreError> {
let ext = file_path
.extension()
.and_then(|e| e.to_str())
.unwrap_or("");
let parser_idx = match self.extension_map.get(ext) {
Some(idx) => *idx,
None => return Ok(None),
};
let parser = &self.parsers[parser_idx];
info!(
file = %file_path.display(),
language = parser.language(),
"Parsing file"
);
let output = parser.parse_file(file_path, source, repo_id, graph_build_id)?;
Ok(Some(output))
}
/// Parse all supported files in a directory tree
pub fn parse_directory(
&self,
dir: &Path,
repo_id: &str,
graph_build_id: &str,
max_nodes: u32,
) -> Result<ParseOutput, CoreError> {
let mut combined = ParseOutput::default();
let mut node_count: u32 = 0;
self.walk_directory(dir, dir, repo_id, graph_build_id, max_nodes, &mut node_count, &mut combined)?;
info!(
nodes = combined.nodes.len(),
edges = combined.edges.len(),
"Directory parsing complete"
);
Ok(combined)
}
fn walk_directory(
&self,
base: &Path,
dir: &Path,
repo_id: &str,
graph_build_id: &str,
max_nodes: u32,
node_count: &mut u32,
combined: &mut ParseOutput,
) -> Result<(), CoreError> {
let entries = std::fs::read_dir(dir).map_err(|e| {
CoreError::Graph(format!("Failed to read directory {}: {e}", dir.display()))
})?;
for entry in entries {
let entry = entry.map_err(|e| CoreError::Graph(format!("Dir entry error: {e}")))?;
let path = entry.path();
// Skip hidden directories and common non-source dirs
if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
if name.starts_with('.')
|| name == "node_modules"
|| name == "target"
|| name == "__pycache__"
|| name == "vendor"
|| name == "dist"
|| name == "build"
|| name == ".git"
{
continue;
}
}
if path.is_dir() {
self.walk_directory(
base,
&path,
repo_id,
graph_build_id,
max_nodes,
node_count,
combined,
)?;
} else if path.is_file() {
if *node_count >= max_nodes {
info!(max_nodes, "Reached node limit, stopping parse");
return Ok(());
}
let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
if !self.supports_extension(ext) {
continue;
}
// Use relative path from base
let rel_path = path.strip_prefix(base).unwrap_or(&path);
let source = match std::fs::read_to_string(&path) {
Ok(s) => s,
Err(_) => continue, // Skip binary/unreadable files
};
if let Some(output) = self.parse_file(rel_path, &source, repo_id, graph_build_id)?
{
*node_count += output.nodes.len() as u32;
combined.nodes.extend(output.nodes);
combined.edges.extend(output.edges);
}
}
}
Ok(())
}
}
impl Default for ParserRegistry {
fn default() -> Self {
Self::new()
}
}

View File

@@ -0,0 +1,426 @@
use std::path::Path;
use compliance_core::error::CoreError;
use compliance_core::models::graph::{CodeEdge, CodeEdgeKind, CodeNode, CodeNodeKind};
use compliance_core::traits::graph_builder::{LanguageParser, ParseOutput};
use tree_sitter::{Node, Parser};
/// Tree-sitter based parser that extracts code-graph nodes and edges from
/// Rust source files.
pub struct RustParser;
impl RustParser {
    pub fn new() -> Self {
        Self
    }

    /// Recursively walk the syntax tree, emitting nodes for functions,
    /// structs, enums, traits and modules plus edges for imports, calls
    /// and trait impls.
    ///
    /// `parent_qualified` carries the enclosing scope's qualified name so
    /// nested items get `::`-joined names. Arms that walk their own
    /// children (`trait_item`, `impl_item`, inline `mod_item`) return
    /// early to avoid walking them twice.
    fn walk_tree(
        &self,
        node: Node<'_>,
        source: &str,
        file_path: &str,
        repo_id: &str,
        graph_build_id: &str,
        parent_qualified: Option<&str>,
        output: &mut ParseOutput,
    ) {
        match node.kind() {
            "function_item" | "function_signature_item" => {
                if let Some(name_node) = node.child_by_field_name("name") {
                    let name = &source[name_node.byte_range()];
                    let qualified = match parent_qualified {
                        Some(p) => format!("{p}::{name}"),
                        None => format!("{file_path}::{name}"),
                    };
                    // Entry points: `main`, tests, tokio mains, and any
                    // `pub` function (a potential external entry into the
                    // crate).
                    let is_entry = name == "main"
                        || self.has_attribute(&node, source, "test")
                        || self.has_attribute(&node, source, "tokio::main")
                        || self.has_pub_visibility(&node, source);
                    output.nodes.push(CodeNode {
                        id: None,
                        repo_id: repo_id.to_string(),
                        graph_build_id: graph_build_id.to_string(),
                        qualified_name: qualified.clone(),
                        name: name.to_string(),
                        kind: CodeNodeKind::Function,
                        file_path: file_path.to_string(),
                        start_line: node.start_position().row as u32 + 1,
                        end_line: node.end_position().row as u32 + 1,
                        language: "rust".to_string(),
                        community_id: None,
                        is_entry_point: is_entry,
                        graph_index: None,
                    });
                    // Extract function calls within the body
                    if let Some(body) = node.child_by_field_name("body") {
                        self.extract_calls(
                            body,
                            source,
                            file_path,
                            repo_id,
                            graph_build_id,
                            &qualified,
                            output,
                        );
                    }
                }
            }
            "struct_item" => {
                if let Some(name_node) = node.child_by_field_name("name") {
                    let name = &source[name_node.byte_range()];
                    let qualified = match parent_qualified {
                        Some(p) => format!("{p}::{name}"),
                        None => format!("{file_path}::{name}"),
                    };
                    output.nodes.push(CodeNode {
                        id: None,
                        repo_id: repo_id.to_string(),
                        graph_build_id: graph_build_id.to_string(),
                        qualified_name: qualified,
                        name: name.to_string(),
                        kind: CodeNodeKind::Struct,
                        file_path: file_path.to_string(),
                        start_line: node.start_position().row as u32 + 1,
                        end_line: node.end_position().row as u32 + 1,
                        language: "rust".to_string(),
                        community_id: None,
                        is_entry_point: false,
                        graph_index: None,
                    });
                }
            }
            "enum_item" => {
                if let Some(name_node) = node.child_by_field_name("name") {
                    let name = &source[name_node.byte_range()];
                    let qualified = match parent_qualified {
                        Some(p) => format!("{p}::{name}"),
                        None => format!("{file_path}::{name}"),
                    };
                    output.nodes.push(CodeNode {
                        id: None,
                        repo_id: repo_id.to_string(),
                        graph_build_id: graph_build_id.to_string(),
                        qualified_name: qualified,
                        name: name.to_string(),
                        kind: CodeNodeKind::Enum,
                        file_path: file_path.to_string(),
                        start_line: node.start_position().row as u32 + 1,
                        end_line: node.end_position().row as u32 + 1,
                        language: "rust".to_string(),
                        community_id: None,
                        is_entry_point: false,
                        graph_index: None,
                    });
                }
            }
            "trait_item" => {
                if let Some(name_node) = node.child_by_field_name("name") {
                    let name = &source[name_node.byte_range()];
                    let qualified = match parent_qualified {
                        Some(p) => format!("{p}::{name}"),
                        None => format!("{file_path}::{name}"),
                    };
                    output.nodes.push(CodeNode {
                        id: None,
                        repo_id: repo_id.to_string(),
                        graph_build_id: graph_build_id.to_string(),
                        qualified_name: qualified.clone(),
                        name: name.to_string(),
                        kind: CodeNodeKind::Trait,
                        file_path: file_path.to_string(),
                        start_line: node.start_position().row as u32 + 1,
                        end_line: node.end_position().row as u32 + 1,
                        language: "rust".to_string(),
                        community_id: None,
                        is_entry_point: false,
                        graph_index: None,
                    });
                    // Parse methods inside the trait
                    self.walk_children(
                        node,
                        source,
                        file_path,
                        repo_id,
                        graph_build_id,
                        Some(&qualified),
                        output,
                    );
                    return; // Don't walk children again
                }
            }
            "impl_item" => {
                // Extract impl target type for qualified naming
                let impl_name = self.extract_impl_type(&node, source);
                let qualified = match parent_qualified {
                    Some(p) => format!("{p}::{impl_name}"),
                    None => format!("{file_path}::{impl_name}"),
                };
                // Check for trait impl (impl Trait for Type)
                if let Some(trait_node) = node.child_by_field_name("trait") {
                    let trait_name = &source[trait_node.byte_range()];
                    output.edges.push(CodeEdge {
                        id: None,
                        repo_id: repo_id.to_string(),
                        graph_build_id: graph_build_id.to_string(),
                        source: qualified.clone(),
                        target: trait_name.to_string(),
                        kind: CodeEdgeKind::Implements,
                        file_path: file_path.to_string(),
                        line_number: Some(node.start_position().row as u32 + 1),
                    });
                }
                // Walk methods inside impl block
                self.walk_children(
                    node,
                    source,
                    file_path,
                    repo_id,
                    graph_build_id,
                    Some(&qualified),
                    output,
                );
                return;
            }
            "use_declaration" => {
                let use_text = &source[node.byte_range()];
                // Extract the imported path
                if let Some(path) = self.extract_use_path(use_text) {
                    output.edges.push(CodeEdge {
                        id: None,
                        repo_id: repo_id.to_string(),
                        graph_build_id: graph_build_id.to_string(),
                        source: parent_qualified
                            .unwrap_or(file_path)
                            .to_string(),
                        target: path,
                        kind: CodeEdgeKind::Imports,
                        file_path: file_path.to_string(),
                        line_number: Some(node.start_position().row as u32 + 1),
                    });
                }
            }
            "mod_item" => {
                if let Some(name_node) = node.child_by_field_name("name") {
                    let name = &source[name_node.byte_range()];
                    let qualified = match parent_qualified {
                        Some(p) => format!("{p}::{name}"),
                        None => format!("{file_path}::{name}"),
                    };
                    output.nodes.push(CodeNode {
                        id: None,
                        repo_id: repo_id.to_string(),
                        graph_build_id: graph_build_id.to_string(),
                        qualified_name: qualified.clone(),
                        name: name.to_string(),
                        kind: CodeNodeKind::Module,
                        file_path: file_path.to_string(),
                        start_line: node.start_position().row as u32 + 1,
                        end_line: node.end_position().row as u32 + 1,
                        language: "rust".to_string(),
                        community_id: None,
                        is_entry_point: false,
                        graph_index: None,
                    });
                    // If it has a body (inline module), walk it; a plain
                    // `mod foo;` declaration falls through to the default
                    // walk below.
                    if let Some(body) = node.child_by_field_name("body") {
                        self.walk_children(
                            body,
                            source,
                            file_path,
                            repo_id,
                            graph_build_id,
                            Some(&qualified),
                            output,
                        );
                        return;
                    }
                }
            }
            _ => {}
        }
        // Default: walk children
        self.walk_children(
            node,
            source,
            file_path,
            repo_id,
            graph_build_id,
            parent_qualified,
            output,
        );
    }

    /// Walk every child of `node` through [`Self::walk_tree`].
    fn walk_children(
        &self,
        node: Node<'_>,
        source: &str,
        file_path: &str,
        repo_id: &str,
        graph_build_id: &str,
        parent_qualified: Option<&str>,
        output: &mut ParseOutput,
    ) {
        let mut cursor = node.walk();
        for child in node.children(&mut cursor) {
            self.walk_tree(
                child,
                source,
                file_path,
                repo_id,
                graph_build_id,
                parent_qualified,
                output,
            );
        }
    }

    /// Emit a `Calls` edge for every call expression under `node`.
    /// The target is the raw callee text (resolution happens later).
    fn extract_calls(
        &self,
        node: Node<'_>,
        source: &str,
        file_path: &str,
        repo_id: &str,
        graph_build_id: &str,
        caller_qualified: &str,
        output: &mut ParseOutput,
    ) {
        if node.kind() == "call_expression" {
            if let Some(func_node) = node.child_by_field_name("function") {
                let callee = &source[func_node.byte_range()];
                output.edges.push(CodeEdge {
                    id: None,
                    repo_id: repo_id.to_string(),
                    graph_build_id: graph_build_id.to_string(),
                    source: caller_qualified.to_string(),
                    target: callee.to_string(),
                    kind: CodeEdgeKind::Calls,
                    file_path: file_path.to_string(),
                    line_number: Some(node.start_position().row as u32 + 1),
                });
            }
        }
        let mut cursor = node.walk();
        for child in node.children(&mut cursor) {
            self.extract_calls(
                child,
                source,
                file_path,
                repo_id,
                graph_build_id,
                caller_qualified,
                output,
            );
        }
    }

    /// True when the item is preceded by an attribute whose source text
    /// contains `attr_name`. Scans the whole run of stacked attributes
    /// (and skips interleaved doc comments), not just the closest sibling,
    /// so e.g. `#[test]` followed by `#[ignore]` is still detected.
    fn has_attribute(&self, node: &Node<'_>, source: &str, attr_name: &str) -> bool {
        let mut prev = node.prev_sibling();
        while let Some(sibling) = prev {
            match sibling.kind() {
                "attribute_item" | "attribute" => {
                    if source[sibling.byte_range()].contains(attr_name) {
                        return true;
                    }
                }
                // Doc comments may sit between attributes and the item.
                "line_comment" | "block_comment" => {}
                _ => break,
            }
            prev = sibling.prev_sibling();
        }
        false
    }

    /// True when the item carries a plain `pub` visibility modifier.
    /// Restricted forms (`pub(crate)`, `pub(super)`) do not count.
    fn has_pub_visibility(&self, node: &Node<'_>, source: &str) -> bool {
        let mut cursor = node.walk();
        for child in node.children(&mut cursor) {
            if child.kind() == "visibility_modifier" {
                let text = &source[child.byte_range()];
                return text == "pub";
            }
        }
        false
    }

    /// Source text of the type an `impl` block targets, or "unknown".
    fn extract_impl_type(&self, node: &Node<'_>, source: &str) -> String {
        if let Some(type_node) = node.child_by_field_name("type") {
            return source[type_node.byte_range()].to_string();
        }
        "unknown".to_string()
    }

    /// "use foo::bar::baz;" -> "foo::bar::baz".
    /// NOTE(review): grouped imports (`use a::{b, c};`) and aliases
    /// (`use a as b;`) are returned verbatim, braces/alias included.
    fn extract_use_path(&self, use_text: &str) -> Option<String> {
        let trimmed = use_text
            .strip_prefix("use ")?
            .trim_end_matches(';')
            .trim();
        Some(trimmed.to_string())
    }
}
impl LanguageParser for RustParser {
    fn language(&self) -> &str {
        "rust"
    }

    fn extensions(&self) -> &[&str] {
        &["rs"]
    }

    /// Parse one Rust source file into graph nodes and edges.
    ///
    /// Emits a single `File` node covering the whole file, then walks the
    /// tree-sitter syntax tree to collect items, imports, call edges and
    /// trait-impl edges.
    fn parse_file(
        &self,
        file_path: &Path,
        source: &str,
        repo_id: &str,
        graph_build_id: &str,
    ) -> Result<ParseOutput, CoreError> {
        let mut ts_parser = Parser::new();
        ts_parser
            .set_language(&tree_sitter_rust::LANGUAGE.into())
            .map_err(|e| CoreError::Graph(format!("Failed to set Rust language: {e}")))?;
        let parsed = ts_parser
            .parse(source, None)
            .ok_or_else(|| CoreError::Graph("Failed to parse Rust file".to_string()))?;

        let path_str = file_path.to_string_lossy().to_string();
        let file_name = file_path
            .file_name()
            .map(|n| n.to_string_lossy().to_string())
            .unwrap_or_default();

        let mut result = ParseOutput::default();
        // Root node representing the file itself.
        result.nodes.push(CodeNode {
            id: None,
            repo_id: repo_id.to_string(),
            graph_build_id: graph_build_id.to_string(),
            qualified_name: path_str.clone(),
            name: file_name,
            kind: CodeNodeKind::File,
            file_path: path_str.clone(),
            start_line: 1,
            end_line: source.lines().count() as u32,
            language: "rust".to_string(),
            community_id: None,
            is_entry_point: false,
            graph_index: None,
        });
        self.walk_tree(
            parsed.root_node(),
            source,
            &path_str,
            repo_id,
            graph_build_id,
            None,
            &mut result,
        );
        Ok(result)
    }
}

View File

@@ -0,0 +1,419 @@
use std::path::Path;
use compliance_core::error::CoreError;
use compliance_core::models::graph::{CodeEdge, CodeEdgeKind, CodeNode, CodeNodeKind};
use compliance_core::traits::graph_builder::{LanguageParser, ParseOutput};
use tree_sitter::{Node, Parser};
/// Tree-sitter based parser that extracts code-graph nodes and edges from
/// TypeScript source files.
pub struct TypeScriptParser;
impl TypeScriptParser {
    pub fn new() -> Self {
        Self
    }

    /// Recursively walk the syntax tree, emitting nodes for functions,
    /// classes, interfaces and methods plus edges for imports, calls,
    /// inheritance and interface implementation.
    ///
    /// `parent_qualified` carries the enclosing scope's qualified name;
    /// the `class_declaration` arm walks its own body and returns early
    /// so members aren't visited twice.
    fn walk_tree(
        &self,
        node: Node<'_>,
        source: &str,
        file_path: &str,
        repo_id: &str,
        graph_build_id: &str,
        parent_qualified: Option<&str>,
        output: &mut ParseOutput,
    ) {
        match node.kind() {
            "function_declaration" => {
                if let Some(name_node) = node.child_by_field_name("name") {
                    let name = &source[name_node.byte_range()];
                    let qualified = match parent_qualified {
                        Some(p) => format!("{p}.{name}"),
                        None => format!("{file_path}::{name}"),
                    };
                    output.nodes.push(CodeNode {
                        id: None,
                        repo_id: repo_id.to_string(),
                        graph_build_id: graph_build_id.to_string(),
                        qualified_name: qualified.clone(),
                        name: name.to_string(),
                        kind: CodeNodeKind::Function,
                        file_path: file_path.to_string(),
                        start_line: node.start_position().row as u32 + 1,
                        end_line: node.end_position().row as u32 + 1,
                        language: "typescript".to_string(),
                        community_id: None,
                        // Exported functions are treated as entry points.
                        is_entry_point: self.is_exported(&node),
                        graph_index: None,
                    });
                    if let Some(body) = node.child_by_field_name("body") {
                        self.extract_calls(
                            body, source, file_path, repo_id, graph_build_id, &qualified, output,
                        );
                    }
                }
            }
            "class_declaration" => {
                if let Some(name_node) = node.child_by_field_name("name") {
                    let name = &source[name_node.byte_range()];
                    let qualified = match parent_qualified {
                        Some(p) => format!("{p}.{name}"),
                        None => format!("{file_path}::{name}"),
                    };
                    output.nodes.push(CodeNode {
                        id: None,
                        repo_id: repo_id.to_string(),
                        graph_build_id: graph_build_id.to_string(),
                        qualified_name: qualified.clone(),
                        name: name.to_string(),
                        kind: CodeNodeKind::Class,
                        file_path: file_path.to_string(),
                        start_line: node.start_position().row as u32 + 1,
                        end_line: node.end_position().row as u32 + 1,
                        language: "typescript".to_string(),
                        community_id: None,
                        is_entry_point: false,
                        graph_index: None,
                    });
                    // Heritage clause (extends/implements)
                    self.extract_heritage(
                        &node, source, file_path, repo_id, graph_build_id, &qualified, output,
                    );
                    if let Some(body) = node.child_by_field_name("body") {
                        self.walk_children(
                            body, source, file_path, repo_id, graph_build_id, Some(&qualified),
                            output,
                        );
                    }
                    // Members were walked above; don't walk them twice.
                    return;
                }
            }
            "interface_declaration" => {
                if let Some(name_node) = node.child_by_field_name("name") {
                    let name = &source[name_node.byte_range()];
                    let qualified = match parent_qualified {
                        Some(p) => format!("{p}.{name}"),
                        None => format!("{file_path}::{name}"),
                    };
                    output.nodes.push(CodeNode {
                        id: None,
                        repo_id: repo_id.to_string(),
                        graph_build_id: graph_build_id.to_string(),
                        qualified_name: qualified.clone(),
                        name: name.to_string(),
                        kind: CodeNodeKind::Interface,
                        file_path: file_path.to_string(),
                        start_line: node.start_position().row as u32 + 1,
                        end_line: node.end_position().row as u32 + 1,
                        language: "typescript".to_string(),
                        community_id: None,
                        is_entry_point: false,
                        graph_index: None,
                    });
                }
            }
            "method_definition" | "public_field_definition" => {
                if let Some(name_node) = node.child_by_field_name("name") {
                    let name = &source[name_node.byte_range()];
                    let qualified = match parent_qualified {
                        Some(p) => format!("{p}.{name}"),
                        None => format!("{file_path}::{name}"),
                    };
                    output.nodes.push(CodeNode {
                        id: None,
                        repo_id: repo_id.to_string(),
                        graph_build_id: graph_build_id.to_string(),
                        qualified_name: qualified.clone(),
                        name: name.to_string(),
                        kind: CodeNodeKind::Method,
                        file_path: file_path.to_string(),
                        start_line: node.start_position().row as u32 + 1,
                        end_line: node.end_position().row as u32 + 1,
                        language: "typescript".to_string(),
                        community_id: None,
                        is_entry_point: false,
                        graph_index: None,
                    });
                    // NOTE(review): field definitions expose a "value"
                    // field rather than "body", so calls inside arrow
                    // function fields are not extracted here — confirm
                    // whether that is intended.
                    if let Some(body) = node.child_by_field_name("body") {
                        self.extract_calls(
                            body, source, file_path, repo_id, graph_build_id, &qualified, output,
                        );
                    }
                }
            }
            "lexical_declaration" | "variable_declaration" => {
                self.extract_arrow_functions(
                    node, source, file_path, repo_id, graph_build_id, parent_qualified, output,
                );
            }
            "import_statement" => {
                let text = &source[node.byte_range()];
                if let Some(module) = self.extract_import_source(text) {
                    output.edges.push(CodeEdge {
                        id: None,
                        repo_id: repo_id.to_string(),
                        graph_build_id: graph_build_id.to_string(),
                        source: parent_qualified.unwrap_or(file_path).to_string(),
                        target: module,
                        kind: CodeEdgeKind::Imports,
                        file_path: file_path.to_string(),
                        line_number: Some(node.start_position().row as u32 + 1),
                    });
                }
            }
            _ => {}
        }
        self.walk_children(
            node, source, file_path, repo_id, graph_build_id, parent_qualified, output,
        );
    }

    /// Walk every child of `node` through [`Self::walk_tree`].
    fn walk_children(
        &self,
        node: Node<'_>,
        source: &str,
        file_path: &str,
        repo_id: &str,
        graph_build_id: &str,
        parent_qualified: Option<&str>,
        output: &mut ParseOutput,
    ) {
        let mut cursor = node.walk();
        for child in node.children(&mut cursor) {
            self.walk_tree(
                child, source, file_path, repo_id, graph_build_id, parent_qualified, output,
            );
        }
    }

    /// Emit a `Calls` edge for every call expression under `node`.
    /// The target is the raw callee text (resolution happens later).
    fn extract_calls(
        &self,
        node: Node<'_>,
        source: &str,
        file_path: &str,
        repo_id: &str,
        graph_build_id: &str,
        caller_qualified: &str,
        output: &mut ParseOutput,
    ) {
        if node.kind() == "call_expression" {
            if let Some(func_node) = node.child_by_field_name("function") {
                let callee = &source[func_node.byte_range()];
                output.edges.push(CodeEdge {
                    id: None,
                    repo_id: repo_id.to_string(),
                    graph_build_id: graph_build_id.to_string(),
                    source: caller_qualified.to_string(),
                    target: callee.to_string(),
                    kind: CodeEdgeKind::Calls,
                    file_path: file_path.to_string(),
                    line_number: Some(node.start_position().row as u32 + 1),
                });
            }
        }
        let mut cursor = node.walk();
        for child in node.children(&mut cursor) {
            self.extract_calls(
                child, source, file_path, repo_id, graph_build_id, caller_qualified, output,
            );
        }
    }

    /// Emit Function nodes for `const f = () => {}` / function-expression
    /// declarators, plus call edges from their bodies.
    fn extract_arrow_functions(
        &self,
        node: Node<'_>,
        source: &str,
        file_path: &str,
        repo_id: &str,
        graph_build_id: &str,
        parent_qualified: Option<&str>,
        output: &mut ParseOutput,
    ) {
        let mut cursor = node.walk();
        for child in node.children(&mut cursor) {
            if child.kind() == "variable_declarator" {
                let name_node = child.child_by_field_name("name");
                let value_node = child.child_by_field_name("value");
                if let (Some(name_n), Some(value_n)) = (name_node, value_node) {
                    if value_n.kind() == "arrow_function" || value_n.kind() == "function" {
                        let name = &source[name_n.byte_range()];
                        let qualified = match parent_qualified {
                            Some(p) => format!("{p}.{name}"),
                            None => format!("{file_path}::{name}"),
                        };
                        output.nodes.push(CodeNode {
                            id: None,
                            repo_id: repo_id.to_string(),
                            graph_build_id: graph_build_id.to_string(),
                            qualified_name: qualified.clone(),
                            name: name.to_string(),
                            kind: CodeNodeKind::Function,
                            file_path: file_path.to_string(),
                            start_line: child.start_position().row as u32 + 1,
                            end_line: child.end_position().row as u32 + 1,
                            language: "typescript".to_string(),
                            community_id: None,
                            is_entry_point: false,
                            graph_index: None,
                        });
                        if let Some(body) = value_n.child_by_field_name("body") {
                            self.extract_calls(
                                body, source, file_path, repo_id, graph_build_id, &qualified,
                                output,
                            );
                        }
                    }
                }
            }
        }
    }

    /// Emit Inherits/Implements edges from a class's heritage clause
    /// ("extends Base implements IFoo, IBar").
    fn extract_heritage(
        &self,
        node: &Node<'_>,
        source: &str,
        file_path: &str,
        repo_id: &str,
        graph_build_id: &str,
        class_qualified: &str,
        output: &mut ParseOutput,
    ) {
        let mut cursor = node.walk();
        for child in node.children(&mut cursor) {
            if child.kind() == "class_heritage" {
                let text = &source[child.byte_range()];
                // "extends Base implements IFoo, IBar"
                if let Some(rest) = text.strip_prefix("extends ") {
                    let base = rest.split_whitespace().next().unwrap_or(rest);
                    output.edges.push(CodeEdge {
                        id: None,
                        repo_id: repo_id.to_string(),
                        graph_build_id: graph_build_id.to_string(),
                        source: class_qualified.to_string(),
                        target: base.trim_matches(',').to_string(),
                        kind: CodeEdgeKind::Inherits,
                        file_path: file_path.to_string(),
                        line_number: Some(child.start_position().row as u32 + 1),
                    });
                }
                if text.contains("implements ") {
                    if let Some(impl_part) = text.split("implements ").nth(1) {
                        for iface in impl_part.split(',') {
                            let iface = iface.trim();
                            if !iface.is_empty() {
                                output.edges.push(CodeEdge {
                                    id: None,
                                    repo_id: repo_id.to_string(),
                                    graph_build_id: graph_build_id.to_string(),
                                    source: class_qualified.to_string(),
                                    target: iface.to_string(),
                                    kind: CodeEdgeKind::Implements,
                                    file_path: file_path.to_string(),
                                    line_number: Some(child.start_position().row as u32 + 1),
                                });
                            }
                        }
                    }
                }
            }
        }
    }

    /// True when the node's direct parent is an `export_statement`.
    fn is_exported(&self, node: &Node<'_>) -> bool {
        if let Some(parent) = node.parent() {
            return parent.kind() == "export_statement";
        }
        false
    }

    /// Extract the module specifier from an `import` statement's source
    /// text. In valid TypeScript the specifier is always a quoted string,
    /// so we locate the first quote and return the text up to the matching
    /// closing quote. This is robust against named imports that bind the
    /// identifier `from` (`import { from } from 'mod'`), which a naive
    /// split on `"from "` mis-parses. Returns `None` when no quoted
    /// specifier is present.
    fn extract_import_source(&self, import_text: &str) -> Option<String> {
        let quote_idx = import_text.find(|c| c == '\'' || c == '"')?;
        let quote = import_text[quote_idx..].chars().next()?;
        let rest = &import_text[quote_idx + 1..];
        let end = rest.find(quote)?;
        let module = &rest[..end];
        if module.is_empty() {
            None
        } else {
            Some(module.to_string())
        }
    }
}
impl LanguageParser for TypeScriptParser {
    fn language(&self) -> &str {
        "typescript"
    }

    fn extensions(&self) -> &[&str] {
        &["ts", "tsx"]
    }

    /// Parse one TypeScript source file into graph nodes and edges.
    ///
    /// Emits a single `File` node covering the whole file, then walks the
    /// tree-sitter syntax tree to collect functions, classes, interfaces,
    /// imports and call edges.
    fn parse_file(
        &self,
        file_path: &Path,
        source: &str,
        repo_id: &str,
        graph_build_id: &str,
    ) -> Result<ParseOutput, CoreError> {
        let mut ts_parser = Parser::new();
        ts_parser
            .set_language(&tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into())
            .map_err(|e| CoreError::Graph(format!("Failed to set TypeScript language: {e}")))?;
        let parsed = ts_parser
            .parse(source, None)
            .ok_or_else(|| CoreError::Graph("Failed to parse TypeScript file".to_string()))?;

        let path_str = file_path.to_string_lossy().to_string();
        let file_name = file_path
            .file_name()
            .map(|n| n.to_string_lossy().to_string())
            .unwrap_or_default();

        let mut result = ParseOutput::default();
        // Root node representing the file itself.
        result.nodes.push(CodeNode {
            id: None,
            repo_id: repo_id.to_string(),
            graph_build_id: graph_build_id.to_string(),
            qualified_name: path_str.clone(),
            name: file_name,
            kind: CodeNodeKind::File,
            file_path: path_str.clone(),
            start_line: 1,
            end_line: source.lines().count() as u32,
            language: "typescript".to_string(),
            community_id: None,
            is_entry_point: false,
            graph_index: None,
        });
        self.walk_tree(
            parsed.root_node(),
            source,
            &path_str,
            repo_id,
            graph_build_id,
            None,
            &mut result,
        );
        Ok(result)
    }
}

View File

@@ -0,0 +1,128 @@
use compliance_core::error::CoreError;
use compliance_core::models::graph::CodeNode;
use tantivy::collector::TopDocs;
use tantivy::query::QueryParser;
use tantivy::schema::{Schema, Value, STORED, TEXT};
use tantivy::{doc, Index, IndexWriter, ReloadPolicy};
use tracing::info;
/// BM25 text search index over code symbols
pub struct SymbolIndex {
    /// In-memory tantivy index; holds one document per indexed code node.
    index: Index,
    /// Retained for potential schema introspection; not read today.
    #[allow(dead_code)]
    schema: Schema,
    // Field handles are cached at construction so indexing and search
    // avoid per-call schema lookups by field name.
    qualified_name_field: tantivy::schema::Field,
    name_field: tantivy::schema::Field,
    kind_field: tantivy::schema::Field,
    file_path_field: tantivy::schema::Field,
    language_field: tantivy::schema::Field,
}
/// A single hit returned by `SymbolIndex::search`.
#[derive(Debug, Clone, serde::Serialize)]
pub struct SearchResult {
    /// Fully qualified symbol name as stored at indexing time.
    pub qualified_name: String,
    /// Short symbol name.
    pub name: String,
    /// Node kind (stringified `CodeNodeKind`).
    pub kind: String,
    /// Path of the file the symbol was parsed from.
    pub file_path: String,
    /// Source language of the symbol.
    pub language: String,
    /// BM25 relevance score assigned by tantivy (higher is better).
    pub score: f32,
}
impl SymbolIndex {
    /// Create a new in-memory symbol index with the fixed symbol schema.
    ///
    /// # Errors
    /// Currently infallible in practice; kept as `Result` for interface
    /// stability with the rest of the graph pipeline.
    pub fn new() -> Result<Self, CoreError> {
        let mut builder = Schema::builder();
        let qualified_name_field = builder.add_text_field("qualified_name", TEXT | STORED);
        let name_field = builder.add_text_field("name", TEXT | STORED);
        let kind_field = builder.add_text_field("kind", TEXT | STORED);
        let file_path_field = builder.add_text_field("file_path", TEXT | STORED);
        let language_field = builder.add_text_field("language", TEXT | STORED);
        let schema = builder.build();

        Ok(Self {
            index: Index::create_in_ram(schema.clone()),
            schema,
            qualified_name_field,
            name_field,
            kind_field,
            file_path_field,
            language_field,
        })
    }

    /// Index a set of code nodes, committing once at the end.
    ///
    /// # Errors
    /// Returns `CoreError::Graph` when the writer cannot be created or a
    /// document fails to be added or committed.
    pub fn index_nodes(&self, nodes: &[CodeNode]) -> Result<(), CoreError> {
        // 50 MB heap budget for the tantivy writer.
        let mut writer: IndexWriter = self
            .index
            .writer(50_000_000)
            .map_err(|e| CoreError::Graph(format!("Failed to create index writer: {e}")))?;

        for node in nodes {
            let document = doc!(
                self.qualified_name_field => node.qualified_name.as_str(),
                self.name_field => node.name.as_str(),
                self.kind_field => node.kind.to_string(),
                self.file_path_field => node.file_path.as_str(),
                self.language_field => node.language.as_str(),
            );
            writer
                .add_document(document)
                .map_err(|e| CoreError::Graph(format!("Failed to add document: {e}")))?;
        }

        writer
            .commit()
            .map_err(|e| CoreError::Graph(format!("Failed to commit index: {e}")))?;
        info!(nodes = nodes.len(), "Symbol index built");
        Ok(())
    }

    /// Search for symbols matching `query_str`, returning up to `limit`
    /// results ordered by descending BM25 score.
    ///
    /// A fresh reader is built per call: with `ReloadPolicy::Manual` this is
    /// what guarantees the latest commit is visible.
    ///
    /// # Errors
    /// Returns `CoreError::Graph` on reader creation, query parsing, search,
    /// or document retrieval failure.
    pub fn search(&self, query_str: &str, limit: usize) -> Result<Vec<SearchResult>, CoreError> {
        let reader = self
            .index
            .reader_builder()
            .reload_policy(ReloadPolicy::Manual)
            .try_into()
            .map_err(|e| CoreError::Graph(format!("Failed to create reader: {e}")))?;
        let searcher = reader.searcher();

        let parser =
            QueryParser::for_index(&self.index, vec![self.name_field, self.qualified_name_field]);
        let query = parser
            .parse_query(query_str)
            .map_err(|e| CoreError::Graph(format!("Failed to parse query: {e}")))?;

        let hits = searcher
            .search(&query, &TopDocs::with_limit(limit))
            .map_err(|e| CoreError::Graph(format!("Search failed: {e}")))?;

        hits.into_iter()
            .map(|(score, addr)| {
                let doc: tantivy::TantivyDocument = searcher
                    .doc(addr)
                    .map_err(|e| CoreError::Graph(format!("Failed to retrieve doc: {e}")))?;
                // Missing/non-text field values degrade to an empty string.
                let text = |field: tantivy::schema::Field| -> String {
                    doc.get_first(field)
                        .and_then(|v| v.as_str())
                        .unwrap_or("")
                        .to_string()
                };
                Ok(SearchResult {
                    qualified_name: text(self.qualified_name_field),
                    name: text(self.name_field),
                    kind: text(self.kind_field),
                    file_path: text(self.file_path_field),
                    language: text(self.language_field),
                    score,
                })
            })
            .collect()
    }
}

View File

@@ -0,0 +1 @@
/// BM25 text search over indexed code symbols (`SymbolIndex`).
pub mod index;