Files
compliance-scanner-agent/compliance-graph/src/parsers/registry.rs
Sharang Parnerkar c9dc96ad73
Some checks failed
CI / Format (push) Successful in 2s
CI / Clippy (push) Failing after 1m23s
CI / Security Audit (push) Has been skipped
CI / Tests (push) Has been skipped
CI / Clippy (pull_request) Failing after 1m18s
CI / Security Audit (pull_request) Has been skipped
CI / Tests (pull_request) Has been skipped
CI / Format (pull_request) Successful in 3s
Run cargo fmt across all crates
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-04 23:30:26 +01:00

187 lines
5.4 KiB
Rust

use std::collections::HashMap;
use std::path::Path;
use compliance_core::error::CoreError;
use compliance_core::traits::graph_builder::{LanguageParser, ParseOutput};
use tracing::info;
use super::javascript::JavaScriptParser;
use super::python::PythonParser;
use super::rust_parser::RustParser;
use super::typescript::TypeScriptParser;
/// Registry of language parsers, indexed by file extension.
///
/// Owns one boxed [`LanguageParser`] per language and a lookup table that
/// maps a file extension to the parser responsible for it.
pub struct ParserRegistry {
/// Parser instances; the values stored in `extension_map` are indices into this vector.
parsers: Vec<Box<dyn LanguageParser>>,
/// Maps a file extension to the index of its parser in `parsers`.
extension_map: HashMap<String, usize>,
}
impl ParserRegistry {
/// Create a registry with all built-in parsers
pub fn new() -> Self {
let parsers: Vec<Box<dyn LanguageParser>> = vec![
Box::new(RustParser::new()),
Box::new(PythonParser::new()),
Box::new(JavaScriptParser::new()),
Box::new(TypeScriptParser::new()),
];
let mut extension_map = HashMap::new();
for (idx, parser) in parsers.iter().enumerate() {
for ext in parser.extensions() {
extension_map.insert(ext.to_string(), idx);
}
}
Self {
parsers,
extension_map,
}
}
/// Check if a file extension is supported
pub fn supports_extension(&self, ext: &str) -> bool {
self.extension_map.contains_key(ext)
}
/// Get supported extensions
pub fn supported_extensions(&self) -> Vec<&str> {
self.extension_map.keys().map(|s| s.as_str()).collect()
}
/// Parse a file, selecting the appropriate parser by extension
pub fn parse_file(
&self,
file_path: &Path,
source: &str,
repo_id: &str,
graph_build_id: &str,
) -> Result<Option<ParseOutput>, CoreError> {
let ext = file_path.extension().and_then(|e| e.to_str()).unwrap_or("");
let parser_idx = match self.extension_map.get(ext) {
Some(idx) => *idx,
None => return Ok(None),
};
let parser = &self.parsers[parser_idx];
info!(
file = %file_path.display(),
language = parser.language(),
"Parsing file"
);
let output = parser.parse_file(file_path, source, repo_id, graph_build_id)?;
Ok(Some(output))
}
/// Parse all supported files in a directory tree
pub fn parse_directory(
&self,
dir: &Path,
repo_id: &str,
graph_build_id: &str,
max_nodes: u32,
) -> Result<ParseOutput, CoreError> {
let mut combined = ParseOutput::default();
let mut node_count: u32 = 0;
self.walk_directory(
dir,
dir,
repo_id,
graph_build_id,
max_nodes,
&mut node_count,
&mut combined,
)?;
info!(
nodes = combined.nodes.len(),
edges = combined.edges.len(),
"Directory parsing complete"
);
Ok(combined)
}
fn walk_directory(
&self,
base: &Path,
dir: &Path,
repo_id: &str,
graph_build_id: &str,
max_nodes: u32,
node_count: &mut u32,
combined: &mut ParseOutput,
) -> Result<(), CoreError> {
let entries = std::fs::read_dir(dir).map_err(|e| {
CoreError::Graph(format!("Failed to read directory {}: {e}", dir.display()))
})?;
for entry in entries {
let entry = entry.map_err(|e| CoreError::Graph(format!("Dir entry error: {e}")))?;
let path = entry.path();
// Skip hidden directories and common non-source dirs
if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
if name.starts_with('.')
|| name == "node_modules"
|| name == "target"
|| name == "__pycache__"
|| name == "vendor"
|| name == "dist"
|| name == "build"
|| name == ".git"
{
continue;
}
}
if path.is_dir() {
self.walk_directory(
base,
&path,
repo_id,
graph_build_id,
max_nodes,
node_count,
combined,
)?;
} else if path.is_file() {
if *node_count >= max_nodes {
info!(max_nodes, "Reached node limit, stopping parse");
return Ok(());
}
let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
if !self.supports_extension(ext) {
continue;
}
// Use relative path from base
let rel_path = path.strip_prefix(base).unwrap_or(&path);
let source = match std::fs::read_to_string(&path) {
Ok(s) => s,
Err(_) => continue, // Skip binary/unreadable files
};
if let Some(output) = self.parse_file(rel_path, &source, repo_id, graph_build_id)? {
*node_count += output.nodes.len() as u32;
combined.nodes.extend(output.nodes);
combined.edges.extend(output.edges);
}
}
}
Ok(())
}
}
impl Default for ParserRegistry {
fn default() -> Self {
Self::new()
}
}