Some checks failed
CI / Format (push) Successful in 2s
CI / Clippy (push) Failing after 1m23s
CI / Security Audit (push) Has been skipped
CI / Tests (push) Has been skipped
CI / Clippy (pull_request) Failing after 1m18s
CI / Security Audit (pull_request) Has been skipped
CI / Tests (pull_request) Has been skipped
CI / Format (pull_request) Successful in 3s
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
187 lines
5.4 KiB
Rust
187 lines
5.4 KiB
Rust
use std::collections::HashMap;
|
|
use std::path::Path;
|
|
|
|
use compliance_core::error::CoreError;
|
|
use compliance_core::traits::graph_builder::{LanguageParser, ParseOutput};
|
|
use tracing::info;
|
|
|
|
use super::javascript::JavaScriptParser;
|
|
use super::python::PythonParser;
|
|
use super::rust_parser::RustParser;
|
|
use super::typescript::TypeScriptParser;
|
|
|
|
/// Registry of language parsers, indexed by file extension
pub struct ParserRegistry {
    /// All registered parsers, in registration order.
    parsers: Vec<Box<dyn LanguageParser>>,
    /// Maps a file extension (in the dot-less form returned by
    /// `Path::extension`) to an index into `parsers`.
    extension_map: HashMap<String, usize>,
}
|
|
|
|
impl ParserRegistry {
|
|
/// Create a registry with all built-in parsers
|
|
pub fn new() -> Self {
|
|
let parsers: Vec<Box<dyn LanguageParser>> = vec![
|
|
Box::new(RustParser::new()),
|
|
Box::new(PythonParser::new()),
|
|
Box::new(JavaScriptParser::new()),
|
|
Box::new(TypeScriptParser::new()),
|
|
];
|
|
|
|
let mut extension_map = HashMap::new();
|
|
for (idx, parser) in parsers.iter().enumerate() {
|
|
for ext in parser.extensions() {
|
|
extension_map.insert(ext.to_string(), idx);
|
|
}
|
|
}
|
|
|
|
Self {
|
|
parsers,
|
|
extension_map,
|
|
}
|
|
}
|
|
|
|
/// Check if a file extension is supported
|
|
pub fn supports_extension(&self, ext: &str) -> bool {
|
|
self.extension_map.contains_key(ext)
|
|
}
|
|
|
|
/// Get supported extensions
|
|
pub fn supported_extensions(&self) -> Vec<&str> {
|
|
self.extension_map.keys().map(|s| s.as_str()).collect()
|
|
}
|
|
|
|
/// Parse a file, selecting the appropriate parser by extension
|
|
pub fn parse_file(
|
|
&self,
|
|
file_path: &Path,
|
|
source: &str,
|
|
repo_id: &str,
|
|
graph_build_id: &str,
|
|
) -> Result<Option<ParseOutput>, CoreError> {
|
|
let ext = file_path.extension().and_then(|e| e.to_str()).unwrap_or("");
|
|
|
|
let parser_idx = match self.extension_map.get(ext) {
|
|
Some(idx) => *idx,
|
|
None => return Ok(None),
|
|
};
|
|
|
|
let parser = &self.parsers[parser_idx];
|
|
info!(
|
|
file = %file_path.display(),
|
|
language = parser.language(),
|
|
"Parsing file"
|
|
);
|
|
|
|
let output = parser.parse_file(file_path, source, repo_id, graph_build_id)?;
|
|
Ok(Some(output))
|
|
}
|
|
|
|
/// Parse all supported files in a directory tree
|
|
pub fn parse_directory(
|
|
&self,
|
|
dir: &Path,
|
|
repo_id: &str,
|
|
graph_build_id: &str,
|
|
max_nodes: u32,
|
|
) -> Result<ParseOutput, CoreError> {
|
|
let mut combined = ParseOutput::default();
|
|
let mut node_count: u32 = 0;
|
|
|
|
self.walk_directory(
|
|
dir,
|
|
dir,
|
|
repo_id,
|
|
graph_build_id,
|
|
max_nodes,
|
|
&mut node_count,
|
|
&mut combined,
|
|
)?;
|
|
|
|
info!(
|
|
nodes = combined.nodes.len(),
|
|
edges = combined.edges.len(),
|
|
"Directory parsing complete"
|
|
);
|
|
|
|
Ok(combined)
|
|
}
|
|
|
|
fn walk_directory(
|
|
&self,
|
|
base: &Path,
|
|
dir: &Path,
|
|
repo_id: &str,
|
|
graph_build_id: &str,
|
|
max_nodes: u32,
|
|
node_count: &mut u32,
|
|
combined: &mut ParseOutput,
|
|
) -> Result<(), CoreError> {
|
|
let entries = std::fs::read_dir(dir).map_err(|e| {
|
|
CoreError::Graph(format!("Failed to read directory {}: {e}", dir.display()))
|
|
})?;
|
|
|
|
for entry in entries {
|
|
let entry = entry.map_err(|e| CoreError::Graph(format!("Dir entry error: {e}")))?;
|
|
let path = entry.path();
|
|
|
|
// Skip hidden directories and common non-source dirs
|
|
if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
|
|
if name.starts_with('.')
|
|
|| name == "node_modules"
|
|
|| name == "target"
|
|
|| name == "__pycache__"
|
|
|| name == "vendor"
|
|
|| name == "dist"
|
|
|| name == "build"
|
|
|| name == ".git"
|
|
{
|
|
continue;
|
|
}
|
|
}
|
|
|
|
if path.is_dir() {
|
|
self.walk_directory(
|
|
base,
|
|
&path,
|
|
repo_id,
|
|
graph_build_id,
|
|
max_nodes,
|
|
node_count,
|
|
combined,
|
|
)?;
|
|
} else if path.is_file() {
|
|
if *node_count >= max_nodes {
|
|
info!(max_nodes, "Reached node limit, stopping parse");
|
|
return Ok(());
|
|
}
|
|
|
|
let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
|
|
if !self.supports_extension(ext) {
|
|
continue;
|
|
}
|
|
|
|
// Use relative path from base
|
|
let rel_path = path.strip_prefix(base).unwrap_or(&path);
|
|
|
|
let source = match std::fs::read_to_string(&path) {
|
|
Ok(s) => s,
|
|
Err(_) => continue, // Skip binary/unreadable files
|
|
};
|
|
|
|
if let Some(output) = self.parse_file(rel_path, &source, repo_id, graph_build_id)? {
|
|
*node_count += output.nodes.len() as u32;
|
|
combined.nodes.extend(output.nodes);
|
|
combined.edges.extend(output.edges);
|
|
}
|
|
}
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
}
|
|
|
|
impl Default for ParserRegistry {
|
|
fn default() -> Self {
|
|
Self::new()
|
|
}
|
|
}
|