use std::collections::HashMap; use std::path::Path; use compliance_core::error::CoreError; use compliance_core::traits::graph_builder::{LanguageParser, ParseOutput}; use tracing::info; use super::javascript::JavaScriptParser; use super::python::PythonParser; use super::rust_parser::RustParser; use super::typescript::TypeScriptParser; /// Registry of language parsers, indexed by file extension pub struct ParserRegistry { parsers: Vec>, extension_map: HashMap, } impl ParserRegistry { /// Create a registry with all built-in parsers pub fn new() -> Self { let parsers: Vec> = vec![ Box::new(RustParser::new()), Box::new(PythonParser::new()), Box::new(JavaScriptParser::new()), Box::new(TypeScriptParser::new()), ]; let mut extension_map = HashMap::new(); for (idx, parser) in parsers.iter().enumerate() { for ext in parser.extensions() { extension_map.insert(ext.to_string(), idx); } } Self { parsers, extension_map, } } /// Check if a file extension is supported pub fn supports_extension(&self, ext: &str) -> bool { self.extension_map.contains_key(ext) } /// Get supported extensions pub fn supported_extensions(&self) -> Vec<&str> { self.extension_map.keys().map(|s| s.as_str()).collect() } /// Parse a file, selecting the appropriate parser by extension pub fn parse_file( &self, file_path: &Path, source: &str, repo_id: &str, graph_build_id: &str, ) -> Result, CoreError> { let ext = file_path.extension().and_then(|e| e.to_str()).unwrap_or(""); let parser_idx = match self.extension_map.get(ext) { Some(idx) => *idx, None => return Ok(None), }; let parser = &self.parsers[parser_idx]; info!( file = %file_path.display(), language = parser.language(), "Parsing file" ); let output = parser.parse_file(file_path, source, repo_id, graph_build_id)?; Ok(Some(output)) } /// Parse all supported files in a directory tree pub fn parse_directory( &self, dir: &Path, repo_id: &str, graph_build_id: &str, max_nodes: u32, ) -> Result { let mut combined = ParseOutput::default(); let mut node_count: u32 = 0; self.walk_directory( dir, dir, repo_id, graph_build_id, max_nodes, &mut node_count, &mut combined, )?; info!( nodes = combined.nodes.len(), edges = combined.edges.len(), "Directory parsing complete" ); Ok(combined) } fn walk_directory( &self, base: &Path, dir: &Path, repo_id: &str, graph_build_id: &str, max_nodes: u32, node_count: &mut u32, combined: &mut ParseOutput, ) -> Result<(), CoreError> { let entries = std::fs::read_dir(dir).map_err(|e| { CoreError::Graph(format!("Failed to read directory {}: {e}", dir.display())) })?; for entry in entries { let entry = entry.map_err(|e| CoreError::Graph(format!("Dir entry error: {e}")))?; let path = entry.path(); // Skip hidden directories and common non-source dirs if let Some(name) = path.file_name().and_then(|n| n.to_str()) { if name.starts_with('.') || name == "node_modules" || name == "target" || name == "__pycache__" || name == "vendor" || name == "dist" || name == "build" || name == ".git" { continue; } } if path.is_dir() { self.walk_directory( base, &path, repo_id, graph_build_id, max_nodes, node_count, combined, )?; } else if path.is_file() { if *node_count >= max_nodes { info!(max_nodes, "Reached node limit, stopping parse"); return Ok(()); } let ext = path.extension().and_then(|e| e.to_str()).unwrap_or(""); if !self.supports_extension(ext) { continue; } // Use relative path from base let rel_path = path.strip_prefix(base).unwrap_or(&path); let source = match std::fs::read_to_string(&path) { Ok(s) => s, Err(_) => continue, // Skip binary/unreadable files }; if let Some(output) = self.parse_file(rel_path, &source, repo_id, graph_build_id)? { *node_count += output.nodes.len() as u32; combined.nodes.extend(output.nodes); combined.edges.extend(output.edges); } } } Ok(()) } } impl Default for ParserRegistry { fn default() -> Self { Self::new() } }