All checks were successful
CI / Format (push) Successful in 4s
CI / Clippy (push) Successful in 4m19s
CI / Security Audit (push) Successful in 1m44s
CI / Detect Changes (push) Successful in 5s
CI / Tests (push) Successful in 5m15s
CI / Deploy Agent (push) Successful in 2s
CI / Deploy Dashboard (push) Successful in 2s
CI / Deploy Docs (push) Has been skipped
CI / Deploy MCP (push) Successful in 2s
314 lines
9.5 KiB
Rust
314 lines
9.5 KiB
Rust
use compliance_core::error::CoreError;
|
|
use compliance_core::models::graph::CodeNode;
|
|
use tantivy::collector::TopDocs;
|
|
use tantivy::query::QueryParser;
|
|
use tantivy::schema::{Schema, Value, STORED, TEXT};
|
|
use tantivy::{doc, Index, IndexWriter, ReloadPolicy};
|
|
use tracing::info;
|
|
|
|
/// BM25 text search index over code symbols
|
|
pub struct SymbolIndex {
|
|
index: Index,
|
|
#[allow(dead_code)]
|
|
schema: Schema,
|
|
qualified_name_field: tantivy::schema::Field,
|
|
name_field: tantivy::schema::Field,
|
|
kind_field: tantivy::schema::Field,
|
|
file_path_field: tantivy::schema::Field,
|
|
language_field: tantivy::schema::Field,
|
|
}
|
|
|
|
#[derive(Debug, Clone, serde::Serialize)]
|
|
pub struct SearchResult {
|
|
pub qualified_name: String,
|
|
pub name: String,
|
|
pub kind: String,
|
|
pub file_path: String,
|
|
pub language: String,
|
|
pub score: f32,
|
|
}
|
|
|
|
impl SymbolIndex {
|
|
/// Create a new in-memory symbol index
|
|
pub fn new() -> Result<Self, CoreError> {
|
|
let mut schema_builder = Schema::builder();
|
|
let qualified_name_field = schema_builder.add_text_field("qualified_name", TEXT | STORED);
|
|
let name_field = schema_builder.add_text_field("name", TEXT | STORED);
|
|
let kind_field = schema_builder.add_text_field("kind", TEXT | STORED);
|
|
let file_path_field = schema_builder.add_text_field("file_path", TEXT | STORED);
|
|
let language_field = schema_builder.add_text_field("language", TEXT | STORED);
|
|
let schema = schema_builder.build();
|
|
|
|
let index = Index::create_in_ram(schema.clone());
|
|
|
|
Ok(Self {
|
|
index,
|
|
schema,
|
|
qualified_name_field,
|
|
name_field,
|
|
kind_field,
|
|
file_path_field,
|
|
language_field,
|
|
})
|
|
}
|
|
|
|
/// Index a set of code nodes
|
|
pub fn index_nodes(&self, nodes: &[CodeNode]) -> Result<(), CoreError> {
|
|
let mut writer: IndexWriter = self
|
|
.index
|
|
.writer(50_000_000)
|
|
.map_err(|e| CoreError::Graph(format!("Failed to create index writer: {e}")))?;
|
|
|
|
for node in nodes {
|
|
writer
|
|
.add_document(doc!(
|
|
self.qualified_name_field => node.qualified_name.as_str(),
|
|
self.name_field => node.name.as_str(),
|
|
self.kind_field => node.kind.to_string(),
|
|
self.file_path_field => node.file_path.as_str(),
|
|
self.language_field => node.language.as_str(),
|
|
))
|
|
.map_err(|e| CoreError::Graph(format!("Failed to add document: {e}")))?;
|
|
}
|
|
|
|
writer
|
|
.commit()
|
|
.map_err(|e| CoreError::Graph(format!("Failed to commit index: {e}")))?;
|
|
|
|
info!(nodes = nodes.len(), "Symbol index built");
|
|
Ok(())
|
|
}
|
|
|
|
/// Search for symbols matching a query
|
|
pub fn search(&self, query_str: &str, limit: usize) -> Result<Vec<SearchResult>, CoreError> {
|
|
let reader = self
|
|
.index
|
|
.reader_builder()
|
|
.reload_policy(ReloadPolicy::Manual)
|
|
.try_into()
|
|
.map_err(|e| CoreError::Graph(format!("Failed to create reader: {e}")))?;
|
|
|
|
let searcher = reader.searcher();
|
|
let query_parser = QueryParser::for_index(
|
|
&self.index,
|
|
vec![self.name_field, self.qualified_name_field],
|
|
);
|
|
|
|
let query = query_parser
|
|
.parse_query(query_str)
|
|
.map_err(|e| CoreError::Graph(format!("Failed to parse query: {e}")))?;
|
|
|
|
let top_docs = searcher
|
|
.search(&query, &TopDocs::with_limit(limit))
|
|
.map_err(|e| CoreError::Graph(format!("Search failed: {e}")))?;
|
|
|
|
let mut results = Vec::new();
|
|
for (score, doc_address) in top_docs {
|
|
let doc: tantivy::TantivyDocument = searcher
|
|
.doc(doc_address)
|
|
.map_err(|e| CoreError::Graph(format!("Failed to retrieve doc: {e}")))?;
|
|
|
|
let get_field = |field: tantivy::schema::Field| -> String {
|
|
doc.get_first(field)
|
|
.and_then(|v| v.as_str())
|
|
.unwrap_or("")
|
|
.to_string()
|
|
};
|
|
|
|
results.push(SearchResult {
|
|
qualified_name: get_field(self.qualified_name_field),
|
|
name: get_field(self.name_field),
|
|
kind: get_field(self.kind_field),
|
|
file_path: get_field(self.file_path_field),
|
|
language: get_field(self.language_field),
|
|
score,
|
|
});
|
|
}
|
|
|
|
Ok(results)
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;
    use compliance_core::models::graph::CodeNodeKind;

    /// Build a `CodeNode` fixture with the given identifying fields and fixed
    /// placeholder values for everything else.
    fn make_node(
        qualified_name: &str,
        name: &str,
        kind: CodeNodeKind,
        file_path: &str,
        language: &str,
    ) -> CodeNode {
        CodeNode {
            id: None,
            repo_id: String::from("test"),
            graph_build_id: String::from("build1"),
            qualified_name: String::from(qualified_name),
            name: String::from(name),
            kind,
            file_path: String::from(file_path),
            start_line: 1,
            end_line: 10,
            language: String::from(language),
            community_id: None,
            is_entry_point: false,
            graph_index: None,
        }
    }

    /// Create a fresh index and load `nodes` into it.
    fn index_with(nodes: &[CodeNode]) -> SymbolIndex {
        let symbol_index = SymbolIndex::new().unwrap();
        symbol_index.index_nodes(nodes).unwrap();
        symbol_index
    }

    #[test]
    fn test_new_creates_index() {
        assert!(SymbolIndex::new().is_ok());
    }

    #[test]
    fn test_index_empty_nodes() {
        let symbol_index = SymbolIndex::new().unwrap();
        assert!(symbol_index.index_nodes(&[]).is_ok());
    }

    #[test]
    fn test_index_and_search_single_node() {
        let symbol_index = index_with(&[make_node(
            "src/main.rs::main",
            "main",
            CodeNodeKind::Function,
            "src/main.rs",
            "rust",
        )]);

        let hits = symbol_index.search("main", 10).unwrap();
        assert!(!hits.is_empty());
        assert_eq!(hits[0].name, "main");
        assert_eq!(hits[0].qualified_name, "src/main.rs::main");
    }

    #[test]
    fn test_search_no_results() {
        let symbol_index = index_with(&[make_node(
            "src/main.rs::foo",
            "foo",
            CodeNodeKind::Function,
            "src/main.rs",
            "rust",
        )]);

        let hits = symbol_index.search("zzzznonexistent", 10).unwrap();
        assert!(hits.is_empty());
    }

    #[test]
    fn test_search_multiple_nodes() {
        let fixtures = [
            make_node(
                "a.rs::handle_request",
                "handle_request",
                CodeNodeKind::Function,
                "a.rs",
                "rust",
            ),
            make_node(
                "b.rs::handle_response",
                "handle_response",
                CodeNodeKind::Function,
                "b.rs",
                "rust",
            ),
            make_node(
                "c.rs::process_data",
                "process_data",
                CodeNodeKind::Function,
                "c.rs",
                "rust",
            ),
        ];
        let symbol_index = index_with(&fixtures);

        let hits = symbol_index.search("handle", 10).unwrap();
        assert!(hits.len() >= 2);
    }

    #[test]
    fn test_search_limit() {
        // Twenty symbols that all share the `func` prefix.
        let fixtures: Vec<CodeNode> = (0..20)
            .map(|i| {
                make_node(
                    &format!("mod::func_{i}"),
                    &format!("func_{i}"),
                    CodeNodeKind::Function,
                    "mod.rs",
                    "rust",
                )
            })
            .collect();
        let symbol_index = index_with(&fixtures);

        let hits = symbol_index.search("func", 5).unwrap();
        assert!(hits.len() <= 5);
    }

    #[test]
    fn test_search_result_has_score() {
        let symbol_index = index_with(&[make_node(
            "src/lib.rs::compute",
            "compute",
            CodeNodeKind::Function,
            "src/lib.rs",
            "rust",
        )]);

        let hits = symbol_index.search("compute", 10).unwrap();
        assert!(!hits.is_empty());
        assert!(hits[0].score > 0.0);
    }

    #[test]
    fn test_search_result_fields() {
        let symbol_index = index_with(&[make_node(
            "src/app.py::MyClass",
            "MyClass",
            CodeNodeKind::Class,
            "src/app.py",
            "python",
        )]);

        let hits = symbol_index.search("MyClass", 10).unwrap();
        assert_eq!(hits.len(), 1);

        let hit = &hits[0];
        assert_eq!(hit.name, "MyClass");
        assert_eq!(hit.kind, "class");
        assert_eq!(hit.file_path, "src/app.py");
        assert_eq!(hit.language, "python");
    }

    #[test]
    fn test_search_empty_query() {
        let symbol_index = index_with(&[make_node(
            "src/lib.rs::foo",
            "foo",
            CodeNodeKind::Function,
            "src/lib.rs",
            "rust",
        )]);

        // An empty query may yield a parse error or an empty result; either is
        // acceptable. The only requirement is that the call does not panic.
        let _ = symbol_index.search("", 10);
    }
}
|