feat: rag-embedding-ai-chat (#1)
All checks were successful
CI / Format (push) Successful in 2s
CI / Clippy (push) Successful in 2m56s
CI / Security Audit (push) Successful in 1m25s
CI / Tests (push) Successful in 3m57s

Co-authored-by: Sharang Parnerkar <parnerkarsharang@gmail.com>
Reviewed-on: #1
This commit was merged in pull request #1.
This commit is contained in:
2026-03-06 21:54:15 +00:00
parent db454867f3
commit 42cabf0582
61 changed files with 3868 additions and 307 deletions

View File

@@ -28,8 +28,8 @@ impl WebCrawler {
base_url: &str,
excluded_paths: &[String],
) -> Result<Vec<DiscoveredEndpoint>, CoreError> {
let base = Url::parse(base_url)
.map_err(|e| CoreError::Dast(format!("Invalid base URL: {e}")))?;
let base =
Url::parse(base_url).map_err(|e| CoreError::Dast(format!("Invalid base URL: {e}")))?;
let mut visited: HashSet<String> = HashSet::new();
let mut endpoints: Vec<DiscoveredEndpoint> = Vec::new();
@@ -95,12 +95,15 @@ impl WebCrawler {
let document = Html::parse_document(&body);
// Extract links
let link_selector =
Selector::parse("a[href]").unwrap_or_else(|_| Selector::parse("a").expect("valid selector"));
let link_selector = match Selector::parse("a[href]") {
Ok(s) => s,
Err(_) => continue,
};
for element in document.select(&link_selector) {
if let Some(href) = element.value().attr("href") {
if let Some(absolute_url) = self.resolve_url(&base, &url, href) {
if self.is_same_origin(&base, &absolute_url) && !visited.contains(&absolute_url)
if self.is_same_origin(&base, &absolute_url)
&& !visited.contains(&absolute_url)
{
queue.push((absolute_url, depth + 1));
}
@@ -109,18 +112,18 @@ impl WebCrawler {
}
// Extract forms
let form_selector = Selector::parse("form")
.unwrap_or_else(|_| Selector::parse("form").expect("valid selector"));
let input_selector = Selector::parse("input, select, textarea")
.unwrap_or_else(|_| Selector::parse("input").expect("valid selector"));
let form_selector = match Selector::parse("form") {
Ok(s) => s,
Err(_) => continue,
};
let input_selector = match Selector::parse("input, select, textarea") {
Ok(s) => s,
Err(_) => continue,
};
for form in document.select(&form_selector) {
let action = form.value().attr("action").unwrap_or("");
let method = form
.value()
.attr("method")
.unwrap_or("GET")
.to_uppercase();
let method = form.value().attr("method").unwrap_or("GET").to_uppercase();
let form_url = self
.resolve_url(&base, &url, action)
@@ -128,20 +131,12 @@ impl WebCrawler {
let mut params = Vec::new();
for input in form.select(&input_selector) {
let name = input
.value()
.attr("name")
.unwrap_or("")
.to_string();
let name = input.value().attr("name").unwrap_or("").to_string();
if name.is_empty() {
continue;
}
let input_type = input
.value()
.attr("type")
.unwrap_or("text")
.to_string();
let input_type = input.value().attr("type").unwrap_or("text").to_string();
let location = if method == "GET" {
"query".to_string()