will inject the stylesheet into the `<head>` of the document
* Components are functions annotated with the `#[component]` macro.
    let theme = use_context::<Signal<String>>(); // Consume the same type
    rsx! { Router::<Route> {} }
- Use `ratatui` and `crossterm` for terminal applications/TUIs.
similar to that of popular single-page web/mobile apps.
    - Ensure `ipykernel` and `ipywidgets` are installed in `.venv` for Jupyter Notebook compatibility. This should not be in package requirements.
+/// +/// # Arguments +/// +/// * `items` - Slice of item structs with price fields +/// * `tax_rate` - Tax rate as decimal (e.g., 0.08 for 8%) +/// +/// # Returns +/// +/// Total cost including tax +/// +/// # Errors +/// +/// Returns `CalculationError::EmptyItems` if items is empty +/// Returns `CalculationError::InvalidTaxRate` if tax_rate is negative +/// +/// # Examples +/// +/// ``` +/// let items = vec![Item { price: 10.0 }, Item { price: 20.0 }]; +/// let total = calculate_total(&items, 0.08)?; +/// assert_eq!(total, 32.40); +/// ``` +pub fn calculate_total(items: &[Item], tax_rate: f64) -> Result { +```` + +## Type System + +- **MUST** leverage Rust's type system to prevent bugs at compile time +- **NEVER** use `.unwrap()` in library code; use `.expect()` only for invariant violations with a descriptive message +- **MUST** use meaningful custom error types with `thiserror` +- Use newtypes to distinguish semantically different values of the same underlying type +- Prefer `Option` over sentinel values + +## Error Handling + +- **NEVER** use `.unwrap()` in production code paths +- **MUST** use `Result` for fallible operations +- **MUST** use `thiserror` for defining error types and `anyhow` for application-level errors +- **MUST** propagate errors with `?` operator where appropriate +- Provide meaningful error messages with context using `.context()` from `anyhow` + +## Function Design + +- **MUST** keep functions focused on a single responsibility +- **MUST** prefer borrowing (`&T`, `&mut T`) over ownership when possible +- Limit function parameters to 5 or fewer; use a config struct for more +- Return early to reduce nesting +- Use iterators and combinators over explicit loops where clearer + +## Struct and Enum Design + +- **MUST** keep types focused on a single responsibility +- **MUST** derive common traits: `Debug`, `Clone`, `PartialEq` where appropriate +- Use `#[derive(Default)]` when a sensible default exists +- Prefer composition over 
inheritance-like patterns +- Use builder pattern for complex struct construction +- Make fields private by default; provide accessor methods when needed + +## Testing + +- **MUST** write unit tests for all new functions and types +- **MUST** mock external dependencies (APIs, databases, file systems) +- **MUST** use the built-in `#[test]` attribute and `cargo test` +- Follow the Arrange-Act-Assert pattern +- Do not commit commented-out tests +- Use `#[cfg(test)]` modules for test code + +## Imports and Dependencies + +- **MUST** avoid wildcard imports (`use module::*`) except for preludes, test modules (`use super::*`), and prelude re-exports +- **MUST** document dependencies in `Cargo.toml` with version constraints +- Use `cargo` for dependency management +- Organize imports: standard library, external crates, local modules +- Use `rustfmt` to automate import formatting + +## Rust Best Practices + +- **NEVER** use `unsafe` unless absolutely necessary; document safety invariants when used +- **MUST** call `.clone()` explicitly on non-`Copy` types; avoid hidden clones in closures and iterators +- **MUST** use pattern matching exhaustively; avoid catch-all `_` patterns when possible +- **MUST** use `format!` macro for string formatting +- Use iterators and iterator adapters over manual loops +- Use `enumerate()` instead of manual counter variables +- Prefer `if let` and `while let` for single-pattern matching + +## Memory and Performance + +- **MUST** avoid unnecessary allocations; prefer `&str` over `String` when possible +- **MUST** use `Cow<'_, str>` when ownership is conditionally needed +- Use `Vec::with_capacity()` when the size is known +- Prefer stack allocation over heap when appropriate +- Use `Arc` and `Rc` judiciously; prefer borrowing + +## Concurrency + +- **MUST** use `Send` and `Sync` bounds appropriately +- **MUST** prefer `tokio` for async runtime in async applications +- **MUST** use `rayon` for CPU-bound parallelism +- Avoid `Mutex` when `RwLock` 
or lock-free alternatives are appropriate +- Use channels (`mpsc`, `crossbeam`) for message passing + +## Security + +- **NEVER** store secrets, API keys, or passwords in code. Only store them in `.env`. + - Ensure `.env` is declared in `.gitignore`. +- **MUST** use environment variables for sensitive configuration via `dotenvy` or `std::env` +- **NEVER** log sensitive information (passwords, tokens, PII) +- Use `secrecy` crate for sensitive data types + +## Version Control + +- **MUST** write clear, descriptive commit messages +- **NEVER** commit commented-out code; delete it +- **NEVER** commit debug `println!` statements or `dbg!` macros +- **NEVER** commit credentials or sensitive data + +## Tools + +- **MUST** use `rustfmt` for code formatting +- **MUST** use `clippy` for linting and follow its suggestions +- **MUST** ensure code compiles with no warnings (use `-D warnings` flag in CI, not `#![deny(warnings)]` in source) +- Use `cargo` for building, testing, and dependency management +- Use `cargo test` for running tests +- Use `cargo doc` for generating documentation +- **NEVER** build with `cargo build --features python`: this will always fail. Instead, **ALWAYS** use `maturin`. 
**Remember:** Prioritize clarity and maintainability over cleverness.
"http-body-util", + "hyper", + "hyper-named-pipe", + "hyper-util", + "hyperlocal", + "log", + "pin-project-lite", + "serde", + "serde_derive", + "serde_json", + "serde_repr", + "serde_urlencoded", + "thiserror 2.0.18", + "tokio", + "tokio-util", + "tower-service", + "url", + "winapi", +] + +[[package]] +name = "bollard-stubs" +version = "1.47.1-rc.27.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f179cfbddb6e77a5472703d4b30436bff32929c0aa8a9008ecf23d1d3cdd0da" +dependencies = [ + "serde", + "serde_repr", + "serde_with", +] + [[package]] name = "bson" version = "2.15.0" @@ -263,13 +330,14 @@ dependencies = [ "ahash", "base64", "bitvec", + "chrono", "getrandom 0.2.17", "getrandom 0.3.4", "hex", - "indexmap", + "indexmap 2.13.0", "js-sys", "once_cell", - "rand", + "rand 0.9.2", "serde", "serde_bytes", "serde_json", @@ -310,6 +378,12 @@ dependencies = [ "shlex", ] +[[package]] +name = "census" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f4c707c6a209cbe82d10abd08e1ea8995e9ea937d2550646e02798948992be0" + [[package]] name = "cesu8" version = "1.1.0" @@ -349,6 +423,72 @@ dependencies = [ "encoding_rs", ] +[[package]] +name = "chromiumoxide" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8380ce7721cc895fe8a184c49d615fe755b0c9a3d7986355cee847439fff907f" +dependencies = [ + "async-tungstenite 0.27.0", + "base64", + "cfg-if", + "chromiumoxide_cdp", + "chromiumoxide_types", + "dunce", + "fnv", + "futures", + "futures-timer", + "pin-project-lite", + "reqwest", + "serde", + "serde_json", + "thiserror 1.0.69", + "tokio", + "tracing", + "url", + "which", + "winreg 0.52.0", +] + +[[package]] +name = "chromiumoxide_cdp" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cadbfb52fa0aeca43626f6c42ca04184b108b786f8e45198dc41a42aedcf2e50" +dependencies = [ + "chromiumoxide_pdl", + 
"chromiumoxide_types", + "serde", + "serde_json", +] + +[[package]] +name = "chromiumoxide_pdl" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c197aeb42872c5d4c923e7d8ad46d99a58fd0fec37f6491554ff677a6791d3c9" +dependencies = [ + "chromiumoxide_types", + "either", + "heck 0.4.1", + "once_cell", + "proc-macro2", + "quote", + "regex", + "serde", + "serde_json", +] + +[[package]] +name = "chromiumoxide_types" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "923486888790528d55ac37ec2f7483ed19eb8ccbb44701878e5856d1ceadf5d8" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "chrono" version = "0.4.44" @@ -408,6 +548,8 @@ dependencies = [ "base64", "chrono", "compliance-core", + "compliance-dast", + "compliance-graph", "dotenvy", "futures-util", "git2", @@ -461,6 +603,7 @@ dependencies = [ "dioxus-fullstack", "dioxus-logger 0.6.2", "dotenvy", + "gloo-timers", "mongodb", "reqwest", "secrecy", @@ -473,6 +616,51 @@ dependencies = [ "web-sys", ] +[[package]] +name = "compliance-dast" +version = "0.1.0" +dependencies = [ + "bollard", + "bson", + "chromiumoxide", + "chrono", + "compliance-core", + "mongodb", + "reqwest", + "scraper", + "serde", + "serde_json", + "thiserror 2.0.18", + "tokio", + "tracing", + "url", + "uuid", +] + +[[package]] +name = "compliance-graph" +version = "0.1.0" +dependencies = [ + "bson", + "chrono", + "compliance-core", + "futures-util", + "mongodb", + "petgraph", + "serde", + "serde_json", + "tantivy", + "thiserror 2.0.18", + "tokio", + "tracing", + "tree-sitter", + "tree-sitter-javascript", + "tree-sitter-python", + "tree-sitter-rust", + "tree-sitter-typescript", + "uuid", +] + [[package]] name = "console_error_panic_hook" version = "0.1.7" @@ -662,6 +850,15 @@ dependencies = [ "libc", ] +[[package]] +name = "crc32fast" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" +dependencies = [ + "cfg-if", +] + [[package]] name = "critical-section" version = "1.2.0" @@ -686,6 +883,16 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "crossbeam-deque" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + [[package]] name = "crossbeam-epoch" version = "0.9.18" @@ -717,6 +924,29 @@ dependencies = [ "typenum", ] +[[package]] +name = "cssparser" +version = "0.34.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c66d1cd8ed61bf80b38432613a7a2f09401ab8d0501110655f8b341484a3e3" +dependencies = [ + "cssparser-macros", + "dtoa-short", + "itoa", + "phf", + "smallvec", +] + +[[package]] +name = "cssparser-macros" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13b588ba4ac1a99f7f2964d24b3d896ddc6bf847ee3855dbd4366f058cfcd331" +dependencies = [ + "quote", + "syn", +] + [[package]] name = "darling" version = "0.21.3" @@ -779,6 +1009,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7cd812cc2bc1d69d4764bd80df88b4317eaef9e773c75226407d9bc0876b211c" dependencies = [ "powerfmt", + "serde_core", ] [[package]] @@ -803,6 +1034,17 @@ dependencies = [ "syn", ] +[[package]] +name = "derive_more" +version = "0.99.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6edb4b64a43d977b8e99788fe3a04d483834fba1215a7e02caa415b626497f7f" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "derive_more" version = "2.1.1" @@ -1030,7 +1272,7 @@ checksum = "7db1f8b70338072ec408b48d09c96559cf071f87847465d8161294197504c498" dependencies = [ "anyhow", "async-stream", - "async-tungstenite", + "async-tungstenite 0.31.0", "axum", "axum-core", "axum-extra", @@ 
-1040,7 +1282,7 @@ dependencies = [ "const-str", "const_format", "content_disposition", - "derive_more", + "derive_more 2.1.1", "dioxus-asset-resolver", "dioxus-cli-config 0.7.3", "dioxus-core", @@ -1353,7 +1595,7 @@ dependencies = [ "hyper", "hyper-util", "inventory", - "lru", + "lru 0.16.3", "parking_lot", "pin-project", "rustc-hash 2.1.1", @@ -1485,12 +1727,45 @@ version = "0.15.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b" +[[package]] +name = "downcast-rs" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75b325c5dbd37f80359721ad39aca5a29fb04c89279657cffdda8736d0c0b9d2" + +[[package]] +name = "dtoa" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c3cf4824e2d5f025c7b531afcb2325364084a16806f6d47fbc1f5fbd9960590" + +[[package]] +name = "dtoa-short" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd1511a7b6a56299bd043a9c167a6d2bfb37bf84a6dfceaba651168adfb43c87" +dependencies = [ + "dtoa", +] + [[package]] name = "dunce" version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813" +[[package]] +name = "dyn-clone" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555" + +[[package]] +name = "ego-tree" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2972feb8dffe7bc8c5463b1dacda1b0dfbed3710e50f977d965429692d74cd8" + [[package]] name = "either" version = "1.15.0" @@ -1512,7 +1787,7 @@ version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a1e6a265c649f3f5979b601d26f1d05ada116434c87741c9493cb56218f76cbc" dependencies = [ - 
"heck", + "heck 0.5.0", "proc-macro2", "quote", "syn", @@ -1565,12 +1840,30 @@ dependencies = [ "serde", ] +[[package]] +name = "fastdivide" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9afc2bd4d5a73106dd53d10d73d3401c2f32730ba2c0b93ddb888a8983680471" + +[[package]] +name = "fastrand" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + [[package]] name = "find-msvc-tools" version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" +[[package]] +name = "fixedbitset" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" + [[package]] name = "fnv" version = "1.0.7" @@ -1598,12 +1891,32 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "fs4" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7e180ac76c23b45e767bd7ae9579bc0bb458618c4bc71835926e098e61d15f8" +dependencies = [ + "rustix 0.38.44", + "windows-sys 0.52.0", +] + [[package]] name = "funty" version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" +[[package]] +name = "futf" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df420e2e84819663797d1ec6544b13c5be84629e7bb00dc960d6917db2987843" +dependencies = [ + "mac", + "new_debug_unreachable", +] + [[package]] name = "futures" version = "0.3.32" @@ -1675,6 +1988,12 @@ version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393" +[[package]] +name = "futures-timer" +version = "3.0.3" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f288b0a4f20f9a56b5d1da57e2227c661b7b16168e2f72365f57b63326e29b24" + [[package]] name = "futures-util" version = "0.3.32" @@ -1692,6 +2011,15 @@ dependencies = [ "slab", ] +[[package]] +name = "fxhash" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" +dependencies = [ + "byteorder", +] + [[package]] name = "generational-box" version = "0.7.3" @@ -1712,6 +2040,15 @@ dependencies = [ "version_check", ] +[[package]] +name = "getopts" +version = "0.2.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfe4fbac503b8d1f88e6676011885f34b7174f46e59956bba534ba83abded4df" +dependencies = [ + "unicode-width", +] + [[package]] name = "getrandom" version = "0.2.17" @@ -1825,7 +2162,7 @@ dependencies = [ "futures-core", "futures-sink", "http", - "indexmap", + "indexmap 2.13.0", "slab", "tokio", "tokio-util", @@ -1843,6 +2180,12 @@ dependencies = [ "zerocopy", ] +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" + [[package]] name = "hashbrown" version = "0.14.5" @@ -1855,6 +2198,8 @@ version = "0.15.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" dependencies = [ + "allocator-api2", + "equivalent", "foldhash 0.1.5", ] @@ -1893,12 +2238,24 @@ dependencies = [ "http", ] +[[package]] +name = "heck" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" + [[package]] name = "heck" version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "hermit-abi" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" + [[package]] name = "hex" version = "0.4.3" @@ -1921,7 +2278,7 @@ dependencies = [ "idna", "ipnet", "once_cell", - "rand", + "rand 0.9.2", "ring", "thiserror 2.0.18", "tinyvec", @@ -1943,7 +2300,7 @@ dependencies = [ "moka", "once_cell", "parking_lot", - "rand", + "rand 0.9.2", "resolv-conf", "smallvec", "thiserror 2.0.18", @@ -1960,6 +2317,33 @@ dependencies = [ "digest", ] +[[package]] +name = "home" +version = "0.5.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc627f471c528ff0c4a49e1d5e60450c8f6461dd6d10ba9dcd3a61d3dff7728d" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "html5ever" +version = "0.29.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b7410cae13cbc75623c98ac4cbfd1f0bedddf3227afc24f370cf0f50a44a11c" +dependencies = [ + "log", + "mac", + "markup5ever", + "match_token", +] + +[[package]] +name = "htmlescape" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9025058dae765dee5070ec375f591e2ba14638c63feff74f13805a72e523163" + [[package]] name = "http" version = "1.4.0" @@ -2034,6 +2418,21 @@ dependencies = [ "want", ] +[[package]] +name = "hyper-named-pipe" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73b7d8abf35697b81a825e386fc151e0d503e8cb5fcb93cc8669c376dfd6f278" +dependencies = [ + "hex", + "hyper", + "hyper-util", + "pin-project-lite", + "tokio", + "tower-service", + "winapi", +] + [[package]] name = "hyper-rustls" version = "0.27.7" @@ -2092,6 +2491,21 @@ dependencies = [ "windows-registry", ] +[[package]] +name = "hyperlocal" +version = "0.9.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "986c5ce3b994526b3cd75578e62554abd09f0899d6206de48b3e96ab34ccc8c7" +dependencies = [ + "hex", + "http-body-util", + "hyper", + "hyper-util", + "pin-project-lite", + "tokio", + "tower-service", +] + [[package]] name = "iana-time-zone" version = "0.1.65" @@ -2230,6 +2644,17 @@ dependencies = [ "icu_properties", ] +[[package]] +name = "indexmap" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" +dependencies = [ + "autocfg", + "hashbrown 0.12.3", + "serde", +] + [[package]] name = "indexmap" version = "2.13.0" @@ -2251,6 +2676,18 @@ dependencies = [ "cfb", ] +[[package]] +name = "instant" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222" +dependencies = [ + "cfg-if", + "js-sys", + "wasm-bindgen", + "web-sys", +] + [[package]] name = "inventory" version = "0.3.22" @@ -2269,7 +2706,7 @@ dependencies = [ "socket2 0.5.10", "widestring", "windows-sys 0.48.0", - "winreg", + "winreg 0.50.0", ] [[package]] @@ -2288,6 +2725,15 @@ dependencies = [ "serde", ] +[[package]] +name = "itertools" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.17" @@ -2379,6 +2825,12 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" +[[package]] +name = "levenshtein_automata" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c2cdeb66e45e9f36bfad5bbdb4d2384e70936afbee843c6f6543f0c551ebb25" + [[package]] name = "libc" version = "0.2.182" @@ -2409,6 +2861,12 @@ 
dependencies = [ "windows-link", ] +[[package]] +name = "libm" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" + [[package]] name = "libssh2-sys" version = "0.3.1" @@ -2435,6 +2893,12 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "linux-raw-sys" +version = "0.4.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" + [[package]] name = "linux-raw-sys" version = "0.12.1" @@ -2474,6 +2938,15 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b3bd0dd2cd90571056fdb71f6275fada10131182f84899f4b2a916e565d81d86" +[[package]] +name = "lru" +version = "0.12.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "234cf4f4a04dc1f57e24b96cc0cd600cf2af460d4161ac5ecdd0af8e1f3b2a38" +dependencies = [ + "hashbrown 0.15.5", +] + [[package]] name = "lru" version = "0.16.3" @@ -2489,6 +2962,18 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" +[[package]] +name = "lz4_flex" +version = "0.11.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08ab2867e3eeeca90e844d1940eab391c9dc5228783db2ed999acbc0a9ed375a" + +[[package]] +name = "mac" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" + [[package]] name = "macro-string" version = "0.1.4" @@ -2588,6 +3073,31 @@ dependencies = [ "syn", ] +[[package]] +name = "markup5ever" +version = "0.14.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7a7213d12e1864c0f002f52c2923d4556935a43dec5e71355c2760e0f6e7a18" +dependencies = [ + "log", + "phf", + "phf_codegen", + 
"string_cache", + "string_cache_codegen", + "tendril", +] + +[[package]] +name = "match_token" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88a9689d8d44bf9964484516275f5cd4c9b59457a6940c1d5d0ecbb94510a36b" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "matchers" version = "0.2.0" @@ -2613,6 +3123,16 @@ dependencies = [ "digest", ] +[[package]] +name = "measure_time" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbefd235b0aadd181626f281e1d684e116972988c14c264e42069d5e8a5775cc" +dependencies = [ + "instant", + "log", +] + [[package]] name = "memchr" version = "2.8.0" @@ -2625,7 +3145,7 @@ version = "0.6.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ad38eb12aea514a0466ea40a80fd8cc83637065948eb4a426e4aa46261175227" dependencies = [ - "rustix", + "rustix 1.1.4", ] [[package]] @@ -2653,6 +3173,12 @@ dependencies = [ "unicase", ] +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + [[package]] name = "mio" version = "1.1.1" @@ -2709,7 +3235,7 @@ dependencies = [ "bitflags", "bson", "derive-where", - "derive_more", + "derive_more 2.1.1", "futures-core", "futures-io", "futures-util", @@ -2723,7 +3249,7 @@ dependencies = [ "mongodb-internal-macros", "pbkdf2", "percent-encoding", - "rand", + "rand 0.9.2", "rustc_version_runtime", "rustls", "rustversion", @@ -2774,6 +3300,12 @@ dependencies = [ "version_check", ] +[[package]] +name = "murmurhash32" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2195bf6aa996a481483b29d62a7663eed3fe39600c460e323f8ff41e90bdd89b" + [[package]] name = "ndk" version = "0.9.0" @@ -2804,6 +3336,22 @@ dependencies = [ "jni-sys", ] +[[package]] +name = "new_debug_unreachable" +version 
= "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086" + +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + [[package]] name = "nu-ansi-term" version = "0.50.3" @@ -2856,6 +3404,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ "autocfg", + "libm", +] + +[[package]] +name = "num_cpus" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b" +dependencies = [ + "hermit-abi", + "libc", ] [[package]] @@ -2930,6 +3489,12 @@ dependencies = [ "portable-atomic", ] +[[package]] +name = "oneshot" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "269bca4c2591a28585d6bf10d9ed0332b7d76900a1b02bec41bdc3a2cdcda107" + [[package]] name = "openssl-probe" version = "0.1.6" @@ -2954,6 +3519,15 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "ownedbytes" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3a059efb063b8f425b948e042e6b9bd85edfe60e913630ed727b23e2dfcc558" +dependencies = [ + "stable_deref_trait", +] + [[package]] name = "parking_lot" version = "0.12.5" @@ -3002,6 +3576,68 @@ version = "2.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" +[[package]] +name = "petgraph" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772" +dependencies = [ + "fixedbitset", + 
"indexmap 2.13.0", +] + +[[package]] +name = "phf" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078" +dependencies = [ + "phf_macros", + "phf_shared", +] + +[[package]] +name = "phf_codegen" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a" +dependencies = [ + "phf_generator", + "phf_shared", +] + +[[package]] +name = "phf_generator" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d" +dependencies = [ + "phf_shared", + "rand 0.8.5", +] + +[[package]] +name = "phf_macros" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f84ac04429c13a7ff43785d75ad27569f2951ce0ffd30a3321230db2fc727216" +dependencies = [ + "phf_generator", + "phf_shared", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "phf_shared" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5" +dependencies = [ + "siphasher", +] + [[package]] name = "pin-project" version = "1.1.11" @@ -3070,6 +3706,12 @@ dependencies = [ "zerocopy", ] +[[package]] +name = "precomputed-hash" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" + [[package]] name = "prettyplease" version = "0.2.37" @@ -3155,7 +3797,7 @@ dependencies = [ "bytes", "getrandom 0.3.4", "lru-slab", - "rand", + "rand 0.9.2", "ring", "rustc-hash 2.1.1", "rustls", @@ -3202,14 +3844,35 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha 0.3.1", + "rand_core 0.6.4", +] + [[package]] name = "rand" version = "0.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" dependencies = [ - "rand_chacha", - "rand_core", + "rand_chacha 0.9.0", + "rand_core 0.9.5", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core 0.6.4", ] [[package]] @@ -3219,7 +3882,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" dependencies = [ "ppv-lite86", - "rand_core", + "rand_core 0.9.5", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom 0.2.17", ] [[package]] @@ -3231,12 +3903,42 @@ dependencies = [ "getrandom 0.3.4", ] +[[package]] +name = "rand_distr" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31" +dependencies = [ + "num-traits", + "rand 0.8.5", +] + [[package]] name = "raw-window-handle" version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "20675572f6f24e9e76ef639bc5552774ed45f1c30e2951e1e99c59888861c539" +[[package]] +name = "rayon" +version = "1.11.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + [[package]] name = "redox_syscall" version = "0.5.18" @@ -3246,6 +3948,26 @@ dependencies = [ "bitflags", ] +[[package]] +name = "ref-cast" +version = "1.0.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f354300ae66f76f1c85c5f84693f0ce81d747e2c3f21a45fef496d89c960bf7d" +dependencies = [ + "ref-cast-impl", +] + +[[package]] +name = "ref-cast-impl" +version = "1.0.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7186006dcb21920990093f30e3dea63b7d6e977bf1256be20c3563a5db070da" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "regex" version = "1.12.3" @@ -3339,6 +4061,16 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "rust-stemmers" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e46a2036019fdb888131db7a4c847a1063a7493f971ed94ea82c67eada63ca54" +dependencies = [ + "serde", + "serde_derive", +] + [[package]] name = "rustc-hash" version = "1.1.0" @@ -3370,6 +4102,19 @@ dependencies = [ "semver", ] +[[package]] +name = "rustix" +version = "0.38.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys 0.4.15", + "windows-sys 0.52.0", +] + [[package]] name = "rustix" version = "1.1.4" @@ -3379,7 +4124,7 @@ dependencies = [ "bitflags", "errno", "libc", - "linux-raw-sys", + "linux-raw-sys 0.12.1", "windows-sys 
0.61.2", ] @@ -3461,12 +4206,51 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "schemars" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4cd191f9397d57d581cddd31014772520aa448f65ef991055d7f61582c65165f" +dependencies = [ + "dyn-clone", + "ref-cast", + "serde", + "serde_json", +] + +[[package]] +name = "schemars" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2b42f36aa1cd011945615b92222f6bf73c599a102a300334cd7f8dbeec726cc" +dependencies = [ + "dyn-clone", + "ref-cast", + "serde", + "serde_json", +] + [[package]] name = "scopeguard" version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "scraper" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc3d051b884f40e309de6c149734eab57aa8cc1347992710dc80bcc1c2194c15" +dependencies = [ + "cssparser", + "ego-tree", + "getopts", + "html5ever", + "precomputed-hash", + "selectors", + "tendril", +] + [[package]] name = "secrecy" version = "0.10.3" @@ -3500,6 +4284,25 @@ dependencies = [ "libc", ] +[[package]] +name = "selectors" +version = "0.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd568a4c9bb598e291a08244a5c1f5a8a6650bee243b5b0f8dbb3d9cc1d87fe8" +dependencies = [ + "bitflags", + "cssparser", + "derive_more 0.99.20", + "fxhash", + "log", + "new_debug_unreachable", + "phf", + "phf_codegen", + "precomputed-hash", + "servo_arc", + "smallvec", +] + [[package]] name = "semver" version = "1.0.27" @@ -3572,7 +4375,7 @@ version = "1.0.149" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" dependencies = [ - "indexmap", + "indexmap 2.13.0", "itoa", "memchr", "serde", @@ -3631,8 +4434,17 @@ version = "3.17.0" 
source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "381b283ce7bc6b476d903296fb59d0d36633652b633b27f64db4fb46dcbfc3b9" dependencies = [ + "base64", + "chrono", + "hex", + "indexmap 1.9.3", + "indexmap 2.13.0", + "schemars 0.9.0", + "schemars 1.2.1", "serde_core", + "serde_json", "serde_with_macros", + "time", ] [[package]] @@ -3647,6 +4459,15 @@ dependencies = [ "syn", ] +[[package]] +name = "servo_arc" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "170fb83ab34de17dc69aa7c67482b22218ddb85da56546f9bd6b929e32a05930" +dependencies = [ + "stable_deref_trait", +] + [[package]] name = "sha1" version = "0.10.6" @@ -3706,6 +4527,21 @@ dependencies = [ "time", ] +[[package]] +name = "siphasher" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2aa850e253778c88a04c3d7323b043aeda9d3e30d5971937c1855769763678e" + +[[package]] +name = "sketches-ddsketch" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85636c14b73d81f541e525f585c0a2109e6744e1565b5c1668e31c70c10ed65c" +dependencies = [ + "serde", +] + [[package]] name = "slab" version = "0.4.12" @@ -3772,7 +4608,7 @@ version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c1c97747dbf44bb1ca44a561ece23508e99cb592e862f22222dcf42f51d1e451" dependencies = [ - "heck", + "heck 0.5.0", "proc-macro2", "quote", "syn", @@ -3810,6 +4646,37 @@ version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" +[[package]] +name = "streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b2231b7c3057d5e4ad0156fb3dc807d900806020c5ffa3ee6ff2c8c76fb8520" + +[[package]] +name = "string_cache" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum 
= "bf776ba3fa74f83bf4b63c3dcbbf82173db2632ed8452cb2d891d33f459de70f" +dependencies = [ + "new_debug_unreachable", + "parking_lot", + "phf_shared", + "precomputed-hash", + "serde", +] + +[[package]] +name = "string_cache_codegen" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c711928715f1fe0fe509c53b43e993a9a557babc2d0a3567d0a3006f1ac931a0" +dependencies = [ + "phf_generator", + "phf_shared", + "proc-macro2", + "quote", +] + [[package]] name = "stringprep" version = "0.1.5" @@ -3925,12 +4792,177 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f764005d11ee5f36500a149ace24e00e3da98b0158b3e2d53a7495660d3f4d60" +[[package]] +name = "tantivy" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96599ea6fccd844fc833fed21d2eecac2e6a7c1afd9e044057391d78b1feb141" +dependencies = [ + "aho-corasick", + "arc-swap", + "base64", + "bitpacking", + "byteorder", + "census", + "crc32fast", + "crossbeam-channel", + "downcast-rs", + "fastdivide", + "fnv", + "fs4", + "htmlescape", + "itertools", + "levenshtein_automata", + "log", + "lru 0.12.5", + "lz4_flex", + "measure_time", + "memmap2", + "num_cpus", + "once_cell", + "oneshot", + "rayon", + "regex", + "rust-stemmers", + "rustc-hash 1.1.0", + "serde", + "serde_json", + "sketches-ddsketch", + "smallvec", + "tantivy-bitpacker", + "tantivy-columnar", + "tantivy-common", + "tantivy-fst", + "tantivy-query-grammar", + "tantivy-stacker", + "tantivy-tokenizer-api", + "tempfile", + "thiserror 1.0.69", + "time", + "uuid", + "winapi", +] + +[[package]] +name = "tantivy-bitpacker" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "284899c2325d6832203ac6ff5891b297fc5239c3dc754c5bc1977855b23c10df" +dependencies = [ + "bitpacking", +] + +[[package]] +name = "tantivy-columnar" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "12722224ffbe346c7fec3275c699e508fd0d4710e629e933d5736ec524a1f44e" +dependencies = [ + "downcast-rs", + "fastdivide", + "itertools", + "serde", + "tantivy-bitpacker", + "tantivy-common", + "tantivy-sstable", + "tantivy-stacker", +] + +[[package]] +name = "tantivy-common" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8019e3cabcfd20a1380b491e13ff42f57bb38bf97c3d5fa5c07e50816e0621f4" +dependencies = [ + "async-trait", + "byteorder", + "ownedbytes", + "serde", + "time", +] + +[[package]] +name = "tantivy-fst" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d60769b80ad7953d8a7b2c70cdfe722bbcdcac6bccc8ac934c40c034d866fc18" +dependencies = [ + "byteorder", + "regex-syntax", + "utf8-ranges", +] + +[[package]] +name = "tantivy-query-grammar" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "847434d4af57b32e309f4ab1b4f1707a6c566656264caa427ff4285c4d9d0b82" +dependencies = [ + "nom", +] + +[[package]] +name = "tantivy-sstable" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c69578242e8e9fc989119f522ba5b49a38ac20f576fc778035b96cc94f41f98e" +dependencies = [ + "tantivy-bitpacker", + "tantivy-common", + "tantivy-fst", + "zstd", +] + +[[package]] +name = "tantivy-stacker" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c56d6ff5591fc332739b3ce7035b57995a3ce29a93ffd6012660e0949c956ea8" +dependencies = [ + "murmurhash32", + "rand_distr", + "tantivy-common", +] + +[[package]] +name = "tantivy-tokenizer-api" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a0dcade25819a89cfe6f17d932c9cedff11989936bf6dd4f336d50392053b04" +dependencies = [ + "serde", +] + [[package]] name = "tap" version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" +[[package]] +name = "tempfile" +version = "3.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82a72c767771b47409d2345987fda8628641887d5466101319899796367354a0" +dependencies = [ + "fastrand", + "getrandom 0.4.1", + "once_cell", + "rustix 1.1.4", + "windows-sys 0.61.2", +] + +[[package]] +name = "tendril" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d24a120c5fc464a3458240ee02c299ebcb9d67b5249c8848b09d639dca8d7bb0" +dependencies = [ + "futf", + "mac", + "utf-8", +] + [[package]] name = "thiserror" version = "1.0.69" @@ -4164,7 +5196,7 @@ version = "0.23.10+spec-1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "84c8b9f757e028cee9fa244aea147aab2a9ec09d5325a9b01e0a49730c2b5269" dependencies = [ - "indexmap", + "indexmap 2.13.0", "toml_datetime", "toml_parser", "winnow", @@ -4319,12 +5351,89 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "tree-sitter" +version = "0.24.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5387dffa7ffc7d2dae12b50c6f7aab8ff79d6210147c6613561fc3d474c6f75" +dependencies = [ + "cc", + "regex", + "regex-syntax", + "streaming-iterator", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-javascript" +version = "0.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf40bf599e0416c16c125c3cec10ee5ddc7d1bb8b0c60fa5c4de249ad34dc1b1" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-language" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "009994f150cc0cd50ff54917d5bc8bffe8cad10ca10d81c34da2ec421ae61782" + +[[package]] +name = "tree-sitter-python" +version = "0.23.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"3d065aaa27f3aaceaf60c1f0e0ac09e1cb9eb8ed28e7bcdaa52129cffc7f4b04" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-rust" +version = "0.23.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca8ccb3e3a3495c8a943f6c3fd24c3804c471fd7f4f16087623c7fa4c0068e8a" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-typescript" +version = "0.23.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c5f76ed8d947a75cc446d5fccd8b602ebf0cde64ccf2ffa434d873d7a575eff" +dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "try-lock" version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" +[[package]] +name = "tungstenite" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e2e2ce1e47ed2994fd43b04c8f618008d4cabdd5ee34027cf14f9d918edd9c8" +dependencies = [ + "byteorder", + "bytes", + "data-encoding", + "http", + "httparse", + "log", + "rand 0.8.5", + "sha1", + "thiserror 1.0.69", + "utf-8", +] + [[package]] name = "tungstenite" version = "0.27.0" @@ -4336,7 +5445,7 @@ dependencies = [ "http", "httparse", "log", - "rand", + "rand 0.9.2", "sha1", "thiserror 2.0.18", "utf-8", @@ -4353,7 +5462,7 @@ dependencies = [ "http", "httparse", "log", - "rand", + "rand 0.9.2", "sha1", "thiserror 2.0.18", "utf-8", @@ -4424,6 +5533,12 @@ version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" +[[package]] +name = "unicode-width" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" + [[package]] name = "unicode-xid" version = "0.2.6" @@ -4461,6 +5576,12 @@ version = "0.7.6" 
source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" +[[package]] +name = "utf8-ranges" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fcfc827f90e53a02eaef5e535ee14266c1d569214c6aa70133a624d8a3164ba" + [[package]] name = "utf8_iter" version = "1.0.4" @@ -4638,7 +5759,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" dependencies = [ "anyhow", - "indexmap", + "indexmap 2.13.0", "wasm-encoder", "wasmparser", ] @@ -4664,7 +5785,7 @@ checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" dependencies = [ "bitflags", "hashbrown 0.15.5", - "indexmap", + "indexmap 2.13.0", "semver", ] @@ -4698,12 +5819,40 @@ dependencies = [ "rustls-pki-types", ] +[[package]] +name = "which" +version = "6.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4ee928febd44d98f2f459a4a79bd4d928591333a494a10a868418ac1b39cf1f" +dependencies = [ + "either", + "home", + "rustix 0.38.44", + "winsafe", +] + [[package]] name = "widestring" version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72069c3113ab32ab29e5584db3c6ec55d416895e60715417b5b883a357c3e471" +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + [[package]] name = "winapi-util" version = "0.1.11" @@ -4713,6 +5862,12 @@ dependencies = [ "windows-sys 0.61.2", ] 
+[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + [[package]] name = "windows-core" version = "0.62.2" @@ -5090,6 +6245,22 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "winreg" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a277a57398d4bfa075df44f501a17cfdf8542d224f0d36095a2adc7aee4ef0a5" +dependencies = [ + "cfg-if", + "windows-sys 0.48.0", +] + +[[package]] +name = "winsafe" +version = "0.0.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d135d17ab770252ad95e9a872d365cf3090e3be864a34ab46f48555993efc904" + [[package]] name = "wit-bindgen" version = "0.51.0" @@ -5106,7 +6277,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" dependencies = [ "anyhow", - "heck", + "heck 0.5.0", "wit-parser", ] @@ -5117,8 +6288,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" dependencies = [ "anyhow", - "heck", - "indexmap", + "heck 0.5.0", + "indexmap 2.13.0", "prettyplease", "syn", "wasm-metadata", @@ -5149,7 +6320,7 @@ checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" dependencies = [ "anyhow", "bitflags", - "indexmap", + "indexmap 2.13.0", "log", "serde", "serde_derive", @@ -5168,7 +6339,7 @@ checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" dependencies = [ "anyhow", "id-arena", - "indexmap", + "indexmap 2.13.0", "log", "semver", "serde", @@ -5307,3 +6478,31 @@ name = "zmij" version = "1.0.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" + +[[package]] +name = 
"zstd" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "7.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d" +dependencies = [ + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "2.0.16+zstd.1.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e19ebc2adc8f83e43039e79776e3fda8ca919132d68a1fed6a5faca2683748" +dependencies = [ + "cc", + "pkg-config", +] diff --git a/Cargo.toml b/Cargo.toml index 8e7a9a0..b2dcf1b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,11 @@ [workspace] -members = ["compliance-core", "compliance-agent", "compliance-dashboard"] +members = [ + "compliance-core", + "compliance-agent", + "compliance-dashboard", + "compliance-graph", + "compliance-dast", +] resolver = "2" [workspace.lints.clippy] diff --git a/Dockerfile.dashboard b/Dockerfile.dashboard index 252d952..8845e8c 100644 --- a/Dockerfile.dashboard +++ b/Dockerfile.dashboard @@ -4,15 +4,15 @@ RUN cargo install dioxus-cli --version 0.7.3 WORKDIR /app COPY . . 
-RUN dx build --release --features server --platform web +RUN dx build --release --package compliance-dashboard FROM debian:bookworm-slim RUN apt-get update && apt-get install -y ca-certificates libssl3 && rm -rf /var/lib/apt/lists/* -COPY --from=builder /app/target/release/compliance-dashboard /usr/local/bin/compliance-dashboard +WORKDIR /app +COPY --from=builder /app/target/dx/compliance-dashboard/release/web/compliance-dashboard /app/compliance-dashboard COPY --from=builder /app/target/dx/compliance-dashboard/release/web/public /app/public EXPOSE 8080 -WORKDIR /app -ENTRYPOINT ["compliance-dashboard"] +ENTRYPOINT ["./compliance-dashboard"] diff --git a/compliance-agent/Cargo.toml b/compliance-agent/Cargo.toml index d0a5e61..7349248 100644 --- a/compliance-agent/Cargo.toml +++ b/compliance-agent/Cargo.toml @@ -8,6 +8,8 @@ workspace = true [dependencies] compliance-core = { workspace = true, features = ["mongodb"] } +compliance-graph = { path = "../compliance-graph" } +compliance-dast = { path = "../compliance-dast" } serde = { workspace = true } serde_json = { workspace = true } tokio = { workspace = true } diff --git a/compliance-agent/src/api/handlers/dast.rs b/compliance-agent/src/api/handlers/dast.rs new file mode 100644 index 0000000..9046770 --- /dev/null +++ b/compliance-agent/src/api/handlers/dast.rs @@ -0,0 +1,226 @@ +use std::sync::Arc; + +use axum::extract::{Extension, Path, Query}; +use axum::http::StatusCode; +use axum::Json; +use mongodb::bson::doc; +use serde::Deserialize; + +use compliance_core::models::dast::{DastFinding, DastScanRun, DastTarget, DastTargetType}; + +use crate::agent::ComplianceAgent; + +use super::{collect_cursor_async, ApiResponse, PaginationParams}; + +type AgentExt = Extension>; + +#[derive(Deserialize)] +pub struct AddTargetRequest { + pub name: String, + pub base_url: String, + #[serde(default = "default_target_type")] + pub target_type: DastTargetType, + pub repo_id: Option, + #[serde(default)] + pub excluded_paths: Vec, + 
#[serde(default = "default_crawl_depth")] + pub max_crawl_depth: u32, + #[serde(default = "default_rate_limit")] + pub rate_limit: u32, + #[serde(default)] + pub allow_destructive: bool, +} + +fn default_target_type() -> DastTargetType { + DastTargetType::WebApp +} +fn default_crawl_depth() -> u32 { + 5 +} +fn default_rate_limit() -> u32 { + 10 +} + +/// GET /api/v1/dast/targets — List DAST targets +pub async fn list_targets( + Extension(agent): AgentExt, + Query(params): Query, +) -> Result>>, StatusCode> { + let db = &agent.db; + let skip = (params.page.saturating_sub(1)) * params.limit as u64; + let total = db + .dast_targets() + .count_documents(doc! {}) + .await + .unwrap_or(0); + + let targets = match db + .dast_targets() + .find(doc! {}) + .skip(skip) + .limit(params.limit) + .await + { + Ok(cursor) => collect_cursor_async(cursor).await, + Err(_) => Vec::new(), + }; + + Ok(Json(ApiResponse { + data: targets, + total: Some(total), + page: Some(params.page), + })) +} + +/// POST /api/v1/dast/targets — Add a new DAST target +pub async fn add_target( + Extension(agent): AgentExt, + Json(req): Json, +) -> Result>, StatusCode> { + let mut target = DastTarget::new(req.name, req.base_url, req.target_type); + target.repo_id = req.repo_id; + target.excluded_paths = req.excluded_paths; + target.max_crawl_depth = req.max_crawl_depth; + target.rate_limit = req.rate_limit; + target.allow_destructive = req.allow_destructive; + + agent + .db + .dast_targets() + .insert_one(&target) + .await + .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; + + Ok(Json(ApiResponse { + data: target, + total: None, + page: None, + })) +} + +/// POST /api/v1/dast/targets/:id/scan — Trigger DAST scan +pub async fn trigger_scan( + Extension(agent): AgentExt, + Path(id): Path, +) -> Result, StatusCode> { + let oid = + mongodb::bson::oid::ObjectId::parse_str(&id).map_err(|_| StatusCode::BAD_REQUEST)?; + + let target = agent + .db + .dast_targets() + .find_one(doc! 
{ "_id": oid }) + .await + .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)? + .ok_or(StatusCode::NOT_FOUND)?; + + let db = agent.db.clone(); + tokio::spawn(async move { + let orchestrator = compliance_dast::DastOrchestrator::new(100); + match orchestrator.run_scan(&target, Vec::new()).await { + Ok((scan_run, findings)) => { + if let Err(e) = db.dast_scan_runs().insert_one(&scan_run).await { + tracing::error!("Failed to store DAST scan run: {e}"); + } + for finding in &findings { + if let Err(e) = db.dast_findings().insert_one(finding).await { + tracing::error!("Failed to store DAST finding: {e}"); + } + } + tracing::info!("DAST scan complete: {} findings", findings.len()); + } + Err(e) => { + tracing::error!("DAST scan failed: {e}"); + } + } + }); + + Ok(Json(serde_json::json!({ "status": "dast_scan_triggered" }))) +} + +/// GET /api/v1/dast/scan-runs — List DAST scan runs +pub async fn list_scan_runs( + Extension(agent): AgentExt, + Query(params): Query, +) -> Result>>, StatusCode> { + let db = &agent.db; + let skip = (params.page.saturating_sub(1)) * params.limit as u64; + let total = db + .dast_scan_runs() + .count_documents(doc! {}) + .await + .unwrap_or(0); + + let runs = match db + .dast_scan_runs() + .find(doc! {}) + .sort(doc! { "started_at": -1 }) + .skip(skip) + .limit(params.limit) + .await + { + Ok(cursor) => collect_cursor_async(cursor).await, + Err(_) => Vec::new(), + }; + + Ok(Json(ApiResponse { + data: runs, + total: Some(total), + page: Some(params.page), + })) +} + +/// GET /api/v1/dast/findings — List DAST findings +pub async fn list_findings( + Extension(agent): AgentExt, + Query(params): Query, +) -> Result>>, StatusCode> { + let db = &agent.db; + let skip = (params.page.saturating_sub(1)) * params.limit as u64; + let total = db + .dast_findings() + .count_documents(doc! {}) + .await + .unwrap_or(0); + + let findings = match db + .dast_findings() + .find(doc! {}) + .sort(doc! 
{ "created_at": -1 }) + .skip(skip) + .limit(params.limit) + .await + { + Ok(cursor) => collect_cursor_async(cursor).await, + Err(_) => Vec::new(), + }; + + Ok(Json(ApiResponse { + data: findings, + total: Some(total), + page: Some(params.page), + })) +} + +/// GET /api/v1/dast/findings/:id — Finding detail with evidence +pub async fn get_finding( + Extension(agent): AgentExt, + Path(id): Path, +) -> Result>, StatusCode> { + let oid = + mongodb::bson::oid::ObjectId::parse_str(&id).map_err(|_| StatusCode::BAD_REQUEST)?; + + let finding = agent + .db + .dast_findings() + .find_one(doc! { "_id": oid }) + .await + .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)? + .ok_or(StatusCode::NOT_FOUND)?; + + Ok(Json(ApiResponse { + data: finding, + total: None, + page: None, + })) +} diff --git a/compliance-agent/src/api/handlers/graph.rs b/compliance-agent/src/api/handlers/graph.rs new file mode 100644 index 0000000..7dc845f --- /dev/null +++ b/compliance-agent/src/api/handlers/graph.rs @@ -0,0 +1,256 @@ +use std::sync::Arc; + +use axum::extract::{Extension, Path, Query}; +use axum::http::StatusCode; +use axum::Json; +use mongodb::bson::doc; +use serde::{Deserialize, Serialize}; + +use compliance_core::models::graph::{CodeEdge, CodeNode, GraphBuildRun, ImpactAnalysis}; + +use crate::agent::ComplianceAgent; + +use super::{collect_cursor_async, ApiResponse}; + +type AgentExt = Extension>; + +#[derive(Serialize)] +pub struct GraphData { + pub build: Option, + pub nodes: Vec, + pub edges: Vec, +} + +#[derive(Deserialize)] +pub struct SearchParams { + pub q: String, + #[serde(default = "default_search_limit")] + pub limit: usize, +} + +fn default_search_limit() -> usize { + 50 +} + +/// GET /api/v1/graph/:repo_id — Full graph data +pub async fn get_graph( + Extension(agent): AgentExt, + Path(repo_id): Path, +) -> Result>, StatusCode> { + let db = &agent.db; + + // Get latest build + let build: Option = db + .graph_builds() + .find_one(doc! { "repo_id": &repo_id }) + .sort(doc! 
{ "started_at": -1 }) + .await + .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; + + let (nodes, edges) = if let Some(ref b) = build { + let build_id = b.id.map(|oid| oid.to_hex()).unwrap_or_default(); + let filter = doc! { "repo_id": &repo_id, "graph_build_id": &build_id }; + + let nodes: Vec = match db.graph_nodes().find(filter.clone()).await { + Ok(cursor) => collect_cursor_async(cursor).await, + Err(_) => Vec::new(), + }; + let edges: Vec = match db.graph_edges().find(filter).await { + Ok(cursor) => collect_cursor_async(cursor).await, + Err(_) => Vec::new(), + }; + (nodes, edges) + } else { + (Vec::new(), Vec::new()) + }; + + Ok(Json(ApiResponse { + data: GraphData { + build, + nodes, + edges, + }, + total: None, + page: None, + })) +} + +/// GET /api/v1/graph/:repo_id/nodes — List nodes (paginated) +pub async fn get_nodes( + Extension(agent): AgentExt, + Path(repo_id): Path, +) -> Result>>, StatusCode> { + let db = &agent.db; + let filter = doc! { "repo_id": &repo_id }; + + let nodes: Vec = match db.graph_nodes().find(filter).await { + Ok(cursor) => collect_cursor_async(cursor).await, + Err(_) => Vec::new(), + }; + + let total = nodes.len() as u64; + Ok(Json(ApiResponse { + data: nodes, + total: Some(total), + page: None, + })) +} + +/// GET /api/v1/graph/:repo_id/communities — List detected communities +pub async fn get_communities( + Extension(agent): AgentExt, + Path(repo_id): Path, +) -> Result>>, StatusCode> { + let db = &agent.db; + let filter = doc! 
{ "repo_id": &repo_id }; + + let nodes: Vec = match db.graph_nodes().find(filter).await { + Ok(cursor) => collect_cursor_async(cursor).await, + Err(_) => Vec::new(), + }; + + let mut communities: std::collections::HashMap> = + std::collections::HashMap::new(); + for node in &nodes { + if let Some(cid) = node.community_id { + communities + .entry(cid) + .or_default() + .push(node.qualified_name.clone()); + } + } + + let mut result: Vec = communities + .into_iter() + .map(|(id, members)| CommunityInfo { + community_id: id, + member_count: members.len() as u32, + members, + }) + .collect(); + result.sort_by_key(|c| c.community_id); + + let total = result.len() as u64; + Ok(Json(ApiResponse { + data: result, + total: Some(total), + page: None, + })) +} + +#[derive(Serialize)] +pub struct CommunityInfo { + pub community_id: u32, + pub member_count: u32, + pub members: Vec, +} + +/// GET /api/v1/graph/:repo_id/impact/:finding_id — Impact analysis +pub async fn get_impact( + Extension(agent): AgentExt, + Path((repo_id, finding_id)): Path<(String, String)>, +) -> Result>>, StatusCode> { + let db = &agent.db; + let filter = doc! { "repo_id": &repo_id, "finding_id": &finding_id }; + + let impact = db + .impact_analyses() + .find_one(filter) + .await + .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; + + Ok(Json(ApiResponse { + data: impact, + total: None, + page: None, + })) +} + +/// GET /api/v1/graph/:repo_id/search — BM25 symbol search +pub async fn search_symbols( + Extension(agent): AgentExt, + Path(repo_id): Path, + Query(params): Query, +) -> Result>>, StatusCode> { + let db = &agent.db; + + // Simple text search on qualified_name and name fields + let filter = doc! 
{ + "repo_id": &repo_id, + "name": { "$regex": ¶ms.q, "$options": "i" }, + }; + + let nodes: Vec = match db + .graph_nodes() + .find(filter) + .limit(params.limit as i64) + .await + { + Ok(cursor) => collect_cursor_async(cursor).await, + Err(_) => Vec::new(), + }; + + let total = nodes.len() as u64; + Ok(Json(ApiResponse { + data: nodes, + total: Some(total), + page: None, + })) +} + +/// POST /api/v1/graph/:repo_id/build — Trigger graph rebuild +pub async fn trigger_build( + Extension(agent): AgentExt, + Path(repo_id): Path, +) -> Result, StatusCode> { + let agent_clone = (*agent).clone(); + tokio::spawn(async move { + let repo = match agent_clone + .db + .repositories() + .find_one(doc! { "_id": mongodb::bson::oid::ObjectId::parse_str(&repo_id).ok() }) + .await + { + Ok(Some(r)) => r, + _ => { + tracing::error!("Repository {repo_id} not found for graph build"); + return; + } + }; + + let git_ops = crate::pipeline::git::GitOps::new(&agent_clone.config.git_clone_base_path); + let repo_path = match git_ops.clone_or_fetch(&repo.git_url, &repo.name) { + Ok(p) => p, + Err(e) => { + tracing::error!("Failed to clone repo for graph build: {e}"); + return; + } + }; + + let graph_build_id = uuid::Uuid::new_v4().to_string(); + let engine = compliance_graph::GraphEngine::new(50_000); + + match engine.build_graph(&repo_path, &repo_id, &graph_build_id) { + Ok((code_graph, build_run)) => { + let store = + compliance_graph::graph::persistence::GraphStore::new(agent_clone.db.inner()); + let _ = store.delete_repo_graph(&repo_id).await; + let _ = store + .store_graph(&build_run, &code_graph.nodes, &code_graph.edges) + .await; + tracing::info!( + "[{repo_id}] Graph rebuild complete: {} nodes, {} edges", + build_run.node_count, + build_run.edge_count + ); + } + Err(e) => { + tracing::error!("[{repo_id}] Graph rebuild failed: {e}"); + } + } + }); + + Ok(Json( + serde_json::json!({ "status": "graph_build_triggered" }), + )) +} diff --git a/compliance-agent/src/api/handlers/mod.rs 
b/compliance-agent/src/api/handlers/mod.rs index d859d89..39af052 100644 --- a/compliance-agent/src/api/handlers/mod.rs +++ b/compliance-agent/src/api/handlers/mod.rs @@ -1,3 +1,6 @@ +pub mod dast; +pub mod graph; + use std::sync::Arc; #[allow(unused_imports)] @@ -410,8 +413,11 @@ async fn collect_cursor_async( ) -> Vec { use futures_util::StreamExt; let mut items = Vec::new(); - while let Some(Ok(item)) = cursor.next().await { - items.push(item); + while let Some(result) = cursor.next().await { + match result { + Ok(item) => items.push(item), + Err(e) => tracing::warn!("Failed to deserialize document: {e}"), + } } items } diff --git a/compliance-agent/src/api/routes.rs b/compliance-agent/src/api/routes.rs index 88f5bdf..92a1b1d 100644 --- a/compliance-agent/src/api/routes.rs +++ b/compliance-agent/src/api/routes.rs @@ -22,4 +22,54 @@ pub fn build_router() -> Router { .route("/api/v1/sbom", get(handlers::list_sbom)) .route("/api/v1/issues", get(handlers::list_issues)) .route("/api/v1/scan-runs", get(handlers::list_scan_runs)) + // Graph API endpoints + .route( + "/api/v1/graph/{repo_id}", + get(handlers::graph::get_graph), + ) + .route( + "/api/v1/graph/{repo_id}/nodes", + get(handlers::graph::get_nodes), + ) + .route( + "/api/v1/graph/{repo_id}/communities", + get(handlers::graph::get_communities), + ) + .route( + "/api/v1/graph/{repo_id}/impact/{finding_id}", + get(handlers::graph::get_impact), + ) + .route( + "/api/v1/graph/{repo_id}/search", + get(handlers::graph::search_symbols), + ) + .route( + "/api/v1/graph/{repo_id}/build", + post(handlers::graph::trigger_build), + ) + // DAST API endpoints + .route( + "/api/v1/dast/targets", + get(handlers::dast::list_targets), + ) + .route( + "/api/v1/dast/targets", + post(handlers::dast::add_target), + ) + .route( + "/api/v1/dast/targets/{id}/scan", + post(handlers::dast::trigger_scan), + ) + .route( + "/api/v1/dast/scan-runs", + get(handlers::dast::list_scan_runs), + ) + .route( + "/api/v1/dast/findings", + 
get(handlers::dast::list_findings), + ) + .route( + "/api/v1/dast/findings/{id}", + get(handlers::dast::get_finding), + ) } diff --git a/compliance-agent/src/database.rs b/compliance-agent/src/database.rs index a9010d2..3f32df5 100644 --- a/compliance-agent/src/database.rs +++ b/compliance-agent/src/database.rs @@ -88,6 +88,70 @@ impl Database { ) .await?; + // graph_nodes: compound (repo_id, graph_build_id) + self.graph_nodes() + .create_index( + IndexModel::builder() + .keys(doc! { "repo_id": 1, "graph_build_id": 1 }) + .build(), + ) + .await?; + + // graph_edges: compound (repo_id, graph_build_id) + self.graph_edges() + .create_index( + IndexModel::builder() + .keys(doc! { "repo_id": 1, "graph_build_id": 1 }) + .build(), + ) + .await?; + + // graph_builds: compound (repo_id, started_at DESC) + self.graph_builds() + .create_index( + IndexModel::builder() + .keys(doc! { "repo_id": 1, "started_at": -1 }) + .build(), + ) + .await?; + + // impact_analyses: unique (repo_id, finding_id) + self.impact_analyses() + .create_index( + IndexModel::builder() + .keys(doc! { "repo_id": 1, "finding_id": 1 }) + .options(IndexOptions::builder().unique(true).build()) + .build(), + ) + .await?; + + // dast_targets: index on repo_id + self.dast_targets() + .create_index( + IndexModel::builder() + .keys(doc! { "repo_id": 1 }) + .build(), + ) + .await?; + + // dast_scan_runs: compound (target_id, started_at DESC) + self.dast_scan_runs() + .create_index( + IndexModel::builder() + .keys(doc! { "target_id": 1, "started_at": -1 }) + .build(), + ) + .await?; + + // dast_findings: compound (scan_run_id, vuln_type) + self.dast_findings() + .create_index( + IndexModel::builder() + .keys(doc! 
{ "scan_run_id": 1, "vuln_type": 1 }) + .build(), + ) + .await?; + tracing::info!("Database indexes ensured"); Ok(()) } @@ -116,8 +180,43 @@ impl Database { self.inner.collection("tracker_issues") } + // Graph collections + pub fn graph_nodes(&self) -> Collection { + self.inner.collection("graph_nodes") + } + + pub fn graph_edges(&self) -> Collection { + self.inner.collection("graph_edges") + } + + pub fn graph_builds(&self) -> Collection { + self.inner.collection("graph_builds") + } + + pub fn impact_analyses(&self) -> Collection { + self.inner.collection("impact_analyses") + } + + // DAST collections + pub fn dast_targets(&self) -> Collection { + self.inner.collection("dast_targets") + } + + pub fn dast_scan_runs(&self) -> Collection { + self.inner.collection("dast_scan_runs") + } + + pub fn dast_findings(&self) -> Collection { + self.inner.collection("dast_findings") + } + #[allow(dead_code)] pub fn raw_collection(&self, name: &str) -> Collection { self.inner.collection(name) } + + /// Get the raw MongoDB database handle (for graph persistence) + pub fn inner(&self) -> &mongodb::Database { + &self.inner + } } diff --git a/compliance-agent/src/llm/triage.rs b/compliance-agent/src/llm/triage.rs index 87d745c..a0625b2 100644 --- a/compliance-agent/src/llm/triage.rs +++ b/compliance-agent/src/llm/triage.rs @@ -3,6 +3,7 @@ use std::sync::Arc; use compliance_core::models::{Finding, FindingStatus}; use crate::llm::LlmClient; +use crate::pipeline::orchestrator::GraphContext; const TRIAGE_SYSTEM_PROMPT: &str = r#"You are a security finding triage expert. Analyze the following security finding and determine: 1. Is this a true positive? (yes/no) @@ -12,11 +13,15 @@ const TRIAGE_SYSTEM_PROMPT: &str = r#"You are a security finding triage expert. 
Respond in JSON format: {"true_positive": true/false, "confidence": N, "remediation": "..."}"#; -pub async fn triage_findings(llm: &Arc, findings: &mut Vec) -> usize { +pub async fn triage_findings( + llm: &Arc, + findings: &mut Vec, + graph_context: Option<&GraphContext>, +) -> usize { let mut passed = 0; for finding in findings.iter_mut() { - let user_prompt = format!( + let mut user_prompt = format!( "Scanner: {}\nRule: {}\nSeverity: {}\nTitle: {}\nDescription: {}\nFile: {}\nLine: {}\nCode: {}", finding.scanner, finding.rule_id.as_deref().unwrap_or("N/A"), @@ -28,6 +33,37 @@ pub async fn triage_findings(llm: &Arc, findings: &mut Vec) finding.code_snippet.as_deref().unwrap_or("N/A"), ); + // Enrich with graph context if available + if let Some(ctx) = graph_context { + if let Some(impact) = ctx + .impacts + .iter() + .find(|i| i.finding_id == finding.fingerprint) + { + user_prompt.push_str(&format!( + "\n\n--- Code Graph Context ---\n\ + Blast radius: {} nodes affected\n\ + Entry points affected: {}\n\ + Direct callers: {}\n\ + Communities affected: {}\n\ + Call chains: {}", + impact.blast_radius, + if impact.affected_entry_points.is_empty() { + "none".to_string() + } else { + impact.affected_entry_points.join(", ") + }, + if impact.direct_callers.is_empty() { + "none".to_string() + } else { + impact.direct_callers.join(", ") + }, + impact.affected_communities.len(), + impact.call_chains.len(), + )); + } + } + match llm .chat(TRIAGE_SYSTEM_PROMPT, &user_prompt, Some(0.1)) .await diff --git a/compliance-agent/src/pipeline/orchestrator.rs b/compliance-agent/src/pipeline/orchestrator.rs index 0393dee..3abcecc 100644 --- a/compliance-agent/src/pipeline/orchestrator.rs +++ b/compliance-agent/src/pipeline/orchestrator.rs @@ -15,6 +15,16 @@ use crate::pipeline::patterns::{GdprPatternScanner, OAuthPatternScanner}; use crate::pipeline::sbom::SbomScanner; use crate::pipeline::semgrep::SemgrepScanner; +/// Context from graph analysis passed to LLM triage for enhanced 
filtering +#[derive(Debug)] +#[allow(dead_code)] +pub struct GraphContext { + pub node_count: u32, + pub edge_count: u32, + pub community_count: u32, + pub impacts: Vec, +} + pub struct PipelineOrchestrator { config: AgentConfig, db: Database, @@ -172,13 +182,30 @@ impl PipelineOrchestrator { Err(e) => tracing::warn!("[{repo_id}] OAuth pattern scan failed: {e}"), } - // Stage 5: LLM Triage + // Stage 4.5: Graph Building + tracing::info!("[{repo_id}] Stage 4.5: Graph Building"); + self.update_phase(scan_run_id, "graph_building").await; + let graph_context = match self.build_code_graph(&repo_path, &repo_id, &all_findings).await + { + Ok(ctx) => Some(ctx), + Err(e) => { + tracing::warn!("[{repo_id}] Graph building failed: {e}"); + None + } + }; + + // Stage 5: LLM Triage (enhanced with graph context) tracing::info!( "[{repo_id}] Stage 5: LLM Triage ({} findings)", all_findings.len() ); self.update_phase(scan_run_id, "llm_triage").await; - let triaged = crate::llm::triage::triage_findings(&self.llm, &mut all_findings).await; + let triaged = crate::llm::triage::triage_findings( + &self.llm, + &mut all_findings, + graph_context.as_ref(), + ) + .await; tracing::info!("[{repo_id}] Triaged: {triaged} findings passed confidence threshold"); // Dedup against existing findings and insert new ones @@ -250,10 +277,121 @@ impl PipelineOrchestrator { ) .await?; + // Stage 8: DAST (async, optional — only if a DastTarget is configured) + tracing::info!("[{repo_id}] Stage 8: Checking for DAST targets"); + self.update_phase(scan_run_id, "dast_scanning").await; + self.maybe_trigger_dast(&repo_id, scan_run_id).await; + tracing::info!("[{repo_id}] Scan complete: {new_count} new findings"); Ok(new_count) } + /// Build the code knowledge graph for a repo and compute impact analyses + async fn build_code_graph( + &self, + repo_path: &std::path::Path, + repo_id: &str, + findings: &[Finding], + ) -> Result { + let graph_build_id = uuid::Uuid::new_v4().to_string(); + let engine = 
compliance_graph::GraphEngine::new(50_000); + + let (mut code_graph, build_run) = engine + .build_graph(repo_path, repo_id, &graph_build_id) + .map_err(|e| AgentError::Other(format!("Graph build error: {e}")))?; + + // Apply community detection + compliance_graph::graph::community::apply_communities(&mut code_graph); + + // Store graph in MongoDB + let store = compliance_graph::graph::persistence::GraphStore::new(self.db.inner()); + store + .delete_repo_graph(repo_id) + .await + .map_err(|e| AgentError::Other(format!("Graph cleanup error: {e}")))?; + store + .store_graph(&build_run, &code_graph.nodes, &code_graph.edges) + .await + .map_err(|e| AgentError::Other(format!("Graph store error: {e}")))?; + + // Compute impact analysis for each finding + let analyzer = compliance_graph::GraphEngine::impact_analyzer(&code_graph); + let mut impacts = Vec::new(); + + for finding in findings { + if let Some(file_path) = &finding.file_path { + let impact = analyzer.analyze( + repo_id, + &finding.fingerprint, + &graph_build_id, + file_path, + finding.line_number, + ); + store + .store_impact(&impact) + .await + .map_err(|e| AgentError::Other(format!("Impact store error: {e}")))?; + impacts.push(impact); + } + } + + Ok(GraphContext { + node_count: build_run.node_count, + edge_count: build_run.edge_count, + community_count: build_run.community_count, + impacts, + }) + } + + /// Trigger DAST scan if a target is configured for this repo + async fn maybe_trigger_dast(&self, repo_id: &str, scan_run_id: &str) { + use futures_util::TryStreamExt; + + let filter = mongodb::bson::doc! 
{ "repo_id": repo_id }; + let targets: Vec = match self + .db + .dast_targets() + .find(filter) + .await + { + Ok(cursor) => cursor.try_collect().await.unwrap_or_default(), + Err(_) => return, + }; + + if targets.is_empty() { + tracing::info!("[{repo_id}] No DAST targets configured, skipping"); + return; + } + + for target in targets { + let db = self.db.clone(); + let scan_run_id = scan_run_id.to_string(); + tokio::spawn(async move { + let orchestrator = compliance_dast::DastOrchestrator::new(100); + match orchestrator.run_scan(&target, Vec::new()).await { + Ok((mut scan_run, findings)) => { + scan_run.sast_scan_run_id = Some(scan_run_id); + if let Err(e) = db.dast_scan_runs().insert_one(&scan_run).await { + tracing::error!("Failed to store DAST scan run: {e}"); + } + for finding in &findings { + if let Err(e) = db.dast_findings().insert_one(finding).await { + tracing::error!("Failed to store DAST finding: {e}"); + } + } + tracing::info!( + "DAST scan complete: {} findings", + findings.len() + ); + } + Err(e) => { + tracing::error!("DAST scan failed: {e}"); + } + } + }); + } + } + async fn update_phase(&self, scan_run_id: &str, phase: &str) { if let Ok(oid) = mongodb::bson::oid::ObjectId::parse_str(scan_run_id) { let _ = self diff --git a/compliance-core/Cargo.toml b/compliance-core/Cargo.toml index 4bf4ce0..e6e85b6 100644 --- a/compliance-core/Cargo.toml +++ b/compliance-core/Cargo.toml @@ -19,5 +19,5 @@ sha2 = { workspace = true } hex = { workspace = true } uuid = { workspace = true } secrecy = { workspace = true } -bson = "2" +bson = { version = "2", features = ["chrono-0_4"] } mongodb = { workspace = true, optional = true } diff --git a/compliance-core/src/error.rs b/compliance-core/src/error.rs index a4f0633..1e9db6e 100644 --- a/compliance-core/src/error.rs +++ b/compliance-core/src/error.rs @@ -38,6 +38,12 @@ pub enum CoreError { #[error("IO error: {0}")] Io(#[from] std::io::Error), + #[error("Graph error: {0}")] + Graph(String), + + #[error("DAST error: 
{0}")] + Dast(String), + #[error("Not found: {0}")] NotFound(String), diff --git a/compliance-core/src/models/dast.rs b/compliance-core/src/models/dast.rs new file mode 100644 index 0000000..521e513 --- /dev/null +++ b/compliance-core/src/models/dast.rs @@ -0,0 +1,276 @@ +use chrono::{DateTime, Utc}; +use serde::{Deserialize, Serialize}; + +use super::finding::Severity; + +/// Type of DAST target application +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +pub enum DastTargetType { + WebApp, + RestApi, + GraphQl, +} + +impl std::fmt::Display for DastTargetType { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::WebApp => write!(f, "webapp"), + Self::RestApi => write!(f, "rest_api"), + Self::GraphQl => write!(f, "graphql"), + } + } +} + +/// Authentication configuration for DAST target +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct DastAuthConfig { + /// Authentication method: "none", "basic", "bearer", "cookie", "form" + pub method: String, + /// Login URL for form-based auth + pub login_url: Option, + /// Username or token + pub username: Option, + /// Password (stored encrypted in practice) + pub password: Option, + /// Bearer token + pub token: Option, + /// Custom headers for auth + pub headers: Option>, +} + +/// A target for DAST scanning +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct DastTarget { + #[serde(rename = "_id", skip_serializing_if = "Option::is_none")] + pub id: Option, + pub name: String, + pub base_url: String, + pub target_type: DastTargetType, + pub auth_config: Option, + /// Linked repository ID (for SAST correlation) + pub repo_id: Option, + /// URL paths to exclude from scanning + pub excluded_paths: Vec, + /// Maximum crawl depth + pub max_crawl_depth: u32, + /// Rate limit (requests per second) + pub rate_limit: u32, + /// Whether destructive tests (DELETE, PUT) are allowed + pub allow_destructive: bool, + pub 
created_at: DateTime, + pub updated_at: DateTime, +} + +impl DastTarget { + pub fn new(name: String, base_url: String, target_type: DastTargetType) -> Self { + let now = Utc::now(); + Self { + id: None, + name, + base_url, + target_type, + auth_config: None, + repo_id: None, + excluded_paths: Vec::new(), + max_crawl_depth: 5, + rate_limit: 10, + allow_destructive: false, + created_at: now, + updated_at: now, + } + } +} + +/// Phase of a DAST scan +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +pub enum DastScanPhase { + Reconnaissance, + Crawling, + VulnerabilityAnalysis, + Exploitation, + Reporting, + Completed, +} + +impl std::fmt::Display for DastScanPhase { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Reconnaissance => write!(f, "reconnaissance"), + Self::Crawling => write!(f, "crawling"), + Self::VulnerabilityAnalysis => write!(f, "vulnerability_analysis"), + Self::Exploitation => write!(f, "exploitation"), + Self::Reporting => write!(f, "reporting"), + Self::Completed => write!(f, "completed"), + } + } +} + +/// Status of a DAST scan run +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +pub enum DastScanStatus { + Running, + Completed, + Failed, + Cancelled, +} + +/// A DAST scan run +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct DastScanRun { + #[serde(rename = "_id", skip_serializing_if = "Option::is_none")] + pub id: Option, + pub target_id: String, + pub status: DastScanStatus, + pub current_phase: DastScanPhase, + pub phases_completed: Vec, + /// Number of endpoints discovered during crawling + pub endpoints_discovered: u32, + /// Number of findings + pub findings_count: u32, + /// Number of confirmed exploitable findings + pub exploitable_count: u32, + pub error_message: Option, + /// Linked SAST scan run ID (if triggered as part of pipeline) + pub sast_scan_run_id: Option, + pub 
started_at: DateTime, + pub completed_at: Option>, +} + +impl DastScanRun { + pub fn new(target_id: String) -> Self { + Self { + id: None, + target_id, + status: DastScanStatus::Running, + current_phase: DastScanPhase::Reconnaissance, + phases_completed: Vec::new(), + endpoints_discovered: 0, + findings_count: 0, + exploitable_count: 0, + error_message: None, + sast_scan_run_id: None, + started_at: Utc::now(), + completed_at: None, + } + } +} + +/// Type of DAST vulnerability +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +pub enum DastVulnType { + SqlInjection, + Xss, + AuthBypass, + Ssrf, + ApiMisconfiguration, + OpenRedirect, + Idor, + InformationDisclosure, + SecurityMisconfiguration, + BrokenAuth, + Other, +} + +impl std::fmt::Display for DastVulnType { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::SqlInjection => write!(f, "sql_injection"), + Self::Xss => write!(f, "xss"), + Self::AuthBypass => write!(f, "auth_bypass"), + Self::Ssrf => write!(f, "ssrf"), + Self::ApiMisconfiguration => write!(f, "api_misconfiguration"), + Self::OpenRedirect => write!(f, "open_redirect"), + Self::Idor => write!(f, "idor"), + Self::InformationDisclosure => write!(f, "information_disclosure"), + Self::SecurityMisconfiguration => write!(f, "security_misconfiguration"), + Self::BrokenAuth => write!(f, "broken_auth"), + Self::Other => write!(f, "other"), + } + } +} + +/// Evidence collected during DAST testing +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct DastEvidence { + /// HTTP request that triggered the finding + pub request_method: String, + pub request_url: String, + pub request_headers: Option>, + pub request_body: Option, + /// HTTP response + pub response_status: u16, + pub response_headers: Option>, + /// Relevant snippet of response body + pub response_snippet: Option, + /// Path to screenshot file (if captured) + pub screenshot_path: Option, + /// The 
payload that triggered the vulnerability + pub payload: Option, + /// Timing information (for timing-based attacks) + pub response_time_ms: Option, +} + +/// A finding from DAST scanning +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct DastFinding { + #[serde(rename = "_id", skip_serializing_if = "Option::is_none")] + pub id: Option, + pub scan_run_id: String, + pub target_id: String, + pub vuln_type: DastVulnType, + pub title: String, + pub description: String, + pub severity: Severity, + pub cwe: Option, + /// The URL endpoint where the vulnerability was found + pub endpoint: String, + /// HTTP method + pub method: String, + /// Parameter that is vulnerable + pub parameter: Option, + /// Whether exploitability was confirmed with a working payload + pub exploitable: bool, + /// Evidence chain + pub evidence: Vec, + /// Remediation guidance + pub remediation: Option, + /// Linked SAST finding ID (if correlated) + pub linked_sast_finding_id: Option, + pub created_at: DateTime, +} + +impl DastFinding { + pub fn new( + scan_run_id: String, + target_id: String, + vuln_type: DastVulnType, + title: String, + description: String, + severity: Severity, + endpoint: String, + method: String, + ) -> Self { + Self { + id: None, + scan_run_id, + target_id, + vuln_type, + title, + description, + severity, + cwe: None, + endpoint, + method, + parameter: None, + exploitable: false, + evidence: Vec::new(), + remediation: None, + linked_sast_finding_id: None, + created_at: Utc::now(), + } + } +} diff --git a/compliance-core/src/models/graph.rs b/compliance-core/src/models/graph.rs new file mode 100644 index 0000000..7bbd9be --- /dev/null +++ b/compliance-core/src/models/graph.rs @@ -0,0 +1,186 @@ +use chrono::{DateTime, Utc}; +use serde::{Deserialize, Serialize}; + +/// Type of code node in the knowledge graph +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)] +#[serde(rename_all = "snake_case")] +pub enum CodeNodeKind { + Function, + Method, + Class, 
+ Struct, + Enum, + Interface, + Trait, + Module, + File, +} + +impl std::fmt::Display for CodeNodeKind { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Function => write!(f, "function"), + Self::Method => write!(f, "method"), + Self::Class => write!(f, "class"), + Self::Struct => write!(f, "struct"), + Self::Enum => write!(f, "enum"), + Self::Interface => write!(f, "interface"), + Self::Trait => write!(f, "trait"), + Self::Module => write!(f, "module"), + Self::File => write!(f, "file"), + } + } +} + +/// A node in the code knowledge graph +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct CodeNode { + #[serde(rename = "_id", skip_serializing_if = "Option::is_none")] + pub id: Option, + pub repo_id: String, + pub graph_build_id: String, + /// Unique identifier within the graph (e.g., "src/main.rs::main") + pub qualified_name: String, + pub name: String, + pub kind: CodeNodeKind, + pub file_path: String, + pub start_line: u32, + pub end_line: u32, + /// Language of the source file + pub language: String, + /// Community ID from Louvain clustering + pub community_id: Option, + /// Whether this is a public entry point (main, exported fn, HTTP handler, etc.) 
+ pub is_entry_point: bool, + /// Internal petgraph node index for fast lookups + #[serde(skip_serializing_if = "Option::is_none")] + pub graph_index: Option, +} + +/// Type of relationship between code nodes +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)] +#[serde(rename_all = "snake_case")] +pub enum CodeEdgeKind { + Calls, + Imports, + Inherits, + Implements, + Contains, + /// A type reference (e.g., function parameter type, return type) + TypeRef, +} + +impl std::fmt::Display for CodeEdgeKind { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Calls => write!(f, "calls"), + Self::Imports => write!(f, "imports"), + Self::Inherits => write!(f, "inherits"), + Self::Implements => write!(f, "implements"), + Self::Contains => write!(f, "contains"), + Self::TypeRef => write!(f, "type_ref"), + } + } +} + +/// An edge in the code knowledge graph +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct CodeEdge { + #[serde(rename = "_id", skip_serializing_if = "Option::is_none")] + pub id: Option, + pub repo_id: String, + pub graph_build_id: String, + /// Qualified name of source node + pub source: String, + /// Qualified name of target node + pub target: String, + pub kind: CodeEdgeKind, + /// File where this relationship was found + pub file_path: String, + pub line_number: Option, +} + +/// Status of a graph build operation +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +pub enum GraphBuildStatus { + Running, + Completed, + Failed, +} + +/// Tracks a graph build operation for a repo/commit +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct GraphBuildRun { + #[serde(rename = "_id", skip_serializing_if = "Option::is_none")] + pub id: Option, + pub repo_id: String, + pub commit_sha: Option, + pub status: GraphBuildStatus, + pub node_count: u32, + pub edge_count: u32, + pub community_count: u32, + pub languages_parsed: Vec, + pub 
error_message: Option, + pub started_at: DateTime, + pub completed_at: Option>, +} + +impl GraphBuildRun { + pub fn new(repo_id: String) -> Self { + Self { + id: None, + repo_id, + commit_sha: None, + status: GraphBuildStatus::Running, + node_count: 0, + edge_count: 0, + community_count: 0, + languages_parsed: Vec::new(), + error_message: None, + started_at: Utc::now(), + completed_at: None, + } + } +} + +/// Impact analysis result for a finding +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ImpactAnalysis { + #[serde(rename = "_id", skip_serializing_if = "Option::is_none")] + pub id: Option, + pub repo_id: String, + pub finding_id: String, + pub graph_build_id: String, + /// Number of nodes reachable from the finding location + pub blast_radius: u32, + /// Entry points affected by this finding (via reverse call chain) + pub affected_entry_points: Vec, + /// Call chains from entry points to the finding location + pub call_chains: Vec>, + /// Community IDs affected + pub affected_communities: Vec, + /// Direct callers of the affected function + pub direct_callers: Vec, + /// Direct callees of the affected function + pub direct_callees: Vec, + pub created_at: DateTime, +} + +impl ImpactAnalysis { + pub fn new(repo_id: String, finding_id: String, graph_build_id: String) -> Self { + Self { + id: None, + repo_id, + finding_id, + graph_build_id, + blast_radius: 0, + affected_entry_points: Vec::new(), + call_chains: Vec::new(), + affected_communities: Vec::new(), + direct_callers: Vec::new(), + direct_callees: Vec::new(), + created_at: Utc::now(), + } + } +} diff --git a/compliance-core/src/models/mod.rs b/compliance-core/src/models/mod.rs index 099b5bc..1a210a5 100644 --- a/compliance-core/src/models/mod.rs +++ b/compliance-core/src/models/mod.rs @@ -1,12 +1,22 @@ pub mod cve; +pub mod dast; pub mod finding; +pub mod graph; pub mod issue; pub mod repository; pub mod sbom; pub mod scan; pub use cve::{CveAlert, CveSource}; +pub use dast::{ + DastAuthConfig, 
DastEvidence, DastFinding, DastScanPhase, DastScanRun, DastScanStatus, + DastTarget, DastTargetType, DastVulnType, +}; pub use finding::{Finding, FindingStatus, Severity}; +pub use graph::{ + CodeEdge, CodeEdgeKind, CodeNode, CodeNodeKind, GraphBuildRun, GraphBuildStatus, + ImpactAnalysis, +}; pub use issue::{IssueStatus, TrackerIssue, TrackerType}; pub use repository::{ScanTrigger, TrackedRepository}; pub use sbom::{SbomEntry, VulnRef}; diff --git a/compliance-core/src/models/repository.rs b/compliance-core/src/models/repository.rs index aa8ee41..e283afe 100644 --- a/compliance-core/src/models/repository.rs +++ b/compliance-core/src/models/repository.rs @@ -1,5 +1,5 @@ use chrono::{DateTime, Utc}; -use serde::{Deserialize, Serialize}; +use serde::{Deserialize, Deserializer, Serialize}; use super::issue::TrackerType; @@ -15,21 +15,64 @@ pub enum ScanTrigger { pub struct TrackedRepository { #[serde(rename = "_id", skip_serializing_if = "Option::is_none")] pub id: Option, + #[serde(default)] pub name: String, + #[serde(default)] pub git_url: String, + #[serde(default = "default_branch")] pub default_branch: String, pub local_path: Option, pub scan_schedule: Option, + #[serde(default)] pub webhook_enabled: bool, pub tracker_type: Option, pub tracker_owner: Option, pub tracker_repo: Option, pub last_scanned_commit: Option, + #[serde(default, deserialize_with = "deserialize_findings_count")] pub findings_count: u32, + #[serde(default = "chrono::Utc::now", deserialize_with = "deserialize_datetime")] pub created_at: DateTime, + #[serde(default = "chrono::Utc::now", deserialize_with = "deserialize_datetime")] pub updated_at: DateTime, } +fn default_branch() -> String { + "main".to_string() +} + +/// Handles findings_count stored as either a plain integer or a BSON Int64 +/// which the driver may present as a map `{"low": N, "high": N, "unsigned": bool}`. +/// Handles datetime stored as either a BSON DateTime or an RFC 3339 string. 
+fn deserialize_datetime<'de, D>(deserializer: D) -> Result, D::Error> +where + D: Deserializer<'de>, +{ + let bson = bson::Bson::deserialize(deserializer)?; + match bson { + bson::Bson::DateTime(dt) => Ok(dt.into()), + bson::Bson::String(s) => s + .parse::>() + .map_err(serde::de::Error::custom), + other => Err(serde::de::Error::custom(format!( + "expected DateTime or string, got: {other:?}" + ))), + } +} + +fn deserialize_findings_count<'de, D>(deserializer: D) -> Result +where + D: Deserializer<'de>, +{ + let bson = bson::Bson::deserialize(deserializer)?; + match &bson { + bson::Bson::Int32(n) => Ok(*n as u32), + bson::Bson::Int64(n) => Ok(*n as u32), + bson::Bson::Double(n) => Ok(*n as u32), + _ => Ok(0), + } +} + impl TrackedRepository { pub fn new(name: String, git_url: String) -> Self { let now = Utc::now(); diff --git a/compliance-core/src/models/scan.rs b/compliance-core/src/models/scan.rs index b444440..f4ba15a 100644 --- a/compliance-core/src/models/scan.rs +++ b/compliance-core/src/models/scan.rs @@ -11,6 +11,8 @@ pub enum ScanType { Cve, Gdpr, OAuth, + Graph, + Dast, } impl std::fmt::Display for ScanType { @@ -21,6 +23,8 @@ impl std::fmt::Display for ScanType { Self::Cve => write!(f, "cve"), Self::Gdpr => write!(f, "gdpr"), Self::OAuth => write!(f, "oauth"), + Self::Graph => write!(f, "graph"), + Self::Dast => write!(f, "dast"), } } } @@ -41,8 +45,10 @@ pub enum ScanPhase { SbomGeneration, CveScanning, PatternScanning, + GraphBuilding, LlmTriage, IssueCreation, + DastScanning, Completed, } diff --git a/compliance-core/src/traits/dast_agent.rs b/compliance-core/src/traits/dast_agent.rs new file mode 100644 index 0000000..b67216f --- /dev/null +++ b/compliance-core/src/traits/dast_agent.rs @@ -0,0 +1,47 @@ +use crate::error::CoreError; +use crate::models::dast::{DastFinding, DastTarget}; + +/// Context passed to DAST agents containing discovered information +#[derive(Debug, Clone, Default)] +pub struct DastContext { + /// Discovered endpoints from 
crawling + pub endpoints: Vec, + /// Technologies detected during recon + pub technologies: Vec, + /// Existing SAST findings for prioritization + pub sast_hints: Vec, +} + +/// An endpoint discovered during crawling +#[derive(Debug, Clone)] +pub struct DiscoveredEndpoint { + pub url: String, + pub method: String, + pub parameters: Vec, + pub content_type: Option, + pub requires_auth: bool, +} + +/// A parameter on a discovered endpoint +#[derive(Debug, Clone)] +pub struct EndpointParameter { + pub name: String, + /// "query", "body", "header", "path", "cookie" + pub location: String, + pub param_type: Option, + pub example_value: Option, +} + +/// Trait for DAST testing agents (injection, XSS, auth bypass, etc.) +#[allow(async_fn_in_trait)] +pub trait DastAgent: Send + Sync { + /// Agent name (e.g., "sql_injection", "xss", "auth_bypass") + fn name(&self) -> &str; + + /// Run the agent against a target with discovered context + async fn run( + &self, + target: &DastTarget, + context: &DastContext, + ) -> Result, CoreError>; +} diff --git a/compliance-core/src/traits/graph_builder.rs b/compliance-core/src/traits/graph_builder.rs new file mode 100644 index 0000000..126d460 --- /dev/null +++ b/compliance-core/src/traits/graph_builder.rs @@ -0,0 +1,30 @@ +use std::path::Path; + +use crate::error::CoreError; +use crate::models::graph::{CodeEdge, CodeNode}; + +/// Output from parsing a single file +#[derive(Debug, Default)] +pub struct ParseOutput { + pub nodes: Vec, + pub edges: Vec, +} + +/// Trait for language-specific code parsers +#[allow(async_fn_in_trait)] +pub trait LanguageParser: Send + Sync { + /// Language name (e.g., "rust", "python", "javascript") + fn language(&self) -> &str; + + /// File extensions this parser handles + fn extensions(&self) -> &[&str]; + + /// Parse a single file and extract nodes + edges + fn parse_file( + &self, + file_path: &Path, + source: &str, + repo_id: &str, + graph_build_id: &str, + ) -> Result; +} diff --git 
a/compliance-core/src/traits/mod.rs b/compliance-core/src/traits/mod.rs index e2d1790..2d10e8b 100644 --- a/compliance-core/src/traits/mod.rs +++ b/compliance-core/src/traits/mod.rs @@ -1,5 +1,9 @@ +pub mod dast_agent; +pub mod graph_builder; pub mod issue_tracker; pub mod scanner; +pub use dast_agent::{DastAgent, DastContext, DiscoveredEndpoint, EndpointParameter}; +pub use graph_builder::{LanguageParser, ParseOutput}; pub use issue_tracker::IssueTracker; pub use scanner::{ScanOutput, Scanner}; diff --git a/compliance-dashboard/Cargo.toml b/compliance-dashboard/Cargo.toml index b585c71..9f1366c 100644 --- a/compliance-dashboard/Cargo.toml +++ b/compliance-dashboard/Cargo.toml @@ -12,7 +12,7 @@ path = "../bin/main.rs" workspace = true [features] -web = ["dioxus/web", "dioxus/router", "dioxus/fullstack", "dep:web-sys"] +web = ["dioxus/web", "dioxus/router", "dioxus/fullstack", "dep:web-sys", "dep:gloo-timers"] server = [ "dioxus/server", "dioxus/router", @@ -43,6 +43,7 @@ thiserror = { workspace = true } # Web-only reqwest = { workspace = true, optional = true } web-sys = { version = "0.3", optional = true } +gloo-timers = { version = "0.3", features = ["futures"], optional = true } # Server-only axum = { version = "0.8", optional = true } diff --git a/compliance-dashboard/assets/main.css b/compliance-dashboard/assets/main.css index 4bbdd35..d2c9218 100644 --- a/compliance-dashboard/assets/main.css +++ b/compliance-dashboard/assets/main.css @@ -300,6 +300,87 @@ tr:hover { color: var(--text-secondary); } +/* Toast notifications */ +.toast-container { + position: fixed; + top: 20px; + right: 20px; + z-index: 50; + display: flex; + flex-direction: column; + gap: 8px; + pointer-events: none; +} + +.toast { + display: flex; + align-items: center; + justify-content: space-between; + gap: 12px; + min-width: 280px; + max-width: 420px; + padding: 12px 16px; + border-radius: 8px; + font-size: 14px; + font-weight: 500; + pointer-events: auto; + animation: toast-in 0.3s 
ease-out; +} + +.toast-success { + background: rgba(34, 197, 94, 0.15); + border: 1px solid var(--success); + color: #86efac; +} + +.toast-error { + background: rgba(239, 68, 68, 0.15); + border: 1px solid var(--danger); + color: #fca5a5; +} + +.toast-info { + background: rgba(59, 130, 246, 0.15); + border: 1px solid var(--info); + color: #93c5fd; +} + +.toast-dismiss { + background: none; + border: none; + color: inherit; + font-size: 18px; + cursor: pointer; + opacity: 0.7; + padding: 0 4px; + line-height: 1; +} + +.toast-dismiss:hover { + opacity: 1; +} + +@keyframes toast-in { + from { + transform: translateX(100%); + opacity: 0; + } + to { + transform: translateX(0); + opacity: 1; + } +} + +/* Button click animation + disabled */ +.btn:active { + transform: scale(0.95); +} + +.btn:disabled { + opacity: 0.6; + cursor: not-allowed; +} + @media (max-width: 768px) { .sidebar { transform: translateX(-100%); diff --git a/compliance-dashboard/src/app.rs b/compliance-dashboard/src/app.rs index 24f5d40..80d398e 100644 --- a/compliance-dashboard/src/app.rs +++ b/compliance-dashboard/src/app.rs @@ -20,6 +20,20 @@ pub enum Route { SbomPage {}, #[route("/issues")] IssuesPage {}, + #[route("/graph")] + GraphIndexPage {}, + #[route("/graph/:repo_id")] + GraphExplorerPage { repo_id: String }, + #[route("/graph/:repo_id/impact/:finding_id")] + ImpactAnalysisPage { repo_id: String, finding_id: String }, + #[route("/dast")] + DastOverviewPage {}, + #[route("/dast/targets")] + DastTargetsPage {}, + #[route("/dast/findings")] + DastFindingsPage {}, + #[route("/dast/findings/:id")] + DastFindingDetailPage { id: String }, #[route("/settings")] SettingsPage {}, } diff --git a/compliance-dashboard/src/components/app_shell.rs b/compliance-dashboard/src/components/app_shell.rs index c982f49..f165d5d 100644 --- a/compliance-dashboard/src/components/app_shell.rs +++ b/compliance-dashboard/src/components/app_shell.rs @@ -2,15 +2,18 @@ use dioxus::prelude::*; use crate::app::Route; use 
crate::components::sidebar::Sidebar; +use crate::components::toast::{ToastContainer, Toasts}; #[component] pub fn AppShell() -> Element { + use_context_provider(Toasts::new); rsx! { div { class: "app-shell", Sidebar {} main { class: "main-content", Outlet:: {} } + ToastContainer {} } } } diff --git a/compliance-dashboard/src/components/mod.rs b/compliance-dashboard/src/components/mod.rs index aebf095..5350659 100644 --- a/compliance-dashboard/src/components/mod.rs +++ b/compliance-dashboard/src/components/mod.rs @@ -5,3 +5,4 @@ pub mod pagination; pub mod severity_badge; pub mod sidebar; pub mod stat_card; +pub mod toast; diff --git a/compliance-dashboard/src/components/sidebar.rs b/compliance-dashboard/src/components/sidebar.rs index b183845..ea5d471 100644 --- a/compliance-dashboard/src/components/sidebar.rs +++ b/compliance-dashboard/src/components/sidebar.rs @@ -40,6 +40,16 @@ pub fn Sidebar() -> Element { route: Route::IssuesPage {}, icon: rsx! { Icon { icon: BsListTask, width: 18, height: 18 } }, }, + NavItem { + label: "Code Graph", + route: Route::GraphIndexPage {}, + icon: rsx! { Icon { icon: BsDiagram3, width: 18, height: 18 } }, + }, + NavItem { + label: "DAST", + route: Route::DastOverviewPage {}, + icon: rsx! { Icon { icon: BsBug, width: 18, height: 18 } }, + }, NavItem { label: "Settings", route: Route::SettingsPage {}, @@ -58,6 +68,12 @@ pub fn Sidebar() -> Element { { let is_active = match (¤t_route, &item.route) { (Route::FindingDetailPage { .. }, Route::FindingsPage {}) => true, + (Route::GraphIndexPage {}, Route::GraphIndexPage {}) => true, + (Route::GraphExplorerPage { .. }, Route::GraphIndexPage {}) => true, + (Route::ImpactAnalysisPage { .. }, Route::GraphIndexPage {}) => true, + (Route::DastTargetsPage {}, Route::DastOverviewPage {}) => true, + (Route::DastFindingsPage {}, Route::DastOverviewPage {}) => true, + (Route::DastFindingDetailPage { .. 
}, Route::DastOverviewPage {}) => true, (a, b) => a == b, }; let class = if is_active { "nav-item active" } else { "nav-item" }; diff --git a/compliance-dashboard/src/components/toast.rs b/compliance-dashboard/src/components/toast.rs new file mode 100644 index 0000000..627ab0d --- /dev/null +++ b/compliance-dashboard/src/components/toast.rs @@ -0,0 +1,86 @@ +use dioxus::prelude::*; + +#[derive(Clone, PartialEq)] +pub enum ToastType { + Success, + Error, + Info, +} + +#[derive(Clone, PartialEq)] +pub struct ToastMessage { + pub id: usize, + pub message: String, + pub toast_type: ToastType, +} + +#[derive(Clone, Copy)] +pub struct Toasts { + items: Signal>, + next_id: Signal, +} + +impl Toasts { + pub fn new() -> Self { + Self { + items: Signal::new(vec![]), + next_id: Signal::new(0), + } + } + + pub fn push(&mut self, toast_type: ToastType, message: impl Into) { + let id = *self.next_id.read(); + *self.next_id.write() = id + 1; + self.items.write().push(ToastMessage { + id, + message: message.into(), + toast_type, + }); + + #[cfg(feature = "web")] + { + let mut items = self.items; + spawn(async move { + gloo_timers::future::TimeoutFuture::new(4_000).await; + items.write().retain(|t| t.id != id); + }); + } + } + + pub fn remove(&mut self, id: usize) { + self.items.write().retain(|t| t.id != id); + } +} + +#[component] +pub fn ToastContainer() -> Element { + let mut toasts = use_context::(); + let items = toasts.items.read(); + + rsx! { + div { class: "toast-container", + for toast in items.iter() { + { + let toast_id = toast.id; + let type_class = match toast.toast_type { + ToastType::Success => "toast-success", + ToastType::Error => "toast-error", + ToastType::Info => "toast-info", + }; + rsx! 
{ + div { + key: "{toast_id}", + class: "toast {type_class}", + span { "{toast.message}" } + button { + class: "toast-dismiss", + onclick: move |_| toasts.remove(toast_id), + "\u{00d7}" + } + } + } + } + } + } + } +} diff --git a/compliance-dashboard/src/infrastructure/dast.rs b/compliance-dashboard/src/infrastructure/dast.rs new file mode 100644 index 0000000..c042dd7 --- /dev/null +++ b/compliance-dashboard/src/infrastructure/dast.rs @@ -0,0 +1,125 @@ +use dioxus::prelude::*; +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Clone, Serialize, Deserialize, Default)] +pub struct DastTargetsResponse { + pub data: Vec, + pub total: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize, Default)] +pub struct DastScanRunsResponse { + pub data: Vec, + pub total: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize, Default)] +pub struct DastFindingsResponse { + pub data: Vec, + pub total: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize, Default)] +pub struct DastFindingDetailResponse { + pub data: serde_json::Value, +} + +#[server] +pub async fn fetch_dast_targets() -> Result { + let state: super::server_state::ServerState = + dioxus_fullstack::FullstackContext::extract().await?; + let url = format!("{}/api/v1/dast/targets", state.agent_api_url); + let resp = reqwest::get(&url) + .await + .map_err(|e| ServerFnError::new(e.to_string()))?; + let body: DastTargetsResponse = resp + .json() + .await + .map_err(|e| ServerFnError::new(e.to_string()))?; + Ok(body) +} + +#[server] +pub async fn fetch_dast_scan_runs() -> Result { + let state: super::server_state::ServerState = + dioxus_fullstack::FullstackContext::extract().await?; + let url = format!("{}/api/v1/dast/scan-runs", state.agent_api_url); + let resp = reqwest::get(&url) + .await + .map_err(|e| ServerFnError::new(e.to_string()))?; + let body: DastScanRunsResponse = resp + .json() + .await + .map_err(|e| ServerFnError::new(e.to_string()))?; + Ok(body) +} + +#[server] +pub async fn 
fetch_dast_findings() -> Result { + let state: super::server_state::ServerState = + dioxus_fullstack::FullstackContext::extract().await?; + let url = format!("{}/api/v1/dast/findings", state.agent_api_url); + let resp = reqwest::get(&url) + .await + .map_err(|e| ServerFnError::new(e.to_string()))?; + let body: DastFindingsResponse = resp + .json() + .await + .map_err(|e| ServerFnError::new(e.to_string()))?; + Ok(body) +} + +#[server] +pub async fn fetch_dast_finding_detail( + id: String, +) -> Result { + let state: super::server_state::ServerState = + dioxus_fullstack::FullstackContext::extract().await?; + let url = format!("{}/api/v1/dast/findings/{id}", state.agent_api_url); + let resp = reqwest::get(&url) + .await + .map_err(|e| ServerFnError::new(e.to_string()))?; + let body: DastFindingDetailResponse = resp + .json() + .await + .map_err(|e| ServerFnError::new(e.to_string()))?; + Ok(body) +} + +#[server] +pub async fn add_dast_target( + name: String, + base_url: String, +) -> Result<(), ServerFnError> { + let state: super::server_state::ServerState = + dioxus_fullstack::FullstackContext::extract().await?; + let url = format!("{}/api/v1/dast/targets", state.agent_api_url); + let client = reqwest::Client::new(); + client + .post(&url) + .json(&serde_json::json!({ + "name": name, + "base_url": base_url, + })) + .send() + .await + .map_err(|e| ServerFnError::new(e.to_string()))?; + Ok(()) +} + +#[server] +pub async fn trigger_dast_scan(target_id: String) -> Result<(), ServerFnError> { + let state: super::server_state::ServerState = + dioxus_fullstack::FullstackContext::extract().await?; + let url = format!( + "{}/api/v1/dast/targets/{target_id}/scan", + state.agent_api_url + ); + let client = reqwest::Client::new(); + client + .post(&url) + .send() + .await + .map_err(|e| ServerFnError::new(e.to_string()))?; + Ok(()) +} diff --git a/compliance-dashboard/src/infrastructure/graph.rs b/compliance-dashboard/src/infrastructure/graph.rs new file mode 100644 index 
0000000..ac6f678 --- /dev/null +++ b/compliance-dashboard/src/infrastructure/graph.rs @@ -0,0 +1,96 @@ +use dioxus::prelude::*; +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Clone, Serialize, Deserialize, Default)] +pub struct GraphDataResponse { + pub data: GraphData, +} + +#[derive(Debug, Clone, Serialize, Deserialize, Default)] +pub struct GraphData { + pub build: Option, + pub nodes: Vec, + pub edges: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize, Default)] +pub struct ImpactResponse { + pub data: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize, Default)] +pub struct CommunitiesResponse { + pub data: Vec, + pub total: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize, Default)] +pub struct NodesResponse { + pub data: Vec, + pub total: Option, +} + +#[server] +pub async fn fetch_graph(repo_id: String) -> Result { + let state: super::server_state::ServerState = + dioxus_fullstack::FullstackContext::extract().await?; + let url = format!("{}/api/v1/graph/{repo_id}", state.agent_api_url); + let resp = reqwest::get(&url) + .await + .map_err(|e| ServerFnError::new(e.to_string()))?; + let body: GraphDataResponse = resp + .json() + .await + .map_err(|e| ServerFnError::new(e.to_string()))?; + Ok(body) +} + +#[server] +pub async fn fetch_impact( + repo_id: String, + finding_id: String, +) -> Result { + let state: super::server_state::ServerState = + dioxus_fullstack::FullstackContext::extract().await?; + let url = format!( + "{}/api/v1/graph/{repo_id}/impact/{finding_id}", + state.agent_api_url + ); + let resp = reqwest::get(&url) + .await + .map_err(|e| ServerFnError::new(e.to_string()))?; + let body: ImpactResponse = resp + .json() + .await + .map_err(|e| ServerFnError::new(e.to_string()))?; + Ok(body) +} + +#[server] +pub async fn fetch_communities(repo_id: String) -> Result { + let state: super::server_state::ServerState = + dioxus_fullstack::FullstackContext::extract().await?; + let url = 
format!("{}/api/v1/graph/{repo_id}/communities", state.agent_api_url); + let resp = reqwest::get(&url) + .await + .map_err(|e| ServerFnError::new(e.to_string()))?; + let body: CommunitiesResponse = resp + .json() + .await + .map_err(|e| ServerFnError::new(e.to_string()))?; + Ok(body) +} + +#[server] +pub async fn trigger_graph_build(repo_id: String) -> Result<(), ServerFnError> { + let state: super::server_state::ServerState = + dioxus_fullstack::FullstackContext::extract().await?; + let url = format!("{}/api/v1/graph/{repo_id}/build", state.agent_api_url); + let client = reqwest::Client::new(); + client + .post(&url) + .send() + .await + .map_err(|e| ServerFnError::new(e.to_string()))?; + Ok(()) +} diff --git a/compliance-dashboard/src/infrastructure/mod.rs b/compliance-dashboard/src/infrastructure/mod.rs index 3244c5d..0862ecf 100644 --- a/compliance-dashboard/src/infrastructure/mod.rs +++ b/compliance-dashboard/src/infrastructure/mod.rs @@ -1,6 +1,8 @@ // Server function modules (compiled for both web and server; // the #[server] macro generates client stubs for the web target) +pub mod dast; pub mod findings; +pub mod graph; pub mod issues; pub mod repositories; pub mod sbom; diff --git a/compliance-dashboard/src/pages/dast_finding_detail.rs b/compliance-dashboard/src/pages/dast_finding_detail.rs new file mode 100644 index 0000000..6c14650 --- /dev/null +++ b/compliance-dashboard/src/pages/dast_finding_detail.rs @@ -0,0 +1,113 @@ +use dioxus::prelude::*; + +use crate::components::page_header::PageHeader; +use crate::components::severity_badge::SeverityBadge; +use crate::infrastructure::dast::fetch_dast_finding_detail; + +#[component] +pub fn DastFindingDetailPage(id: String) -> Element { + let finding = use_resource(move || { + let fid = id.clone(); + async move { fetch_dast_finding_detail(fid).await.ok() } + }); + + rsx! 
{ + PageHeader { + title: "DAST Finding Detail", + description: "Full evidence and details for a dynamic security finding", + } + + div { class: "card", + match &*finding.read() { + Some(Some(resp)) => { + let f = resp.data.clone(); + let severity = f.get("severity").and_then(|v| v.as_str()).unwrap_or("info").to_string(); + rsx! { + div { class: "flex items-center gap-4 mb-4", + SeverityBadge { severity: severity } + h2 { "{f.get(\"title\").and_then(|v| v.as_str()).unwrap_or(\"Unknown Finding\")}" } + } + + div { class: "grid grid-cols-2 gap-4 mb-4", + div { + strong { "Vulnerability Type: " } + span { class: "badge", "{f.get(\"vuln_type\").and_then(|v| v.as_str()).unwrap_or(\"-\")}" } + } + div { + strong { "CWE: " } + span { "{f.get(\"cwe\").and_then(|v| v.as_str()).unwrap_or(\"N/A\")}" } + } + div { + strong { "Endpoint: " } + code { "{f.get(\"endpoint\").and_then(|v| v.as_str()).unwrap_or(\"-\")}" } + } + div { + strong { "Method: " } + span { "{f.get(\"method\").and_then(|v| v.as_str()).unwrap_or(\"-\")}" } + } + div { + strong { "Parameter: " } + code { "{f.get(\"parameter\").and_then(|v| v.as_str()).unwrap_or(\"N/A\")}" } + } + div { + strong { "Exploitable: " } + if f.get("exploitable").and_then(|v| v.as_bool()).unwrap_or(false) { + span { class: "badge badge-danger", "Confirmed" } + } else { + span { class: "badge", "Unconfirmed" } + } + } + } + + h3 { "Description" } + p { "{f.get(\"description\").and_then(|v| v.as_str()).unwrap_or(\"-\")}" } + + if let Some(remediation) = f.get("remediation").and_then(|v| v.as_str()) { + h3 { class: "mt-4", "Remediation" } + p { "{remediation}" } + } + + h3 { class: "mt-4", "Evidence" } + if let Some(evidence_list) = f.get("evidence").and_then(|v| v.as_array()) { + for (i, evidence) in evidence_list.iter().enumerate() { + div { class: "card mb-3", + h4 { "Evidence #{i + 1}" } + div { class: "grid grid-cols-2 gap-2", + div { + strong { "Request: " } + code { "{evidence.get(\"request_method\").and_then(|v| 
v.as_str()).unwrap_or(\"-\")} {evidence.get(\"request_url\").and_then(|v| v.as_str()).unwrap_or(\"-\")}" } + } + div { + strong { "Response Status: " } + span { "{evidence.get(\"response_status\").and_then(|v| v.as_u64()).unwrap_or(0)}" } + } + } + if let Some(payload) = evidence.get("payload").and_then(|v| v.as_str()) { + div { class: "mt-2", + strong { "Payload: " } + code { class: "block bg-gray-900 text-green-400 p-2 rounded mt-1", + "{payload}" + } + } + } + if let Some(snippet) = evidence.get("response_snippet").and_then(|v| v.as_str()) { + div { class: "mt-2", + strong { "Response Snippet: " } + pre { class: "block bg-gray-900 text-gray-300 p-2 rounded mt-1 overflow-x-auto text-sm", + "{snippet}" + } + } + } + } + } + } else { + p { "No evidence collected." } + } + } + }, + Some(None) => rsx! { p { "Finding not found." } }, + None => rsx! { p { "Loading..." } }, + } + } + } +} diff --git a/compliance-dashboard/src/pages/dast_findings.rs b/compliance-dashboard/src/pages/dast_findings.rs new file mode 100644 index 0000000..1729c60 --- /dev/null +++ b/compliance-dashboard/src/pages/dast_findings.rs @@ -0,0 +1,79 @@ +use dioxus::prelude::*; + +use crate::app::Route; +use crate::components::page_header::PageHeader; +use crate::components::severity_badge::SeverityBadge; +use crate::infrastructure::dast::fetch_dast_findings; + +#[component] +pub fn DastFindingsPage() -> Element { + let findings = use_resource(|| async { fetch_dast_findings().await.ok() }); + + rsx! { + PageHeader { + title: "DAST Findings", + description: "Vulnerabilities discovered through dynamic application security testing", + } + + div { class: "card", + match &*findings.read() { + Some(Some(data)) => { + let finding_list = &data.data; + if finding_list.is_empty() { + rsx! { p { "No DAST findings yet. Run a scan to discover vulnerabilities." } } + } else { + rsx! 
{ + table { class: "table", + thead { + tr { + th { "Severity" } + th { "Type" } + th { "Title" } + th { "Endpoint" } + th { "Method" } + th { "Exploitable" } + } + } + tbody { + for finding in finding_list { + { + let id = finding.get("_id").and_then(|v| v.get("$oid")).and_then(|v| v.as_str()).unwrap_or("").to_string(); + let severity = finding.get("severity").and_then(|v| v.as_str()).unwrap_or("info").to_string(); + rsx! { + tr { + td { SeverityBadge { severity: severity } } + td { + span { class: "badge", + "{finding.get(\"vuln_type\").and_then(|v| v.as_str()).unwrap_or(\"-\")}" + } + } + td { + Link { + to: Route::DastFindingDetailPage { id: id }, + "{finding.get(\"title\").and_then(|v| v.as_str()).unwrap_or(\"-\")}" + } + } + td { code { class: "text-sm", "{finding.get(\"endpoint\").and_then(|v| v.as_str()).unwrap_or(\"-\")}" } } + td { "{finding.get(\"method\").and_then(|v| v.as_str()).unwrap_or(\"-\")}" } + td { + if finding.get("exploitable").and_then(|v| v.as_bool()).unwrap_or(false) { + span { class: "badge badge-danger", "Confirmed" } + } else { + span { class: "badge", "Unconfirmed" } + } + } + } + } + } + } + } + } + } + } + }, + Some(None) => rsx! { p { "Failed to load findings." } }, + None => rsx! { p { "Loading..." } }, + } + } + } +} diff --git a/compliance-dashboard/src/pages/dast_overview.rs b/compliance-dashboard/src/pages/dast_overview.rs new file mode 100644 index 0000000..3f27f2d --- /dev/null +++ b/compliance-dashboard/src/pages/dast_overview.rs @@ -0,0 +1,107 @@ +use dioxus::prelude::*; + +use crate::app::Route; +use crate::components::page_header::PageHeader; +use crate::infrastructure::dast::{fetch_dast_findings, fetch_dast_scan_runs}; + +#[component] +pub fn DastOverviewPage() -> Element { + let scan_runs = use_resource(|| async { fetch_dast_scan_runs().await.ok() }); + let findings = use_resource(|| async { fetch_dast_findings().await.ok() }); + + rsx! 
{ + PageHeader { + title: "DAST Overview", + description: "Dynamic Application Security Testing — scan running applications for vulnerabilities", + } + + div { class: "grid grid-cols-3 gap-4 mb-6", + div { class: "stat-card", + div { class: "stat-value", + match &*scan_runs.read() { + Some(Some(data)) => { + let count = data.total.unwrap_or(0); + rsx! { "{count}" } + }, + _ => rsx! { "—" }, + } + } + div { class: "stat-label", "Total Scans" } + } + div { class: "stat-card", + div { class: "stat-value", + match &*findings.read() { + Some(Some(data)) => { + let count = data.total.unwrap_or(0); + rsx! { "{count}" } + }, + _ => rsx! { "—" }, + } + } + div { class: "stat-label", "DAST Findings" } + } + div { class: "stat-card", + div { class: "stat-value", "—" } + div { class: "stat-label", "Active Targets" } + } + } + + div { class: "flex gap-4 mb-4", + Link { + to: Route::DastTargetsPage {}, + class: "btn btn-primary", + "Manage Targets" + } + Link { + to: Route::DastFindingsPage {}, + class: "btn btn-secondary", + "View Findings" + } + } + + div { class: "card", + h3 { "Recent Scan Runs" } + match &*scan_runs.read() { + Some(Some(data)) => { + let runs = &data.data; + if runs.is_empty() { + rsx! { p { "No scan runs yet." } } + } else { + rsx! 
{ + table { class: "table", + thead { + tr { + th { "Target" } + th { "Status" } + th { "Phase" } + th { "Findings" } + th { "Exploitable" } + th { "Started" } + } + } + tbody { + for run in runs { + tr { + td { "{run.get(\"target_id\").and_then(|v| v.as_str()).unwrap_or(\"-\")}" } + td { + span { class: "badge", + "{run.get(\"status\").and_then(|v| v.as_str()).unwrap_or(\"unknown\")}" + } + } + td { "{run.get(\"current_phase\").and_then(|v| v.as_str()).unwrap_or(\"-\")}" } + td { "{run.get(\"findings_count\").and_then(|v| v.as_u64()).unwrap_or(0)}" } + td { "{run.get(\"exploitable_count\").and_then(|v| v.as_u64()).unwrap_or(0)}" } + td { "{run.get(\"started_at\").and_then(|v| v.as_str()).unwrap_or(\"-\")}" } + } + } + } + } + } + } + }, + Some(None) => rsx! { p { "Failed to load scan runs." } }, + None => rsx! { p { "Loading..." } }, + } + } + } +} diff --git a/compliance-dashboard/src/pages/dast_targets.rs b/compliance-dashboard/src/pages/dast_targets.rs new file mode 100644 index 0000000..20ba254 --- /dev/null +++ b/compliance-dashboard/src/pages/dast_targets.rs @@ -0,0 +1,145 @@ +use dioxus::prelude::*; + +use crate::components::page_header::PageHeader; +use crate::components::toast::{ToastType, Toasts}; +use crate::infrastructure::dast::{add_dast_target, fetch_dast_targets, trigger_dast_scan}; + +#[component] +pub fn DastTargetsPage() -> Element { + let mut targets = use_resource(|| async { fetch_dast_targets().await.ok() }); + let mut toasts = use_context::(); + + let mut show_form = use_signal(|| false); + let mut new_name = use_signal(String::new); + let mut new_url = use_signal(String::new); + + rsx! 
{ + PageHeader { + title: "DAST Targets", + description: "Configure target applications for dynamic security testing", + } + + div { class: "mb-4", + button { + class: "btn btn-primary", + onclick: move |_| show_form.set(!show_form()), + if show_form() { "Cancel" } else { "Add Target" } + } + } + + if show_form() { + div { class: "card mb-4", + h3 { "Add New Target" } + div { class: "form-group", + label { "Name" } + input { + class: "input", + r#type: "text", + placeholder: "My Web App", + value: "{new_name}", + oninput: move |e| new_name.set(e.value()), + } + } + div { class: "form-group", + label { "Base URL" } + input { + class: "input", + r#type: "text", + placeholder: "https://example.com", + value: "{new_url}", + oninput: move |e| new_url.set(e.value()), + } + } + button { + class: "btn btn-primary", + onclick: move |_| { + let name = new_name(); + let url = new_url(); + spawn(async move { + match add_dast_target(name, url).await { + Ok(_) => { + toasts.push(ToastType::Success, "Target created"); + targets.restart(); + } + Err(e) => toasts.push(ToastType::Error, e.to_string()), + } + }); + show_form.set(false); + new_name.set(String::new()); + new_url.set(String::new()); + }, + "Create Target" + } + } + } + + div { class: "card", + h3 { "Configured Targets" } + match &*targets.read() { + Some(Some(data)) => { + let target_list = &data.data; + if target_list.is_empty() { + rsx! { p { "No DAST targets configured. Add one to get started." } } + } else { + rsx! { + table { class: "table", + thead { + tr { + th { "Name" } + th { "URL" } + th { "Type" } + th { "Rate Limit" } + th { "Destructive" } + th { "Actions" } + } + } + tbody { + for target in target_list { + { + let target_id = target.get("_id").and_then(|v| v.get("$oid")).and_then(|v| v.as_str()).unwrap_or("").to_string(); + rsx! 
{ + tr { + td { "{target.get(\"name\").and_then(|v| v.as_str()).unwrap_or(\"-\")}" } + td { code { "{target.get(\"base_url\").and_then(|v| v.as_str()).unwrap_or(\"-\")}" } } + td { "{target.get(\"target_type\").and_then(|v| v.as_str()).unwrap_or(\"-\")}" } + td { "{target.get(\"rate_limit\").and_then(|v| v.as_u64()).unwrap_or(0)} req/s" } + td { + if target.get("allow_destructive").and_then(|v| v.as_bool()).unwrap_or(false) { + span { class: "badge badge-danger", "Yes" } + } else { + span { class: "badge badge-success", "No" } + } + } + td { + button { + class: "btn btn-sm", + onclick: { + let tid = target_id.clone(); + move |_| { + let tid = tid.clone(); + spawn(async move { + match trigger_dast_scan(tid).await { + Ok(_) => toasts.push(ToastType::Success, "DAST scan triggered"), + Err(e) => toasts.push(ToastType::Error, e.to_string()), + } + }); + } + }, + "Scan" + } + } + } + } + } + } + } + } + } + } + }, + Some(None) => rsx! { p { "Failed to load targets." } }, + None => rsx! { p { "Loading..." } }, + } + } + } +} diff --git a/compliance-dashboard/src/pages/graph_explorer.rs b/compliance-dashboard/src/pages/graph_explorer.rs new file mode 100644 index 0000000..c82bdca --- /dev/null +++ b/compliance-dashboard/src/pages/graph_explorer.rs @@ -0,0 +1,105 @@ +use dioxus::prelude::*; + +use crate::components::page_header::PageHeader; +use crate::components::toast::{ToastType, Toasts}; +use crate::infrastructure::graph::{fetch_graph, trigger_graph_build}; + +#[component] +pub fn GraphExplorerPage(repo_id: String) -> Element { + let repo_id_clone = repo_id.clone(); + let mut graph_data = use_resource(move || { + let rid = repo_id_clone.clone(); + async move { + if rid.is_empty() { + return None; + } + fetch_graph(rid).await.ok() + } + }); + + let mut building = use_signal(|| false); + let mut toasts = use_context::(); + + rsx! 
{ + PageHeader { + title: "Code Knowledge Graph", + description: "Interactive visualization of code structure and relationships", + } + + if repo_id.is_empty() { + div { class: "card", + p { "Select a repository to view its code graph." } + p { "You can trigger a graph build from the Repositories page." } + } + } else { + div { style: "margin-bottom: 16px;", + button { + class: "btn btn-primary", + disabled: building(), + onclick: { + let rid = repo_id.clone(); + move |_| { + let rid = rid.clone(); + building.set(true); + spawn(async move { + match trigger_graph_build(rid).await { + Ok(_) => toasts.push(ToastType::Success, "Graph build triggered"), + Err(e) => toasts.push(ToastType::Error, e.to_string()), + } + building.set(false); + graph_data.restart(); + }); + } + }, + if building() { "Building..." } else { "Build Graph" } + } + } + + div { class: "card", + h3 { "Graph Explorer \u{2014} {repo_id}" } + + match &*graph_data.read() { + Some(Some(data)) => { + let build = data.data.build.clone().unwrap_or_default(); + let node_count = build.get("node_count").and_then(|n| n.as_u64()).unwrap_or(0); + let edge_count = build.get("edge_count").and_then(|n| n.as_u64()).unwrap_or(0); + let community_count = build.get("community_count").and_then(|n| n.as_u64()).unwrap_or(0); + rsx! { + div { class: "grid grid-cols-3 gap-4 mb-4", + div { class: "stat-card", + div { class: "stat-value", "{node_count}" } + div { class: "stat-label", "Nodes" } + } + div { class: "stat-card", + div { class: "stat-value", "{edge_count}" } + div { class: "stat-label", "Edges" } + } + div { class: "stat-card", + div { class: "stat-value", "{community_count}" } + div { class: "stat-label", "Communities" } + } + } + + div { + id: "graph-container", + style: "width: 100%; height: 600px; border: 1px solid var(--border); border-radius: 8px; background: var(--bg-secondary);", + } + + script { + r#" + console.log('Graph explorer loaded'); + "# + } + } + }, + Some(None) => rsx! 
{ + p { "No graph data available. Build the graph first." } + }, + None => rsx! { + p { "Loading graph data..." } + }, + } + } + } + } +} diff --git a/compliance-dashboard/src/pages/graph_index.rs b/compliance-dashboard/src/pages/graph_index.rs new file mode 100644 index 0000000..ab7ba51 --- /dev/null +++ b/compliance-dashboard/src/pages/graph_index.rs @@ -0,0 +1,53 @@ +use dioxus::prelude::*; + +use crate::app::Route; +use crate::components::page_header::PageHeader; +use crate::infrastructure::repositories::fetch_repositories; + +#[component] +pub fn GraphIndexPage() -> Element { + let repos = use_resource(|| async { fetch_repositories(1).await.ok() }); + + rsx! { + PageHeader { + title: "Code Knowledge Graph", + description: "Select a repository to explore its code graph", + } + + div { class: "card", + h3 { "Repositories" } + match &*repos.read() { + Some(Some(data)) => { + let repo_list = &data.data; + if repo_list.is_empty() { + rsx! { p { "No repositories found. Add a repository first." } } + } else { + rsx! { + div { class: "grid grid-cols-1 gap-3", + for repo in repo_list { + { + let repo_id = repo.id.map(|id| id.to_hex()).unwrap_or_default(); + let name = repo.name.clone(); + let url = repo.git_url.clone(); + rsx! { + Link { + to: Route::GraphExplorerPage { repo_id: repo_id }, + class: "card hover:bg-gray-800 transition-colors cursor-pointer", + h4 { "{name}" } + if !url.is_empty() { + p { class: "text-sm text-muted", "{url}" } + } + } + } + } + } + } + } + } + }, + Some(None) => rsx! { p { "Failed to load repositories." } }, + None => rsx! { p { "Loading repositories..." 
} }, + } + } + } +} diff --git a/compliance-dashboard/src/pages/impact_analysis.rs b/compliance-dashboard/src/pages/impact_analysis.rs new file mode 100644 index 0000000..a58e77d --- /dev/null +++ b/compliance-dashboard/src/pages/impact_analysis.rs @@ -0,0 +1,97 @@ +use dioxus::prelude::*; + +use crate::components::page_header::PageHeader; +use crate::infrastructure::graph::fetch_impact; + +#[component] +pub fn ImpactAnalysisPage(repo_id: String, finding_id: String) -> Element { + let impact_data = use_resource(move || { + let rid = repo_id.clone(); + let fid = finding_id.clone(); + async move { fetch_impact(rid, fid).await.ok() } + }); + + rsx! { + PageHeader { + title: "Impact Analysis", + description: "Blast radius and affected entry points for a security finding", + } + + div { class: "card", + match &*impact_data.read() { + Some(Some(resp)) => { + let impact = resp.data.clone().unwrap_or_default(); + rsx! { + div { class: "grid grid-cols-2 gap-4 mb-4", + div { class: "stat-card", + div { class: "stat-value", + "{impact.get(\"blast_radius\").and_then(|v| v.as_u64()).unwrap_or(0)}" + } + div { class: "stat-label", "Blast Radius (nodes affected)" } + } + div { class: "stat-card", + div { class: "stat-value", + "{impact.get(\"affected_entry_points\").and_then(|v| v.as_array()).map(|a| a.len()).unwrap_or(0)}" + } + div { class: "stat-label", "Entry Points Affected" } + } + } + + h3 { "Affected Entry Points" } + if let Some(entries) = impact.get("affected_entry_points").and_then(|v| v.as_array()) { + if entries.is_empty() { + p { class: "text-muted", "No entry points affected." } + } else { + ul { class: "list", + for entry in entries { + li { "{entry.as_str().unwrap_or(\"-\")}" } + } + } + } + } + + h3 { class: "mt-4", "Call Chains" } + if let Some(chains) = impact.get("call_chains").and_then(|v| v.as_array()) { + if chains.is_empty() { + p { class: "text-muted", "No call chains found." 
} + } else { + for (i, chain) in chains.iter().enumerate() { + div { class: "card mb-2", + strong { "Chain {i + 1}: " } + if let Some(steps) = chain.as_array() { + for (j, step) in steps.iter().enumerate() { + span { "{step.as_str().unwrap_or(\"-\")}" } + if j < steps.len() - 1 { + span { class: "text-muted", " → " } + } + } + } + } + } + } + } + + h3 { class: "mt-4", "Direct Callers" } + if let Some(callers) = impact.get("direct_callers").and_then(|v| v.as_array()) { + if callers.is_empty() { + p { class: "text-muted", "No direct callers." } + } else { + ul { class: "list", + for caller in callers { + li { code { "{caller.as_str().unwrap_or(\"-\")}" } } + } + } + } + } + } + }, + Some(None) => rsx! { + p { "No impact analysis data available for this finding." } + }, + None => rsx! { + p { "Loading impact analysis..." } + }, + } + } + } +} diff --git a/compliance-dashboard/src/pages/mod.rs b/compliance-dashboard/src/pages/mod.rs index cfed572..16b8803 100644 --- a/compliance-dashboard/src/pages/mod.rs +++ b/compliance-dashboard/src/pages/mod.rs @@ -1,13 +1,27 @@ +pub mod dast_finding_detail; +pub mod dast_findings; +pub mod dast_overview; +pub mod dast_targets; pub mod finding_detail; pub mod findings; +pub mod graph_explorer; +pub mod graph_index; +pub mod impact_analysis; pub mod issues; pub mod overview; pub mod repositories; pub mod sbom; pub mod settings; +pub use dast_finding_detail::DastFindingDetailPage; +pub use dast_findings::DastFindingsPage; +pub use dast_overview::DastOverviewPage; +pub use dast_targets::DastTargetsPage; pub use finding_detail::FindingDetailPage; pub use findings::FindingsPage; +pub use graph_explorer::GraphExplorerPage; +pub use graph_index::GraphIndexPage; +pub use impact_analysis::ImpactAnalysisPage; pub use issues::IssuesPage; pub use overview::OverviewPage; pub use repositories::RepositoriesPage; diff --git a/compliance-dashboard/src/pages/repositories.rs b/compliance-dashboard/src/pages/repositories.rs index f4208f3..eb6d4f1 
100644 --- a/compliance-dashboard/src/pages/repositories.rs +++ b/compliance-dashboard/src/pages/repositories.rs @@ -2,6 +2,7 @@ use dioxus::prelude::*; use crate::components::page_header::PageHeader; use crate::components::pagination::Pagination; +use crate::components::toast::{ToastType, Toasts}; #[component] pub fn RepositoriesPage() -> Element { @@ -10,8 +11,9 @@ pub fn RepositoriesPage() -> Element { let mut name = use_signal(String::new); let mut git_url = use_signal(String::new); let mut branch = use_signal(|| "main".to_string()); + let mut toasts = use_context::(); - let repos = use_resource(move || { + let mut repos = use_resource(move || { let p = page(); async move { crate::infrastructure::repositories::fetch_repositories(p) @@ -71,7 +73,13 @@ pub fn RepositoriesPage() -> Element { let u = git_url(); let b = branch(); spawn(async move { - let _ = crate::infrastructure::repositories::add_repository(n, u, b).await; + match crate::infrastructure::repositories::add_repository(n, u, b).await { + Ok(_) => { + toasts.push(ToastType::Success, "Repository added"); + repos.restart(); + } + Err(e) => toasts.push(ToastType::Error, e.to_string()), + } }); show_add_form.set(false); name.set(String::new()); @@ -125,7 +133,10 @@ pub fn RepositoriesPage() -> Element { onclick: move |_| { let id = repo_id_clone.clone(); spawn(async move { - let _ = crate::infrastructure::repositories::trigger_repo_scan(id).await; + match crate::infrastructure::repositories::trigger_repo_scan(id).await { + Ok(_) => toasts.push(ToastType::Success, "Scan triggered"), + Err(e) => toasts.push(ToastType::Error, e.to_string()), + } }); }, "Scan" diff --git a/compliance-dast/Cargo.toml b/compliance-dast/Cargo.toml new file mode 100644 index 0000000..0abbd2c --- /dev/null +++ b/compliance-dast/Cargo.toml @@ -0,0 +1,32 @@ +[package] +name = "compliance-dast" +version = "0.1.0" +edition = "2021" + +[lints] +workspace = true + +[dependencies] +compliance-core = { workspace = true } +serde = { 
workspace = true } +serde_json = { workspace = true } +chrono = { workspace = true } +thiserror = { workspace = true } +tracing = { workspace = true } +uuid = { workspace = true } +tokio = { workspace = true } +mongodb = { workspace = true } +reqwest = { workspace = true } + +# HTML parsing +scraper = "0.22" + +# Browser automation +chromiumoxide = { version = "0.7", features = ["tokio-runtime"], default-features = false } + +# Docker sandboxing +bollard = "0.18" + +# Serialization +bson = "2" +url = "2" diff --git a/compliance-dast/src/agents/api_fuzzer.rs b/compliance-dast/src/agents/api_fuzzer.rs new file mode 100644 index 0000000..e426288 --- /dev/null +++ b/compliance-dast/src/agents/api_fuzzer.rs @@ -0,0 +1,307 @@ +use compliance_core::error::CoreError; +use compliance_core::models::dast::{DastEvidence, DastFinding, DastTarget, DastVulnType}; +use compliance_core::models::Severity; +use compliance_core::traits::dast_agent::{DastAgent, DastContext}; +use tracing::info; + +/// API fuzzing agent that tests for misconfigurations and information disclosure +pub struct ApiFuzzerAgent { + http: reqwest::Client, +} + +impl ApiFuzzerAgent { + pub fn new(http: reqwest::Client) -> Self { + Self { http } + } + + /// Common API paths to probe + fn discovery_paths(&self) -> Vec<(&str, &str)> { + vec![ + ("/.env", "Environment file exposure"), + ("/.git/config", "Git config exposure"), + ("/api/swagger.json", "Swagger spec exposure"), + ("/api/openapi.json", "OpenAPI spec exposure"), + ("/api-docs", "API documentation exposure"), + ("/graphql", "GraphQL endpoint"), + ("/debug", "Debug endpoint"), + ("/actuator/health", "Spring actuator"), + ("/wp-config.php.bak", "WordPress config backup"), + ("/.well-known/openid-configuration", "OIDC config"), + ("/server-status", "Apache server status"), + ("/phpinfo.php", "PHP info exposure"), + ("/robots.txt", "Robots.txt"), + ("/sitemap.xml", "Sitemap"), + ("/.htaccess", "htaccess exposure"), + ("/backup.sql", "SQL backup exposure"), 
+ ("/api/v1/users", "User enumeration endpoint"), + ] + } + + /// Patterns indicating sensitive information disclosure + fn sensitive_patterns(&self) -> Vec<&str> { + vec![ + "password", + "api_key", + "apikey", + "secret", + "token", + "private_key", + "aws_access_key", + "jdbc:", + "mongodb://", + "redis://", + "postgresql://", + ] + } +} + +impl DastAgent for ApiFuzzerAgent { + fn name(&self) -> &str { + "api_fuzzer" + } + + async fn run( + &self, + target: &DastTarget, + _context: &DastContext, + ) -> Result, CoreError> { + let mut findings = Vec::new(); + let target_id = target + .id + .map(|oid| oid.to_hex()) + .unwrap_or_else(|| "unknown".to_string()); + let base = target.base_url.trim_end_matches('/'); + + // Phase 1: Path discovery + for (path, description) in self.discovery_paths() { + let url = format!("{base}{path}"); + let response = match self.http.get(&url).send().await { + Ok(r) => r, + Err(_) => continue, + }; + + let status = response.status().as_u16(); + if status == 200 { + let body = response.text().await.unwrap_or_default(); + + // Check if it's actually sensitive content (not just a 200 catch-all) + let is_sensitive = !body.is_empty() + && body.len() > 10 + && !body.contains("404") + && !body.contains("not found"); + + if is_sensitive { + let snippet = body.chars().take(500).collect::(); + + // Check for information disclosure + let body_lower = body.to_lowercase(); + let has_secrets = self + .sensitive_patterns() + .iter() + .any(|p| body_lower.contains(p)); + + let severity = if has_secrets { + Severity::Critical + } else if path.contains(".env") + || path.contains(".git") + || path.contains("backup") + { + Severity::High + } else { + Severity::Medium + }; + + let evidence = DastEvidence { + request_method: "GET".to_string(), + request_url: url.clone(), + request_headers: None, + request_body: None, + response_status: status, + response_headers: None, + response_snippet: Some(snippet), + screenshot_path: None, + payload: None, + 
response_time_ms: None, + }; + + let vuln_type = if has_secrets { + DastVulnType::InformationDisclosure + } else { + DastVulnType::SecurityMisconfiguration + }; + + let mut finding = DastFinding::new( + String::new(), + target_id.clone(), + vuln_type, + format!("{description}: {path}"), + format!( + "Sensitive resource accessible at {url}. {}", + if has_secrets { + "Response contains potentially sensitive information." + } else { + "This resource should not be publicly accessible." + } + ), + severity, + url, + "GET".to_string(), + ); + finding.exploitable = has_secrets; + finding.evidence = vec![evidence]; + finding.cwe = Some(if has_secrets { + "CWE-200".to_string() + } else { + "CWE-16".to_string() + }); + + findings.push(finding); + } + } + } + + // Phase 2: CORS misconfiguration check + let cors_finding = self.check_cors(base, &target_id).await; + if let Some(f) = cors_finding { + findings.push(f); + } + + // Phase 3: Check for verbose error responses + let error_url = format!("{base}/nonexistent-path-{}", uuid::Uuid::new_v4()); + if let Ok(response) = self.http.get(&error_url).send().await { + let body = response.text().await.unwrap_or_default(); + let body_lower = body.to_lowercase(); + + let has_stack_trace = body_lower.contains("traceback") + || body_lower.contains("stack trace") + || body_lower.contains("at line") + || body_lower.contains("exception in") + || body_lower.contains("error in") + || (body_lower.contains(".py") && body_lower.contains("line")); + + if has_stack_trace { + let snippet = body.chars().take(500).collect::(); + let evidence = DastEvidence { + request_method: "GET".to_string(), + request_url: error_url.clone(), + request_headers: None, + request_body: None, + response_status: 404, + response_headers: None, + response_snippet: Some(snippet), + screenshot_path: None, + payload: None, + response_time_ms: None, + }; + + let mut finding = DastFinding::new( + String::new(), + target_id.clone(), + DastVulnType::InformationDisclosure, + 
"Verbose error messages expose stack traces".to_string(), + "The application exposes detailed error information including stack traces. \ + This can reveal internal paths, framework versions, and code structure." + .to_string(), + Severity::Low, + error_url, + "GET".to_string(), + ); + finding.evidence = vec![evidence]; + finding.cwe = Some("CWE-209".to_string()); + finding.remediation = Some( + "Configure the application to use generic error pages in production. \ + Do not expose stack traces or internal error details to end users." + .to_string(), + ); + + findings.push(finding); + } + } + + info!(findings = findings.len(), "API fuzzing scan complete"); + Ok(findings) + } +} + +impl ApiFuzzerAgent { + async fn check_cors(&self, base_url: &str, target_id: &str) -> Option { + let response = self + .http + .get(base_url) + .header("Origin", "https://evil.com") + .send() + .await + .ok()?; + + let headers = response.headers(); + let acao = headers + .get("access-control-allow-origin")? + .to_str() + .ok()?; + + if acao == "*" || acao == "https://evil.com" { + let acac = headers + .get("access-control-allow-credentials") + .and_then(|v| v.to_str().ok()) + .unwrap_or("false"); + + // Wildcard CORS with credentials is the worst case + let severity = if acac == "true" { + Severity::High + } else if acao == "*" { + Severity::Medium + } else { + Severity::Low + }; + + let evidence = DastEvidence { + request_method: "GET".to_string(), + request_url: base_url.to_string(), + request_headers: Some( + [("Origin".to_string(), "https://evil.com".to_string())] + .into_iter() + .collect(), + ), + request_body: None, + response_status: response.status().as_u16(), + response_headers: Some( + [( + "Access-Control-Allow-Origin".to_string(), + acao.to_string(), + )] + .into_iter() + .collect(), + ), + response_snippet: None, + screenshot_path: None, + payload: None, + response_time_ms: None, + }; + + let mut finding = DastFinding::new( + String::new(), + target_id.to_string(), + 
DastVulnType::SecurityMisconfiguration, + "CORS misconfiguration allows arbitrary origins".to_string(), + format!( + "The server responds with Access-Control-Allow-Origin: {acao} \ + which may allow cross-origin attacks." + ), + severity, + base_url.to_string(), + "GET".to_string(), + ); + finding.evidence = vec![evidence]; + finding.cwe = Some("CWE-942".to_string()); + finding.remediation = Some( + "Configure CORS to only allow trusted origins. \ + Never use wildcard (*) with credentials." + .to_string(), + ); + + Some(finding) + } else { + None + } + } +} diff --git a/compliance-dast/src/agents/auth_bypass.rs b/compliance-dast/src/agents/auth_bypass.rs new file mode 100644 index 0000000..ba88fe3 --- /dev/null +++ b/compliance-dast/src/agents/auth_bypass.rs @@ -0,0 +1,219 @@ +use compliance_core::error::CoreError; +use compliance_core::models::dast::{DastEvidence, DastFinding, DastTarget, DastVulnType}; +use compliance_core::models::Severity; +use compliance_core::traits::dast_agent::{DastAgent, DastContext}; +use tracing::info; + +/// Authentication bypass testing agent +pub struct AuthBypassAgent { + http: reqwest::Client, +} + +impl AuthBypassAgent { + pub fn new(http: reqwest::Client) -> Self { + Self { http } + } +} + +impl DastAgent for AuthBypassAgent { + fn name(&self) -> &str { + "auth_bypass" + } + + async fn run( + &self, + target: &DastTarget, + context: &DastContext, + ) -> Result, CoreError> { + let mut findings = Vec::new(); + let target_id = target + .id + .map(|oid| oid.to_hex()) + .unwrap_or_else(|| "unknown".to_string()); + + // Test 1: Access protected endpoints without authentication + for endpoint in &context.endpoints { + if !endpoint.requires_auth { + continue; + } + + // Try accessing without auth + let response = match self.http.get(&endpoint.url).send().await { + Ok(r) => r, + Err(_) => continue, + }; + + let status = response.status().as_u16(); + + // If we get 200 on a supposedly auth-required endpoint + if status == 200 { + let body = 
response.text().await.unwrap_or_default(); + let snippet = body.chars().take(500).collect::(); + + let evidence = DastEvidence { + request_method: "GET".to_string(), + request_url: endpoint.url.clone(), + request_headers: None, + request_body: None, + response_status: status, + response_headers: None, + response_snippet: Some(snippet), + screenshot_path: None, + payload: None, + response_time_ms: None, + }; + + let mut finding = DastFinding::new( + String::new(), + target_id.clone(), + DastVulnType::AuthBypass, + format!("Authentication bypass on {}", endpoint.url), + format!( + "Protected endpoint {} returned HTTP 200 without authentication credentials.", + endpoint.url + ), + Severity::Critical, + endpoint.url.clone(), + "GET".to_string(), + ); + finding.exploitable = true; + finding.evidence = vec![evidence]; + finding.cwe = Some("CWE-287".to_string()); + finding.remediation = Some( + "Ensure all protected endpoints validate authentication tokens. \ + Implement server-side authentication checks that cannot be bypassed." 
+ .to_string(), + ); + + findings.push(finding); + } + } + + // Test 2: HTTP method tampering + let methods = ["PUT", "PATCH", "DELETE", "OPTIONS"]; + for endpoint in &context.endpoints { + if endpoint.method != "GET" && endpoint.method != "POST" { + continue; + } + + for method in &methods { + let response = match self + .http + .request( + reqwest::Method::from_bytes(method.as_bytes()) + .unwrap_or(reqwest::Method::GET), + &endpoint.url, + ) + .send() + .await + { + Ok(r) => r, + Err(_) => continue, + }; + + let status = response.status().as_u16(); + + // If a non-standard method returns 200 when it shouldn't + if status == 200 && *method == "DELETE" && !target.allow_destructive { + let evidence = DastEvidence { + request_method: method.to_string(), + request_url: endpoint.url.clone(), + request_headers: None, + request_body: None, + response_status: status, + response_headers: None, + response_snippet: None, + screenshot_path: None, + payload: None, + response_time_ms: None, + }; + + let mut finding = DastFinding::new( + String::new(), + target_id.clone(), + DastVulnType::AuthBypass, + format!("HTTP method tampering: {} accepted on {}", method, endpoint.url), + format!( + "Endpoint {} accepts {} requests which may bypass access controls.", + endpoint.url, method + ), + Severity::Medium, + endpoint.url.clone(), + method.to_string(), + ); + finding.evidence = vec![evidence]; + finding.cwe = Some("CWE-288".to_string()); + + findings.push(finding); + } + } + } + + // Test 3: Path traversal for auth bypass + let traversal_paths = [ + "/../admin", + "/..;/admin", + "/%2e%2e/admin", + "/admin%00", + "/ADMIN", + "/Admin", + ]; + + for path in &traversal_paths { + let test_url = format!("{}{}", target.base_url.trim_end_matches('/'), path); + let response = match self.http.get(&test_url).send().await { + Ok(r) => r, + Err(_) => continue, + }; + + let status = response.status().as_u16(); + if status == 200 { + let body = response.text().await.unwrap_or_default(); + + // 
Check if response looks like an admin page + let body_lower = body.to_lowercase(); + if body_lower.contains("admin") + || body_lower.contains("dashboard") + || body_lower.contains("management") + { + let snippet = body.chars().take(500).collect::(); + let evidence = DastEvidence { + request_method: "GET".to_string(), + request_url: test_url.clone(), + request_headers: None, + request_body: None, + response_status: status, + response_headers: None, + response_snippet: Some(snippet), + screenshot_path: None, + payload: Some(path.to_string()), + response_time_ms: None, + }; + + let mut finding = DastFinding::new( + String::new(), + target_id.clone(), + DastVulnType::AuthBypass, + format!("Path traversal auth bypass: {path}"), + format!( + "Possible authentication bypass via path traversal. \ + Accessing '{}' returned admin-like content.", + test_url + ), + Severity::High, + test_url, + "GET".to_string(), + ); + finding.evidence = vec![evidence]; + finding.cwe = Some("CWE-22".to_string()); + + findings.push(finding); + break; + } + } + } + + info!(findings = findings.len(), "Auth bypass scan complete"); + Ok(findings) + } +} diff --git a/compliance-dast/src/agents/injection.rs b/compliance-dast/src/agents/injection.rs new file mode 100644 index 0000000..74fc143 --- /dev/null +++ b/compliance-dast/src/agents/injection.rs @@ -0,0 +1,195 @@ +use compliance_core::error::CoreError; +use compliance_core::models::dast::{DastEvidence, DastFinding, DastTarget, DastVulnType}; +use compliance_core::models::Severity; +use compliance_core::traits::dast_agent::{DastAgent, DastContext}; +use tracing::{info, warn}; + +/// SQL Injection testing agent +pub struct SqlInjectionAgent { + http: reqwest::Client, +} + +impl SqlInjectionAgent { + pub fn new(http: reqwest::Client) -> Self { + Self { http } + } + + /// Test payloads for SQL injection detection + fn payloads(&self) -> Vec<(&str, &str)> { + vec![ + ("' OR '1'='1", "boolean-based blind"), + ("1' AND SLEEP(2)-- -", "time-based 
blind"), + ("' UNION SELECT NULL--", "union-based"), + ("1; DROP TABLE test--", "stacked queries"), + ("' OR 1=1#", "mysql boolean"), + ("1' ORDER BY 1--", "order by probe"), + ("') OR ('1'='1", "parenthesis bypass"), + ] + } + + /// Error patterns that indicate SQL injection + fn error_patterns(&self) -> Vec<&str> { + vec![ + "sql syntax", + "mysql_fetch", + "ORA-01756", + "SQLite3::query", + "pg_query", + "unclosed quotation mark", + "quoted string not properly terminated", + "you have an error in your sql", + "warning: mysql", + "microsoft sql native client error", + "postgresql query failed", + "unterminated string", + "syntax error at or near", + ] + } +} + +impl DastAgent for SqlInjectionAgent { + fn name(&self) -> &str { + "sql_injection" + } + + async fn run( + &self, + target: &DastTarget, + context: &DastContext, + ) -> Result, CoreError> { + let mut findings = Vec::new(); + let target_id = target + .id + .map(|oid| oid.to_hex()) + .unwrap_or_else(|| "unknown".to_string()); + + for endpoint in &context.endpoints { + // Only test endpoints with parameters + if endpoint.parameters.is_empty() { + continue; + } + + for param in &endpoint.parameters { + for (payload, technique) in self.payloads() { + // Build the request with the injection payload + let test_url = if endpoint.method == "GET" { + format!( + "{}?{}={}", + endpoint.url, + param.name, + urlencoding::encode(payload) + ) + } else { + endpoint.url.clone() + }; + + let request = if endpoint.method == "POST" { + self.http + .post(&endpoint.url) + .form(&[(param.name.as_str(), payload)]) + } else { + self.http.get(&test_url) + }; + + let response = match request.send().await { + Ok(r) => r, + Err(_) => continue, + }; + + let status = response.status().as_u16(); + let headers: std::collections::HashMap = response + .headers() + .iter() + .map(|(k, v)| (k.to_string(), v.to_str().unwrap_or("").to_string())) + .collect(); + let body = response.text().await.unwrap_or_default(); + + // Check for SQL error 
patterns in response + let body_lower = body.to_lowercase(); + let is_vulnerable = self + .error_patterns() + .iter() + .any(|pattern| body_lower.contains(pattern)); + + if is_vulnerable { + let snippet = body.chars().take(500).collect::(); + + let evidence = DastEvidence { + request_method: endpoint.method.clone(), + request_url: test_url.clone(), + request_headers: None, + request_body: if endpoint.method == "POST" { + Some(format!("{}={}", param.name, payload)) + } else { + None + }, + response_status: status, + response_headers: Some(headers), + response_snippet: Some(snippet), + screenshot_path: None, + payload: Some(payload.to_string()), + response_time_ms: None, + }; + + let mut finding = DastFinding::new( + String::new(), // scan_run_id set by orchestrator + target_id.clone(), + DastVulnType::SqlInjection, + format!("SQL Injection ({technique}) in parameter '{}'", param.name), + format!( + "SQL injection vulnerability detected in parameter '{}' at {} using {} technique. \ + The server returned SQL error messages in response to the injected payload.", + param.name, endpoint.url, technique + ), + Severity::Critical, + endpoint.url.clone(), + endpoint.method.clone(), + ); + finding.parameter = Some(param.name.clone()); + finding.exploitable = true; + finding.evidence = vec![evidence]; + finding.cwe = Some("CWE-89".to_string()); + finding.remediation = Some( + "Use parameterized queries or prepared statements. \ + Never concatenate user input into SQL queries." 
+ .to_string(), + ); + + findings.push(finding); + + warn!( + endpoint = %endpoint.url, + param = %param.name, + technique, + "SQL injection found" + ); + + // Don't test more payloads for same param once confirmed + break; + } + } + } + } + + info!(findings = findings.len(), "SQL injection scan complete"); + Ok(findings) + } +} + +/// URL-encode a string for query parameters +mod urlencoding { + pub fn encode(input: &str) -> String { + let mut encoded = String::new(); + for byte in input.bytes() { + match byte { + b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_' | b'.' | b'~' => { + encoded.push(byte as char); + } + _ => { + encoded.push_str(&format!("%{:02X}", byte)); + } + } + } + encoded + } +} diff --git a/compliance-dast/src/agents/mod.rs b/compliance-dast/src/agents/mod.rs new file mode 100644 index 0000000..722469d --- /dev/null +++ b/compliance-dast/src/agents/mod.rs @@ -0,0 +1,5 @@ +pub mod api_fuzzer; +pub mod auth_bypass; +pub mod injection; +pub mod ssrf; +pub mod xss; diff --git a/compliance-dast/src/agents/ssrf.rs b/compliance-dast/src/agents/ssrf.rs new file mode 100644 index 0000000..4cee538 --- /dev/null +++ b/compliance-dast/src/agents/ssrf.rs @@ -0,0 +1,169 @@ +use compliance_core::error::CoreError; +use compliance_core::models::dast::{DastEvidence, DastFinding, DastTarget, DastVulnType}; +use compliance_core::models::Severity; +use compliance_core::traits::dast_agent::{DastAgent, DastContext}; +use tracing::info; + +/// Server-Side Request Forgery (SSRF) testing agent +pub struct SsrfAgent { + http: reqwest::Client, +} + +impl SsrfAgent { + pub fn new(http: reqwest::Client) -> Self { + Self { http } + } + + fn payloads(&self) -> Vec<(&str, &str)> { + vec![ + ("http://127.0.0.1", "localhost IPv4"), + ("http://[::1]", "localhost IPv6"), + ("http://0.0.0.0", "zero address"), + ("http://169.254.169.254/latest/meta-data/", "AWS metadata"), + ( + "http://metadata.google.internal/", + "GCP metadata", + ), + ("http://127.0.0.1:22", "SSH port 
probe"), + ("http://127.0.0.1:3306", "MySQL port probe"), + ("http://localhost/admin", "localhost admin"), + ] + } + + fn internal_indicators(&self) -> Vec<&str> { + vec![ + "ami-id", + "instance-id", + "local-hostname", + "public-hostname", + "iam/security-credentials", + "computeMetadata", + "OpenSSH", + "mysql_native_password", + "root:x:0:", + "", + ] + } +} + +impl DastAgent for SsrfAgent { + fn name(&self) -> &str { + "ssrf" + } + + async fn run( + &self, + target: &DastTarget, + context: &DastContext, + ) -> Result, CoreError> { + let mut findings = Vec::new(); + let target_id = target + .id + .map(|oid| oid.to_hex()) + .unwrap_or_else(|| "unknown".to_string()); + + // Find endpoints with URL-like parameters + for endpoint in &context.endpoints { + let url_params: Vec<_> = endpoint + .parameters + .iter() + .filter(|p| { + let name_lower = p.name.to_lowercase(); + name_lower.contains("url") + || name_lower.contains("uri") + || name_lower.contains("link") + || name_lower.contains("src") + || name_lower.contains("redirect") + || name_lower.contains("callback") + || name_lower.contains("fetch") + || name_lower.contains("load") + }) + .collect(); + + if url_params.is_empty() { + continue; + } + + for param in &url_params { + for (payload, technique) in self.payloads() { + let request = if endpoint.method == "POST" { + self.http + .post(&endpoint.url) + .form(&[(param.name.as_str(), payload)]) + } else { + let test_url = format!( + "{}?{}={}", + endpoint.url, param.name, payload + ); + self.http.get(&test_url) + }; + + let response = match request.send().await { + Ok(r) => r, + Err(_) => continue, + }; + + let status = response.status().as_u16(); + let body = response.text().await.unwrap_or_default(); + + // Check for SSRF indicators + let body_lower = body.to_lowercase(); + let is_vulnerable = self + .internal_indicators() + .iter() + .any(|indicator| body_lower.contains(&indicator.to_lowercase())); + + if is_vulnerable { + let snippet = 
body.chars().take(500).collect::(); + + let evidence = DastEvidence { + request_method: endpoint.method.clone(), + request_url: endpoint.url.clone(), + request_headers: None, + request_body: Some(format!("{}={}", param.name, payload)), + response_status: status, + response_headers: None, + response_snippet: Some(snippet), + screenshot_path: None, + payload: Some(payload.to_string()), + response_time_ms: None, + }; + + let mut finding = DastFinding::new( + String::new(), + target_id.clone(), + DastVulnType::Ssrf, + format!( + "SSRF ({technique}) via parameter '{}'", + param.name + ), + format!( + "Server-side request forgery detected in parameter '{}' at {}. \ + The application made a request to an internal resource ({}).", + param.name, endpoint.url, payload + ), + Severity::High, + endpoint.url.clone(), + endpoint.method.clone(), + ); + finding.parameter = Some(param.name.clone()); + finding.exploitable = true; + finding.evidence = vec![evidence]; + finding.cwe = Some("CWE-918".to_string()); + finding.remediation = Some( + "Validate and sanitize all user-supplied URLs. \ + Use allowlists for permitted domains and block internal IP ranges." 
+ .to_string(), + ); + + findings.push(finding); + break; + } + } + } + } + + info!(findings = findings.len(), "SSRF scan complete"); + Ok(findings) + } +} diff --git a/compliance-dast/src/agents/xss.rs b/compliance-dast/src/agents/xss.rs new file mode 100644 index 0000000..42e602c --- /dev/null +++ b/compliance-dast/src/agents/xss.rs @@ -0,0 +1,147 @@ +use compliance_core::error::CoreError; +use compliance_core::models::dast::{DastEvidence, DastFinding, DastTarget, DastVulnType}; +use compliance_core::models::Severity; +use compliance_core::traits::dast_agent::{DastAgent, DastContext}; +use tracing::info; + +/// Cross-Site Scripting (XSS) testing agent +pub struct XssAgent { + http: reqwest::Client, +} + +impl XssAgent { + pub fn new(http: reqwest::Client) -> Self { + Self { http } + } + + fn payloads(&self) -> Vec<(&str, &str)> { + vec![ + ("", "basic script injection"), + ( + "", + "event handler injection", + ), + ( + "", + "svg event handler", + ), + ( + "javascript:alert(1)", + "javascript protocol", + ), + ( + "'\">", + "attribute breakout", + ), + ( + "", + "body event handler", + ), + ] + } +} + +impl DastAgent for XssAgent { + fn name(&self) -> &str { + "xss" + } + + async fn run( + &self, + target: &DastTarget, + context: &DastContext, + ) -> Result, CoreError> { + let mut findings = Vec::new(); + let target_id = target + .id + .map(|oid| oid.to_hex()) + .unwrap_or_else(|| "unknown".to_string()); + + for endpoint in &context.endpoints { + if endpoint.parameters.is_empty() { + continue; + } + + for param in &endpoint.parameters { + for (payload, technique) in self.payloads() { + let test_url = if endpoint.method == "GET" { + format!( + "{}?{}={}", + endpoint.url, param.name, payload + ) + } else { + endpoint.url.clone() + }; + + let request = if endpoint.method == "POST" { + self.http + .post(&endpoint.url) + .form(&[(param.name.as_str(), payload)]) + } else { + self.http.get(&test_url) + }; + + let response = match request.send().await { + Ok(r) => r, + 
Err(_) => continue, + }; + + let status = response.status().as_u16(); + let body = response.text().await.unwrap_or_default(); + + // Check if payload is reflected in response without encoding + if body.contains(payload) { + let snippet = body.chars().take(500).collect::(); + + let evidence = DastEvidence { + request_method: endpoint.method.clone(), + request_url: test_url.clone(), + request_headers: None, + request_body: if endpoint.method == "POST" { + Some(format!("{}={}", param.name, payload)) + } else { + None + }, + response_status: status, + response_headers: None, + response_snippet: Some(snippet), + screenshot_path: None, + payload: Some(payload.to_string()), + response_time_ms: None, + }; + + let mut finding = DastFinding::new( + String::new(), + target_id.clone(), + DastVulnType::Xss, + format!("Reflected XSS ({technique}) in parameter '{}'", param.name), + format!( + "Cross-site scripting vulnerability detected in parameter '{}' at {}. \ + The injected payload was reflected in the response without proper encoding.", + param.name, endpoint.url + ), + Severity::High, + endpoint.url.clone(), + endpoint.method.clone(), + ); + finding.parameter = Some(param.name.clone()); + finding.exploitable = true; + finding.evidence = vec![evidence]; + finding.cwe = Some("CWE-79".to_string()); + finding.remediation = Some( + "Encode all user input before rendering in HTML context. \ + Use Content-Security-Policy headers to mitigate impact." 
+ .to_string(), + ); + + findings.push(finding); + break; + } + } + } + } + + info!(findings = findings.len(), "XSS scan complete"); + Ok(findings) + } +} diff --git a/compliance-dast/src/crawler/mod.rs b/compliance-dast/src/crawler/mod.rs new file mode 100644 index 0000000..5cbb646 --- /dev/null +++ b/compliance-dast/src/crawler/mod.rs @@ -0,0 +1,200 @@ +use std::collections::HashSet; + +use compliance_core::error::CoreError; +use compliance_core::traits::dast_agent::{DiscoveredEndpoint, EndpointParameter}; +use scraper::{Html, Selector}; +use tracing::info; +use url::Url; + +/// Web crawler that discovers endpoints and forms +pub struct WebCrawler { + http: reqwest::Client, + max_depth: u32, + rate_limit_ms: u64, +} + +impl WebCrawler { + pub fn new(http: reqwest::Client, max_depth: u32, rate_limit_ms: u64) -> Self { + Self { + http, + max_depth, + rate_limit_ms, + } + } + + /// Crawl a target starting from the base URL + pub async fn crawl( + &self, + base_url: &str, + excluded_paths: &[String], + ) -> Result, CoreError> { + let base = Url::parse(base_url) + .map_err(|e| CoreError::Dast(format!("Invalid base URL: {e}")))?; + + let mut visited: HashSet = HashSet::new(); + let mut endpoints: Vec = Vec::new(); + let mut queue: Vec<(String, u32)> = vec![(base_url.to_string(), 0)]; + + while let Some((url, depth)) = queue.pop() { + if depth > self.max_depth { + continue; + } + + if visited.contains(&url) { + continue; + } + + // Check exclusions + if excluded_paths + .iter() + .any(|excl| url.contains(excl.as_str())) + { + continue; + } + + visited.insert(url.clone()); + + // Rate limiting + if self.rate_limit_ms > 0 { + tokio::time::sleep(tokio::time::Duration::from_millis(self.rate_limit_ms)).await; + } + + // Fetch the page + let response = match self.http.get(&url).send().await { + Ok(r) => r, + Err(_) => continue, + }; + + let status = response.status(); + let content_type = response + .headers() + .get("content-type") + .and_then(|v| v.to_str().ok()) + 
.unwrap_or("") + .to_string(); + + // Record this endpoint + endpoints.push(DiscoveredEndpoint { + url: url.clone(), + method: "GET".to_string(), + parameters: Vec::new(), + content_type: Some(content_type.clone()), + requires_auth: status.as_u16() == 401 || status.as_u16() == 403, + }); + + if !content_type.contains("text/html") { + continue; + } + + let body = match response.text().await { + Ok(b) => b, + Err(_) => continue, + }; + + // Parse HTML for links and forms + let document = Html::parse_document(&body); + + // Extract links + let link_selector = + Selector::parse("a[href]").unwrap_or_else(|_| Selector::parse("a").expect("valid selector")); + for element in document.select(&link_selector) { + if let Some(href) = element.value().attr("href") { + if let Some(absolute_url) = self.resolve_url(&base, &url, href) { + if self.is_same_origin(&base, &absolute_url) && !visited.contains(&absolute_url) + { + queue.push((absolute_url, depth + 1)); + } + } + } + } + + // Extract forms + let form_selector = Selector::parse("form") + .unwrap_or_else(|_| Selector::parse("form").expect("valid selector")); + let input_selector = Selector::parse("input, select, textarea") + .unwrap_or_else(|_| Selector::parse("input").expect("valid selector")); + + for form in document.select(&form_selector) { + let action = form.value().attr("action").unwrap_or(""); + let method = form + .value() + .attr("method") + .unwrap_or("GET") + .to_uppercase(); + + let form_url = self + .resolve_url(&base, &url, action) + .unwrap_or_else(|| url.clone()); + + let mut params = Vec::new(); + for input in form.select(&input_selector) { + let name = input + .value() + .attr("name") + .unwrap_or("") + .to_string(); + if name.is_empty() { + continue; + } + + let input_type = input + .value() + .attr("type") + .unwrap_or("text") + .to_string(); + + let location = if method == "GET" { + "query".to_string() + } else { + "body".to_string() + }; + + params.push(EndpointParameter { + name, + location, + 
param_type: Some(input_type), + example_value: input.value().attr("value").map(|v| v.to_string()), + }); + } + + endpoints.push(DiscoveredEndpoint { + url: form_url, + method, + parameters: params, + content_type: Some("application/x-www-form-urlencoded".to_string()), + requires_auth: false, + }); + } + } + + info!(endpoints = endpoints.len(), "Crawling complete"); + Ok(endpoints) + } + + fn resolve_url(&self, _base: &Url, current_page: &str, href: &str) -> Option { + // Skip anchors, javascript:, mailto:, etc. + if href.starts_with('#') + || href.starts_with("javascript:") + || href.starts_with("mailto:") + || href.starts_with("tel:") + { + return None; + } + + if let Ok(absolute) = Url::parse(href) { + return Some(absolute.to_string()); + } + + // Relative URL + let current = Url::parse(current_page).ok()?; + current.join(href).ok().map(|u| u.to_string()) + } + + fn is_same_origin(&self, base: &Url, url: &str) -> bool { + if let Ok(parsed) = Url::parse(url) { + parsed.host() == base.host() && parsed.scheme() == base.scheme() + } else { + false + } + } +} diff --git a/compliance-dast/src/lib.rs b/compliance-dast/src/lib.rs new file mode 100644 index 0000000..38c2445 --- /dev/null +++ b/compliance-dast/src/lib.rs @@ -0,0 +1,6 @@ +pub mod agents; +pub mod crawler; +pub mod orchestrator; +pub mod recon; + +pub use orchestrator::DastOrchestrator; diff --git a/compliance-dast/src/orchestrator/mod.rs b/compliance-dast/src/orchestrator/mod.rs new file mode 100644 index 0000000..fc9e7ac --- /dev/null +++ b/compliance-dast/src/orchestrator/mod.rs @@ -0,0 +1,3 @@ +pub mod state_machine; + +pub use state_machine::DastOrchestrator; diff --git a/compliance-dast/src/orchestrator/state_machine.rs b/compliance-dast/src/orchestrator/state_machine.rs new file mode 100644 index 0000000..d5c569f --- /dev/null +++ b/compliance-dast/src/orchestrator/state_machine.rs @@ -0,0 +1,203 @@ +use chrono::Utc; +use compliance_core::error::CoreError; +use compliance_core::models::dast::{ + 
DastFinding, DastScanPhase, DastScanRun, DastScanStatus, DastTarget, +}; +use compliance_core::traits::dast_agent::DastContext; +use tracing::{error, info}; + +use crate::crawler::WebCrawler; +use crate::recon::ReconAgent; + +/// State machine orchestrator for DAST scanning +pub struct DastOrchestrator { + http: reqwest::Client, + rate_limit_ms: u64, +} + +impl DastOrchestrator { + pub fn new(rate_limit_ms: u64) -> Self { + Self { + http: reqwest::Client::new(), + rate_limit_ms, + } + } + + /// Run a complete DAST scan against a target + pub async fn run_scan( + &self, + target: &DastTarget, + sast_hints: Vec, + ) -> Result<(DastScanRun, Vec), CoreError> { + let target_id = target + .id + .map(|oid| oid.to_hex()) + .unwrap_or_else(|| "unknown".to_string()); + + let mut scan_run = DastScanRun::new(target_id); + let mut all_findings = Vec::new(); + + info!(target = %target.base_url, "Starting DAST scan"); + + // Phase 1: Reconnaissance + scan_run.current_phase = DastScanPhase::Reconnaissance; + let recon = ReconAgent::new(self.http.clone()); + let recon_result = match recon.scan(&target.base_url).await { + Ok(r) => r, + Err(e) => { + error!(error = %e, "Reconnaissance failed"); + scan_run.status = DastScanStatus::Failed; + scan_run.error_message = Some(format!("Reconnaissance failed: {e}")); + scan_run.completed_at = Some(Utc::now()); + return Ok((scan_run, all_findings)); + } + }; + scan_run + .phases_completed + .push(DastScanPhase::Reconnaissance); + + info!( + technologies = ?recon_result.technologies, + headers = recon_result.interesting_headers.len(), + "Reconnaissance complete" + ); + + // Phase 2: Crawling + scan_run.current_phase = DastScanPhase::Crawling; + let crawler = WebCrawler::new( + self.http.clone(), + target.max_crawl_depth, + self.rate_limit_ms, + ); + let endpoints = match crawler + .crawl(&target.base_url, &target.excluded_paths) + .await + { + Ok(e) => e, + Err(e) => { + error!(error = %e, "Crawling failed"); + scan_run.status = 
DastScanStatus::Failed; + scan_run.error_message = Some(format!("Crawling failed: {e}")); + scan_run.completed_at = Some(Utc::now()); + return Ok((scan_run, all_findings)); + } + }; + scan_run.endpoints_discovered = endpoints.len() as u32; + scan_run.phases_completed.push(DastScanPhase::Crawling); + + info!(endpoints = endpoints.len(), "Crawling complete"); + + // Build context for vulnerability agents + let context = DastContext { + endpoints, + technologies: recon_result.technologies, + sast_hints, + }; + + // Phase 3: Vulnerability Analysis + scan_run.current_phase = DastScanPhase::VulnerabilityAnalysis; + let vuln_findings = self.run_vulnerability_agents(target, &context).await?; + all_findings.extend(vuln_findings); + scan_run + .phases_completed + .push(DastScanPhase::VulnerabilityAnalysis); + + // Phase 4: Exploitation (verify findings) + scan_run.current_phase = DastScanPhase::Exploitation; + // Exploitation is handled within each agent's evidence collection + scan_run.phases_completed.push(DastScanPhase::Exploitation); + + // Phase 5: Reporting + scan_run.current_phase = DastScanPhase::Reporting; + scan_run.findings_count = all_findings.len() as u32; + scan_run.exploitable_count = all_findings.iter().filter(|f| f.exploitable).count() as u32; + scan_run.phases_completed.push(DastScanPhase::Reporting); + + scan_run.status = DastScanStatus::Completed; + scan_run.current_phase = DastScanPhase::Completed; + scan_run.completed_at = Some(Utc::now()); + + info!( + findings = scan_run.findings_count, + exploitable = scan_run.exploitable_count, + "DAST scan complete" + ); + + Ok((scan_run, all_findings)) + } + + /// Run all vulnerability testing agents in parallel + async fn run_vulnerability_agents( + &self, + target: &DastTarget, + context: &DastContext, + ) -> Result, CoreError> { + use compliance_core::traits::DastAgent; + + let http = self.http.clone(); + + // Spawn each agent as a separate tokio task + let t1 = target.clone(); + let c1 = context.clone(); + let 
h1 = http.clone(); + let sqli_handle = tokio::spawn(async move { + crate::agents::injection::SqlInjectionAgent::new(h1) + .run(&t1, &c1) + .await + }); + + let t2 = target.clone(); + let c2 = context.clone(); + let h2 = http.clone(); + let xss_handle = tokio::spawn(async move { + crate::agents::xss::XssAgent::new(h2) + .run(&t2, &c2) + .await + }); + + let t3 = target.clone(); + let c3 = context.clone(); + let h3 = http.clone(); + let auth_handle = tokio::spawn(async move { + crate::agents::auth_bypass::AuthBypassAgent::new(h3) + .run(&t3, &c3) + .await + }); + + let t4 = target.clone(); + let c4 = context.clone(); + let h4 = http.clone(); + let ssrf_handle = tokio::spawn(async move { + crate::agents::ssrf::SsrfAgent::new(h4) + .run(&t4, &c4) + .await + }); + + let t5 = target.clone(); + let c5 = context.clone(); + let h5 = http; + let api_handle = tokio::spawn(async move { + crate::agents::api_fuzzer::ApiFuzzerAgent::new(h5) + .run(&t5, &c5) + .await + }); + + let handles: Vec, CoreError>>> = + vec![sqli_handle, xss_handle, auth_handle, ssrf_handle, api_handle]; + + let mut all_findings = Vec::new(); + for handle in handles { + match handle.await { + Ok(Ok(findings)) => all_findings.extend(findings), + Ok(Err(e)) => { + error!(error = %e, "Agent failed"); + } + Err(e) => { + error!(error = %e, "Agent task panicked"); + } + } + } + + Ok(all_findings) + } +} diff --git a/compliance-dast/src/recon/mod.rs b/compliance-dast/src/recon/mod.rs new file mode 100644 index 0000000..68b7d4f --- /dev/null +++ b/compliance-dast/src/recon/mod.rs @@ -0,0 +1,132 @@ +use std::collections::HashMap; + +use compliance_core::error::CoreError; +use tracing::info; + +/// Result of reconnaissance scanning +#[derive(Debug, Clone)] +pub struct ReconResult { + pub technologies: Vec, + pub interesting_headers: HashMap, + pub server: Option, + pub open_ports: Vec, +} + +/// Agent that performs reconnaissance on a target +pub struct ReconAgent { + http: reqwest::Client, +} + +impl ReconAgent { 
+ pub fn new(http: reqwest::Client) -> Self { + Self { http } + } + + /// Perform reconnaissance on a target URL + pub async fn scan(&self, base_url: &str) -> Result { + let mut result = ReconResult { + technologies: Vec::new(), + interesting_headers: HashMap::new(), + server: None, + open_ports: Vec::new(), + }; + + // HTTP header fingerprinting + let response = self + .http + .get(base_url) + .send() + .await + .map_err(|e| CoreError::Dast(format!("Failed to connect to target: {e}")))?; + + let headers = response.headers(); + + // Extract server info + if let Some(server) = headers.get("server") { + let server_str = server.to_str().unwrap_or("unknown").to_string(); + result.server = Some(server_str.clone()); + result.technologies.push(server_str); + } + + // Detect technologies from headers + let security_headers = [ + "x-powered-by", + "x-aspnet-version", + "x-frame-options", + "x-xss-protection", + "x-content-type-options", + "strict-transport-security", + "content-security-policy", + "x-generator", + ]; + + for header_name in &security_headers { + if let Some(value) = headers.get(*header_name) { + let value_str = value.to_str().unwrap_or("").to_string(); + result + .interesting_headers + .insert(header_name.to_string(), value_str.clone()); + + if *header_name == "x-powered-by" || *header_name == "x-generator" { + result.technologies.push(value_str); + } + } + } + + // Check for missing security headers + let missing_security = [ + "strict-transport-security", + "x-content-type-options", + "x-frame-options", + ]; + for header in &missing_security { + if !headers.contains_key(*header) { + result.interesting_headers.insert( + format!("missing:{header}"), + "Not present".to_string(), + ); + } + } + + // Detect technology from response body + let body = response + .text() + .await + .map_err(|e| CoreError::Dast(format!("Failed to read response: {e}")))?; + + self.detect_technologies_from_body(&body, &mut result); + + info!( + url = base_url, + technologies = 
?result.technologies, + "Reconnaissance complete" + ); + + Ok(result) + } + + fn detect_technologies_from_body(&self, body: &str, result: &mut ReconResult) { + let patterns = [ + ("React", r#"react"#), + ("Angular", r#"ng-version"#), + ("Vue.js", r#"vue"#), + ("jQuery", r#"jquery"#), + ("WordPress", r#"wp-content"#), + ("Django", r#"csrfmiddlewaretoken"#), + ("Rails", r#"csrf-token"#), + ("Laravel", r#"laravel"#), + ("Express", r#"express"#), + ("Next.js", r#"__NEXT_DATA__"#), + ("Nuxt.js", r#"__NUXT__"#), + ]; + + let body_lower = body.to_lowercase(); + for (tech, pattern) in &patterns { + if body_lower.contains(&pattern.to_lowercase()) { + if !result.technologies.contains(&tech.to_string()) { + result.technologies.push(tech.to_string()); + } + } + } + } +} diff --git a/compliance-graph/Cargo.toml b/compliance-graph/Cargo.toml new file mode 100644 index 0000000..a14de46 --- /dev/null +++ b/compliance-graph/Cargo.toml @@ -0,0 +1,37 @@ +[package] +name = "compliance-graph" +version = "0.1.0" +edition = "2021" + +[lints] +workspace = true + +[dependencies] +compliance-core = { workspace = true, features = ["mongodb"] } +serde = { workspace = true } +serde_json = { workspace = true } +chrono = { workspace = true } +thiserror = { workspace = true } +tracing = { workspace = true } +uuid = { workspace = true } +tokio = { workspace = true } +mongodb = { workspace = true } + +# Tree-sitter parsing +tree-sitter = "0.24" +tree-sitter-rust = "0.23" +tree-sitter-python = "0.23" +tree-sitter-javascript = "0.23" +tree-sitter-typescript = "0.23" + +# Graph algorithms +petgraph = "0.7" + +# Text search +tantivy = "0.22" + +# Serialization +bson = "2" + +# Async streams +futures-util = "0.3" diff --git a/compliance-graph/src/graph/community.rs b/compliance-graph/src/graph/community.rs new file mode 100644 index 0000000..b24d254 --- /dev/null +++ b/compliance-graph/src/graph/community.rs @@ -0,0 +1,256 @@ +use std::collections::HashMap; + +use petgraph::graph::NodeIndex; +use 
petgraph::visit::EdgeRef; +use tracing::info; + +use super::engine::CodeGraph; + +/// Run Louvain community detection on the code graph. +/// Returns the number of communities detected. +/// Mutates node community_id in place. +pub fn detect_communities(code_graph: &CodeGraph) -> u32 { + let graph = &code_graph.graph; + let node_count = graph.node_count(); + + if node_count == 0 { + return 0; + } + + // Initialize: each node in its own community + let mut community: HashMap = HashMap::new(); + for idx in graph.node_indices() { + community.insert(idx, idx.index() as u32); + } + + // Compute total edge weight (all edges weight 1.0) + let total_edges = graph.edge_count() as f64; + if total_edges == 0.0 { + // All nodes are isolated, each is its own community + return node_count as u32; + } + + let m2 = 2.0 * total_edges; + + // Pre-compute node degrees + let mut degree: HashMap = HashMap::new(); + for idx in graph.node_indices() { + let d = graph.edges(idx).count() as f64; + degree.insert(idx, d); + } + + // Louvain phase 1: local moves + let mut improved = true; + let mut iterations = 0; + let max_iterations = 50; + + while improved && iterations < max_iterations { + improved = false; + iterations += 1; + + for node in graph.node_indices() { + let current_comm = community[&node]; + let node_deg = degree[&node]; + + // Compute edges to each neighboring community + let mut comm_edges: HashMap = HashMap::new(); + for edge in graph.edges(node) { + let neighbor = edge.target(); + let neighbor_comm = community[&neighbor]; + *comm_edges.entry(neighbor_comm).or_insert(0.0) += 1.0; + } + // Also check incoming edges (undirected treatment) + for edge in graph.edges_directed(node, petgraph::Direction::Incoming) { + let neighbor = edge.source(); + let neighbor_comm = community[&neighbor]; + *comm_edges.entry(neighbor_comm).or_insert(0.0) += 1.0; + } + + // Compute community totals (sum of degrees in each community) + let mut comm_totals: HashMap = HashMap::new(); + for (n, &c) 
in &community { + *comm_totals.entry(c).or_insert(0.0) += degree[n]; + } + + // Find best community + let current_total = comm_totals.get(¤t_comm).copied().unwrap_or(0.0); + let edges_to_current = comm_edges.get(¤t_comm).copied().unwrap_or(0.0); + + // Modularity gain from removing node from current community + let remove_cost = edges_to_current - (current_total - node_deg) * node_deg / m2; + + let mut best_comm = current_comm; + let mut best_gain = 0.0; + + for (&candidate_comm, &edges_to_candidate) in &comm_edges { + if candidate_comm == current_comm { + continue; + } + let candidate_total = comm_totals.get(&candidate_comm).copied().unwrap_or(0.0); + + // Modularity gain from adding node to candidate community + let add_gain = edges_to_candidate - candidate_total * node_deg / m2; + let gain = add_gain - remove_cost; + + if gain > best_gain { + best_gain = gain; + best_comm = candidate_comm; + } + } + + if best_comm != current_comm { + community.insert(node, best_comm); + improved = true; + } + } + } + + // Renumber communities to be contiguous + let mut comm_remap: HashMap = HashMap::new(); + let mut next_id: u32 = 0; + for &c in community.values() { + if !comm_remap.contains_key(&c) { + comm_remap.insert(c, next_id); + next_id += 1; + } + } + + // Apply to community map + for c in community.values_mut() { + if let Some(&new_id) = comm_remap.get(c) { + *c = new_id; + } + } + + let num_communities = next_id; + info!( + communities = num_communities, + iterations, "Community detection complete" + ); + + // NOTE: community IDs are stored in the HashMap but need to be applied + // back to the CodeGraph nodes by the caller (engine) if needed for persistence. + // For now we return the count; the full assignment is available via the map. 
+ + num_communities +} + +/// Apply community assignments back to code nodes +pub fn apply_communities(code_graph: &mut CodeGraph) -> u32 { + let count = detect_communities_with_assignment(code_graph); + count +} + +/// Detect communities and write assignments into the nodes +fn detect_communities_with_assignment(code_graph: &mut CodeGraph) -> u32 { + let graph = &code_graph.graph; + let node_count = graph.node_count(); + + if node_count == 0 { + return 0; + } + + let mut community: HashMap = HashMap::new(); + for idx in graph.node_indices() { + community.insert(idx, idx.index() as u32); + } + + let total_edges = graph.edge_count() as f64; + if total_edges == 0.0 { + for node in &mut code_graph.nodes { + if let Some(gi) = node.graph_index { + node.community_id = Some(gi); + } + } + return node_count as u32; + } + + let m2 = 2.0 * total_edges; + + let mut degree: HashMap = HashMap::new(); + for idx in graph.node_indices() { + let d = (graph.edges(idx).count() + + graph + .edges_directed(idx, petgraph::Direction::Incoming) + .count()) as f64; + degree.insert(idx, d); + } + + let mut improved = true; + let mut iterations = 0; + let max_iterations = 50; + + while improved && iterations < max_iterations { + improved = false; + iterations += 1; + + for node in graph.node_indices() { + let current_comm = community[&node]; + let node_deg = degree[&node]; + + let mut comm_edges: HashMap = HashMap::new(); + for edge in graph.edges(node) { + let neighbor_comm = community[&edge.target()]; + *comm_edges.entry(neighbor_comm).or_insert(0.0) += 1.0; + } + for edge in graph.edges_directed(node, petgraph::Direction::Incoming) { + let neighbor_comm = community[&edge.source()]; + *comm_edges.entry(neighbor_comm).or_insert(0.0) += 1.0; + } + + let mut comm_totals: HashMap = HashMap::new(); + for (n, &c) in &community { + *comm_totals.entry(c).or_insert(0.0) += degree[n]; + } + + let current_total = comm_totals.get(¤t_comm).copied().unwrap_or(0.0); + let edges_to_current = 
comm_edges.get(&current_comm).copied().unwrap_or(0.0); + let remove_cost = edges_to_current - (current_total - node_deg) * node_deg / m2; + + let mut best_comm = current_comm; + let mut best_gain = 0.0; + + for (&candidate_comm, &edges_to_candidate) in &comm_edges { + if candidate_comm == current_comm { + continue; + } + let candidate_total = comm_totals.get(&candidate_comm).copied().unwrap_or(0.0); + let add_gain = edges_to_candidate - candidate_total * node_deg / m2; + let gain = add_gain - remove_cost; + + if gain > best_gain { + best_gain = gain; + best_comm = candidate_comm; + } + } + + if best_comm != current_comm { + community.insert(node, best_comm); + improved = true; + } + } + } + + // Renumber + let mut comm_remap: HashMap<u32, u32> = HashMap::new(); + let mut next_id: u32 = 0; + for &c in community.values() { + if !comm_remap.contains_key(&c) { + comm_remap.insert(c, next_id); + next_id += 1; + } + } + + // Apply to nodes + for node in &mut code_graph.nodes { + if let Some(gi) = node.graph_index { + let idx = NodeIndex::new(gi as usize); + if let Some(&comm) = community.get(&idx) { + let remapped = comm_remap.get(&comm).copied().unwrap_or(comm); + node.community_id = Some(remapped); + } + } + } + + next_id +} diff --git a/compliance-graph/src/graph/engine.rs b/compliance-graph/src/graph/engine.rs new file mode 100644 index 0000000..8450377 --- /dev/null +++ b/compliance-graph/src/graph/engine.rs @@ -0,0 +1,165 @@ +use std::collections::HashMap; +use std::path::Path; + +use chrono::Utc; +use compliance_core::error::CoreError; +use compliance_core::models::graph::{ + CodeEdge, CodeEdgeKind, CodeNode, GraphBuildRun, GraphBuildStatus, +}; +use compliance_core::traits::graph_builder::ParseOutput; +use petgraph::graph::{DiGraph, NodeIndex}; +use tracing::info; + +use crate::parsers::registry::ParserRegistry; + +use super::community::detect_communities; +use super::impact::ImpactAnalyzer; + +/// The main graph engine that builds and manages code knowledge graphs +pub struct 
GraphEngine { + parser_registry: ParserRegistry, + max_nodes: u32, +} + +/// In-memory representation of a built code graph +pub struct CodeGraph { + pub graph: DiGraph<String, CodeEdgeKind>, + pub node_map: HashMap<String, NodeIndex>, + pub nodes: Vec<CodeNode>, + pub edges: Vec<CodeEdge>, +} + +impl GraphEngine { + pub fn new(max_nodes: u32) -> Self { + Self { + parser_registry: ParserRegistry::new(), + max_nodes, + } + } + + /// Build a code graph from a repository directory + pub fn build_graph( + &self, + repo_path: &Path, + repo_id: &str, + graph_build_id: &str, + ) -> Result<(CodeGraph, GraphBuildRun), CoreError> { + let mut build_run = GraphBuildRun::new(repo_id.to_string()); + + info!(repo_id, path = %repo_path.display(), "Starting graph build"); + + // Phase 1: Parse all files + let parse_output = self.parser_registry.parse_directory( + repo_path, + repo_id, + graph_build_id, + self.max_nodes, + )?; + + // Phase 2: Build petgraph + let code_graph = self.build_petgraph(parse_output)?; + + // Phase 3: Run community detection + let community_count = detect_communities(&code_graph); + + // Collect language stats + let mut languages: Vec<String> = code_graph + .nodes + .iter() + .map(|n| n.language.clone()) + .collect::<std::collections::HashSet<_>>() + .into_iter() + .collect(); + languages.sort(); + + build_run.node_count = code_graph.nodes.len() as u32; + build_run.edge_count = code_graph.edges.len() as u32; + build_run.community_count = community_count; + build_run.languages_parsed = languages; + build_run.status = GraphBuildStatus::Completed; + build_run.completed_at = Some(Utc::now()); + + info!( + nodes = build_run.node_count, + edges = build_run.edge_count, + communities = build_run.community_count, + "Graph build complete" + ); + + Ok((code_graph, build_run)) + } + + /// Build petgraph from parsed output, resolving edges to node indices + fn build_petgraph(&self, parse_output: ParseOutput) -> Result<CodeGraph, CoreError> { + let mut graph = DiGraph::new(); + let mut node_map: HashMap<String, NodeIndex> = HashMap::new(); + let mut nodes = parse_output.nodes; + + // Add all nodes to the 
graph + for node in &mut nodes { + let idx = graph.add_node(node.qualified_name.clone()); + node.graph_index = Some(idx.index() as u32); + node_map.insert(node.qualified_name.clone(), idx); + } + + // Resolve and add edges + let mut resolved_edges = Vec::new(); + for edge in parse_output.edges { + let source_idx = node_map.get(&edge.source); + let target_idx = self.resolve_edge_target(&edge.target, &node_map); + + if let (Some(&src), Some(tgt)) = (source_idx, target_idx) { + graph.add_edge(src, tgt, edge.kind.clone()); + resolved_edges.push(edge); + } + // Skip unresolved edges (cross-file, external deps) — conservative approach + } + + Ok(CodeGraph { + graph, + node_map, + nodes, + edges: resolved_edges, + }) + } + + /// Try to resolve an edge target to a known node + fn resolve_edge_target<'a>( + &self, + target: &str, + node_map: &'a HashMap<String, NodeIndex>, + ) -> Option<NodeIndex> { + // Direct match + if let Some(idx) = node_map.get(target) { + return Some(*idx); + } + + // Try matching just the function/type name (intra-file resolution) + for (qualified, idx) in node_map { + // Match "foo" to "path/file.rs::foo" or "path/file.rs::Type::foo" + if qualified.ends_with(&format!("::{target}")) + || qualified.ends_with(&format!(".{target}")) + { + return Some(*idx); + } + } + + // Try matching method calls like "self.method" -> look for "::method" + if let Some(method_name) = target.strip_prefix("self.") { + for (qualified, idx) in node_map { + if qualified.ends_with(&format!("::{method_name}")) + || qualified.ends_with(&format!(".{method_name}")) + { + return Some(*idx); + } + } + } + + None + } + + /// Get the impact analyzer for a built graph + pub fn impact_analyzer(code_graph: &CodeGraph) -> ImpactAnalyzer<'_> { + ImpactAnalyzer::new(code_graph) + } +} diff --git a/compliance-graph/src/graph/impact.rs b/compliance-graph/src/graph/impact.rs new file mode 100644 index 0000000..de8eb22 --- /dev/null +++ b/compliance-graph/src/graph/impact.rs @@ -0,0 +1,219 @@ +use 
std::collections::{HashSet, VecDeque}; + +use compliance_core::models::graph::ImpactAnalysis; +use petgraph::graph::NodeIndex; +use petgraph::visit::EdgeRef; +use petgraph::Direction; + +use super::engine::CodeGraph; + +/// Analyzes the impact/blast radius of findings within a code graph +pub struct ImpactAnalyzer<'a> { + code_graph: &'a CodeGraph, +} + +impl<'a> ImpactAnalyzer<'a> { + pub fn new(code_graph: &'a CodeGraph) -> Self { + Self { code_graph } + } + + /// Compute impact analysis for a finding at the given file path and line number + pub fn analyze( + &self, + repo_id: &str, + finding_id: &str, + graph_build_id: &str, + file_path: &str, + line_number: Option<u32>, + ) -> ImpactAnalysis { + let mut analysis = + ImpactAnalysis::new(repo_id.to_string(), finding_id.to_string(), graph_build_id.to_string()); + + // Find the node containing the finding + let target_node = self.find_node_at_location(file_path, line_number); + let target_idx = match target_node { + Some(idx) => idx, + None => return analysis, + }; + + // BFS forward: compute blast radius (what this node affects) + let forward_reachable = self.bfs_reachable(target_idx, Direction::Outgoing); + analysis.blast_radius = forward_reachable.len() as u32; + + // BFS backward: find entry points that reach this node + let backward_reachable = self.bfs_reachable(target_idx, Direction::Incoming); + + // Find affected entry points + for &idx in &backward_reachable { + if let Some(node) = self.get_node_by_index(idx) { + if node.is_entry_point { + analysis + .affected_entry_points + .push(node.qualified_name.clone()); + } + } + } + + // Extract call chains from entry points to the target (limited depth) + for entry_name in &analysis.affected_entry_points.clone() { + if let Some(&entry_idx) = self.code_graph.node_map.get(entry_name) { + if let Some(chain) = self.find_path(entry_idx, target_idx, 10) { + analysis.call_chains.push(chain); + } + } + } + + // Direct callers (incoming edges to target) + for edge in self + 
.code_graph + .graph + .edges_directed(target_idx, Direction::Incoming) + { + if let Some(node) = self.get_node_by_index(edge.source()) { + analysis.direct_callers.push(node.qualified_name.clone()); + } + } + + // Direct callees (outgoing edges from target) + for edge in self.code_graph.graph.edges(target_idx) { + if let Some(node) = self.get_node_by_index(edge.target()) { + analysis.direct_callees.push(node.qualified_name.clone()); + } + } + + // Affected communities + let mut affected_comms: HashSet<u32> = HashSet::new(); + for &idx in forward_reachable.iter().chain(std::iter::once(&target_idx)) { + if let Some(node) = self.get_node_by_index(idx) { + if let Some(cid) = node.community_id { + affected_comms.insert(cid); + } + } + } + analysis.affected_communities = affected_comms.into_iter().collect(); + analysis.affected_communities.sort(); + + analysis + } + + /// Find the graph node at a given file/line location + fn find_node_at_location(&self, file_path: &str, line_number: Option<u32>) -> Option<NodeIndex> { + let mut best: Option<(NodeIndex, u32)> = None; // (index, line_span) + + for node in &self.code_graph.nodes { + if node.file_path != file_path { + continue; + } + + if let Some(line) = line_number { + if line >= node.start_line && line <= node.end_line { + let span = node.end_line - node.start_line; + // Prefer the narrowest containing node + if best.is_none() || span < best.as_ref().map(|b| b.1).unwrap_or(u32::MAX) { + if let Some(gi) = node.graph_index { + best = Some((NodeIndex::new(gi as usize), span)); + } + } + } + } else { + // No line number, use file node + if node.kind == compliance_core::models::graph::CodeNodeKind::File { + if let Some(gi) = node.graph_index { + return Some(NodeIndex::new(gi as usize)); + } + } + } + } + + best.map(|(idx, _)| idx) + } + + /// BFS to find all reachable nodes in a given direction + fn bfs_reachable(&self, start: NodeIndex, direction: Direction) -> HashSet<NodeIndex> { + let mut visited = HashSet::new(); + let mut queue = VecDeque::new(); + 
queue.push_back(start); + + while let Some(current) = queue.pop_front() { + if !visited.insert(current) { + continue; + } + + let neighbors: Vec<NodeIndex> = match direction { + Direction::Outgoing => self + .code_graph + .graph + .edges(current) + .map(|e| e.target()) + .collect(), + Direction::Incoming => self + .code_graph + .graph + .edges_directed(current, Direction::Incoming) + .map(|e| e.source()) + .collect(), + }; + + for neighbor in neighbors { + if !visited.contains(&neighbor) { + queue.push_back(neighbor); + } + } + } + + visited.remove(&start); + visited + } + + /// Find a path from source to target (BFS, limited depth) + fn find_path( + &self, + from: NodeIndex, + to: NodeIndex, + max_depth: usize, + ) -> Option<Vec<String>> { + let mut visited = HashSet::new(); + let mut queue: VecDeque<(NodeIndex, Vec<NodeIndex>)> = VecDeque::new(); + queue.push_back((from, vec![from])); + + while let Some((current, path)) = queue.pop_front() { + if current == to { + return Some( + path.iter() + .filter_map(|&idx| { + self.get_node_by_index(idx) + .map(|n| n.qualified_name.clone()) + }) + .collect(), + ); + } + + if path.len() >= max_depth { + continue; + } + + if !visited.insert(current) { + continue; + } + + for edge in self.code_graph.graph.edges(current) { + let next = edge.target(); + if !visited.contains(&next) { + let mut new_path = path.clone(); + new_path.push(next); + queue.push_back((next, new_path)); + } + } + } + + None + } + + fn get_node_by_index(&self, idx: NodeIndex) -> Option<&compliance_core::models::graph::CodeNode> { + let target_gi = idx.index() as u32; + self.code_graph + .nodes + .iter() + .find(|n| n.graph_index == Some(target_gi)) + } +} diff --git a/compliance-graph/src/graph/mod.rs b/compliance-graph/src/graph/mod.rs new file mode 100644 index 0000000..f66238d --- /dev/null +++ b/compliance-graph/src/graph/mod.rs @@ -0,0 +1,4 @@ +pub mod community; +pub mod engine; +pub mod impact; +pub mod persistence; diff --git a/compliance-graph/src/graph/persistence.rs 
b/compliance-graph/src/graph/persistence.rs new file mode 100644 index 0000000..65c7b10 --- /dev/null +++ b/compliance-graph/src/graph/persistence.rs @@ -0,0 +1,255 @@ +use compliance_core::error::CoreError; +use compliance_core::models::graph::{CodeEdge, CodeNode, GraphBuildRun, ImpactAnalysis}; +use futures_util::TryStreamExt; +use mongodb::bson::doc; +use mongodb::options::IndexOptions; +use mongodb::{Collection, Database, IndexModel}; +use tracing::info; + +/// MongoDB persistence layer for the code knowledge graph +pub struct GraphStore { + nodes: Collection<CodeNode>, + edges: Collection<CodeEdge>, + builds: Collection<GraphBuildRun>, + impacts: Collection<ImpactAnalysis>, +} + +impl GraphStore { + pub fn new(db: &Database) -> Self { + Self { + nodes: db.collection("graph_nodes"), + edges: db.collection("graph_edges"), + builds: db.collection("graph_builds"), + impacts: db.collection("impact_analyses"), + } + } + + /// Ensure indexes are created + pub async fn ensure_indexes(&self) -> Result<(), CoreError> { + // graph_nodes: compound index on (repo_id, graph_build_id) + self.nodes + .create_index( + IndexModel::builder() + .keys(doc! { "repo_id": 1, "graph_build_id": 1 }) + .build(), + ) + .await?; + + // graph_nodes: index on qualified_name for lookups + self.nodes + .create_index( + IndexModel::builder() + .keys(doc! { "qualified_name": 1 }) + .build(), + ) + .await?; + + // graph_edges: compound index on (repo_id, graph_build_id) + self.edges + .create_index( + IndexModel::builder() + .keys(doc! { "repo_id": 1, "graph_build_id": 1 }) + .build(), + ) + .await?; + + // graph_builds: compound index on (repo_id, started_at DESC) + self.builds + .create_index( + IndexModel::builder() + .keys(doc! { "repo_id": 1, "started_at": -1 }) + .build(), + ) + .await?; + + // impact_analyses: compound index on (repo_id, finding_id) + self.impacts + .create_index( + IndexModel::builder() + .keys(doc! 
{ "repo_id": 1, "finding_id": 1 }) + .options(IndexOptions::builder().unique(true).build()) + .build(), + ) + .await?; + + Ok(()) + } + + /// Store a complete graph build result + pub async fn store_graph( + &self, + build_run: &GraphBuildRun, + nodes: &[CodeNode], + edges: &[CodeEdge], + ) -> Result<String, CoreError> { + // Insert the build run + let result = self.builds.insert_one(build_run).await?; + let build_id = result + .inserted_id + .as_object_id() + .map(|oid| oid.to_hex()) + .unwrap_or_default(); + + // Insert nodes in batches + if !nodes.is_empty() { + let batch_size = 1000; + for chunk in nodes.chunks(batch_size) { + self.nodes.insert_many(chunk.to_vec()).await?; + } + } + + // Insert edges in batches + if !edges.is_empty() { + let batch_size = 1000; + for chunk in edges.chunks(batch_size) { + self.edges.insert_many(chunk.to_vec()).await?; + } + } + + info!( + build_id = %build_id, + nodes = nodes.len(), + edges = edges.len(), + "Graph stored to MongoDB" + ); + + Ok(build_id) + } + + /// Delete previous graph data for a repo before storing new graph + pub async fn delete_repo_graph(&self, repo_id: &str) -> Result<(), CoreError> { + let filter = doc! { "repo_id": repo_id }; + self.nodes.delete_many(filter.clone()).await?; + self.edges.delete_many(filter.clone()).await?; + self.impacts.delete_many(filter).await?; + Ok(()) + } + + /// Store an impact analysis result + pub async fn store_impact(&self, impact: &ImpactAnalysis) -> Result<(), CoreError> { + let filter = doc! { + "repo_id": &impact.repo_id, + "finding_id": &impact.finding_id, + }; + + let opts = mongodb::options::ReplaceOptions::builder() + .upsert(true) + .build(); + + self.impacts + .replace_one(filter, impact) + .with_options(opts) + .await?; + + Ok(()) + } + + /// Get the latest graph build for a repo + pub async fn get_latest_build( + &self, + repo_id: &str, + ) -> Result<Option<GraphBuildRun>, CoreError> { + let filter = doc! { "repo_id": repo_id }; + let opts = mongodb::options::FindOneOptions::builder() + .sort(doc! 
{ "started_at": -1 }) + .build(); + + let result = self.builds.find_one(filter).with_options(opts).await?; + Ok(result) + } + + /// Get all nodes for a repo's latest graph build + pub async fn get_nodes( + &self, + repo_id: &str, + graph_build_id: &str, + ) -> Result<Vec<CodeNode>, CoreError> { + let filter = doc! { + "repo_id": repo_id, + "graph_build_id": graph_build_id, + }; + + let cursor = self.nodes.find(filter).await?; + let nodes: Vec<CodeNode> = cursor.try_collect().await?; + Ok(nodes) + } + + /// Get all edges for a repo's latest graph build + pub async fn get_edges( + &self, + repo_id: &str, + graph_build_id: &str, + ) -> Result<Vec<CodeEdge>, CoreError> { + let filter = doc! { + "repo_id": repo_id, + "graph_build_id": graph_build_id, + }; + + let cursor = self.edges.find(filter).await?; + let edges: Vec<CodeEdge> = cursor.try_collect().await?; + Ok(edges) + } + + /// Get impact analysis for a finding + pub async fn get_impact( + &self, + repo_id: &str, + finding_id: &str, + ) -> Result<Option<ImpactAnalysis>, CoreError> { + let filter = doc! { + "repo_id": repo_id, + "finding_id": finding_id, + }; + + let result = self.impacts.find_one(filter).await?; + Ok(result) + } + + /// Get nodes grouped by community + pub async fn get_communities( + &self, + repo_id: &str, + graph_build_id: &str, + ) -> Result<Vec<CommunityInfo>, CoreError> { + + + let filter = doc! 
{ + "repo_id": repo_id, + "graph_build_id": graph_build_id, + }; + + let cursor = self.nodes.find(filter).await?; + let nodes: Vec<CodeNode> = cursor.try_collect().await?; + + let mut communities: std::collections::HashMap<u32, Vec<String>> = + std::collections::HashMap::new(); + + for node in &nodes { + if let Some(cid) = node.community_id { + communities + .entry(cid) + .or_default() + .push(node.qualified_name.clone()); + } + } + + let mut result: Vec<CommunityInfo> = communities + .into_iter() + .map(|(id, members)| CommunityInfo { + community_id: id, + member_count: members.len() as u32, + members, + }) + .collect(); + + result.sort_by_key(|c| c.community_id); + Ok(result) + } +} + +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub struct CommunityInfo { + pub community_id: u32, + pub member_count: u32, + pub members: Vec<String>, +} diff --git a/compliance-graph/src/lib.rs b/compliance-graph/src/lib.rs new file mode 100644 index 0000000..8945bb7 --- /dev/null +++ b/compliance-graph/src/lib.rs @@ -0,0 +1,7 @@ +pub mod graph; +pub mod parsers; +pub mod search; + +pub use graph::engine::GraphEngine; +pub use parsers::registry::ParserRegistry; +pub use search::index::SymbolIndex; diff --git a/compliance-graph/src/parsers/javascript.rs b/compliance-graph/src/parsers/javascript.rs new file mode 100644 index 0000000..bfe66d8 --- /dev/null +++ b/compliance-graph/src/parsers/javascript.rs @@ -0,0 +1,372 @@ +use std::path::Path; + +use compliance_core::error::CoreError; +use compliance_core::models::graph::{CodeEdge, CodeEdgeKind, CodeNode, CodeNodeKind}; +use compliance_core::traits::graph_builder::{LanguageParser, ParseOutput}; +use tree_sitter::{Node, Parser}; + +pub struct JavaScriptParser; + +impl JavaScriptParser { + pub fn new() -> Self { + Self + } + + fn walk_tree( + &self, + node: Node<'_>, + source: &str, + file_path: &str, + repo_id: &str, + graph_build_id: &str, + parent_qualified: Option<&str>, + output: &mut ParseOutput, + ) { + match node.kind() { + "function_declaration" => { + if let 
Some(name_node) = node.child_by_field_name("name") { + let name = &source[name_node.byte_range()]; + let qualified = match parent_qualified { + Some(p) => format!("{p}.{name}"), + None => format!("{file_path}::{name}"), + }; + + let is_entry = self.is_exported_function(&node, source); + + output.nodes.push(CodeNode { + id: None, + repo_id: repo_id.to_string(), + graph_build_id: graph_build_id.to_string(), + qualified_name: qualified.clone(), + name: name.to_string(), + kind: CodeNodeKind::Function, + file_path: file_path.to_string(), + start_line: node.start_position().row as u32 + 1, + end_line: node.end_position().row as u32 + 1, + language: "javascript".to_string(), + community_id: None, + is_entry_point: is_entry, + graph_index: None, + }); + + if let Some(body) = node.child_by_field_name("body") { + self.extract_calls( + body, source, file_path, repo_id, graph_build_id, &qualified, output, + ); + } + } + } + "class_declaration" => { + if let Some(name_node) = node.child_by_field_name("name") { + let name = &source[name_node.byte_range()]; + let qualified = match parent_qualified { + Some(p) => format!("{p}.{name}"), + None => format!("{file_path}::{name}"), + }; + + output.nodes.push(CodeNode { + id: None, + repo_id: repo_id.to_string(), + graph_build_id: graph_build_id.to_string(), + qualified_name: qualified.clone(), + name: name.to_string(), + kind: CodeNodeKind::Class, + file_path: file_path.to_string(), + start_line: node.start_position().row as u32 + 1, + end_line: node.end_position().row as u32 + 1, + language: "javascript".to_string(), + community_id: None, + is_entry_point: false, + graph_index: None, + }); + + // Extract superclass + if let Some(heritage) = node.child_by_field_name("superclass") { + let base_name = &source[heritage.byte_range()]; + output.edges.push(CodeEdge { + id: None, + repo_id: repo_id.to_string(), + graph_build_id: graph_build_id.to_string(), + source: qualified.clone(), + target: base_name.to_string(), + kind: 
CodeEdgeKind::Inherits, + file_path: file_path.to_string(), + line_number: Some(node.start_position().row as u32 + 1), + }); + } + + if let Some(body) = node.child_by_field_name("body") { + self.walk_children( + body, source, file_path, repo_id, graph_build_id, Some(&qualified), + output, + ); + } + return; + } + } + "method_definition" => { + if let Some(name_node) = node.child_by_field_name("name") { + let name = &source[name_node.byte_range()]; + let qualified = match parent_qualified { + Some(p) => format!("{p}.{name}"), + None => format!("{file_path}::{name}"), + }; + + output.nodes.push(CodeNode { + id: None, + repo_id: repo_id.to_string(), + graph_build_id: graph_build_id.to_string(), + qualified_name: qualified.clone(), + name: name.to_string(), + kind: CodeNodeKind::Method, + file_path: file_path.to_string(), + start_line: node.start_position().row as u32 + 1, + end_line: node.end_position().row as u32 + 1, + language: "javascript".to_string(), + community_id: None, + is_entry_point: false, + graph_index: None, + }); + + if let Some(body) = node.child_by_field_name("body") { + self.extract_calls( + body, source, file_path, repo_id, graph_build_id, &qualified, output, + ); + } + } + } + // Arrow functions assigned to variables: const foo = () => {} + "lexical_declaration" | "variable_declaration" => { + self.extract_arrow_functions( + node, source, file_path, repo_id, graph_build_id, parent_qualified, output, + ); + } + "import_statement" => { + let text = &source[node.byte_range()]; + if let Some(module) = self.extract_import_source(text) { + output.edges.push(CodeEdge { + id: None, + repo_id: repo_id.to_string(), + graph_build_id: graph_build_id.to_string(), + source: parent_qualified.unwrap_or(file_path).to_string(), + target: module, + kind: CodeEdgeKind::Imports, + file_path: file_path.to_string(), + line_number: Some(node.start_position().row as u32 + 1), + }); + } + } + _ => {} + } + + self.walk_children( + node, + source, + file_path, + repo_id, + 
graph_build_id, + parent_qualified, + output, + ); + } + + fn walk_children( + &self, + node: Node<'_>, + source: &str, + file_path: &str, + repo_id: &str, + graph_build_id: &str, + parent_qualified: Option<&str>, + output: &mut ParseOutput, + ) { + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + self.walk_tree( + child, source, file_path, repo_id, graph_build_id, parent_qualified, output, + ); + } + } + + fn extract_calls( + &self, + node: Node<'_>, + source: &str, + file_path: &str, + repo_id: &str, + graph_build_id: &str, + caller_qualified: &str, + output: &mut ParseOutput, + ) { + if node.kind() == "call_expression" { + if let Some(func_node) = node.child_by_field_name("function") { + let callee = &source[func_node.byte_range()]; + output.edges.push(CodeEdge { + id: None, + repo_id: repo_id.to_string(), + graph_build_id: graph_build_id.to_string(), + source: caller_qualified.to_string(), + target: callee.to_string(), + kind: CodeEdgeKind::Calls, + file_path: file_path.to_string(), + line_number: Some(node.start_position().row as u32 + 1), + }); + } + } + + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + self.extract_calls( + child, source, file_path, repo_id, graph_build_id, caller_qualified, output, + ); + } + } + + fn extract_arrow_functions( + &self, + node: Node<'_>, + source: &str, + file_path: &str, + repo_id: &str, + graph_build_id: &str, + parent_qualified: Option<&str>, + output: &mut ParseOutput, + ) { + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + if child.kind() == "variable_declarator" { + let name_node = child.child_by_field_name("name"); + let value_node = child.child_by_field_name("value"); + if let (Some(name_n), Some(value_n)) = (name_node, value_node) { + if value_n.kind() == "arrow_function" || value_n.kind() == "function" { + let name = &source[name_n.byte_range()]; + let qualified = match parent_qualified { + Some(p) => format!("{p}.{name}"), + None => 
format!("{file_path}::{name}"), + }; + + output.nodes.push(CodeNode { + id: None, + repo_id: repo_id.to_string(), + graph_build_id: graph_build_id.to_string(), + qualified_name: qualified.clone(), + name: name.to_string(), + kind: CodeNodeKind::Function, + file_path: file_path.to_string(), + start_line: child.start_position().row as u32 + 1, + end_line: child.end_position().row as u32 + 1, + language: "javascript".to_string(), + community_id: None, + is_entry_point: false, + graph_index: None, + }); + + if let Some(body) = value_n.child_by_field_name("body") { + self.extract_calls( + body, source, file_path, repo_id, graph_build_id, &qualified, + output, + ); + } + } + } + } + } + + fn is_exported_function(&self, node: &Node<'_>, source: &str) -> bool { + if let Some(parent) = node.parent() { + if parent.kind() == "export_statement" { + return true; + } + } + // Check for module.exports patterns + if let Some(prev) = node.prev_sibling() { + let text = &source[prev.byte_range()]; + if text.contains("module.exports") || text.contains("exports.") { + return true; + } + } + false + } + + fn extract_import_source(&self, import_text: &str) -> Option<String> { + // import ... from 'module' or import 'module' + let from_idx = import_text.find("from "); + let start = if let Some(idx) = from_idx { + idx + 5 + } else { + import_text.find("import ")? 
+ 7 + }; + let rest = &import_text[start..]; + let module = rest + .trim() + .trim_matches(|c| c == '\'' || c == '"' || c == ';' || c == ' '); + if module.is_empty() { + None + } else { + Some(module.to_string()) + } + } +} + +impl LanguageParser for JavaScriptParser { + fn language(&self) -> &str { + "javascript" + } + + fn extensions(&self) -> &[&str] { + &["js", "jsx", "mjs", "cjs"] + } + + fn parse_file( + &self, + file_path: &Path, + source: &str, + repo_id: &str, + graph_build_id: &str, + ) -> Result<ParseOutput, CoreError> { + let mut parser = Parser::new(); + let language = tree_sitter_javascript::LANGUAGE; + parser + .set_language(&language.into()) + .map_err(|e| CoreError::Graph(format!("Failed to set JavaScript language: {e}")))?; + + let tree = parser + .parse(source, None) + .ok_or_else(|| CoreError::Graph("Failed to parse JavaScript file".to_string()))?; + + let file_path_str = file_path.to_string_lossy().to_string(); + let mut output = ParseOutput::default(); + + output.nodes.push(CodeNode { + id: None, + repo_id: repo_id.to_string(), + graph_build_id: graph_build_id.to_string(), + qualified_name: file_path_str.clone(), + name: file_path + .file_name() + .map(|n| n.to_string_lossy().to_string()) + .unwrap_or_default(), + kind: CodeNodeKind::File, + file_path: file_path_str.clone(), + start_line: 1, + end_line: source.lines().count() as u32, + language: "javascript".to_string(), + community_id: None, + is_entry_point: false, + graph_index: None, + }); + + self.walk_tree( + tree.root_node(), + source, + &file_path_str, + repo_id, + graph_build_id, + None, + &mut output, + ); + + Ok(output) + } +} diff --git a/compliance-graph/src/parsers/mod.rs b/compliance-graph/src/parsers/mod.rs new file mode 100644 index 0000000..e14b8e7 --- /dev/null +++ b/compliance-graph/src/parsers/mod.rs @@ -0,0 +1,5 @@ +pub mod javascript; +pub mod python; +pub mod registry; +pub mod rust_parser; +pub mod typescript; diff --git a/compliance-graph/src/parsers/python.rs 
b/compliance-graph/src/parsers/python.rs new file mode 100644 index 0000000..bc0af2f --- /dev/null +++ b/compliance-graph/src/parsers/python.rs @@ -0,0 +1,336 @@ +use std::path::Path; + +use compliance_core::error::CoreError; +use compliance_core::models::graph::{CodeEdge, CodeEdgeKind, CodeNode, CodeNodeKind}; +use compliance_core::traits::graph_builder::{LanguageParser, ParseOutput}; +use tree_sitter::{Node, Parser}; + +pub struct PythonParser; + +impl PythonParser { + pub fn new() -> Self { + Self + } + + fn walk_tree( + &self, + node: Node<'_>, + source: &str, + file_path: &str, + repo_id: &str, + graph_build_id: &str, + parent_qualified: Option<&str>, + output: &mut ParseOutput, + ) { + match node.kind() { + "function_definition" => { + if let Some(name_node) = node.child_by_field_name("name") { + let name = &source[name_node.byte_range()]; + let qualified = match parent_qualified { + Some(p) => format!("{p}.{name}"), + None => format!("{file_path}::{name}"), + }; + + let is_method = parent_qualified + .map(|p| p.contains("class")) + .unwrap_or(false); + let kind = if is_method { + CodeNodeKind::Method + } else { + CodeNodeKind::Function + }; + + let is_entry = name == "__main__" + || name == "main" + || self.has_decorator(&node, source, "app.route") + || self.has_decorator(&node, source, "app.get") + || self.has_decorator(&node, source, "app.post"); + + output.nodes.push(CodeNode { + id: None, + repo_id: repo_id.to_string(), + graph_build_id: graph_build_id.to_string(), + qualified_name: qualified.clone(), + name: name.to_string(), + kind, + file_path: file_path.to_string(), + start_line: node.start_position().row as u32 + 1, + end_line: node.end_position().row as u32 + 1, + language: "python".to_string(), + community_id: None, + is_entry_point: is_entry, + graph_index: None, + }); + + // Extract calls in function body + if let Some(body) = node.child_by_field_name("body") { + self.extract_calls( + body, + source, + file_path, + repo_id, + graph_build_id, + 
&qualified, + output, + ); + } + } + } + "class_definition" => { + if let Some(name_node) = node.child_by_field_name("name") { + let name = &source[name_node.byte_range()]; + let qualified = match parent_qualified { + Some(p) => format!("{p}.{name}"), + None => format!("{file_path}::{name}"), + }; + + output.nodes.push(CodeNode { + id: None, + repo_id: repo_id.to_string(), + graph_build_id: graph_build_id.to_string(), + qualified_name: qualified.clone(), + name: name.to_string(), + kind: CodeNodeKind::Class, + file_path: file_path.to_string(), + start_line: node.start_position().row as u32 + 1, + end_line: node.end_position().row as u32 + 1, + language: "python".to_string(), + community_id: None, + is_entry_point: false, + graph_index: None, + }); + + // Extract superclasses + if let Some(bases) = node.child_by_field_name("superclasses") { + self.extract_inheritance( + bases, + source, + file_path, + repo_id, + graph_build_id, + &qualified, + output, + ); + } + + // Walk methods + if let Some(body) = node.child_by_field_name("body") { + self.walk_children( + body, + source, + file_path, + repo_id, + graph_build_id, + Some(&qualified), + output, + ); + } + return; + } + } + "import_statement" | "import_from_statement" => { + let import_text = &source[node.byte_range()]; + if let Some(module) = self.extract_import_module(import_text) { + output.edges.push(CodeEdge { + id: None, + repo_id: repo_id.to_string(), + graph_build_id: graph_build_id.to_string(), + source: parent_qualified.unwrap_or(file_path).to_string(), + target: module, + kind: CodeEdgeKind::Imports, + file_path: file_path.to_string(), + line_number: Some(node.start_position().row as u32 + 1), + }); + } + } + _ => {} + } + + self.walk_children( + node, + source, + file_path, + repo_id, + graph_build_id, + parent_qualified, + output, + ); + } + + fn walk_children( + &self, + node: Node<'_>, + source: &str, + file_path: &str, + repo_id: &str, + graph_build_id: &str, + parent_qualified: Option<&str>, + 
output: &mut ParseOutput, + ) { + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + self.walk_tree( + child, + source, + file_path, + repo_id, + graph_build_id, + parent_qualified, + output, + ); + } + } + + fn extract_calls( + &self, + node: Node<'_>, + source: &str, + file_path: &str, + repo_id: &str, + graph_build_id: &str, + caller_qualified: &str, + output: &mut ParseOutput, + ) { + if node.kind() == "call" { + if let Some(func_node) = node.child_by_field_name("function") { + let callee = &source[func_node.byte_range()]; + output.edges.push(CodeEdge { + id: None, + repo_id: repo_id.to_string(), + graph_build_id: graph_build_id.to_string(), + source: caller_qualified.to_string(), + target: callee.to_string(), + kind: CodeEdgeKind::Calls, + file_path: file_path.to_string(), + line_number: Some(node.start_position().row as u32 + 1), + }); + } + } + + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + self.extract_calls( + child, + source, + file_path, + repo_id, + graph_build_id, + caller_qualified, + output, + ); + } + } + + fn extract_inheritance( + &self, + node: Node<'_>, + source: &str, + file_path: &str, + repo_id: &str, + graph_build_id: &str, + class_qualified: &str, + output: &mut ParseOutput, + ) { + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + if child.kind() == "identifier" || child.kind() == "attribute" { + let base_name = &source[child.byte_range()]; + output.edges.push(CodeEdge { + id: None, + repo_id: repo_id.to_string(), + graph_build_id: graph_build_id.to_string(), + source: class_qualified.to_string(), + target: base_name.to_string(), + kind: CodeEdgeKind::Inherits, + file_path: file_path.to_string(), + line_number: Some(node.start_position().row as u32 + 1), + }); + } + } + } + + fn has_decorator(&self, node: &Node<'_>, source: &str, decorator_name: &str) -> bool { + if let Some(prev) = node.prev_sibling() { + if prev.kind() == "decorator" { + let text = 
&source[prev.byte_range()]; + return text.contains(decorator_name); + } + } + false + } + + fn extract_import_module(&self, import_text: &str) -> Option { + if let Some(rest) = import_text.strip_prefix("from ") { + // "from foo.bar import baz" -> "foo.bar" + let module = rest.split_whitespace().next()?; + Some(module.to_string()) + } else if let Some(rest) = import_text.strip_prefix("import ") { + let module = rest.trim().trim_end_matches(';'); + Some(module.to_string()) + } else { + None + } + } +} + +impl LanguageParser for PythonParser { + fn language(&self) -> &str { + "python" + } + + fn extensions(&self) -> &[&str] { + &["py"] + } + + fn parse_file( + &self, + file_path: &Path, + source: &str, + repo_id: &str, + graph_build_id: &str, + ) -> Result { + let mut parser = Parser::new(); + let language = tree_sitter_python::LANGUAGE; + parser + .set_language(&language.into()) + .map_err(|e| CoreError::Graph(format!("Failed to set Python language: {e}")))?; + + let tree = parser + .parse(source, None) + .ok_or_else(|| CoreError::Graph("Failed to parse Python file".to_string()))?; + + let file_path_str = file_path.to_string_lossy().to_string(); + let mut output = ParseOutput::default(); + + output.nodes.push(CodeNode { + id: None, + repo_id: repo_id.to_string(), + graph_build_id: graph_build_id.to_string(), + qualified_name: file_path_str.clone(), + name: file_path + .file_name() + .map(|n| n.to_string_lossy().to_string()) + .unwrap_or_default(), + kind: CodeNodeKind::File, + file_path: file_path_str.clone(), + start_line: 1, + end_line: source.lines().count() as u32, + language: "python".to_string(), + community_id: None, + is_entry_point: false, + graph_index: None, + }); + + self.walk_tree( + tree.root_node(), + source, + &file_path_str, + repo_id, + graph_build_id, + None, + &mut output, + ); + + Ok(output) + } +} diff --git a/compliance-graph/src/parsers/registry.rs b/compliance-graph/src/parsers/registry.rs new file mode 100644 index 0000000..d5f582c --- 
/dev/null +++ b/compliance-graph/src/parsers/registry.rs @@ -0,0 +1,182 @@ +use std::collections::HashMap; +use std::path::Path; + +use compliance_core::error::CoreError; +use compliance_core::traits::graph_builder::{LanguageParser, ParseOutput}; +use tracing::info; + +use super::javascript::JavaScriptParser; +use super::python::PythonParser; +use super::rust_parser::RustParser; +use super::typescript::TypeScriptParser; + +/// Registry of language parsers, indexed by file extension +pub struct ParserRegistry { + parsers: Vec>, + extension_map: HashMap, +} + +impl ParserRegistry { + /// Create a registry with all built-in parsers + pub fn new() -> Self { + let parsers: Vec> = vec![ + Box::new(RustParser::new()), + Box::new(PythonParser::new()), + Box::new(JavaScriptParser::new()), + Box::new(TypeScriptParser::new()), + ]; + + let mut extension_map = HashMap::new(); + for (idx, parser) in parsers.iter().enumerate() { + for ext in parser.extensions() { + extension_map.insert(ext.to_string(), idx); + } + } + + Self { + parsers, + extension_map, + } + } + + /// Check if a file extension is supported + pub fn supports_extension(&self, ext: &str) -> bool { + self.extension_map.contains_key(ext) + } + + /// Get supported extensions + pub fn supported_extensions(&self) -> Vec<&str> { + self.extension_map.keys().map(|s| s.as_str()).collect() + } + + /// Parse a file, selecting the appropriate parser by extension + pub fn parse_file( + &self, + file_path: &Path, + source: &str, + repo_id: &str, + graph_build_id: &str, + ) -> Result, CoreError> { + let ext = file_path + .extension() + .and_then(|e| e.to_str()) + .unwrap_or(""); + + let parser_idx = match self.extension_map.get(ext) { + Some(idx) => *idx, + None => return Ok(None), + }; + + let parser = &self.parsers[parser_idx]; + info!( + file = %file_path.display(), + language = parser.language(), + "Parsing file" + ); + + let output = parser.parse_file(file_path, source, repo_id, graph_build_id)?; + Ok(Some(output)) + } + + 
/// Parse all supported files in a directory tree + pub fn parse_directory( + &self, + dir: &Path, + repo_id: &str, + graph_build_id: &str, + max_nodes: u32, + ) -> Result { + let mut combined = ParseOutput::default(); + let mut node_count: u32 = 0; + + self.walk_directory(dir, dir, repo_id, graph_build_id, max_nodes, &mut node_count, &mut combined)?; + + info!( + nodes = combined.nodes.len(), + edges = combined.edges.len(), + "Directory parsing complete" + ); + + Ok(combined) + } + + fn walk_directory( + &self, + base: &Path, + dir: &Path, + repo_id: &str, + graph_build_id: &str, + max_nodes: u32, + node_count: &mut u32, + combined: &mut ParseOutput, + ) -> Result<(), CoreError> { + let entries = std::fs::read_dir(dir).map_err(|e| { + CoreError::Graph(format!("Failed to read directory {}: {e}", dir.display())) + })?; + + for entry in entries { + let entry = entry.map_err(|e| CoreError::Graph(format!("Dir entry error: {e}")))?; + let path = entry.path(); + + // Skip hidden directories and common non-source dirs + if let Some(name) = path.file_name().and_then(|n| n.to_str()) { + if name.starts_with('.') + || name == "node_modules" + || name == "target" + || name == "__pycache__" + || name == "vendor" + || name == "dist" + || name == "build" + || name == ".git" + { + continue; + } + } + + if path.is_dir() { + self.walk_directory( + base, + &path, + repo_id, + graph_build_id, + max_nodes, + node_count, + combined, + )?; + } else if path.is_file() { + if *node_count >= max_nodes { + info!(max_nodes, "Reached node limit, stopping parse"); + return Ok(()); + } + + let ext = path.extension().and_then(|e| e.to_str()).unwrap_or(""); + if !self.supports_extension(ext) { + continue; + } + + // Use relative path from base + let rel_path = path.strip_prefix(base).unwrap_or(&path); + + let source = match std::fs::read_to_string(&path) { + Ok(s) => s, + Err(_) => continue, // Skip binary/unreadable files + }; + + if let Some(output) = self.parse_file(rel_path, &source, repo_id, 
graph_build_id)? + { + *node_count += output.nodes.len() as u32; + combined.nodes.extend(output.nodes); + combined.edges.extend(output.edges); + } + } + } + + Ok(()) + } +} + +impl Default for ParserRegistry { + fn default() -> Self { + Self::new() + } +} diff --git a/compliance-graph/src/parsers/rust_parser.rs b/compliance-graph/src/parsers/rust_parser.rs new file mode 100644 index 0000000..2aad595 --- /dev/null +++ b/compliance-graph/src/parsers/rust_parser.rs @@ -0,0 +1,426 @@ +use std::path::Path; + +use compliance_core::error::CoreError; +use compliance_core::models::graph::{CodeEdge, CodeEdgeKind, CodeNode, CodeNodeKind}; +use compliance_core::traits::graph_builder::{LanguageParser, ParseOutput}; +use tree_sitter::{Node, Parser}; + +pub struct RustParser; + +impl RustParser { + pub fn new() -> Self { + Self + } + + fn walk_tree( + &self, + node: Node<'_>, + source: &str, + file_path: &str, + repo_id: &str, + graph_build_id: &str, + parent_qualified: Option<&str>, + output: &mut ParseOutput, + ) { + match node.kind() { + "function_item" | "function_signature_item" => { + if let Some(name_node) = node.child_by_field_name("name") { + let name = &source[name_node.byte_range()]; + let qualified = match parent_qualified { + Some(p) => format!("{p}::{name}"), + None => format!("{file_path}::{name}"), + }; + + let is_entry = name == "main" + || self.has_attribute(&node, source, "test") + || self.has_attribute(&node, source, "tokio::main") + || self.has_pub_visibility(&node, source); + + output.nodes.push(CodeNode { + id: None, + repo_id: repo_id.to_string(), + graph_build_id: graph_build_id.to_string(), + qualified_name: qualified.clone(), + name: name.to_string(), + kind: CodeNodeKind::Function, + file_path: file_path.to_string(), + start_line: node.start_position().row as u32 + 1, + end_line: node.end_position().row as u32 + 1, + language: "rust".to_string(), + community_id: None, + is_entry_point: is_entry, + graph_index: None, + }); + + // Extract function calls 
within the body + if let Some(body) = node.child_by_field_name("body") { + self.extract_calls( + body, + source, + file_path, + repo_id, + graph_build_id, + &qualified, + output, + ); + } + } + } + "struct_item" => { + if let Some(name_node) = node.child_by_field_name("name") { + let name = &source[name_node.byte_range()]; + let qualified = match parent_qualified { + Some(p) => format!("{p}::{name}"), + None => format!("{file_path}::{name}"), + }; + + output.nodes.push(CodeNode { + id: None, + repo_id: repo_id.to_string(), + graph_build_id: graph_build_id.to_string(), + qualified_name: qualified, + name: name.to_string(), + kind: CodeNodeKind::Struct, + file_path: file_path.to_string(), + start_line: node.start_position().row as u32 + 1, + end_line: node.end_position().row as u32 + 1, + language: "rust".to_string(), + community_id: None, + is_entry_point: false, + graph_index: None, + }); + } + } + "enum_item" => { + if let Some(name_node) = node.child_by_field_name("name") { + let name = &source[name_node.byte_range()]; + let qualified = match parent_qualified { + Some(p) => format!("{p}::{name}"), + None => format!("{file_path}::{name}"), + }; + + output.nodes.push(CodeNode { + id: None, + repo_id: repo_id.to_string(), + graph_build_id: graph_build_id.to_string(), + qualified_name: qualified, + name: name.to_string(), + kind: CodeNodeKind::Enum, + file_path: file_path.to_string(), + start_line: node.start_position().row as u32 + 1, + end_line: node.end_position().row as u32 + 1, + language: "rust".to_string(), + community_id: None, + is_entry_point: false, + graph_index: None, + }); + } + } + "trait_item" => { + if let Some(name_node) = node.child_by_field_name("name") { + let name = &source[name_node.byte_range()]; + let qualified = match parent_qualified { + Some(p) => format!("{p}::{name}"), + None => format!("{file_path}::{name}"), + }; + + output.nodes.push(CodeNode { + id: None, + repo_id: repo_id.to_string(), + graph_build_id: graph_build_id.to_string(), + 
qualified_name: qualified.clone(), + name: name.to_string(), + kind: CodeNodeKind::Trait, + file_path: file_path.to_string(), + start_line: node.start_position().row as u32 + 1, + end_line: node.end_position().row as u32 + 1, + language: "rust".to_string(), + community_id: None, + is_entry_point: false, + graph_index: None, + }); + + // Parse methods inside the trait + self.walk_children( + node, + source, + file_path, + repo_id, + graph_build_id, + Some(&qualified), + output, + ); + return; // Don't walk children again + } + } + "impl_item" => { + // Extract impl target type for qualified naming + let impl_name = self.extract_impl_type(&node, source); + let qualified = match parent_qualified { + Some(p) => format!("{p}::{impl_name}"), + None => format!("{file_path}::{impl_name}"), + }; + + // Check for trait impl (impl Trait for Type) + if let Some(trait_node) = node.child_by_field_name("trait") { + let trait_name = &source[trait_node.byte_range()]; + output.edges.push(CodeEdge { + id: None, + repo_id: repo_id.to_string(), + graph_build_id: graph_build_id.to_string(), + source: qualified.clone(), + target: trait_name.to_string(), + kind: CodeEdgeKind::Implements, + file_path: file_path.to_string(), + line_number: Some(node.start_position().row as u32 + 1), + }); + } + + // Walk methods inside impl block + self.walk_children( + node, + source, + file_path, + repo_id, + graph_build_id, + Some(&qualified), + output, + ); + return; + } + "use_declaration" => { + let use_text = &source[node.byte_range()]; + // Extract the imported path + if let Some(path) = self.extract_use_path(use_text) { + output.edges.push(CodeEdge { + id: None, + repo_id: repo_id.to_string(), + graph_build_id: graph_build_id.to_string(), + source: parent_qualified + .unwrap_or(file_path) + .to_string(), + target: path, + kind: CodeEdgeKind::Imports, + file_path: file_path.to_string(), + line_number: Some(node.start_position().row as u32 + 1), + }); + } + } + "mod_item" => { + if let 
Some(name_node) = node.child_by_field_name("name") { + let name = &source[name_node.byte_range()]; + let qualified = match parent_qualified { + Some(p) => format!("{p}::{name}"), + None => format!("{file_path}::{name}"), + }; + + output.nodes.push(CodeNode { + id: None, + repo_id: repo_id.to_string(), + graph_build_id: graph_build_id.to_string(), + qualified_name: qualified.clone(), + name: name.to_string(), + kind: CodeNodeKind::Module, + file_path: file_path.to_string(), + start_line: node.start_position().row as u32 + 1, + end_line: node.end_position().row as u32 + 1, + language: "rust".to_string(), + community_id: None, + is_entry_point: false, + graph_index: None, + }); + + // If it has a body (inline module), walk it + if let Some(body) = node.child_by_field_name("body") { + self.walk_children( + body, + source, + file_path, + repo_id, + graph_build_id, + Some(&qualified), + output, + ); + return; + } + } + } + _ => {} + } + + // Default: walk children + self.walk_children( + node, + source, + file_path, + repo_id, + graph_build_id, + parent_qualified, + output, + ); + } + + fn walk_children( + &self, + node: Node<'_>, + source: &str, + file_path: &str, + repo_id: &str, + graph_build_id: &str, + parent_qualified: Option<&str>, + output: &mut ParseOutput, + ) { + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + self.walk_tree( + child, + source, + file_path, + repo_id, + graph_build_id, + parent_qualified, + output, + ); + } + } + + fn extract_calls( + &self, + node: Node<'_>, + source: &str, + file_path: &str, + repo_id: &str, + graph_build_id: &str, + caller_qualified: &str, + output: &mut ParseOutput, + ) { + if node.kind() == "call_expression" { + if let Some(func_node) = node.child_by_field_name("function") { + let callee = &source[func_node.byte_range()]; + output.edges.push(CodeEdge { + id: None, + repo_id: repo_id.to_string(), + graph_build_id: graph_build_id.to_string(), + source: caller_qualified.to_string(), + target: 
callee.to_string(), + kind: CodeEdgeKind::Calls, + file_path: file_path.to_string(), + line_number: Some(node.start_position().row as u32 + 1), + }); + } + } + + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + self.extract_calls( + child, + source, + file_path, + repo_id, + graph_build_id, + caller_qualified, + output, + ); + } + } + + fn has_attribute(&self, node: &Node<'_>, source: &str, attr_name: &str) -> bool { + if let Some(prev) = node.prev_sibling() { + if prev.kind() == "attribute_item" || prev.kind() == "attribute" { + let text = &source[prev.byte_range()]; + return text.contains(attr_name); + } + } + false + } + + fn has_pub_visibility(&self, node: &Node<'_>, source: &str) -> bool { + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + if child.kind() == "visibility_modifier" { + let text = &source[child.byte_range()]; + return text == "pub"; + } + } + false + } + + fn extract_impl_type(&self, node: &Node<'_>, source: &str) -> String { + if let Some(type_node) = node.child_by_field_name("type") { + return source[type_node.byte_range()].to_string(); + } + "unknown".to_string() + } + + fn extract_use_path(&self, use_text: &str) -> Option { + // "use foo::bar::baz;" -> "foo::bar::baz" + let trimmed = use_text + .strip_prefix("use ")? 
+ .trim_end_matches(';') + .trim(); + Some(trimmed.to_string()) + } +} + +impl LanguageParser for RustParser { + fn language(&self) -> &str { + "rust" + } + + fn extensions(&self) -> &[&str] { + &["rs"] + } + + fn parse_file( + &self, + file_path: &Path, + source: &str, + repo_id: &str, + graph_build_id: &str, + ) -> Result { + let mut parser = Parser::new(); + let language = tree_sitter_rust::LANGUAGE; + parser + .set_language(&language.into()) + .map_err(|e| CoreError::Graph(format!("Failed to set Rust language: {e}")))?; + + let tree = parser + .parse(source, None) + .ok_or_else(|| CoreError::Graph("Failed to parse Rust file".to_string()))?; + + let file_path_str = file_path.to_string_lossy().to_string(); + let mut output = ParseOutput::default(); + + // Add file node + output.nodes.push(CodeNode { + id: None, + repo_id: repo_id.to_string(), + graph_build_id: graph_build_id.to_string(), + qualified_name: file_path_str.clone(), + name: file_path + .file_name() + .map(|n| n.to_string_lossy().to_string()) + .unwrap_or_default(), + kind: CodeNodeKind::File, + file_path: file_path_str.clone(), + start_line: 1, + end_line: source.lines().count() as u32, + language: "rust".to_string(), + community_id: None, + is_entry_point: false, + graph_index: None, + }); + + self.walk_tree( + tree.root_node(), + source, + &file_path_str, + repo_id, + graph_build_id, + None, + &mut output, + ); + + Ok(output) + } +} diff --git a/compliance-graph/src/parsers/typescript.rs b/compliance-graph/src/parsers/typescript.rs new file mode 100644 index 0000000..2e3d0e9 --- /dev/null +++ b/compliance-graph/src/parsers/typescript.rs @@ -0,0 +1,419 @@ +use std::path::Path; + +use compliance_core::error::CoreError; +use compliance_core::models::graph::{CodeEdge, CodeEdgeKind, CodeNode, CodeNodeKind}; +use compliance_core::traits::graph_builder::{LanguageParser, ParseOutput}; +use tree_sitter::{Node, Parser}; + +pub struct TypeScriptParser; + +impl TypeScriptParser { + pub fn new() -> Self { + Self 
+ } + + fn walk_tree( + &self, + node: Node<'_>, + source: &str, + file_path: &str, + repo_id: &str, + graph_build_id: &str, + parent_qualified: Option<&str>, + output: &mut ParseOutput, + ) { + match node.kind() { + "function_declaration" => { + if let Some(name_node) = node.child_by_field_name("name") { + let name = &source[name_node.byte_range()]; + let qualified = match parent_qualified { + Some(p) => format!("{p}.{name}"), + None => format!("{file_path}::{name}"), + }; + + output.nodes.push(CodeNode { + id: None, + repo_id: repo_id.to_string(), + graph_build_id: graph_build_id.to_string(), + qualified_name: qualified.clone(), + name: name.to_string(), + kind: CodeNodeKind::Function, + file_path: file_path.to_string(), + start_line: node.start_position().row as u32 + 1, + end_line: node.end_position().row as u32 + 1, + language: "typescript".to_string(), + community_id: None, + is_entry_point: self.is_exported(&node), + graph_index: None, + }); + + if let Some(body) = node.child_by_field_name("body") { + self.extract_calls( + body, source, file_path, repo_id, graph_build_id, &qualified, output, + ); + } + } + } + "class_declaration" => { + if let Some(name_node) = node.child_by_field_name("name") { + let name = &source[name_node.byte_range()]; + let qualified = match parent_qualified { + Some(p) => format!("{p}.{name}"), + None => format!("{file_path}::{name}"), + }; + + output.nodes.push(CodeNode { + id: None, + repo_id: repo_id.to_string(), + graph_build_id: graph_build_id.to_string(), + qualified_name: qualified.clone(), + name: name.to_string(), + kind: CodeNodeKind::Class, + file_path: file_path.to_string(), + start_line: node.start_position().row as u32 + 1, + end_line: node.end_position().row as u32 + 1, + language: "typescript".to_string(), + community_id: None, + is_entry_point: false, + graph_index: None, + }); + + // Heritage clause (extends/implements) + self.extract_heritage( + &node, source, file_path, repo_id, graph_build_id, &qualified, output, 
+ ); + + if let Some(body) = node.child_by_field_name("body") { + self.walk_children( + body, source, file_path, repo_id, graph_build_id, Some(&qualified), + output, + ); + } + return; + } + } + "interface_declaration" => { + if let Some(name_node) = node.child_by_field_name("name") { + let name = &source[name_node.byte_range()]; + let qualified = match parent_qualified { + Some(p) => format!("{p}.{name}"), + None => format!("{file_path}::{name}"), + }; + + output.nodes.push(CodeNode { + id: None, + repo_id: repo_id.to_string(), + graph_build_id: graph_build_id.to_string(), + qualified_name: qualified.clone(), + name: name.to_string(), + kind: CodeNodeKind::Interface, + file_path: file_path.to_string(), + start_line: node.start_position().row as u32 + 1, + end_line: node.end_position().row as u32 + 1, + language: "typescript".to_string(), + community_id: None, + is_entry_point: false, + graph_index: None, + }); + } + } + "method_definition" | "public_field_definition" => { + if let Some(name_node) = node.child_by_field_name("name") { + let name = &source[name_node.byte_range()]; + let qualified = match parent_qualified { + Some(p) => format!("{p}.{name}"), + None => format!("{file_path}::{name}"), + }; + + output.nodes.push(CodeNode { + id: None, + repo_id: repo_id.to_string(), + graph_build_id: graph_build_id.to_string(), + qualified_name: qualified.clone(), + name: name.to_string(), + kind: CodeNodeKind::Method, + file_path: file_path.to_string(), + start_line: node.start_position().row as u32 + 1, + end_line: node.end_position().row as u32 + 1, + language: "typescript".to_string(), + community_id: None, + is_entry_point: false, + graph_index: None, + }); + + if let Some(body) = node.child_by_field_name("body") { + self.extract_calls( + body, source, file_path, repo_id, graph_build_id, &qualified, output, + ); + } + } + } + "lexical_declaration" | "variable_declaration" => { + self.extract_arrow_functions( + node, source, file_path, repo_id, graph_build_id, 
parent_qualified, output, + ); + } + "import_statement" => { + let text = &source[node.byte_range()]; + if let Some(module) = self.extract_import_source(text) { + output.edges.push(CodeEdge { + id: None, + repo_id: repo_id.to_string(), + graph_build_id: graph_build_id.to_string(), + source: parent_qualified.unwrap_or(file_path).to_string(), + target: module, + kind: CodeEdgeKind::Imports, + file_path: file_path.to_string(), + line_number: Some(node.start_position().row as u32 + 1), + }); + } + } + _ => {} + } + + self.walk_children( + node, source, file_path, repo_id, graph_build_id, parent_qualified, output, + ); + } + + fn walk_children( + &self, + node: Node<'_>, + source: &str, + file_path: &str, + repo_id: &str, + graph_build_id: &str, + parent_qualified: Option<&str>, + output: &mut ParseOutput, + ) { + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + self.walk_tree( + child, source, file_path, repo_id, graph_build_id, parent_qualified, output, + ); + } + } + + fn extract_calls( + &self, + node: Node<'_>, + source: &str, + file_path: &str, + repo_id: &str, + graph_build_id: &str, + caller_qualified: &str, + output: &mut ParseOutput, + ) { + if node.kind() == "call_expression" { + if let Some(func_node) = node.child_by_field_name("function") { + let callee = &source[func_node.byte_range()]; + output.edges.push(CodeEdge { + id: None, + repo_id: repo_id.to_string(), + graph_build_id: graph_build_id.to_string(), + source: caller_qualified.to_string(), + target: callee.to_string(), + kind: CodeEdgeKind::Calls, + file_path: file_path.to_string(), + line_number: Some(node.start_position().row as u32 + 1), + }); + } + } + + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + self.extract_calls( + child, source, file_path, repo_id, graph_build_id, caller_qualified, output, + ); + } + } + + fn extract_arrow_functions( + &self, + node: Node<'_>, + source: &str, + file_path: &str, + repo_id: &str, + graph_build_id: &str, 
+ parent_qualified: Option<&str>, + output: &mut ParseOutput, + ) { + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + if child.kind() == "variable_declarator" { + let name_node = child.child_by_field_name("name"); + let value_node = child.child_by_field_name("value"); + if let (Some(name_n), Some(value_n)) = (name_node, value_node) { + if value_n.kind() == "arrow_function" || value_n.kind() == "function" { + let name = &source[name_n.byte_range()]; + let qualified = match parent_qualified { + Some(p) => format!("{p}.{name}"), + None => format!("{file_path}::{name}"), + }; + + output.nodes.push(CodeNode { + id: None, + repo_id: repo_id.to_string(), + graph_build_id: graph_build_id.to_string(), + qualified_name: qualified.clone(), + name: name.to_string(), + kind: CodeNodeKind::Function, + file_path: file_path.to_string(), + start_line: child.start_position().row as u32 + 1, + end_line: child.end_position().row as u32 + 1, + language: "typescript".to_string(), + community_id: None, + is_entry_point: false, + graph_index: None, + }); + + if let Some(body) = value_n.child_by_field_name("body") { + self.extract_calls( + body, source, file_path, repo_id, graph_build_id, &qualified, + output, + ); + } + } + } + } + } + } + + fn extract_heritage( + &self, + node: &Node<'_>, + source: &str, + file_path: &str, + repo_id: &str, + graph_build_id: &str, + class_qualified: &str, + output: &mut ParseOutput, + ) { + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + if child.kind() == "class_heritage" { + let text = &source[child.byte_range()]; + // "extends Base implements IFoo, IBar" + if let Some(rest) = text.strip_prefix("extends ") { + let base = rest.split_whitespace().next().unwrap_or(rest); + output.edges.push(CodeEdge { + id: None, + repo_id: repo_id.to_string(), + graph_build_id: graph_build_id.to_string(), + source: class_qualified.to_string(), + target: base.trim_matches(',').to_string(), + kind: 
CodeEdgeKind::Inherits, + file_path: file_path.to_string(), + line_number: Some(child.start_position().row as u32 + 1), + }); + } + if text.contains("implements ") { + if let Some(impl_part) = text.split("implements ").nth(1) { + for iface in impl_part.split(',') { + let iface = iface.trim(); + if !iface.is_empty() { + output.edges.push(CodeEdge { + id: None, + repo_id: repo_id.to_string(), + graph_build_id: graph_build_id.to_string(), + source: class_qualified.to_string(), + target: iface.to_string(), + kind: CodeEdgeKind::Implements, + file_path: file_path.to_string(), + line_number: Some(child.start_position().row as u32 + 1), + }); + } + } + } + } + } + } + } + + fn is_exported(&self, node: &Node<'_>) -> bool { + if let Some(parent) = node.parent() { + return parent.kind() == "export_statement"; + } + false + } + + fn extract_import_source(&self, import_text: &str) -> Option { + let from_idx = import_text.find("from "); + let start = if let Some(idx) = from_idx { + idx + 5 + } else { + import_text.find("import ")? 
+ 7 + }; + let rest = &import_text[start..]; + let module = rest + .trim() + .trim_matches(|c| c == '\'' || c == '"' || c == ';' || c == ' '); + if module.is_empty() { + None + } else { + Some(module.to_string()) + } + } +} + +impl LanguageParser for TypeScriptParser { + fn language(&self) -> &str { + "typescript" + } + + fn extensions(&self) -> &[&str] { + &["ts", "tsx"] + } + + fn parse_file( + &self, + file_path: &Path, + source: &str, + repo_id: &str, + graph_build_id: &str, + ) -> Result { + let mut parser = Parser::new(); + let language = tree_sitter_typescript::LANGUAGE_TYPESCRIPT; + parser + .set_language(&language.into()) + .map_err(|e| CoreError::Graph(format!("Failed to set TypeScript language: {e}")))?; + + let tree = parser + .parse(source, None) + .ok_or_else(|| CoreError::Graph("Failed to parse TypeScript file".to_string()))?; + + let file_path_str = file_path.to_string_lossy().to_string(); + let mut output = ParseOutput::default(); + + output.nodes.push(CodeNode { + id: None, + repo_id: repo_id.to_string(), + graph_build_id: graph_build_id.to_string(), + qualified_name: file_path_str.clone(), + name: file_path + .file_name() + .map(|n| n.to_string_lossy().to_string()) + .unwrap_or_default(), + kind: CodeNodeKind::File, + file_path: file_path_str.clone(), + start_line: 1, + end_line: source.lines().count() as u32, + language: "typescript".to_string(), + community_id: None, + is_entry_point: false, + graph_index: None, + }); + + self.walk_tree( + tree.root_node(), + source, + &file_path_str, + repo_id, + graph_build_id, + None, + &mut output, + ); + + Ok(output) + } +} diff --git a/compliance-graph/src/search/index.rs b/compliance-graph/src/search/index.rs new file mode 100644 index 0000000..435685a --- /dev/null +++ b/compliance-graph/src/search/index.rs @@ -0,0 +1,128 @@ +use compliance_core::error::CoreError; +use compliance_core::models::graph::CodeNode; +use tantivy::collector::TopDocs; +use tantivy::query::QueryParser; +use 
tantivy::schema::{Schema, Value, STORED, TEXT}; +use tantivy::{doc, Index, IndexWriter, ReloadPolicy}; +use tracing::info; + +/// BM25 text search index over code symbols +pub struct SymbolIndex { + index: Index, + #[allow(dead_code)] + schema: Schema, + qualified_name_field: tantivy::schema::Field, + name_field: tantivy::schema::Field, + kind_field: tantivy::schema::Field, + file_path_field: tantivy::schema::Field, + language_field: tantivy::schema::Field, +} + +#[derive(Debug, Clone, serde::Serialize)] +pub struct SearchResult { + pub qualified_name: String, + pub name: String, + pub kind: String, + pub file_path: String, + pub language: String, + pub score: f32, +} + +impl SymbolIndex { + /// Create a new in-memory symbol index + pub fn new() -> Result { + let mut schema_builder = Schema::builder(); + let qualified_name_field = schema_builder.add_text_field("qualified_name", TEXT | STORED); + let name_field = schema_builder.add_text_field("name", TEXT | STORED); + let kind_field = schema_builder.add_text_field("kind", TEXT | STORED); + let file_path_field = schema_builder.add_text_field("file_path", TEXT | STORED); + let language_field = schema_builder.add_text_field("language", TEXT | STORED); + let schema = schema_builder.build(); + + let index = Index::create_in_ram(schema.clone()); + + Ok(Self { + index, + schema, + qualified_name_field, + name_field, + kind_field, + file_path_field, + language_field, + }) + } + + /// Index a set of code nodes + pub fn index_nodes(&self, nodes: &[CodeNode]) -> Result<(), CoreError> { + let mut writer: IndexWriter = self + .index + .writer(50_000_000) + .map_err(|e| CoreError::Graph(format!("Failed to create index writer: {e}")))?; + + for node in nodes { + writer + .add_document(doc!( + self.qualified_name_field => node.qualified_name.as_str(), + self.name_field => node.name.as_str(), + self.kind_field => node.kind.to_string(), + self.file_path_field => node.file_path.as_str(), + self.language_field => 
node.language.as_str(), + )) + .map_err(|e| CoreError::Graph(format!("Failed to add document: {e}")))?; + } + + writer + .commit() + .map_err(|e| CoreError::Graph(format!("Failed to commit index: {e}")))?; + + info!(nodes = nodes.len(), "Symbol index built"); + Ok(()) + } + + /// Search for symbols matching a query + pub fn search(&self, query_str: &str, limit: usize) -> Result, CoreError> { + let reader = self + .index + .reader_builder() + .reload_policy(ReloadPolicy::Manual) + .try_into() + .map_err(|e| CoreError::Graph(format!("Failed to create reader: {e}")))?; + + let searcher = reader.searcher(); + let query_parser = + QueryParser::for_index(&self.index, vec![self.name_field, self.qualified_name_field]); + + let query = query_parser + .parse_query(query_str) + .map_err(|e| CoreError::Graph(format!("Failed to parse query: {e}")))?; + + let top_docs = searcher + .search(&query, &TopDocs::with_limit(limit)) + .map_err(|e| CoreError::Graph(format!("Search failed: {e}")))?; + + let mut results = Vec::new(); + for (score, doc_address) in top_docs { + let doc: tantivy::TantivyDocument = searcher + .doc(doc_address) + .map_err(|e| CoreError::Graph(format!("Failed to retrieve doc: {e}")))?; + + let get_field = |field: tantivy::schema::Field| -> String { + doc.get_first(field) + .and_then(|v| v.as_str()) + .unwrap_or("") + .to_string() + }; + + results.push(SearchResult { + qualified_name: get_field(self.qualified_name_field), + name: get_field(self.name_field), + kind: get_field(self.kind_field), + file_path: get_field(self.file_path_field), + language: get_field(self.language_field), + score, + }); + } + + Ok(results) + } +} diff --git a/compliance-graph/src/search/mod.rs b/compliance-graph/src/search/mod.rs new file mode 100644 index 0000000..33edc95 --- /dev/null +++ b/compliance-graph/src/search/mod.rs @@ -0,0 +1 @@ +pub mod index; diff --git a/docker-compose.yml b/docker-compose.yml index b439a06..fa9f801 100644 --- a/docker-compose.yml +++ b/docker-compose.yml 
@@ -9,13 +9,6 @@ services: volumes: - mongo_data:/data/db - searxng: - image: searxng/searxng:latest - ports: - - "8888:8080" - environment: - - SEARXNG_BASE_URL=http://localhost:8888 - agent: build: context: . @@ -40,6 +33,16 @@ services: - mongo - agent + chromium: + image: browserless/chrome:latest + ports: + - "3003:3000" + environment: + MAX_CONCURRENT_SESSIONS: 5 + CONNECTION_TIMEOUT: 60000 + PREBOOT_CHROME: "true" + restart: unless-stopped + volumes: mongo_data: repos_data: