3 Commits

Author SHA1 Message Date
Sharang Parnerkar
b234443324 Run cargo fmt across all crates
All checks were successful
CI / Format (push) Successful in 2s
CI / Clippy (push) Successful in 3m2s
CI / Security Audit (push) Has been skipped
CI / Tests (push) Has been skipped
CI / Format (pull_request) Successful in 3s
CI / Clippy (pull_request) Successful in 3m5s
CI / Security Audit (pull_request) Has been skipped
CI / Tests (pull_request) Has been skipped
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-07 23:51:08 +00:00
Sharang Parnerkar
fe57e9e9bc Add Keycloak and OpenTelemetry env vars to .env.example
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-07 23:51:08 +00:00
Sharang Parnerkar
c61497420a Add OpenTelemetry tracing and log export via OTLP
Shared telemetry init module in compliance-core (behind `telemetry` feature)
sets up OTLP/gRPC export for traces and logs when OTEL_EXPORTER_OTLP_ENDPOINT
is set. Falls back to console-only output when unset.

Both agent and dashboard now use the shared init. Docker Compose includes
an OTel Collector service with a config template for SigNoz, Grafana
Tempo/Loki, Jaeger, etc.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-07 23:51:08 +00:00
256 changed files with 2242 additions and 41885 deletions

View File

@@ -38,9 +38,6 @@ GIT_CLONE_BASE_PATH=/tmp/compliance-scanner/repos
DASHBOARD_PORT=8080
AGENT_API_URL=http://localhost:3001
# MCP Server
MCP_ENDPOINT_URL=http://localhost:8090
# Keycloak (required for authentication)
KEYCLOAK_URL=http://localhost:8080
KEYCLOAK_REALM=compliance

View File

@@ -3,8 +3,10 @@ name: CI
on:
push:
branches:
- main
- "**"
pull_request:
branches:
- main
env:
CARGO_TERM_COLOR: always
@@ -21,14 +23,13 @@ concurrency:
jobs:
# ---------------------------------------------------------------------------
# Stage 1: Lint, audit, and test (single job to share cargo cache)
# Stage 1: Code quality checks (run in parallel)
# ---------------------------------------------------------------------------
check:
name: Check
if: github.event_name == 'pull_request'
fmt:
name: Format
runs-on: docker
container:
image: rust:1.94-bookworm
image: rust:1.89-bookworm
steps:
- name: Checkout
run: |
@@ -36,161 +37,90 @@ jobs:
git remote add origin "${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}.git"
git fetch --depth=1 origin "${GITHUB_SHA}"
git checkout FETCH_HEAD
- name: Install tools
run: |
rustup component add rustfmt clippy
curl -fsSL https://github.com/mozilla/sccache/releases/download/v0.9.1/sccache-v0.9.1-x86_64-unknown-linux-musl.tar.gz \
| tar xz --strip-components=1 -C /usr/local/bin/ sccache-v0.9.1-x86_64-unknown-linux-musl/sccache
chmod +x /usr/local/bin/sccache
cargo install cargo-audit --locked
- run: rustup component add rustfmt
# Format check does not compile, so sccache is not needed here.
- run: cargo fmt --all --check
env:
RUSTC_WRAPPER: ""
# Format (no compilation needed)
- name: Format
run: cargo fmt --all --check
env:
RUSTC_WRAPPER: ""
# Clippy (compiles once, sccache reuses across feature sets)
- name: Clippy (agent)
run: cargo clippy -p compliance-agent -- -D warnings
- name: Clippy (dashboard server)
run: cargo clippy -p compliance-dashboard --features server --no-default-features -- -D warnings
- name: Clippy (dashboard web)
run: cargo clippy -p compliance-dashboard --features web --no-default-features -- -D warnings
- name: Clippy (mcp)
run: cargo clippy -p compliance-mcp -- -D warnings
# Security audit
- name: Security Audit
run: cargo audit
env:
RUSTC_WRAPPER: ""
# Tests (reuses compilation artifacts from clippy)
- name: Tests (core + agent)
run: cargo test -p compliance-core -p compliance-agent --lib
- name: Tests (dashboard server)
run: cargo test -p compliance-dashboard --features server --no-default-features
- name: Tests (dashboard web)
run: cargo test -p compliance-dashboard --features web --no-default-features
- name: Show sccache stats
run: sccache --show-stats
if: always()
# ---------------------------------------------------------------------------
# Stage 2: Deploy (only on main, after checks pass)
# Each service only deploys when its relevant files changed.
# ---------------------------------------------------------------------------
detect-changes:
name: Detect Changes
clippy:
name: Clippy
runs-on: docker
if: github.ref == 'refs/heads/main'
container:
image: alpine:latest
outputs:
agent: ${{ steps.changes.outputs.agent }}
dashboard: ${{ steps.changes.outputs.dashboard }}
docs: ${{ steps.changes.outputs.docs }}
mcp: ${{ steps.changes.outputs.mcp }}
image: rust:1.89-bookworm
steps:
- name: Install git
run: apk add --no-cache git
- name: Checkout
run: |
git init
git remote add origin "${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}.git"
git fetch --depth=2 origin "${GITHUB_SHA}"
git fetch --depth=1 origin "${GITHUB_SHA}"
git checkout FETCH_HEAD
- name: Detect changed paths
id: changes
- name: Install sccache
run: |
CHANGED=$(git diff --name-only HEAD~1 HEAD 2>/dev/null || echo "")
echo "Changed files:"
echo "$CHANGED"
curl -fsSL https://github.com/mozilla/sccache/releases/download/v0.9.1/sccache-v0.9.1-x86_64-unknown-linux-musl.tar.gz \
| tar xz --strip-components=1 -C /usr/local/bin/ sccache-v0.9.1-x86_64-unknown-linux-musl/sccache
chmod +x /usr/local/bin/sccache
- run: rustup component add clippy
# Lint the agent (native only).
- name: Clippy (agent)
run: cargo clippy -p compliance-agent -- -D warnings
# Lint the dashboard for both feature sets independently.
# sccache deduplicates shared crates between the two compilations.
- name: Clippy (dashboard server)
run: cargo clippy -p compliance-dashboard --features server --no-default-features -- -D warnings
- name: Clippy (dashboard web)
run: cargo clippy -p compliance-dashboard --features web --no-default-features -- -D warnings
- name: Show sccache stats
run: sccache --show-stats
if: always()
# Agent: core libs, agent code, agent Dockerfile
if echo "$CHANGED" | grep -qE '^(compliance-core/|compliance-agent/|compliance-graph/|compliance-dast/|Dockerfile\.agent|Cargo\.(toml|lock))'; then
echo "agent=true" >> "$GITHUB_OUTPUT"
else
echo "agent=false" >> "$GITHUB_OUTPUT"
fi
# Dashboard: core libs, dashboard code, dashboard Dockerfile, assets
if echo "$CHANGED" | grep -qE '^(compliance-core/|compliance-dashboard/|Dockerfile\.dashboard|Dioxus\.toml|assets/|bin/|Cargo\.(toml|lock))'; then
echo "dashboard=true" >> "$GITHUB_OUTPUT"
else
echo "dashboard=false" >> "$GITHUB_OUTPUT"
fi
# Docs: docs folder, docs Dockerfile
if echo "$CHANGED" | grep -qE '^(docs/|Dockerfile\.docs)'; then
echo "docs=true" >> "$GITHUB_OUTPUT"
else
echo "docs=false" >> "$GITHUB_OUTPUT"
fi
# MCP: core libs, mcp code, mcp Dockerfile
if echo "$CHANGED" | grep -qE '^(compliance-core/|compliance-mcp/|Dockerfile\.mcp|Cargo\.(toml|lock))'; then
echo "mcp=true" >> "$GITHUB_OUTPUT"
else
echo "mcp=false" >> "$GITHUB_OUTPUT"
fi
deploy-agent:
name: Deploy Agent
audit:
name: Security Audit
runs-on: docker
needs: [detect-changes]
if: needs.detect-changes.outputs.agent == 'true'
if: github.ref == 'refs/heads/main'
container:
image: alpine:latest
image: rust:1.89-bookworm
steps:
- name: Trigger Coolify deploy
- name: Checkout
run: |
apk add --no-cache curl
curl -sf "${{ secrets.COOLIFY_WEBHOOK_AGENT }}" \
-H "Authorization: Bearer ${{ secrets.COOLIFY_TOKEN }}"
git init
git remote add origin "${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}.git"
git fetch --depth=1 origin "${GITHUB_SHA}"
git checkout FETCH_HEAD
- run: cargo install cargo-audit
env:
RUSTC_WRAPPER: ""
- run: cargo audit
env:
RUSTC_WRAPPER: ""
deploy-dashboard:
name: Deploy Dashboard
# ---------------------------------------------------------------------------
# Stage 2: Tests (only after all quality checks pass)
# ---------------------------------------------------------------------------
test:
name: Tests
runs-on: docker
needs: [detect-changes]
if: needs.detect-changes.outputs.dashboard == 'true'
needs: [fmt, clippy, audit]
container:
image: alpine:latest
image: rust:1.89-bookworm
steps:
- name: Trigger Coolify deploy
- name: Checkout
run: |
apk add --no-cache curl
curl -sf "${{ secrets.COOLIFY_WEBHOOK_DASHBOARD }}" \
-H "Authorization: Bearer ${{ secrets.COOLIFY_TOKEN }}"
deploy-docs:
name: Deploy Docs
runs-on: docker
needs: [detect-changes]
if: needs.detect-changes.outputs.docs == 'true'
container:
image: alpine:latest
steps:
- name: Trigger Coolify deploy
git init
git remote add origin "${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}.git"
git fetch --depth=1 origin "${GITHUB_SHA}"
git checkout FETCH_HEAD
- name: Install sccache
run: |
apk add --no-cache curl
curl -sf "${{ secrets.COOLIFY_WEBHOOK_DOCS }}" \
-H "Authorization: Bearer ${{ secrets.COOLIFY_TOKEN }}"
deploy-mcp:
name: Deploy MCP
runs-on: docker
needs: [detect-changes]
if: needs.detect-changes.outputs.mcp == 'true'
container:
image: alpine:latest
steps:
- name: Trigger Coolify deploy
run: |
apk add --no-cache curl
curl -sf "${{ secrets.COOLIFY_WEBHOOK_MCP }}" \
-H "Authorization: Bearer ${{ secrets.COOLIFY_TOKEN }}"
curl -fsSL https://github.com/mozilla/sccache/releases/download/v0.9.1/sccache-v0.9.1-x86_64-unknown-linux-musl.tar.gz \
| tar xz --strip-components=1 -C /usr/local/bin/ sccache-v0.9.1-x86_64-unknown-linux-musl/sccache
chmod +x /usr/local/bin/sccache
- name: Run tests (core + agent)
run: cargo test -p compliance-core -p compliance-agent
- name: Run tests (dashboard server)
run: cargo test -p compliance-dashboard --features server --no-default-features
- name: Run tests (dashboard web)
run: cargo test -p compliance-dashboard --features web --no-default-features
- name: Show sccache stats
run: sccache --show-stats
if: always()

View File

@@ -1,52 +0,0 @@
name: Nightly E2E Tests
on:
schedule:
- cron: '0 3 * * *' # 3 AM UTC daily
workflow_dispatch: # Allow manual trigger
env:
CARGO_TERM_COLOR: always
RUSTFLAGS: "-D warnings"
RUSTC_WRAPPER: /usr/local/bin/sccache
SCCACHE_DIR: /tmp/sccache
TEST_MONGODB_URI: "mongodb://root:example@mongo:27017/?authSource=admin"
concurrency:
group: nightly-e2e
cancel-in-progress: true
jobs:
e2e:
name: E2E Tests
runs-on: docker
container:
image: rust:1.94-bookworm
services:
mongo:
image: mongo:7
env:
MONGO_INITDB_ROOT_USERNAME: root
MONGO_INITDB_ROOT_PASSWORD: example
steps:
- name: Checkout
run: |
git init
git remote add origin "${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}.git"
git fetch --depth=1 origin "${GITHUB_SHA:-refs/heads/main}"
git checkout FETCH_HEAD
- name: Install sccache
run: |
curl -fsSL https://github.com/mozilla/sccache/releases/download/v0.9.1/sccache-v0.9.1-x86_64-unknown-linux-musl.tar.gz \
| tar xz --strip-components=1 -C /usr/local/bin/ sccache-v0.9.1-x86_64-unknown-linux-musl/sccache
chmod +x /usr/local/bin/sccache
env:
RUSTC_WRAPPER: ""
- name: Run E2E tests
run: cargo test -p compliance-agent --test e2e -- --test-threads=4
- name: Show sccache stats
run: sccache --show-stats
if: always()

3
.gitignore vendored
View File

@@ -4,6 +4,3 @@
*.swo
*~
.DS_Store
.playwright-mcp/
report-preview-full.png
compliance-dashboard/attack-chain-final.html

898
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -5,7 +5,6 @@ members = [
"compliance-dashboard",
"compliance-graph",
"compliance-dast",
"compliance-mcp",
]
resolver = "2"
@@ -29,7 +28,3 @@ hex = "0.4"
uuid = { version = "1", features = ["v4", "serde"] }
secrecy = { version = "0.10", features = ["serde"] }
regex = "1"
zip = { version = "2", features = ["aes-crypto", "deflate"] }
dashmap = "6"
tokio-stream = { version = "0.1", features = ["sync"] }
aes-gcm = "0.10"

View File

@@ -1,41 +1,17 @@
FROM rust:1.94-bookworm AS builder
FROM rust:1.89-bookworm AS builder
WORKDIR /app
COPY . .
RUN cargo build --release -p compliance-agent
FROM debian:bookworm-slim
RUN apt-get update && apt-get install -y ca-certificates libssl3 git curl python3 python3-pip npm golang-go php-cli && rm -rf /var/lib/apt/lists/*
# Install Cargo (minimal, for cargo metadata / cargo audit / generate-lockfile)
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable --profile minimal
ENV PATH="/root/.cargo/bin:${PATH}"
RUN cargo install cargo-audit
# Install Composer for PHP dependency resolution
RUN curl -sS https://getcomposer.org/installer | php -- --install-dir=/usr/local/bin --filename=composer
# Install Bundler for Ruby dependency resolution
RUN apt-get update && apt-get install -y ruby && rm -rf /var/lib/apt/lists/* && gem install bundler
RUN apt-get update && apt-get install -y ca-certificates libssl3 git curl && rm -rf /var/lib/apt/lists/*
# Install syft for SBOM generation
RUN curl -sSfL https://raw.githubusercontent.com/anchore/syft/main/install.sh | sh -s -- -b /usr/local/bin
# Install gitleaks for secret detection
RUN curl -sSfL https://github.com/gitleaks/gitleaks/releases/download/v8.21.2/gitleaks_8.21.2_linux_x64.tar.gz \
| tar -xz -C /usr/local/bin gitleaks
# Install semgrep for static analysis
RUN pip3 install --break-system-packages semgrep
# Install ruff for Python linting
RUN pip3 install --break-system-packages ruff
COPY --from=builder /app/target/release/compliance-agent /usr/local/bin/compliance-agent
# Ensure SSH key directory exists
RUN mkdir -p /data/compliance-scanner/ssh
EXPOSE 3001 3002
ENTRYPOINT ["compliance-agent"]

View File

@@ -1,12 +1,9 @@
FROM rust:1.94-bookworm AS builder
FROM rust:1.89-bookworm AS builder
RUN cargo install dioxus-cli --version 0.7.3 --locked
ARG DOCS_URL=/docs
RUN cargo install dioxus-cli --version 0.7.3
WORKDIR /app
COPY . .
ENV DOCS_URL=${DOCS_URL}
RUN dx build --release --package compliance-dashboard
FROM debian:bookworm-slim
@@ -16,7 +13,6 @@ WORKDIR /app
COPY --from=builder /app/target/dx/compliance-dashboard/release/web/compliance-dashboard /app/compliance-dashboard
COPY --from=builder /app/target/dx/compliance-dashboard/release/web/public /app/public
ENV IP=0.0.0.0
EXPOSE 8080
ENTRYPOINT ["./compliance-dashboard"]

View File

@@ -1,14 +0,0 @@
FROM node:22-alpine AS builder
WORKDIR /app
COPY docs/package.json docs/package-lock.json ./
RUN npm ci
COPY docs/ .
RUN npm run build
FROM nginx:alpine
RUN rm /etc/nginx/conf.d/default.conf
COPY docs/nginx.conf /etc/nginx/conf.d/default.conf
COPY --from=builder /app/.vitepress/dist /usr/share/nginx/html
EXPOSE 80

View File

@@ -1,16 +0,0 @@
FROM rust:1.94-bookworm AS builder
WORKDIR /app
COPY . .
RUN cargo build --release -p compliance-mcp
FROM debian:bookworm-slim
RUN apt-get update && apt-get install -y ca-certificates libssl3 && rm -rf /var/lib/apt/lists/*
COPY --from=builder /app/target/release/compliance-mcp /usr/local/bin/compliance-mcp
EXPOSE 8090
ENV MCP_PORT=8090
ENTRYPOINT ["compliance-mcp"]

124
README.md
View File

@@ -9,7 +9,7 @@
</p>
<p align="center">
<a href="https://www.rust-lang.org/"><img src="https://img.shields.io/badge/Rust-1.94-orange?logo=rust&logoColor=white" alt="Rust" /></a>
<a href="https://www.rust-lang.org/"><img src="https://img.shields.io/badge/Rust-1.89-orange?logo=rust&logoColor=white" alt="Rust" /></a>
<a href="https://dioxuslabs.com/"><img src="https://img.shields.io/badge/Dioxus-0.7-blue?logo=webassembly&logoColor=white" alt="Dioxus" /></a>
<a href="https://www.mongodb.com/"><img src="https://img.shields.io/badge/MongoDB-8.0-47A248?logo=mongodb&logoColor=white" alt="MongoDB" /></a>
<a href="https://axum.rs/"><img src="https://img.shields.io/badge/Axum-0.8-4A4A55?logo=rust&logoColor=white" alt="Axum" /></a>
@@ -28,9 +28,9 @@
## About
Compliance Scanner is an autonomous agent that continuously monitors git repositories for security vulnerabilities, GDPR/OAuth compliance patterns, and dependency risks. It creates issues in external trackers (GitHub/GitLab/Jira/Gitea) with evidence and remediation suggestions, reviews pull requests with multi-pass LLM analysis, runs autonomous penetration tests, and exposes a Dioxus-based dashboard for visualization.
Compliance Scanner is an autonomous agent that continuously monitors git repositories for security vulnerabilities, GDPR/OAuth compliance patterns, and dependency risks. It creates issues in external trackers (GitHub/GitLab/Jira) with evidence and remediation suggestions, reviews pull requests, and exposes a Dioxus-based dashboard for visualization.
> **How it works:** The agent runs as a lazy daemon -- it only scans when new commits are detected, triggered by cron schedules or webhooks. LLM-powered triage filters out false positives and generates actionable remediation with multi-language awareness.
> **How it works:** The agent runs as a lazy daemon -- it only scans when new commits are detected, triggered by cron schedules or webhooks. LLM-powered triage filters out false positives and generates actionable remediation.
## Features
@@ -41,38 +41,31 @@ Compliance Scanner is an autonomous agent that continuously monitors git reposit
| **CVE Monitoring** | OSV.dev batch queries, NVD CVSS enrichment, SearXNG context |
| **GDPR Patterns** | Detect PII logging, missing consent, hardcoded retention, missing deletion |
| **OAuth Patterns** | Detect implicit grant, missing PKCE, token in localStorage, token in URLs |
| **LLM Triage** | Multi-language-aware confidence scoring (Rust, Python, Go, Java, Ruby, PHP, C++) |
| **Issue Creation** | Auto-create issues in GitHub, GitLab, Jira, or Gitea with dedup via fingerprints |
| **PR Reviews** | Multi-pass security review (logic, security, convention, complexity) with dedup |
| **DAST Scanning** | Black-box security testing with endpoint discovery and parameter fuzzing |
| **AI Pentesting** | Autonomous LLM-orchestrated penetration testing with encrypted reports |
| **Code Graph** | Interactive code knowledge graph with impact analysis |
| **AI Chat (RAG)** | Natural language Q&A grounded in repository source code |
| **Help Assistant** | Documentation-grounded help chat accessible from every dashboard page |
| **MCP Server** | Expose live security data to Claude, Cursor, and other AI tools |
| **Dashboard** | Fullstack Dioxus UI with findings, SBOM, issues, DAST, pentest, and graph |
| **Webhooks** | GitHub, GitLab, and Gitea webhook receivers for push/PR events |
| **Finding Dedup** | SHA-256 fingerprint dedup for SAST, CWE-based dedup for DAST findings |
| **LLM Triage** | Confidence scoring via LiteLLM to filter false positives |
| **Issue Creation** | Auto-create issues in GitHub, GitLab, or Jira with code evidence |
| **PR Reviews** | Post security review comments on pull requests |
| **Dashboard** | Fullstack Dioxus UI with findings, SBOM, issues, and statistics |
| **Webhooks** | GitHub (HMAC-SHA256) and GitLab webhook receivers for push/PR events |
## Architecture
```
┌──────────────────────────────────────────────────────────────────────────
Cargo Workspace
├──────────────┬──────────────────┬──────────────┬──────────┬─────────────┤
│ compliance- │ compliance- │ compliance- │ complian-│ compliance-
│ core (lib) │ agent (bin) │ dashboard │ ce-graph │ mcp (bin)
│ │ (bin) │ (lib) │
Models │ Scan Pipeline │ Dioxus 0.7 │ Tree- │ MCP Server
Traits │ LLM Client │ Fullstack UI │ sitter │ Live data
ConfigIssue Trackers │ Help Chat │ Graph │ for AI
ErrorsPentest Engine │ Server Fns │ Embedds │ tools
│ DAST Tools │ RAG │
│ │ REST API │ │ │
│ │ Webhooks │ │ │
└──────────────┴──────────────────┴──────────────┴──────────┴─────────────┘
MongoDB (shared)
┌─────────────────────────────────────────────────────────────┐
│ Cargo Workspace │
├──────────────┬──────────────────┬───────────────────────────┤
│ compliance- │ compliance- │ compliance-
│ core │ agent │ dashboard
(lib) │ (bin) │ (bin, Dioxus 0.7.3)
│ │
Models │ Scan Pipeline │ Fullstack Web UI
TraitsLLM Client │ Server Functions
ConfigIssue Trackers │ Charts + Tables
Errors │ SchedulerSettings Page
│ │ REST API │ │
│ │ Webhooks │ │
└──────────────┴──────────────────┴───────────────────────────┘
MongoDB (shared)
```
## Scan Pipeline (7 Stages)
@@ -91,22 +84,17 @@ Compliance Scanner is an autonomous agent that continuously monitors git reposit
|-------|-----------|
| Shared Library | `compliance-core` -- models, traits, config |
| Agent | Axum REST API, git2, tokio-cron-scheduler, Semgrep, Syft |
| Dashboard | Dioxus 0.7.3 fullstack, Tailwind CSS 4 |
| Code Graph | `compliance-graph` -- tree-sitter parsing, embeddings, RAG |
| MCP Server | `compliance-mcp` -- Model Context Protocol for AI tools |
| DAST | `compliance-dast` -- dynamic application security testing |
| Dashboard | Dioxus 0.7.3 fullstack, Tailwind CSS |
| Database | MongoDB with typed collections |
| LLM | LiteLLM (OpenAI-compatible API for chat, triage, embeddings) |
| Issue Trackers | GitHub (octocrab), GitLab (REST v4), Jira (REST v3), Gitea |
| LLM | LiteLLM (OpenAI-compatible API) |
| Issue Trackers | GitHub (octocrab), GitLab (REST v4), Jira (REST v3) |
| CVE Sources | OSV.dev, NVD, SearXNG |
| Auth | Keycloak (OAuth2/PKCE, SSO) |
| Browser Automation | Chromium (headless, for pentesting and PDF generation) |
## Getting Started
### Prerequisites
- Rust 1.94+
- Rust 1.89+
- [Dioxus CLI](https://dioxuslabs.com/learn/0.7/getting_started) (`dx`)
- MongoDB
- Docker & Docker Compose (optional)
@@ -163,35 +151,20 @@ The agent exposes a REST API on port 3001:
| `GET` | `/api/v1/sbom` | List dependencies |
| `GET` | `/api/v1/issues` | List cross-tracker issues |
| `GET` | `/api/v1/scan-runs` | Scan execution history |
| `GET` | `/api/v1/graph/:repo_id` | Code knowledge graph |
| `POST` | `/api/v1/graph/:repo_id/build` | Trigger graph build |
| `GET` | `/api/v1/dast/targets` | List DAST targets |
| `POST` | `/api/v1/dast/targets` | Add DAST target |
| `GET` | `/api/v1/dast/findings` | List DAST findings |
| `POST` | `/api/v1/chat/:repo_id` | RAG-powered code chat |
| `POST` | `/api/v1/help/chat` | Documentation-grounded help chat |
| `POST` | `/api/v1/pentest/sessions` | Create pentest session |
| `POST` | `/api/v1/pentest/sessions/:id/export` | Export encrypted pentest report |
| `POST` | `/webhook/github` | GitHub webhook (HMAC-SHA256) |
| `POST` | `/webhook/gitlab` | GitLab webhook (token verify) |
| `POST` | `/webhook/gitea` | Gitea webhook |
## Dashboard Pages
| Page | Description |
|------|-------------|
| **Overview** | Stat cards, severity distribution, AI chat cards, MCP status |
| **Repositories** | Add/manage tracked repos, trigger scans, webhook config |
| **Findings** | Filterable table by severity, type, status, scanner |
| **Overview** | Stat cards, severity distribution chart |
| **Repositories** | Add/manage tracked repos, trigger scans |
| **Findings** | Filterable table by severity, type, status |
| **Finding Detail** | Code evidence, remediation, suggested fix, linked issue |
| **SBOM** | Dependency inventory with vulnerability badges, license summary |
| **Issues** | Cross-tracker view (GitHub + GitLab + Jira + Gitea) |
| **Code Graph** | Interactive architecture visualization, impact analysis |
| **AI Chat** | RAG-powered Q&A about repository code |
| **DAST** | Dynamic scanning targets, findings, and scan history |
| **Pentest** | AI-driven pentest sessions, attack chain visualization |
| **MCP Servers** | Model Context Protocol server management |
| **Help Chat** | Floating assistant (available on every page) for product Q&A |
| **SBOM** | Dependency inventory with vulnerability badges |
| **Issues** | Cross-tracker view (GitHub + GitLab + Jira) |
| **Settings** | Configure LiteLLM, tracker tokens, SearXNG URL |
## Project Structure
@@ -200,24 +173,19 @@ compliance-scanner/
├── compliance-core/ Shared library (models, traits, config, errors)
├── compliance-agent/ Agent daemon (pipeline, LLM, trackers, API, webhooks)
│ └── src/
│ ├── pipeline/ 7-stage scan pipeline, dedup, PR reviews, code review
│ ├── llm/ LiteLLM client, triage, descriptions, fixes, review prompts
│ ├── trackers/ GitHub, GitLab, Jira, Gitea integrations
│ ├── pentest/ AI-driven pentest orchestrator, tools, reports
── rag/ RAG pipeline, chunking, embedding
│ ├── api/ REST API (Axum), help chat
│ └── webhooks/ GitHub, GitLab, Gitea webhook receivers
│ ├── pipeline/ 7-stage scan pipeline
│ ├── llm/ LiteLLM client, triage, descriptions, fixes, PR review
│ ├── trackers/ GitHub, GitLab, Jira integrations
│ ├── api/ REST API (Axum)
── webhooks/ GitHub + GitLab webhook receivers
├── compliance-dashboard/ Dioxus fullstack dashboard
│ └── src/
│ ├── components/ Reusable UI (sidebar, help chat, attack chain, etc.)
│ ├── infrastructure/ Server functions, DB, config, auth
│ └── pages/ Full page views (overview, DAST, pentest, graph, etc.)
├── compliance-graph/ Code knowledge graph (tree-sitter, embeddings, RAG)
├── compliance-dast/ Dynamic application security testing
├── compliance-mcp/ Model Context Protocol server
├── docs/ VitePress documentation site
│ ├── components/ Reusable UI components
│ ├── infrastructure/ Server functions, DB, config
│ └── pages/ Full page views
├── assets/ Static assets (CSS, icons)
── styles/ Tailwind input stylesheet
── styles/ Tailwind input stylesheet
└── bin/ Dashboard binary entrypoint
```
## External Services
@@ -225,12 +193,10 @@ compliance-scanner/
| Service | Purpose | Default URL |
|---------|---------|-------------|
| MongoDB | Persistence | `mongodb://localhost:27017` |
| LiteLLM | LLM proxy (chat, triage, embeddings) | `http://localhost:4000` |
| LiteLLM | LLM proxy for triage and generation | `http://localhost:4000` |
| SearXNG | CVE context search | `http://localhost:8888` |
| Keycloak | Authentication (OAuth2/PKCE, SSO) | `http://localhost:8080` |
| Semgrep | SAST scanning | CLI tool |
| Syft | SBOM generation | CLI tool |
| Chromium | Headless browser (pentesting, PDF) | Managed via Docker |
---

View File

@@ -300,84 +300,6 @@ tr:hover {
color: var(--text-secondary);
}
/* Sidebar User Section */
.sidebar-user {
display: flex;
align-items: center;
gap: 10px;
padding: 12px 14px;
margin: 8px;
border-top: 1px solid var(--border);
padding-top: 16px;
}
.sidebar-user-collapsed {
flex-direction: column;
gap: 8px;
padding: 12px 4px;
margin: 8px 4px;
}
.user-avatar {
width: 34px;
height: 34px;
border-radius: 10px;
background: linear-gradient(135deg, rgba(56, 189, 248, 0.2), rgba(56, 189, 248, 0.08));
border: 1px solid rgba(56, 189, 248, 0.15);
display: flex;
align-items: center;
justify-content: center;
flex-shrink: 0;
}
.avatar-initials {
font-size: 13px;
font-weight: 700;
color: var(--accent);
line-height: 1;
}
.avatar-img {
width: 100%;
height: 100%;
border-radius: 10px;
object-fit: cover;
}
.user-name {
flex: 1;
font-size: 13px;
font-weight: 500;
color: var(--text-primary);
white-space: nowrap;
overflow: hidden;
text-overflow: ellipsis;
min-width: 0;
}
.logout-btn {
display: flex;
align-items: center;
justify-content: center;
width: 32px;
height: 32px;
border-radius: 8px;
color: var(--text-secondary);
text-decoration: none;
flex-shrink: 0;
transition: all 0.15s;
}
.logout-btn:hover {
background: rgba(239, 68, 68, 0.12);
color: #fca5a5;
}
.logout-btn-collapsed {
width: 34px;
height: 34px;
}
@media (max-width: 768px) {
.sidebar {
transform: translateX(-100%);
@@ -391,218 +313,3 @@ tr:hover {
padding: 16px;
}
}
/* ── Utility classes ────────────────────────────────────── */
.mb-3 { margin-bottom: 12px; }
.mb-4 { margin-bottom: 16px; }
.text-secondary { color: var(--text-secondary); }
.btn-sm {
padding: 4px 10px;
font-size: 12px;
}
.btn-danger {
background: var(--danger);
color: #fff;
}
.btn-danger:hover {
background: #dc2626;
}
.btn-secondary {
background: var(--bg-secondary);
color: var(--text-primary);
border: 1px solid var(--border);
}
.btn-secondary:hover {
background: var(--bg-primary);
}
/* ── Modal ──────────────────────────────────────────────── */
.modal-overlay {
position: fixed;
inset: 0;
background: rgba(0, 0, 0, 0.6);
backdrop-filter: blur(4px);
display: flex;
align-items: center;
justify-content: center;
z-index: 1000;
}
.modal-dialog {
background: var(--bg-secondary);
border: 1px solid var(--border);
border-radius: 12px;
padding: 24px;
max-width: 440px;
width: 90%;
max-height: 85vh;
overflow-y: auto;
}
.modal-dialog h3 {
margin-bottom: 12px;
}
.modal-dialog p {
margin-bottom: 8px;
font-size: 14px;
color: var(--text-secondary);
}
.modal-warning {
color: var(--warning) !important;
font-size: 13px !important;
}
.modal-actions {
display: flex;
gap: 8px;
justify-content: flex-end;
margin-top: 16px;
}
/* ── MCP Servers ────────────────────────────────────────── */
.mcp-server-card {
padding: 20px;
}
.mcp-server-header {
display: flex;
justify-content: space-between;
align-items: flex-start;
margin-bottom: 12px;
}
.mcp-server-title {
display: flex;
align-items: center;
gap: 10px;
}
.mcp-server-title h3 {
font-size: 16px;
font-weight: 600;
margin: 0;
}
.mcp-server-actions {
display: flex;
gap: 6px;
}
.mcp-status {
display: inline-flex;
align-items: center;
padding: 2px 10px;
border-radius: 20px;
font-size: 11px;
font-weight: 600;
text-transform: uppercase;
letter-spacing: 0.5px;
}
.mcp-status-running {
background: rgba(34, 197, 94, 0.15);
color: var(--success);
}
.mcp-status-stopped {
background: rgba(148, 163, 184, 0.15);
color: var(--text-secondary);
}
.mcp-status-error {
background: rgba(239, 68, 68, 0.15);
color: var(--danger);
}
.mcp-config-grid {
display: grid;
grid-template-columns: repeat(auto-fill, minmax(200px, 1fr));
gap: 12px;
margin-bottom: 16px;
}
.mcp-config-item {
display: flex;
flex-direction: column;
gap: 4px;
}
.mcp-config-label {
font-size: 11px;
font-weight: 600;
text-transform: uppercase;
letter-spacing: 0.5px;
color: var(--text-secondary);
}
.mcp-config-value {
font-size: 13px;
color: var(--text-primary);
word-break: break-all;
}
.mcp-form-grid {
display: grid;
grid-template-columns: 1fr 1fr;
gap: 0 16px;
}
.mcp-tools-section {
margin-bottom: 16px;
}
.mcp-tools-list {
display: flex;
flex-wrap: wrap;
gap: 6px;
margin-top: 6px;
}
.mcp-tool-badge {
display: inline-block;
padding: 3px 10px;
background: rgba(56, 189, 248, 0.1);
border: 1px solid rgba(56, 189, 248, 0.2);
border-radius: 6px;
font-size: 12px;
font-family: 'JetBrains Mono', monospace;
color: var(--accent);
}
.mcp-token-section {
margin-bottom: 12px;
}
.mcp-token-row {
display: flex;
align-items: center;
gap: 8px;
margin-top: 6px;
}
.mcp-token-value {
flex: 1;
padding: 6px 10px;
background: var(--bg-primary);
border: 1px solid var(--border);
border-radius: 6px;
font-size: 12px;
font-family: 'JetBrains Mono', monospace;
color: var(--text-secondary);
word-break: break-all;
}
.mcp-meta {
padding-top: 12px;
border-top: 1px solid var(--border);
font-size: 12px;
}

View File

@@ -36,20 +36,3 @@ base64 = "0.22"
urlencoding = "2"
futures-util = "0.3"
jsonwebtoken = "9"
zip = { workspace = true }
aes-gcm = { workspace = true }
tokio-tungstenite = { version = "0.26", features = ["rustls-tls-webpki-roots"] }
futures-core = "0.3"
dashmap = { workspace = true }
tokio-stream = { workspace = true }
[dev-dependencies]
compliance-core = { workspace = true, features = ["mongodb"] }
reqwest = { workspace = true }
serde_json = { workspace = true }
tokio = { workspace = true }
mongodb = { workspace = true }
uuid = { workspace = true }
secrecy = { workspace = true }
axum = "0.8"
tower-http = { version = "0.6", features = ["cors"] }

View File

@@ -1,30 +1,17 @@
use std::sync::Arc;
use dashmap::DashMap;
use tokio::sync::{broadcast, watch, Semaphore};
use compliance_core::models::pentest::PentestEvent;
use compliance_core::AgentConfig;
use crate::database::Database;
use crate::llm::LlmClient;
use crate::pipeline::orchestrator::PipelineOrchestrator;
/// Default maximum concurrent pentest sessions.
const DEFAULT_MAX_CONCURRENT_SESSIONS: usize = 5;
#[derive(Clone)]
pub struct ComplianceAgent {
pub config: AgentConfig,
pub db: Database,
pub llm: Arc<LlmClient>,
pub http: reqwest::Client,
/// Per-session broadcast senders for SSE streaming.
pub session_streams: Arc<DashMap<String, broadcast::Sender<PentestEvent>>>,
/// Per-session pause controls (true = paused).
pub session_pause: Arc<DashMap<String, watch::Sender<bool>>>,
/// Semaphore limiting concurrent pentest sessions.
pub session_semaphore: Arc<Semaphore>,
}
impl ComplianceAgent {
@@ -40,9 +27,6 @@ impl ComplianceAgent {
db,
llm,
http: reqwest::Client::new(),
session_streams: Arc::new(DashMap::new()),
session_pause: Arc::new(DashMap::new()),
session_semaphore: Arc::new(Semaphore::new(DEFAULT_MAX_CONCURRENT_SESSIONS)),
}
}
@@ -59,85 +43,4 @@ impl ComplianceAgent {
);
orchestrator.run(repo_id, trigger).await
}
/// Run a PR review: scan the diff and post review comments.
pub async fn run_pr_review(
&self,
repo_id: &str,
pr_number: u64,
base_sha: &str,
head_sha: &str,
) -> Result<(), crate::error::AgentError> {
let repo = self
.db
.repositories()
.find_one(mongodb::bson::doc! {
"_id": mongodb::bson::oid::ObjectId::parse_str(repo_id)
.map_err(|e| crate::error::AgentError::Other(e.to_string()))?
})
.await?
.ok_or_else(|| {
crate::error::AgentError::Other(format!("Repository {repo_id} not found"))
})?;
let orchestrator = PipelineOrchestrator::new(
self.config.clone(),
self.db.clone(),
self.llm.clone(),
self.http.clone(),
);
orchestrator
.run_pr_review(&repo, repo_id, pr_number, base_sha, head_sha)
.await
}
// ── Session stream management ──────────────────────────────────
/// Register a broadcast sender for a session. Returns the sender.
pub fn register_session_stream(&self, session_id: &str) -> broadcast::Sender<PentestEvent> {
let (tx, _) = broadcast::channel(256);
self.session_streams
.insert(session_id.to_string(), tx.clone());
tx
}
/// Subscribe to a session's broadcast stream.
pub fn subscribe_session(&self, session_id: &str) -> Option<broadcast::Receiver<PentestEvent>> {
self.session_streams
.get(session_id)
.map(|tx| tx.subscribe())
}
// ── Session pause/resume management ────────────────────────────
/// Register a pause control for a session. Returns the watch receiver.
pub fn register_pause_control(&self, session_id: &str) -> watch::Receiver<bool> {
let (tx, rx) = watch::channel(false);
self.session_pause.insert(session_id.to_string(), tx);
rx
}
/// Pause a session.
pub fn pause_session(&self, session_id: &str) -> bool {
if let Some(tx) = self.session_pause.get(session_id) {
tx.send(true).is_ok()
} else {
false
}
}
/// Resume a session.
pub fn resume_session(&self, session_id: &str) -> bool {
if let Some(tx) = self.session_pause.get(session_id) {
tx.send(false).is_ok()
} else {
false
}
}
/// Clean up all per-session resources.
pub fn cleanup_session(&self, session_id: &str) {
self.session_streams.remove(session_id);
self.session_pause.remove(session_id);
}
}

View File

@@ -17,7 +17,6 @@ use super::ApiResponse;
type AgentExt = Extension<Arc<ComplianceAgent>>;
/// POST /api/v1/chat/:repo_id — Send a chat message with RAG context
#[tracing::instrument(skip_all, fields(repo_id = %repo_id))]
pub async fn chat(
Extension(agent): AgentExt,
Path(repo_id): Path<String>,
@@ -90,13 +89,10 @@ pub async fn chat(
};
let system_prompt = format!(
"You are a code assistant for this repository. Answer questions using the code context below.\n\n\
Rules:\n\
- Reference specific files, functions, and line numbers\n\
- Show code snippets when they help explain the answer\n\
- If the context is insufficient, say what's missing rather than guessing\n\
- Be concise — lead with the answer, then explain if needed\n\
- For security questions, note relevant CWEs and link to the finding if one exists\n\n\
"You are an expert code assistant for a software repository. \
Answer the user's question based on the code context below. \
Reference specific files and functions when relevant. \
If the context doesn't contain enough information, say so.\n\n\
## Code Context\n\n{code_context}"
);
@@ -130,7 +126,6 @@ pub async fn chat(
}
/// POST /api/v1/chat/:repo_id/build-embeddings — Trigger embedding build
#[tracing::instrument(skip_all, fields(repo_id = %repo_id))]
pub async fn build_embeddings(
Extension(agent): AgentExt,
Path(repo_id): Path<String>,
@@ -192,13 +187,7 @@ pub async fn build_embeddings(
}
};
let creds = crate::pipeline::git::RepoCredentials {
ssh_key_path: Some(agent_clone.config.ssh_key_path.clone()),
auth_token: repo.auth_token.clone(),
auth_username: repo.auth_username.clone(),
};
let git_ops =
crate::pipeline::git::GitOps::new(&agent_clone.config.git_clone_base_path, creds);
let git_ops = crate::pipeline::git::GitOps::new(&agent_clone.config.git_clone_base_path);
let repo_path = match git_ops.clone_or_fetch(&repo.git_url, &repo.name) {
Ok(p) => p,
Err(e) => {
@@ -231,7 +220,6 @@ pub async fn build_embeddings(
}
/// GET /api/v1/chat/:repo_id/status — Get latest embedding build status
#[tracing::instrument(skip_all, fields(repo_id = %repo_id))]
pub async fn embedding_status(
Extension(agent): AgentExt,
Path(repo_id): Path<String>,

View File

@@ -42,7 +42,6 @@ fn default_rate_limit() -> u32 {
}
/// GET /api/v1/dast/targets — List DAST targets
#[tracing::instrument(skip_all)]
pub async fn list_targets(
Extension(agent): AgentExt,
Query(params): Query<PaginationParams>,
@@ -63,10 +62,7 @@ pub async fn list_targets(
.await
{
Ok(cursor) => collect_cursor_async(cursor).await,
Err(e) => {
tracing::warn!("Failed to fetch DAST targets: {e}");
Vec::new()
}
Err(_) => Vec::new(),
};
Ok(Json(ApiResponse {
@@ -77,7 +73,6 @@ pub async fn list_targets(
}
/// POST /api/v1/dast/targets — Add a new DAST target
#[tracing::instrument(skip_all)]
pub async fn add_target(
Extension(agent): AgentExt,
Json(req): Json<AddTargetRequest>,
@@ -104,7 +99,6 @@ pub async fn add_target(
}
/// POST /api/v1/dast/targets/:id/scan — Trigger DAST scan
#[tracing::instrument(skip_all, fields(target_id = %id))]
pub async fn trigger_scan(
Extension(agent): AgentExt,
Path(id): Path<String>,
@@ -144,7 +138,6 @@ pub async fn trigger_scan(
}
/// GET /api/v1/dast/scan-runs — List DAST scan runs
#[tracing::instrument(skip_all)]
pub async fn list_scan_runs(
Extension(agent): AgentExt,
Query(params): Query<PaginationParams>,
@@ -166,10 +159,7 @@ pub async fn list_scan_runs(
.await
{
Ok(cursor) => collect_cursor_async(cursor).await,
Err(e) => {
tracing::warn!("Failed to fetch DAST scan runs: {e}");
Vec::new()
}
Err(_) => Vec::new(),
};
Ok(Json(ApiResponse {
@@ -180,7 +170,6 @@ pub async fn list_scan_runs(
}
/// GET /api/v1/dast/findings — List DAST findings
#[tracing::instrument(skip_all)]
pub async fn list_findings(
Extension(agent): AgentExt,
Query(params): Query<PaginationParams>,
@@ -202,10 +191,7 @@ pub async fn list_findings(
.await
{
Ok(cursor) => collect_cursor_async(cursor).await,
Err(e) => {
tracing::warn!("Failed to fetch DAST findings: {e}");
Vec::new()
}
Err(_) => Vec::new(),
};
Ok(Json(ApiResponse {
@@ -216,7 +202,6 @@ pub async fn list_findings(
}
/// GET /api/v1/dast/findings/:id — Finding detail with evidence
#[tracing::instrument(skip_all, fields(finding_id = %id))]
pub async fn get_finding(
Extension(agent): AgentExt,
Path(id): Path<String>,

View File

@@ -1,481 +0,0 @@
use compliance_core::models::TrackerType;
use serde::{Deserialize, Serialize};
use compliance_core::models::ScanRun;
#[derive(Deserialize)]
pub struct PaginationParams {
#[serde(default = "default_page")]
pub page: u64,
#[serde(default = "default_limit")]
pub limit: i64,
}
pub(crate) fn default_page() -> u64 {
1
}
pub(crate) fn default_limit() -> i64 {
50
}
#[derive(Deserialize)]
pub struct FindingsFilter {
#[serde(default)]
pub repo_id: Option<String>,
#[serde(default)]
pub severity: Option<String>,
#[serde(default)]
pub scan_type: Option<String>,
#[serde(default)]
pub status: Option<String>,
#[serde(default)]
pub q: Option<String>,
#[serde(default)]
pub sort_by: Option<String>,
#[serde(default)]
pub sort_order: Option<String>,
#[serde(default = "default_page")]
pub page: u64,
#[serde(default = "default_limit")]
pub limit: i64,
}
#[derive(Serialize)]
pub struct ApiResponse<T: Serialize> {
pub data: T,
#[serde(skip_serializing_if = "Option::is_none")]
pub total: Option<u64>,
#[serde(skip_serializing_if = "Option::is_none")]
pub page: Option<u64>,
}
#[derive(Serialize)]
pub struct OverviewStats {
pub total_repositories: u64,
pub total_findings: u64,
pub critical_findings: u64,
pub high_findings: u64,
pub medium_findings: u64,
pub low_findings: u64,
pub total_sbom_entries: u64,
pub total_cve_alerts: u64,
pub total_issues: u64,
pub recent_scans: Vec<ScanRun>,
}
#[derive(Deserialize)]
pub struct AddRepositoryRequest {
pub name: String,
pub git_url: String,
#[serde(default = "default_branch")]
pub default_branch: String,
pub auth_token: Option<String>,
pub auth_username: Option<String>,
pub tracker_type: Option<TrackerType>,
pub tracker_owner: Option<String>,
pub tracker_repo: Option<String>,
pub tracker_token: Option<String>,
pub scan_schedule: Option<String>,
}
#[derive(Deserialize)]
pub struct UpdateRepositoryRequest {
pub name: Option<String>,
pub default_branch: Option<String>,
pub auth_token: Option<String>,
pub auth_username: Option<String>,
pub tracker_type: Option<TrackerType>,
pub tracker_owner: Option<String>,
pub tracker_repo: Option<String>,
pub tracker_token: Option<String>,
pub scan_schedule: Option<String>,
}
fn default_branch() -> String {
"main".to_string()
}
#[derive(Deserialize)]
pub struct UpdateStatusRequest {
pub status: String,
}
#[derive(Deserialize)]
pub struct BulkUpdateStatusRequest {
pub ids: Vec<String>,
pub status: String,
}
#[derive(Deserialize)]
pub struct UpdateFeedbackRequest {
pub feedback: String,
}
#[derive(Deserialize)]
pub struct SbomFilter {
#[serde(default)]
pub repo_id: Option<String>,
#[serde(default)]
pub package_manager: Option<String>,
#[serde(default)]
pub q: Option<String>,
#[serde(default)]
pub has_vulns: Option<bool>,
#[serde(default)]
pub license: Option<String>,
#[serde(default = "default_page")]
pub page: u64,
#[serde(default = "default_limit")]
pub limit: i64,
}
#[derive(Deserialize)]
pub struct SbomExportParams {
pub repo_id: String,
#[serde(default = "default_export_format")]
pub format: String,
}
fn default_export_format() -> String {
"cyclonedx".to_string()
}
#[derive(Deserialize)]
pub struct SbomDiffParams {
pub repo_a: String,
pub repo_b: String,
}
#[derive(Serialize)]
pub struct LicenseSummary {
pub license: String,
pub count: u64,
pub is_copyleft: bool,
pub packages: Vec<String>,
}
#[derive(Serialize)]
pub struct SbomDiffResult {
pub only_in_a: Vec<SbomDiffEntry>,
pub only_in_b: Vec<SbomDiffEntry>,
pub version_changed: Vec<SbomVersionDiff>,
pub common_count: u64,
}
#[derive(Serialize)]
pub struct SbomDiffEntry {
pub name: String,
pub version: String,
pub package_manager: String,
}
#[derive(Serialize)]
pub struct SbomVersionDiff {
pub name: String,
pub package_manager: String,
pub version_a: String,
pub version_b: String,
}
pub(crate) type AgentExt = axum::extract::Extension<std::sync::Arc<crate::agent::ComplianceAgent>>;
pub(crate) type ApiResult<T> = Result<axum::Json<ApiResponse<T>>, axum::http::StatusCode>;
pub(crate) async fn collect_cursor_async<T: serde::de::DeserializeOwned + Unpin + Send>(
mut cursor: mongodb::Cursor<T>,
) -> Vec<T> {
use futures_util::StreamExt;
let mut items = Vec::new();
while let Some(result) = cursor.next().await {
match result {
Ok(item) => items.push(item),
Err(e) => tracing::warn!("Failed to deserialize document: {e}"),
}
}
items
}
#[cfg(test)]
mod tests {
use super::*;
use serde_json::json;
// ── PaginationParams ─────────────────────────────────────────
#[test]
fn pagination_params_defaults() {
let p: PaginationParams = serde_json::from_str("{}").unwrap();
assert_eq!(p.page, 1);
assert_eq!(p.limit, 50);
}
#[test]
fn pagination_params_custom_values() {
let p: PaginationParams = serde_json::from_str(r#"{"page":3,"limit":10}"#).unwrap();
assert_eq!(p.page, 3);
assert_eq!(p.limit, 10);
}
#[test]
fn pagination_params_partial_override() {
let p: PaginationParams = serde_json::from_str(r#"{"page":5}"#).unwrap();
assert_eq!(p.page, 5);
assert_eq!(p.limit, 50);
}
#[test]
fn pagination_params_zero_page() {
let p: PaginationParams = serde_json::from_str(r#"{"page":0}"#).unwrap();
assert_eq!(p.page, 0);
}
// ── FindingsFilter ───────────────────────────────────────────
#[test]
fn findings_filter_all_defaults() {
let f: FindingsFilter = serde_json::from_str("{}").unwrap();
assert!(f.repo_id.is_none());
assert!(f.severity.is_none());
assert!(f.scan_type.is_none());
assert!(f.status.is_none());
assert!(f.q.is_none());
assert!(f.sort_by.is_none());
assert!(f.sort_order.is_none());
assert_eq!(f.page, 1);
assert_eq!(f.limit, 50);
}
#[test]
fn findings_filter_with_all_fields() {
let f: FindingsFilter = serde_json::from_str(
r#"{
"repo_id": "abc",
"severity": "high",
"scan_type": "sast",
"status": "open",
"q": "sql injection",
"sort_by": "severity",
"sort_order": "desc",
"page": 2,
"limit": 25
}"#,
)
.unwrap();
assert_eq!(f.repo_id.as_deref(), Some("abc"));
assert_eq!(f.severity.as_deref(), Some("high"));
assert_eq!(f.scan_type.as_deref(), Some("sast"));
assert_eq!(f.status.as_deref(), Some("open"));
assert_eq!(f.q.as_deref(), Some("sql injection"));
assert_eq!(f.sort_by.as_deref(), Some("severity"));
assert_eq!(f.sort_order.as_deref(), Some("desc"));
assert_eq!(f.page, 2);
assert_eq!(f.limit, 25);
}
#[test]
fn findings_filter_empty_string_fields() {
let f: FindingsFilter = serde_json::from_str(r#"{"repo_id":"","severity":""}"#).unwrap();
assert_eq!(f.repo_id.as_deref(), Some(""));
assert_eq!(f.severity.as_deref(), Some(""));
}
// ── ApiResponse ──────────────────────────────────────────────
#[test]
fn api_response_serializes_with_all_fields() {
let resp = ApiResponse {
data: vec!["a", "b"],
total: Some(100),
page: Some(1),
};
let v = serde_json::to_value(&resp).unwrap();
assert_eq!(v["data"], json!(["a", "b"]));
assert_eq!(v["total"], 100);
assert_eq!(v["page"], 1);
}
#[test]
fn api_response_skips_none_fields() {
let resp = ApiResponse {
data: "hello",
total: None,
page: None,
};
let v = serde_json::to_value(&resp).unwrap();
assert_eq!(v["data"], "hello");
assert!(v.get("total").is_none());
assert!(v.get("page").is_none());
}
#[test]
fn api_response_with_nested_struct() {
#[derive(Serialize)]
struct Item {
id: u32,
}
let resp = ApiResponse {
data: Item { id: 42 },
total: Some(1),
page: None,
};
let v = serde_json::to_value(&resp).unwrap();
assert_eq!(v["data"]["id"], 42);
assert_eq!(v["total"], 1);
assert!(v.get("page").is_none());
}
#[test]
fn api_response_empty_vec() {
let resp: ApiResponse<Vec<String>> = ApiResponse {
data: vec![],
total: Some(0),
page: Some(1),
};
let v = serde_json::to_value(&resp).unwrap();
assert!(v["data"].as_array().unwrap().is_empty());
}
// ── SbomFilter ───────────────────────────────────────────────
#[test]
fn sbom_filter_defaults() {
let f: SbomFilter = serde_json::from_str("{}").unwrap();
assert!(f.repo_id.is_none());
assert!(f.package_manager.is_none());
assert!(f.q.is_none());
assert!(f.has_vulns.is_none());
assert!(f.license.is_none());
assert_eq!(f.page, 1);
assert_eq!(f.limit, 50);
}
#[test]
fn sbom_filter_has_vulns_bool() {
let f: SbomFilter = serde_json::from_str(r#"{"has_vulns": true}"#).unwrap();
assert_eq!(f.has_vulns, Some(true));
}
// ── SbomExportParams ─────────────────────────────────────────
#[test]
fn sbom_export_params_default_format() {
let p: SbomExportParams = serde_json::from_str(r#"{"repo_id":"r1"}"#).unwrap();
assert_eq!(p.repo_id, "r1");
assert_eq!(p.format, "cyclonedx");
}
#[test]
fn sbom_export_params_custom_format() {
let p: SbomExportParams =
serde_json::from_str(r#"{"repo_id":"r1","format":"spdx"}"#).unwrap();
assert_eq!(p.format, "spdx");
}
// ── AddRepositoryRequest ─────────────────────────────────────
#[test]
fn add_repository_request_defaults() {
let r: AddRepositoryRequest = serde_json::from_str(
r#"{
"name": "my-repo",
"git_url": "https://github.com/x/y.git"
}"#,
)
.unwrap();
assert_eq!(r.name, "my-repo");
assert_eq!(r.git_url, "https://github.com/x/y.git");
assert_eq!(r.default_branch, "main");
assert!(r.auth_token.is_none());
assert!(r.tracker_type.is_none());
assert!(r.scan_schedule.is_none());
}
#[test]
fn add_repository_request_custom_branch() {
let r: AddRepositoryRequest = serde_json::from_str(
r#"{
"name": "repo",
"git_url": "url",
"default_branch": "develop"
}"#,
)
.unwrap();
assert_eq!(r.default_branch, "develop");
}
// ── UpdateStatusRequest / BulkUpdateStatusRequest ────────────
#[test]
fn update_status_request() {
let r: UpdateStatusRequest = serde_json::from_str(r#"{"status":"resolved"}"#).unwrap();
assert_eq!(r.status, "resolved");
}
#[test]
fn bulk_update_status_request() {
let r: BulkUpdateStatusRequest =
serde_json::from_str(r#"{"ids":["a","b"],"status":"dismissed"}"#).unwrap();
assert_eq!(r.ids, vec!["a", "b"]);
assert_eq!(r.status, "dismissed");
}
#[test]
fn bulk_update_status_empty_ids() {
let r: BulkUpdateStatusRequest =
serde_json::from_str(r#"{"ids":[],"status":"x"}"#).unwrap();
assert!(r.ids.is_empty());
}
// ── SbomDiffResult serialization ─────────────────────────────
#[test]
fn sbom_diff_result_serializes() {
let r = SbomDiffResult {
only_in_a: vec![SbomDiffEntry {
name: "pkg-a".to_string(),
version: "1.0".to_string(),
package_manager: "npm".to_string(),
}],
only_in_b: vec![],
version_changed: vec![SbomVersionDiff {
name: "shared".to_string(),
package_manager: "cargo".to_string(),
version_a: "0.1".to_string(),
version_b: "0.2".to_string(),
}],
common_count: 10,
};
let v = serde_json::to_value(&r).unwrap();
assert_eq!(v["only_in_a"].as_array().unwrap().len(), 1);
assert_eq!(v["only_in_b"].as_array().unwrap().len(), 0);
assert_eq!(v["version_changed"][0]["version_a"], "0.1");
assert_eq!(v["common_count"], 10);
}
// ── LicenseSummary ───────────────────────────────────────────
#[test]
fn license_summary_serializes() {
let ls = LicenseSummary {
license: "MIT".to_string(),
count: 42,
is_copyleft: false,
packages: vec!["serde".to_string()],
};
let v = serde_json::to_value(&ls).unwrap();
assert_eq!(v["license"], "MIT");
assert_eq!(v["is_copyleft"], false);
assert_eq!(v["count"], 42);
}
// ── Default helper functions ─────────────────────────────────
#[test]
fn default_page_returns_1() {
assert_eq!(default_page(), 1);
}
#[test]
fn default_limit_returns_50() {
assert_eq!(default_limit(), 50);
}
}

View File

@@ -1,172 +0,0 @@
use axum::extract::{Extension, Path, Query};
use axum::http::StatusCode;
use axum::Json;
use mongodb::bson::doc;
use super::dto::*;
use compliance_core::models::Finding;
#[tracing::instrument(skip_all, fields(repo_id = ?filter.repo_id, severity = ?filter.severity, scan_type = ?filter.scan_type))]
pub async fn list_findings(
Extension(agent): AgentExt,
Query(filter): Query<FindingsFilter>,
) -> ApiResult<Vec<Finding>> {
let db = &agent.db;
let mut query = doc! {};
if let Some(repo_id) = &filter.repo_id {
query.insert("repo_id", repo_id);
}
if let Some(severity) = &filter.severity {
query.insert("severity", severity);
}
if let Some(scan_type) = &filter.scan_type {
query.insert("scan_type", scan_type);
}
if let Some(status) = &filter.status {
query.insert("status", status);
}
// Text search across title, description, file_path, rule_id
if let Some(q) = &filter.q {
if !q.is_empty() {
let regex = doc! { "$regex": q, "$options": "i" };
query.insert(
"$or",
mongodb::bson::bson!([
{ "title": regex.clone() },
{ "description": regex.clone() },
{ "file_path": regex.clone() },
{ "rule_id": regex },
]),
);
}
}
// Dynamic sort
let sort_field = filter.sort_by.as_deref().unwrap_or("created_at");
let sort_dir: i32 = match filter.sort_order.as_deref() {
Some("asc") => 1,
_ => -1,
};
let sort_doc = doc! { sort_field: sort_dir };
let skip = (filter.page.saturating_sub(1)) * filter.limit as u64;
let total = db
.findings()
.count_documents(query.clone())
.await
.unwrap_or(0);
let findings = match db
.findings()
.find(query)
.sort(sort_doc)
.skip(skip)
.limit(filter.limit)
.await
{
Ok(cursor) => collect_cursor_async(cursor).await,
Err(e) => {
tracing::warn!("Failed to fetch findings: {e}");
Vec::new()
}
};
Ok(Json(ApiResponse {
data: findings,
total: Some(total),
page: Some(filter.page),
}))
}
#[tracing::instrument(skip_all, fields(finding_id = %id))]
pub async fn get_finding(
Extension(agent): AgentExt,
Path(id): Path<String>,
) -> Result<Json<ApiResponse<Finding>>, StatusCode> {
let oid = mongodb::bson::oid::ObjectId::parse_str(&id).map_err(|_| StatusCode::BAD_REQUEST)?;
let finding = agent
.db
.findings()
.find_one(doc! { "_id": oid })
.await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?
.ok_or(StatusCode::NOT_FOUND)?;
Ok(Json(ApiResponse {
data: finding,
total: None,
page: None,
}))
}
#[tracing::instrument(skip_all, fields(finding_id = %id))]
pub async fn update_finding_status(
Extension(agent): AgentExt,
Path(id): Path<String>,
Json(req): Json<UpdateStatusRequest>,
) -> Result<Json<serde_json::Value>, StatusCode> {
let oid = mongodb::bson::oid::ObjectId::parse_str(&id).map_err(|_| StatusCode::BAD_REQUEST)?;
agent
.db
.findings()
.update_one(
doc! { "_id": oid },
doc! { "$set": { "status": &req.status, "updated_at": mongodb::bson::DateTime::now() } },
)
.await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
Ok(Json(serde_json::json!({ "status": "updated" })))
}
#[tracing::instrument(skip_all)]
pub async fn bulk_update_finding_status(
Extension(agent): AgentExt,
Json(req): Json<BulkUpdateStatusRequest>,
) -> Result<Json<serde_json::Value>, StatusCode> {
let oids: Vec<mongodb::bson::oid::ObjectId> = req
.ids
.iter()
.filter_map(|id| mongodb::bson::oid::ObjectId::parse_str(id).ok())
.collect();
if oids.is_empty() {
return Err(StatusCode::BAD_REQUEST);
}
let result = agent
.db
.findings()
.update_many(
doc! { "_id": { "$in": oids } },
doc! { "$set": { "status": &req.status, "updated_at": mongodb::bson::DateTime::now() } },
)
.await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
Ok(Json(
serde_json::json!({ "status": "updated", "modified_count": result.modified_count }),
))
}
#[tracing::instrument(skip_all)]
pub async fn update_finding_feedback(
Extension(agent): AgentExt,
Path(id): Path<String>,
Json(req): Json<UpdateFeedbackRequest>,
) -> Result<Json<serde_json::Value>, StatusCode> {
let oid = mongodb::bson::oid::ObjectId::parse_str(&id).map_err(|_| StatusCode::BAD_REQUEST)?;
agent
.db
.findings()
.update_one(
doc! { "_id": oid },
doc! { "$set": { "developer_feedback": &req.feedback, "updated_at": mongodb::bson::DateTime::now() } },
)
.await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
Ok(Json(serde_json::json!({ "status": "updated" })))
}

View File

@@ -33,7 +33,6 @@ fn default_search_limit() -> usize {
}
/// GET /api/v1/graph/:repo_id — Full graph data
#[tracing::instrument(skip_all, fields(repo_id = %repo_id))]
pub async fn get_graph(
Extension(agent): AgentExt,
Path(repo_id): Path<String>,
@@ -55,17 +54,11 @@ pub async fn get_graph(
let all_nodes: Vec<CodeNode> = match db.graph_nodes().find(filter.clone()).await {
Ok(cursor) => collect_cursor_async(cursor).await,
Err(e) => {
tracing::warn!("Failed to fetch graph nodes: {e}");
Vec::new()
}
Err(_) => Vec::new(),
};
let edges: Vec<CodeEdge> = match db.graph_edges().find(filter).await {
Ok(cursor) => collect_cursor_async(cursor).await,
Err(e) => {
tracing::warn!("Failed to fetch graph edges: {e}");
Vec::new()
}
Err(_) => Vec::new(),
};
// Remove disconnected nodes (no edges) to keep the graph clean
@@ -95,7 +88,6 @@ pub async fn get_graph(
}
/// GET /api/v1/graph/:repo_id/nodes — List nodes (paginated)
#[tracing::instrument(skip_all, fields(repo_id = %repo_id))]
pub async fn get_nodes(
Extension(agent): AgentExt,
Path(repo_id): Path<String>,
@@ -105,10 +97,7 @@ pub async fn get_nodes(
let nodes: Vec<CodeNode> = match db.graph_nodes().find(filter).await {
Ok(cursor) => collect_cursor_async(cursor).await,
Err(e) => {
tracing::warn!("Failed to fetch graph nodes: {e}");
Vec::new()
}
Err(_) => Vec::new(),
};
let total = nodes.len() as u64;
@@ -120,7 +109,6 @@ pub async fn get_nodes(
}
/// GET /api/v1/graph/:repo_id/communities — List detected communities
#[tracing::instrument(skip_all, fields(repo_id = %repo_id))]
pub async fn get_communities(
Extension(agent): AgentExt,
Path(repo_id): Path<String>,
@@ -130,10 +118,7 @@ pub async fn get_communities(
let nodes: Vec<CodeNode> = match db.graph_nodes().find(filter).await {
Ok(cursor) => collect_cursor_async(cursor).await,
Err(e) => {
tracing::warn!("Failed to fetch graph nodes for communities: {e}");
Vec::new()
}
Err(_) => Vec::new(),
};
let mut communities: std::collections::HashMap<u32, Vec<String>> =
@@ -173,7 +158,6 @@ pub struct CommunityInfo {
}
/// GET /api/v1/graph/:repo_id/impact/:finding_id — Impact analysis
#[tracing::instrument(skip_all, fields(repo_id = %repo_id, finding_id = %finding_id))]
pub async fn get_impact(
Extension(agent): AgentExt,
Path((repo_id, finding_id)): Path<(String, String)>,
@@ -195,7 +179,6 @@ pub async fn get_impact(
}
/// GET /api/v1/graph/:repo_id/search — BM25 symbol search
#[tracing::instrument(skip_all, fields(repo_id = %repo_id, query = %params.q))]
pub async fn search_symbols(
Extension(agent): AgentExt,
Path(repo_id): Path<String>,
@@ -216,10 +199,7 @@ pub async fn search_symbols(
.await
{
Ok(cursor) => collect_cursor_async(cursor).await,
Err(e) => {
tracing::warn!("Failed to search graph nodes: {e}");
Vec::new()
}
Err(_) => Vec::new(),
};
let total = nodes.len() as u64;
@@ -231,7 +211,6 @@ pub async fn search_symbols(
}
/// GET /api/v1/graph/:repo_id/file-content — Read source file from cloned repo
#[tracing::instrument(skip_all, fields(repo_id = %repo_id))]
pub async fn get_file_content(
Extension(agent): AgentExt,
Path(repo_id): Path<String>,
@@ -293,7 +272,6 @@ pub struct FileContent {
}
/// POST /api/v1/graph/:repo_id/build — Trigger graph rebuild
#[tracing::instrument(skip_all, fields(repo_id = %repo_id))]
pub async fn trigger_build(
Extension(agent): AgentExt,
Path(repo_id): Path<String>,
@@ -313,13 +291,7 @@ pub async fn trigger_build(
}
};
let creds = crate::pipeline::git::RepoCredentials {
ssh_key_path: Some(agent_clone.config.ssh_key_path.clone()),
auth_token: repo.auth_token.clone(),
auth_username: repo.auth_username.clone(),
};
let git_ops =
crate::pipeline::git::GitOps::new(&agent_clone.config.git_clone_base_path, creds);
let git_ops = crate::pipeline::git::GitOps::new(&agent_clone.config.git_clone_base_path);
let repo_path = match git_ops.clone_or_fetch(&repo.git_url, &repo.name) {
Ok(p) => p,
Err(e) => {

View File

@@ -1,84 +0,0 @@
use axum::Json;
use mongodb::bson::doc;
use super::dto::*;
use compliance_core::models::ScanRun;
#[tracing::instrument(skip_all)]
pub async fn health() -> Json<serde_json::Value> {
Json(serde_json::json!({ "status": "ok" }))
}
#[tracing::instrument(skip_all)]
pub async fn stats_overview(axum::extract::Extension(agent): AgentExt) -> ApiResult<OverviewStats> {
let db = &agent.db;
let total_repositories = db
.repositories()
.count_documents(doc! {})
.await
.unwrap_or(0);
let total_findings = db.findings().count_documents(doc! {}).await.unwrap_or(0);
let critical_findings = db
.findings()
.count_documents(doc! { "severity": "critical" })
.await
.unwrap_or(0);
let high_findings = db
.findings()
.count_documents(doc! { "severity": "high" })
.await
.unwrap_or(0);
let medium_findings = db
.findings()
.count_documents(doc! { "severity": "medium" })
.await
.unwrap_or(0);
let low_findings = db
.findings()
.count_documents(doc! { "severity": "low" })
.await
.unwrap_or(0);
let total_sbom_entries = db
.sbom_entries()
.count_documents(doc! {})
.await
.unwrap_or(0);
let total_cve_alerts = db.cve_alerts().count_documents(doc! {}).await.unwrap_or(0);
let total_issues = db
.tracker_issues()
.count_documents(doc! {})
.await
.unwrap_or(0);
let recent_scans: Vec<ScanRun> = match db
.scan_runs()
.find(doc! {})
.sort(doc! { "started_at": -1 })
.limit(10)
.await
{
Ok(cursor) => collect_cursor_async(cursor).await,
Err(e) => {
tracing::warn!("Failed to fetch recent scans: {e}");
Vec::new()
}
};
Ok(Json(ApiResponse {
data: OverviewStats {
total_repositories,
total_findings,
critical_findings,
high_findings,
medium_findings,
low_findings,
total_sbom_entries,
total_cve_alerts,
total_issues,
recent_scans,
},
total: None,
page: None,
}))
}

View File

@@ -1,187 +0,0 @@
use std::path::{Path, PathBuf};
use std::sync::OnceLock;
use axum::extract::Extension;
use axum::http::StatusCode;
use axum::Json;
use serde::{Deserialize, Serialize};
use walkdir::WalkDir;
use super::dto::{AgentExt, ApiResponse};
// ── DTOs ─────────────────────────────────────────────────────────────────────
#[derive(Debug, Deserialize)]
pub struct HelpChatMessage {
pub role: String,
pub content: String,
}
#[derive(Debug, Deserialize)]
pub struct HelpChatRequest {
pub message: String,
#[serde(default)]
pub history: Vec<HelpChatMessage>,
}
#[derive(Debug, Serialize)]
pub struct HelpChatResponse {
pub message: String,
}
// ── Doc cache ────────────────────────────────────────────────────────────────
static DOC_CONTEXT: OnceLock<String> = OnceLock::new();
/// Walk upward from `start` until we find a directory containing both
/// `README.md` and a `docs/` subdirectory.
fn find_project_root(start: &Path) -> Option<PathBuf> {
let mut current = start.to_path_buf();
loop {
if current.join("README.md").is_file() && current.join("docs").is_dir() {
return Some(current);
}
if !current.pop() {
return None;
}
}
}
/// Read README.md + all docs/**/*.md (excluding node_modules).
fn load_docs(root: &Path) -> String {
let mut parts: Vec<String> = Vec::new();
// Root README first
if let Ok(content) = std::fs::read_to_string(root.join("README.md")) {
parts.push(format!("<!-- file: README.md -->\n{content}"));
}
// docs/**/*.md, skipping node_modules
for entry in WalkDir::new(root.join("docs"))
.follow_links(false)
.into_iter()
.filter_entry(|e| {
!e.path()
.components()
.any(|c| c.as_os_str() == "node_modules")
})
.filter_map(|e| e.ok())
{
let path = entry.path();
if !path.is_file() {
continue;
}
if path
.extension()
.and_then(|s| s.to_str())
.map(|s| !s.eq_ignore_ascii_case("md"))
.unwrap_or(true)
{
continue;
}
let rel = path.strip_prefix(root).unwrap_or(path);
if let Ok(content) = std::fs::read_to_string(path) {
parts.push(format!("<!-- file: {} -->\n{content}", rel.display()));
}
}
if parts.is_empty() {
tracing::warn!(
"help_chat: no documentation files found under {}",
root.display()
);
} else {
tracing::info!(
"help_chat: loaded {} documentation file(s) from {}",
parts.len(),
root.display()
);
}
parts.join("\n\n---\n\n")
}
/// Returns a reference to the cached doc context string, initialised on
/// first call via `OnceLock`.
fn doc_context() -> &'static str {
DOC_CONTEXT.get_or_init(|| {
let start = std::env::current_exe()
.ok()
.and_then(|p| p.parent().map(Path::to_path_buf))
.unwrap_or_else(|| PathBuf::from("."));
match find_project_root(&start) {
Some(root) => load_docs(&root),
None => {
// Fallback: try current working directory
let cwd = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
if cwd.join("README.md").is_file() {
return load_docs(&cwd);
}
tracing::error!(
"help_chat: could not locate project root from {}; doc context will be empty",
start.display()
);
String::new()
}
}
})
}
// ── Handler ──────────────────────────────────────────────────────────────────
/// POST /api/v1/help/chat — Answer questions about the compliance-scanner
/// using the project documentation as grounding context.
#[tracing::instrument(skip_all)]
pub async fn help_chat(
Extension(agent): AgentExt,
Json(req): Json<HelpChatRequest>,
) -> Result<Json<ApiResponse<HelpChatResponse>>, StatusCode> {
let context = doc_context();
let system_prompt = if context.is_empty() {
"You are a helpful assistant for the Compliance Scanner project. \
Answer questions about how to use and configure it. \
No documentation was loaded at startup, so rely on your general knowledge."
.to_string()
} else {
format!(
"You are a helpful assistant for the Compliance Scanner project. \
Answer questions about how to use, configure, and understand it \
using the documentation below as your primary source of truth.\n\n\
Rules:\n\
- Prefer information from the provided docs over general knowledge\n\
- Quote or reference the relevant doc section when it helps\n\
- If the docs do not cover the topic, say so clearly\n\
- Be concise — lead with the answer, then explain if needed\n\
- Use markdown formatting for readability\n\n\
## Project Documentation\n\n{context}"
)
};
let mut messages: Vec<(String, String)> = Vec::with_capacity(req.history.len() + 2);
messages.push(("system".to_string(), system_prompt));
for msg in &req.history {
messages.push((msg.role.clone(), msg.content.clone()));
}
messages.push(("user".to_string(), req.message));
let response_text = agent
.llm
.chat_with_messages(messages, Some(0.3))
.await
.map_err(|e| {
tracing::error!("LLM help chat failed: {e}");
StatusCode::INTERNAL_SERVER_ERROR
})?;
Ok(Json(ApiResponse {
data: HelpChatResponse {
message: response_text,
},
total: None,
page: None,
}))
}

View File

@@ -1,41 +0,0 @@
use axum::extract::{Extension, Query};
use axum::Json;
use mongodb::bson::doc;
use super::dto::*;
use compliance_core::models::TrackerIssue;
#[tracing::instrument(skip_all)]
pub async fn list_issues(
Extension(agent): AgentExt,
Query(params): Query<PaginationParams>,
) -> ApiResult<Vec<TrackerIssue>> {
let db = &agent.db;
let skip = (params.page.saturating_sub(1)) * params.limit as u64;
let total = db
.tracker_issues()
.count_documents(doc! {})
.await
.unwrap_or(0);
let issues = match db
.tracker_issues()
.find(doc! {})
.sort(doc! { "created_at": -1 })
.skip(skip)
.limit(params.limit)
.await
{
Ok(cursor) => collect_cursor_async(cursor).await,
Err(e) => {
tracing::warn!("Failed to fetch tracker issues: {e}");
Vec::new()
}
};
Ok(Json(ApiResponse {
data: issues,
total: Some(total),
page: Some(params.page),
}))
}

View File

@@ -1,23 +1,827 @@
pub mod chat;
pub mod dast;
pub mod dto;
pub mod findings;
pub mod graph;
pub mod health;
pub mod help_chat;
pub mod issues;
pub mod notifications;
pub mod pentest_handlers;
pub use pentest_handlers as pentest;
pub mod repos;
pub mod sbom;
pub mod scans;
// Re-export all handler functions so routes.rs can use `handlers::function_name`
pub use dto::*;
pub use findings::*;
pub use health::*;
pub use issues::*;
pub use repos::*;
pub use sbom::*;
pub use scans::*;
use std::sync::Arc;
#[allow(unused_imports)]
use axum::extract::{Extension, Path, Query};
use axum::http::{header, StatusCode};
use axum::response::IntoResponse;
use axum::Json;
use mongodb::bson::doc;
use serde::{Deserialize, Serialize};
use compliance_core::models::*;
use crate::agent::ComplianceAgent;
#[derive(Deserialize)]
pub struct PaginationParams {
#[serde(default = "default_page")]
pub page: u64,
#[serde(default = "default_limit")]
pub limit: i64,
}
fn default_page() -> u64 {
1
}
fn default_limit() -> i64 {
50
}
#[derive(Deserialize)]
pub struct FindingsFilter {
#[serde(default)]
pub repo_id: Option<String>,
#[serde(default)]
pub severity: Option<String>,
#[serde(default)]
pub scan_type: Option<String>,
#[serde(default)]
pub status: Option<String>,
#[serde(default = "default_page")]
pub page: u64,
#[serde(default = "default_limit")]
pub limit: i64,
}
#[derive(Serialize)]
pub struct ApiResponse<T: Serialize> {
pub data: T,
#[serde(skip_serializing_if = "Option::is_none")]
pub total: Option<u64>,
#[serde(skip_serializing_if = "Option::is_none")]
pub page: Option<u64>,
}
#[derive(Serialize)]
pub struct OverviewStats {
pub total_repositories: u64,
pub total_findings: u64,
pub critical_findings: u64,
pub high_findings: u64,
pub medium_findings: u64,
pub low_findings: u64,
pub total_sbom_entries: u64,
pub total_cve_alerts: u64,
pub total_issues: u64,
pub recent_scans: Vec<ScanRun>,
}
#[derive(Deserialize)]
pub struct AddRepositoryRequest {
pub name: String,
pub git_url: String,
#[serde(default = "default_branch")]
pub default_branch: String,
pub tracker_type: Option<TrackerType>,
pub tracker_owner: Option<String>,
pub tracker_repo: Option<String>,
pub scan_schedule: Option<String>,
}
fn default_branch() -> String {
"main".to_string()
}
#[derive(Deserialize)]
pub struct UpdateStatusRequest {
pub status: String,
}
#[derive(Deserialize)]
pub struct SbomFilter {
#[serde(default)]
pub repo_id: Option<String>,
#[serde(default)]
pub package_manager: Option<String>,
#[serde(default)]
pub q: Option<String>,
#[serde(default)]
pub has_vulns: Option<bool>,
#[serde(default)]
pub license: Option<String>,
#[serde(default = "default_page")]
pub page: u64,
#[serde(default = "default_limit")]
pub limit: i64,
}
#[derive(Deserialize)]
pub struct SbomExportParams {
pub repo_id: String,
#[serde(default = "default_export_format")]
pub format: String,
}
fn default_export_format() -> String {
"cyclonedx".to_string()
}
#[derive(Deserialize)]
pub struct SbomDiffParams {
pub repo_a: String,
pub repo_b: String,
}
#[derive(Serialize)]
pub struct LicenseSummary {
pub license: String,
pub count: u64,
pub is_copyleft: bool,
pub packages: Vec<String>,
}
#[derive(Serialize)]
pub struct SbomDiffResult {
pub only_in_a: Vec<SbomDiffEntry>,
pub only_in_b: Vec<SbomDiffEntry>,
pub version_changed: Vec<SbomVersionDiff>,
pub common_count: u64,
}
#[derive(Serialize)]
pub struct SbomDiffEntry {
pub name: String,
pub version: String,
pub package_manager: String,
}
#[derive(Serialize)]
pub struct SbomVersionDiff {
pub name: String,
pub package_manager: String,
pub version_a: String,
pub version_b: String,
}
type AgentExt = Extension<Arc<ComplianceAgent>>;
type ApiResult<T> = Result<Json<ApiResponse<T>>, StatusCode>;
pub async fn health() -> Json<serde_json::Value> {
Json(serde_json::json!({ "status": "ok" }))
}
pub async fn stats_overview(Extension(agent): AgentExt) -> ApiResult<OverviewStats> {
let db = &agent.db;
let total_repositories = db
.repositories()
.count_documents(doc! {})
.await
.unwrap_or(0);
let total_findings = db.findings().count_documents(doc! {}).await.unwrap_or(0);
let critical_findings = db
.findings()
.count_documents(doc! { "severity": "critical" })
.await
.unwrap_or(0);
let high_findings = db
.findings()
.count_documents(doc! { "severity": "high" })
.await
.unwrap_or(0);
let medium_findings = db
.findings()
.count_documents(doc! { "severity": "medium" })
.await
.unwrap_or(0);
let low_findings = db
.findings()
.count_documents(doc! { "severity": "low" })
.await
.unwrap_or(0);
let total_sbom_entries = db
.sbom_entries()
.count_documents(doc! {})
.await
.unwrap_or(0);
let total_cve_alerts = db.cve_alerts().count_documents(doc! {}).await.unwrap_or(0);
let total_issues = db
.tracker_issues()
.count_documents(doc! {})
.await
.unwrap_or(0);
let recent_scans: Vec<ScanRun> = match db
.scan_runs()
.find(doc! {})
.sort(doc! { "started_at": -1 })
.limit(10)
.await
{
Ok(cursor) => collect_cursor_async(cursor).await,
Err(_) => Vec::new(),
};
Ok(Json(ApiResponse {
data: OverviewStats {
total_repositories,
total_findings,
critical_findings,
high_findings,
medium_findings,
low_findings,
total_sbom_entries,
total_cve_alerts,
total_issues,
recent_scans,
},
total: None,
page: None,
}))
}
pub async fn list_repositories(
Extension(agent): AgentExt,
Query(params): Query<PaginationParams>,
) -> ApiResult<Vec<TrackedRepository>> {
let db = &agent.db;
let skip = (params.page.saturating_sub(1)) * params.limit as u64;
let total = db
.repositories()
.count_documents(doc! {})
.await
.unwrap_or(0);
let repos = match db
.repositories()
.find(doc! {})
.skip(skip)
.limit(params.limit)
.await
{
Ok(cursor) => collect_cursor_async(cursor).await,
Err(_) => Vec::new(),
};
Ok(Json(ApiResponse {
data: repos,
total: Some(total),
page: Some(params.page),
}))
}
pub async fn add_repository(
Extension(agent): AgentExt,
Json(req): Json<AddRepositoryRequest>,
) -> Result<Json<ApiResponse<TrackedRepository>>, StatusCode> {
let mut repo = TrackedRepository::new(req.name, req.git_url);
repo.default_branch = req.default_branch;
repo.tracker_type = req.tracker_type;
repo.tracker_owner = req.tracker_owner;
repo.tracker_repo = req.tracker_repo;
repo.scan_schedule = req.scan_schedule;
agent
.db
.repositories()
.insert_one(&repo)
.await
.map_err(|_| StatusCode::CONFLICT)?;
Ok(Json(ApiResponse {
data: repo,
total: None,
page: None,
}))
}
pub async fn trigger_scan(
Extension(agent): AgentExt,
Path(id): Path<String>,
) -> Result<Json<serde_json::Value>, StatusCode> {
let agent_clone = (*agent).clone();
tokio::spawn(async move {
if let Err(e) = agent_clone.run_scan(&id, ScanTrigger::Manual).await {
tracing::error!("Manual scan failed for {id}: {e}");
}
});
Ok(Json(serde_json::json!({ "status": "scan_triggered" })))
}
pub async fn delete_repository(
Extension(agent): AgentExt,
Path(id): Path<String>,
) -> Result<Json<serde_json::Value>, StatusCode> {
let oid = mongodb::bson::oid::ObjectId::parse_str(&id).map_err(|_| StatusCode::BAD_REQUEST)?;
let db = &agent.db;
// Delete the repository
let result = db
.repositories()
.delete_one(doc! { "_id": oid })
.await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
if result.deleted_count == 0 {
return Err(StatusCode::NOT_FOUND);
}
// Cascade delete all related data
let _ = db.findings().delete_many(doc! { "repo_id": &id }).await;
let _ = db.sbom_entries().delete_many(doc! { "repo_id": &id }).await;
let _ = db.scan_runs().delete_many(doc! { "repo_id": &id }).await;
let _ = db.cve_alerts().delete_many(doc! { "repo_id": &id }).await;
let _ = db
.tracker_issues()
.delete_many(doc! { "repo_id": &id })
.await;
let _ = db.graph_nodes().delete_many(doc! { "repo_id": &id }).await;
let _ = db.graph_edges().delete_many(doc! { "repo_id": &id }).await;
let _ = db.graph_builds().delete_many(doc! { "repo_id": &id }).await;
let _ = db
.impact_analyses()
.delete_many(doc! { "repo_id": &id })
.await;
let _ = db
.code_embeddings()
.delete_many(doc! { "repo_id": &id })
.await;
let _ = db
.embedding_builds()
.delete_many(doc! { "repo_id": &id })
.await;
Ok(Json(serde_json::json!({ "status": "deleted" })))
}
pub async fn list_findings(
Extension(agent): AgentExt,
Query(filter): Query<FindingsFilter>,
) -> ApiResult<Vec<Finding>> {
let db = &agent.db;
let mut query = doc! {};
if let Some(repo_id) = &filter.repo_id {
query.insert("repo_id", repo_id);
}
if let Some(severity) = &filter.severity {
query.insert("severity", severity);
}
if let Some(scan_type) = &filter.scan_type {
query.insert("scan_type", scan_type);
}
if let Some(status) = &filter.status {
query.insert("status", status);
}
let skip = (filter.page.saturating_sub(1)) * filter.limit as u64;
let total = db
.findings()
.count_documents(query.clone())
.await
.unwrap_or(0);
let findings = match db
.findings()
.find(query)
.sort(doc! { "created_at": -1 })
.skip(skip)
.limit(filter.limit)
.await
{
Ok(cursor) => collect_cursor_async(cursor).await,
Err(_) => Vec::new(),
};
Ok(Json(ApiResponse {
data: findings,
total: Some(total),
page: Some(filter.page),
}))
}
pub async fn get_finding(
Extension(agent): AgentExt,
Path(id): Path<String>,
) -> Result<Json<ApiResponse<Finding>>, StatusCode> {
let oid = mongodb::bson::oid::ObjectId::parse_str(&id).map_err(|_| StatusCode::BAD_REQUEST)?;
let finding = agent
.db
.findings()
.find_one(doc! { "_id": oid })
.await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?
.ok_or(StatusCode::NOT_FOUND)?;
Ok(Json(ApiResponse {
data: finding,
total: None,
page: None,
}))
}
pub async fn update_finding_status(
Extension(agent): AgentExt,
Path(id): Path<String>,
Json(req): Json<UpdateStatusRequest>,
) -> Result<Json<serde_json::Value>, StatusCode> {
let oid = mongodb::bson::oid::ObjectId::parse_str(&id).map_err(|_| StatusCode::BAD_REQUEST)?;
agent
.db
.findings()
.update_one(
doc! { "_id": oid },
doc! { "$set": { "status": &req.status, "updated_at": mongodb::bson::DateTime::now() } },
)
.await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
Ok(Json(serde_json::json!({ "status": "updated" })))
}
pub async fn list_sbom(
Extension(agent): AgentExt,
Query(filter): Query<SbomFilter>,
) -> ApiResult<Vec<SbomEntry>> {
let db = &agent.db;
let mut query = doc! {};
if let Some(repo_id) = &filter.repo_id {
query.insert("repo_id", repo_id);
}
if let Some(pm) = &filter.package_manager {
query.insert("package_manager", pm);
}
if let Some(q) = &filter.q {
if !q.is_empty() {
query.insert("name", doc! { "$regex": q, "$options": "i" });
}
}
if let Some(has_vulns) = filter.has_vulns {
if has_vulns {
query.insert("known_vulnerabilities", doc! { "$exists": true, "$ne": [] });
} else {
query.insert("known_vulnerabilities", doc! { "$size": 0 });
}
}
if let Some(license) = &filter.license {
query.insert("license", license);
}
let skip = (filter.page.saturating_sub(1)) * filter.limit as u64;
let total = db
.sbom_entries()
.count_documents(query.clone())
.await
.unwrap_or(0);
let entries = match db
.sbom_entries()
.find(query)
.sort(doc! { "name": 1 })
.skip(skip)
.limit(filter.limit)
.await
{
Ok(cursor) => collect_cursor_async(cursor).await,
Err(_) => Vec::new(),
};
Ok(Json(ApiResponse {
data: entries,
total: Some(total),
page: Some(filter.page),
}))
}
pub async fn export_sbom(
Extension(agent): AgentExt,
Query(params): Query<SbomExportParams>,
) -> Result<impl IntoResponse, StatusCode> {
let db = &agent.db;
let entries: Vec<SbomEntry> = match db
.sbom_entries()
.find(doc! { "repo_id": &params.repo_id })
.await
{
Ok(cursor) => collect_cursor_async(cursor).await,
Err(_) => Vec::new(),
};
let body = if params.format == "spdx" {
// SPDX 2.3 format
let packages: Vec<serde_json::Value> = entries
.iter()
.enumerate()
.map(|(i, e)| {
serde_json::json!({
"SPDXID": format!("SPDXRef-Package-{i}"),
"name": e.name,
"versionInfo": e.version,
"downloadLocation": "NOASSERTION",
"licenseConcluded": e.license.as_deref().unwrap_or("NOASSERTION"),
"externalRefs": e.purl.as_ref().map(|p| vec![serde_json::json!({
"referenceCategory": "PACKAGE-MANAGER",
"referenceType": "purl",
"referenceLocator": p,
})]).unwrap_or_default(),
})
})
.collect();
serde_json::json!({
"spdxVersion": "SPDX-2.3",
"dataLicense": "CC0-1.0",
"SPDXID": "SPDXRef-DOCUMENT",
"name": format!("sbom-{}", params.repo_id),
"documentNamespace": format!("https://compliance-scanner/sbom/{}", params.repo_id),
"packages": packages,
})
} else {
// CycloneDX 1.5 format
let components: Vec<serde_json::Value> = entries
.iter()
.map(|e| {
let mut comp = serde_json::json!({
"type": "library",
"name": e.name,
"version": e.version,
"group": e.package_manager,
});
if let Some(purl) = &e.purl {
comp["purl"] = serde_json::Value::String(purl.clone());
}
if let Some(license) = &e.license {
comp["licenses"] = serde_json::json!([{ "license": { "id": license } }]);
}
if !e.known_vulnerabilities.is_empty() {
comp["vulnerabilities"] = serde_json::json!(
e.known_vulnerabilities.iter().map(|v| serde_json::json!({
"id": v.id,
"source": { "name": v.source },
"ratings": v.severity.as_ref().map(|s| vec![serde_json::json!({"severity": s})]).unwrap_or_default(),
})).collect::<Vec<_>>()
);
}
comp
})
.collect();
serde_json::json!({
"bomFormat": "CycloneDX",
"specVersion": "1.5",
"version": 1,
"metadata": {
"component": {
"type": "application",
"name": format!("repo-{}", params.repo_id),
}
},
"components": components,
})
};
let json_str =
serde_json::to_string_pretty(&body).map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
let filename = if params.format == "spdx" {
format!("sbom-{}-spdx.json", params.repo_id)
} else {
format!("sbom-{}-cyclonedx.json", params.repo_id)
};
let disposition = format!("attachment; filename=\"{filename}\"");
Ok((
[
(
header::CONTENT_TYPE,
header::HeaderValue::from_static("application/json"),
),
(
header::CONTENT_DISPOSITION,
header::HeaderValue::from_str(&disposition)
.unwrap_or_else(|_| header::HeaderValue::from_static("attachment")),
),
],
json_str,
))
}
const COPYLEFT_LICENSES: &[&str] = &[
"GPL-2.0",
"GPL-2.0-only",
"GPL-2.0-or-later",
"GPL-3.0",
"GPL-3.0-only",
"GPL-3.0-or-later",
"AGPL-3.0",
"AGPL-3.0-only",
"AGPL-3.0-or-later",
"LGPL-2.1",
"LGPL-2.1-only",
"LGPL-2.1-or-later",
"LGPL-3.0",
"LGPL-3.0-only",
"LGPL-3.0-or-later",
"MPL-2.0",
];
pub async fn license_summary(
Extension(agent): AgentExt,
Query(params): Query<SbomFilter>,
) -> ApiResult<Vec<LicenseSummary>> {
let db = &agent.db;
let mut query = doc! {};
if let Some(repo_id) = &params.repo_id {
query.insert("repo_id", repo_id);
}
let entries: Vec<SbomEntry> = match db.sbom_entries().find(query).await {
Ok(cursor) => collect_cursor_async(cursor).await,
Err(_) => Vec::new(),
};
let mut license_map: std::collections::HashMap<String, Vec<String>> =
std::collections::HashMap::new();
for entry in &entries {
let lic = entry.license.as_deref().unwrap_or("Unknown").to_string();
license_map.entry(lic).or_default().push(entry.name.clone());
}
let mut summaries: Vec<LicenseSummary> = license_map
.into_iter()
.map(|(license, packages)| {
let is_copyleft = COPYLEFT_LICENSES
.iter()
.any(|c| license.to_uppercase().contains(&c.to_uppercase()));
LicenseSummary {
license,
count: packages.len() as u64,
is_copyleft,
packages,
}
})
.collect();
summaries.sort_by(|a, b| b.count.cmp(&a.count));
Ok(Json(ApiResponse {
data: summaries,
total: None,
page: None,
}))
}
pub async fn sbom_diff(
Extension(agent): AgentExt,
Query(params): Query<SbomDiffParams>,
) -> ApiResult<SbomDiffResult> {
let db = &agent.db;
let entries_a: Vec<SbomEntry> = match db
.sbom_entries()
.find(doc! { "repo_id": &params.repo_a })
.await
{
Ok(cursor) => collect_cursor_async(cursor).await,
Err(_) => Vec::new(),
};
let entries_b: Vec<SbomEntry> = match db
.sbom_entries()
.find(doc! { "repo_id": &params.repo_b })
.await
{
Ok(cursor) => collect_cursor_async(cursor).await,
Err(_) => Vec::new(),
};
// Build maps by (name, package_manager) -> version
let map_a: std::collections::HashMap<(String, String), String> = entries_a
.iter()
.map(|e| {
(
(e.name.clone(), e.package_manager.clone()),
e.version.clone(),
)
})
.collect();
let map_b: std::collections::HashMap<(String, String), String> = entries_b
.iter()
.map(|e| {
(
(e.name.clone(), e.package_manager.clone()),
e.version.clone(),
)
})
.collect();
let mut only_in_a = Vec::new();
let mut version_changed = Vec::new();
let mut common_count: u64 = 0;
for (key, ver_a) in &map_a {
match map_b.get(key) {
None => only_in_a.push(SbomDiffEntry {
name: key.0.clone(),
version: ver_a.clone(),
package_manager: key.1.clone(),
}),
Some(ver_b) if ver_a != ver_b => {
version_changed.push(SbomVersionDiff {
name: key.0.clone(),
package_manager: key.1.clone(),
version_a: ver_a.clone(),
version_b: ver_b.clone(),
});
}
Some(_) => common_count += 1,
}
}
let only_in_b: Vec<SbomDiffEntry> = map_b
.iter()
.filter(|(key, _)| !map_a.contains_key(key))
.map(|(key, ver)| SbomDiffEntry {
name: key.0.clone(),
version: ver.clone(),
package_manager: key.1.clone(),
})
.collect();
Ok(Json(ApiResponse {
data: SbomDiffResult {
only_in_a,
only_in_b,
version_changed,
common_count,
},
total: None,
page: None,
}))
}
pub async fn list_issues(
Extension(agent): AgentExt,
Query(params): Query<PaginationParams>,
) -> ApiResult<Vec<TrackerIssue>> {
let db = &agent.db;
let skip = (params.page.saturating_sub(1)) * params.limit as u64;
let total = db
.tracker_issues()
.count_documents(doc! {})
.await
.unwrap_or(0);
let issues = match db
.tracker_issues()
.find(doc! {})
.sort(doc! { "created_at": -1 })
.skip(skip)
.limit(params.limit)
.await
{
Ok(cursor) => collect_cursor_async(cursor).await,
Err(_) => Vec::new(),
};
Ok(Json(ApiResponse {
data: issues,
total: Some(total),
page: Some(params.page),
}))
}
pub async fn list_scan_runs(
Extension(agent): AgentExt,
Query(params): Query<PaginationParams>,
) -> ApiResult<Vec<ScanRun>> {
let db = &agent.db;
let skip = (params.page.saturating_sub(1)) * params.limit as u64;
let total = db.scan_runs().count_documents(doc! {}).await.unwrap_or(0);
let scans = match db
.scan_runs()
.find(doc! {})
.sort(doc! { "started_at": -1 })
.skip(skip)
.limit(params.limit)
.await
{
Ok(cursor) => collect_cursor_async(cursor).await,
Err(_) => Vec::new(),
};
Ok(Json(ApiResponse {
data: scans,
total: Some(total),
page: Some(params.page),
}))
}
async fn collect_cursor_async<T: serde::de::DeserializeOwned + Unpin + Send>(
mut cursor: mongodb::Cursor<T>,
) -> Vec<T> {
use futures_util::StreamExt;
let mut items = Vec::new();
while let Some(result) = cursor.next().await {
match result {
Ok(item) => items.push(item),
Err(e) => tracing::warn!("Failed to deserialize document: {e}"),
}
}
items
}

View File

@@ -1,178 +0,0 @@
use axum::extract::Extension;
use axum::http::StatusCode;
use axum::Json;
use mongodb::bson::doc;
use serde::Deserialize;
use compliance_core::models::notification::CveNotification;
use super::dto::{AgentExt, ApiResponse};
/// GET /api/v1/notifications — List CVE notifications (newest first)
#[tracing::instrument(skip_all)]
pub async fn list_notifications(
Extension(agent): AgentExt,
axum::extract::Query(params): axum::extract::Query<NotificationFilter>,
) -> Result<Json<ApiResponse<Vec<CveNotification>>>, StatusCode> {
let mut filter = doc! {};
// Filter by status (default: show new + read, exclude dismissed)
match params.status.as_deref() {
Some("all") => {}
Some(s) => {
filter.insert("status", s);
}
None => {
filter.insert("status", doc! { "$in": ["new", "read"] });
}
}
// Filter by severity
if let Some(ref sev) = params.severity {
filter.insert("severity", sev.as_str());
}
// Filter by repo
if let Some(ref repo_id) = params.repo_id {
filter.insert("repo_id", repo_id.as_str());
}
let page = params.page.unwrap_or(1).max(1);
let limit = params.limit.unwrap_or(50).min(200);
let skip = (page - 1) * limit as u64;
let total = agent
.db
.cve_notifications()
.count_documents(filter.clone())
.await
.unwrap_or(0);
let notifications: Vec<CveNotification> = match agent
.db
.cve_notifications()
.find(filter)
.sort(doc! { "created_at": -1 })
.skip(skip)
.limit(limit)
.await
{
Ok(cursor) => {
use futures_util::StreamExt;
let mut items = Vec::new();
let mut cursor = cursor;
while let Some(Ok(n)) = cursor.next().await {
items.push(n);
}
items
}
Err(e) => {
tracing::error!("Failed to list notifications: {e}");
return Err(StatusCode::INTERNAL_SERVER_ERROR);
}
};
Ok(Json(ApiResponse {
data: notifications,
total: Some(total),
page: Some(page),
}))
}
/// GET /api/v1/notifications/count — Count of unread notifications
#[tracing::instrument(skip_all)]
pub async fn notification_count(
Extension(agent): AgentExt,
) -> Result<Json<serde_json::Value>, StatusCode> {
let count = agent
.db
.cve_notifications()
.count_documents(doc! { "status": "new" })
.await
.unwrap_or(0);
Ok(Json(serde_json::json!({ "count": count })))
}
/// PATCH /api/v1/notifications/:id/read — Mark a notification as read
#[tracing::instrument(skip_all, fields(id = %id))]
pub async fn mark_read(
Extension(agent): AgentExt,
axum::extract::Path(id): axum::extract::Path<String>,
) -> Result<Json<serde_json::Value>, StatusCode> {
let oid = mongodb::bson::oid::ObjectId::parse_str(&id).map_err(|_| StatusCode::BAD_REQUEST)?;
let result = agent
.db
.cve_notifications()
.update_one(
doc! { "_id": oid },
doc! { "$set": {
"status": "read",
"read_at": mongodb::bson::DateTime::now(),
}},
)
.await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
if result.matched_count == 0 {
return Err(StatusCode::NOT_FOUND);
}
Ok(Json(serde_json::json!({ "status": "read" })))
}
/// PATCH /api/v1/notifications/:id/dismiss — Dismiss a notification
#[tracing::instrument(skip_all, fields(id = %id))]
pub async fn dismiss_notification(
Extension(agent): AgentExt,
axum::extract::Path(id): axum::extract::Path<String>,
) -> Result<Json<serde_json::Value>, StatusCode> {
let oid = mongodb::bson::oid::ObjectId::parse_str(&id).map_err(|_| StatusCode::BAD_REQUEST)?;
let result = agent
.db
.cve_notifications()
.update_one(
doc! { "_id": oid },
doc! { "$set": { "status": "dismissed" } },
)
.await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
if result.matched_count == 0 {
return Err(StatusCode::NOT_FOUND);
}
Ok(Json(serde_json::json!({ "status": "dismissed" })))
}
/// POST /api/v1/notifications/read-all — Mark all new notifications as read
#[tracing::instrument(skip_all)]
pub async fn mark_all_read(
Extension(agent): AgentExt,
) -> Result<Json<serde_json::Value>, StatusCode> {
let result = agent
.db
.cve_notifications()
.update_many(
doc! { "status": "new" },
doc! { "$set": {
"status": "read",
"read_at": mongodb::bson::DateTime::now(),
}},
)
.await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
Ok(Json(
serde_json::json!({ "updated": result.modified_count }),
))
}
#[derive(Debug, Deserialize)]
pub struct NotificationFilter {
pub status: Option<String>,
pub severity: Option<String>,
pub repo_id: Option<String>,
pub page: Option<u64>,
pub limit: Option<i64>,
}

View File

@@ -1,238 +0,0 @@
use std::sync::Arc;
use axum::extract::{Extension, Path};
use axum::http::StatusCode;
use axum::response::IntoResponse;
use axum::Json;
use mongodb::bson::doc;
use serde::Deserialize;
use futures_util::StreamExt;
use compliance_core::models::dast::DastFinding;
use compliance_core::models::finding::Finding;
use compliance_core::models::pentest::*;
use compliance_core::models::sbom::SbomEntry;
use crate::agent::ComplianceAgent;
use super::super::dto::collect_cursor_async;
type AgentExt = Extension<Arc<ComplianceAgent>>;
#[derive(Deserialize)]
pub struct ExportBody {
pub password: String,
/// Requester display name (from auth)
#[serde(default)]
pub requester_name: String,
/// Requester email (from auth)
#[serde(default)]
pub requester_email: String,
}
/// POST /api/v1/pentest/sessions/:id/export — Export an encrypted pentest report archive
#[tracing::instrument(skip_all, fields(session_id = %id))]
pub async fn export_session_report(
Extension(agent): AgentExt,
Path(id): Path<String>,
Json(body): Json<ExportBody>,
) -> Result<axum::response::Response, (StatusCode, String)> {
let oid = mongodb::bson::oid::ObjectId::parse_str(&id)
.map_err(|_| (StatusCode::BAD_REQUEST, "Invalid session ID".to_string()))?;
if body.password.len() < 8 {
return Err((
StatusCode::BAD_REQUEST,
"Password must be at least 8 characters".to_string(),
));
}
// Fetch session
let session = agent
.db
.pentest_sessions()
.find_one(doc! { "_id": oid })
.await
.map_err(|e| {
(
StatusCode::INTERNAL_SERVER_ERROR,
format!("Database error: {e}"),
)
})?
.ok_or_else(|| (StatusCode::NOT_FOUND, "Session not found".to_string()))?;
// Resolve target name
let target = if let Ok(tid) = mongodb::bson::oid::ObjectId::parse_str(&session.target_id) {
agent
.db
.dast_targets()
.find_one(doc! { "_id": tid })
.await
.ok()
.flatten()
} else {
None
};
let target_name = target
.as_ref()
.map(|t| t.name.clone())
.unwrap_or_else(|| "Unknown Target".to_string());
let target_url = target
.as_ref()
.map(|t| t.base_url.clone())
.unwrap_or_default();
// Fetch attack chain nodes
let nodes: Vec<AttackChainNode> = match agent
.db
.attack_chain_nodes()
.find(doc! { "session_id": &id })
.sort(doc! { "started_at": 1 })
.await
{
Ok(cursor) => collect_cursor_async(cursor).await,
Err(_) => Vec::new(),
};
// Fetch DAST findings for this session, then deduplicate
let raw_findings: Vec<DastFinding> = match agent
.db
.dast_findings()
.find(doc! { "session_id": &id })
.sort(doc! { "severity": -1, "created_at": -1 })
.await
{
Ok(cursor) => collect_cursor_async(cursor).await,
Err(_) => Vec::new(),
};
let raw_count = raw_findings.len();
let findings = crate::pipeline::dedup::dedup_dast_findings(raw_findings);
if findings.len() < raw_count {
tracing::info!(
"Deduped DAST findings for session {id}: {raw_count} → {}",
findings.len()
);
}
// Fetch SAST findings, SBOM, and code context for the linked repository
let repo_id = session
.repo_id
.clone()
.or_else(|| target.as_ref().and_then(|t| t.repo_id.clone()));
let (sast_findings, sbom_entries, code_context) = if let Some(ref rid) = repo_id {
let sast: Vec<Finding> = match agent
.db
.findings()
.find(doc! {
"repo_id": rid,
"status": { "$in": ["open", "triaged"] },
})
.sort(doc! { "severity": -1 })
.limit(100)
.await
{
Ok(mut cursor) => {
let mut results = Vec::new();
while let Some(Ok(f)) = cursor.next().await {
results.push(f);
}
results
}
Err(_) => Vec::new(),
};
let sbom: Vec<SbomEntry> = match agent
.db
.sbom_entries()
.find(doc! {
"repo_id": rid,
"known_vulnerabilities": { "$exists": true, "$ne": [] },
})
.limit(50)
.await
{
Ok(mut cursor) => {
let mut results = Vec::new();
while let Some(Ok(e)) = cursor.next().await {
results.push(e);
}
results
}
Err(_) => Vec::new(),
};
// Build code context from graph nodes
let code_ctx: Vec<CodeContextHint> = match agent
.db
.graph_nodes()
.find(doc! { "repo_id": rid, "is_entry_point": true })
.limit(50)
.await
{
Ok(mut cursor) => {
let mut nodes_vec = Vec::new();
while let Some(Ok(n)) = cursor.next().await {
let linked_vulns: Vec<String> = sast
.iter()
.filter(|f| f.file_path.as_deref() == Some(&n.file_path))
.map(|f| {
format!(
"[{}] {}: {} (line {})",
f.severity,
f.scanner,
f.title,
f.line_number.unwrap_or(0)
)
})
.collect();
nodes_vec.push(CodeContextHint {
endpoint_pattern: n.qualified_name.clone(),
handler_function: n.name.clone(),
file_path: n.file_path.clone(),
code_snippet: String::new(),
known_vulnerabilities: linked_vulns,
});
}
nodes_vec
}
Err(_) => Vec::new(),
};
(sast, sbom, code_ctx)
} else {
(Vec::new(), Vec::new(), Vec::new())
};
let config = session.config.clone();
let ctx = crate::pentest::report::ReportContext {
session,
target_name,
target_url,
findings,
attack_chain: nodes,
requester_name: if body.requester_name.is_empty() {
"Unknown".to_string()
} else {
body.requester_name
},
requester_email: body.requester_email,
config,
sast_findings,
sbom_entries,
code_context,
};
let report = crate::pentest::generate_encrypted_report(&ctx, &body.password)
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e))?;
let response = serde_json::json!({
"archive_base64": base64::Engine::encode(&base64::engine::general_purpose::STANDARD, &report.archive),
"sha256": report.sha256,
"filename": format!("pentest-report-{id}.zip"),
});
Ok(Json(response).into_response())
}

View File

@@ -1,9 +0,0 @@
mod export;
mod session;
mod stats;
mod stream;
pub use export::*;
pub use session::*;
pub use stats::*;
pub use stream::*;

View File

@@ -1,834 +0,0 @@
use std::sync::Arc;
use axum::extract::{Extension, Path, Query};
use axum::http::StatusCode;
use axum::Json;
use mongodb::bson::doc;
use serde::Deserialize;
use compliance_core::models::pentest::*;
use crate::agent::ComplianceAgent;
use crate::pentest::PentestOrchestrator;
use super::super::dto::{collect_cursor_async, ApiResponse, PaginationParams};
type AgentExt = Extension<Arc<ComplianceAgent>>;
#[derive(Deserialize)]
pub struct CreateSessionRequest {
pub target_id: Option<String>,
#[serde(default = "default_strategy")]
pub strategy: String,
pub message: Option<String>,
/// Wizard configuration — if present, takes precedence over legacy fields
pub config: Option<PentestConfig>,
}
fn default_strategy() -> String {
"comprehensive".to_string()
}
#[derive(Deserialize)]
pub struct SendMessageRequest {
pub message: String,
}
#[derive(Deserialize)]
pub struct LookupRepoQuery {
pub url: String,
}
/// POST /api/v1/pentest/sessions — Create a new pentest session and start the orchestrator
#[tracing::instrument(skip_all)]
pub async fn create_session(
Extension(agent): AgentExt,
Json(req): Json<CreateSessionRequest>,
) -> Result<Json<ApiResponse<PentestSession>>, (StatusCode, String)> {
// Try to acquire a concurrency permit
let permit = agent
.session_semaphore
.clone()
.try_acquire_owned()
.map_err(|_| {
(
StatusCode::TOO_MANY_REQUESTS,
"Maximum concurrent pentest sessions reached. Try again later.".to_string(),
)
})?;
if let Some(ref config) = req.config {
// ── Wizard path ──────────────────────────────────────────────
if !config.disclaimer_accepted {
return Err((
StatusCode::BAD_REQUEST,
"Disclaimer must be accepted".to_string(),
));
}
// Look up or auto-create DastTarget by app_url
let target = match agent
.db
.dast_targets()
.find_one(doc! { "base_url": &config.app_url })
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("DB error: {e}")))?
{
Some(t) => t,
None => {
use compliance_core::models::dast::{DastTarget, DastTargetType};
let mut t = DastTarget::new(
config.app_url.clone(),
config.app_url.clone(),
DastTargetType::WebApp,
);
if let Some(rl) = config.rate_limit {
t.rate_limit = rl;
}
t.allow_destructive = config.allow_destructive;
t.excluded_paths = config.scope_exclusions.clone();
let res = agent.db.dast_targets().insert_one(&t).await.map_err(|e| {
(
StatusCode::INTERNAL_SERVER_ERROR,
format!("Failed to create target: {e}"),
)
})?;
t.id = res.inserted_id.as_object_id();
t
}
};
let target_id = target.id.map(|oid| oid.to_hex()).unwrap_or_default();
// Parse strategy from config or request
let strat_str = config.strategy.as_deref().unwrap_or(req.strategy.as_str());
let strategy = parse_strategy(strat_str);
let mut session = PentestSession::new(target_id, strategy);
session.config = Some(config.clone());
session.repo_id = target.repo_id.clone();
// Resolve repo_id from git_repo_url if provided
if let Some(ref git_url) = config.git_repo_url {
if let Ok(Some(repo)) = agent
.db
.repositories()
.find_one(doc! { "git_url": git_url })
.await
{
session.repo_id = repo.id.map(|oid| oid.to_hex());
}
}
let insert_result = agent
.db
.pentest_sessions()
.insert_one(&session)
.await
.map_err(|e| {
(
StatusCode::INTERNAL_SERVER_ERROR,
format!("Failed to create session: {e}"),
)
})?;
session.id = insert_result.inserted_id.as_object_id();
let session_id_str = session.id.map(|oid| oid.to_hex()).unwrap_or_default();
// Register broadcast stream and pause control
let event_tx = agent.register_session_stream(&session_id_str);
let pause_rx = agent.register_pause_control(&session_id_str);
// Merge server-default IMAP/email settings where wizard left blanks
if let Some(ref mut cfg) = session.config {
if cfg.auth.mode == AuthMode::AutoRegister {
if cfg.auth.verification_email.is_none() {
cfg.auth.verification_email = agent.config.pentest_verification_email.clone();
}
if cfg.auth.imap_host.is_none() {
cfg.auth.imap_host = agent.config.pentest_imap_host.clone();
}
if cfg.auth.imap_port.is_none() {
cfg.auth.imap_port = agent.config.pentest_imap_port;
}
if cfg.auth.imap_username.is_none() {
cfg.auth.imap_username = agent.config.pentest_imap_username.clone();
}
if cfg.auth.imap_password.is_none() {
cfg.auth.imap_password = agent.config.pentest_imap_password.as_ref().map(|s| {
use secrecy::ExposeSecret;
s.expose_secret().to_string()
});
}
}
}
// Pre-populate test user record for auto-register sessions
if let Some(ref cfg) = session.config {
if cfg.auth.mode == AuthMode::AutoRegister {
let verification_email = cfg.auth.verification_email.clone();
// Build plus-addressed email for this session
let test_email = verification_email.as_deref().map(|email| {
let parts: Vec<&str> = email.splitn(2, '@').collect();
if parts.len() == 2 {
format!("{}+{}@{}", parts[0], session_id_str, parts[1])
} else {
email.to_string()
}
});
// Detect identity provider from keycloak config
let provider = if agent.config.keycloak_url.is_some() {
Some(compliance_core::models::pentest::IdentityProvider::Keycloak)
} else {
None
};
session.test_user = Some(compliance_core::models::pentest::TestUserRecord {
username: None, // LLM will choose; updated after registration
email: test_email,
provider_user_id: None,
provider,
cleaned_up: false,
});
}
}
// Encrypt credentials before they linger in memory
let mut session_for_task = session.clone();
if let Some(ref mut cfg) = session_for_task.config {
cfg.auth.username = cfg
.auth
.username
.as_ref()
.map(|u| crate::pentest::crypto::encrypt(u));
cfg.auth.password = cfg
.auth
.password
.as_ref()
.map(|p| crate::pentest::crypto::encrypt(p));
}
// Persist encrypted credentials to DB
if session_for_task.config.is_some() {
if let Some(sid) = session.id {
let _ = agent
.db
.pentest_sessions()
.update_one(
doc! { "_id": sid },
doc! { "$set": {
"config.auth.username": session_for_task.config.as_ref()
.and_then(|c| c.auth.username.as_deref())
.map(|s| mongodb::bson::Bson::String(s.to_string()))
.unwrap_or(mongodb::bson::Bson::Null),
"config.auth.password": session_for_task.config.as_ref()
.and_then(|c| c.auth.password.as_deref())
.map(|s| mongodb::bson::Bson::String(s.to_string()))
.unwrap_or(mongodb::bson::Bson::Null),
}},
)
.await;
}
}
let initial_message = config
.initial_instructions
.clone()
.or(req.message.clone())
.unwrap_or_else(|| {
format!(
"Begin a {} penetration test against {} ({}). \
Identify vulnerabilities and provide evidence for each finding.",
session.strategy, target.name, target.base_url,
)
});
let llm = agent.llm.clone();
let db = agent.db.clone();
let session_clone = session.clone();
let target_clone = target.clone();
let agent_ref = agent.clone();
tokio::spawn(async move {
let orchestrator = PentestOrchestrator::new(llm, db, event_tx, Some(pause_rx));
orchestrator
.run_session_guarded(&session_clone, &target_clone, &initial_message)
.await;
// Clean up session resources
agent_ref.cleanup_session(&session_id_str);
// Release concurrency permit
drop(permit);
});
// Redact credentials in response
let mut response_session = session;
if let Some(ref mut cfg) = response_session.config {
if cfg.auth.username.is_some() {
cfg.auth.username = Some("********".to_string());
}
if cfg.auth.password.is_some() {
cfg.auth.password = Some("********".to_string());
}
}
Ok(Json(ApiResponse {
data: response_session,
total: None,
page: None,
}))
} else {
// ── Legacy path ──────────────────────────────────────────────
let target_id = req.target_id.clone().ok_or_else(|| {
(
StatusCode::BAD_REQUEST,
"target_id is required for legacy creation".to_string(),
)
})?;
let oid = mongodb::bson::oid::ObjectId::parse_str(&target_id).map_err(|_| {
(
StatusCode::BAD_REQUEST,
"Invalid target_id format".to_string(),
)
})?;
let target = agent
.db
.dast_targets()
.find_one(doc! { "_id": oid })
.await
.map_err(|e| {
(
StatusCode::INTERNAL_SERVER_ERROR,
format!("Database error: {e}"),
)
})?
.ok_or_else(|| (StatusCode::NOT_FOUND, "Target not found".to_string()))?;
let strategy = parse_strategy(&req.strategy);
let mut session = PentestSession::new(target_id, strategy);
session.repo_id = target.repo_id.clone();
let insert_result = agent
.db
.pentest_sessions()
.insert_one(&session)
.await
.map_err(|e| {
(
StatusCode::INTERNAL_SERVER_ERROR,
format!("Failed to create session: {e}"),
)
})?;
session.id = insert_result.inserted_id.as_object_id();
let session_id_str = session.id.map(|oid| oid.to_hex()).unwrap_or_default();
// Register broadcast stream and pause control
let event_tx = agent.register_session_stream(&session_id_str);
let pause_rx = agent.register_pause_control(&session_id_str);
let initial_message = req.message.unwrap_or_else(|| {
format!(
"Begin a {} penetration test against {} ({}). \
Identify vulnerabilities and provide evidence for each finding.",
session.strategy, target.name, target.base_url,
)
});
let llm = agent.llm.clone();
let db = agent.db.clone();
let session_clone = session.clone();
let target_clone = target.clone();
let agent_ref = agent.clone();
tokio::spawn(async move {
let orchestrator = PentestOrchestrator::new(llm, db, event_tx, Some(pause_rx));
orchestrator
.run_session_guarded(&session_clone, &target_clone, &initial_message)
.await;
agent_ref.cleanup_session(&session_id_str);
drop(permit);
});
Ok(Json(ApiResponse {
data: session,
total: None,
page: None,
}))
}
}
fn parse_strategy(s: &str) -> PentestStrategy {
match s {
"quick" => PentestStrategy::Quick,
"targeted" => PentestStrategy::Targeted,
"aggressive" => PentestStrategy::Aggressive,
"stealth" => PentestStrategy::Stealth,
_ => PentestStrategy::Comprehensive,
}
}
/// GET /api/v1/pentest/lookup-repo — Look up a tracked repository by git URL
#[tracing::instrument(skip_all)]
pub async fn lookup_repo(
Extension(agent): AgentExt,
Query(params): Query<LookupRepoQuery>,
) -> Result<Json<ApiResponse<serde_json::Value>>, StatusCode> {
let repo = agent
.db
.repositories()
.find_one(doc! { "git_url": &params.url })
.await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
let data = match repo {
Some(r) => serde_json::json!({
"name": r.name,
"default_branch": r.default_branch,
"last_scanned_commit": r.last_scanned_commit,
}),
None => serde_json::Value::Null,
};
Ok(Json(ApiResponse {
data,
total: None,
page: None,
}))
}
/// GET /api/v1/pentest/sessions — List pentest sessions
#[tracing::instrument(skip_all)]
pub async fn list_sessions(
Extension(agent): AgentExt,
Query(params): Query<PaginationParams>,
) -> Result<Json<ApiResponse<Vec<PentestSession>>>, StatusCode> {
let db = &agent.db;
let skip = (params.page.saturating_sub(1)) * params.limit as u64;
let total = db
.pentest_sessions()
.count_documents(doc! {})
.await
.unwrap_or(0);
let sessions = match db
.pentest_sessions()
.find(doc! {})
.sort(doc! { "started_at": -1 })
.skip(skip)
.limit(params.limit)
.await
{
Ok(cursor) => collect_cursor_async(cursor).await,
Err(e) => {
tracing::warn!("Failed to fetch pentest sessions: {e}");
Vec::new()
}
};
Ok(Json(ApiResponse {
data: sessions,
total: Some(total),
page: Some(params.page),
}))
}
/// GET /api/v1/pentest/sessions/:id — Get a single pentest session
#[tracing::instrument(skip_all, fields(session_id = %id))]
pub async fn get_session(
Extension(agent): AgentExt,
Path(id): Path<String>,
) -> Result<Json<ApiResponse<PentestSession>>, StatusCode> {
let oid = mongodb::bson::oid::ObjectId::parse_str(&id).map_err(|_| StatusCode::BAD_REQUEST)?;
let mut session = agent
.db
.pentest_sessions()
.find_one(doc! { "_id": oid })
.await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?
.ok_or(StatusCode::NOT_FOUND)?;
// Redact credentials in response
if let Some(ref mut cfg) = session.config {
if cfg.auth.username.is_some() {
cfg.auth.username = Some("********".to_string());
}
if cfg.auth.password.is_some() {
cfg.auth.password = Some("********".to_string());
}
}
Ok(Json(ApiResponse {
data: session,
total: None,
page: None,
}))
}
/// POST /api/v1/pentest/sessions/:id/chat — Send a user message and trigger next orchestrator iteration
#[tracing::instrument(skip_all, fields(session_id = %id))]
pub async fn send_message(
Extension(agent): AgentExt,
Path(id): Path<String>,
Json(req): Json<SendMessageRequest>,
) -> Result<Json<ApiResponse<PentestMessage>>, (StatusCode, String)> {
let oid = mongodb::bson::oid::ObjectId::parse_str(&id)
.map_err(|_| (StatusCode::BAD_REQUEST, "Invalid session ID".to_string()))?;
// Verify session exists and is running
let session = agent
.db
.pentest_sessions()
.find_one(doc! { "_id": oid })
.await
.map_err(|e| {
(
StatusCode::INTERNAL_SERVER_ERROR,
format!("Database error: {e}"),
)
})?
.ok_or_else(|| (StatusCode::NOT_FOUND, "Session not found".to_string()))?;
if session.status != PentestStatus::Running && session.status != PentestStatus::Paused {
return Err((
StatusCode::BAD_REQUEST,
format!("Session is {}, cannot send messages", session.status),
));
}
// Look up the target
let target_oid = mongodb::bson::oid::ObjectId::parse_str(&session.target_id).map_err(|_| {
(
StatusCode::INTERNAL_SERVER_ERROR,
"Invalid target_id in session".to_string(),
)
})?;
let target = agent
.db
.dast_targets()
.find_one(doc! { "_id": target_oid })
.await
.map_err(|e| {
(
StatusCode::INTERNAL_SERVER_ERROR,
format!("Database error: {e}"),
)
})?
.ok_or_else(|| {
(
StatusCode::NOT_FOUND,
"Target for session not found".to_string(),
)
})?;
// Store user message
let session_id = id.clone();
let user_msg = PentestMessage::user(session_id.clone(), req.message.clone());
let _ = agent.db.pentest_messages().insert_one(&user_msg).await;
let response_msg = user_msg.clone();
// Spawn orchestrator to continue the session
let llm = agent.llm.clone();
let db = agent.db.clone();
let message = req.message.clone();
// Use existing broadcast sender if available, otherwise create a new one
let event_tx = agent
.subscribe_session(&session_id)
.and_then(|_| {
agent
.session_streams
.get(&session_id)
.map(|entry| entry.value().clone())
})
.unwrap_or_else(|| agent.register_session_stream(&session_id));
tokio::spawn(async move {
let orchestrator = PentestOrchestrator::new(llm, db, event_tx, None);
orchestrator
.run_session_guarded(&session, &target, &message)
.await;
});
Ok(Json(ApiResponse {
data: response_msg,
total: None,
page: None,
}))
}
/// POST /api/v1/pentest/sessions/:id/stop — Stop a running pentest session
#[tracing::instrument(skip_all, fields(session_id = %id))]
pub async fn stop_session(
Extension(agent): AgentExt,
Path(id): Path<String>,
) -> Result<Json<ApiResponse<PentestSession>>, (StatusCode, String)> {
let oid = mongodb::bson::oid::ObjectId::parse_str(&id)
.map_err(|_| (StatusCode::BAD_REQUEST, "Invalid session ID".to_string()))?;
let session = agent
.db
.pentest_sessions()
.find_one(doc! { "_id": oid })
.await
.map_err(|e| {
(
StatusCode::INTERNAL_SERVER_ERROR,
format!("Database error: {e}"),
)
})?
.ok_or_else(|| (StatusCode::NOT_FOUND, "Session not found".to_string()))?;
if session.status != PentestStatus::Running && session.status != PentestStatus::Paused {
return Err((
StatusCode::BAD_REQUEST,
format!("Session is {}, not running or paused", session.status),
));
}
agent
.db
.pentest_sessions()
.update_one(
doc! { "_id": oid },
doc! { "$set": {
"status": "failed",
"completed_at": mongodb::bson::DateTime::now(),
"error_message": "Stopped by user",
}},
)
.await
.map_err(|e| {
(
StatusCode::INTERNAL_SERVER_ERROR,
format!("Database error: {e}"),
)
})?;
// Clean up session resources
agent.cleanup_session(&id);
let updated = agent
.db
.pentest_sessions()
.find_one(doc! { "_id": oid })
.await
.map_err(|e| {
(
StatusCode::INTERNAL_SERVER_ERROR,
format!("Database error: {e}"),
)
})?
.ok_or_else(|| {
(
StatusCode::NOT_FOUND,
"Session not found after update".to_string(),
)
})?;
Ok(Json(ApiResponse {
data: updated,
total: None,
page: None,
}))
}
/// POST /api/v1/pentest/sessions/:id/pause — Pause a running pentest session
#[tracing::instrument(skip_all, fields(session_id = %id))]
pub async fn pause_session(
Extension(agent): AgentExt,
Path(id): Path<String>,
) -> Result<Json<ApiResponse<serde_json::Value>>, (StatusCode, String)> {
let oid = mongodb::bson::oid::ObjectId::parse_str(&id)
.map_err(|_| (StatusCode::BAD_REQUEST, "Invalid session ID".to_string()))?;
let session = agent
.db
.pentest_sessions()
.find_one(doc! { "_id": oid })
.await
.map_err(|e| {
(
StatusCode::INTERNAL_SERVER_ERROR,
format!("Database error: {e}"),
)
})?
.ok_or_else(|| (StatusCode::NOT_FOUND, "Session not found".to_string()))?;
if session.status != PentestStatus::Running {
return Err((
StatusCode::BAD_REQUEST,
format!("Session is {}, not running", session.status),
));
}
if !agent.pause_session(&id) {
return Err((
StatusCode::INTERNAL_SERVER_ERROR,
"Failed to send pause signal".to_string(),
));
}
Ok(Json(ApiResponse {
data: serde_json::json!({ "status": "paused" }),
total: None,
page: None,
}))
}
/// POST /api/v1/pentest/sessions/:id/resume — Resume a paused pentest session
#[tracing::instrument(skip_all, fields(session_id = %id))]
pub async fn resume_session(
Extension(agent): AgentExt,
Path(id): Path<String>,
) -> Result<Json<ApiResponse<serde_json::Value>>, (StatusCode, String)> {
let oid = mongodb::bson::oid::ObjectId::parse_str(&id)
.map_err(|_| (StatusCode::BAD_REQUEST, "Invalid session ID".to_string()))?;
let session = agent
.db
.pentest_sessions()
.find_one(doc! { "_id": oid })
.await
.map_err(|e| {
(
StatusCode::INTERNAL_SERVER_ERROR,
format!("Database error: {e}"),
)
})?
.ok_or_else(|| (StatusCode::NOT_FOUND, "Session not found".to_string()))?;
if session.status != PentestStatus::Paused {
return Err((
StatusCode::BAD_REQUEST,
format!("Session is {}, not paused", session.status),
));
}
if !agent.resume_session(&id) {
return Err((
StatusCode::INTERNAL_SERVER_ERROR,
"Failed to send resume signal".to_string(),
));
}
Ok(Json(ApiResponse {
data: serde_json::json!({ "status": "running" }),
total: None,
page: None,
}))
}
/// GET /api/v1/pentest/sessions/:id/attack-chain — Get attack chain nodes for a session
#[tracing::instrument(skip_all, fields(session_id = %id))]
pub async fn get_attack_chain(
Extension(agent): AgentExt,
Path(id): Path<String>,
) -> Result<Json<ApiResponse<Vec<AttackChainNode>>>, StatusCode> {
let _oid = mongodb::bson::oid::ObjectId::parse_str(&id).map_err(|_| StatusCode::BAD_REQUEST)?;
let nodes = match agent
.db
.attack_chain_nodes()
.find(doc! { "session_id": &id })
.sort(doc! { "started_at": 1 })
.await
{
Ok(cursor) => collect_cursor_async(cursor).await,
Err(e) => {
tracing::warn!("Failed to fetch attack chain nodes: {e}");
Vec::new()
}
};
let total = nodes.len() as u64;
Ok(Json(ApiResponse {
data: nodes,
total: Some(total),
page: None,
}))
}
/// GET /api/v1/pentest/sessions/:id/messages — Get messages for a session
#[tracing::instrument(skip_all, fields(session_id = %id))]
pub async fn get_messages(
Extension(agent): AgentExt,
Path(id): Path<String>,
Query(params): Query<PaginationParams>,
) -> Result<Json<ApiResponse<Vec<PentestMessage>>>, StatusCode> {
let _oid = mongodb::bson::oid::ObjectId::parse_str(&id).map_err(|_| StatusCode::BAD_REQUEST)?;
let skip = (params.page.saturating_sub(1)) * params.limit as u64;
let total = agent
.db
.pentest_messages()
.count_documents(doc! { "session_id": &id })
.await
.unwrap_or(0);
let messages = match agent
.db
.pentest_messages()
.find(doc! { "session_id": &id })
.sort(doc! { "created_at": 1 })
.skip(skip)
.limit(params.limit)
.await
{
Ok(cursor) => collect_cursor_async(cursor).await,
Err(e) => {
tracing::warn!("Failed to fetch pentest messages: {e}");
Vec::new()
}
};
Ok(Json(ApiResponse {
data: messages,
total: Some(total),
page: Some(params.page),
}))
}
/// GET /api/v1/pentest/sessions/:id/findings — Get DAST findings for a pentest session
#[tracing::instrument(skip_all, fields(session_id = %id))]
pub async fn get_session_findings(
Extension(agent): AgentExt,
Path(id): Path<String>,
Query(params): Query<PaginationParams>,
) -> Result<Json<ApiResponse<Vec<compliance_core::models::dast::DastFinding>>>, StatusCode> {
let _oid = mongodb::bson::oid::ObjectId::parse_str(&id).map_err(|_| StatusCode::BAD_REQUEST)?;
let skip = (params.page.saturating_sub(1)) * params.limit as u64;
let total = agent
.db
.dast_findings()
.count_documents(doc! { "session_id": &id })
.await
.unwrap_or(0);
let findings = match agent
.db
.dast_findings()
.find(doc! { "session_id": &id })
.sort(doc! { "created_at": -1 })
.skip(skip)
.limit(params.limit)
.await
{
Ok(cursor) => collect_cursor_async(cursor).await,
Err(e) => {
tracing::warn!("Failed to fetch pentest session findings: {e}");
Vec::new()
}
};
Ok(Json(ApiResponse {
data: findings,
total: Some(total),
page: Some(params.page),
}))
}

View File

@@ -1,102 +0,0 @@
use std::sync::Arc;
use axum::extract::Extension;
use axum::http::StatusCode;
use axum::Json;
use mongodb::bson::doc;
use compliance_core::models::pentest::*;
use crate::agent::ComplianceAgent;
use super::super::dto::{collect_cursor_async, ApiResponse};
type AgentExt = Extension<Arc<ComplianceAgent>>;
/// GET /api/v1/pentest/stats — Aggregated pentest statistics
#[tracing::instrument(skip_all)]
pub async fn pentest_stats(
Extension(agent): AgentExt,
) -> Result<Json<ApiResponse<PentestStats>>, StatusCode> {
let db = &agent.db;
let running_sessions = db
.pentest_sessions()
.count_documents(doc! { "status": "running" })
.await
.unwrap_or(0) as u32;
// Count DAST findings from pentest sessions
let total_vulnerabilities = db
.dast_findings()
.count_documents(doc! { "session_id": { "$exists": true, "$ne": null } })
.await
.unwrap_or(0) as u32;
// Aggregate tool invocations from all sessions
let sessions: Vec<PentestSession> = match db.pentest_sessions().find(doc! {}).await {
Ok(cursor) => collect_cursor_async(cursor).await,
Err(_) => Vec::new(),
};
let total_tool_invocations: u32 = sessions.iter().map(|s| s.tool_invocations).sum();
let total_successes: u32 = sessions.iter().map(|s| s.tool_successes).sum();
let tool_success_rate = if total_tool_invocations == 0 {
100.0
} else {
(total_successes as f64 / total_tool_invocations as f64) * 100.0
};
// Severity distribution from pentest-related DAST findings
let critical = db
.dast_findings()
.count_documents(
doc! { "session_id": { "$exists": true, "$ne": null }, "severity": "critical" },
)
.await
.unwrap_or(0) as u32;
let high = db
.dast_findings()
.count_documents(
doc! { "session_id": { "$exists": true, "$ne": null }, "severity": "high" },
)
.await
.unwrap_or(0) as u32;
let medium = db
.dast_findings()
.count_documents(
doc! { "session_id": { "$exists": true, "$ne": null }, "severity": "medium" },
)
.await
.unwrap_or(0) as u32;
let low = db
.dast_findings()
.count_documents(doc! { "session_id": { "$exists": true, "$ne": null }, "severity": "low" })
.await
.unwrap_or(0) as u32;
let info = db
.dast_findings()
.count_documents(
doc! { "session_id": { "$exists": true, "$ne": null }, "severity": "info" },
)
.await
.unwrap_or(0) as u32;
Ok(Json(ApiResponse {
data: PentestStats {
running_sessions,
total_vulnerabilities,
total_tool_invocations,
tool_success_rate,
severity_distribution: SeverityDistribution {
critical,
high,
medium,
low,
info,
},
},
total: None,
page: None,
}))
}

View File

@@ -1,158 +0,0 @@
use std::convert::Infallible;
use std::sync::Arc;
use std::time::Duration;
use axum::extract::{Extension, Path};
use axum::http::StatusCode;
use axum::response::sse::{Event, KeepAlive, Sse};
use futures_util::stream;
use mongodb::bson::doc;
use tokio_stream::wrappers::BroadcastStream;
use tokio_stream::StreamExt;
use compliance_core::models::pentest::*;
use crate::agent::ComplianceAgent;
use super::super::dto::collect_cursor_async;
type AgentExt = Extension<Arc<ComplianceAgent>>;
/// GET /api/v1/pentest/sessions/:id/stream — SSE endpoint for real-time events
///
/// Replays stored messages/nodes as initial burst, then subscribes to the
/// broadcast channel for live updates. Sends keepalive comments every 15s.
#[tracing::instrument(skip_all, fields(session_id = %id))]
pub async fn session_stream(
Extension(agent): AgentExt,
Path(id): Path<String>,
) -> Result<Sse<impl futures_util::Stream<Item = Result<Event, Infallible>>>, StatusCode> {
let oid = mongodb::bson::oid::ObjectId::parse_str(&id).map_err(|_| StatusCode::BAD_REQUEST)?;
// Verify session exists
let _session = agent
.db
.pentest_sessions()
.find_one(doc! { "_id": oid })
.await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?
.ok_or(StatusCode::NOT_FOUND)?;
// ── Initial burst: replay stored data ──────────────────────────
let mut initial_events: Vec<Result<Event, Infallible>> = Vec::new();
// Fetch recent messages for this session
let messages: Vec<PentestMessage> = match agent
.db
.pentest_messages()
.find(doc! { "session_id": &id })
.sort(doc! { "created_at": 1 })
.limit(100)
.await
{
Ok(cursor) => collect_cursor_async(cursor).await,
Err(_) => Vec::new(),
};
// Fetch recent attack chain nodes
let nodes: Vec<AttackChainNode> = match agent
.db
.attack_chain_nodes()
.find(doc! { "session_id": &id })
.sort(doc! { "started_at": 1 })
.limit(100)
.await
{
Ok(cursor) => collect_cursor_async(cursor).await,
Err(_) => Vec::new(),
};
for msg in &messages {
let event_data = serde_json::json!({
"type": "message",
"role": msg.role,
"content": msg.content,
"created_at": msg.created_at.to_rfc3339(),
});
if let Ok(data) = serde_json::to_string(&event_data) {
initial_events.push(Ok(Event::default().event("message").data(data)));
}
}
for node in &nodes {
let event_data = serde_json::json!({
"type": "tool_execution",
"node_id": node.node_id,
"tool_name": node.tool_name,
"status": node.status,
"findings_produced": node.findings_produced,
});
if let Ok(data) = serde_json::to_string(&event_data) {
initial_events.push(Ok(Event::default().event("tool").data(data)));
}
}
// Add current session status event
let session = agent
.db
.pentest_sessions()
.find_one(doc! { "_id": oid })
.await
.ok()
.flatten();
if let Some(s) = session {
let status_data = serde_json::json!({
"type": "status",
"status": s.status,
"findings_count": s.findings_count,
"tool_invocations": s.tool_invocations,
});
if let Ok(data) = serde_json::to_string(&status_data) {
initial_events.push(Ok(Event::default().event("status").data(data)));
}
}
// ── Live stream: subscribe to broadcast ────────────────────────
let live_stream = if let Some(rx) = agent.subscribe_session(&id) {
let broadcast = BroadcastStream::new(rx).filter_map(|result| match result {
Ok(event) => {
if let Ok(data) = serde_json::to_string(&event) {
let event_type = match &event {
PentestEvent::ToolStart { .. } => "tool_start",
PentestEvent::ToolComplete { .. } => "tool_complete",
PentestEvent::Finding { .. } => "finding",
PentestEvent::Message { .. } => "message",
PentestEvent::Complete { .. } => "complete",
PentestEvent::Error { .. } => "error",
PentestEvent::Thinking { .. } => "thinking",
PentestEvent::Paused => "paused",
PentestEvent::Resumed => "resumed",
};
Some(Ok(Event::default().event(event_type).data(data)))
} else {
None
}
}
Err(_) => None,
});
// Box to unify types
Box::pin(broadcast)
as std::pin::Pin<Box<dyn futures_util::Stream<Item = Result<Event, Infallible>> + Send>>
} else {
// No active broadcast — return empty stream
Box::pin(stream::empty())
as std::pin::Pin<Box<dyn futures_util::Stream<Item = Result<Event, Infallible>> + Send>>
};
// Chain initial burst + live stream
let combined = stream::iter(initial_events).chain(live_stream);
Ok(Sse::new(combined).keep_alive(
KeepAlive::new()
.interval(Duration::from_secs(15))
.text("keepalive"),
))
}

View File

@@ -1,328 +0,0 @@
use axum::extract::{Extension, Path, Query};
use axum::http::StatusCode;
use axum::Json;
use mongodb::bson::doc;
use super::dto::*;
use compliance_core::models::*;
#[tracing::instrument(skip_all)]
pub async fn list_repositories(
Extension(agent): AgentExt,
Query(params): Query<PaginationParams>,
) -> ApiResult<Vec<TrackedRepository>> {
let db = &agent.db;
let skip = (params.page.saturating_sub(1)) * params.limit as u64;
let total = db
.repositories()
.count_documents(doc! {})
.await
.unwrap_or(0);
let repos = match db
.repositories()
.find(doc! {})
.skip(skip)
.limit(params.limit)
.await
{
Ok(cursor) => collect_cursor_async(cursor).await,
Err(e) => {
tracing::warn!("Failed to fetch repositories: {e}");
Vec::new()
}
};
Ok(Json(ApiResponse {
data: repos,
total: Some(total),
page: Some(params.page),
}))
}
#[tracing::instrument(skip_all)]
pub async fn add_repository(
Extension(agent): AgentExt,
Json(req): Json<AddRepositoryRequest>,
) -> Result<Json<ApiResponse<TrackedRepository>>, (StatusCode, String)> {
// Validate repository access before saving
let creds = crate::pipeline::git::RepoCredentials {
ssh_key_path: Some(agent.config.ssh_key_path.clone()),
auth_token: req.auth_token.clone(),
auth_username: req.auth_username.clone(),
};
if let Err(e) = crate::pipeline::git::GitOps::test_access(&req.git_url, &creds) {
return Err((
StatusCode::BAD_REQUEST,
format!("Cannot access repository: {e}"),
));
}
let mut repo = TrackedRepository::new(req.name, req.git_url);
repo.default_branch = req.default_branch;
repo.auth_token = req.auth_token;
repo.auth_username = req.auth_username;
repo.tracker_type = req.tracker_type;
repo.tracker_owner = req.tracker_owner;
repo.tracker_repo = req.tracker_repo;
repo.tracker_token = req.tracker_token;
repo.scan_schedule = req.scan_schedule;
agent
.db
.repositories()
.insert_one(&repo)
.await
.map_err(|_| {
(
StatusCode::CONFLICT,
"Repository already exists".to_string(),
)
})?;
Ok(Json(ApiResponse {
data: repo,
total: None,
page: None,
}))
}
#[tracing::instrument(skip_all, fields(repo_id = %id))]
pub async fn update_repository(
Extension(agent): AgentExt,
Path(id): Path<String>,
Json(req): Json<UpdateRepositoryRequest>,
) -> Result<Json<serde_json::Value>, StatusCode> {
let oid = mongodb::bson::oid::ObjectId::parse_str(&id).map_err(|_| StatusCode::BAD_REQUEST)?;
let mut set_doc = doc! { "updated_at": mongodb::bson::DateTime::now() };
if let Some(name) = &req.name {
set_doc.insert("name", name);
}
if let Some(branch) = &req.default_branch {
set_doc.insert("default_branch", branch);
}
if let Some(token) = &req.auth_token {
set_doc.insert("auth_token", token);
}
if let Some(username) = &req.auth_username {
set_doc.insert("auth_username", username);
}
if let Some(tracker_type) = &req.tracker_type {
set_doc.insert("tracker_type", tracker_type.to_string());
}
if let Some(owner) = &req.tracker_owner {
set_doc.insert("tracker_owner", owner);
}
if let Some(repo) = &req.tracker_repo {
set_doc.insert("tracker_repo", repo);
}
if let Some(token) = &req.tracker_token {
set_doc.insert("tracker_token", token);
}
if let Some(schedule) = &req.scan_schedule {
set_doc.insert("scan_schedule", schedule);
}
let result = agent
.db
.repositories()
.update_one(doc! { "_id": oid }, doc! { "$set": set_doc })
.await
.map_err(|e| {
tracing::warn!("Failed to update repository: {e}");
StatusCode::INTERNAL_SERVER_ERROR
})?;
if result.matched_count == 0 {
return Err(StatusCode::NOT_FOUND);
}
Ok(Json(serde_json::json!({ "status": "updated" })))
}
#[tracing::instrument(skip_all)]
pub async fn get_ssh_public_key(
Extension(agent): AgentExt,
) -> Result<Json<serde_json::Value>, StatusCode> {
let public_path = format!("{}.pub", agent.config.ssh_key_path);
let public_key = std::fs::read_to_string(&public_path).map_err(|_| StatusCode::NOT_FOUND)?;
Ok(Json(serde_json::json!({ "public_key": public_key.trim() })))
}
#[tracing::instrument(skip_all, fields(repo_id = %id))]
pub async fn trigger_scan(
Extension(agent): AgentExt,
Path(id): Path<String>,
) -> Result<Json<serde_json::Value>, StatusCode> {
let agent_clone = (*agent).clone();
tokio::spawn(async move {
if let Err(e) = agent_clone.run_scan(&id, ScanTrigger::Manual).await {
tracing::error!("Manual scan failed for {id}: {e}");
}
});
Ok(Json(serde_json::json!({ "status": "scan_triggered" })))
}
/// Return the webhook secret for a repository (used by dashboard to display it)
pub async fn get_webhook_config(
Extension(agent): AgentExt,
Path(id): Path<String>,
) -> Result<Json<serde_json::Value>, StatusCode> {
let oid = mongodb::bson::oid::ObjectId::parse_str(&id).map_err(|_| StatusCode::BAD_REQUEST)?;
let repo = agent
.db
.repositories()
.find_one(doc! { "_id": oid })
.await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?
.ok_or(StatusCode::NOT_FOUND)?;
let tracker_type = repo
.tracker_type
.as_ref()
.map(|t| t.to_string())
.unwrap_or_else(|| "gitea".to_string());
Ok(Json(serde_json::json!({
"webhook_secret": repo.webhook_secret,
"tracker_type": tracker_type,
})))
}
#[tracing::instrument(skip_all, fields(repo_id = %id))]
pub async fn delete_repository(
Extension(agent): AgentExt,
Path(id): Path<String>,
) -> Result<Json<serde_json::Value>, StatusCode> {
let oid = mongodb::bson::oid::ObjectId::parse_str(&id).map_err(|_| StatusCode::BAD_REQUEST)?;
let db = &agent.db;
// Delete the repository
let result = db
.repositories()
.delete_one(doc! { "_id": oid })
.await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
if result.deleted_count == 0 {
return Err(StatusCode::NOT_FOUND);
}
// Cascade delete all related data
let _ = db.findings().delete_many(doc! { "repo_id": &id }).await;
let _ = db.sbom_entries().delete_many(doc! { "repo_id": &id }).await;
let _ = db.scan_runs().delete_many(doc! { "repo_id": &id }).await;
let _ = db.cve_alerts().delete_many(doc! { "repo_id": &id }).await;
let _ = db
.tracker_issues()
.delete_many(doc! { "repo_id": &id })
.await;
let _ = db.graph_nodes().delete_many(doc! { "repo_id": &id }).await;
let _ = db.graph_edges().delete_many(doc! { "repo_id": &id }).await;
let _ = db.graph_builds().delete_many(doc! { "repo_id": &id }).await;
let _ = db
.impact_analyses()
.delete_many(doc! { "repo_id": &id })
.await;
let _ = db
.code_embeddings()
.delete_many(doc! { "repo_id": &id })
.await;
let _ = db
.embedding_builds()
.delete_many(doc! { "repo_id": &id })
.await;
// Cascade delete DAST targets linked to this repo, and all their downstream data
// (scan runs, findings, pentest sessions, attack chains, messages)
if let Ok(mut cursor) = db.dast_targets().find(doc! { "repo_id": &id }).await {
use futures_util::StreamExt;
while let Some(Ok(target)) = cursor.next().await {
let target_id = target.id.map(|oid| oid.to_hex()).unwrap_or_default();
if !target_id.is_empty() {
cascade_delete_dast_target(db, &target_id).await;
}
}
}
// Also delete pentest sessions linked directly to this repo (not via target)
if let Ok(mut cursor) = db.pentest_sessions().find(doc! { "repo_id": &id }).await {
use futures_util::StreamExt;
while let Some(Ok(session)) = cursor.next().await {
let session_id = session.id.map(|oid| oid.to_hex()).unwrap_or_default();
if !session_id.is_empty() {
let _ = db
.attack_chain_nodes()
.delete_many(doc! { "session_id": &session_id })
.await;
let _ = db
.pentest_messages()
.delete_many(doc! { "session_id": &session_id })
.await;
// Delete DAST findings produced by this session
let _ = db
.dast_findings()
.delete_many(doc! { "session_id": &session_id })
.await;
}
}
}
let _ = db
.pentest_sessions()
.delete_many(doc! { "repo_id": &id })
.await;
Ok(Json(serde_json::json!({ "status": "deleted" })))
}
/// Cascade-delete a DAST target and all its downstream data.
async fn cascade_delete_dast_target(db: &crate::database::Database, target_id: &str) {
// Delete pentest sessions for this target (and their attack chains + messages)
if let Ok(mut cursor) = db
.pentest_sessions()
.find(doc! { "target_id": target_id })
.await
{
use futures_util::StreamExt;
while let Some(Ok(session)) = cursor.next().await {
let session_id = session.id.map(|oid| oid.to_hex()).unwrap_or_default();
if !session_id.is_empty() {
let _ = db
.attack_chain_nodes()
.delete_many(doc! { "session_id": &session_id })
.await;
let _ = db
.pentest_messages()
.delete_many(doc! { "session_id": &session_id })
.await;
let _ = db
.dast_findings()
.delete_many(doc! { "session_id": &session_id })
.await;
}
}
}
let _ = db
.pentest_sessions()
.delete_many(doc! { "target_id": target_id })
.await;
// Delete DAST scan runs and their findings
let _ = db
.dast_findings()
.delete_many(doc! { "target_id": target_id })
.await;
let _ = db
.dast_scan_runs()
.delete_many(doc! { "target_id": target_id })
.await;
// Delete the target itself
if let Ok(oid) = mongodb::bson::oid::ObjectId::parse_str(target_id) {
let _ = db.dast_targets().delete_one(doc! { "_id": oid }).await;
}
}

View File

@@ -1,379 +0,0 @@
use axum::extract::{Extension, Query};
use axum::http::{header, StatusCode};
use axum::response::IntoResponse;
use axum::Json;
use mongodb::bson::doc;
use super::dto::*;
use compliance_core::models::SbomEntry;
const COPYLEFT_LICENSES: &[&str] = &[
"GPL-2.0",
"GPL-2.0-only",
"GPL-2.0-or-later",
"GPL-3.0",
"GPL-3.0-only",
"GPL-3.0-or-later",
"AGPL-3.0",
"AGPL-3.0-only",
"AGPL-3.0-or-later",
"LGPL-2.1",
"LGPL-2.1-only",
"LGPL-2.1-or-later",
"LGPL-3.0",
"LGPL-3.0-only",
"LGPL-3.0-or-later",
"MPL-2.0",
];
#[tracing::instrument(skip_all)]
pub async fn sbom_filters(
Extension(agent): AgentExt,
) -> Result<Json<serde_json::Value>, StatusCode> {
let db = &agent.db;
let managers: Vec<String> = db
.sbom_entries()
.distinct("package_manager", doc! {})
.await
.unwrap_or_default()
.into_iter()
.filter_map(|v| v.as_str().map(|s| s.to_string()))
.filter(|s| !s.is_empty() && s != "unknown" && s != "file")
.collect();
let licenses: Vec<String> = db
.sbom_entries()
.distinct("license", doc! {})
.await
.unwrap_or_default()
.into_iter()
.filter_map(|v| v.as_str().map(|s| s.to_string()))
.filter(|s| !s.is_empty())
.collect();
Ok(Json(serde_json::json!({
"package_managers": managers,
"licenses": licenses,
})))
}
#[tracing::instrument(skip_all, fields(repo_id = ?filter.repo_id, package_manager = ?filter.package_manager))]
pub async fn list_sbom(
Extension(agent): AgentExt,
Query(filter): Query<SbomFilter>,
) -> ApiResult<Vec<SbomEntry>> {
let db = &agent.db;
let mut query = doc! {};
if let Some(repo_id) = &filter.repo_id {
query.insert("repo_id", repo_id);
}
if let Some(pm) = &filter.package_manager {
query.insert("package_manager", pm);
}
if let Some(q) = &filter.q {
if !q.is_empty() {
query.insert("name", doc! { "$regex": q, "$options": "i" });
}
}
if let Some(has_vulns) = filter.has_vulns {
if has_vulns {
query.insert("known_vulnerabilities", doc! { "$exists": true, "$ne": [] });
} else {
query.insert("known_vulnerabilities", doc! { "$size": 0 });
}
}
if let Some(license) = &filter.license {
query.insert("license", license);
}
let skip = (filter.page.saturating_sub(1)) * filter.limit as u64;
let total = db
.sbom_entries()
.count_documents(query.clone())
.await
.unwrap_or(0);
let entries = match db
.sbom_entries()
.find(query)
.sort(doc! { "name": 1 })
.skip(skip)
.limit(filter.limit)
.await
{
Ok(cursor) => collect_cursor_async(cursor).await,
Err(e) => {
tracing::warn!("Failed to fetch SBOM entries: {e}");
Vec::new()
}
};
Ok(Json(ApiResponse {
data: entries,
total: Some(total),
page: Some(filter.page),
}))
}
#[tracing::instrument(skip_all)]
pub async fn export_sbom(
Extension(agent): AgentExt,
Query(params): Query<SbomExportParams>,
) -> Result<impl IntoResponse, StatusCode> {
let db = &agent.db;
let entries: Vec<SbomEntry> = match db
.sbom_entries()
.find(doc! { "repo_id": &params.repo_id })
.await
{
Ok(cursor) => collect_cursor_async(cursor).await,
Err(e) => {
tracing::warn!("Failed to fetch SBOM entries for export: {e}");
Vec::new()
}
};
let body = if params.format == "spdx" {
// SPDX 2.3 format
let packages: Vec<serde_json::Value> = entries
.iter()
.enumerate()
.map(|(i, e)| {
serde_json::json!({
"SPDXID": format!("SPDXRef-Package-{i}"),
"name": e.name,
"versionInfo": e.version,
"downloadLocation": "NOASSERTION",
"licenseConcluded": e.license.as_deref().unwrap_or("NOASSERTION"),
"externalRefs": e.purl.as_ref().map(|p| vec![serde_json::json!({
"referenceCategory": "PACKAGE-MANAGER",
"referenceType": "purl",
"referenceLocator": p,
})]).unwrap_or_default(),
})
})
.collect();
serde_json::json!({
"spdxVersion": "SPDX-2.3",
"dataLicense": "CC0-1.0",
"SPDXID": "SPDXRef-DOCUMENT",
"name": format!("sbom-{}", params.repo_id),
"documentNamespace": format!("https://compliance-scanner/sbom/{}", params.repo_id),
"packages": packages,
})
} else {
// CycloneDX 1.5 format
let components: Vec<serde_json::Value> = entries
.iter()
.map(|e| {
let mut comp = serde_json::json!({
"type": "library",
"name": e.name,
"version": e.version,
"group": e.package_manager,
});
if let Some(purl) = &e.purl {
comp["purl"] = serde_json::Value::String(purl.clone());
}
if let Some(license) = &e.license {
comp["licenses"] = serde_json::json!([{ "license": { "id": license } }]);
}
if !e.known_vulnerabilities.is_empty() {
comp["vulnerabilities"] = serde_json::json!(
e.known_vulnerabilities.iter().map(|v| serde_json::json!({
"id": v.id,
"source": { "name": v.source },
"ratings": v.severity.as_ref().map(|s| vec![serde_json::json!({"severity": s})]).unwrap_or_default(),
})).collect::<Vec<_>>()
);
}
comp
})
.collect();
serde_json::json!({
"bomFormat": "CycloneDX",
"specVersion": "1.5",
"version": 1,
"metadata": {
"component": {
"type": "application",
"name": format!("repo-{}", params.repo_id),
}
},
"components": components,
})
};
let json_str =
serde_json::to_string_pretty(&body).map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
let filename = if params.format == "spdx" {
format!("sbom-{}-spdx.json", params.repo_id)
} else {
format!("sbom-{}-cyclonedx.json", params.repo_id)
};
let disposition = format!("attachment; filename=\"{filename}\"");
Ok((
[
(
header::CONTENT_TYPE,
header::HeaderValue::from_static("application/json"),
),
(
header::CONTENT_DISPOSITION,
header::HeaderValue::from_str(&disposition)
.unwrap_or_else(|_| header::HeaderValue::from_static("attachment")),
),
],
json_str,
))
}
#[tracing::instrument(skip_all)]
pub async fn license_summary(
Extension(agent): AgentExt,
Query(params): Query<SbomFilter>,
) -> ApiResult<Vec<LicenseSummary>> {
let db = &agent.db;
let mut query = doc! {};
if let Some(repo_id) = &params.repo_id {
query.insert("repo_id", repo_id);
}
let entries: Vec<SbomEntry> = match db.sbom_entries().find(query).await {
Ok(cursor) => collect_cursor_async(cursor).await,
Err(e) => {
tracing::warn!("Failed to fetch SBOM entries for license summary: {e}");
Vec::new()
}
};
let mut license_map: std::collections::HashMap<String, Vec<String>> =
std::collections::HashMap::new();
for entry in &entries {
let lic = entry.license.as_deref().unwrap_or("Unknown").to_string();
license_map.entry(lic).or_default().push(entry.name.clone());
}
let mut summaries: Vec<LicenseSummary> = license_map
.into_iter()
.map(|(license, packages)| {
let is_copyleft = COPYLEFT_LICENSES
.iter()
.any(|c| license.to_uppercase().contains(&c.to_uppercase()));
LicenseSummary {
license,
count: packages.len() as u64,
is_copyleft,
packages,
}
})
.collect();
summaries.sort_by(|a, b| b.count.cmp(&a.count));
Ok(Json(ApiResponse {
data: summaries,
total: None,
page: None,
}))
}
#[tracing::instrument(skip_all)]
pub async fn sbom_diff(
Extension(agent): AgentExt,
Query(params): Query<SbomDiffParams>,
) -> ApiResult<SbomDiffResult> {
let db = &agent.db;
let entries_a: Vec<SbomEntry> = match db
.sbom_entries()
.find(doc! { "repo_id": &params.repo_a })
.await
{
Ok(cursor) => collect_cursor_async(cursor).await,
Err(e) => {
tracing::warn!("Failed to fetch SBOM entries for repo_a: {e}");
Vec::new()
}
};
let entries_b: Vec<SbomEntry> = match db
.sbom_entries()
.find(doc! { "repo_id": &params.repo_b })
.await
{
Ok(cursor) => collect_cursor_async(cursor).await,
Err(e) => {
tracing::warn!("Failed to fetch SBOM entries for repo_b: {e}");
Vec::new()
}
};
// Build maps by (name, package_manager) -> version
let map_a: std::collections::HashMap<(String, String), String> = entries_a
.iter()
.map(|e| {
(
(e.name.clone(), e.package_manager.clone()),
e.version.clone(),
)
})
.collect();
let map_b: std::collections::HashMap<(String, String), String> = entries_b
.iter()
.map(|e| {
(
(e.name.clone(), e.package_manager.clone()),
e.version.clone(),
)
})
.collect();
let mut only_in_a = Vec::new();
let mut version_changed = Vec::new();
let mut common_count: u64 = 0;
for (key, ver_a) in &map_a {
match map_b.get(key) {
None => only_in_a.push(SbomDiffEntry {
name: key.0.clone(),
version: ver_a.clone(),
package_manager: key.1.clone(),
}),
Some(ver_b) if ver_a != ver_b => {
version_changed.push(SbomVersionDiff {
name: key.0.clone(),
package_manager: key.1.clone(),
version_a: ver_a.clone(),
version_b: ver_b.clone(),
});
}
Some(_) => common_count += 1,
}
}
let only_in_b: Vec<SbomDiffEntry> = map_b
.iter()
.filter(|(key, _)| !map_a.contains_key(key))
.map(|(key, ver)| SbomDiffEntry {
name: key.0.clone(),
version: ver.clone(),
package_manager: key.1.clone(),
})
.collect();
Ok(Json(ApiResponse {
data: SbomDiffResult {
only_in_a,
only_in_b,
version_changed,
common_count,
},
total: None,
page: None,
}))
}

View File

@@ -1,37 +0,0 @@
use axum::extract::{Extension, Query};
use axum::Json;
use mongodb::bson::doc;
use super::dto::*;
use compliance_core::models::ScanRun;
#[tracing::instrument(skip_all)]
pub async fn list_scan_runs(
Extension(agent): AgentExt,
Query(params): Query<PaginationParams>,
) -> ApiResult<Vec<ScanRun>> {
let db = &agent.db;
let skip = (params.page.saturating_sub(1)) * params.limit as u64;
let total = db.scan_runs().count_documents(doc! {}).await.unwrap_or(0);
let scans = match db
.scan_runs()
.find(doc! {})
.sort(doc! { "started_at": -1 })
.skip(skip)
.limit(params.limit)
.await
{
Ok(cursor) => collect_cursor_async(cursor).await,
Err(e) => {
tracing::warn!("Failed to fetch scan runs: {e}");
Vec::new()
}
};
Ok(Json(ApiResponse {
data: scans,
total: Some(total),
page: Some(params.page),
}))
}

View File

@@ -2,16 +2,11 @@ use axum::routing::{delete, get, patch, post};
use axum::Router;
use crate::api::handlers;
use crate::webhooks;
pub fn build_router() -> Router {
Router::new()
.route("/api/v1/health", get(handlers::health))
.route("/api/v1/stats/overview", get(handlers::stats_overview))
.route(
"/api/v1/settings/ssh-public-key",
get(handlers::get_ssh_public_key),
)
.route("/api/v1/repositories", get(handlers::list_repositories))
.route("/api/v1/repositories", post(handlers::add_repository))
.route(
@@ -20,11 +15,7 @@ pub fn build_router() -> Router {
)
.route(
"/api/v1/repositories/{id}",
delete(handlers::delete_repository).patch(handlers::update_repository),
)
.route(
"/api/v1/repositories/{id}/webhook-config",
get(handlers::get_webhook_config),
delete(handlers::delete_repository),
)
.route("/api/v1/findings", get(handlers::list_findings))
.route("/api/v1/findings/{id}", get(handlers::get_finding))
@@ -32,16 +23,7 @@ pub fn build_router() -> Router {
"/api/v1/findings/{id}/status",
patch(handlers::update_finding_status),
)
.route(
"/api/v1/findings/bulk-status",
patch(handlers::bulk_update_finding_status),
)
.route(
"/api/v1/findings/{id}/feedback",
patch(handlers::update_finding_feedback),
)
.route("/api/v1/sbom", get(handlers::list_sbom))
.route("/api/v1/sbom/filters", get(handlers::sbom_filters))
.route("/api/v1/sbom/export", get(handlers::export_sbom))
.route("/api/v1/sbom/licenses", get(handlers::license_summary))
.route("/api/v1/sbom/diff", get(handlers::sbom_diff))
@@ -99,93 +81,4 @@ pub fn build_router() -> Router {
"/api/v1/chat/{repo_id}/status",
get(handlers::chat::embedding_status),
)
// Help chat (documentation-grounded Q&A)
.route("/api/v1/help/chat", post(handlers::help_chat::help_chat))
// CVE notification endpoints
.route(
"/api/v1/notifications",
get(handlers::notifications::list_notifications),
)
.route(
"/api/v1/notifications/count",
get(handlers::notifications::notification_count),
)
.route(
"/api/v1/notifications/read-all",
post(handlers::notifications::mark_all_read),
)
.route(
"/api/v1/notifications/{id}/read",
patch(handlers::notifications::mark_read),
)
.route(
"/api/v1/notifications/{id}/dismiss",
patch(handlers::notifications::dismiss_notification),
)
// Pentest API endpoints
.route(
"/api/v1/pentest/lookup-repo",
get(handlers::pentest::lookup_repo),
)
.route(
"/api/v1/pentest/sessions",
get(handlers::pentest::list_sessions).post(handlers::pentest::create_session),
)
.route(
"/api/v1/pentest/sessions/{id}",
get(handlers::pentest::get_session),
)
.route(
"/api/v1/pentest/sessions/{id}/chat",
post(handlers::pentest::send_message),
)
.route(
"/api/v1/pentest/sessions/{id}/stop",
post(handlers::pentest::stop_session),
)
.route(
"/api/v1/pentest/sessions/{id}/pause",
post(handlers::pentest::pause_session),
)
.route(
"/api/v1/pentest/sessions/{id}/resume",
post(handlers::pentest::resume_session),
)
.route(
"/api/v1/pentest/sessions/{id}/stream",
get(handlers::pentest::session_stream),
)
.route(
"/api/v1/pentest/sessions/{id}/attack-chain",
get(handlers::pentest::get_attack_chain),
)
.route(
"/api/v1/pentest/sessions/{id}/messages",
get(handlers::pentest::get_messages),
)
.route(
"/api/v1/pentest/sessions/{id}/findings",
get(handlers::pentest::get_session_findings),
)
.route(
"/api/v1/pentest/sessions/{id}/export",
post(handlers::pentest::export_session_report),
)
.route(
"/api/v1/pentest/stats",
get(handlers::pentest::pentest_stats),
)
// Webhook endpoints (proxied through dashboard)
.route(
"/webhook/github/{repo_id}",
post(webhooks::github::handle_github_webhook),
)
.route(
"/webhook/gitlab/{repo_id}",
post(webhooks::gitlab::handle_gitlab_webhook),
)
.route(
"/webhook/gitea/{repo_id}",
post(webhooks::gitea::handle_gitea_webhook),
)
}

View File

@@ -42,22 +42,10 @@ pub fn load_config() -> Result<AgentConfig, AgentError> {
.unwrap_or(3001),
scan_schedule: env_var_opt("SCAN_SCHEDULE").unwrap_or_else(|| "0 0 */6 * * *".to_string()),
cve_monitor_schedule: env_var_opt("CVE_MONITOR_SCHEDULE")
.unwrap_or_else(|| "0 0 * * * *".to_string()),
.unwrap_or_else(|| "0 0 0 * * *".to_string()),
git_clone_base_path: env_var_opt("GIT_CLONE_BASE_PATH")
.unwrap_or_else(|| "/tmp/compliance-scanner/repos".to_string()),
ssh_key_path: env_var_opt("SSH_KEY_PATH")
.unwrap_or_else(|| "/data/compliance-scanner/ssh/id_ed25519".to_string()),
keycloak_url: env_var_opt("KEYCLOAK_URL"),
keycloak_realm: env_var_opt("KEYCLOAK_REALM"),
keycloak_admin_username: env_var_opt("KEYCLOAK_ADMIN_USERNAME"),
keycloak_admin_password: env_secret_opt("KEYCLOAK_ADMIN_PASSWORD"),
pentest_verification_email: env_var_opt("PENTEST_VERIFICATION_EMAIL"),
pentest_imap_host: env_var_opt("PENTEST_IMAP_HOST"),
pentest_imap_port: env_var_opt("PENTEST_IMAP_PORT").and_then(|p| p.parse().ok()),
pentest_imap_tls: env_var_opt("PENTEST_IMAP_TLS")
.map(|v| v == "1" || v.eq_ignore_ascii_case("true"))
.unwrap_or(true),
pentest_imap_username: env_var_opt("PENTEST_IMAP_USERNAME"),
pentest_imap_password: env_secret_opt("PENTEST_IMAP_PASSWORD"),
})
}

View File

@@ -78,25 +78,6 @@ impl Database {
)
.await?;
// cve_notifications: unique cve_id + repo_id + package, status filter
self.cve_notifications()
.create_index(
IndexModel::builder()
.keys(
doc! { "cve_id": 1, "repo_id": 1, "package_name": 1, "package_version": 1 },
)
.options(IndexOptions::builder().unique(true).build())
.build(),
)
.await?;
self.cve_notifications()
.create_index(
IndexModel::builder()
.keys(doc! { "status": 1, "created_at": -1 })
.build(),
)
.await?;
// tracker_issues: unique finding_id
self.tracker_issues()
.create_index(
@@ -185,38 +166,6 @@ impl Database {
)
.await?;
// pentest_sessions: compound (target_id, started_at DESC)
self.pentest_sessions()
.create_index(
IndexModel::builder()
.keys(doc! { "target_id": 1, "started_at": -1 })
.build(),
)
.await?;
// pentest_sessions: status index
self.pentest_sessions()
.create_index(IndexModel::builder().keys(doc! { "status": 1 }).build())
.await?;
// attack_chain_nodes: compound (session_id, node_id)
self.attack_chain_nodes()
.create_index(
IndexModel::builder()
.keys(doc! { "session_id": 1, "node_id": 1 })
.build(),
)
.await?;
// pentest_messages: compound (session_id, created_at)
self.pentest_messages()
.create_index(
IndexModel::builder()
.keys(doc! { "session_id": 1, "created_at": 1 })
.build(),
)
.await?;
tracing::info!("Database indexes ensured");
Ok(())
}
@@ -241,12 +190,6 @@ impl Database {
self.inner.collection("cve_alerts")
}
pub fn cve_notifications(
&self,
) -> Collection<compliance_core::models::notification::CveNotification> {
self.inner.collection("cve_notifications")
}
pub fn tracker_issues(&self) -> Collection<TrackerIssue> {
self.inner.collection("tracker_issues")
}
@@ -292,19 +235,6 @@ impl Database {
self.inner.collection("embedding_builds")
}
// Pentest collections
pub fn pentest_sessions(&self) -> Collection<PentestSession> {
self.inner.collection("pentest_sessions")
}
pub fn attack_chain_nodes(&self) -> Collection<AttackChainNode> {
self.inner.collection("attack_chain_nodes")
}
pub fn pentest_messages(&self) -> Collection<PentestMessage> {
self.inner.collection("pentest_messages")
}
#[allow(dead_code)]
pub fn raw_collection(&self, name: &str) -> Collection<mongodb::bson::Document> {
self.inner.collection(name)

View File

@@ -1,16 +0,0 @@
// Library entrypoint — re-exports for integration tests and the binary.
pub mod agent;
pub mod api;
pub mod config;
pub mod database;
pub mod error;
pub mod llm;
pub mod pentest;
pub mod pipeline;
pub mod rag;
pub mod scheduler;
pub mod ssh;
#[allow(dead_code)]
pub mod trackers;
pub mod webhooks;

View File

@@ -1,15 +1,66 @@
use secrecy::{ExposeSecret, SecretString};
use serde::{Deserialize, Serialize};
use super::types::*;
use crate::error::AgentError;
#[derive(Clone)]
pub struct LlmClient {
pub(crate) base_url: String,
pub(crate) api_key: SecretString,
pub(crate) model: String,
pub(crate) embed_model: String,
pub(crate) http: reqwest::Client,
base_url: String,
api_key: SecretString,
model: String,
embed_model: String,
http: reqwest::Client,
}
#[derive(Serialize)]
struct ChatMessage {
role: String,
content: String,
}
#[derive(Serialize)]
struct ChatCompletionRequest {
model: String,
messages: Vec<ChatMessage>,
#[serde(skip_serializing_if = "Option::is_none")]
temperature: Option<f64>,
#[serde(skip_serializing_if = "Option::is_none")]
max_tokens: Option<u32>,
}
#[derive(Deserialize)]
struct ChatCompletionResponse {
choices: Vec<ChatChoice>,
}
#[derive(Deserialize)]
struct ChatChoice {
message: ChatResponseMessage,
}
#[derive(Deserialize)]
struct ChatResponseMessage {
content: String,
}
/// Request body for the embeddings API
#[derive(Serialize)]
struct EmbeddingRequest {
model: String,
input: Vec<String>,
}
/// Response from the embeddings API
#[derive(Deserialize)]
struct EmbeddingResponse {
data: Vec<EmbeddingData>,
}
/// A single embedding result
#[derive(Deserialize)]
struct EmbeddingData {
embedding: Vec<f64>,
index: usize,
}
impl LlmClient {
@@ -28,142 +79,46 @@ impl LlmClient {
}
}
pub(crate) fn chat_url(&self) -> String {
format!(
"{}/v1/chat/completions",
self.base_url.trim_end_matches('/')
)
pub fn embed_model(&self) -> &str {
&self.embed_model
}
pub(crate) fn auth_header(&self) -> Option<String> {
let key = self.api_key.expose_secret();
if key.is_empty() {
None
} else {
Some(format!("Bearer {key}"))
}
}
/// Simple chat: system + user prompt → text response
pub async fn chat(
&self,
system_prompt: &str,
user_prompt: &str,
temperature: Option<f64>,
) -> Result<String, AgentError> {
let messages = vec![
ChatMessage {
role: "system".to_string(),
content: Some(system_prompt.to_string()),
tool_calls: None,
tool_call_id: None,
},
ChatMessage {
role: "user".to_string(),
content: Some(user_prompt.to_string()),
tool_calls: None,
tool_call_id: None,
},
];
let url = format!(
"{}/v1/chat/completions",
self.base_url.trim_end_matches('/')
);
let request_body = ChatCompletionRequest {
model: self.model.clone(),
messages,
temperature,
max_tokens: Some(4096),
tools: None,
};
self.send_chat_request(&request_body).await.map(|resp| {
match resp {
LlmResponse::Content(c) => c,
LlmResponse::ToolCalls { .. } => String::new(), // shouldn't happen without tools
}
})
}
/// Chat with a list of (role, content) messages → text response
#[allow(dead_code)]
pub async fn chat_with_messages(
&self,
messages: Vec<(String, String)>,
temperature: Option<f64>,
) -> Result<String, AgentError> {
let messages = messages
.into_iter()
.map(|(role, content)| ChatMessage {
role,
content: Some(content),
tool_calls: None,
tool_call_id: None,
})
.collect();
let request_body = ChatCompletionRequest {
model: self.model.clone(),
messages,
temperature,
max_tokens: Some(4096),
tools: None,
};
self.send_chat_request(&request_body)
.await
.map(|resp| match resp {
LlmResponse::Content(c) => c,
LlmResponse::ToolCalls { .. } => String::new(),
})
}
/// Chat with tool definitions — returns either content or tool calls.
/// Use this for the AI pentest orchestrator loop.
pub async fn chat_with_tools(
&self,
messages: Vec<ChatMessage>,
tools: &[ToolDefinition],
temperature: Option<f64>,
max_tokens: Option<u32>,
) -> Result<LlmResponse, AgentError> {
let tool_payloads: Vec<ToolDefinitionPayload> = tools
.iter()
.map(|t| ToolDefinitionPayload {
r#type: "function".to_string(),
function: ToolFunctionPayload {
name: t.name.clone(),
description: t.description.clone(),
parameters: t.parameters.clone(),
messages: vec![
ChatMessage {
role: "system".to_string(),
content: system_prompt.to_string(),
},
})
.collect();
let request_body = ChatCompletionRequest {
model: self.model.clone(),
messages,
ChatMessage {
role: "user".to_string(),
content: user_prompt.to_string(),
},
],
temperature,
max_tokens: Some(max_tokens.unwrap_or(8192)),
tools: if tool_payloads.is_empty() {
None
} else {
Some(tool_payloads)
},
max_tokens: Some(4096),
};
self.send_chat_request(&request_body).await
}
/// Internal method to send a chat completion request and parse the response
async fn send_chat_request(
&self,
request_body: &ChatCompletionRequest,
) -> Result<LlmResponse, AgentError> {
let mut req = self
.http
.post(self.chat_url())
.post(&url)
.header("content-type", "application/json")
.json(request_body);
.json(&request_body);
if let Some(auth) = self.auth_header() {
req = req.header("Authorization", auth);
let key = self.api_key.expose_secret();
if !key.is_empty() {
req = req.header("Authorization", format!("Bearer {key}"));
}
let resp = req
@@ -184,34 +139,110 @@ impl LlmClient {
.await
.map_err(|e| AgentError::Other(format!("Failed to parse LiteLLM response: {e}")))?;
let choice = body
.choices
body.choices
.first()
.ok_or_else(|| AgentError::Other("Empty response from LiteLLM".to_string()))?;
.map(|c| c.message.content.clone())
.ok_or_else(|| AgentError::Other("Empty response from LiteLLM".to_string()))
}
// Check for tool calls first
if let Some(tool_calls) = &choice.message.tool_calls {
if !tool_calls.is_empty() {
let calls: Vec<LlmToolCall> = tool_calls
.iter()
.map(|tc| {
let arguments = serde_json::from_str(&tc.function.arguments)
.unwrap_or(serde_json::Value::Object(serde_json::Map::new()));
LlmToolCall {
id: tc.id.clone(),
name: tc.function.name.clone(),
arguments,
}
})
.collect();
// Capture any reasoning text the LLM included alongside tool calls
let reasoning = choice.message.content.clone().unwrap_or_default();
return Ok(LlmResponse::ToolCalls { calls, reasoning });
}
#[allow(dead_code)]
pub async fn chat_with_messages(
&self,
messages: Vec<(String, String)>,
temperature: Option<f64>,
) -> Result<String, AgentError> {
let url = format!(
"{}/v1/chat/completions",
self.base_url.trim_end_matches('/')
);
let request_body = ChatCompletionRequest {
model: self.model.clone(),
messages: messages
.into_iter()
.map(|(role, content)| ChatMessage { role, content })
.collect(),
temperature,
max_tokens: Some(4096),
};
let mut req = self
.http
.post(&url)
.header("content-type", "application/json")
.json(&request_body);
let key = self.api_key.expose_secret();
if !key.is_empty() {
req = req.header("Authorization", format!("Bearer {key}"));
}
// Otherwise return content
let content = choice.message.content.clone().unwrap_or_default();
Ok(LlmResponse::Content(content))
let resp = req
.send()
.await
.map_err(|e| AgentError::Other(format!("LiteLLM request failed: {e}")))?;
if !resp.status().is_success() {
let status = resp.status();
let body = resp.text().await.unwrap_or_default();
return Err(AgentError::Other(format!(
"LiteLLM returned {status}: {body}"
)));
}
let body: ChatCompletionResponse = resp
.json()
.await
.map_err(|e| AgentError::Other(format!("Failed to parse LiteLLM response: {e}")))?;
body.choices
.first()
.map(|c| c.message.content.clone())
.ok_or_else(|| AgentError::Other("Empty response from LiteLLM".to_string()))
}
/// Generate embeddings for a batch of texts
pub async fn embed(&self, texts: Vec<String>) -> Result<Vec<Vec<f64>>, AgentError> {
let url = format!("{}/v1/embeddings", self.base_url.trim_end_matches('/'));
let request_body = EmbeddingRequest {
model: self.embed_model.clone(),
input: texts,
};
let mut req = self
.http
.post(&url)
.header("content-type", "application/json")
.json(&request_body);
let key = self.api_key.expose_secret();
if !key.is_empty() {
req = req.header("Authorization", format!("Bearer {key}"));
}
let resp = req
.send()
.await
.map_err(|e| AgentError::Other(format!("Embedding request failed: {e}")))?;
if !resp.status().is_success() {
let status = resp.status();
let body = resp.text().await.unwrap_or_default();
return Err(AgentError::Other(format!(
"Embedding API returned {status}: {body}"
)));
}
let body: EmbeddingResponse = resp
.json()
.await
.map_err(|e| AgentError::Other(format!("Failed to parse embedding response: {e}")))?;
// Sort by index to maintain input order
let mut data = body.data;
data.sort_by_key(|d| d.index);
Ok(data.into_iter().map(|d| d.embedding).collect())
}
}

View File

@@ -5,20 +5,15 @@ use compliance_core::models::Finding;
use crate::error::AgentError;
use crate::llm::LlmClient;
const DESCRIPTION_SYSTEM_PROMPT: &str = r#"You are a security engineer writing a bug tracker issue for a developer to fix. Be direct and actionable — developers skim issue descriptions, so lead with what matters.
const DESCRIPTION_SYSTEM_PROMPT: &str = r#"You are a security engineer writing issue descriptions for a bug tracker. Generate a clear, actionable issue body in Markdown format that includes:
Format in Markdown:
1. **Summary**: 1-2 sentence overview
2. **Evidence**: Code location, snippet, and what was detected
3. **Impact**: What could happen if not fixed
4. **Remediation**: Step-by-step fix instructions
5. **References**: Relevant CWE/CVE links if applicable
1. **What**: 1 sentence — what's wrong and where (file:line)
2. **Why it matters**: 1-2 sentences — concrete impact if not fixed. Avoid generic "could lead to" phrasing; describe the specific attack or failure scenario.
3. **Fix**: The specific code change needed. Use a code block with the corrected code if possible. If the fix is configuration-based, show the exact config change.
4. **References**: CWE/CVE link if applicable (one line, not a section)
Rules:
- No filler paragraphs or background explanations
- No restating the finding title in the body
- Code blocks should show the FIX, not the vulnerable code (the developer can see that in the diff)
- If the remediation is a one-liner, just say it — don't wrap it in a section header"#;
Keep it concise and professional. Use code blocks for code snippets."#;
pub async fn generate_issue_description(
llm: &Arc<LlmClient>,

View File

@@ -1,74 +0,0 @@
use serde::{Deserialize, Serialize};
use super::client::LlmClient;
use crate::error::AgentError;
// ── Embedding types ────────────────────────────────────────────
#[derive(Serialize)]
struct EmbeddingRequest {
model: String,
input: Vec<String>,
}
#[derive(Deserialize)]
struct EmbeddingResponse {
data: Vec<EmbeddingData>,
}
#[derive(Deserialize)]
struct EmbeddingData {
embedding: Vec<f64>,
index: usize,
}
// ── Embedding implementation ───────────────────────────────────
impl LlmClient {
pub fn embed_model(&self) -> &str {
&self.embed_model
}
/// Generate embeddings for a batch of texts
pub async fn embed(&self, texts: Vec<String>) -> Result<Vec<Vec<f64>>, AgentError> {
let url = format!("{}/v1/embeddings", self.base_url.trim_end_matches('/'));
let request_body = EmbeddingRequest {
model: self.embed_model.clone(),
input: texts,
};
let mut req = self
.http
.post(&url)
.header("content-type", "application/json")
.json(&request_body);
if let Some(auth) = self.auth_header() {
req = req.header("Authorization", auth);
}
let resp = req
.send()
.await
.map_err(|e| AgentError::Other(format!("Embedding request failed: {e}")))?;
if !resp.status().is_success() {
let status = resp.status();
let body = resp.text().await.unwrap_or_default();
return Err(AgentError::Other(format!(
"Embedding API returned {status}: {body}"
)));
}
let body: EmbeddingResponse = resp
.json()
.await
.map_err(|e| AgentError::Other(format!("Failed to parse embedding response: {e}")))?;
let mut data = body.data;
data.sort_by_key(|d| d.index);
Ok(data.into_iter().map(|d| d.embedding).collect())
}
}

View File

@@ -5,24 +5,7 @@ use compliance_core::models::Finding;
use crate::error::AgentError;
use crate::llm::LlmClient;
const FIX_SYSTEM_PROMPT: &str = r#"You are a security engineer suggesting a code fix. Return ONLY the corrected code that replaces the vulnerable snippet — no explanations, no markdown fences, no before/after comparison.
Rules:
- The fix must be a drop-in replacement for the vulnerable code
- Preserve the original code's style, indentation, and naming conventions
- Add at most one brief inline comment on the changed line explaining the security fix
- If the fix requires importing a new module, include the import on a separate line prefixed with the language's comment syntax + "Add import: "
- Do not refactor, rename variables, or "improve" unrelated code
- If the vulnerability is a false positive and the code is actually safe, return the original code unchanged with a comment explaining why no fix is needed
Language-specific fix guidance:
- Rust: use `?` for error propagation, prefer `SecretString` for secrets, use parameterized queries with `sqlx`/`diesel`
- Python: use parameterized queries (never f-strings in SQL), use `secrets` module not `random`, use `subprocess.run([...])` list form, use `markupsafe.escape()` for HTML
- Go: use `sql.Query` with `$1`/`?` placeholders, use `crypto/rand` not `math/rand`, use `html/template` not `text/template`, return errors don't panic
- Java/Kotlin: use `PreparedStatement` with `?` params, use `SecureRandom`, use `Jsoup.clean()` for HTML sanitization, use `@Valid` for input validation
- Ruby: use ActiveRecord parameterized finders, use `SecureRandom`, use `ERB::Util.html_escape`, use `strong_parameters`
- PHP: use PDO prepared statements with `:param` or `?`, use `random_bytes()`/`random_int()`, use `htmlspecialchars()` with `ENT_QUOTES`, use `password_hash(PASSWORD_BCRYPT)`
- C/C++: use `snprintf` not `sprintf`, use bounds-checked APIs, free resources in reverse allocation order, use `memset_s` for secret cleanup"#;
const FIX_SYSTEM_PROMPT: &str = r#"You are a security engineer. Given a security finding with code context, suggest a concrete code fix. Return ONLY the fixed code snippet that can directly replace the vulnerable code. Include brief inline comments explaining the fix."#;
pub async fn suggest_fix(llm: &Arc<LlmClient>, finding: &Finding) -> Result<String, AgentError> {
let user_prompt = format!(

View File

@@ -1,16 +1,10 @@
pub mod client;
#[allow(dead_code)]
pub mod descriptions;
pub mod embedding;
#[allow(dead_code)]
pub mod fixes;
#[allow(dead_code)]
pub mod pr_review;
pub mod review_prompts;
pub mod triage;
pub mod types;
pub use client::LlmClient;
pub use types::{
ChatMessage, LlmResponse, ToolCallRequest, ToolCallRequestFunction, ToolDefinition,
};

View File

@@ -1,146 +0,0 @@
// System prompts for multi-pass LLM code review.
// Each pass focuses on a different aspect to avoid overloading a single prompt.
pub const LOGIC_REVIEW_PROMPT: &str = r#"You are a senior software engineer reviewing a code diff. Report ONLY genuine logic bugs that would cause incorrect behavior at runtime.
Report:
- Off-by-one errors, wrong comparisons, missing edge cases that cause wrong results
- Incorrect control flow that produces wrong output (not style preferences)
- Actual race conditions with concrete shared-state mutation (not theoretical ones)
- Resource leaks where cleanup is truly missing (not just "could be improved")
- Wrong variable used (copy-paste errors) — must be provably wrong, not just suspicious
- Swallowed errors that silently hide failures in a way that matters
Do NOT report:
- Style, naming, formatting, documentation, or code organization preferences
- Theoretical issues without a concrete triggering scenario
- "Potential" problems that require assumptions not supported by the visible code
- Complexity or function length — that's a separate review pass
Language-idiomatic patterns that are NOT bugs (do not flag these):
- Rust: `||`/`&&` short-circuit evaluation, variable shadowing, `let` rebinding, `clone()`, `impl` blocks, `match` arms with guards, `?` operator chaining, `unsafe` blocks with safety comments
- Python: duck typing, EAFP pattern (try/except vs check-first), `*args`/`**kwargs`, walrus operator `:=`, truthiness checks on containers, bare `except:` in top-level handlers
- Go: multiple return values for errors, `if err != nil` patterns, goroutine + channel patterns, blank identifier `_`, named returns, `defer` for cleanup, `init()` functions
- Java/Kotlin: checked exception patterns, method overloading, `Optional` vs null checks, Kotlin `?.` safe calls, `!!` non-null assertions in tests, `when` exhaustive matching, companion objects, `lateinit`
- Ruby: monkey patching in libraries, method_missing, blocks/procs/lambdas, `rescue => e` patterns, `send`/`respond_to?` metaprogramming, `nil` checks via `&.` safe navigation
- PHP: loose comparisons with `==` (only flag if `===` was clearly intended), `@` error suppression in legacy code, `isset()`/`empty()` patterns, magic methods (`__get`, `__call`), array functions as callbacks
- C/C++: RAII patterns, move semantics, `const_cast`/`static_cast` in appropriate contexts, macro usage for platform compat, pointer arithmetic in low-level code, `goto` for cleanup in C
Severity guide:
- high: Will cause incorrect behavior in normal usage
- medium: Will cause incorrect behavior in edge cases
- low: Minor correctness concern with limited blast radius
Prefer returning [] over reporting low-confidence guesses. A false positive wastes more developer time than a missed low-severity issue.
Respond with a JSON array (no markdown fences):
[{"title": "...", "description": "...", "severity": "high|medium|low", "file": "...", "line": N, "suggestion": "..."}]
If no issues found, respond with: []"#;
pub const SECURITY_REVIEW_PROMPT: &str = r#"You are a security engineer reviewing a code diff. Report ONLY exploitable security vulnerabilities with a realistic attack scenario.
Report:
- Injection vulnerabilities (SQL, command, XSS, template) where untrusted input reaches a sink
- Authentication/authorization bypasses with a concrete exploit path
- Sensitive data exposure: secrets in code, credentials in logs, PII leaks
- Insecure cryptography: weak algorithms, predictable randomness, hardcoded keys
- Path traversal, SSRF, open redirects — only where user input reaches the vulnerable API
- Unsafe deserialization of untrusted data
- Missing input validation at EXTERNAL trust boundaries (user input, API responses)
Do NOT report:
- Internal code that only handles trusted/validated data
- Hash functions used for non-security purposes (dedup fingerprints, cache keys, content addressing)
- Logging of non-sensitive operational data (finding titles, counts, performance metrics)
- "Information disclosure" for data that is already public or user-facing
- Code style, performance, or general quality issues
- Missing validation on internal function parameters (trust the caller within the same module/crate/package)
- Theoretical attacks that require preconditions not present in the code
Language-specific patterns that are NOT vulnerabilities (do not flag these):
- Python: `pickle` used on trusted internal data, `eval()`/`exec()` on hardcoded strings, `subprocess` with hardcoded commands, Django `mark_safe()` on static content, `assert` in non-security contexts
- Go: `crypto/rand` is secure (don't confuse with `math/rand`), `sql.DB` with parameterized queries is safe, `http.ListenAndServe` without TLS in dev/internal, error strings in responses (Go convention)
- Java/Kotlin: Spring Security annotations are sufficient auth checks, `@Transactional` provides atomicity, JPA parameterized queries are safe, Kotlin `require()`/`check()` are assertion patterns not vulnerabilities
- Ruby: Rails `params.permit()` is input validation, `render html:` with `html_safe` on generated content, ActiveRecord parameterized finders are safe, Devise/Warden patterns for auth
- PHP: PDO prepared statements are safe, Laravel Eloquent is parameterized, `htmlspecialchars()` is XSS mitigation, Symfony security voters are auth checks, `password_hash()`/`password_verify()` are correct bcrypt usage
- C/C++: `strncpy`/`snprintf` are bounds-checked (vs `strcpy`/`sprintf`), smart pointers manage memory, RAII handles cleanup, `static_assert` is compile-time only, OpenSSL with proper context setup
- Rust: `sha2`/`blake3` for fingerprinting is not "weak crypto", `unsafe` with documented invariants, `secrecy::SecretString` properly handles secrets
Severity guide:
- critical: Remote code execution, auth bypass, or data breach with no preconditions
- high: Exploitable vulnerability requiring minimal preconditions
- medium: Vulnerability requiring specific conditions or limited impact
Prefer returning [] over reporting speculative vulnerabilities. Every false positive erodes trust in the scanner.
Respond with a JSON array (no markdown fences):
[{"title": "...", "description": "...", "severity": "critical|high|medium", "file": "...", "line": N, "cwe": "CWE-XXX", "suggestion": "..."}]
If no issues found, respond with: []"#;
pub const CONVENTION_REVIEW_PROMPT: &str = r#"You are a code reviewer checking for convention violations that indicate likely bugs. Report ONLY deviations from the project's visible patterns that could cause real problems.
Report:
- Inconsistent error handling within the same module where the inconsistency could hide failures
- Public API that breaks the module's established contract (not just different style)
- Anti-patterns that are bugs in this language: e.g. `unwrap()` in Rust library code where the CI enforces `clippy::unwrap_used`, `any` defeating TypeScript's type system
Do NOT report:
- Style preferences, formatting, naming conventions, or documentation
- Code organization suggestions ("this function should be split")
- Patterns that are valid in the language even if you'd write them differently
- "Missing type annotations" unless the code literally won't compile or causes a type inference bug
Language-specific patterns that are conventional (do not flag these):
- Rust: variable shadowing, `||`/`&&` short-circuit, `let` rebinding, builder patterns, `clone()`, `From`/`Into` impl chains, `#[allow(...)]` attributes
- Python: `**kwargs` forwarding, `@property` setters, `__dunder__` methods, list comprehensions with conditions, `if TYPE_CHECKING` imports, `noqa` comments
- Go: stuttering names (`http.HTTPClient`) discouraged but not a bug, `context.Context` as first param, init() functions, `//nolint` directives, returning concrete types vs interfaces in internal code
- Java/Kotlin: builder pattern boilerplate, Lombok annotations (`@Data`, `@Builder`), Kotlin data classes, `companion object` factories, `@Suppress` annotations, checked exception wrapping
- Ruby: `attr_accessor` usage, `Enumerable` mixin patterns, `module_function`, `class << self` syntax, DSL blocks (Rake, RSpec, Sinatra routes)
- PHP: `__construct` with property promotion, Laravel facades, static factory methods, nullable types with `?`, attribute syntax `#[...]`
- C/C++: header guards vs `#pragma once`, forward declarations, `const` correctness patterns, template specialization, `auto` type deduction
Severity guide:
- medium: Convention violation that will likely cause a bug or maintenance problem
- low: Convention violation that is a minor concern
Return at most 3 findings. Prefer [] over marginal findings.
Respond with a JSON array (no markdown fences):
[{"title": "...", "description": "...", "severity": "medium|low", "file": "...", "line": N, "suggestion": "..."}]
If no issues found, respond with: []"#;
pub const COMPLEXITY_REVIEW_PROMPT: &str = r#"You are reviewing code changes for complexity that is likely to cause bugs. Report ONLY complexity that makes the code demonstrably harder to reason about.
Report:
- Functions over 80 lines with multiple interleaved responsibilities (not just long)
- Deeply nested control flow (5+ levels) where flattening would prevent bugs
- Complex boolean expressions that a reader would likely misinterpret
Do NOT report:
- Functions that are long but linear and easy to follow
- Acceptable complexity: configuration setup, CLI parsing, test helpers, builder patterns
- Code that is complex because the problem is complex — only report if restructuring would reduce bug risk
- "This function does multiple things" unless you can identify a specific bug risk from the coupling
- Suggestions that would just move complexity elsewhere without reducing it
Severity guide:
- medium: Complexity that has a concrete risk of causing bugs during future changes
- low: Complexity that makes review harder but is unlikely to cause bugs
Return at most 2 findings. Prefer [] over reporting complexity that is justified.
Respond with a JSON array (no markdown fences):
[{"title": "...", "description": "...", "severity": "medium|low", "file": "...", "line": N, "suggestion": "..."}]
If no issues found, respond with: []"#;
/// All review types with their prompts
pub const REVIEW_PASSES: &[(&str, &str)] = &[
("logic", LOGIC_REVIEW_PROMPT),
("security", SECURITY_REVIEW_PROMPT),
("convention", CONVENTION_REVIEW_PROMPT),
("complexity", COMPLEXITY_REVIEW_PROMPT),
];

View File

@@ -5,49 +5,13 @@ use compliance_core::models::{Finding, FindingStatus};
use crate::llm::LlmClient;
use crate::pipeline::orchestrator::GraphContext;
/// Maximum number of findings to include in a single LLM triage call.
const TRIAGE_CHUNK_SIZE: usize = 30;
const TRIAGE_SYSTEM_PROMPT: &str = r#"You are a security finding triage expert. Analyze the following security finding and determine:
1. Is this a true positive? (yes/no)
2. Confidence score (0-10, where 10 is highest confidence this is a real issue)
3. Brief remediation suggestion (1-2 sentences)
const TRIAGE_SYSTEM_PROMPT: &str = r#"You are a pragmatic security triage expert. Your job is to filter out noise and keep only findings that a developer should actually fix. Be aggressive about dismissing false positives — a clean, high-signal list is more valuable than a comprehensive one.
Actions:
- "confirm": True positive with real impact. Keep severity as-is.
- "downgrade": Real issue but over-reported severity. Lower it.
- "upgrade": Under-reported — higher severity warranted.
- "dismiss": False positive, not exploitable, or not actionable. Remove it.
Dismiss when:
- The scanner flagged a language idiom as a bug (see examples below)
- The finding is in test/example/generated/vendored code
- The "vulnerability" requires preconditions that don't exist in the code
- The finding is about code style, complexity, or theoretical concerns rather than actual bugs
- A hash function is used for non-security purposes (dedup, caching, content addressing)
- Internal logging of non-sensitive operational data is flagged as "information disclosure"
- The finding duplicates another finding already in the list
- Framework-provided security is already in place (e.g. ORM parameterized queries, CSRF middleware, auth decorators)
Common false positive patterns by language (dismiss these):
- Rust: short-circuit `||`/`&&`, variable shadowing, `clone()`, `unsafe` with safety docs, `sha2` for fingerprinting
- Python: EAFP try/except, `subprocess` with hardcoded args, `pickle` on trusted data, Django `mark_safe` on static content
- Go: `if err != nil` is not "swallowed error", `crypto/rand` is secure, returning errors is not "information disclosure"
- Java/Kotlin: Spring Security annotations are valid auth, JPA parameterized queries are safe, Kotlin `!!` in tests is fine
- Ruby: Rails `params.permit` is validation, ActiveRecord finders are parameterized, `html_safe` on generated content
- PHP: PDO prepared statements are safe, Laravel Eloquent is parameterized, `htmlspecialchars` is XSS mitigation
- C/C++: `strncpy`/`snprintf` are bounds-checked, smart pointers manage memory, RAII handles cleanup
Confirm only when:
- You can describe a concrete scenario where the bug manifests or the vulnerability is exploitable
- The fix is actionable (developer can change specific code to resolve it)
- The finding is in production code that handles external input or sensitive data
Confidence scoring (0-10):
- 8-10: Certain true positive with clear exploit/bug scenario
- 5-7: Likely true positive, some assumptions required
- 3-4: Uncertain, needs manual review
- 0-2: Almost certainly a false positive
Respond with a JSON array, one entry per finding in the same order presented (no markdown fences):
[{"id": "<fingerprint>", "action": "confirm|downgrade|upgrade|dismiss", "confidence": 0-10, "rationale": "1-2 sentences", "remediation": "optional fix"}, ...]"#;
Respond in JSON format:
{"true_positive": true/false, "confidence": N, "remediation": "..."}"#;
pub async fn triage_findings(
llm: &Arc<LlmClient>,
@@ -56,162 +20,94 @@ pub async fn triage_findings(
) -> usize {
let mut passed = 0;
// Process findings in chunks to avoid overflowing the LLM context window.
for chunk_start in (0..findings.len()).step_by(TRIAGE_CHUNK_SIZE) {
let chunk_end = (chunk_start + TRIAGE_CHUNK_SIZE).min(findings.len());
let chunk = &mut findings[chunk_start..chunk_end];
for finding in findings.iter_mut() {
let mut user_prompt = format!(
"Scanner: {}\nRule: {}\nSeverity: {}\nTitle: {}\nDescription: {}\nFile: {}\nLine: {}\nCode: {}",
finding.scanner,
finding.rule_id.as_deref().unwrap_or("N/A"),
finding.severity,
finding.title,
finding.description,
finding.file_path.as_deref().unwrap_or("N/A"),
finding.line_number.map(|n| n.to_string()).unwrap_or_else(|| "N/A".to_string()),
finding.code_snippet.as_deref().unwrap_or("N/A"),
);
// Build a combined prompt for the entire chunk.
let mut user_prompt = String::new();
let mut file_classifications: Vec<String> = Vec::new();
for (i, finding) in chunk.iter().enumerate() {
let file_classification = classify_file_path(finding.file_path.as_deref());
user_prompt.push_str(&format!(
"\n--- Finding {} (id: {}) ---\nScanner: {}\nRule: {}\nSeverity: {}\nTitle: {}\nDescription: {}\nFile: {}\nLine: {}\nCode: {}\nFile classification: {}",
i + 1,
finding.fingerprint,
finding.scanner,
finding.rule_id.as_deref().unwrap_or("N/A"),
finding.severity,
finding.title,
finding.description,
finding.file_path.as_deref().unwrap_or("N/A"),
finding.line_number.map(|n| n.to_string()).unwrap_or_else(|| "N/A".to_string()),
finding.code_snippet.as_deref().unwrap_or("N/A"),
file_classification,
));
// Enrich with surrounding code context if possible
if let Some(context) = read_surrounding_context(finding) {
// Enrich with graph context if available
if let Some(ctx) = graph_context {
if let Some(impact) = ctx
.impacts
.iter()
.find(|i| i.finding_id == finding.fingerprint)
{
user_prompt.push_str(&format!(
"\n\n--- Surrounding Code (50 lines) ---\n{context}"
"\n\n--- Code Graph Context ---\n\
Blast radius: {} nodes affected\n\
Entry points affected: {}\n\
Direct callers: {}\n\
Communities affected: {}\n\
Call chains: {}",
impact.blast_radius,
if impact.affected_entry_points.is_empty() {
"none".to_string()
} else {
impact.affected_entry_points.join(", ")
},
if impact.direct_callers.is_empty() {
"none".to_string()
} else {
impact.direct_callers.join(", ")
},
impact.affected_communities.len(),
impact.call_chains.len(),
));
}
// Enrich with graph context if available
if let Some(ctx) = graph_context {
if let Some(impact) = ctx
.impacts
.iter()
.find(|im| im.finding_id == finding.fingerprint)
{
user_prompt.push_str(&format!(
"\n\n--- Code Graph Context ---\n\
Blast radius: {} nodes affected\n\
Entry points affected: {}\n\
Direct callers: {}\n\
Communities affected: {}\n\
Call chains: {}",
impact.blast_radius,
if impact.affected_entry_points.is_empty() {
"none".to_string()
} else {
impact.affected_entry_points.join(", ")
},
if impact.direct_callers.is_empty() {
"none".to_string()
} else {
impact.direct_callers.join(", ")
},
impact.affected_communities.len(),
impact.call_chains.len(),
));
}
}
user_prompt.push('\n');
file_classifications.push(file_classification);
}
// Send the batch to the LLM.
match llm
.chat(TRIAGE_SYSTEM_PROMPT, &user_prompt, Some(0.1))
.await
{
Ok(response) => {
// Strip markdown code fences if present (e.g. ```json ... ```)
let cleaned = response.trim();
let cleaned = if cleaned.starts_with("```") {
cleaned
let inner = cleaned
.trim_start_matches("```json")
.trim_start_matches("```")
.trim_end_matches("```")
.trim()
.trim();
inner
} else {
cleaned
};
match serde_json::from_str::<Vec<TriageResult>>(cleaned) {
Ok(results) => {
for (idx, finding) in chunk.iter_mut().enumerate() {
// Match result by position; fall back to keeping the finding.
let Some(result) = results.get(idx) else {
finding.status = FindingStatus::Triaged;
passed += 1;
continue;
};
let file_classification = file_classifications
.get(idx)
.map(|s| s.as_str())
.unwrap_or("unknown");
let adjusted_confidence =
adjust_confidence(result.confidence, file_classification);
finding.confidence = Some(adjusted_confidence);
finding.triage_action = Some(result.action.clone());
finding.triage_rationale = Some(result.rationale.clone());
if let Some(ref remediation) = result.remediation {
finding.remediation = Some(remediation.clone());
}
match result.action.as_str() {
"dismiss" => {
finding.status = FindingStatus::FalsePositive;
}
"downgrade" => {
finding.severity = downgrade_severity(&finding.severity);
finding.status = FindingStatus::Triaged;
passed += 1;
}
"upgrade" => {
finding.severity = upgrade_severity(&finding.severity);
finding.status = FindingStatus::Triaged;
passed += 1;
}
_ => {
// "confirm" or unknown — keep as-is
if adjusted_confidence >= 3.0 {
finding.status = FindingStatus::Triaged;
passed += 1;
} else {
finding.status = FindingStatus::FalsePositive;
}
}
}
}
if let Ok(result) = serde_json::from_str::<TriageResult>(cleaned) {
finding.confidence = Some(result.confidence);
if let Some(remediation) = result.remediation {
finding.remediation = Some(remediation);
}
Err(_) => {
// Batch parse failure — keep all findings in the chunk.
tracing::warn!(
"Failed to parse batch triage response for chunk starting at {chunk_start}: {cleaned}"
);
for finding in chunk.iter_mut() {
finding.status = FindingStatus::Triaged;
passed += 1;
}
if result.confidence >= 3.0 {
finding.status = FindingStatus::Triaged;
passed += 1;
} else {
finding.status = FindingStatus::FalsePositive;
}
} else {
// If LLM response doesn't parse, keep the finding
finding.status = FindingStatus::Triaged;
passed += 1;
tracing::warn!(
"Failed to parse triage response for {}: {response}",
finding.fingerprint
);
}
}
Err(e) => {
// On LLM error, keep all findings in the chunk.
tracing::warn!("LLM batch triage failed for chunk starting at {chunk_start}: {e}");
for finding in chunk.iter_mut() {
finding.status = FindingStatus::Triaged;
passed += 1;
}
// On LLM error, keep the finding
tracing::warn!("LLM triage failed for {}: {e}", finding.fingerprint);
finding.status = FindingStatus::Triaged;
passed += 1;
}
}
}
@@ -221,343 +117,12 @@ pub async fn triage_findings(
passed
}
/// Read ~50 lines of surrounding code from the file at the finding's location
fn read_surrounding_context(finding: &Finding) -> Option<String> {
let file_path = finding.file_path.as_deref()?;
let line = finding.line_number? as usize;
// Try to read the file — this works because the repo is cloned locally
let content = std::fs::read_to_string(file_path).ok()?;
let lines: Vec<&str> = content.lines().collect();
let start = line.saturating_sub(25);
let end = (line + 25).min(lines.len());
Some(
lines[start..end]
.iter()
.enumerate()
.map(|(i, l)| format!("{:>4} | {}", start + i + 1, l))
.collect::<Vec<_>>()
.join("\n"),
)
}
/// Classify a file path to inform triage confidence adjustment
fn classify_file_path(path: Option<&str>) -> String {
let path = match path {
Some(p) => p.to_lowercase(),
None => return "unknown".to_string(),
};
if path.contains("/test/")
|| path.contains("/tests/")
|| path.contains("_test.")
|| path.contains(".test.")
|| path.contains(".spec.")
|| path.contains("/fixtures/")
|| path.contains("/testdata/")
{
return "test".to_string();
}
if path.contains("/example")
|| path.contains("/examples/")
|| path.contains("/demo/")
|| path.contains("/sample")
{
return "example".to_string();
}
if path.contains("/generated/")
|| path.contains("/gen/")
|| path.contains(".generated.")
|| path.contains(".pb.go")
|| path.contains("_generated.rs")
{
return "generated".to_string();
}
if path.contains("/vendor/")
|| path.contains("/node_modules/")
|| path.contains("/third_party/")
{
return "vendored".to_string();
}
"production".to_string()
}
/// Adjust confidence based on file classification
fn adjust_confidence(raw_confidence: f64, classification: &str) -> f64 {
let multiplier = match classification {
"test" => 0.5,
"example" => 0.6,
"generated" => 0.3,
"vendored" => 0.4,
_ => 1.0,
};
raw_confidence * multiplier
}
fn downgrade_severity(
severity: &compliance_core::models::Severity,
) -> compliance_core::models::Severity {
use compliance_core::models::Severity;
match severity {
Severity::Critical => Severity::High,
Severity::High => Severity::Medium,
Severity::Medium => Severity::Low,
Severity::Low => Severity::Info,
Severity::Info => Severity::Info,
}
}
fn upgrade_severity(
severity: &compliance_core::models::Severity,
) -> compliance_core::models::Severity {
use compliance_core::models::Severity;
match severity {
Severity::Info => Severity::Low,
Severity::Low => Severity::Medium,
Severity::Medium => Severity::High,
Severity::High => Severity::Critical,
Severity::Critical => Severity::Critical,
}
}
#[derive(serde::Deserialize)]
struct TriageResult {
/// Finding fingerprint echoed back by the LLM (optional).
#[serde(default)]
#[allow(dead_code)]
id: String,
#[serde(default = "default_action")]
action: String,
true_positive: bool,
#[serde(default)]
confidence: f64,
#[serde(default)]
rationale: String,
remediation: Option<String>,
}
fn default_action() -> String {
"confirm".to_string()
}
#[cfg(test)]
mod tests {
use super::*;
use compliance_core::models::Severity;
// ── classify_file_path ───────────────────────────────────────
#[test]
fn classify_none_path() {
assert_eq!(classify_file_path(None), "unknown");
}
#[test]
fn classify_production_path() {
assert_eq!(classify_file_path(Some("src/main.rs")), "production");
assert_eq!(classify_file_path(Some("lib/core/engine.py")), "production");
}
#[test]
fn classify_test_paths() {
assert_eq!(classify_file_path(Some("src/test/helper.rs")), "test");
assert_eq!(classify_file_path(Some("src/tests/unit.rs")), "test");
assert_eq!(classify_file_path(Some("foo_test.go")), "test");
assert_eq!(classify_file_path(Some("bar.test.js")), "test");
assert_eq!(classify_file_path(Some("baz.spec.ts")), "test");
assert_eq!(
classify_file_path(Some("data/fixtures/sample.json")),
"test"
);
assert_eq!(classify_file_path(Some("src/testdata/input.txt")), "test");
}
#[test]
fn classify_example_paths() {
assert_eq!(
classify_file_path(Some("docs/examples/basic.rs")),
"example"
);
// /example matches because contains("/example")
assert_eq!(classify_file_path(Some("src/example/main.py")), "example");
assert_eq!(classify_file_path(Some("src/demo/run.sh")), "example");
assert_eq!(classify_file_path(Some("src/sample/lib.rs")), "example");
}
#[test]
fn classify_generated_paths() {
assert_eq!(
classify_file_path(Some("src/generated/api.rs")),
"generated"
);
assert_eq!(
classify_file_path(Some("proto/gen/service.go")),
"generated"
);
assert_eq!(classify_file_path(Some("api.generated.ts")), "generated");
assert_eq!(classify_file_path(Some("service.pb.go")), "generated");
assert_eq!(classify_file_path(Some("model_generated.rs")), "generated");
}
#[test]
fn classify_vendored_paths() {
// Implementation checks for /vendor/, /node_modules/, /third_party/ (with slashes)
assert_eq!(
classify_file_path(Some("src/vendor/lib/foo.go")),
"vendored"
);
assert_eq!(
classify_file_path(Some("src/node_modules/pkg/index.js")),
"vendored"
);
assert_eq!(
classify_file_path(Some("src/third_party/lib.c")),
"vendored"
);
}
#[test]
fn classify_is_case_insensitive() {
assert_eq!(classify_file_path(Some("src/TEST/Helper.rs")), "test");
assert_eq!(classify_file_path(Some("src/VENDOR/lib.go")), "vendored");
assert_eq!(
classify_file_path(Some("src/GENERATED/foo.ts")),
"generated"
);
}
// ── adjust_confidence ────────────────────────────────────────
#[test]
fn adjust_confidence_production() {
assert_eq!(adjust_confidence(8.0, "production"), 8.0);
}
#[test]
fn adjust_confidence_test() {
assert_eq!(adjust_confidence(10.0, "test"), 5.0);
}
#[test]
fn adjust_confidence_example() {
assert_eq!(adjust_confidence(10.0, "example"), 6.0);
}
#[test]
fn adjust_confidence_generated() {
assert_eq!(adjust_confidence(10.0, "generated"), 3.0);
}
#[test]
fn adjust_confidence_vendored() {
assert_eq!(adjust_confidence(10.0, "vendored"), 4.0);
}
#[test]
fn adjust_confidence_unknown_classification() {
assert_eq!(adjust_confidence(7.0, "unknown"), 7.0);
assert_eq!(adjust_confidence(7.0, "something_else"), 7.0);
}
#[test]
fn adjust_confidence_zero() {
assert_eq!(adjust_confidence(0.0, "test"), 0.0);
assert_eq!(adjust_confidence(0.0, "production"), 0.0);
}
// ── downgrade_severity ───────────────────────────────────────
#[test]
fn downgrade_severity_all_levels() {
assert_eq!(downgrade_severity(&Severity::Critical), Severity::High);
assert_eq!(downgrade_severity(&Severity::High), Severity::Medium);
assert_eq!(downgrade_severity(&Severity::Medium), Severity::Low);
assert_eq!(downgrade_severity(&Severity::Low), Severity::Info);
assert_eq!(downgrade_severity(&Severity::Info), Severity::Info);
}
#[test]
fn downgrade_severity_info_is_floor() {
// Downgrading Info twice should still be Info
let s = downgrade_severity(&Severity::Info);
assert_eq!(downgrade_severity(&s), Severity::Info);
}
// ── upgrade_severity ─────────────────────────────────────────
#[test]
fn upgrade_severity_all_levels() {
assert_eq!(upgrade_severity(&Severity::Info), Severity::Low);
assert_eq!(upgrade_severity(&Severity::Low), Severity::Medium);
assert_eq!(upgrade_severity(&Severity::Medium), Severity::High);
assert_eq!(upgrade_severity(&Severity::High), Severity::Critical);
assert_eq!(upgrade_severity(&Severity::Critical), Severity::Critical);
}
#[test]
fn upgrade_severity_critical_is_ceiling() {
let s = upgrade_severity(&Severity::Critical);
assert_eq!(upgrade_severity(&s), Severity::Critical);
}
// ── upgrade/downgrade roundtrip ──────────────────────────────
#[test]
fn upgrade_then_downgrade_is_identity_for_middle_values() {
for sev in [Severity::Low, Severity::Medium, Severity::High] {
assert_eq!(downgrade_severity(&upgrade_severity(&sev)), sev);
}
}
// ── TriageResult deserialization ─────────────────────────────
#[test]
fn triage_result_full() {
let json = r#"{"action":"dismiss","confidence":8.5,"rationale":"false positive","remediation":"remove code"}"#;
let r: TriageResult = serde_json::from_str(json).unwrap();
assert_eq!(r.action, "dismiss");
assert_eq!(r.confidence, 8.5);
assert_eq!(r.rationale, "false positive");
assert_eq!(r.remediation.as_deref(), Some("remove code"));
}
#[test]
fn triage_result_defaults() {
let json = r#"{}"#;
let r: TriageResult = serde_json::from_str(json).unwrap();
assert_eq!(r.action, "confirm");
assert_eq!(r.confidence, 0.0);
assert_eq!(r.rationale, "");
assert!(r.remediation.is_none());
}
#[test]
fn triage_result_partial() {
let json = r#"{"action":"downgrade","confidence":6.0}"#;
let r: TriageResult = serde_json::from_str(json).unwrap();
assert_eq!(r.action, "downgrade");
assert_eq!(r.confidence, 6.0);
assert_eq!(r.rationale, "");
assert!(r.remediation.is_none());
}
#[test]
fn triage_result_with_markdown_fences() {
// Simulate LLM wrapping response in markdown code fences
let raw = "```json\n{\"action\":\"upgrade\",\"confidence\":9,\"rationale\":\"critical\",\"remediation\":null}\n```";
let cleaned = raw
.trim()
.trim_start_matches("```json")
.trim_start_matches("```")
.trim_end_matches("```")
.trim();
let r: TriageResult = serde_json::from_str(cleaned).unwrap();
assert_eq!(r.action, "upgrade");
assert_eq!(r.confidence, 9.0);
}
}

View File

@@ -1,369 +0,0 @@
use serde::{Deserialize, Serialize};
// ── Request types ──────────────────────────────────────────────
#[derive(Serialize, Clone, Debug)]
pub struct ChatMessage {
pub role: String,
pub content: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub tool_calls: Option<Vec<ToolCallRequest>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub tool_call_id: Option<String>,
}
#[derive(Serialize)]
pub(crate) struct ChatCompletionRequest {
pub(crate) model: String,
pub(crate) messages: Vec<ChatMessage>,
#[serde(skip_serializing_if = "Option::is_none")]
pub(crate) temperature: Option<f64>,
#[serde(skip_serializing_if = "Option::is_none")]
pub(crate) max_tokens: Option<u32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub(crate) tools: Option<Vec<ToolDefinitionPayload>>,
}
#[derive(Serialize)]
pub(crate) struct ToolDefinitionPayload {
pub(crate) r#type: String,
pub(crate) function: ToolFunctionPayload,
}
#[derive(Serialize)]
pub(crate) struct ToolFunctionPayload {
pub(crate) name: String,
pub(crate) description: String,
pub(crate) parameters: serde_json::Value,
}
// ── Response types ─────────────────────────────────────────────
#[derive(Deserialize)]
pub(crate) struct ChatCompletionResponse {
pub(crate) choices: Vec<ChatChoice>,
}
#[derive(Deserialize)]
pub(crate) struct ChatChoice {
pub(crate) message: ChatResponseMessage,
}
#[derive(Deserialize)]
pub(crate) struct ChatResponseMessage {
#[serde(default)]
pub(crate) content: Option<String>,
#[serde(default)]
pub(crate) tool_calls: Option<Vec<ToolCallResponse>>,
}
#[derive(Deserialize)]
pub(crate) struct ToolCallResponse {
pub(crate) id: String,
pub(crate) function: ToolCallFunction,
}
#[derive(Deserialize)]
pub(crate) struct ToolCallFunction {
pub(crate) name: String,
pub(crate) arguments: String,
}
// ── Public types for tool calling ──────────────────────────────
/// Definition of a tool that the LLM can invoke
#[derive(Debug, Clone, Serialize)]
pub struct ToolDefinition {
pub name: String,
pub description: String,
pub parameters: serde_json::Value,
}
/// A tool call request from the LLM
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LlmToolCall {
pub id: String,
pub name: String,
pub arguments: serde_json::Value,
}
/// A tool call in the request message format (for sending back tool_calls in assistant messages)
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolCallRequest {
pub id: String,
pub r#type: String,
pub function: ToolCallRequestFunction,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolCallRequestFunction {
pub name: String,
pub arguments: String,
}
/// Response from the LLM — either content or tool calls
#[derive(Debug, Clone)]
pub enum LlmResponse {
Content(String),
/// Tool calls with optional reasoning text from the LLM
ToolCalls {
calls: Vec<LlmToolCall>,
reasoning: String,
},
}
#[cfg(test)]
mod tests {
use super::*;
use serde_json::json;
// ── ChatMessage ──────────────────────────────────────────────
#[test]
fn chat_message_serializes_minimal() {
let msg = ChatMessage {
role: "user".to_string(),
content: Some("hello".to_string()),
tool_calls: None,
tool_call_id: None,
};
let v = serde_json::to_value(&msg).unwrap();
assert_eq!(v["role"], "user");
assert_eq!(v["content"], "hello");
// None fields with skip_serializing_if should be absent
assert!(v.get("tool_calls").is_none());
assert!(v.get("tool_call_id").is_none());
}
#[test]
fn chat_message_serializes_with_tool_calls() {
let msg = ChatMessage {
role: "assistant".to_string(),
content: None,
tool_calls: Some(vec![ToolCallRequest {
id: "call_1".to_string(),
r#type: "function".to_string(),
function: ToolCallRequestFunction {
name: "get_weather".to_string(),
arguments: r#"{"city":"NYC"}"#.to_string(),
},
}]),
tool_call_id: None,
};
let v = serde_json::to_value(&msg).unwrap();
assert!(v["tool_calls"].is_array());
assert_eq!(v["tool_calls"][0]["function"]["name"], "get_weather");
}
#[test]
fn chat_message_content_null_when_none() {
let msg = ChatMessage {
role: "assistant".to_string(),
content: None,
tool_calls: None,
tool_call_id: None,
};
let v = serde_json::to_value(&msg).unwrap();
assert!(v["content"].is_null());
}
// ── ToolDefinition ───────────────────────────────────────────
#[test]
fn tool_definition_serializes() {
let td = ToolDefinition {
name: "search".to_string(),
description: "Search the web".to_string(),
parameters: json!({"type": "object", "properties": {"q": {"type": "string"}}}),
};
let v = serde_json::to_value(&td).unwrap();
assert_eq!(v["name"], "search");
assert_eq!(v["parameters"]["type"], "object");
}
#[test]
fn tool_definition_empty_parameters() {
let td = ToolDefinition {
name: "noop".to_string(),
description: "".to_string(),
parameters: json!({}),
};
let v = serde_json::to_value(&td).unwrap();
assert_eq!(v["parameters"], json!({}));
}
// ── LlmToolCall ──────────────────────────────────────────────
#[test]
fn llm_tool_call_roundtrip() {
let call = LlmToolCall {
id: "tc_42".to_string(),
name: "run_scan".to_string(),
arguments: json!({"path": "/tmp", "verbose": true}),
};
let serialized = serde_json::to_string(&call).unwrap();
let deserialized: LlmToolCall = serde_json::from_str(&serialized).unwrap();
assert_eq!(deserialized.id, "tc_42");
assert_eq!(deserialized.name, "run_scan");
assert_eq!(deserialized.arguments["path"], "/tmp");
assert_eq!(deserialized.arguments["verbose"], true);
}
#[test]
fn llm_tool_call_empty_arguments() {
let call = LlmToolCall {
id: "tc_0".to_string(),
name: "noop".to_string(),
arguments: json!({}),
};
let rt: LlmToolCall = serde_json::from_str(&serde_json::to_string(&call).unwrap()).unwrap();
assert!(rt.arguments.as_object().unwrap().is_empty());
}
// ── ToolCallRequest / ToolCallRequestFunction ────────────────
#[test]
fn tool_call_request_roundtrip() {
let req = ToolCallRequest {
id: "call_abc".to_string(),
r#type: "function".to_string(),
function: ToolCallRequestFunction {
name: "my_func".to_string(),
arguments: r#"{"x":1}"#.to_string(),
},
};
let json_str = serde_json::to_string(&req).unwrap();
let back: ToolCallRequest = serde_json::from_str(&json_str).unwrap();
assert_eq!(back.id, "call_abc");
assert_eq!(back.r#type, "function");
assert_eq!(back.function.name, "my_func");
assert_eq!(back.function.arguments, r#"{"x":1}"#);
}
#[test]
fn tool_call_request_type_field_serializes_as_type() {
let req = ToolCallRequest {
id: "id".to_string(),
r#type: "function".to_string(),
function: ToolCallRequestFunction {
name: "f".to_string(),
arguments: "{}".to_string(),
},
};
let v = serde_json::to_value(&req).unwrap();
// The field should be "type" in JSON, not "r#type"
assert!(v.get("type").is_some());
assert!(v.get("r#type").is_none());
}
// ── ChatCompletionRequest ────────────────────────────────────
#[test]
fn chat_completion_request_skips_none_fields() {
let req = ChatCompletionRequest {
model: "gpt-4".to_string(),
messages: vec![],
temperature: None,
max_tokens: None,
tools: None,
};
let v = serde_json::to_value(&req).unwrap();
assert_eq!(v["model"], "gpt-4");
assert!(v.get("temperature").is_none());
assert!(v.get("max_tokens").is_none());
assert!(v.get("tools").is_none());
}
#[test]
fn chat_completion_request_includes_set_fields() {
let req = ChatCompletionRequest {
model: "gpt-4".to_string(),
messages: vec![],
temperature: Some(0.7),
max_tokens: Some(1024),
tools: Some(vec![]),
};
let v = serde_json::to_value(&req).unwrap();
assert_eq!(v["temperature"], 0.7);
assert_eq!(v["max_tokens"], 1024);
assert!(v["tools"].is_array());
}
// ── ChatCompletionResponse deserialization ───────────────────
#[test]
fn chat_completion_response_deserializes_content() {
let json_str = r#"{"choices":[{"message":{"content":"Hello!"}}]}"#;
let resp: ChatCompletionResponse = serde_json::from_str(json_str).unwrap();
assert_eq!(resp.choices.len(), 1);
assert_eq!(resp.choices[0].message.content.as_deref(), Some("Hello!"));
assert!(resp.choices[0].message.tool_calls.is_none());
}
#[test]
fn chat_completion_response_deserializes_tool_calls() {
let json_str = r#"{
"choices": [{
"message": {
"tool_calls": [{
"id": "call_1",
"function": {"name": "search", "arguments": "{\"q\":\"rust\"}"}
}]
}
}]
}"#;
let resp: ChatCompletionResponse = serde_json::from_str(json_str).unwrap();
let tc = resp.choices[0].message.tool_calls.as_ref().unwrap();
assert_eq!(tc.len(), 1);
assert_eq!(tc[0].id, "call_1");
assert_eq!(tc[0].function.name, "search");
}
#[test]
fn chat_completion_response_defaults_missing_fields() {
// content and tool_calls are both missing — should default to None
let json_str = r#"{"choices":[{"message":{}}]}"#;
let resp: ChatCompletionResponse = serde_json::from_str(json_str).unwrap();
assert!(resp.choices[0].message.content.is_none());
assert!(resp.choices[0].message.tool_calls.is_none());
}
// ── LlmResponse ─────────────────────────────────────────────
#[test]
fn llm_response_content_variant() {
let resp = LlmResponse::Content("answer".to_string());
match resp {
LlmResponse::Content(s) => assert_eq!(s, "answer"),
_ => panic!("expected Content variant"),
}
}
#[test]
fn llm_response_tool_calls_variant() {
let resp = LlmResponse::ToolCalls {
calls: vec![LlmToolCall {
id: "1".to_string(),
name: "f".to_string(),
arguments: json!({}),
}],
reasoning: "because".to_string(),
};
match resp {
LlmResponse::ToolCalls { calls, reasoning } => {
assert_eq!(calls.len(), 1);
assert_eq!(reasoning, "because");
}
_ => panic!("expected ToolCalls variant"),
}
}
#[test]
fn llm_response_empty_content() {
let resp = LlmResponse::Content(String::new());
match resp {
LlmResponse::Content(s) => assert!(s.is_empty()),
_ => panic!("expected Content variant"),
}
}
}

View File

@@ -1,29 +1,25 @@
use compliance_agent::{agent, api, config, database, scheduler, ssh, webhooks};
mod agent;
mod api;
mod config;
mod database;
mod error;
mod llm;
mod pipeline;
mod rag;
mod scheduler;
#[allow(dead_code)]
mod trackers;
mod webhooks;
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
match dotenvy::dotenv() {
Ok(path) => eprintln!("[dotenv] Loaded from: {}", path.display()),
Err(e) => eprintln!("[dotenv] FAILED: {e}"),
}
dotenvy::dotenv().ok();
let _telemetry_guard = compliance_core::telemetry::init_telemetry("compliance-agent");
// Log critical env vars at startup
tracing::info!(
chrome_ws_url = std::env::var("CHROME_WS_URL").ok().as_deref(),
pentest_email = std::env::var("PENTEST_VERIFICATION_EMAIL").ok().as_deref(),
encryption_key_set = std::env::var("PENTEST_ENCRYPTION_KEY").is_ok(),
"Loading configuration..."
);
tracing::info!("Loading configuration...");
let config = config::load_config()?;
// Ensure SSH key pair exists for cloning private repos
match ssh::ensure_ssh_key(&config.ssh_key_path) {
Ok(pubkey) => tracing::info!("SSH public key: {}", pubkey.trim()),
Err(e) => tracing::warn!("SSH key generation skipped: {e}"),
}
tracing::info!("Connecting to MongoDB...");
let db = database::Database::connect(&config.mongodb_uri, &config.mongodb_database).await?;
db.ensure_indexes().await?;

View File

@@ -1,484 +0,0 @@
use compliance_core::models::pentest::{IdentityProvider, TestUserRecord};
use compliance_core::AgentConfig;
use secrecy::ExposeSecret;
use tracing::{info, warn};
/// Attempt to delete a test user created during a pentest session.
///
/// Routes to the appropriate identity provider based on `TestUserRecord.provider`.
/// Falls back to browser-based cleanup if no API credentials are available.
///
/// Returns `Ok(true)` if the user was deleted, `Ok(false)` if skipped, `Err` on failure.
pub async fn cleanup_test_user(
user: &TestUserRecord,
config: &AgentConfig,
http: &reqwest::Client,
) -> Result<bool, String> {
if user.cleaned_up {
return Ok(false);
}
let provider = user.provider.as_ref();
match provider {
Some(IdentityProvider::Keycloak) => cleanup_keycloak(user, config, http).await,
Some(IdentityProvider::Auth0) => cleanup_auth0(user, config, http).await,
Some(IdentityProvider::Okta) => cleanup_okta(user, config, http).await,
Some(IdentityProvider::Firebase) => {
warn!("Firebase user cleanup not yet implemented");
Ok(false)
}
Some(IdentityProvider::Custom) | None => {
// For custom/unknown providers, try Keycloak if configured, else skip
if config.keycloak_url.is_some() && config.keycloak_admin_username.is_some() {
cleanup_keycloak(user, config, http).await
} else {
warn!(
username = user.username.as_deref(),
"No identity provider configured for cleanup — skipping"
);
Ok(false)
}
}
}
}
/// Delete a user from Keycloak via the Admin REST API.
///
/// Flow: get admin token → search user by username → delete by ID.
async fn cleanup_keycloak(
user: &TestUserRecord,
config: &AgentConfig,
http: &reqwest::Client,
) -> Result<bool, String> {
let base_url = config
.keycloak_url
.as_deref()
.ok_or("KEYCLOAK_URL not configured")?;
let realm = config
.keycloak_realm
.as_deref()
.ok_or("KEYCLOAK_REALM not configured")?;
let admin_user = config
.keycloak_admin_username
.as_deref()
.ok_or("KEYCLOAK_ADMIN_USERNAME not configured")?;
let admin_pass = config
.keycloak_admin_password
.as_ref()
.ok_or("KEYCLOAK_ADMIN_PASSWORD not configured")?;
let username = user
.username
.as_deref()
.ok_or("No username in test user record")?;
info!(username, realm, "Cleaning up Keycloak test user");
// Step 1: Get admin access token
let token_url = format!("{base_url}/realms/master/protocol/openid-connect/token");
let token_resp = http
.post(&token_url)
.form(&[
("grant_type", "password"),
("client_id", "admin-cli"),
("username", admin_user),
("password", admin_pass.expose_secret()),
])
.send()
.await
.map_err(|e| format!("Keycloak token request failed: {e}"))?;
if !token_resp.status().is_success() {
let status = token_resp.status();
let body = token_resp.text().await.unwrap_or_default();
return Err(format!("Keycloak admin auth failed ({status}): {body}"));
}
let token_body: serde_json::Value = token_resp
.json()
.await
.map_err(|e| format!("Failed to parse Keycloak token: {e}"))?;
let access_token = token_body
.get("access_token")
.and_then(|v| v.as_str())
.ok_or("No access_token in Keycloak response")?;
// Step 2: Search for user by username
let search_url =
format!("{base_url}/admin/realms/{realm}/users?username={username}&exact=true");
let search_resp = http
.get(&search_url)
.bearer_auth(access_token)
.send()
.await
.map_err(|e| format!("Keycloak user search failed: {e}"))?;
if !search_resp.status().is_success() {
let status = search_resp.status();
let body = search_resp.text().await.unwrap_or_default();
return Err(format!("Keycloak user search failed ({status}): {body}"));
}
let users: Vec<serde_json::Value> = search_resp
.json()
.await
.map_err(|e| format!("Failed to parse Keycloak users: {e}"))?;
let user_id = users
.first()
.and_then(|u| u.get("id"))
.and_then(|v| v.as_str())
.ok_or_else(|| format!("User '{username}' not found in Keycloak realm '{realm}'"))?;
// Step 3: Delete the user
let delete_url = format!("{base_url}/admin/realms/{realm}/users/{user_id}");
let delete_resp = http
.delete(&delete_url)
.bearer_auth(access_token)
.send()
.await
.map_err(|e| format!("Keycloak user delete failed: {e}"))?;
if delete_resp.status().is_success() || delete_resp.status().as_u16() == 204 {
info!(username, user_id, "Keycloak test user deleted");
Ok(true)
} else {
let status = delete_resp.status();
let body = delete_resp.text().await.unwrap_or_default();
Err(format!("Keycloak delete failed ({status}): {body}"))
}
}
/// Delete a user from Auth0 via the Management API.
///
/// Requires `AUTH0_DOMAIN`, `AUTH0_CLIENT_ID`, `AUTH0_CLIENT_SECRET` env vars.
async fn cleanup_auth0(
user: &TestUserRecord,
_config: &AgentConfig,
http: &reqwest::Client,
) -> Result<bool, String> {
let domain = std::env::var("AUTH0_DOMAIN").map_err(|_| "AUTH0_DOMAIN not set")?;
let client_id = std::env::var("AUTH0_CLIENT_ID").map_err(|_| "AUTH0_CLIENT_ID not set")?;
let client_secret =
std::env::var("AUTH0_CLIENT_SECRET").map_err(|_| "AUTH0_CLIENT_SECRET not set")?;
let email = user
.email
.as_deref()
.ok_or("No email in test user record for Auth0 lookup")?;
info!(email, "Cleaning up Auth0 test user");
// Get management API token
let token_resp = http
.post(format!("https://{domain}/oauth/token"))
.json(&serde_json::json!({
"grant_type": "client_credentials",
"client_id": client_id,
"client_secret": client_secret,
"audience": format!("https://{domain}/api/v2/"),
}))
.send()
.await
.map_err(|e| format!("Auth0 token request failed: {e}"))?;
let token_body: serde_json::Value = token_resp
.json()
.await
.map_err(|e| format!("Failed to parse Auth0 token: {e}"))?;
let access_token = token_body
.get("access_token")
.and_then(|v| v.as_str())
.ok_or("No access_token in Auth0 response")?;
// Search for user by email
let encoded_email = urlencoding::encode(email);
let search_url = format!("https://{domain}/api/v2/users-by-email?email={encoded_email}");
let search_resp = http
.get(&search_url)
.bearer_auth(access_token)
.send()
.await
.map_err(|e| format!("Auth0 user search failed: {e}"))?;
let users: Vec<serde_json::Value> = search_resp
.json()
.await
.map_err(|e| format!("Failed to parse Auth0 users: {e}"))?;
let user_id = users
.first()
.and_then(|u| u.get("user_id"))
.and_then(|v| v.as_str())
.ok_or_else(|| format!("User with email '{email}' not found in Auth0"))?;
// Delete
let encoded_id = urlencoding::encode(user_id);
let delete_resp = http
.delete(format!("https://{domain}/api/v2/users/{encoded_id}"))
.bearer_auth(access_token)
.send()
.await
.map_err(|e| format!("Auth0 user delete failed: {e}"))?;
if delete_resp.status().is_success() || delete_resp.status().as_u16() == 204 {
info!(email, user_id, "Auth0 test user deleted");
Ok(true)
} else {
let status = delete_resp.status();
let body = delete_resp.text().await.unwrap_or_default();
Err(format!("Auth0 delete failed ({status}): {body}"))
}
}
/// Delete a user from Okta via the Users API.
///
/// Requires `OKTA_DOMAIN`, `OKTA_API_TOKEN` env vars.
async fn cleanup_okta(
user: &TestUserRecord,
_config: &AgentConfig,
http: &reqwest::Client,
) -> Result<bool, String> {
let domain = std::env::var("OKTA_DOMAIN").map_err(|_| "OKTA_DOMAIN not set")?;
let api_token = std::env::var("OKTA_API_TOKEN").map_err(|_| "OKTA_API_TOKEN not set")?;
let username = user
.username
.as_deref()
.or(user.email.as_deref())
.ok_or("No username/email in test user record for Okta lookup")?;
info!(username, "Cleaning up Okta test user");
// Search user
let encoded = urlencoding::encode(username);
let search_url = format!("https://{domain}/api/v1/users?search=profile.login+eq+\"{encoded}\"");
let search_resp = http
.get(&search_url)
.header("Authorization", format!("SSWS {api_token}"))
.send()
.await
.map_err(|e| format!("Okta user search failed: {e}"))?;
let users: Vec<serde_json::Value> = search_resp
.json()
.await
.map_err(|e| format!("Failed to parse Okta users: {e}"))?;
let user_id = users
.first()
.and_then(|u| u.get("id"))
.and_then(|v| v.as_str())
.ok_or_else(|| format!("User '{username}' not found in Okta"))?;
// Deactivate first (required by Okta before delete)
let _ = http
.post(format!(
"https://{domain}/api/v1/users/{user_id}/lifecycle/deactivate"
))
.header("Authorization", format!("SSWS {api_token}"))
.send()
.await;
// Delete
let delete_resp = http
.delete(format!("https://{domain}/api/v1/users/{user_id}"))
.header("Authorization", format!("SSWS {api_token}"))
.send()
.await
.map_err(|e| format!("Okta user delete failed: {e}"))?;
if delete_resp.status().is_success() || delete_resp.status().as_u16() == 204 {
info!(username, user_id, "Okta test user deleted");
Ok(true)
} else {
let status = delete_resp.status();
let body = delete_resp.text().await.unwrap_or_default();
Err(format!("Okta delete failed ({status}): {body}"))
}
}
#[cfg(test)]
mod tests {
use super::*;
use compliance_core::models::pentest::{IdentityProvider, TestUserRecord};
use secrecy::SecretString;
fn make_config_no_keycloak() -> AgentConfig {
AgentConfig {
mongodb_uri: String::new(),
mongodb_database: String::new(),
litellm_url: String::new(),
litellm_api_key: SecretString::from(String::new()),
litellm_model: String::new(),
litellm_embed_model: String::new(),
github_token: None,
github_webhook_secret: None,
gitlab_url: None,
gitlab_token: None,
gitlab_webhook_secret: None,
jira_url: None,
jira_email: None,
jira_api_token: None,
jira_project_key: None,
searxng_url: None,
nvd_api_key: None,
agent_port: 3001,
scan_schedule: String::new(),
cve_monitor_schedule: String::new(),
git_clone_base_path: String::new(),
ssh_key_path: String::new(),
keycloak_url: None,
keycloak_realm: None,
keycloak_admin_username: None,
keycloak_admin_password: None,
pentest_verification_email: None,
pentest_imap_host: None,
pentest_imap_port: None,
pentest_imap_tls: true,
pentest_imap_username: None,
pentest_imap_password: None,
}
}
#[tokio::test]
async fn already_cleaned_up_returns_false() {
let user = TestUserRecord {
username: Some("test".into()),
email: None,
provider_user_id: None,
provider: Some(IdentityProvider::Keycloak),
cleaned_up: true,
};
let config = make_config_no_keycloak();
let http = reqwest::Client::new();
let result = cleanup_test_user(&user, &config, &http).await;
assert_eq!(result, Ok(false));
}
#[tokio::test]
async fn firebase_returns_false_not_implemented() {
let user = TestUserRecord {
username: Some("test".into()),
email: None,
provider_user_id: None,
provider: Some(IdentityProvider::Firebase),
cleaned_up: false,
};
let config = make_config_no_keycloak();
let http = reqwest::Client::new();
let result = cleanup_test_user(&user, &config, &http).await;
assert_eq!(result, Ok(false));
}
#[tokio::test]
async fn no_provider_no_keycloak_skips() {
let user = TestUserRecord {
username: Some("test".into()),
email: None,
provider_user_id: None,
provider: None,
cleaned_up: false,
};
let config = make_config_no_keycloak();
let http = reqwest::Client::new();
let result = cleanup_test_user(&user, &config, &http).await;
assert_eq!(result, Ok(false));
}
#[tokio::test]
async fn custom_provider_no_keycloak_skips() {
let user = TestUserRecord {
username: Some("test".into()),
email: None,
provider_user_id: None,
provider: Some(IdentityProvider::Custom),
cleaned_up: false,
};
let config = make_config_no_keycloak();
let http = reqwest::Client::new();
let result = cleanup_test_user(&user, &config, &http).await;
assert_eq!(result, Ok(false));
}
#[tokio::test]
async fn keycloak_missing_config_returns_error() {
let user = TestUserRecord {
username: Some("test".into()),
email: None,
provider_user_id: None,
provider: Some(IdentityProvider::Keycloak),
cleaned_up: false,
};
let config = make_config_no_keycloak();
let http = reqwest::Client::new();
let result = cleanup_test_user(&user, &config, &http).await;
assert!(result.is_err());
assert!(result
.as_ref()
.err()
.is_some_and(|e| e.contains("KEYCLOAK_URL")));
}
#[tokio::test]
async fn keycloak_missing_username_returns_error() {
let user = TestUserRecord {
username: None,
email: Some("test@example.com".into()),
provider_user_id: None,
provider: Some(IdentityProvider::Keycloak),
cleaned_up: false,
};
let mut config = make_config_no_keycloak();
config.keycloak_url = Some("http://localhost:8080".into());
config.keycloak_realm = Some("test".into());
config.keycloak_admin_username = Some("admin".into());
config.keycloak_admin_password = Some(SecretString::from("pass".to_string()));
let http = reqwest::Client::new();
let result = cleanup_test_user(&user, &config, &http).await;
assert!(result.is_err());
assert!(result
.as_ref()
.err()
.is_some_and(|e| e.contains("username")));
}
#[tokio::test]
async fn auth0_missing_env_returns_error() {
let user = TestUserRecord {
username: None,
email: Some("test@example.com".into()),
provider_user_id: None,
provider: Some(IdentityProvider::Auth0),
cleaned_up: false,
};
let config = make_config_no_keycloak();
let http = reqwest::Client::new();
let result = cleanup_test_user(&user, &config, &http).await;
assert!(result.is_err());
assert!(result
.as_ref()
.err()
.is_some_and(|e| e.contains("AUTH0_DOMAIN")));
}
#[tokio::test]
async fn okta_missing_env_returns_error() {
let user = TestUserRecord {
username: Some("test".into()),
email: None,
provider_user_id: None,
provider: Some(IdentityProvider::Okta),
cleaned_up: false,
};
let config = make_config_no_keycloak();
let http = reqwest::Client::new();
let result = cleanup_test_user(&user, &config, &http).await;
assert!(result.is_err());
assert!(result
.as_ref()
.err()
.is_some_and(|e| e.contains("OKTA_DOMAIN")));
}
}

View File

@@ -1,150 +0,0 @@
use futures_util::StreamExt;
use mongodb::bson::doc;
use compliance_core::models::dast::DastTarget;
use compliance_core::models::finding::Finding;
use compliance_core::models::pentest::CodeContextHint;
use compliance_core::models::sbom::SbomEntry;
use super::orchestrator::PentestOrchestrator;
impl PentestOrchestrator {
/// Fetch SAST findings, SBOM entries (with CVEs), and code graph entry points
/// for the repo linked to this DAST target.
pub(crate) async fn gather_repo_context(
&self,
target: &DastTarget,
) -> (Vec<Finding>, Vec<SbomEntry>, Vec<CodeContextHint>) {
let Some(repo_id) = &target.repo_id else {
return (Vec::new(), Vec::new(), Vec::new());
};
let sast_findings = self.fetch_sast_findings(repo_id).await;
let sbom_entries = self.fetch_vulnerable_sbom(repo_id).await;
let code_context = self.fetch_code_context(repo_id, &sast_findings).await;
tracing::info!(
repo_id,
sast_findings = sast_findings.len(),
vulnerable_deps = sbom_entries.len(),
code_hints = code_context.len(),
"Gathered code-awareness context for pentest"
);
(sast_findings, sbom_entries, code_context)
}
/// Fetch open/triaged SAST findings for the repo (not false positives or resolved)
async fn fetch_sast_findings(&self, repo_id: &str) -> Vec<Finding> {
let cursor = self
.db
.findings()
.find(doc! {
"repo_id": repo_id,
"status": { "$in": ["open", "triaged"] },
})
.sort(doc! { "severity": -1 })
.limit(100)
.await;
match cursor {
Ok(mut c) => {
let mut results = Vec::new();
while let Some(Ok(f)) = c.next().await {
results.push(f);
}
results
}
Err(e) => {
tracing::warn!("Failed to fetch SAST findings for pentest: {e}");
Vec::new()
}
}
}
/// Fetch SBOM entries that have known vulnerabilities
async fn fetch_vulnerable_sbom(&self, repo_id: &str) -> Vec<SbomEntry> {
let cursor = self
.db
.sbom_entries()
.find(doc! {
"repo_id": repo_id,
"known_vulnerabilities": { "$exists": true, "$ne": [] },
})
.limit(50)
.await;
match cursor {
Ok(mut c) => {
let mut results = Vec::new();
while let Some(Ok(e)) = c.next().await {
results.push(e);
}
results
}
Err(e) => {
tracing::warn!("Failed to fetch vulnerable SBOM entries: {e}");
Vec::new()
}
}
}
/// Build CodeContextHint objects from the code knowledge graph.
/// Maps entry points to their source files and links SAST findings.
async fn fetch_code_context(
&self,
repo_id: &str,
sast_findings: &[Finding],
) -> Vec<CodeContextHint> {
// Get entry point nodes from the code graph
let cursor = self
.db
.graph_nodes()
.find(doc! {
"repo_id": repo_id,
"is_entry_point": true,
})
.limit(50)
.await;
let nodes = match cursor {
Ok(mut c) => {
let mut results = Vec::new();
while let Some(Ok(n)) = c.next().await {
results.push(n);
}
results
}
Err(_) => return Vec::new(),
};
// Build hints by matching graph nodes to SAST findings by file path
nodes
.into_iter()
.map(|node| {
// Find SAST findings in the same file
let linked_vulns: Vec<String> = sast_findings
.iter()
.filter(|f| f.file_path.as_deref() == Some(&node.file_path))
.map(|f| {
format!(
"[{}] {}: {} (line {})",
f.severity,
f.scanner,
f.title,
f.line_number.unwrap_or(0)
)
})
.collect();
CodeContextHint {
endpoint_pattern: node.qualified_name.clone(),
handler_function: node.name.clone(),
file_path: node.file_path.clone(),
code_snippet: String::new(), // Could fetch from embeddings
known_vulnerabilities: linked_vulns,
}
})
.collect()
}
}

View File

@@ -1,117 +0,0 @@
use aes_gcm::aead::AeadCore;
use aes_gcm::{
aead::{Aead, KeyInit, OsRng},
Aes256Gcm, Nonce,
};
use base64::Engine;
/// Load the 32-byte encryption key from PENTEST_ENCRYPTION_KEY env var.
/// Returns None if not set or invalid length.
pub fn load_encryption_key() -> Option<[u8; 32]> {
let hex_key = std::env::var("PENTEST_ENCRYPTION_KEY").ok()?;
let bytes = hex::decode(hex_key).ok()?;
if bytes.len() != 32 {
return None;
}
let mut key = [0u8; 32];
key.copy_from_slice(&bytes);
Some(key)
}
/// Encrypt a plaintext string. Returns base64-encoded nonce+ciphertext.
/// Returns the original string if no encryption key is available.
pub fn encrypt(plaintext: &str) -> String {
let Some(key_bytes) = load_encryption_key() else {
return plaintext.to_string();
};
let Ok(cipher) = Aes256Gcm::new_from_slice(&key_bytes) else {
return plaintext.to_string();
};
let nonce = Aes256Gcm::generate_nonce(&mut OsRng);
let Ok(ciphertext) = cipher.encrypt(&nonce, plaintext.as_bytes()) else {
return plaintext.to_string();
};
let mut combined = nonce.to_vec();
combined.extend_from_slice(&ciphertext);
base64::engine::general_purpose::STANDARD.encode(&combined)
}
/// Decrypt a base64-encoded nonce+ciphertext string.
/// Returns None if decryption fails.
pub fn decrypt(encrypted: &str) -> Option<String> {
let key_bytes = load_encryption_key()?;
let cipher = Aes256Gcm::new_from_slice(&key_bytes).ok()?;
let combined = base64::engine::general_purpose::STANDARD
.decode(encrypted)
.ok()?;
if combined.len() < 12 {
return None;
}
let (nonce_bytes, ciphertext) = combined.split_at(12);
let nonce = Nonce::from_slice(nonce_bytes);
let plaintext = cipher.decrypt(nonce, ciphertext).ok()?;
String::from_utf8(plaintext).ok()
}
#[cfg(test)]
mod tests {
use super::*;
use std::sync::Mutex;
// Guard to serialize tests that touch env vars
static ENV_LOCK: Mutex<()> = Mutex::new(());
fn with_key<F: FnOnce()>(hex_key: &str, f: F) {
let _guard = ENV_LOCK.lock();
unsafe { std::env::set_var("PENTEST_ENCRYPTION_KEY", hex_key) };
f();
unsafe { std::env::remove_var("PENTEST_ENCRYPTION_KEY") };
}
#[test]
fn round_trip() {
let key = "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef";
with_key(key, || {
let plaintext = "my_secret_password";
let encrypted = encrypt(plaintext);
assert_ne!(encrypted, plaintext);
let decrypted = decrypt(&encrypted);
assert_eq!(decrypted, Some(plaintext.to_string()));
});
}
#[test]
fn wrong_key_fails() {
let _guard = ENV_LOCK.lock();
let key1 = "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef";
let key2 = "abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789";
let encrypted = {
unsafe { std::env::set_var("PENTEST_ENCRYPTION_KEY", key1) };
let e = encrypt("secret");
unsafe { std::env::remove_var("PENTEST_ENCRYPTION_KEY") };
e
};
unsafe { std::env::set_var("PENTEST_ENCRYPTION_KEY", key2) };
assert!(decrypt(&encrypted).is_none());
unsafe { std::env::remove_var("PENTEST_ENCRYPTION_KEY") };
}
#[test]
fn no_key_passthrough() {
let _guard = ENV_LOCK.lock();
unsafe { std::env::remove_var("PENTEST_ENCRYPTION_KEY") };
let result = encrypt("plain");
assert_eq!(result, "plain");
}
#[test]
fn corrupted_ciphertext() {
let key = "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef";
with_key(key, || {
assert!(decrypt("not-valid-base64!!!").is_none());
// Valid base64 but wrong content
let garbage = base64::engine::general_purpose::STANDARD.encode(b"tooshort");
assert!(decrypt(&garbage).is_none());
});
}
}

View File

@@ -1,9 +0,0 @@
pub mod cleanup;
mod context;
pub mod crypto;
pub mod orchestrator;
mod prompt_builder;
pub mod report;
pub use orchestrator::PentestOrchestrator;
pub use report::generate_encrypted_report;

View File

@@ -1,735 +0,0 @@
use std::sync::Arc;
use std::time::Duration;
use mongodb::bson::doc;
use tokio::sync::{broadcast, watch};
use compliance_core::models::dast::DastTarget;
use compliance_core::models::pentest::*;
use compliance_core::traits::pentest_tool::PentestToolContext;
use compliance_dast::ToolRegistry;
use crate::database::Database;
use crate::llm::{
ChatMessage, LlmClient, LlmResponse, ToolCallRequest, ToolCallRequestFunction, ToolDefinition,
};
/// Maximum duration for a single pentest session before timeout
const SESSION_TIMEOUT: Duration = Duration::from_secs(30 * 60); // 30 minutes
pub struct PentestOrchestrator {
pub(crate) tool_registry: ToolRegistry,
pub(crate) llm: Arc<LlmClient>,
pub(crate) db: Database,
pub(crate) event_tx: broadcast::Sender<PentestEvent>,
pub(crate) pause_rx: Option<watch::Receiver<bool>>,
}
impl PentestOrchestrator {
/// Create a new orchestrator with an externally-provided broadcast sender
/// and an optional pause receiver.
pub fn new(
llm: Arc<LlmClient>,
db: Database,
event_tx: broadcast::Sender<PentestEvent>,
pause_rx: Option<watch::Receiver<bool>>,
) -> Self {
Self {
tool_registry: ToolRegistry::new(),
llm,
db,
event_tx,
pause_rx,
}
}
/// Run a pentest session with timeout and automatic failure marking on errors.
pub async fn run_session_guarded(
&self,
session: &PentestSession,
target: &DastTarget,
initial_message: &str,
) {
let session_id = session.id;
// Use config-specified timeout or default
let timeout_duration = session
.config
.as_ref()
.and_then(|c| c.max_duration_minutes)
.map(|m| Duration::from_secs(m as u64 * 60))
.unwrap_or(SESSION_TIMEOUT);
let timeout_minutes = timeout_duration.as_secs() / 60;
match tokio::time::timeout(
timeout_duration,
self.run_session(session, target, initial_message),
)
.await
{
Ok(Ok(())) => {
tracing::info!(?session_id, "Pentest session completed successfully");
}
Ok(Err(e)) => {
tracing::error!(?session_id, error = %e, "Pentest session failed");
self.mark_session_failed(session_id, &format!("Error: {e}"))
.await;
let _ = self.event_tx.send(PentestEvent::Error {
message: format!("Session failed: {e}"),
});
}
Err(_) => {
let msg = format!("Session timed out after {timeout_minutes} minutes");
tracing::warn!(?session_id, "{msg}");
self.mark_session_failed(session_id, &msg).await;
let _ = self.event_tx.send(PentestEvent::Error { message: msg });
}
}
}
async fn mark_session_failed(
&self,
session_id: Option<mongodb::bson::oid::ObjectId>,
reason: &str,
) {
if let Some(sid) = session_id {
let _ = self
.db
.pentest_sessions()
.update_one(
doc! { "_id": sid },
doc! { "$set": {
"status": "failed",
"completed_at": mongodb::bson::DateTime::now(),
"error_message": reason,
}},
)
.await;
}
}
/// Check if the session is paused; if so, update DB status and wait until resumed.
async fn wait_if_paused(&self, session: &PentestSession) {
let Some(ref pause_rx) = self.pause_rx else {
return;
};
let mut rx = pause_rx.clone();
if !*rx.borrow() {
return;
}
// We are paused — update DB status
if let Some(sid) = session.id {
let _ = self
.db
.pentest_sessions()
.update_one(doc! { "_id": sid }, doc! { "$set": { "status": "paused" }})
.await;
}
let _ = self.event_tx.send(PentestEvent::Paused);
// Wait until unpaused
while *rx.borrow() {
if rx.changed().await.is_err() {
break;
}
}
// Resumed — update DB status back to running
if let Some(sid) = session.id {
let _ = self
.db
.pentest_sessions()
.update_one(doc! { "_id": sid }, doc! { "$set": { "status": "running" }})
.await;
}
let _ = self.event_tx.send(PentestEvent::Resumed);
}
async fn run_session(
&self,
session: &PentestSession,
target: &DastTarget,
initial_message: &str,
) -> Result<(), crate::error::AgentError> {
let session_id = session.id.map(|oid| oid.to_hex()).unwrap_or_default();
// Gather code-awareness context from linked repo
let (sast_findings, sbom_entries, code_context) = self.gather_repo_context(target).await;
// Build system prompt with code context
let system_prompt = self
.build_system_prompt(
session,
target,
&sast_findings,
&sbom_entries,
&code_context,
)
.await;
// Build tool definitions for LLM
let tool_defs: Vec<ToolDefinition> = self
.tool_registry
.all_definitions()
.into_iter()
.map(|td| ToolDefinition {
name: td.name,
description: td.description,
parameters: td.input_schema,
})
.collect();
// Initialize messages
let mut messages = vec![
ChatMessage {
role: "system".to_string(),
content: Some(system_prompt),
tool_calls: None,
tool_call_id: None,
},
ChatMessage {
role: "user".to_string(),
content: Some(initial_message.to_string()),
tool_calls: None,
tool_call_id: None,
},
];
// Store user message
let user_msg = PentestMessage::user(session_id.clone(), initial_message.to_string());
let _ = self.db.pentest_messages().insert_one(&user_msg).await;
// Build tool context with real data
let tool_context = PentestToolContext {
target: target.clone(),
session_id: session_id.clone(),
sast_findings,
sbom_entries,
code_context,
rate_limit: target.rate_limit,
allow_destructive: target.allow_destructive,
};
let max_iterations = 50;
let mut total_findings = 0u32;
let mut total_tool_calls = 0u32;
let mut total_successes = 0u32;
let mut prev_node_ids: Vec<String> = Vec::new();
for _iteration in 0..max_iterations {
// Check pause state at top of each iteration
self.wait_if_paused(session).await;
let response = self
.llm
.chat_with_tools(messages.clone(), &tool_defs, Some(0.2), Some(8192))
.await?;
match response {
LlmResponse::Content(content) => {
let msg = PentestMessage::assistant(session_id.clone(), content.clone());
let _ = self.db.pentest_messages().insert_one(&msg).await;
let _ = self.event_tx.send(PentestEvent::Message {
content: content.clone(),
});
messages.push(ChatMessage {
role: "assistant".to_string(),
content: Some(content.clone()),
tool_calls: None,
tool_call_id: None,
});
let done_indicators = [
"pentest complete",
"testing complete",
"scan complete",
"analysis complete",
"finished",
"that concludes",
];
let content_lower = content.to_lowercase();
if done_indicators
.iter()
.any(|ind| content_lower.contains(ind))
{
break;
}
break;
}
LlmResponse::ToolCalls {
calls: tool_calls,
reasoning,
} => {
let tc_requests: Vec<ToolCallRequest> = tool_calls
.iter()
.map(|tc| ToolCallRequest {
id: tc.id.clone(),
r#type: "function".to_string(),
function: ToolCallRequestFunction {
name: tc.name.clone(),
arguments: serde_json::to_string(&tc.arguments).unwrap_or_default(),
},
})
.collect();
messages.push(ChatMessage {
role: "assistant".to_string(),
content: if reasoning.is_empty() {
None
} else {
Some(reasoning.clone())
},
tool_calls: Some(tc_requests),
tool_call_id: None,
});
let mut current_batch_node_ids: Vec<String> = Vec::new();
for tc in &tool_calls {
total_tool_calls += 1;
let node_id = uuid::Uuid::new_v4().to_string();
let mut node = AttackChainNode::new(
session_id.clone(),
node_id.clone(),
tc.name.clone(),
tc.arguments.clone(),
reasoning.clone(),
);
// Link to previous iteration's nodes
node.parent_node_ids = prev_node_ids.clone();
node.status = AttackNodeStatus::Running;
node.started_at = Some(chrono::Utc::now());
let _ = self.db.attack_chain_nodes().insert_one(&node).await;
current_batch_node_ids.push(node_id.clone());
let _ = self.event_tx.send(PentestEvent::ToolStart {
node_id: node_id.clone(),
tool_name: tc.name.clone(),
input: tc.arguments.clone(),
});
let result = if let Some(tool) = self.tool_registry.get(&tc.name) {
match tool.execute(tc.arguments.clone(), &tool_context).await {
Ok(result) => {
total_successes += 1;
let findings_count = result.findings.len() as u32;
total_findings += findings_count;
let mut finding_ids: Vec<String> = Vec::new();
// Dedup findings within this tool result before inserting
let deduped_findings =
crate::pipeline::dedup::dedup_dast_findings(
result.findings,
);
for mut finding in deduped_findings {
finding.scan_run_id = session_id.clone();
finding.session_id = Some(session_id.clone());
// Check for existing duplicate in this session
let fp = crate::pipeline::dedup::compute_dast_fingerprint(
&finding,
);
let existing = self
.db
.dast_findings()
.find_one(doc! {
"session_id": &session_id,
"title": &finding.title,
"endpoint": &finding.endpoint,
"method": &finding.method,
})
.await;
if matches!(existing, Ok(Some(_))) {
tracing::debug!(
"Skipping duplicate DAST finding: {} (fp={:.12})",
finding.title,
fp,
);
continue;
}
let insert_result =
self.db.dast_findings().insert_one(&finding).await;
if let Ok(res) = &insert_result {
finding_ids.push(
res.inserted_id
.as_object_id()
.map(|oid| oid.to_hex())
.unwrap_or_default(),
);
}
let _ = self.event_tx.send(PentestEvent::Finding {
finding_id: finding
.id
.map(|oid| oid.to_hex())
.unwrap_or_default(),
title: finding.title.clone(),
severity: finding.severity.to_string(),
});
}
// Compute risk score based on findings severity
let risk_score: Option<u8> = if findings_count > 0 {
Some(std::cmp::min(
100,
(findings_count as u8)
.saturating_mul(15)
.saturating_add(20),
))
} else {
None
};
let _ = self.event_tx.send(PentestEvent::ToolComplete {
node_id: node_id.clone(),
summary: result.summary.clone(),
findings_count,
});
let finding_ids_bson: Vec<mongodb::bson::Bson> = finding_ids
.iter()
.map(|id| mongodb::bson::Bson::String(id.clone()))
.collect();
let mut update_doc = doc! {
"status": "completed",
"tool_output": mongodb::bson::to_bson(&result.data)
.unwrap_or(mongodb::bson::Bson::Null),
"completed_at": mongodb::bson::DateTime::now(),
"findings_produced": finding_ids_bson,
};
if let Some(rs) = risk_score {
update_doc.insert("risk_score", rs as i32);
}
let _ = self
.db
.attack_chain_nodes()
.update_one(
doc! {
"session_id": &session_id,
"node_id": &node_id,
},
doc! { "$set": update_doc },
)
.await;
// Build LLM-facing summary: strip large fields
// (screenshots, raw HTML) to save context window
let llm_data = summarize_tool_output(&result.data);
serde_json::json!({
"summary": result.summary,
"findings_count": findings_count,
"data": llm_data,
})
.to_string()
}
Err(e) => {
let _ = self
.db
.attack_chain_nodes()
.update_one(
doc! {
"session_id": &session_id,
"node_id": &node_id,
},
doc! { "$set": {
"status": "failed",
"completed_at": mongodb::bson::DateTime::now(),
}},
)
.await;
format!("Tool execution failed: {e}")
}
}
} else {
format!("Unknown tool: {}", tc.name)
};
messages.push(ChatMessage {
role: "tool".to_string(),
content: Some(result),
tool_calls: None,
tool_call_id: Some(tc.id.clone()),
});
}
// Advance parent links so next iteration's nodes connect to this batch
prev_node_ids = current_batch_node_ids;
if let Some(sid) = session.id {
let _ = self
.db
.pentest_sessions()
.update_one(
doc! { "_id": sid },
doc! { "$set": {
"tool_invocations": total_tool_calls as i64,
"tool_successes": total_successes as i64,
"findings_count": total_findings as i64,
}},
)
.await;
}
}
}
}
if let Some(sid) = session.id {
let _ = self
.db
.pentest_sessions()
.update_one(
doc! { "_id": sid },
doc! { "$set": {
"status": "completed",
"completed_at": mongodb::bson::DateTime::now(),
"tool_invocations": total_tool_calls as i64,
"tool_successes": total_successes as i64,
"findings_count": total_findings as i64,
}},
)
.await;
}
// Clean up test user via identity provider API if requested
if session
.config
.as_ref()
.is_some_and(|c| c.auth.cleanup_test_user)
{
if let Some(ref test_user) = session.test_user {
let http = reqwest::Client::new();
// We need the AgentConfig — read from env since orchestrator doesn't hold it
let config = crate::config::load_config();
match config {
Ok(cfg) => {
match crate::pentest::cleanup::cleanup_test_user(test_user, &cfg, &http)
.await
{
Ok(true) => {
tracing::info!(
username = test_user.username.as_deref(),
"Test user cleaned up via provider API"
);
// Mark as cleaned up in DB
if let Some(sid) = session.id {
let _ = self
.db
.pentest_sessions()
.update_one(
doc! { "_id": sid },
doc! { "$set": { "test_user.cleaned_up": true } },
)
.await;
}
}
Ok(false) => {
tracing::info!(
"Test user cleanup skipped (no provider configured)"
);
}
Err(e) => {
tracing::warn!(error = %e, "Test user cleanup failed");
let _ = self.event_tx.send(PentestEvent::Error {
message: format!("Test user cleanup failed: {e}"),
});
}
}
}
Err(e) => {
tracing::warn!(error = %e, "Could not load config for cleanup");
}
}
}
}
// Clean up the persistent browser session for this pentest
compliance_dast::tools::browser::cleanup_browser_session(&session_id).await;
let _ = self.event_tx.send(PentestEvent::Complete {
summary: format!(
"Pentest complete. {} findings from {} tool invocations.",
total_findings, total_tool_calls
),
});
Ok(())
}
}
/// Strip large fields from tool output before sending to the LLM.
/// Screenshots, raw HTML, and other bulky data are replaced with short summaries.
/// The full data is still stored in the DB for the report.
fn summarize_tool_output(data: &serde_json::Value) -> serde_json::Value {
let Some(obj) = data.as_object() else {
return data.clone();
};
let mut summarized = serde_json::Map::new();
for (key, value) in obj {
match key.as_str() {
// Replace screenshot base64 with a placeholder
"screenshot_base64" => {
if let Some(s) = value.as_str() {
if !s.is_empty() {
summarized.insert(
key.clone(),
serde_json::Value::String(
"[screenshot captured and saved to report]".to_string(),
),
);
continue;
}
}
summarized.insert(key.clone(), value.clone());
}
// Truncate raw HTML content
"html" => {
if let Some(s) = value.as_str() {
if s.len() > 2000 {
summarized.insert(
key.clone(),
serde_json::Value::String(format!(
"{}... [truncated, {} chars total]",
&s[..2000],
s.len()
)),
);
continue;
}
}
summarized.insert(key.clone(), value.clone());
}
// Truncate page text
"text" if value.as_str().is_some_and(|s| s.len() > 1500) => {
let s = value.as_str().unwrap_or_default();
summarized.insert(
key.clone(),
serde_json::Value::String(format!("{}... [truncated]", &s[..1500])),
);
}
// Trim large arrays (e.g., "elements", "links", "inputs")
"elements" | "links" | "inputs" => {
if let Some(arr) = value.as_array() {
if arr.len() > 15 {
let mut trimmed: Vec<serde_json::Value> = arr[..15].to_vec();
trimmed.push(serde_json::json!(format!(
"... and {} more",
arr.len() - 15
)));
summarized.insert(key.clone(), serde_json::Value::Array(trimmed));
continue;
}
}
summarized.insert(key.clone(), value.clone());
}
// Recursively summarize nested objects (e.g., "page" in get_content)
_ if value.is_object() => {
summarized.insert(key.clone(), summarize_tool_output(value));
}
// Keep everything else as-is
_ => {
summarized.insert(key.clone(), value.clone());
}
}
}
serde_json::Value::Object(summarized)
}
#[cfg(test)]
mod tests {
use super::*;
use serde_json::json;
#[test]
fn test_summarize_strips_screenshot() {
let input = json!({
"screenshot_base64": "iVBOR...",
"url": "https://example.com"
});
let result = summarize_tool_output(&input);
assert_eq!(
result["screenshot_base64"],
"[screenshot captured and saved to report]"
);
assert_eq!(result["url"], "https://example.com");
}
#[test]
fn test_summarize_truncates_html() {
let long_html = "x".repeat(3000);
let input = json!({ "html": long_html });
let result = summarize_tool_output(&input);
let s = result["html"].as_str().unwrap_or_default();
assert!(s.contains("[truncated, 3000 chars total]"));
assert!(s.starts_with(&"x".repeat(2000)));
assert!(s.len() < 3000);
}
#[test]
fn test_summarize_truncates_text() {
let long_text = "a".repeat(2000);
let input = json!({ "text": long_text });
let result = summarize_tool_output(&input);
let s = result["text"].as_str().unwrap_or_default();
assert!(s.contains("[truncated]"));
assert!(s.starts_with(&"a".repeat(1500)));
assert!(s.len() < 2000);
}
#[test]
fn test_summarize_trims_large_arrays() {
let elements: Vec<serde_json::Value> = (0..20).map(|i| json!(format!("el-{i}"))).collect();
let input = json!({ "elements": elements });
let result = summarize_tool_output(&input);
let arr = result["elements"].as_array();
assert!(arr.is_some());
if let Some(arr) = arr {
// 15 kept + 1 summary entry
assert_eq!(arr.len(), 16);
assert_eq!(arr[15], json!("... and 5 more"));
}
}
#[test]
fn test_summarize_preserves_small_data() {
let input = json!({
"url": "https://example.com",
"status": 200,
"title": "Example"
});
let result = summarize_tool_output(&input);
assert_eq!(result, input);
}
#[test]
fn test_summarize_recursive() {
let input = json!({
"page": {
"screenshot_base64": "iVBORw0KGgoAAAA...",
"url": "https://example.com"
}
});
let result = summarize_tool_output(&input);
assert_eq!(
result["page"]["screenshot_base64"],
"[screenshot captured and saved to report]"
);
assert_eq!(result["page"]["url"], "https://example.com");
}
#[test]
fn test_summarize_non_object() {
let string_val = json!("just a string");
assert_eq!(summarize_tool_output(&string_val), string_val);
let num_val = json!(42);
assert_eq!(summarize_tool_output(&num_val), num_val);
}
}

View File

@@ -1,637 +0,0 @@
use compliance_core::models::dast::DastTarget;
use compliance_core::models::finding::{Finding, FindingStatus, Severity};
use compliance_core::models::pentest::*;
use compliance_core::models::sbom::SbomEntry;
use super::orchestrator::PentestOrchestrator;
/// Attempt to decrypt a field; if decryption fails, return the original value
/// (which may be plaintext from before encryption was enabled).
fn decrypt_field(value: &str) -> String {
super::crypto::decrypt(value).unwrap_or_else(|| value.to_string())
}
/// Build additional prompt sections from PentestConfig when present.
fn build_config_sections(config: &PentestConfig) -> String {
let mut sections = String::new();
// Authentication section
match config.auth.mode {
AuthMode::Manual => {
sections.push_str("\n## Authentication\n");
sections.push_str("- **Mode**: Manual credentials\n");
if let Some(ref u) = config.auth.username {
let decrypted = decrypt_field(u);
sections.push_str(&format!("- **Username**: {decrypted}\n"));
}
if let Some(ref p) = config.auth.password {
let decrypted = decrypt_field(p);
sections.push_str(&format!("- **Password**: {decrypted}\n"));
}
sections.push_str(
"Use these credentials to log in before testing authenticated endpoints.\n",
);
}
AuthMode::AutoRegister => {
sections.push_str("\n## Authentication\n");
sections.push_str("- **Mode**: Auto-register\n");
if let Some(ref url) = config.auth.registration_url {
sections.push_str(&format!("- **Registration URL**: {url}\n"));
} else {
sections.push_str(
"- **Registration URL**: Not provided — use Playwright to discover the registration page.\n",
);
}
if let Some(ref email) = config.auth.verification_email {
sections.push_str(&format!(
"- **Verification Email**: Use plus-addressing from `{email}` \
(e.g. `{base}+{{session_id}}@{domain}`) for email verification. \
The system will poll the IMAP mailbox for verification links.\n",
base = email.split('@').next().unwrap_or(email),
domain = email.split('@').nth(1).unwrap_or("example.com"),
));
}
sections.push_str(
"Register a new test account using the registration page, then use it for testing.\n",
);
}
AuthMode::None => {}
}
// Custom headers
if !config.custom_headers.is_empty() {
sections.push_str("\n## Custom HTTP Headers\n");
sections.push_str("Include these headers in all HTTP requests:\n");
for (k, v) in &config.custom_headers {
sections.push_str(&format!("- `{k}: {v}`\n"));
}
}
// Scope exclusions
if !config.scope_exclusions.is_empty() {
sections.push_str("\n## Scope Exclusions\n");
sections.push_str("Do NOT test the following paths:\n");
for path in &config.scope_exclusions {
sections.push_str(&format!("- `{path}`\n"));
}
}
// Git context
if config.git_repo_url.is_some() || config.branch.is_some() || config.commit_hash.is_some() {
sections.push_str("\n## Git Context\n");
if let Some(ref url) = config.git_repo_url {
sections.push_str(&format!("- **Repository**: {url}\n"));
}
if let Some(ref branch) = config.branch {
sections.push_str(&format!("- **Branch**: {branch}\n"));
}
if let Some(ref commit) = config.commit_hash {
sections.push_str(&format!("- **Commit**: {commit}\n"));
}
}
// Environment
sections.push_str(&format!(
"\n## Environment\n- **Target environment**: {}\n",
config.environment
));
sections
}
/// Return strategy guidance text for the given strategy.
fn strategy_guidance(strategy: &PentestStrategy) -> &'static str {
match strategy {
PentestStrategy::Quick => {
"Focus on the most common and impactful vulnerabilities. Run a quick recon, then target the highest-risk areas."
}
PentestStrategy::Comprehensive => {
"Perform a thorough assessment covering all vulnerability types. Start with recon, then systematically test each attack surface."
}
PentestStrategy::Targeted => {
"Focus specifically on areas highlighted by SAST findings and known CVEs. Prioritize exploiting known weaknesses."
}
PentestStrategy::Aggressive => {
"Use all available tools aggressively. Test with maximum payloads and attempt full exploitation."
}
PentestStrategy::Stealth => {
"Minimize noise. Use fewer requests, avoid aggressive payloads. Focus on passive analysis and targeted probes."
}
}
}
/// Build the SAST findings section for the system prompt.
fn build_sast_section(sast_findings: &[Finding]) -> String {
if sast_findings.is_empty() {
return String::from("No SAST findings available for this target.");
}
let critical = sast_findings
.iter()
.filter(|f| f.severity == Severity::Critical)
.count();
let high = sast_findings
.iter()
.filter(|f| f.severity == Severity::High)
.count();
let mut section = format!(
"{} open findings ({} critical, {} high):\n",
sast_findings.len(),
critical,
high
);
// List the most important findings (critical/high first, up to 20)
for f in sast_findings.iter().take(20) {
let file_info = f
.file_path
.as_ref()
.map(|p| format!(" in {}:{}", p, f.line_number.unwrap_or(0)))
.unwrap_or_default();
let status_note = match f.status {
FindingStatus::Triaged => " [TRIAGED]",
_ => "",
};
section.push_str(&format!(
"- [{sev}] {title}{file}{status}\n",
sev = f.severity,
title = f.title,
file = file_info,
status = status_note,
));
if let Some(cwe) = &f.cwe {
section.push_str(&format!(" CWE: {cwe}\n"));
}
}
if sast_findings.len() > 20 {
section.push_str(&format!(
"... and {} more findings\n",
sast_findings.len() - 20
));
}
section
}
/// Build the SBOM/CVE section for the system prompt.
fn build_sbom_section(sbom_entries: &[SbomEntry]) -> String {
if sbom_entries.is_empty() {
return String::from("No vulnerable dependencies identified.");
}
let mut section = format!(
"{} dependencies with known vulnerabilities:\n",
sbom_entries.len()
);
for entry in sbom_entries.iter().take(15) {
let cve_ids: Vec<&str> = entry
.known_vulnerabilities
.iter()
.map(|v| v.id.as_str())
.collect();
section.push_str(&format!(
"- {} {} ({}): {}\n",
entry.name,
entry.version,
entry.package_manager,
cve_ids.join(", ")
));
}
if sbom_entries.len() > 15 {
section.push_str(&format!(
"... and {} more vulnerable dependencies\n",
sbom_entries.len() - 15
));
}
section
}
/// Build the code context section for the system prompt.
fn build_code_section(code_context: &[CodeContextHint]) -> String {
if code_context.is_empty() {
return String::from("No code knowledge graph available for this target.");
}
let with_vulns = code_context
.iter()
.filter(|c| !c.known_vulnerabilities.is_empty())
.count();
let mut section = format!(
"{} entry points identified ({} with linked SAST findings):\n",
code_context.len(),
with_vulns
);
for hint in code_context.iter().take(20) {
section.push_str(&format!(
"- {} ({})\n",
hint.endpoint_pattern, hint.file_path
));
for vuln in &hint.known_vulnerabilities {
section.push_str(&format!(" SAST: {vuln}\n"));
}
}
section
}
impl PentestOrchestrator {
pub(crate) async fn build_system_prompt(
&self,
session: &PentestSession,
target: &DastTarget,
sast_findings: &[Finding],
sbom_entries: &[SbomEntry],
code_context: &[CodeContextHint],
) -> String {
let tool_names = self.tool_registry.list_names().join(", ");
let guidance = strategy_guidance(&session.strategy);
let sast_section = build_sast_section(sast_findings);
let sbom_section = build_sbom_section(sbom_entries);
let code_section = build_code_section(code_context);
let config_sections = session
.config
.as_ref()
.map(build_config_sections)
.unwrap_or_default();
format!(
r#"You are an expert penetration tester conducting an authorized security assessment.
## Target
- **Name**: {target_name}
- **URL**: {base_url}
- **Type**: {target_type}
- **Rate Limit**: {rate_limit} req/s
- **Destructive Tests Allowed**: {allow_destructive}
- **Linked Repository**: {repo_linked}
## Strategy
{strategy_guidance}
## SAST Findings (Static Analysis)
{sast_section}
## Vulnerable Dependencies (SBOM)
{sbom_section}
## Code Entry Points (Knowledge Graph)
{code_section}
{config_sections}
## Available Tools
{tool_names}
## Instructions
1. Start by running reconnaissance (recon tool) to fingerprint the target and discover technologies.
2. Run the OpenAPI parser to discover API endpoints from specs.
3. Check infrastructure: DNS, DMARC, TLS, security headers, cookies, CSP, CORS.
4. If the target requires authentication (auto-register mode), use the browser tool to:
a. Navigate to the target — it will redirect to the login page.
b. Click the "Register" link to reach the registration form.
c. Fill all form fields (username, email with plus-addressing, password, name) one by one.
d. Click submit. If a Terms & Conditions page appears, accept it.
e. After registration, use the browser to navigate through the application pages.
f. **Take a screenshot after each major page** for evidence in the report.
5. Use the browser tool to explore the authenticated application — navigate to each section,
use get_content to understand the page structure, and take screenshots.
6. Based on SAST findings, prioritize testing endpoints where vulnerabilities were found in code.
7. For each vulnerability type found in SAST, use the corresponding DAST tool to verify exploitability.
8. If vulnerable dependencies are listed, try to trigger known CVE conditions against the running application.
9. Test rate limiting on critical endpoints (login, API).
10. Check for console.log leakage in frontend JavaScript.
11. Analyze tool results and chain findings — if one vulnerability enables others, explore the chain.
12. When testing is complete, provide a structured summary with severity and remediation.
13. Always explain your reasoning before invoking each tool.
14. When done, say "Testing complete" followed by a final summary.
## Browser Tool Usage
- The browser tab **persists** between calls — cookies and login state are preserved.
- After navigate, the response includes `elements` (links, inputs, buttons on the page).
- Use `get_content` to see forms, links, buttons, headings, and page text.
- Use `click` with CSS selectors to interact (e.g., `a:text('Register')`, `input[type='submit']`).
- Use `fill` with selector + value to fill form fields (e.g., `input[name='email']`).
- **Take screenshots** (`action: screenshot`) after important actions for evidence.
- For SPA apps: a 200 HTTP status does NOT mean the page is accessible — check the actual
page content with the browser tool to verify if it shows real data or a login redirect.
## Finding Quality Rules
- **Do not report the same issue twice.** If multiple tools detect the same missing header or
vulnerability on the same endpoint, report it ONCE with the most specific tool's output.
For example, if the recon tool and the header scanner both find missing HSTS, report it only
from the header scanner (more specific).
- **Group related findings.** Missing security headers on the same endpoint are ONE finding
("Missing security headers") listing all missing headers, not separate findings per header.
- **Severity must match real impact:**
- critical/high: Exploitable vulnerability (you can demonstrate the exploit)
- medium: Real misconfiguration with security implications but not directly exploitable
- low: Best-practice recommendation, defense-in-depth, or informational
- **Missing headers are medium at most** unless you can demonstrate a concrete exploit enabled
by the missing header (e.g., missing CSP + confirmed XSS = high for CSP finding).
- Console.log in third-party/vendored JS (node_modules, minified libraries) is informational only.
## Important
- This is an authorized penetration test. All testing is permitted within the target scope.
- Respect the rate limit of {rate_limit} requests per second.
- Only use destructive tests if explicitly allowed ({allow_destructive}).
- Use SAST findings to guide your testing — they tell you WHERE in the code vulnerabilities exist.
- Use SBOM data to understand what technologies and versions the target runs.
"#,
target_name = target.name,
base_url = target.base_url,
target_type = target.target_type,
rate_limit = target.rate_limit,
allow_destructive = target.allow_destructive,
repo_linked = target.repo_id.as_deref().unwrap_or("None"),
strategy_guidance = guidance,
)
}
}
#[cfg(test)]
mod tests {
use super::*;
use compliance_core::models::finding::Severity;
use compliance_core::models::sbom::VulnRef;
use compliance_core::models::scan::ScanType;
fn make_finding(
severity: Severity,
title: &str,
file_path: Option<&str>,
line: Option<u32>,
status: FindingStatus,
cwe: Option<&str>,
) -> Finding {
let mut f = Finding::new(
"repo-1".into(),
format!("fp-{title}"),
"semgrep".into(),
ScanType::Sast,
title.into(),
"desc".into(),
severity,
);
f.file_path = file_path.map(|s| s.to_string());
f.line_number = line;
f.status = status;
f.cwe = cwe.map(|s| s.to_string());
f
}
fn make_sbom_entry(name: &str, version: &str, cves: &[&str]) -> SbomEntry {
let mut entry = SbomEntry::new("repo-1".into(), name.into(), version.into(), "npm".into());
entry.known_vulnerabilities = cves
.iter()
.map(|id| VulnRef {
id: id.to_string(),
source: "nvd".into(),
severity: None,
url: None,
})
.collect();
entry
}
fn make_code_hint(endpoint: &str, file: &str, vulns: Vec<String>) -> CodeContextHint {
CodeContextHint {
endpoint_pattern: endpoint.into(),
handler_function: "handler".into(),
file_path: file.into(),
code_snippet: String::new(),
known_vulnerabilities: vulns,
}
}
// ── strategy_guidance ────────────────────────────────────────────
#[test]
fn strategy_guidance_quick() {
let g = strategy_guidance(&PentestStrategy::Quick);
assert!(g.contains("most common"));
assert!(g.contains("quick recon"));
}
#[test]
fn strategy_guidance_comprehensive() {
let g = strategy_guidance(&PentestStrategy::Comprehensive);
assert!(g.contains("thorough assessment"));
}
#[test]
fn strategy_guidance_targeted() {
let g = strategy_guidance(&PentestStrategy::Targeted);
assert!(g.contains("SAST findings"));
assert!(g.contains("known CVEs"));
}
#[test]
fn strategy_guidance_aggressive() {
let g = strategy_guidance(&PentestStrategy::Aggressive);
assert!(g.contains("aggressively"));
assert!(g.contains("full exploitation"));
}
#[test]
fn strategy_guidance_stealth() {
let g = strategy_guidance(&PentestStrategy::Stealth);
assert!(g.contains("Minimize noise"));
assert!(g.contains("passive analysis"));
}
// ── build_sast_section ───────────────────────────────────────────
#[test]
fn sast_section_empty() {
let section = build_sast_section(&[]);
assert_eq!(section, "No SAST findings available for this target.");
}
#[test]
fn sast_section_single_critical() {
let findings = vec![make_finding(
Severity::Critical,
"SQL Injection",
Some("src/db.rs"),
Some(42),
FindingStatus::Open,
Some("CWE-89"),
)];
let section = build_sast_section(&findings);
assert!(section.contains("1 open findings (1 critical, 0 high)"));
assert!(section.contains("[critical] SQL Injection in src/db.rs:42"));
assert!(section.contains("CWE: CWE-89"));
}
#[test]
fn sast_section_triaged_finding_shows_marker() {
let findings = vec![make_finding(
Severity::High,
"XSS",
None,
None,
FindingStatus::Triaged,
None,
)];
let section = build_sast_section(&findings);
assert!(section.contains("[TRIAGED]"));
}
#[test]
fn sast_section_no_file_path_omits_location() {
let findings = vec![make_finding(
Severity::Medium,
"Open Redirect",
None,
None,
FindingStatus::Open,
None,
)];
let section = build_sast_section(&findings);
assert!(section.contains("- [medium] Open Redirect\n"));
assert!(!section.contains(" in "));
}
#[test]
fn sast_section_counts_critical_and_high() {
let findings = vec![
make_finding(
Severity::Critical,
"F1",
None,
None,
FindingStatus::Open,
None,
),
make_finding(
Severity::Critical,
"F2",
None,
None,
FindingStatus::Open,
None,
),
make_finding(Severity::High, "F3", None, None, FindingStatus::Open, None),
make_finding(
Severity::Medium,
"F4",
None,
None,
FindingStatus::Open,
None,
),
];
let section = build_sast_section(&findings);
assert!(section.contains("4 open findings (2 critical, 1 high)"));
}
#[test]
fn sast_section_truncates_at_20() {
let findings: Vec<Finding> = (0..25)
.map(|i| {
make_finding(
Severity::Low,
&format!("Finding {i}"),
None,
None,
FindingStatus::Open,
None,
)
})
.collect();
let section = build_sast_section(&findings);
assert!(section.contains("... and 5 more findings"));
// Should contain Finding 19 (the 20th) but not Finding 20 (the 21st)
assert!(section.contains("Finding 19"));
assert!(!section.contains("Finding 20"));
}
// ── build_sbom_section ───────────────────────────────────────────
#[test]
fn sbom_section_empty() {
let section = build_sbom_section(&[]);
assert_eq!(section, "No vulnerable dependencies identified.");
}
#[test]
fn sbom_section_single_entry() {
let entries = vec![make_sbom_entry("lodash", "4.17.20", &["CVE-2021-23337"])];
let section = build_sbom_section(&entries);
assert!(section.contains("1 dependencies with known vulnerabilities"));
assert!(section.contains("- lodash 4.17.20 (npm): CVE-2021-23337"));
}
#[test]
fn sbom_section_multiple_cves() {
let entries = vec![make_sbom_entry(
"openssl",
"1.1.1",
&["CVE-2022-0001", "CVE-2022-0002"],
)];
let section = build_sbom_section(&entries);
assert!(section.contains("CVE-2022-0001, CVE-2022-0002"));
}
#[test]
fn sbom_section_truncates_at_15() {
let entries: Vec<SbomEntry> = (0..18)
.map(|i| make_sbom_entry(&format!("pkg-{i}"), "1.0.0", &["CVE-2024-0001"]))
.collect();
let section = build_sbom_section(&entries);
assert!(section.contains("... and 3 more vulnerable dependencies"));
assert!(section.contains("pkg-14"));
assert!(!section.contains("pkg-15"));
}
// ── build_code_section ───────────────────────────────────────────
#[test]
fn code_section_empty() {
let section = build_code_section(&[]);
assert_eq!(
section,
"No code knowledge graph available for this target."
);
}
#[test]
fn code_section_single_entry_no_vulns() {
let hints = vec![make_code_hint("GET /api/users", "src/routes.rs", vec![])];
let section = build_code_section(&hints);
assert!(section.contains("1 entry points identified (0 with linked SAST findings)"));
assert!(section.contains("- GET /api/users (src/routes.rs)"));
}
#[test]
fn code_section_with_linked_vulns() {
let hints = vec![make_code_hint(
"POST /login",
"src/auth.rs",
vec!["[critical] semgrep: SQL Injection (line 15)".into()],
)];
let section = build_code_section(&hints);
assert!(section.contains("1 entry points identified (1 with linked SAST findings)"));
assert!(section.contains("SAST: [critical] semgrep: SQL Injection (line 15)"));
}
#[test]
fn code_section_counts_entries_with_vulns() {
let hints = vec![
make_code_hint("GET /a", "a.rs", vec!["vuln1".into()]),
make_code_hint("GET /b", "b.rs", vec![]),
make_code_hint("GET /c", "c.rs", vec!["vuln2".into(), "vuln3".into()]),
];
let section = build_code_section(&hints);
assert!(section.contains("3 entry points identified (2 with linked SAST findings)"));
}
#[test]
fn code_section_truncates_at_20() {
let hints: Vec<CodeContextHint> = (0..25)
.map(|i| make_code_hint(&format!("GET /ep{i}"), &format!("f{i}.rs"), vec![]))
.collect();
let section = build_code_section(&hints);
assert!(section.contains("GET /ep19"));
assert!(!section.contains("GET /ep20"));
}
}

View File

@@ -1,43 +0,0 @@
use std::io::{Cursor, Write};
use zip::write::SimpleFileOptions;
use zip::AesMode;
use super::ReportContext;
pub(super) fn build_zip(
ctx: &ReportContext,
password: &str,
html: &str,
pdf: &[u8],
) -> Result<Vec<u8>, zip::result::ZipError> {
let buf = Cursor::new(Vec::new());
let mut zip = zip::ZipWriter::new(buf);
let options = SimpleFileOptions::default()
.compression_method(zip::CompressionMethod::Deflated)
.with_aes_encryption(AesMode::Aes256, password);
// report.pdf (primary)
zip.start_file("report.pdf", options)?;
zip.write_all(pdf)?;
// report.html (fallback)
zip.start_file("report.html", options)?;
zip.write_all(html.as_bytes())?;
// findings.json
let findings_json =
serde_json::to_string_pretty(&ctx.findings).unwrap_or_else(|_| "[]".to_string());
zip.start_file("findings.json", options)?;
zip.write_all(findings_json.as_bytes())?;
// attack-chain.json
let chain_json =
serde_json::to_string_pretty(&ctx.attack_chain).unwrap_or_else(|_| "[]".to_string());
zip.start_file("attack-chain.json", options)?;
zip.write_all(chain_json.as_bytes())?;
let cursor = zip.finish()?;
Ok(cursor.into_inner())
}

View File

@@ -1,40 +0,0 @@
use super::html_escape;
pub(super) fn appendix(session_id: &str) -> String {
format!(
r##"<!-- ═══════════════ 5. APPENDIX ═══════════════ -->
<div class="page-break"></div>
<h2><span class="section-num">5.</span> Appendix</h2>
<h3>Severity Definitions</h3>
<table class="info">
<tr><td style="color: var(--sev-critical); font-weight: 700;">Critical</td><td>Vulnerabilities that can be exploited remotely without authentication to execute arbitrary code, exfiltrate sensitive data, or fully compromise the system.</td></tr>
<tr><td style="color: var(--sev-high); font-weight: 700;">High</td><td>Vulnerabilities that allow significant unauthorized access or data exposure, typically requiring minimal user interaction or privileges.</td></tr>
<tr><td style="color: var(--sev-medium); font-weight: 700;">Medium</td><td>Vulnerabilities that may lead to limited data exposure or require specific conditions to exploit, but still represent meaningful risk.</td></tr>
<tr><td style="color: var(--sev-low); font-weight: 700;">Low</td><td>Minor issues with limited direct impact. May contribute to broader attack chains or indicate defense-in-depth weaknesses.</td></tr>
<tr><td style="color: var(--sev-info); font-weight: 700;">Info</td><td>Observations and best-practice recommendations that do not represent direct security vulnerabilities.</td></tr>
</table>
<h3>Disclaimer</h3>
<p style="font-size: 9pt; color: var(--text-secondary);">
This report was generated by an automated AI-powered penetration testing engine. While the system
employs advanced techniques to identify vulnerabilities, no automated assessment can guarantee
complete coverage. The results should be reviewed by qualified security professionals and validated
in the context of the target application's threat model. Findings are point-in-time observations
and may change as the application evolves.
</p>
<!-- ═══════════════ FOOTER ═══════════════ -->
<div class="report-footer">
<div class="footer-company">Compliance Scanner</div>
<div>AI-Powered Security Assessment Platform</div>
<div style="margin-top: 6px;">This document is confidential and intended solely for the named recipient.</div>
<div>Report ID: {session_id}</div>
</div>
</div><!-- .report-body -->
</body>
</html>"##,
session_id = html_escape(session_id),
)
}

View File

@@ -1,193 +0,0 @@
use super::html_escape;
use compliance_core::models::pentest::AttackChainNode;
pub(super) fn attack_chain(chain: &[AttackChainNode]) -> String {
let chain_section = if chain.is_empty() {
r#"<p style="color: var(--text-muted);">No attack chain steps recorded.</p>"#.to_string()
} else {
build_chain_html(chain)
};
format!(
r##"<!-- ═══════════════ 4. ATTACK CHAIN ═══════════════ -->
<div class="page-break"></div>
<h2><span class="section-num">4.</span> Attack Chain Timeline</h2>
<p>
The following sequence shows each tool invocation made by the AI orchestrator during the assessment,
grouped by phase. Each step includes the tool's name, execution status, and the AI's reasoning
for choosing that action.
</p>
<div style="margin-top: 16px;">
{chain_section}
</div>"##
)
}
fn build_chain_html(chain: &[AttackChainNode]) -> String {
let mut chain_html = String::new();
// Compute phases via BFS from root nodes
let mut phase_map: std::collections::HashMap<String, usize> = std::collections::HashMap::new();
let mut queue: std::collections::VecDeque<String> = std::collections::VecDeque::new();
for node in chain {
if node.parent_node_ids.is_empty() {
let nid = node.node_id.clone();
if !nid.is_empty() {
phase_map.insert(nid.clone(), 0);
queue.push_back(nid);
}
}
}
while let Some(nid) = queue.pop_front() {
let parent_phase = phase_map.get(&nid).copied().unwrap_or(0);
for node in chain {
if node.parent_node_ids.contains(&nid) {
let child_id = node.node_id.clone();
if !child_id.is_empty() && !phase_map.contains_key(&child_id) {
phase_map.insert(child_id.clone(), parent_phase + 1);
queue.push_back(child_id);
}
}
}
}
// Assign phase 0 to any unassigned nodes
for node in chain {
let nid = node.node_id.clone();
if !nid.is_empty() && !phase_map.contains_key(&nid) {
phase_map.insert(nid, 0);
}
}
// Group nodes by phase
let max_phase = phase_map.values().copied().max().unwrap_or(0);
let phase_labels = [
"Reconnaissance",
"Enumeration",
"Exploitation",
"Validation",
"Post-Exploitation",
];
for phase_idx in 0..=max_phase {
let phase_nodes: Vec<&AttackChainNode> = chain
.iter()
.filter(|n| {
let nid = n.node_id.clone();
phase_map.get(&nid).copied().unwrap_or(0) == phase_idx
})
.collect();
if phase_nodes.is_empty() {
continue;
}
let label = if phase_idx < phase_labels.len() {
phase_labels[phase_idx]
} else {
"Additional Testing"
};
chain_html.push_str(&format!(
r#"<div class="phase-block">
<div class="phase-header">
<span class="phase-num">Phase {}</span>
<span class="phase-label">{}</span>
<span class="phase-count">{} step{}</span>
</div>
<div class="phase-steps">"#,
phase_idx + 1,
label,
phase_nodes.len(),
if phase_nodes.len() == 1 { "" } else { "s" },
));
for (i, node) in phase_nodes.iter().enumerate() {
let status_label = format!("{:?}", node.status);
let status_class = match status_label.to_lowercase().as_str() {
"completed" => "step-completed",
"failed" => "step-failed",
_ => "step-running",
};
let findings_badge = if !node.findings_produced.is_empty() {
format!(
r#"<span class="step-findings">{} finding{}</span>"#,
node.findings_produced.len(),
if node.findings_produced.len() == 1 {
""
} else {
"s"
},
)
} else {
String::new()
};
let risk_badge = node
.risk_score
.map(|r| {
let risk_class = if r >= 70 {
"risk-high"
} else if r >= 40 {
"risk-med"
} else {
"risk-low"
};
format!(r#"<span class="step-risk {risk_class}">Risk: {r}</span>"#)
})
.unwrap_or_default();
let reasoning_html = if node.llm_reasoning.is_empty() {
String::new()
} else {
format!(
r#"<div class="step-reasoning">{}</div>"#,
html_escape(&node.llm_reasoning)
)
};
// Render inline screenshot if this is a browser screenshot action
let screenshot_html = if node.tool_name == "browser" {
node.tool_output
.as_ref()
.and_then(|out| out.get("screenshot_base64"))
.and_then(|v| v.as_str())
.filter(|s| !s.is_empty())
.map(|b64| {
format!(
r#"<div class="step-screenshot"><img src="data:image/png;base64,{b64}" alt="Browser screenshot" style="max-width:100%;border:1px solid #e2e8f0;border-radius:6px;margin-top:8px;"/></div>"#
)
})
.unwrap_or_default()
} else {
String::new()
};
chain_html.push_str(&format!(
r#"<div class="step-row">
<div class="step-num">{num}</div>
<div class="step-connector"></div>
<div class="step-content">
<div class="step-header">
<span class="step-tool">{tool_name}</span>
<span class="step-status {status_class}">{status_label}</span>
{findings_badge}
{risk_badge}
</div>
{reasoning_html}
{screenshot_html}
</div>
</div>"#,
num = i + 1,
tool_name = html_escape(&node.tool_name),
));
}
chain_html.push_str("</div></div>");
}
chain_html
}

View File

@@ -1,69 +0,0 @@
use super::html_escape;
pub(super) fn cover(
target_name: &str,
session_id: &str,
date_short: &str,
target_url: &str,
requester_name: &str,
requester_email: &str,
app_screenshot_b64: Option<&str>,
) -> String {
let screenshot_html = app_screenshot_b64
.filter(|s| !s.is_empty())
.map(|b64| {
format!(
r#"<div style="margin: 20px auto; max-width: 560px; border: 1px solid #cbd5e1; border-radius: 8px; overflow: hidden; box-shadow: 0 4px 12px rgba(0,0,0,0.08);">
<img src="data:image/png;base64,{b64}" alt="Application screenshot" style="width:100%;display:block;"/>
</div>"#
)
})
.unwrap_or_default();
format!(
r##"<!-- ═══════════════ COVER PAGE ═══════════════ -->
<div class="cover">
<svg class="cover-shield" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 96 96">
<defs>
<linearGradient id="sg" x1="0%" y1="0%" x2="100%" y2="100%">
<stop offset="0%" stop-color="#0d2137"/>
<stop offset="100%" stop-color="#1a56db"/>
</linearGradient>
</defs>
<path d="M48 6 L22 22 L22 48 C22 66 34 80 48 86 C62 80 74 66 74 48 L74 22 Z"
fill="none" stroke="url(#sg)" stroke-width="3.5" stroke-linejoin="round"/>
<path d="M48 12 L26 26 L26 47 C26 63 36 76 48 82 C60 76 70 63 70 47 L70 26 Z"
fill="url(#sg)" opacity="0.07"/>
<circle cx="44" cy="44" r="11" fill="none" stroke="#0d2137" stroke-width="2.5"/>
<line x1="52" y1="52" x2="62" y2="62" stroke="#0d2137" stroke-width="2.5" stroke-linecap="round"/>
<path d="M39 44 L42.5 47.5 L49 41" fill="none" stroke="#166534" stroke-width="2.5"
stroke-linecap="round" stroke-linejoin="round"/>
</svg>
<div class="cover-tag">CONFIDENTIAL</div>
<div class="cover-title">Penetration Test Report</div>
<div class="cover-subtitle">{target_name}</div>
<div class="cover-divider"></div>
<div class="cover-meta">
<strong>Report ID:</strong> {session_id}<br>
<strong>Date:</strong> {date_short}<br>
<strong>Target:</strong> {target_url}<br>
<strong>Prepared for:</strong> {requester_name} ({requester_email})
</div>
{screenshot_html}
<div class="cover-footer">
Compliance Scanner &mdash; AI-Powered Security Assessment Platform
</div>
</div>"##,
target_name = html_escape(target_name),
session_id = html_escape(session_id),
date_short = date_short,
target_url = html_escape(target_url),
requester_name = html_escape(requester_name),
requester_email = html_escape(requester_email),
)
}

View File

@@ -1,238 +0,0 @@
use super::html_escape;
use compliance_core::models::dast::DastFinding;
pub(super) fn executive_summary(
findings: &[DastFinding],
target_name: &str,
target_url: &str,
tool_count: usize,
tool_invocations: u32,
success_rate: f64,
) -> String {
let critical = findings
.iter()
.filter(|f| f.severity.to_string() == "critical")
.count();
let high = findings
.iter()
.filter(|f| f.severity.to_string() == "high")
.count();
let medium = findings
.iter()
.filter(|f| f.severity.to_string() == "medium")
.count();
let low = findings
.iter()
.filter(|f| f.severity.to_string() == "low")
.count();
let info = findings
.iter()
.filter(|f| f.severity.to_string() == "info")
.count();
let exploitable = findings.iter().filter(|f| f.exploitable).count();
let total = findings.len();
let overall_risk = if critical > 0 {
"CRITICAL"
} else if high > 0 {
"HIGH"
} else if medium > 0 {
"MEDIUM"
} else if low > 0 {
"LOW"
} else {
"INFORMATIONAL"
};
let risk_color = match overall_risk {
"CRITICAL" => "#991b1b",
"HIGH" => "#c2410c",
"MEDIUM" => "#a16207",
"LOW" => "#1d4ed8",
_ => "#4b5563",
};
let risk_score: usize =
std::cmp::min(100, critical * 25 + high * 15 + medium * 8 + low * 3 + info);
let severity_bar = build_severity_bar(critical, high, medium, low, info, total);
// Table of contents finding sub-entries
let severity_order = ["critical", "high", "medium", "low", "info"];
let toc_findings_sub = if !findings.is_empty() {
let mut sub = String::new();
let mut fnum = 0usize;
for &sev_key in severity_order.iter() {
let count = findings
.iter()
.filter(|f| f.severity.to_string() == sev_key)
.count();
if count == 0 {
continue;
}
for f in findings
.iter()
.filter(|f| f.severity.to_string() == sev_key)
{
fnum += 1;
sub.push_str(&format!(
r#"<div class="toc-sub">F-{:03} — {}</div>"#,
fnum,
html_escape(&f.title),
));
}
}
sub
} else {
String::new()
};
let critical_high_str = format!("{} / {}", critical, high);
let escaped_target_name = html_escape(target_name);
let escaped_target_url = html_escape(target_url);
format!(
r##"<!-- ═══════════════ TABLE OF CONTENTS ═══════════════ -->
<div class="report-body">
<div class="toc">
<h2>Table of Contents</h2>
<div class="toc-entry"><span class="toc-num">1</span><span class="toc-label">Executive Summary</span></div>
<div class="toc-entry"><span class="toc-num">2</span><span class="toc-label">Scope &amp; Methodology</span></div>
<div class="toc-entry"><span class="toc-num">3</span><span class="toc-label">Findings ({total_findings})</span></div>
{toc_findings_sub}
<div class="toc-entry"><span class="toc-num">4</span><span class="toc-label">Attack Chain Timeline</span></div>
<div class="toc-entry"><span class="toc-num">5</span><span class="toc-label">Appendix</span></div>
</div>
<!-- ═══════════════ 1. EXECUTIVE SUMMARY ═══════════════ -->
<h2><span class="section-num">1.</span> Executive Summary</h2>
<div class="risk-gauge">
<div class="risk-gauge-meter">
<div class="risk-gauge-track">
<div class="risk-gauge-fill" style="width: {risk_score}%; background: {risk_color};"></div>
</div>
<div class="risk-gauge-score" style="color: {risk_color};">{risk_score} / 100</div>
</div>
<div class="risk-gauge-text">
<div class="risk-gauge-label" style="color: {risk_color};">Overall Risk: {overall_risk}</div>
<div class="risk-gauge-desc">
Based on {total_findings} finding{findings_plural} identified across the target application.
</div>
</div>
</div>
<div class="exec-grid">
<div class="kpi-card">
<div class="kpi-value">{total_findings}</div>
<div class="kpi-label">Total Findings</div>
</div>
<div class="kpi-card">
<div class="kpi-value" style="color: var(--sev-critical);">{critical_high}</div>
<div class="kpi-label">Critical / High</div>
</div>
<div class="kpi-card">
<div class="kpi-value" style="color: var(--sev-critical);">{exploitable_count}</div>
<div class="kpi-label">Exploitable</div>
</div>
<div class="kpi-card">
<div class="kpi-value">{tool_count}</div>
<div class="kpi-label">Tools Used</div>
</div>
</div>
<h3>Severity Distribution</h3>
{severity_bar}
<p>
This report presents the results of an automated penetration test conducted against
<strong>{target_name}</strong> (<code>{target_url}</code>) using the Compliance Scanner
AI-powered testing engine. A total of <strong>{total_findings} vulnerabilities</strong> were
identified, of which <strong>{exploitable_count}</strong> were confirmed exploitable with
working proof-of-concept payloads. The assessment employed <strong>{tool_count} security tools</strong>
across <strong>{tool_invocations} invocations</strong> ({success_rate:.0}% success rate).
</p>"##,
total_findings = total,
findings_plural = if total == 1 { "" } else { "s" },
critical_high = critical_high_str,
exploitable_count = exploitable,
target_name = escaped_target_name,
target_url = escaped_target_url,
)
}
fn build_severity_bar(
critical: usize,
high: usize,
medium: usize,
low: usize,
info: usize,
total: usize,
) -> String {
if total == 0 {
return String::new();
}
let crit_pct = (critical as f64 / total as f64 * 100.0) as usize;
let high_pct = (high as f64 / total as f64 * 100.0) as usize;
let med_pct = (medium as f64 / total as f64 * 100.0) as usize;
let low_pct = (low as f64 / total as f64 * 100.0) as usize;
let info_pct = 100_usize.saturating_sub(crit_pct + high_pct + med_pct + low_pct);
let mut bar = String::from(r#"<div class="sev-bar">"#);
if critical > 0 {
bar.push_str(&format!(
r#"<div class="sev-bar-seg sev-bar-critical" style="width:{}%"><span>{}</span></div>"#,
std::cmp::max(crit_pct, 4),
critical
));
}
if high > 0 {
bar.push_str(&format!(
r#"<div class="sev-bar-seg sev-bar-high" style="width:{}%"><span>{}</span></div>"#,
std::cmp::max(high_pct, 4),
high
));
}
if medium > 0 {
bar.push_str(&format!(
r#"<div class="sev-bar-seg sev-bar-medium" style="width:{}%"><span>{}</span></div>"#,
std::cmp::max(med_pct, 4),
medium
));
}
if low > 0 {
bar.push_str(&format!(
r#"<div class="sev-bar-seg sev-bar-low" style="width:{}%"><span>{}</span></div>"#,
std::cmp::max(low_pct, 4),
low
));
}
if info > 0 {
bar.push_str(&format!(
r#"<div class="sev-bar-seg sev-bar-info" style="width:{}%"><span>{}</span></div>"#,
std::cmp::max(info_pct, 4),
info
));
}
bar.push_str("</div>");
bar.push_str(r#"<div class="sev-bar-legend">"#);
if critical > 0 {
bar.push_str(r#"<span><i class="sev-dot" style="background:#991b1b"></i> Critical</span>"#);
}
if high > 0 {
bar.push_str(r#"<span><i class="sev-dot" style="background:#c2410c"></i> High</span>"#);
}
if medium > 0 {
bar.push_str(r#"<span><i class="sev-dot" style="background:#a16207"></i> Medium</span>"#);
}
if low > 0 {
bar.push_str(r#"<span><i class="sev-dot" style="background:#1d4ed8"></i> Low</span>"#);
}
if info > 0 {
bar.push_str(r#"<span><i class="sev-dot" style="background:#4b5563"></i> Info</span>"#);
}
bar.push_str("</div>");
bar
}

View File

@@ -1,522 +0,0 @@
use super::html_escape;
use compliance_core::models::dast::DastFinding;
use compliance_core::models::finding::Finding;
use compliance_core::models::pentest::CodeContextHint;
use compliance_core::models::sbom::SbomEntry;
/// Render the findings section with code-level correlation.
///
/// For each DAST finding, if a linked SAST finding exists (via `linked_sast_finding_id`)
/// or if we can match the endpoint to a code entry point, we render a "Code-Level
/// Remediation" block showing the exact file, line, code snippet, and suggested fix.
pub(super) fn findings(
findings_list: &[DastFinding],
sast_findings: &[Finding],
code_context: &[CodeContextHint],
sbom_entries: &[SbomEntry],
) -> String {
if findings_list.is_empty() {
return r#"<!-- ═══════════════ 3. FINDINGS ═══════════════ -->
<div class="page-break"></div>
<h2><span class="section-num">3.</span> Findings</h2>
<p style="color: var(--text-muted);">No vulnerabilities were identified during this assessment.</p>"#.to_string();
}
let severity_order = ["critical", "high", "medium", "low", "info"];
let severity_labels = ["Critical", "High", "Medium", "Low", "Informational"];
let severity_colors = ["#991b1b", "#c2410c", "#a16207", "#1d4ed8", "#4b5563"];
// Build SAST lookup by ObjectId hex string
let sast_by_id: std::collections::HashMap<String, &Finding> = sast_findings
.iter()
.filter_map(|f| {
let id = f.id.as_ref()?.to_hex();
Some((id, f))
})
.collect();
let mut findings_html = String::new();
let mut finding_num = 0usize;
for (si, &sev_key) in severity_order.iter().enumerate() {
let sev_findings: Vec<&DastFinding> = findings_list
.iter()
.filter(|f| f.severity.to_string() == sev_key)
.collect();
if sev_findings.is_empty() {
continue;
}
findings_html.push_str(&format!(
r#"<h4 class="sev-group-title" style="border-color: {color}">{label} ({count})</h4>"#,
color = severity_colors[si],
label = severity_labels[si],
count = sev_findings.len(),
));
for f in sev_findings {
finding_num += 1;
let sev_color = severity_colors[si];
let exploitable_badge = if f.exploitable {
r#"<span class="badge badge-exploit">EXPLOITABLE</span>"#
} else {
""
};
let cwe_cell = f
.cwe
.as_deref()
.map(|c| format!("<tr><td>CWE</td><td>{}</td></tr>", html_escape(c)))
.unwrap_or_default();
let param_row = f
.parameter
.as_deref()
.map(|p| {
format!(
"<tr><td>Parameter</td><td><code>{}</code></td></tr>",
html_escape(p)
)
})
.unwrap_or_default();
let remediation = f
.remediation
.as_deref()
.unwrap_or("Refer to industry best practices for this vulnerability class.");
let evidence_html = build_evidence_html(f);
// ── Code-level correlation ──────────────────────────────
let code_correlation =
build_code_correlation(f, &sast_by_id, code_context, sbom_entries);
findings_html.push_str(&format!(
r#"
<div class="finding" style="border-left-color: {sev_color}">
<div class="finding-header">
<span class="finding-id">F-{num:03}</span>
<span class="finding-title">{title}</span>
{exploitable_badge}
</div>
<table class="finding-meta">
<tr><td>Type</td><td>{vuln_type}</td></tr>
<tr><td>Endpoint</td><td><code>{method} {endpoint}</code></td></tr>
{param_row}
{cwe_cell}
</table>
<div class="finding-desc">{description}</div>
{evidence_html}
{code_correlation}
<div class="remediation">
<div class="remediation-label">Recommendation</div>
{remediation}
</div>
</div>
"#,
num = finding_num,
title = html_escape(&f.title),
vuln_type = f.vuln_type,
method = f.method,
endpoint = html_escape(&f.endpoint),
description = html_escape(&f.description),
));
}
}
format!(
r##"<!-- ═══════════════ 3. FINDINGS ═══════════════ -->
<div class="page-break"></div>
<h2><span class="section-num">3.</span> Findings</h2>
{findings_html}"##
)
}
/// Build the evidence table HTML for a finding.
fn build_evidence_html(f: &DastFinding) -> String {
if f.evidence.is_empty() {
return String::new();
}
let mut eh = String::from(
r#"<div class="evidence-block"><div class="evidence-title">Evidence</div><table class="evidence-table"><thead><tr><th>Request</th><th>Status</th><th>Details</th></tr></thead><tbody>"#,
);
for ev in &f.evidence {
let payload_info = ev
.payload
.as_deref()
.map(|p| {
format!(
"<br><span class=\"evidence-payload\">Payload: <code>{}</code></span>",
html_escape(p)
)
})
.unwrap_or_default();
eh.push_str(&format!(
"<tr><td><code>{} {}</code></td><td>{}</td><td>{}{}</td></tr>",
html_escape(&ev.request_method),
html_escape(&ev.request_url),
ev.response_status,
ev.response_snippet
.as_deref()
.map(html_escape)
.unwrap_or_default(),
payload_info,
));
}
eh.push_str("</tbody></table></div>");
eh
}
/// Build the code-level correlation block for a DAST finding.
///
/// Attempts correlation in priority order:
/// 1. Direct link via `linked_sast_finding_id` → shows exact file, line, snippet, suggested fix
/// 2. Endpoint match via code context → shows handler function, file, known SAST vulns
/// 3. CWE/CVE match to SBOM → shows vulnerable dependency + version to upgrade
fn build_code_correlation(
dast_finding: &DastFinding,
sast_by_id: &std::collections::HashMap<String, &Finding>,
code_context: &[CodeContextHint],
sbom_entries: &[SbomEntry],
) -> String {
let mut sections: Vec<String> = Vec::new();
// 1. Direct SAST link
if let Some(ref sast_id) = dast_finding.linked_sast_finding_id {
if let Some(sast) = sast_by_id.get(sast_id) {
let mut s = String::new();
s.push_str(r#"<div class="code-correlation-item">"#);
s.push_str(r#"<div class="code-correlation-badge">SAST Correlation</div>"#);
s.push_str("<table class=\"code-meta\">");
if let Some(ref fp) = sast.file_path {
let line_info = sast
.line_number
.map(|l| format!(":{l}"))
.unwrap_or_default();
s.push_str(&format!(
"<tr><td>Location</td><td><code>{}{}</code></td></tr>",
html_escape(fp),
line_info,
));
}
s.push_str(&format!(
"<tr><td>Scanner</td><td>{} &mdash; {}</td></tr>",
html_escape(&sast.scanner),
html_escape(&sast.title),
));
if let Some(ref cwe) = sast.cwe {
s.push_str(&format!(
"<tr><td>CWE</td><td>{}</td></tr>",
html_escape(cwe)
));
}
if let Some(ref rule) = sast.rule_id {
s.push_str(&format!(
"<tr><td>Rule</td><td><code>{}</code></td></tr>",
html_escape(rule)
));
}
s.push_str("</table>");
// Code snippet
if let Some(ref snippet) = sast.code_snippet {
if !snippet.is_empty() {
s.push_str(&format!(
"<div class=\"code-snippet-block\"><div class=\"code-snippet-label\">Vulnerable Code</div><pre class=\"code-snippet\">{}</pre></div>",
html_escape(snippet)
));
}
}
// Suggested fix
if let Some(ref fix) = sast.suggested_fix {
if !fix.is_empty() {
s.push_str(&format!(
"<div class=\"code-fix-block\"><div class=\"code-fix-label\">Suggested Fix</div><pre class=\"code-fix\">{}</pre></div>",
html_escape(fix)
));
}
}
// Remediation from SAST
if let Some(ref rem) = sast.remediation {
if !rem.is_empty() {
s.push_str(&format!(
"<div class=\"code-remediation\">{}</div>",
html_escape(rem)
));
}
}
s.push_str("</div>");
sections.push(s);
}
}
// 2. Endpoint match via code context
let endpoint_lower = dast_finding.endpoint.to_lowercase();
let matching_hints: Vec<&CodeContextHint> = code_context
.iter()
.filter(|hint| {
// Match by endpoint pattern overlap
let pattern_lower = hint.endpoint_pattern.to_lowercase();
endpoint_lower.contains(&pattern_lower)
|| pattern_lower.contains(&endpoint_lower)
|| hint.file_path.to_lowercase().contains(
&endpoint_lower
.split('/')
.next_back()
.unwrap_or("")
.replace(".html", "")
.replace(".php", ""),
)
})
.collect();
for hint in &matching_hints {
let mut s = String::new();
s.push_str(r#"<div class="code-correlation-item">"#);
s.push_str(r#"<div class="code-correlation-badge">Code Entry Point</div>"#);
s.push_str("<table class=\"code-meta\">");
s.push_str(&format!(
"<tr><td>Handler</td><td><code>{}</code></td></tr>",
html_escape(&hint.handler_function),
));
s.push_str(&format!(
"<tr><td>File</td><td><code>{}</code></td></tr>",
html_escape(&hint.file_path),
));
s.push_str(&format!(
"<tr><td>Route</td><td><code>{}</code></td></tr>",
html_escape(&hint.endpoint_pattern),
));
s.push_str("</table>");
if !hint.known_vulnerabilities.is_empty() {
s.push_str("<div class=\"code-linked-vulns\"><strong>Known SAST issues in this file:</strong><ul>");
for vuln in &hint.known_vulnerabilities {
s.push_str(&format!("<li>{}</li>", html_escape(vuln)));
}
s.push_str("</ul></div>");
}
s.push_str("</div>");
sections.push(s);
}
// 3. SBOM match — if a linked SAST finding has a CVE, or we can match by CWE
let linked_cve = dast_finding
.linked_sast_finding_id
.as_deref()
.and_then(|id| sast_by_id.get(id))
.and_then(|f| f.cve.as_deref());
if let Some(cve_id) = linked_cve {
let matching_deps: Vec<&SbomEntry> = sbom_entries
.iter()
.filter(|e| e.known_vulnerabilities.iter().any(|v| v.id == cve_id))
.collect();
for dep in &matching_deps {
let mut s = String::new();
s.push_str(r#"<div class="code-correlation-item">"#);
s.push_str(r#"<div class="code-correlation-badge">Vulnerable Dependency</div>"#);
s.push_str("<table class=\"code-meta\">");
s.push_str(&format!(
"<tr><td>Package</td><td><code>{} {}</code> ({})</td></tr>",
html_escape(&dep.name),
html_escape(&dep.version),
html_escape(&dep.package_manager),
));
let cve_ids: Vec<&str> = dep
.known_vulnerabilities
.iter()
.map(|v| v.id.as_str())
.collect();
s.push_str(&format!(
"<tr><td>CVEs</td><td>{}</td></tr>",
cve_ids.join(", "),
));
if let Some(ref purl) = dep.purl {
s.push_str(&format!(
"<tr><td>PURL</td><td><code>{}</code></td></tr>",
html_escape(purl),
));
}
s.push_str("</table>");
s.push_str(&format!(
"<div class=\"code-remediation\">Upgrade <code>{}</code> to the latest patched version to resolve {}.</div>",
html_escape(&dep.name),
html_escape(cve_id),
));
s.push_str("</div>");
sections.push(s);
}
}
if sections.is_empty() {
return String::new();
}
format!(
r#"<div class="code-correlation">
<div class="code-correlation-title">Code-Level Remediation</div>
{}
</div>"#,
sections.join("\n")
)
}
#[cfg(test)]
mod tests {
use super::*;
use compliance_core::models::dast::{DastEvidence, DastVulnType};
use compliance_core::models::finding::Severity;
use compliance_core::models::scan::ScanType;
/// Helper: create a minimal `DastFinding`.
fn make_dast(title: &str, severity: Severity, endpoint: &str) -> DastFinding {
DastFinding::new(
"run1".into(),
"target1".into(),
DastVulnType::Xss,
title.into(),
"desc".into(),
severity,
endpoint.into(),
"GET".into(),
)
}
/// Helper: create a minimal SAST `Finding` with an ObjectId.
fn make_sast(title: &str) -> Finding {
let mut f = Finding::new(
"repo1".into(),
"fp1".into(),
"semgrep".into(),
ScanType::Sast,
title.into(),
"sast desc".into(),
Severity::High,
);
f.id = Some(mongodb::bson::oid::ObjectId::new());
f
}
#[test]
fn test_findings_empty() {
let result = findings(&[], &[], &[], &[]);
assert!(
result.contains("No vulnerabilities were identified"),
"Empty findings should contain the no-vulns message"
);
}
#[test]
fn test_findings_grouped_by_severity() {
let f_high = make_dast("High vuln", Severity::High, "/a");
let f_low = make_dast("Low vuln", Severity::Low, "/b");
let f_critical = make_dast("Crit vuln", Severity::Critical, "/c");
let result = findings(&[f_high, f_low, f_critical], &[], &[], &[]);
// All severity group headers should appear
assert!(
result.contains("Critical (1)"),
"should have Critical header"
);
assert!(result.contains("High (1)"), "should have High header");
assert!(result.contains("Low (1)"), "should have Low header");
// Critical should appear before High, High before Low
let crit_pos = result.find("Critical (1)");
let high_pos = result.find("High (1)");
let low_pos = result.find("Low (1)");
assert!(crit_pos < high_pos, "Critical should come before High");
assert!(high_pos < low_pos, "High should come before Low");
}
#[test]
fn test_code_correlation_sast_link() {
let mut sast = make_sast("SQL Injection in query");
sast.file_path = Some("src/db/query.rs".into());
sast.line_number = Some(42);
sast.code_snippet =
Some("let q = format!(\"SELECT * FROM {} WHERE id={}\", table, id);".into());
let sast_id = sast.id.as_ref().map(|oid| oid.to_hex()).unwrap_or_default();
let mut dast = make_dast("SQLi on /api/users", Severity::High, "/api/users");
dast.linked_sast_finding_id = Some(sast_id);
let result = findings(&[dast], &[sast], &[], &[]);
assert!(
result.contains("SAST Correlation"),
"should render SAST Correlation badge"
);
assert!(
result.contains("src/db/query.rs"),
"should contain the file path"
);
assert!(result.contains(":42"), "should contain the line number");
assert!(
result.contains("Vulnerable Code"),
"should render code snippet block"
);
}
#[test]
fn test_code_correlation_no_match() {
let dast = make_dast("XSS in search", Severity::Medium, "/search");
// No linked_sast_finding_id, no code context, no sbom
let result = findings(&[dast], &[], &[], &[]);
assert!(
!result.contains("code-correlation"),
"should not contain any code-correlation div"
);
}
#[test]
fn test_evidence_html_empty() {
let f = make_dast("No evidence", Severity::Low, "/x");
let result = build_evidence_html(&f);
assert!(result.is_empty(), "no evidence should yield empty string");
}
#[test]
fn test_evidence_html_with_entries() {
let mut f = make_dast("Has evidence", Severity::High, "/y");
f.evidence.push(DastEvidence {
request_method: "POST".into(),
request_url: "https://example.com/login".into(),
request_headers: None,
request_body: None,
response_status: 200,
response_headers: None,
response_snippet: Some("OK".into()),
screenshot_path: None,
payload: Some("<script>alert(1)</script>".into()),
response_time_ms: None,
});
let result = build_evidence_html(&f);
assert!(
result.contains("evidence-table"),
"should render the evidence table"
);
assert!(result.contains("POST"), "should contain request method");
assert!(
result.contains("https://example.com/login"),
"should contain request URL"
);
assert!(result.contains("200"), "should contain response status");
assert!(
result.contains("&lt;script&gt;alert(1)&lt;/script&gt;"),
"payload should be HTML-escaped"
);
}
}

View File

@@ -1,518 +0,0 @@
mod appendix;
mod attack_chain;
mod cover;
mod executive_summary;
mod findings;
mod scope;
mod styles;
use super::ReportContext;
#[allow(clippy::format_in_format_args)]
pub(super) fn build_html_report(ctx: &ReportContext) -> String {
let session = &ctx.session;
let session_id = session
.id
.map(|oid| oid.to_hex())
.unwrap_or_else(|| "-".to_string());
let date_str = session
.started_at
.format("%B %d, %Y at %H:%M UTC")
.to_string();
let date_short = session.started_at.format("%B %d, %Y").to_string();
let completed_str = session
.completed_at
.map(|d| d.format("%B %d, %Y at %H:%M UTC").to_string())
.unwrap_or_else(|| "In Progress".to_string());
// Collect unique tool names used
let tool_names: Vec<String> = {
let mut names: Vec<String> = ctx
.attack_chain
.iter()
.map(|n| n.tool_name.clone())
.collect();
names.sort();
names.dedup();
names
};
// Find the best app screenshot for the cover page:
// prefer the first navigate to the target URL that has a screenshot,
// falling back to any navigate with a screenshot
let app_screenshot: Option<String> = ctx
.attack_chain
.iter()
.filter(|n| n.tool_name == "browser")
.filter_map(|n| {
n.tool_output
.as_ref()?
.get("screenshot_base64")?
.as_str()
.filter(|s| !s.is_empty())
.map(|s| s.to_string())
})
// Skip the Keycloak login page screenshots — prefer one that shows the actual app
.find(|_| {
ctx.attack_chain
.iter()
.filter(|n| n.tool_name == "browser")
.any(|n| {
n.tool_output
.as_ref()
.and_then(|o| o.get("title"))
.and_then(|t| t.as_str())
.is_some_and(|t| t.contains("Compliance") || t.contains("Dashboard"))
})
})
.or_else(|| {
// Fallback: any screenshot
ctx.attack_chain
.iter()
.filter(|n| n.tool_name == "browser")
.filter_map(|n| {
n.tool_output
.as_ref()?
.get("screenshot_base64")?
.as_str()
.filter(|s| !s.is_empty())
.map(|s| s.to_string())
})
.next()
});
let styles_html = styles::styles();
let cover_html = cover::cover(
&ctx.target_name,
&session_id,
&date_short,
&ctx.target_url,
&ctx.requester_name,
&ctx.requester_email,
app_screenshot.as_deref(),
);
let exec_html = executive_summary::executive_summary(
&ctx.findings,
&ctx.target_name,
&ctx.target_url,
tool_names.len(),
session.tool_invocations,
session.success_rate(),
);
let scope_html = scope::scope(
session,
&ctx.target_name,
&ctx.target_url,
&date_str,
&completed_str,
&tool_names,
ctx.config.as_ref(),
);
let findings_html = findings::findings(
&ctx.findings,
&ctx.sast_findings,
&ctx.code_context,
&ctx.sbom_entries,
);
let chain_html = attack_chain::attack_chain(&ctx.attack_chain);
let appendix_html = appendix::appendix(&session_id);
format!(
r#"<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Penetration Test Report — {target_name}</title>
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=Libre+Baskerville:ital,wght@0,400;0,700;1,400&family=Source+Sans+3:ital,wght@0,300;0,400;0,600;0,700&family=JetBrains+Mono:wght@400;500&display=swap" rel="stylesheet">
{styles_html}
</head>
<body>
{cover_html}
{exec_html}
{scope_html}
{findings_html}
{chain_html}
{appendix_html}
"#,
target_name = html_escape(&ctx.target_name),
)
}
fn tool_category(tool_name: &str) -> &'static str {
let name = tool_name.to_lowercase();
if name.contains("nmap") || name.contains("port") {
return "Network Reconnaissance";
}
if name.contains("nikto") || name.contains("header") {
return "Web Server Analysis";
}
if name.contains("zap") || name.contains("spider") || name.contains("crawl") {
return "Web Application Scanning";
}
if name.contains("sqlmap") || name.contains("sqli") || name.contains("sql") {
return "SQL Injection Testing";
}
if name.contains("xss") || name.contains("cross-site") {
return "Cross-Site Scripting Testing";
}
if name.contains("dir")
|| name.contains("brute")
|| name.contains("fuzz")
|| name.contains("gobuster")
{
return "Directory Enumeration";
}
if name.contains("ssl") || name.contains("tls") || name.contains("cert") {
return "SSL/TLS Analysis";
}
if name.contains("api") || name.contains("endpoint") {
return "API Security Testing";
}
if name.contains("auth") || name.contains("login") || name.contains("credential") {
return "Authentication Testing";
}
if name.contains("cors") {
return "CORS Testing";
}
if name.contains("csrf") {
return "CSRF Testing";
}
if name.contains("nuclei") || name.contains("template") {
return "Vulnerability Scanning";
}
if name.contains("whatweb") || name.contains("tech") || name.contains("wappalyzer") {
return "Technology Fingerprinting";
}
"Security Testing"
}
fn html_escape(s: &str) -> String {
s.replace('&', "&amp;")
.replace('<', "&lt;")
.replace('>', "&gt;")
.replace('"', "&quot;")
}
#[cfg(test)]
mod tests {
use super::*;
use compliance_core::models::dast::{DastFinding, DastVulnType};
use compliance_core::models::finding::Severity;
use compliance_core::models::pentest::{
AttackChainNode, AttackNodeStatus, PentestSession, PentestStrategy,
};
// ── html_escape ──────────────────────────────────────────────────
#[test]
fn html_escape_handles_ampersand() {
assert_eq!(html_escape("a & b"), "a &amp; b");
}
#[test]
fn html_escape_handles_angle_brackets() {
assert_eq!(html_escape("<script>"), "&lt;script&gt;");
}
#[test]
fn html_escape_handles_quotes() {
assert_eq!(html_escape(r#"key="val""#), "key=&quot;val&quot;");
}
#[test]
fn html_escape_handles_all_special_chars() {
assert_eq!(
html_escape(r#"<a href="x">&y</a>"#),
"&lt;a href=&quot;x&quot;&gt;&amp;y&lt;/a&gt;"
);
}
#[test]
fn html_escape_no_change_for_plain_text() {
assert_eq!(html_escape("hello world"), "hello world");
}
#[test]
fn html_escape_empty_string() {
assert_eq!(html_escape(""), "");
}
// ── tool_category ────────────────────────────────────────────────
#[test]
fn tool_category_nmap() {
assert_eq!(tool_category("nmap_scan"), "Network Reconnaissance");
}
#[test]
fn tool_category_port_scanner() {
assert_eq!(tool_category("port_scanner"), "Network Reconnaissance");
}
#[test]
fn tool_category_nikto() {
assert_eq!(tool_category("nikto"), "Web Server Analysis");
}
#[test]
fn tool_category_header_check() {
assert_eq!(
tool_category("security_header_check"),
"Web Server Analysis"
);
}
#[test]
fn tool_category_zap_spider() {
assert_eq!(tool_category("zap_spider"), "Web Application Scanning");
}
#[test]
fn tool_category_sqlmap() {
assert_eq!(tool_category("sqlmap"), "SQL Injection Testing");
}
#[test]
fn tool_category_xss_scanner() {
assert_eq!(tool_category("xss_scanner"), "Cross-Site Scripting Testing");
}
#[test]
fn tool_category_dir_bruteforce() {
assert_eq!(tool_category("dir_bruteforce"), "Directory Enumeration");
}
#[test]
fn tool_category_gobuster() {
assert_eq!(tool_category("gobuster"), "Directory Enumeration");
}
#[test]
fn tool_category_ssl_check() {
assert_eq!(tool_category("ssl_check"), "SSL/TLS Analysis");
}
#[test]
fn tool_category_tls_scan() {
assert_eq!(tool_category("tls_scan"), "SSL/TLS Analysis");
}
#[test]
fn tool_category_api_test() {
assert_eq!(tool_category("api_endpoint_test"), "API Security Testing");
}
#[test]
fn tool_category_auth_bypass() {
assert_eq!(tool_category("auth_bypass_check"), "Authentication Testing");
}
#[test]
fn tool_category_cors() {
assert_eq!(tool_category("cors_check"), "CORS Testing");
}
#[test]
fn tool_category_csrf() {
assert_eq!(tool_category("csrf_scanner"), "CSRF Testing");
}
#[test]
fn tool_category_nuclei() {
assert_eq!(tool_category("nuclei"), "Vulnerability Scanning");
}
#[test]
fn tool_category_whatweb() {
assert_eq!(tool_category("whatweb"), "Technology Fingerprinting");
}
#[test]
fn tool_category_unknown_defaults_to_security_testing() {
assert_eq!(tool_category("custom_tool"), "Security Testing");
}
#[test]
fn tool_category_is_case_insensitive() {
assert_eq!(tool_category("NMAP_Scanner"), "Network Reconnaissance");
assert_eq!(tool_category("SQLMap"), "SQL Injection Testing");
}
// ── build_html_report ────────────────────────────────────────────
fn make_session(strategy: PentestStrategy) -> PentestSession {
let mut s = PentestSession::new("target-1".into(), strategy);
s.tool_invocations = 5;
s.tool_successes = 4;
s.findings_count = 2;
s.exploitable_count = 1;
s
}
fn make_finding(severity: Severity, title: &str, exploitable: bool) -> DastFinding {
let mut f = DastFinding::new(
"run-1".into(),
"target-1".into(),
DastVulnType::Xss,
title.into(),
"description".into(),
severity,
"https://example.com/test".into(),
"GET".into(),
);
f.exploitable = exploitable;
f
}
fn make_attack_node(tool_name: &str) -> AttackChainNode {
let mut node = AttackChainNode::new(
"session-1".into(),
"node-1".into(),
tool_name.into(),
serde_json::json!({}),
"Testing this tool".into(),
);
node.status = AttackNodeStatus::Completed;
node
}
fn make_report_context(
findings: Vec<DastFinding>,
chain: Vec<AttackChainNode>,
) -> ReportContext {
ReportContext {
session: make_session(PentestStrategy::Comprehensive),
target_name: "Test App".into(),
target_url: "https://example.com".into(),
findings,
attack_chain: chain,
requester_name: "Alice".into(),
requester_email: "alice@example.com".into(),
config: None,
sast_findings: Vec::new(),
sbom_entries: Vec::new(),
code_context: Vec::new(),
}
}
#[test]
fn report_contains_target_info() {
let ctx = make_report_context(vec![], vec![]);
let html = build_html_report(&ctx);
assert!(html.contains("Test App"));
assert!(html.contains("https://example.com"));
}
#[test]
fn report_contains_requester_info() {
let ctx = make_report_context(vec![], vec![]);
let html = build_html_report(&ctx);
assert!(html.contains("Alice"));
assert!(html.contains("alice@example.com"));
}
#[test]
fn report_shows_informational_risk_when_no_findings() {
let ctx = make_report_context(vec![], vec![]);
let html = build_html_report(&ctx);
assert!(html.contains("INFORMATIONAL"));
}
#[test]
fn report_shows_critical_risk_with_critical_finding() {
let findings = vec![make_finding(Severity::Critical, "Critical XSS", true)];
let ctx = make_report_context(findings, vec![]);
let html = build_html_report(&ctx);
assert!(html.contains("CRITICAL"));
}
#[test]
fn report_shows_high_risk_without_critical() {
let findings = vec![make_finding(Severity::High, "High SQLi", false)];
let ctx = make_report_context(findings, vec![]);
let html = build_html_report(&ctx);
// Should show HIGH, not CRITICAL
assert!(html.contains("HIGH"));
}
#[test]
fn report_shows_medium_risk_level() {
let findings = vec![make_finding(Severity::Medium, "Medium Issue", false)];
let ctx = make_report_context(findings, vec![]);
let html = build_html_report(&ctx);
assert!(html.contains("MEDIUM"));
}
#[test]
fn report_includes_finding_title() {
let findings = vec![make_finding(
Severity::High,
"Reflected XSS in /search",
true,
)];
let ctx = make_report_context(findings, vec![]);
let html = build_html_report(&ctx);
assert!(html.contains("Reflected XSS in /search"));
}
#[test]
fn report_shows_exploitable_badge() {
let findings = vec![make_finding(Severity::Critical, "SQLi", true)];
let ctx = make_report_context(findings, vec![]);
let html = build_html_report(&ctx);
// The report should mark exploitable findings
assert!(html.contains("EXPLOITABLE"));
}
#[test]
fn report_includes_attack_chain_tool_names() {
let chain = vec![make_attack_node("nmap_scan"), make_attack_node("sqlmap")];
let ctx = make_report_context(vec![], chain);
let html = build_html_report(&ctx);
assert!(html.contains("nmap_scan"));
assert!(html.contains("sqlmap"));
}
#[test]
fn report_is_valid_html_structure() {
let ctx = make_report_context(vec![], vec![]);
let html = build_html_report(&ctx);
assert!(html.contains("<!DOCTYPE html>") || html.contains("<html"));
assert!(html.contains("</html>"));
}
#[test]
fn report_strategy_appears() {
let ctx = make_report_context(vec![], vec![]);
let html = build_html_report(&ctx);
// PentestStrategy::Comprehensive => "comprehensive"
assert!(html.contains("comprehensive") || html.contains("Comprehensive"));
}
#[test]
fn report_finding_count_is_correct() {
let findings = vec![
make_finding(Severity::Critical, "F1", true),
make_finding(Severity::High, "F2", false),
make_finding(Severity::Low, "F3", false),
];
let ctx = make_report_context(findings, vec![]);
let html = build_html_report(&ctx);
// The total count "3" should appear somewhere
assert!(
html.contains(">3<")
|| html.contains(">3 ")
|| html.contains("3 findings")
|| html.contains("3 Total")
);
}
}

View File

@@ -1,127 +0,0 @@
use super::{html_escape, tool_category};
use compliance_core::models::pentest::{AuthMode, PentestConfig, PentestSession};
pub(super) fn scope(
session: &PentestSession,
target_name: &str,
target_url: &str,
date_str: &str,
completed_str: &str,
tool_names: &[String],
config: Option<&PentestConfig>,
) -> String {
let tools_table: String = tool_names
.iter()
.enumerate()
.map(|(i, t)| {
let category = tool_category(t);
format!(
"<tr><td>{}</td><td><code>{}</code></td><td>{}</td></tr>",
i + 1,
html_escape(t),
category,
)
})
.collect::<Vec<_>>()
.join("\n");
let engagement_config_section = if let Some(cfg) = config {
let mut rows = String::new();
rows.push_str(&format!(
"<tr><td>Environment</td><td>{}</td></tr>",
html_escape(&cfg.environment.to_string())
));
if let Some(ref app_type) = cfg.app_type {
rows.push_str(&format!(
"<tr><td>Application Type</td><td>{}</td></tr>",
html_escape(app_type)
));
}
let auth_mode = match cfg.auth.mode {
AuthMode::None => "No authentication",
AuthMode::Manual => "Manual credentials",
AuthMode::AutoRegister => "Auto-register",
};
rows.push_str(&format!("<tr><td>Auth Mode</td><td>{auth_mode}</td></tr>"));
if !cfg.scope_exclusions.is_empty() {
let excl = cfg
.scope_exclusions
.iter()
.map(|s| html_escape(s))
.collect::<Vec<_>>()
.join(", ");
rows.push_str(&format!(
"<tr><td>Scope Exclusions</td><td><code>{excl}</code></td></tr>"
));
}
if !cfg.tester.name.is_empty() {
rows.push_str(&format!(
"<tr><td>Tester</td><td>{} ({})</td></tr>",
html_escape(&cfg.tester.name),
html_escape(&cfg.tester.email)
));
}
if let Some(ref ts) = cfg.disclaimer_accepted_at {
rows.push_str(&format!(
"<tr><td>Disclaimer Accepted</td><td>{}</td></tr>",
ts.format("%B %d, %Y at %H:%M UTC")
));
}
if let Some(ref branch) = cfg.branch {
rows.push_str(&format!(
"<tr><td>Git Branch</td><td>{}</td></tr>",
html_escape(branch)
));
}
if let Some(ref commit) = cfg.commit_hash {
rows.push_str(&format!(
"<tr><td>Git Commit</td><td><code>{}</code></td></tr>",
html_escape(commit)
));
}
format!("<h3>Engagement Configuration</h3>\n<table class=\"info\">\n{rows}\n</table>")
} else {
String::new()
};
format!(
r##"
<!-- ═══════════════ 2. SCOPE & METHODOLOGY ═══════════════ -->
<div class="page-break"></div>
<h2><span class="section-num">2.</span> Scope &amp; Methodology</h2>
<p>
The assessment was performed using an AI-driven orchestrator that autonomously selects and
executes security testing tools based on the target's attack surface, technology stack, and
any available static analysis (SAST) findings and SBOM data.
</p>
<h3>Engagement Details</h3>
<table class="info">
<tr><td>Target</td><td><strong>{target_name}</strong></td></tr>
<tr><td>URL</td><td><code>{target_url}</code></td></tr>
<tr><td>Strategy</td><td>{strategy}</td></tr>
<tr><td>Status</td><td>{status}</td></tr>
<tr><td>Started</td><td>{date_str}</td></tr>
<tr><td>Completed</td><td>{completed_str}</td></tr>
<tr><td>Tool Invocations</td><td>{tool_invocations} ({tool_successes} successful, {success_rate:.1}% success rate)</td></tr>
</table>
{engagement_config_section}
<h3>Tools Employed</h3>
<table class="tools-table">
<thead><tr><th>#</th><th>Tool</th><th>Category</th></tr></thead>
<tbody>{tools_table}</tbody>
</table>"##,
target_name = html_escape(target_name),
target_url = html_escape(target_url),
strategy = session.strategy,
status = session.status,
date_str = date_str,
completed_str = completed_str,
tool_invocations = session.tool_invocations,
tool_successes = session.tool_successes,
success_rate = session.success_rate(),
)
}

View File

@@ -1,889 +0,0 @@
pub(super) fn styles() -> String {
r##"<style>
/* ──────────────── Base / Print-first ──────────────── */
@page {
size: A4;
margin: 20mm 18mm 25mm 18mm;
}
@page :first {
margin: 0;
}
*, *::before, *::after { margin: 0; padding: 0; box-sizing: border-box; }
:root {
--text: #1a1a2e;
--text-secondary: #475569;
--text-muted: #64748b;
--heading: #0d2137;
--accent: #1a56db;
--accent-light: #dbeafe;
--border: #d1d5db;
--border-light: #e5e7eb;
--bg-subtle: #f8fafc;
--bg-section: #f1f5f9;
--sev-critical: #991b1b;
--sev-high: #c2410c;
--sev-medium: #a16207;
--sev-low: #1d4ed8;
--sev-info: #4b5563;
--font-serif: 'Libre Baskerville', 'Georgia', serif;
--font-sans: 'Source Sans 3', 'Helvetica Neue', sans-serif;
--font-mono: 'JetBrains Mono', 'Consolas', monospace;
}
body {
font-family: var(--font-sans);
color: var(--text);
background: #fff;
line-height: 1.65;
font-size: 10.5pt;
-webkit-print-color-adjust: exact;
print-color-adjust: exact;
}
.report-body {
max-width: 190mm;
margin: 0 auto;
padding: 0 16px;
}
/* ──────────────── Cover Page ──────────────── */
.cover {
height: 100vh;
min-height: 297mm;
display: flex;
flex-direction: column;
justify-content: center;
align-items: center;
text-align: center;
padding: 40mm 30mm;
page-break-after: always;
break-after: page;
position: relative;
background: #fff;
}
.cover-shield {
width: 72px;
height: 72px;
margin-bottom: 32px;
}
.cover-tag {
display: inline-block;
background: var(--sev-critical);
color: #fff;
font-family: var(--font-sans);
font-size: 8pt;
font-weight: 700;
letter-spacing: 0.15em;
text-transform: uppercase;
padding: 4px 16px;
border-radius: 2px;
margin-bottom: 28px;
}
.cover-title {
font-family: var(--font-serif);
font-size: 28pt;
font-weight: 700;
color: var(--heading);
line-height: 1.2;
margin-bottom: 8px;
}
.cover-subtitle {
font-family: var(--font-serif);
font-size: 14pt;
color: var(--text-secondary);
font-weight: 400;
font-style: italic;
margin-bottom: 48px;
}
.cover-meta {
font-size: 10pt;
color: var(--text-secondary);
line-height: 2;
}
.cover-meta strong {
color: var(--text);
}
.cover-divider {
width: 60px;
height: 2px;
background: var(--accent);
margin: 24px auto;
}
.cover-footer {
position: absolute;
bottom: 30mm;
left: 0;
right: 0;
text-align: center;
font-size: 8pt;
color: var(--text-muted);
letter-spacing: 0.05em;
}
/* ──────────────── Typography ──────────────── */
h2 {
font-family: var(--font-serif);
font-size: 16pt;
font-weight: 700;
color: var(--heading);
margin: 36px 0 16px;
padding-bottom: 8px;
border-bottom: 2px solid var(--heading);
page-break-after: avoid;
break-after: avoid;
}
h3 {
font-family: var(--font-serif);
font-size: 12pt;
font-weight: 700;
color: var(--heading);
margin: 24px 0 10px;
page-break-after: avoid;
break-after: avoid;
}
h4 {
font-family: var(--font-sans);
font-size: 10pt;
font-weight: 700;
color: var(--text-secondary);
margin: 16px 0 8px;
}
p {
margin: 8px 0;
font-size: 10.5pt;
}
code {
font-family: var(--font-mono);
font-size: 9pt;
background: var(--bg-section);
padding: 1px 5px;
border-radius: 3px;
border: 1px solid var(--border-light);
word-break: break-all;
}
/* ──────────────── Section Numbers ──────────────── */
.section-num {
color: var(--accent);
margin-right: 8px;
}
/* ──────────────── Table of Contents ──────────────── */
.toc {
page-break-after: always;
break-after: page;
padding-top: 24px;
}
.toc h2 {
border-bottom-color: var(--accent);
margin-top: 0;
}
.toc-entry {
display: flex;
justify-content: space-between;
align-items: baseline;
padding: 6px 0;
border-bottom: 1px dotted var(--border);
font-size: 11pt;
}
.toc-entry .toc-num {
font-weight: 700;
color: var(--accent);
min-width: 24px;
margin-right: 10px;
}
.toc-entry .toc-label {
flex: 1;
font-weight: 600;
color: var(--heading);
}
.toc-sub {
padding: 3px 0 3px 34px;
font-size: 9.5pt;
color: var(--text-secondary);
}
/* ──────────────── Executive Summary ──────────────── */
.exec-grid {
display: grid;
grid-template-columns: 1fr 1fr 1fr 1fr;
gap: 12px;
margin: 16px 0 20px;
}
.kpi-card {
border: 1px solid var(--border);
border-radius: 6px;
padding: 14px 12px;
text-align: center;
background: var(--bg-subtle);
}
.kpi-value {
font-family: var(--font-serif);
font-size: 22pt;
font-weight: 700;
line-height: 1.1;
}
.kpi-label {
font-size: 8pt;
text-transform: uppercase;
letter-spacing: 0.08em;
color: var(--text-muted);
margin-top: 4px;
font-weight: 600;
}
/* Risk gauge */
.risk-gauge {
display: flex;
align-items: center;
gap: 16px;
padding: 16px 20px;
border: 1px solid var(--border);
border-radius: 6px;
background: var(--bg-subtle);
margin: 16px 0;
}
.risk-gauge-meter {
width: 140px;
flex-shrink: 0;
}
.risk-gauge-track {
height: 10px;
background: var(--border-light);
border-radius: 5px;
overflow: hidden;
position: relative;
}
.risk-gauge-fill {
height: 100%;
border-radius: 5px;
transition: width 0.3s;
}
.risk-gauge-score {
font-family: var(--font-serif);
font-size: 9pt;
font-weight: 700;
text-align: center;
margin-top: 3px;
}
.risk-gauge-text {
flex: 1;
}
.risk-gauge-label {
font-family: var(--font-serif);
font-size: 14pt;
font-weight: 700;
}
.risk-gauge-desc {
font-size: 9.5pt;
color: var(--text-secondary);
margin-top: 2px;
}
/* Severity bar */
.sev-bar {
display: flex;
height: 28px;
border-radius: 4px;
overflow: hidden;
margin: 12px 0 6px;
border: 1px solid var(--border);
}
.sev-bar-seg {
display: flex;
align-items: center;
justify-content: center;
color: #fff;
font-size: 8.5pt;
font-weight: 700;
min-width: 24px;
}
.sev-bar-critical { background: var(--sev-critical); }
.sev-bar-high { background: var(--sev-high); }
.sev-bar-medium { background: var(--sev-medium); }
.sev-bar-low { background: var(--sev-low); }
.sev-bar-info { background: var(--sev-info); }
.sev-bar-legend {
display: flex;
gap: 16px;
font-size: 8.5pt;
color: var(--text-secondary);
margin-bottom: 16px;
}
.sev-dot {
display: inline-block;
width: 8px;
height: 8px;
border-radius: 2px;
margin-right: 4px;
vertical-align: middle;
}
/* ──────────────── Info Tables ──────────────── */
table.info {
width: 100%;
border-collapse: collapse;
margin: 10px 0;
font-size: 10pt;
}
table.info td,
table.info th {
padding: 7px 12px;
border: 1px solid var(--border);
text-align: left;
vertical-align: top;
}
table.info td:first-child,
table.info th:first-child {
width: 160px;
font-weight: 600;
color: var(--text-secondary);
background: var(--bg-subtle);
}
/* Methodology tools table */
table.tools-table {
width: 100%;
border-collapse: collapse;
margin: 10px 0;
font-size: 10pt;
}
table.tools-table th {
background: var(--heading);
color: #fff;
padding: 8px 12px;
text-align: left;
font-weight: 600;
font-size: 9pt;
text-transform: uppercase;
letter-spacing: 0.04em;
}
table.tools-table td {
padding: 6px 12px;
border-bottom: 1px solid var(--border-light);
}
table.tools-table tr:nth-child(even) td {
background: var(--bg-subtle);
}
table.tools-table td:first-child {
width: 32px;
text-align: center;
color: var(--text-muted);
font-weight: 600;
}
/* ──────────────── Badges ──────────────── */
.badge {
display: inline-block;
padding: 2px 8px;
border-radius: 3px;
font-size: 7.5pt;
font-weight: 700;
text-transform: uppercase;
letter-spacing: 0.03em;
vertical-align: middle;
}
.badge-exploit {
background: var(--sev-critical);
color: #fff;
}
/* ──────────────── Findings ──────────────── */
.sev-group-title {
font-family: var(--font-sans);
font-size: 11pt;
font-weight: 700;
color: var(--heading);
padding: 8px 0 6px 12px;
margin: 20px 0 8px;
border-left: 4px solid;
page-break-after: avoid;
break-after: avoid;
}
.finding {
border: 1px solid var(--border);
border-left: 4px solid;
border-radius: 0 4px 4px 0;
padding: 14px 16px;
margin-bottom: 12px;
background: #fff;
page-break-inside: avoid;
break-inside: avoid;
}
.finding-header {
display: flex;
align-items: center;
gap: 10px;
margin-bottom: 8px;
}
.finding-id {
font-family: var(--font-mono);
font-size: 9pt;
font-weight: 500;
color: var(--text-muted);
background: var(--bg-section);
padding: 2px 6px;
border-radius: 3px;
border: 1px solid var(--border-light);
}
.finding-title {
font-family: var(--font-serif);
font-weight: 700;
font-size: 11pt;
flex: 1;
color: var(--heading);
}
.finding-meta {
border-collapse: collapse;
margin: 6px 0;
font-size: 9.5pt;
width: 100%;
}
.finding-meta td {
padding: 3px 10px 3px 0;
vertical-align: top;
}
.finding-meta td:first-child {
color: var(--text-muted);
font-weight: 600;
width: 90px;
white-space: nowrap;
}
.finding-desc {
margin: 8px 0;
font-size: 10pt;
color: var(--text);
line-height: 1.6;
}
.remediation {
margin-top: 10px;
padding: 10px 14px;
background: var(--accent-light);
border-left: 3px solid var(--accent);
border-radius: 0 4px 4px 0;
font-size: 9.5pt;
line-height: 1.55;
}
.remediation-label {
font-weight: 700;
font-size: 8.5pt;
text-transform: uppercase;
letter-spacing: 0.06em;
color: var(--accent);
margin-bottom: 3px;
}
.evidence-block {
margin: 10px 0;
page-break-inside: avoid;
break-inside: avoid;
}
.evidence-title {
font-weight: 700;
font-size: 8.5pt;
text-transform: uppercase;
letter-spacing: 0.06em;
color: var(--text-muted);
margin-bottom: 4px;
}
.evidence-table {
width: 100%;
border-collapse: collapse;
font-size: 9pt;
}
.evidence-table th {
background: var(--bg-section);
padding: 5px 8px;
text-align: left;
font-weight: 600;
font-size: 8.5pt;
text-transform: uppercase;
letter-spacing: 0.03em;
color: var(--text-secondary);
border: 1px solid var(--border-light);
}
.evidence-table td {
padding: 5px 8px;
border: 1px solid var(--border-light);
vertical-align: top;
word-break: break-word;
}
.evidence-payload {
font-size: 8.5pt;
color: var(--sev-critical);
}
.linked-sast {
font-size: 9pt;
color: var(--text-muted);
margin: 6px 0;
font-style: italic;
}
/* ──────────────── Code-Level Correlation ──────────────── */
.code-correlation {
margin: 12px 0;
border: 1px solid #e2e8f0;
border-radius: 6px;
overflow: hidden;
}
.code-correlation-title {
background: #1e293b;
color: #f8fafc;
padding: 6px 12px;
font-size: 9pt;
font-weight: 700;
text-transform: uppercase;
letter-spacing: 0.05em;
}
.code-correlation-item {
padding: 10px 12px;
border-bottom: 1px solid #e2e8f0;
}
.code-correlation-item:last-child { border-bottom: none; }
.code-correlation-badge {
display: inline-block;
background: #3b82f6;
color: #fff;
font-size: 7pt;
font-weight: 600;
padding: 2px 8px;
border-radius: 3px;
text-transform: uppercase;
letter-spacing: 0.04em;
margin-bottom: 6px;
}
.code-meta {
width: 100%;
font-size: 8.5pt;
border-collapse: collapse;
margin-bottom: 6px;
}
.code-meta td:first-child {
width: 80px;
font-weight: 600;
color: var(--text-muted);
padding: 2px 8px 2px 0;
vertical-align: top;
}
.code-meta td:last-child {
padding: 2px 0;
}
.code-snippet-block, .code-fix-block {
margin: 6px 0;
}
.code-snippet-label, .code-fix-label {
font-size: 7.5pt;
font-weight: 700;
text-transform: uppercase;
letter-spacing: 0.04em;
color: var(--text-muted);
margin-bottom: 3px;
}
.code-snippet-label { color: #dc2626; }
.code-fix-label { color: #16a34a; }
.code-snippet {
background: #fef2f2;
border: 1px solid #fecaca;
border-left: 3px solid #dc2626;
padding: 8px 10px;
font-family: 'JetBrains Mono', monospace;
font-size: 8pt;
line-height: 1.5;
white-space: pre-wrap;
word-break: break-word;
border-radius: 0 4px 4px 0;
margin: 0;
}
.code-fix {
background: #f0fdf4;
border: 1px solid #bbf7d0;
border-left: 3px solid #16a34a;
padding: 8px 10px;
font-family: 'JetBrains Mono', monospace;
font-size: 8pt;
line-height: 1.5;
white-space: pre-wrap;
word-break: break-word;
border-radius: 0 4px 4px 0;
margin: 0;
}
.code-remediation {
font-size: 8.5pt;
color: var(--text-secondary);
margin-top: 4px;
padding: 4px 0;
}
.code-linked-vulns {
font-size: 8.5pt;
margin-top: 4px;
}
.code-linked-vulns ul {
margin: 2px 0 0 16px;
padding: 0;
}
.code-linked-vulns li {
margin-bottom: 2px;
}
/* ──────────────── Attack Chain ──────────────── */
.phase-block {
margin-bottom: 20px;
page-break-inside: avoid;
break-inside: avoid;
}
.phase-header {
display: flex;
align-items: center;
gap: 10px;
padding: 8px 14px;
background: var(--heading);
color: #fff;
border-radius: 4px 4px 0 0;
font-size: 9.5pt;
}
.phase-num {
font-weight: 700;
font-size: 8pt;
text-transform: uppercase;
letter-spacing: 0.1em;
background: rgba(255,255,255,0.15);
padding: 2px 8px;
border-radius: 3px;
}
.phase-label {
font-weight: 600;
flex: 1;
}
.phase-count {
font-size: 8.5pt;
opacity: 0.7;
}
.phase-steps {
border: 1px solid var(--border);
border-top: none;
border-radius: 0 0 4px 4px;
}
.step-row {
display: flex;
align-items: flex-start;
gap: 10px;
padding: 8px 14px;
border-bottom: 1px solid var(--border-light);
position: relative;
}
.step-row:last-child {
border-bottom: none;
}
.step-num {
width: 22px;
height: 22px;
border-radius: 50%;
background: var(--bg-section);
border: 1px solid var(--border);
display: flex;
align-items: center;
justify-content: center;
font-size: 8pt;
font-weight: 700;
color: var(--text-secondary);
flex-shrink: 0;
margin-top: 1px;
}
.step-connector {
display: none;
}
.step-content {
flex: 1;
min-width: 0;
}
.step-header {
display: flex;
align-items: center;
gap: 8px;
flex-wrap: wrap;
}
.step-tool {
font-family: var(--font-mono);
font-size: 9.5pt;
font-weight: 500;
color: var(--heading);
}
.step-status {
font-size: 7.5pt;
font-weight: 700;
text-transform: uppercase;
letter-spacing: 0.04em;
padding: 1px 7px;
border-radius: 3px;
}
.step-completed { background: #dcfce7; color: #166534; }
.step-failed { background: #fef2f2; color: #991b1b; }
.step-running { background: #fef9c3; color: #854d0e; }
.step-findings {
font-size: 8pt;
font-weight: 600;
color: var(--sev-high);
background: #fff7ed;
padding: 1px 7px;
border-radius: 3px;
border: 1px solid #fed7aa;
}
.step-risk {
font-size: 7.5pt;
font-weight: 700;
padding: 1px 6px;
border-radius: 3px;
}
.risk-high { background: #fef2f2; color: var(--sev-critical); border: 1px solid #fecaca; }
.risk-med { background: #fffbeb; color: var(--sev-medium); border: 1px solid #fde68a; }
.risk-low { background: #f0fdf4; color: #166534; border: 1px solid #bbf7d0; }
.step-reasoning {
font-size: 9pt;
color: var(--text-muted);
margin-top: 3px;
line-height: 1.5;
font-style: italic;
}
/* ──────────────── Footer ──────────────── */
.report-footer {
margin-top: 48px;
padding-top: 14px;
border-top: 2px solid var(--heading);
font-size: 8pt;
color: var(--text-muted);
text-align: center;
line-height: 1.8;
}
.report-footer .footer-company {
font-weight: 700;
color: var(--text-secondary);
}
/* ──────────────── Page Break Utilities ──────────────── */
.page-break {
page-break-before: always;
break-before: page;
}
.avoid-break {
page-break-inside: avoid;
break-inside: avoid;
}
/* ──────────────── Print Overrides ──────────────── */
@media print {
body {
font-size: 10pt;
}
.cover {
height: auto;
min-height: 250mm;
padding: 50mm 20mm;
}
.report-body {
padding: 0;
}
.no-print {
display: none !important;
}
a {
color: var(--accent);
text-decoration: none;
}
}
/* ──────────────── Screen Enhancements ──────────────── */
@media screen {
body {
background: #e2e8f0;
}
.cover {
background: #fff;
box-shadow: 0 1px 4px rgba(0,0,0,0.08);
}
.report-body {
background: #fff;
padding: 20px 32px 40px;
box-shadow: 0 1px 4px rgba(0,0,0,0.08);
margin-bottom: 40px;
}
}
</style>"##
.to_string()
}

View File

@@ -1,69 +0,0 @@
mod archive;
mod html;
mod pdf;
use compliance_core::models::dast::DastFinding;
use compliance_core::models::finding::Finding;
use compliance_core::models::pentest::{
AttackChainNode, CodeContextHint, PentestConfig, PentestSession,
};
use compliance_core::models::sbom::SbomEntry;
use sha2::{Digest, Sha256};
/// Report archive with metadata
pub struct ReportArchive {
/// The password-protected ZIP bytes
pub archive: Vec<u8>,
/// SHA-256 hex digest of the archive
pub sha256: String,
}
/// Report context gathered from the database
pub struct ReportContext {
pub session: PentestSession,
pub target_name: String,
pub target_url: String,
pub findings: Vec<DastFinding>,
pub attack_chain: Vec<AttackChainNode>,
pub requester_name: String,
pub requester_email: String,
pub config: Option<PentestConfig>,
/// SAST findings for the linked repository (for code-level correlation)
pub sast_findings: Vec<Finding>,
/// Vulnerable dependencies from SBOM
pub sbom_entries: Vec<SbomEntry>,
/// Code knowledge graph entry points linked to SAST findings
pub code_context: Vec<CodeContextHint>,
}
/// Generate a password-protected ZIP archive containing the pentest report.
///
/// The archive contains:
/// - `report.pdf` — Professional pentest report (PDF)
/// - `report.html` — HTML source (fallback)
/// - `findings.json` — Raw findings data
/// - `attack-chain.json` — Attack chain timeline
///
/// Files are encrypted with AES-256 inside the ZIP (standard WinZip AES format,
/// supported by 7-Zip, WinRAR, macOS Archive Utility, etc.).
pub async fn generate_encrypted_report(
ctx: &ReportContext,
password: &str,
) -> Result<ReportArchive, String> {
let html = html::build_html_report(ctx);
// Convert HTML to PDF via headless Chrome
let pdf_bytes = pdf::html_to_pdf(&html).await?;
let zip_bytes = archive::build_zip(ctx, password, &html, &pdf_bytes)
.map_err(|e| format!("Failed to create archive: {e}"))?;
let mut hasher = Sha256::new();
hasher.update(&zip_bytes);
let sha256 = hex::encode(hasher.finalize());
Ok(ReportArchive {
archive: zip_bytes,
sha256,
})
}

View File

@@ -1,289 +0,0 @@
use futures_util::SinkExt;
use tokio_tungstenite::tungstenite::Message;
type WsStream =
tokio_tungstenite::WebSocketStream<tokio_tungstenite::MaybeTlsStream<tokio::net::TcpStream>>;
/// Convert HTML string to PDF bytes.
///
/// If `CHROME_WS_URL` is set (e.g. `ws://host:3000`), connects to a remote
/// headless Chrome via the Chrome DevTools Protocol over WebSocket.
/// Otherwise falls back to a local Chrome/Chromium binary.
pub(super) async fn html_to_pdf(html: &str) -> Result<Vec<u8>, String> {
if let Ok(ws_url) = std::env::var("CHROME_WS_URL") {
tracing::info!(url = %ws_url, "Generating PDF via remote Chrome (CDP)");
cdp_print_to_pdf(&ws_url, html).await
} else {
tracing::info!("Generating PDF via local Chrome binary");
local_chrome_pdf(html).await
}
}
/// Send a CDP command (no session) and return the response.
async fn cdp_send(
ws: &mut WsStream,
id: u64,
method: &str,
params: serde_json::Value,
) -> Result<serde_json::Value, String> {
let msg = serde_json::json!({ "id": id, "method": method, "params": params });
ws.send(Message::Text(msg.to_string().into()))
.await
.map_err(|e| format!("WS send failed: {e}"))?;
read_until_result(ws, id).await
}
/// Send a CDP command on a session and return the response.
async fn cdp_send_session(
ws: &mut WsStream,
id: u64,
session_id: &str,
method: &str,
params: serde_json::Value,
) -> Result<serde_json::Value, String> {
let msg = serde_json::json!({
"id": id,
"sessionId": session_id,
"method": method,
"params": params,
});
ws.send(Message::Text(msg.to_string().into()))
.await
.map_err(|e| format!("WS send failed: {e}"))?;
read_until_result(ws, id).await
}
/// Generate PDF by connecting to a remote Chrome instance over CDP WebSocket.
async fn cdp_print_to_pdf(base_ws_url: &str, html: &str) -> Result<Vec<u8>, String> {
use base64::Engine;
// Step 1: Discover browser WS endpoint via /json/version
let http_url = base_ws_url
.replace("ws://", "http://")
.replace("wss://", "https://");
let version_url = format!("{http_url}/json/version");
let version: serde_json::Value = reqwest::get(&version_url)
.await
.map_err(|e| format!("Failed to reach Chrome at {version_url}: {e}"))?
.json()
.await
.map_err(|e| format!("Invalid /json/version response: {e}"))?;
let browser_ws = version["webSocketDebuggerUrl"]
.as_str()
.ok_or_else(|| "No webSocketDebuggerUrl in /json/version".to_string())?;
// Step 2: Connect to browser WS endpoint
let (mut ws, _) = tokio_tungstenite::connect_async(browser_ws)
.await
.map_err(|e| format!("WebSocket connect failed: {e}"))?;
let mut id: u64 = 1;
// Step 3: Create a new target (tab)
let resp = cdp_send(
&mut ws,
id,
"Target.createTarget",
serde_json::json!({ "url": "about:blank" }),
)
.await?;
id += 1;
let target_id = resp
.get("result")
.and_then(|r| r.get("targetId"))
.and_then(|t| t.as_str())
.ok_or("No targetId in createTarget response")?
.to_string();
// Step 4: Attach to target
let resp = cdp_send(
&mut ws,
id,
"Target.attachToTarget",
serde_json::json!({ "targetId": target_id, "flatten": true }),
)
.await?;
id += 1;
let session_id = resp
.get("result")
.and_then(|r| r.get("sessionId"))
.and_then(|s| s.as_str())
.ok_or("No sessionId in attachToTarget response")?
.to_string();
// Step 5: Enable Page domain
cdp_send_session(
&mut ws,
id,
&session_id,
"Page.enable",
serde_json::json!({}),
)
.await?;
id += 1;
// Step 6: Set page content with the HTML
cdp_send_session(
&mut ws,
id,
&session_id,
"Page.setDocumentContent",
serde_json::json!({ "frameId": target_id, "html": html }),
)
.await?;
id += 1;
// Brief pause for rendering
tokio::time::sleep(std::time::Duration::from_millis(500)).await;
// Step 7: Print to PDF
let pdf_response = cdp_send_session(
&mut ws,
id,
&session_id,
"Page.printToPDF",
serde_json::json!({
"printBackground": true,
"preferCSSPageSize": true,
"displayHeaderFooter": false,
}),
)
.await?;
id += 1;
let pdf_b64 = pdf_response
.get("result")
.and_then(|r| r.get("data"))
.and_then(|d| d.as_str())
.ok_or("No PDF data in printToPDF response")?;
let pdf_bytes = base64::engine::general_purpose::STANDARD
.decode(pdf_b64)
.map_err(|e| format!("Failed to decode PDF base64: {e}"))?;
// Step 8: Close the target
let _ = cdp_send(
&mut ws,
id,
"Target.closeTarget",
serde_json::json!({ "targetId": target_id }),
)
.await;
let _ = ws.close(None).await;
if pdf_bytes.is_empty() {
return Err("Chrome produced an empty PDF".to_string());
}
tracing::info!(
size_kb = pdf_bytes.len() / 1024,
"PDF report generated via CDP"
);
Ok(pdf_bytes)
}
/// Read WebSocket messages until we get a response matching the given id.
async fn read_until_result(ws: &mut WsStream, id: u64) -> Result<serde_json::Value, String> {
use futures_util::StreamExt;
let deadline = tokio::time::Instant::now() + std::time::Duration::from_secs(30);
loop {
let msg = tokio::time::timeout_at(deadline, ws.next())
.await
.map_err(|_| format!("Timeout waiting for CDP response id={id}"))?
.ok_or_else(|| "WebSocket closed unexpectedly".to_string())?
.map_err(|e| format!("WebSocket read error: {e}"))?;
if let Message::Text(text) = msg {
if let Ok(val) = serde_json::from_str::<serde_json::Value>(&text) {
if val.get("id").and_then(|i| i.as_u64()) == Some(id) {
if let Some(err) = val.get("error") {
return Err(format!("CDP error: {err}"));
}
return Ok(val);
}
}
}
}
}
/// Fallback: generate PDF using a local Chrome/Chromium binary.
async fn local_chrome_pdf(html: &str) -> Result<Vec<u8>, String> {
let tmp_dir = std::env::temp_dir();
let run_id = uuid::Uuid::new_v4().to_string();
let html_path = tmp_dir.join(format!("pentest-report-{run_id}.html"));
let pdf_path = tmp_dir.join(format!("pentest-report-{run_id}.pdf"));
std::fs::write(&html_path, html).map_err(|e| format!("Failed to write temp HTML: {e}"))?;
let chrome_bin = find_chrome_binary().ok_or_else(|| {
"Chrome/Chromium not found. Set CHROME_WS_URL for remote Chrome or install chromium locally."
.to_string()
})?;
tracing::info!(chrome = %chrome_bin, "Generating PDF report via headless Chrome");
let html_url = format!("file://{}", html_path.display());
let output = tokio::process::Command::new(&chrome_bin)
.args([
"--headless",
"--disable-gpu",
"--no-sandbox",
"--disable-software-rasterizer",
"--run-all-compositor-stages-before-draw",
"--disable-dev-shm-usage",
&format!("--print-to-pdf={}", pdf_path.display()),
"--no-pdf-header-footer",
&html_url,
])
.output()
.await
.map_err(|e| format!("Failed to run Chrome: {e}"))?;
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
let _ = std::fs::remove_file(&html_path);
let _ = std::fs::remove_file(&pdf_path);
return Err(format!("Chrome PDF generation failed: {stderr}"));
}
let pdf_bytes =
std::fs::read(&pdf_path).map_err(|e| format!("Failed to read generated PDF: {e}"))?;
let _ = std::fs::remove_file(&html_path);
let _ = std::fs::remove_file(&pdf_path);
if pdf_bytes.is_empty() {
return Err("Chrome produced an empty PDF".to_string());
}
tracing::info!(size_kb = pdf_bytes.len() / 1024, "PDF report generated");
Ok(pdf_bytes)
}
/// Search for Chrome/Chromium binary on the system.
fn find_chrome_binary() -> Option<String> {
let candidates = [
"google-chrome-stable",
"google-chrome",
"chromium-browser",
"chromium",
];
for name in &candidates {
if let Ok(output) = std::process::Command::new("which").arg(name).output() {
if output.status.success() {
let path = String::from_utf8_lossy(&output.stdout).trim().to_string();
if !path.is_empty() {
return Some(path);
}
}
}
}
None
}

View File

@@ -1,236 +0,0 @@
use std::path::Path;
use std::sync::Arc;
use compliance_core::models::{Finding, ScanType, Severity};
use compliance_core::traits::ScanOutput;
use crate::llm::review_prompts::REVIEW_PASSES;
use crate::llm::LlmClient;
use crate::pipeline::dedup;
use crate::pipeline::git::{DiffFile, GitOps};
pub struct CodeReviewScanner {
llm: Arc<LlmClient>,
}
impl CodeReviewScanner {
pub fn new(llm: Arc<LlmClient>) -> Self {
Self { llm }
}
/// Run multi-pass LLM code review on the diff between old and new commits.
pub async fn review_diff(
&self,
repo_path: &Path,
repo_id: &str,
old_sha: &str,
new_sha: &str,
) -> ScanOutput {
let diff_files = match GitOps::get_diff_content(repo_path, old_sha, new_sha) {
Ok(files) => files,
Err(e) => {
tracing::warn!("Failed to extract diff for code review: {e}");
return ScanOutput::default();
}
};
if diff_files.is_empty() {
return ScanOutput::default();
}
let mut all_findings = Vec::new();
// Chunk diff files into groups to avoid exceeding context limits
let chunks = chunk_diff_files(&diff_files, 8000);
for (pass_name, system_prompt) in REVIEW_PASSES {
for chunk in &chunks {
let user_prompt = format!(
"Review the following code changes:\n\n{}",
chunk
.iter()
.map(|f| format!("--- {} ---\n{}", f.path, f.hunks))
.collect::<Vec<_>>()
.join("\n\n")
);
match self.llm.chat(system_prompt, &user_prompt, Some(0.1)).await {
Ok(response) => {
let parsed = parse_review_response(&response, pass_name, repo_id, chunk);
all_findings.extend(parsed);
}
Err(e) => {
tracing::warn!("Code review pass '{pass_name}' failed: {e}");
}
}
}
}
let deduped = dedup_cross_pass(all_findings);
ScanOutput {
findings: deduped,
sbom_entries: Vec::new(),
}
}
}
/// Group diff files into chunks that fit within a token budget (rough char estimate)
fn chunk_diff_files(files: &[DiffFile], max_chars: usize) -> Vec<Vec<&DiffFile>> {
let mut chunks: Vec<Vec<&DiffFile>> = Vec::new();
let mut current_chunk: Vec<&DiffFile> = Vec::new();
let mut current_size = 0;
for file in files {
if current_size + file.hunks.len() > max_chars && !current_chunk.is_empty() {
chunks.push(std::mem::take(&mut current_chunk));
current_size = 0;
}
current_chunk.push(file);
current_size += file.hunks.len();
}
if !current_chunk.is_empty() {
chunks.push(current_chunk);
}
chunks
}
fn parse_review_response(
response: &str,
pass_name: &str,
repo_id: &str,
chunk: &[&DiffFile],
) -> Vec<Finding> {
let cleaned = response.trim();
let cleaned = if cleaned.starts_with("```") {
cleaned
.trim_start_matches("```json")
.trim_start_matches("```")
.trim_end_matches("```")
.trim()
} else {
cleaned
};
let issues: Vec<ReviewIssue> = match serde_json::from_str(cleaned) {
Ok(v) => v,
Err(_) => {
if cleaned != "[]" {
tracing::debug!("Failed to parse {pass_name} review response: {cleaned}");
}
return Vec::new();
}
};
issues
.into_iter()
.filter(|issue| {
// Verify the file exists in the diff chunk
chunk.iter().any(|f| f.path == issue.file)
})
.map(|issue| {
let severity = match issue.severity.as_str() {
"critical" => Severity::Critical,
"high" => Severity::High,
"medium" => Severity::Medium,
"low" => Severity::Low,
_ => Severity::Info,
};
let fingerprint = dedup::compute_fingerprint(&[
repo_id,
"code-review",
pass_name,
&issue.file,
&issue.line.to_string(),
&issue.title,
]);
let description = if let Some(suggestion) = &issue.suggestion {
format!("{}\n\nSuggested fix: {}", issue.description, suggestion)
} else {
issue.description.clone()
};
let mut finding = Finding::new(
repo_id.to_string(),
fingerprint,
format!("code-review/{pass_name}"),
ScanType::CodeReview,
issue.title,
description,
severity,
);
finding.rule_id = Some(format!("review/{pass_name}"));
finding.file_path = Some(issue.file);
finding.line_number = Some(issue.line);
finding.cwe = issue.cwe;
finding.suggested_fix = issue.suggestion;
finding
})
.collect()
}
#[derive(serde::Deserialize)]
struct ReviewIssue {
title: String,
description: String,
severity: String,
file: String,
#[serde(default)]
line: u32,
#[serde(default)]
cwe: Option<String>,
#[serde(default)]
suggestion: Option<String>,
}
/// Deduplicate findings across review passes.
///
/// Multiple passes often flag the same issue (e.g. SQL injection reported by
/// logic, security, and convention passes). We group by file + nearby line +
/// normalized title keywords and keep the highest-severity finding.
fn dedup_cross_pass(findings: Vec<Finding>) -> Vec<Finding> {
use std::collections::HashMap;
// Build a dedup key: (file, line bucket, normalized title words)
fn dedup_key(f: &Finding) -> String {
let file = f.file_path.as_deref().unwrap_or("");
// Group lines within 3 of each other
let line_bucket = f.line_number.unwrap_or(0) / 4;
// Normalize: lowercase, keep only alphanumeric, sort words for order-independence
let title_lower = f.title.to_lowercase();
let mut words: Vec<&str> = title_lower
.split(|c: char| !c.is_alphanumeric())
.filter(|w| w.len() > 2)
.collect();
words.sort();
format!("{file}:{line_bucket}:{}", words.join(","))
}
let mut groups: HashMap<String, Finding> = HashMap::new();
for finding in findings {
let key = dedup_key(&finding);
groups
.entry(key)
.and_modify(|existing| {
// Keep the higher severity; on tie, keep the one with more detail
if finding.severity > existing.severity
|| (finding.severity == existing.severity
&& finding.description.len() > existing.description.len())
{
*existing = finding.clone();
}
// Merge CWE if the existing one is missing it
if existing.cwe.is_none() {
existing.cwe = finding.cwe.clone();
}
})
.or_insert(finding);
}
groups.into_values().collect()
}

View File

@@ -21,13 +21,11 @@ impl CveScanner {
}
}
#[tracing::instrument(skip_all)]
pub async fn scan_dependencies(
&self,
repo_id: &str,
entries: &mut [SbomEntry],
) -> Result<Vec<CveAlert>, CoreError> {
tracing::info!("scanning {} SBOM entries for known CVEs", entries.len());
let mut alerts = Vec::new();
// Batch query OSV.dev
@@ -66,8 +64,6 @@ impl CveScanner {
}
async fn query_osv_batch(&self, entries: &[SbomEntry]) -> Result<Vec<Vec<OsvVuln>>, CoreError> {
const OSV_BATCH_SIZE: usize = 500;
let queries: Vec<_> = entries
.iter()
.filter_map(|e| {
@@ -83,37 +79,32 @@ impl CveScanner {
return Ok(Vec::new());
}
let mut all_vulns: Vec<Vec<OsvVuln>> = Vec::with_capacity(queries.len());
let body = serde_json::json!({ "queries": queries });
for chunk in queries.chunks(OSV_BATCH_SIZE) {
let body = serde_json::json!({ "queries": chunk });
let resp = self
.http
.post("https://api.osv.dev/v1/querybatch")
.json(&body)
.send()
.await
.map_err(|e| CoreError::Http(format!("OSV.dev request failed: {e}")))?;
let resp = self
.http
.post("https://api.osv.dev/v1/querybatch")
.json(&body)
.send()
.await
.map_err(|e| {
tracing::warn!("OSV.dev API call failed: {e}");
CoreError::Http(format!("OSV.dev request failed: {e}"))
})?;
if !resp.status().is_success() {
let status = resp.status();
let body = resp.text().await.unwrap_or_default();
tracing::warn!("OSV.dev returned {status}: {body}");
return Ok(Vec::new());
}
if !resp.status().is_success() {
let status = resp.status();
let body = resp.text().await.unwrap_or_default();
tracing::warn!("OSV.dev returned {status}: {body}");
// Push empty results for this chunk so indices stay aligned
all_vulns.extend(std::iter::repeat_with(Vec::new).take(chunk.len()));
continue;
}
let result: OsvBatchResponse = resp
.json()
.await
.map_err(|e| CoreError::Http(format!("Failed to parse OSV.dev response: {e}")))?;
let result: OsvBatchResponse = resp.json().await.map_err(|e| {
tracing::warn!("failed to parse OSV.dev response: {e}");
CoreError::Http(format!("Failed to parse OSV.dev response: {e}"))
})?;
let chunk_vulns = result.results.into_iter().map(|r| {
let vulns = result
.results
.into_iter()
.map(|r| {
r.vulns
.unwrap_or_default()
.into_iter()
@@ -125,12 +116,10 @@ impl CveScanner {
}),
})
.collect()
});
})
.collect();
all_vulns.extend(chunk_vulns);
}
Ok(all_vulns)
Ok(vulns)
}
async fn query_nvd(&self, cve_id: &str) -> Result<Option<f64>, CoreError> {

View File

@@ -1,7 +1,5 @@
use sha2::{Digest, Sha256};
use compliance_core::models::dast::DastFinding;
pub fn compute_fingerprint(parts: &[&str]) -> String {
let mut hasher = Sha256::new();
for part in parts {
@@ -10,406 +8,3 @@ pub fn compute_fingerprint(parts: &[&str]) -> String {
}
hex::encode(hasher.finalize())
}
/// Compute a dedup fingerprint for a DAST finding.
///
/// The key is derived from the *canonicalized* title (lowercased, domain names
/// stripped, known synonyms resolved), endpoint, and HTTP method. This lets us
/// detect both exact duplicates (same tool reporting twice across passes) and
/// semantic duplicates (e.g., `security_header_missing` "Missing HSTS header"
/// vs `tls_misconfiguration` "Missing strict-transport-security header").
pub fn compute_dast_fingerprint(f: &DastFinding) -> String {
let canon = canonicalize_dast_title(&f.title);
let endpoint = f.endpoint.to_lowercase().trim_end_matches('/').to_string();
let method = f.method.to_uppercase();
let param = f.parameter.as_deref().unwrap_or("");
compute_fingerprint(&[&canon, &endpoint, &method, param])
}
/// Canonicalize a DAST finding title for dedup purposes.
///
/// 1. Lowercase
/// 2. Strip domain names / URLs (e.g. "for comp-dev.meghsakha.com")
/// 3. Resolve known header synonyms (hsts ↔ strict-transport-security, etc.)
/// 4. Strip extra whitespace
fn canonicalize_dast_title(title: &str) -> String {
let mut s = title.to_lowercase();
// Strip "for <domain>" or "on <domain>" suffixes
// Pattern: "for <word.word...>" or "on <method> <url>"
if let Some(idx) = s.find(" for ") {
// Check if what follows looks like a domain or URL
let rest = &s[idx + 5..];
if rest.contains('.') || rest.starts_with("http") {
s.truncate(idx);
}
}
if let Some(idx) = s.find(" on ") {
let rest = &s[idx + 4..];
if rest.contains("http") || rest.contains('/') {
s.truncate(idx);
}
}
// Resolve known header synonyms
let synonyms: &[(&str, &str)] = &[
("hsts", "strict-transport-security"),
("csp", "content-security-policy"),
("cors", "cross-origin-resource-sharing"),
("xfo", "x-frame-options"),
];
for &(short, canonical) in synonyms {
// Only replace whole words — check boundaries
if let Some(pos) = s.find(short) {
let before_ok = pos == 0 || !s.as_bytes()[pos - 1].is_ascii_alphanumeric();
let after_ok = pos + short.len() >= s.len()
|| !s.as_bytes()[pos + short.len()].is_ascii_alphanumeric();
if before_ok && after_ok {
s = format!("{}{}{}", &s[..pos], canonical, &s[pos + short.len()..]);
}
}
}
// Collapse whitespace
s.split_whitespace().collect::<Vec<_>>().join(" ")
}
/// Deduplicate a list of DAST findings, merging evidence from duplicates.
///
/// Two-phase approach:
/// 1. **Exact dedup** — group by canonicalized `(title, endpoint, method, parameter)`.
/// Merge evidence arrays, keep the highest severity, preserve exploitable flag.
/// 2. **CWE-based dedup** — within the same `(cwe, endpoint, method)` group, merge
/// findings whose canonicalized titles resolve to the same subject (e.g., HSTS
/// reported as both `security_header_missing` and `tls_misconfiguration`).
pub fn dedup_dast_findings(findings: Vec<DastFinding>) -> Vec<DastFinding> {
use std::collections::HashMap;
if findings.len() <= 1 {
return findings;
}
// Phase 1: exact fingerprint dedup
let mut seen: HashMap<String, usize> = HashMap::new();
let mut deduped: Vec<DastFinding> = Vec::new();
for finding in findings {
let fp = compute_dast_fingerprint(&finding);
if let Some(&idx) = seen.get(&fp) {
// Merge into existing
merge_dast_finding(&mut deduped[idx], &finding);
} else {
seen.insert(fp, deduped.len());
deduped.push(finding);
}
}
let before = deduped.len();
// Phase 2: CWE-based related dedup
// Group by (cwe, endpoint_normalized, method) — only when CWE is present
let mut cwe_groups: HashMap<String, Vec<usize>> = HashMap::new();
for (i, f) in deduped.iter().enumerate() {
if let Some(ref cwe) = f.cwe {
let key = format!(
"{}|{}|{}",
cwe,
f.endpoint.to_lowercase().trim_end_matches('/'),
f.method.to_uppercase(),
);
cwe_groups.entry(key).or_default().push(i);
}
}
// For each CWE group with multiple findings, keep the one with highest severity
// and most evidence, merge the rest into it
let mut merge_map: HashMap<usize, Vec<usize>> = HashMap::new();
let mut remove_indices: Vec<usize> = Vec::new();
for indices in cwe_groups.values() {
if indices.len() <= 1 {
continue;
}
// Find the "primary" finding: highest severity, then most evidence, then longest description
let Some(&primary_idx) = indices.iter().max_by(|&&a, &&b| {
deduped[a]
.severity
.cmp(&deduped[b].severity)
.then_with(|| deduped[a].evidence.len().cmp(&deduped[b].evidence.len()))
.then_with(|| {
deduped[a]
.description
.len()
.cmp(&deduped[b].description.len())
})
}) else {
continue;
};
for &idx in indices {
if idx != primary_idx {
remove_indices.push(idx);
merge_map.entry(primary_idx).or_default().push(idx);
}
}
}
if !remove_indices.is_empty() {
remove_indices.sort_unstable();
remove_indices.dedup();
// Merge evidence
for (&primary, secondaries) in &merge_map {
let extra_evidence: Vec<_> = secondaries
.iter()
.flat_map(|&i| deduped[i].evidence.clone())
.collect();
let any_exploitable = secondaries.iter().any(|&i| deduped[i].exploitable);
deduped[primary].evidence.extend(extra_evidence);
if any_exploitable {
deduped[primary].exploitable = true;
}
}
// Remove merged findings (iterate in reverse to preserve indices)
for &idx in remove_indices.iter().rev() {
deduped.remove(idx);
}
}
let after = deduped.len();
if before != after {
tracing::debug!(
"DAST CWE-based dedup: {before} → {after} findings ({} merged)",
before - after
);
}
deduped
}
/// Merge a duplicate DAST finding into a primary one.
fn merge_dast_finding(primary: &mut DastFinding, duplicate: &DastFinding) {
primary.evidence.extend(duplicate.evidence.clone());
if duplicate.severity > primary.severity {
primary.severity = duplicate.severity.clone();
}
if duplicate.exploitable {
primary.exploitable = true;
}
// Keep the longer/better description
if duplicate.description.len() > primary.description.len() {
primary.description.clone_from(&duplicate.description);
}
// Keep remediation if primary doesn't have one
if primary.remediation.is_none() && duplicate.remediation.is_some() {
primary.remediation.clone_from(&duplicate.remediation);
}
}
#[cfg(test)]
mod tests {
use super::*;
use compliance_core::models::dast::DastVulnType;
use compliance_core::models::finding::Severity;
#[test]
fn fingerprint_is_deterministic() {
let a = compute_fingerprint(&["repo1", "rule-x", "src/main.rs", "42"]);
let b = compute_fingerprint(&["repo1", "rule-x", "src/main.rs", "42"]);
assert_eq!(a, b);
}
#[test]
fn fingerprint_changes_with_different_input() {
let a = compute_fingerprint(&["repo1", "rule-x", "src/main.rs", "42"]);
let b = compute_fingerprint(&["repo1", "rule-x", "src/main.rs", "43"]);
assert_ne!(a, b);
}
#[test]
fn fingerprint_is_valid_hex_sha256() {
let fp = compute_fingerprint(&["hello"]);
assert_eq!(fp.len(), 64, "SHA-256 hex should be 64 chars");
assert!(fp.chars().all(|c| c.is_ascii_hexdigit()));
}
#[test]
fn fingerprint_empty_parts() {
let fp = compute_fingerprint(&[]);
// Should still produce a valid hash (of empty input)
assert_eq!(fp.len(), 64);
}
#[test]
fn fingerprint_order_matters() {
let a = compute_fingerprint(&["a", "b"]);
let b = compute_fingerprint(&["b", "a"]);
assert_ne!(a, b);
}
#[test]
fn fingerprint_separator_prevents_collision() {
// "ab" + "c" vs "a" + "bc" should differ because of the "|" separator
let a = compute_fingerprint(&["ab", "c"]);
let b = compute_fingerprint(&["a", "bc"]);
assert_ne!(a, b);
}
fn make_dast(title: &str, endpoint: &str, vuln_type: DastVulnType) -> DastFinding {
let mut f = DastFinding::new(
"run1".into(),
"target1".into(),
vuln_type,
title.into(),
format!("Description for {title}"),
Severity::Medium,
endpoint.into(),
"GET".into(),
);
f.cwe = Some("CWE-319".into());
f
}
#[test]
fn canonicalize_strips_domain_suffix() {
let canon = canonicalize_dast_title("Missing HSTS header for comp-dev.meghsakha.com");
assert!(!canon.contains("meghsakha"), "domain should be stripped");
assert!(
canon.contains("strict-transport-security"),
"hsts should be resolved: {canon}"
);
}
#[test]
fn canonicalize_resolves_synonyms() {
let a = canonicalize_dast_title("Missing HSTS header");
let b = canonicalize_dast_title("Missing strict-transport-security header");
assert_eq!(a, b);
}
#[test]
fn exact_dedup_merges_identical_findings() {
let f1 = make_dast(
"Missing strict-transport-security header",
"https://example.com",
DastVulnType::SecurityHeaderMissing,
);
let f2 = make_dast(
"Missing strict-transport-security header",
"https://example.com",
DastVulnType::SecurityHeaderMissing,
);
let result = dedup_dast_findings(vec![f1, f2]);
assert_eq!(result.len(), 1, "exact duplicates should be merged");
}
#[test]
fn synonym_dedup_merges_hsts_variants() {
let f1 = make_dast(
"Missing strict-transport-security header",
"https://example.com",
DastVulnType::SecurityHeaderMissing,
);
let f2 = make_dast(
"Missing HSTS header for example.com",
"https://example.com",
DastVulnType::TlsMisconfiguration,
);
let result = dedup_dast_findings(vec![f1, f2]);
assert_eq!(
result.len(),
1,
"HSTS synonym variants should merge to 1 finding"
);
}
#[test]
fn different_headers_not_merged() {
let mut f1 = make_dast(
"Missing x-content-type-options header",
"https://example.com",
DastVulnType::SecurityHeaderMissing,
);
f1.cwe = Some("CWE-16".into());
let mut f2 = make_dast(
"Missing permissions-policy header",
"https://example.com",
DastVulnType::SecurityHeaderMissing,
);
f2.cwe = Some("CWE-16".into());
// These share CWE-16 but are different headers — phase 2 will merge them
// since they share the same CWE+endpoint. This is acceptable because they
// have the same root cause (missing security headers configuration).
let result = dedup_dast_findings(vec![f1, f2]);
// CWE-based dedup will merge these into 1
assert!(
result.len() <= 2,
"same CWE+endpoint findings may be merged"
);
}
#[test]
fn different_endpoints_not_merged() {
let f1 = make_dast(
"Missing strict-transport-security header",
"https://example.com",
DastVulnType::SecurityHeaderMissing,
);
let f2 = make_dast(
"Missing strict-transport-security header",
"https://other.com",
DastVulnType::SecurityHeaderMissing,
);
let result = dedup_dast_findings(vec![f1, f2]);
assert_eq!(result.len(), 2, "different endpoints should not merge");
}
#[test]
fn dedup_preserves_highest_severity() {
let f1 = make_dast(
"Missing strict-transport-security header",
"https://example.com",
DastVulnType::SecurityHeaderMissing,
);
let mut f2 = make_dast(
"Missing strict-transport-security header",
"https://example.com",
DastVulnType::SecurityHeaderMissing,
);
f2.severity = Severity::High;
let result = dedup_dast_findings(vec![f1, f2]);
assert_eq!(result.len(), 1);
assert_eq!(result[0].severity, Severity::High);
}
#[test]
fn dedup_merges_evidence() {
let mut f1 = make_dast(
"Missing strict-transport-security header",
"https://example.com",
DastVulnType::SecurityHeaderMissing,
);
f1.evidence
.push(compliance_core::models::dast::DastEvidence {
request_method: "GET".into(),
request_url: "https://example.com".into(),
request_headers: None,
request_body: None,
response_status: 200,
response_headers: None,
response_snippet: Some("pass 1".into()),
screenshot_path: None,
payload: None,
response_time_ms: None,
});
let mut f2 = f1.clone();
f2.evidence[0].response_snippet = Some("pass 2".into());
let result = dedup_dast_findings(vec![f1, f2]);
assert_eq!(result.len(), 1);
assert_eq!(result[0].evidence.len(), 2, "evidence should be merged");
}
}

View File

@@ -1,113 +1,38 @@
use std::path::{Path, PathBuf};
use git2::{Cred, FetchOptions, RemoteCallbacks, Repository};
use git2::{FetchOptions, Repository};
use crate::error::AgentError;
/// Credentials for accessing a private repository
#[derive(Debug, Clone, Default)]
pub struct RepoCredentials {
/// Path to the SSH private key (for SSH URLs)
pub ssh_key_path: Option<String>,
/// Auth token / password (for HTTPS URLs)
pub auth_token: Option<String>,
/// Username for HTTPS auth (defaults to "x-access-token")
pub auth_username: Option<String>,
}
impl RepoCredentials {
pub(crate) fn make_callbacks(&self) -> RemoteCallbacks<'_> {
let mut callbacks = RemoteCallbacks::new();
let ssh_key = self.ssh_key_path.clone();
let token = self.auth_token.clone();
let username = self.auth_username.clone();
callbacks.credentials(move |_url, username_from_url, allowed_types| {
// SSH key authentication
if allowed_types.contains(git2::CredentialType::SSH_KEY) {
if let Some(ref key_path) = ssh_key {
let key = Path::new(key_path);
if key.exists() {
let user = username_from_url.unwrap_or("git");
return Cred::ssh_key(user, None, key, None);
}
}
}
// HTTPS userpass authentication
if allowed_types.contains(git2::CredentialType::USER_PASS_PLAINTEXT) {
if let Some(ref tok) = token {
let user = username.as_deref().unwrap_or("x-access-token");
return Cred::userpass_plaintext(user, tok);
}
}
Cred::default()
});
callbacks
}
fn fetch_options(&self) -> FetchOptions<'_> {
let mut fetch_opts = FetchOptions::new();
if self.has_credentials() {
fetch_opts.remote_callbacks(self.make_callbacks());
}
fetch_opts
}
fn has_credentials(&self) -> bool {
self.ssh_key_path
.as_ref()
.map(|p| Path::new(p).exists())
.unwrap_or(false)
|| self.auth_token.is_some()
}
}
pub struct GitOps {
base_path: PathBuf,
credentials: RepoCredentials,
}
impl GitOps {
pub fn new(base_path: &str, credentials: RepoCredentials) -> Self {
pub fn new(base_path: &str) -> Self {
Self {
base_path: PathBuf::from(base_path),
credentials,
}
}
#[tracing::instrument(skip_all, fields(repo_name = %repo_name))]
pub fn clone_or_fetch(&self, git_url: &str, repo_name: &str) -> Result<PathBuf, AgentError> {
let repo_path = self.base_path.join(repo_name);
if repo_path.exists() {
tracing::info!("fetching updates for existing repo");
self.fetch(&repo_path)?;
} else {
std::fs::create_dir_all(&repo_path)?;
self.clone_repo(git_url, &repo_path)?;
Repository::clone(git_url, &repo_path)?;
tracing::info!("Cloned {git_url} to {}", repo_path.display());
}
Ok(repo_path)
}
#[tracing::instrument(skip_all)]
fn clone_repo(&self, git_url: &str, repo_path: &Path) -> Result<(), AgentError> {
tracing::info!("cloning repo from {}", git_url);
let mut builder = git2::build::RepoBuilder::new();
let fetch_opts = self.credentials.fetch_options();
builder.fetch_options(fetch_opts);
builder.clone(git_url, repo_path)?;
Ok(())
}
fn fetch(&self, repo_path: &Path) -> Result<(), AgentError> {
let repo = Repository::open(repo_path)?;
let mut remote = repo.find_remote("origin")?;
let mut fetch_opts = self.credentials.fetch_options();
let mut fetch_opts = FetchOptions::new();
remote.fetch(&[] as &[&str], Some(&mut fetch_opts), None)?;
// Fast-forward to origin/HEAD
@@ -123,27 +48,6 @@ impl GitOps {
Ok(())
}
/// Test that we can access a remote repository (used during add validation)
pub fn test_access(git_url: &str, credentials: &RepoCredentials) -> Result<(), AgentError> {
let mut remote = git2::Remote::create_detached(git_url)?;
let callbacks = credentials.make_callbacks();
remote.connect_auth(git2::Direction::Fetch, Some(callbacks), None)?;
remote.disconnect()?;
Ok(())
}
/// Build credentials from agent config + per-repo overrides
pub fn make_repo_credentials(
config: &compliance_core::AgentConfig,
repo: &compliance_core::models::TrackedRepository,
) -> RepoCredentials {
RepoCredentials {
ssh_key_path: Some(config.ssh_key_path.clone()),
auth_token: repo.auth_token.clone(),
auth_username: repo.auth_username.clone(),
}
}
pub fn get_head_sha(repo_path: &Path) -> Result<String, AgentError> {
let repo = Repository::open(repo_path)?;
let head = repo.head()?;
@@ -159,62 +63,6 @@ impl GitOps {
}
}
/// Extract structured diff content between two commits
pub fn get_diff_content(
repo_path: &Path,
old_sha: &str,
new_sha: &str,
) -> Result<Vec<DiffFile>, AgentError> {
let repo = Repository::open(repo_path)?;
let old_commit = repo.find_commit(git2::Oid::from_str(old_sha)?)?;
let new_commit = repo.find_commit(git2::Oid::from_str(new_sha)?)?;
let old_tree = old_commit.tree()?;
let new_tree = new_commit.tree()?;
let diff = repo.diff_tree_to_tree(Some(&old_tree), Some(&new_tree), None)?;
let mut diff_files: Vec<DiffFile> = Vec::new();
diff.print(git2::DiffFormat::Patch, |delta, _hunk, line| {
let file_path = delta
.new_file()
.path()
.map(|p| p.to_string_lossy().to_string())
.unwrap_or_default();
// Find or create the DiffFile entry
let idx = if let Some(pos) = diff_files.iter().position(|f| f.path == file_path) {
pos
} else {
diff_files.push(DiffFile {
path: file_path,
hunks: String::new(),
});
diff_files.len() - 1
};
let diff_file = &mut diff_files[idx];
let prefix = match line.origin() {
'+' => "+",
'-' => "-",
' ' => " ",
_ => "",
};
let content = std::str::from_utf8(line.content()).unwrap_or("");
diff_file.hunks.push_str(prefix);
diff_file.hunks.push_str(content);
true
})?;
// Filter out binary files and very large diffs
diff_files.retain(|f| !f.hunks.is_empty() && f.hunks.len() < 50_000);
Ok(diff_files)
}
#[allow(dead_code)]
pub fn get_changed_files(
repo_path: &Path,
@@ -246,10 +94,3 @@ impl GitOps {
Ok(files)
}
}
/// A file changed between two commits with its diff content
#[derive(Debug, Clone)]
pub struct DiffFile {
pub path: String,
pub hunks: String,
}

View File

@@ -1,238 +0,0 @@
use std::path::Path;
use compliance_core::models::{Finding, ScanType, Severity};
use compliance_core::traits::{ScanOutput, Scanner};
use compliance_core::CoreError;
use crate::pipeline::dedup;
pub struct GitleaksScanner;
impl Scanner for GitleaksScanner {
fn name(&self) -> &str {
"gitleaks"
}
fn scan_type(&self) -> ScanType {
ScanType::SecretDetection
}
#[tracing::instrument(skip_all)]
async fn scan(&self, repo_path: &Path, repo_id: &str) -> Result<ScanOutput, CoreError> {
let output = tokio::process::Command::new("gitleaks")
.args([
"detect",
"--source",
".",
"--report-format",
"json",
"--report-path",
"/dev/stdout",
"--no-banner",
"--exit-code",
"0",
])
.current_dir(repo_path)
.output()
.await
.map_err(|e| CoreError::Scanner {
scanner: "gitleaks".to_string(),
source: Box::new(e),
})?;
if output.stdout.is_empty() {
return Ok(ScanOutput::default());
}
let results: Vec<GitleaksResult> =
serde_json::from_slice(&output.stdout).unwrap_or_default();
let findings = results
.into_iter()
.filter(|r| !is_allowlisted(&r.file))
.map(|r| {
let severity = match r.rule_id.as_str() {
s if s.contains("private-key") => Severity::Critical,
s if s.contains("token") || s.contains("password") || s.contains("secret") => {
Severity::High
}
s if s.contains("api-key") => Severity::High,
_ => Severity::Medium,
};
let fingerprint = dedup::compute_fingerprint(&[
repo_id,
&r.rule_id,
&r.file,
&r.start_line.to_string(),
]);
let title = format!("Secret detected: {}", r.description);
let description = format!(
"Potential secret ({}) found in {}:{}. Match: {}",
r.rule_id,
r.file,
r.start_line,
r.r#match.chars().take(80).collect::<String>(),
);
let mut finding = Finding::new(
repo_id.to_string(),
fingerprint,
"gitleaks".to_string(),
ScanType::SecretDetection,
title,
description,
severity,
);
finding.rule_id = Some(r.rule_id);
finding.file_path = Some(r.file);
finding.line_number = Some(r.start_line);
finding.code_snippet = Some(r.r#match);
finding
})
.collect();
Ok(ScanOutput {
findings,
sbom_entries: Vec::new(),
})
}
}
/// Skip files that commonly contain example/placeholder secrets
fn is_allowlisted(file_path: &str) -> bool {
let lower = file_path.to_lowercase();
lower.ends_with(".env.example")
|| lower.ends_with(".env.sample")
|| lower.ends_with(".env.template")
|| lower.contains("/test/")
|| lower.contains("/tests/")
|| lower.contains("/fixtures/")
|| lower.contains("/testdata/")
|| lower.contains("mock")
|| lower.ends_with("_test.go")
|| lower.ends_with(".test.ts")
|| lower.ends_with(".test.js")
|| lower.ends_with(".spec.ts")
|| lower.ends_with(".spec.js")
}
#[derive(serde::Deserialize)]
#[serde(rename_all = "PascalCase")]
struct GitleaksResult {
description: String,
#[serde(rename = "RuleID")]
rule_id: String,
file: String,
start_line: u32,
#[serde(rename = "Match")]
r#match: String,
}
#[cfg(test)]
mod tests {
use super::*;
// --- is_allowlisted tests ---
#[test]
fn allowlisted_env_example_files() {
assert!(is_allowlisted(".env.example"));
assert!(is_allowlisted("config/.env.sample"));
assert!(is_allowlisted("deploy/.ENV.TEMPLATE"));
}
#[test]
fn allowlisted_test_directories() {
assert!(is_allowlisted("src/test/config.json"));
assert!(is_allowlisted("src/tests/fixtures.rs"));
assert!(is_allowlisted("data/fixtures/secret.txt"));
assert!(is_allowlisted("pkg/testdata/key.pem"));
}
#[test]
fn allowlisted_mock_files() {
assert!(is_allowlisted("src/mock_service.py"));
assert!(is_allowlisted("lib/MockAuth.java"));
}
#[test]
fn allowlisted_test_suffixes() {
assert!(is_allowlisted("auth_test.go"));
assert!(is_allowlisted("auth.test.ts"));
assert!(is_allowlisted("auth.test.js"));
assert!(is_allowlisted("auth.spec.ts"));
assert!(is_allowlisted("auth.spec.js"));
}
#[test]
fn not_allowlisted_regular_files() {
assert!(!is_allowlisted("src/main.rs"));
assert!(!is_allowlisted("config/.env"));
assert!(!is_allowlisted("lib/auth.ts"));
assert!(!is_allowlisted("deploy/secrets.yaml"));
}
#[test]
fn not_allowlisted_partial_matches() {
// "test" as substring in a non-directory context should not match
assert!(!is_allowlisted("src/attestation.rs"));
assert!(!is_allowlisted("src/contest/data.json"));
}
// --- GitleaksResult deserialization tests ---
#[test]
fn deserialize_gitleaks_result() {
let json = r#"{
"Description": "AWS Access Key",
"RuleID": "aws-access-key",
"File": "src/config.rs",
"StartLine": 10,
"Match": "AKIAIOSFODNN7EXAMPLE"
}"#;
let result: GitleaksResult = serde_json::from_str(json).unwrap();
assert_eq!(result.description, "AWS Access Key");
assert_eq!(result.rule_id, "aws-access-key");
assert_eq!(result.file, "src/config.rs");
assert_eq!(result.start_line, 10);
assert_eq!(result.r#match, "AKIAIOSFODNN7EXAMPLE");
}
#[test]
fn deserialize_gitleaks_result_array() {
let json = r#"[
{
"Description": "Generic Secret",
"RuleID": "generic-secret",
"File": "app.py",
"StartLine": 5,
"Match": "password=hunter2"
}
]"#;
let results: Vec<GitleaksResult> = serde_json::from_str(json).unwrap();
assert_eq!(results.len(), 1);
assert_eq!(results[0].rule_id, "generic-secret");
}
#[test]
fn severity_mapping_private_key() {
// Verify the severity logic from the scan method
let rule_id = "some-private-key-rule";
assert!(rule_id.contains("private-key"));
}
#[test]
fn severity_mapping_token_password_secret() {
for keyword in &["token", "password", "secret"] {
let rule_id = format!("some-{}-rule", keyword);
assert!(
rule_id.contains("token")
|| rule_id.contains("password")
|| rule_id.contains("secret"),
"Expected '{rule_id}' to match token/password/secret"
);
}
}
}

View File

@@ -1,106 +0,0 @@
use compliance_core::models::Finding;
use super::orchestrator::{GraphContext, PipelineOrchestrator};
use crate::error::AgentError;
impl PipelineOrchestrator {
/// Build the code knowledge graph for a repo and compute impact analyses
pub(super) async fn build_code_graph(
&self,
repo_path: &std::path::Path,
repo_id: &str,
findings: &[Finding],
) -> Result<GraphContext, AgentError> {
let graph_build_id = uuid::Uuid::new_v4().to_string();
let engine = compliance_graph::GraphEngine::new(50_000);
let (mut code_graph, build_run) =
engine
.build_graph(repo_path, repo_id, &graph_build_id)
.map_err(|e| AgentError::Other(format!("Graph build error: {e}")))?;
// Apply community detection
compliance_graph::graph::community::apply_communities(&mut code_graph);
// Store graph in MongoDB
let store = compliance_graph::graph::persistence::GraphStore::new(self.db.inner());
store
.delete_repo_graph(repo_id)
.await
.map_err(|e| AgentError::Other(format!("Graph cleanup error: {e}")))?;
store
.store_graph(&build_run, &code_graph.nodes, &code_graph.edges)
.await
.map_err(|e| AgentError::Other(format!("Graph store error: {e}")))?;
// Compute impact analysis for each finding
let analyzer = compliance_graph::GraphEngine::impact_analyzer(&code_graph);
let mut impacts = Vec::new();
for finding in findings {
if let Some(file_path) = &finding.file_path {
let impact = analyzer.analyze(
repo_id,
&finding.fingerprint,
&graph_build_id,
file_path,
finding.line_number,
);
store
.store_impact(&impact)
.await
.map_err(|e| AgentError::Other(format!("Impact store error: {e}")))?;
impacts.push(impact);
}
}
Ok(GraphContext {
node_count: build_run.node_count,
edge_count: build_run.edge_count,
community_count: build_run.community_count,
impacts,
})
}
/// Trigger DAST scan if a target is configured for this repo
pub(super) async fn maybe_trigger_dast(&self, repo_id: &str, scan_run_id: &str) {
use futures_util::TryStreamExt;
let filter = mongodb::bson::doc! { "repo_id": repo_id };
let targets: Vec<compliance_core::models::DastTarget> =
match self.db.dast_targets().find(filter).await {
Ok(cursor) => cursor.try_collect().await.unwrap_or_default(),
Err(_) => return,
};
if targets.is_empty() {
tracing::info!("[{repo_id}] No DAST targets configured, skipping");
return;
}
for target in targets {
let db = self.db.clone();
let scan_run_id = scan_run_id.to_string();
tokio::spawn(async move {
let orchestrator = compliance_dast::DastOrchestrator::new(100);
match orchestrator.run_scan(&target, Vec::new()).await {
Ok((mut scan_run, findings)) => {
scan_run.sast_scan_run_id = Some(scan_run_id);
if let Err(e) = db.dast_scan_runs().insert_one(&scan_run).await {
tracing::error!("Failed to store DAST scan run: {e}");
}
for finding in &findings {
if let Err(e) = db.dast_findings().insert_one(finding).await {
tracing::error!("Failed to store DAST finding: {e}");
}
}
tracing::info!("DAST scan complete: {} findings", findings.len());
}
Err(e) => {
tracing::error!("DAST scan failed: {e}");
}
}
});
}
}
}

View File

@@ -1,259 +0,0 @@
use mongodb::bson::doc;
use compliance_core::models::*;
use super::orchestrator::{extract_base_url, PipelineOrchestrator};
use super::tracker_dispatch::TrackerDispatch;
use crate::error::AgentError;
use crate::trackers;
impl PipelineOrchestrator {
/// Build an issue tracker client from a repository's tracker configuration.
/// Returns `None` if the repo has no tracker configured.
pub(super) fn build_tracker(&self, repo: &TrackedRepository) -> Option<TrackerDispatch> {
let tracker_type = repo.tracker_type.as_ref()?;
// Per-repo token takes precedence, fall back to global config
match tracker_type {
TrackerType::GitHub => {
let token = repo.tracker_token.clone().or_else(|| {
self.config.github_token.as_ref().map(|t| {
use secrecy::ExposeSecret;
t.expose_secret().to_string()
})
})?;
let secret = secrecy::SecretString::from(token);
match trackers::github::GitHubTracker::new(&secret) {
Ok(t) => Some(TrackerDispatch::GitHub(t)),
Err(e) => {
tracing::warn!("Failed to build GitHub tracker: {e}");
None
}
}
}
TrackerType::GitLab => {
let base_url = self
.config
.gitlab_url
.clone()
.unwrap_or_else(|| "https://gitlab.com".to_string());
let token = repo.tracker_token.clone().or_else(|| {
self.config.gitlab_token.as_ref().map(|t| {
use secrecy::ExposeSecret;
t.expose_secret().to_string()
})
})?;
let secret = secrecy::SecretString::from(token);
Some(TrackerDispatch::GitLab(
trackers::gitlab::GitLabTracker::new(base_url, secret),
))
}
TrackerType::Gitea => {
let token = repo.tracker_token.clone()?;
let base_url = extract_base_url(&repo.git_url)?;
let secret = secrecy::SecretString::from(token);
Some(TrackerDispatch::Gitea(trackers::gitea::GiteaTracker::new(
base_url, secret,
)))
}
TrackerType::Jira => {
let base_url = self.config.jira_url.clone()?;
let email = self.config.jira_email.clone()?;
let project_key = self.config.jira_project_key.clone()?;
let token = repo.tracker_token.clone().or_else(|| {
self.config.jira_api_token.as_ref().map(|t| {
use secrecy::ExposeSecret;
t.expose_secret().to_string()
})
})?;
let secret = secrecy::SecretString::from(token);
Some(TrackerDispatch::Jira(trackers::jira::JiraTracker::new(
base_url,
email,
secret,
project_key,
)))
}
}
}
/// Create tracker issues for new findings (severity >= Medium).
/// Checks for duplicates via fingerprint search before creating.
#[tracing::instrument(skip_all, fields(repo_id = %repo_id))]
pub(super) async fn create_tracker_issues(
&self,
repo: &TrackedRepository,
repo_id: &str,
new_findings: &[Finding],
) -> Result<(), AgentError> {
let tracker = match self.build_tracker(repo) {
Some(t) => t,
None => {
tracing::info!("[{repo_id}] No issue tracker configured, skipping");
return Ok(());
}
};
let owner = match repo.tracker_owner.as_deref() {
Some(o) => o,
None => {
tracing::warn!("[{repo_id}] tracker_owner not set, skipping issue creation");
return Ok(());
}
};
let tracker_repo_name = match repo.tracker_repo.as_deref() {
Some(r) => r,
None => {
tracing::warn!("[{repo_id}] tracker_repo not set, skipping issue creation");
return Ok(());
}
};
// Only create issues for medium+ severity findings
let actionable: Vec<&Finding> = new_findings
.iter()
.filter(|f| {
matches!(
f.severity,
Severity::Medium | Severity::High | Severity::Critical
)
})
.collect();
if actionable.is_empty() {
tracing::info!("[{repo_id}] No medium+ findings, skipping issue creation");
return Ok(());
}
tracing::info!(
"[{repo_id}] Creating issues for {} findings via {}",
actionable.len(),
tracker.name()
);
let mut created = 0u32;
for finding in actionable {
let title = format!(
"[{}] {}: {}",
finding.severity, finding.scanner, finding.title
);
// Check if an issue already exists by fingerprint first, then by title
let mut found_existing = false;
for search_term in [&finding.fingerprint, &title] {
match tracker
.find_existing_issue(owner, tracker_repo_name, search_term)
.await
{
Ok(Some(existing)) => {
tracing::debug!(
"[{repo_id}] Issue already exists for '{}': {}",
search_term,
existing.external_url
);
found_existing = true;
break;
}
Ok(None) => {}
Err(e) => {
tracing::warn!("[{repo_id}] Failed to search for existing issue: {e}");
}
}
}
if found_existing {
continue;
}
let body = format_issue_body(finding);
let labels = vec![
format!("severity:{}", finding.severity),
format!("scanner:{}", finding.scanner),
"compliance-scanner".to_string(),
];
match tracker
.create_issue(owner, tracker_repo_name, &title, &body, &labels)
.await
{
Ok(mut issue) => {
issue.finding_id = finding
.id
.as_ref()
.map(|id| id.to_hex())
.unwrap_or_default();
// Update the finding with the issue URL
if let Some(finding_id) = &finding.id {
let _ = self
.db
.findings()
.update_one(
doc! { "_id": finding_id },
doc! { "$set": { "tracker_issue_url": &issue.external_url } },
)
.await;
}
// Store the tracker issue record
if let Err(e) = self.db.tracker_issues().insert_one(&issue).await {
tracing::warn!("[{repo_id}] Failed to store tracker issue: {e}");
}
created += 1;
}
Err(e) => {
tracing::warn!(
"[{repo_id}] Failed to create issue for {}: {e}",
finding.fingerprint
);
}
}
}
tracing::info!("[{repo_id}] Created {created} tracker issues");
Ok(())
}
}
/// Format a finding into a markdown issue body for the tracker.
pub(super) fn format_issue_body(finding: &Finding) -> String {
let mut body = String::new();
body.push_str(&format!("## {} Finding\n\n", finding.severity));
body.push_str(&format!("**Scanner:** {}\n", finding.scanner));
body.push_str(&format!("**Severity:** {}\n", finding.severity));
if let Some(rule) = &finding.rule_id {
body.push_str(&format!("**Rule:** {}\n", rule));
}
if let Some(cwe) = &finding.cwe {
body.push_str(&format!("**CWE:** {}\n", cwe));
}
body.push_str(&format!("\n### Description\n\n{}\n", finding.description));
if let Some(file_path) = &finding.file_path {
body.push_str(&format!("\n### Location\n\n**File:** `{}`", file_path));
if let Some(line) = finding.line_number {
body.push_str(&format!(" (line {})", line));
}
body.push('\n');
}
if let Some(snippet) = &finding.code_snippet {
body.push_str(&format!("\n### Code\n\n```\n{}\n```\n", snippet));
}
if let Some(remediation) = &finding.remediation {
body.push_str(&format!("\n### Remediation\n\n{}\n", remediation));
}
if let Some(fix) = &finding.suggested_fix {
body.push_str(&format!("\n### Suggested Fix\n\n```\n{}\n```\n", fix));
}
body.push_str(&format!(
"\n---\n*Fingerprint:* `{}`\n*Generated by compliance-scanner*",
finding.fingerprint
));
body
}

View File

@@ -1,251 +0,0 @@
use std::path::Path;
use compliance_core::models::{Finding, ScanType, Severity};
use compliance_core::CoreError;
use tokio::process::Command;
use crate::pipeline::dedup;
use super::run_with_timeout;
pub(super) async fn run_clippy(repo_path: &Path, repo_id: &str) -> Result<Vec<Finding>, CoreError> {
let child = Command::new("cargo")
.args([
"clippy",
"--message-format=json",
"--quiet",
"--",
"-W",
"clippy::all",
])
.current_dir(repo_path)
.env("RUSTC_WRAPPER", "")
.stdout(std::process::Stdio::piped())
.stderr(std::process::Stdio::piped())
.spawn()
.map_err(|e| CoreError::Scanner {
scanner: "clippy".to_string(),
source: Box::new(e),
})?;
let output = run_with_timeout(child, "clippy").await?;
let stdout = String::from_utf8_lossy(&output.stdout);
let mut findings = Vec::new();
for line in stdout.lines() {
let msg: serde_json::Value = match serde_json::from_str(line) {
Ok(v) => v,
Err(_) => continue,
};
if msg.get("reason").and_then(|v| v.as_str()) != Some("compiler-message") {
continue;
}
let message = match msg.get("message") {
Some(m) => m,
None => continue,
};
let level = message.get("level").and_then(|v| v.as_str()).unwrap_or("");
if level != "warning" && level != "error" {
continue;
}
let text = message
.get("message")
.and_then(|v| v.as_str())
.unwrap_or("")
.to_string();
let code = message
.get("code")
.and_then(|v| v.get("code"))
.and_then(|v| v.as_str())
.unwrap_or("")
.to_string();
if text.starts_with("aborting due to") || code.is_empty() {
continue;
}
let (file_path, line_number) = extract_primary_span(message);
let severity = if level == "error" {
Severity::High
} else {
Severity::Low
};
let fingerprint = dedup::compute_fingerprint(&[
repo_id,
"clippy",
&code,
&file_path,
&line_number.to_string(),
]);
let mut finding = Finding::new(
repo_id.to_string(),
fingerprint,
"clippy".to_string(),
ScanType::Lint,
format!("[clippy] {text}"),
text,
severity,
);
finding.rule_id = Some(code);
if !file_path.is_empty() {
finding.file_path = Some(file_path);
}
if line_number > 0 {
finding.line_number = Some(line_number);
}
findings.push(finding);
}
Ok(findings)
}
fn extract_primary_span(message: &serde_json::Value) -> (String, u32) {
let spans = match message.get("spans").and_then(|v| v.as_array()) {
Some(s) => s,
None => return (String::new(), 0),
};
for span in spans {
if span.get("is_primary").and_then(|v| v.as_bool()) == Some(true) {
let file = span
.get("file_name")
.and_then(|v| v.as_str())
.unwrap_or("")
.to_string();
let line = span.get("line_start").and_then(|v| v.as_u64()).unwrap_or(0) as u32;
return (file, line);
}
}
(String::new(), 0)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn extract_primary_span_with_primary() {
let msg = serde_json::json!({
"spans": [
{
"file_name": "src/lib.rs",
"line_start": 42,
"is_primary": true
}
]
});
let (file, line) = extract_primary_span(&msg);
assert_eq!(file, "src/lib.rs");
assert_eq!(line, 42);
}
#[test]
fn extract_primary_span_no_primary() {
let msg = serde_json::json!({
"spans": [
{
"file_name": "src/lib.rs",
"line_start": 42,
"is_primary": false
}
]
});
let (file, line) = extract_primary_span(&msg);
assert_eq!(file, "");
assert_eq!(line, 0);
}
#[test]
fn extract_primary_span_multiple_spans() {
let msg = serde_json::json!({
"spans": [
{
"file_name": "src/other.rs",
"line_start": 10,
"is_primary": false
},
{
"file_name": "src/main.rs",
"line_start": 99,
"is_primary": true
}
]
});
let (file, line) = extract_primary_span(&msg);
assert_eq!(file, "src/main.rs");
assert_eq!(line, 99);
}
#[test]
fn extract_primary_span_no_spans() {
let msg = serde_json::json!({});
let (file, line) = extract_primary_span(&msg);
assert_eq!(file, "");
assert_eq!(line, 0);
}
#[test]
fn extract_primary_span_empty_spans() {
let msg = serde_json::json!({ "spans": [] });
let (file, line) = extract_primary_span(&msg);
assert_eq!(file, "");
assert_eq!(line, 0);
}
#[test]
fn parse_clippy_compiler_message_line() {
let line = r#"{"reason":"compiler-message","message":{"level":"warning","message":"unused variable","code":{"code":"unused_variables"},"spans":[{"file_name":"src/main.rs","line_start":5,"is_primary":true}]}}"#;
let msg: serde_json::Value = serde_json::from_str(line).unwrap();
assert_eq!(
msg.get("reason").and_then(|v| v.as_str()),
Some("compiler-message")
);
let message = msg.get("message").unwrap();
assert_eq!(
message.get("level").and_then(|v| v.as_str()),
Some("warning")
);
assert_eq!(
message.get("message").and_then(|v| v.as_str()),
Some("unused variable")
);
assert_eq!(
message
.get("code")
.and_then(|v| v.get("code"))
.and_then(|v| v.as_str()),
Some("unused_variables")
);
let (file, line_num) = extract_primary_span(message);
assert_eq!(file, "src/main.rs");
assert_eq!(line_num, 5);
}
#[test]
fn skip_non_compiler_message() {
let line = r#"{"reason":"build-script-executed","package_id":"foo 0.1.0"}"#;
let msg: serde_json::Value = serde_json::from_str(line).unwrap();
assert_ne!(
msg.get("reason").and_then(|v| v.as_str()),
Some("compiler-message")
);
}
#[test]
fn skip_aborting_message() {
let text = "aborting due to 3 previous errors";
assert!(text.starts_with("aborting due to"));
}
}

View File

@@ -1,183 +0,0 @@
use std::path::Path;
use compliance_core::models::{Finding, ScanType, Severity};
use compliance_core::CoreError;
use tokio::process::Command;
use crate::pipeline::dedup;
use super::run_with_timeout;
pub(super) async fn run_eslint(repo_path: &Path, repo_id: &str) -> Result<Vec<Finding>, CoreError> {
// Use the project-local eslint binary directly, not npx (which can hang downloading)
let eslint_bin = repo_path.join("node_modules/.bin/eslint");
let child = Command::new(eslint_bin)
.args([".", "--format", "json", "--no-error-on-unmatched-pattern"])
.current_dir(repo_path)
.stdout(std::process::Stdio::piped())
.stderr(std::process::Stdio::piped())
.spawn()
.map_err(|e| CoreError::Scanner {
scanner: "eslint".to_string(),
source: Box::new(e),
})?;
let output = run_with_timeout(child, "eslint").await?;
if output.stdout.is_empty() {
return Ok(Vec::new());
}
let results: Vec<EslintFileResult> = serde_json::from_slice(&output.stdout).unwrap_or_default();
let mut findings = Vec::new();
for file_result in results {
for msg in file_result.messages {
let severity = match msg.severity {
2 => Severity::Medium,
_ => Severity::Low,
};
let rule_id = msg.rule_id.unwrap_or_default();
let fingerprint = dedup::compute_fingerprint(&[
repo_id,
"eslint",
&rule_id,
&file_result.file_path,
&msg.line.to_string(),
]);
let mut finding = Finding::new(
repo_id.to_string(),
fingerprint,
"eslint".to_string(),
ScanType::Lint,
format!("[eslint] {}", msg.message),
msg.message,
severity,
);
finding.rule_id = Some(rule_id);
finding.file_path = Some(file_result.file_path.clone());
finding.line_number = Some(msg.line);
findings.push(finding);
}
}
Ok(findings)
}
#[derive(serde::Deserialize)]
struct EslintFileResult {
#[serde(rename = "filePath")]
file_path: String,
messages: Vec<EslintMessage>,
}
#[derive(serde::Deserialize)]
struct EslintMessage {
#[serde(rename = "ruleId")]
rule_id: Option<String>,
severity: u8,
message: String,
line: u32,
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn deserialize_eslint_output() {
let json = r#"[
{
"filePath": "/home/user/project/src/app.js",
"messages": [
{
"ruleId": "no-unused-vars",
"severity": 2,
"message": "'x' is defined but never used.",
"line": 10
},
{
"ruleId": "semi",
"severity": 1,
"message": "Missing semicolon.",
"line": 15
}
]
}
]"#;
let results: Vec<EslintFileResult> = serde_json::from_str(json).unwrap();
assert_eq!(results.len(), 1);
assert_eq!(results[0].file_path, "/home/user/project/src/app.js");
assert_eq!(results[0].messages.len(), 2);
assert_eq!(
results[0].messages[0].rule_id,
Some("no-unused-vars".to_string())
);
assert_eq!(results[0].messages[0].severity, 2);
assert_eq!(results[0].messages[0].line, 10);
assert_eq!(results[0].messages[1].severity, 1);
}
#[test]
fn deserialize_eslint_null_rule_id() {
let json = r#"[
{
"filePath": "src/index.js",
"messages": [
{
"ruleId": null,
"severity": 2,
"message": "Parsing error: Unexpected token",
"line": 1
}
]
}
]"#;
let results: Vec<EslintFileResult> = serde_json::from_str(json).unwrap();
assert_eq!(results[0].messages[0].rule_id, None);
}
#[test]
fn deserialize_eslint_empty_messages() {
let json = r#"[{"filePath": "src/clean.js", "messages": []}]"#;
let results: Vec<EslintFileResult> = serde_json::from_str(json).unwrap();
assert_eq!(results[0].messages.len(), 0);
}
#[test]
fn deserialize_eslint_empty_array() {
let json = "[]";
let results: Vec<EslintFileResult> = serde_json::from_str(json).unwrap();
assert!(results.is_empty());
}
#[test]
fn eslint_severity_mapping() {
// severity 2 = error -> Medium, anything else -> Low
assert_eq!(
match 2u8 {
2 => "Medium",
_ => "Low",
},
"Medium"
);
assert_eq!(
match 1u8 {
2 => "Medium",
_ => "Low",
},
"Low"
);
assert_eq!(
match 0u8 {
2 => "Medium",
_ => "Low",
},
"Low"
);
}
}

View File

@@ -1,97 +0,0 @@
mod clippy;
mod eslint;
mod ruff;
use std::path::Path;
use std::time::Duration;
use compliance_core::models::ScanType;
use compliance_core::traits::{ScanOutput, Scanner};
use compliance_core::CoreError;
/// Timeout for each individual lint command
pub(crate) const LINT_TIMEOUT: Duration = Duration::from_secs(120);
pub struct LintScanner;
impl Scanner for LintScanner {
fn name(&self) -> &str {
"lint"
}
fn scan_type(&self) -> ScanType {
ScanType::Lint
}
#[tracing::instrument(skip_all)]
async fn scan(&self, repo_path: &Path, repo_id: &str) -> Result<ScanOutput, CoreError> {
let mut all_findings = Vec::new();
// Detect which languages are present and run appropriate linters
if has_rust_project(repo_path) {
match clippy::run_clippy(repo_path, repo_id).await {
Ok(findings) => all_findings.extend(findings),
Err(e) => tracing::warn!("Clippy failed: {e}"),
}
}
if has_js_project(repo_path) {
match eslint::run_eslint(repo_path, repo_id).await {
Ok(findings) => all_findings.extend(findings),
Err(e) => tracing::warn!("ESLint failed: {e}"),
}
}
if has_python_project(repo_path) {
match ruff::run_ruff(repo_path, repo_id).await {
Ok(findings) => all_findings.extend(findings),
Err(e) => tracing::warn!("Ruff failed: {e}"),
}
}
Ok(ScanOutput {
findings: all_findings,
sbom_entries: Vec::new(),
})
}
}
fn has_rust_project(repo_path: &Path) -> bool {
repo_path.join("Cargo.toml").exists()
}
fn has_js_project(repo_path: &Path) -> bool {
// Only run if eslint is actually installed in the project
repo_path.join("package.json").exists() && repo_path.join("node_modules/.bin/eslint").exists()
}
fn has_python_project(repo_path: &Path) -> bool {
repo_path.join("pyproject.toml").exists()
|| repo_path.join("setup.py").exists()
|| repo_path.join("requirements.txt").exists()
}
/// Run a command with a timeout, returning its output or an error
pub(crate) async fn run_with_timeout(
child: tokio::process::Child,
scanner_name: &str,
) -> Result<std::process::Output, CoreError> {
let result = tokio::time::timeout(LINT_TIMEOUT, child.wait_with_output()).await;
match result {
Ok(Ok(output)) => Ok(output),
Ok(Err(e)) => Err(CoreError::Scanner {
scanner: scanner_name.to_string(),
source: Box::new(e),
}),
Err(_) => {
// Process is dropped here which sends SIGKILL on Unix
Err(CoreError::Scanner {
scanner: scanner_name.to_string(),
source: Box::new(std::io::Error::new(
std::io::ErrorKind::TimedOut,
format!("{scanner_name} timed out after {}s", LINT_TIMEOUT.as_secs()),
)),
})
}
}
}

View File

@@ -1,150 +0,0 @@
use std::path::Path;
use compliance_core::models::{Finding, ScanType, Severity};
use compliance_core::CoreError;
use tokio::process::Command;
use crate::pipeline::dedup;
use super::run_with_timeout;
pub(super) async fn run_ruff(repo_path: &Path, repo_id: &str) -> Result<Vec<Finding>, CoreError> {
let child = Command::new("ruff")
.args(["check", ".", "--output-format", "json", "--exit-zero"])
.current_dir(repo_path)
.stdout(std::process::Stdio::piped())
.stderr(std::process::Stdio::piped())
.spawn()
.map_err(|e| CoreError::Scanner {
scanner: "ruff".to_string(),
source: Box::new(e),
})?;
let output = run_with_timeout(child, "ruff").await?;
if output.stdout.is_empty() {
return Ok(Vec::new());
}
let results: Vec<RuffResult> = serde_json::from_slice(&output.stdout).unwrap_or_default();
let findings = results
.into_iter()
.map(|r| {
let severity = if r.code.starts_with('E') || r.code.starts_with('F') {
Severity::Medium
} else {
Severity::Low
};
let fingerprint = dedup::compute_fingerprint(&[
repo_id,
"ruff",
&r.code,
&r.filename,
&r.location.row.to_string(),
]);
let mut finding = Finding::new(
repo_id.to_string(),
fingerprint,
"ruff".to_string(),
ScanType::Lint,
format!("[ruff] {}: {}", r.code, r.message),
r.message,
severity,
);
finding.rule_id = Some(r.code);
finding.file_path = Some(r.filename);
finding.line_number = Some(r.location.row);
finding
})
.collect();
Ok(findings)
}
#[derive(serde::Deserialize)]
struct RuffResult {
code: String,
message: String,
filename: String,
location: RuffLocation,
}
#[derive(serde::Deserialize)]
struct RuffLocation {
row: u32,
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn deserialize_ruff_output() {
let json = r#"[
{
"code": "E501",
"message": "Line too long (120 > 79 characters)",
"filename": "src/main.py",
"location": {"row": 42}
},
{
"code": "F401",
"message": "`os` imported but unused",
"filename": "src/utils.py",
"location": {"row": 1}
}
]"#;
let results: Vec<RuffResult> = serde_json::from_str(json).unwrap();
assert_eq!(results.len(), 2);
assert_eq!(results[0].code, "E501");
assert_eq!(results[0].filename, "src/main.py");
assert_eq!(results[0].location.row, 42);
assert_eq!(results[1].code, "F401");
assert_eq!(results[1].location.row, 1);
}
#[test]
fn deserialize_ruff_empty() {
let json = "[]";
let results: Vec<RuffResult> = serde_json::from_str(json).unwrap();
assert!(results.is_empty());
}
#[test]
fn ruff_severity_e_and_f_are_medium() {
for code in &["E501", "E302", "F401", "F811"] {
let is_medium = code.starts_with('E') || code.starts_with('F');
assert!(is_medium, "Expected {code} to be Medium severity");
}
}
#[test]
fn ruff_severity_others_are_low() {
for code in &["W291", "I001", "D100", "C901", "N801"] {
let is_medium = code.starts_with('E') || code.starts_with('F');
assert!(!is_medium, "Expected {code} to be Low severity");
}
}
#[test]
fn deserialize_ruff_with_extra_fields() {
// Ruff output may contain additional fields we don't use
let json = r#"[{
"code": "W291",
"message": "Trailing whitespace",
"filename": "app.py",
"location": {"row": 3, "column": 10},
"end_location": {"row": 3, "column": 11},
"fix": null,
"noqa_row": 3
}]"#;
let results: Vec<RuffResult> = serde_json::from_str(json).unwrap();
assert_eq!(results.len(), 1);
assert_eq!(results[0].code, "W291");
}
}

View File

@@ -1,14 +1,7 @@
pub mod code_review;
pub mod cve;
pub mod dedup;
pub mod git;
pub mod gitleaks;
mod graph_build;
mod issue_creation;
pub mod lint;
pub mod orchestrator;
pub mod patterns;
mod pr_review;
pub mod sbom;
pub mod semgrep;
mod tracker_dispatch;

View File

@@ -1,7 +1,6 @@
use std::sync::Arc;
use mongodb::bson::doc;
use tracing::Instrument;
use compliance_core::models::*;
use compliance_core::traits::Scanner;
@@ -12,8 +11,6 @@ use crate::error::AgentError;
use crate::llm::LlmClient;
use crate::pipeline::cve::CveScanner;
use crate::pipeline::git::GitOps;
use crate::pipeline::gitleaks::GitleaksScanner;
use crate::pipeline::lint::LintScanner;
use crate::pipeline::patterns::{GdprPatternScanner, OAuthPatternScanner};
use crate::pipeline::sbom::SbomScanner;
use crate::pipeline::semgrep::SemgrepScanner;
@@ -29,10 +26,10 @@ pub struct GraphContext {
}
pub struct PipelineOrchestrator {
pub(super) config: AgentConfig,
pub(super) db: Database,
pub(super) llm: Arc<LlmClient>,
pub(super) http: reqwest::Client,
config: AgentConfig,
db: Database,
llm: Arc<LlmClient>,
http: reqwest::Client,
}
impl PipelineOrchestrator {
@@ -50,7 +47,6 @@ impl PipelineOrchestrator {
}
}
#[tracing::instrument(skip_all, fields(repo_id = %repo_id, trigger = ?trigger))]
pub async fn run(&self, repo_id: &str, trigger: ScanTrigger) -> Result<(), AgentError> {
// Look up the repository
let repo = self
@@ -90,7 +86,6 @@ impl PipelineOrchestrator {
.await?;
}
Err(e) => {
tracing::error!(repo_id, error = %e, "Scan pipeline failed");
self.db
.scan_runs()
.update_one(
@@ -110,7 +105,6 @@ impl PipelineOrchestrator {
result.map(|_| ())
}
#[tracing::instrument(skip_all, fields(repo_id = repo.name.as_str()))]
async fn run_pipeline(
&self,
repo: &TrackedRepository,
@@ -120,8 +114,7 @@ impl PipelineOrchestrator {
// Stage 0: Change detection
tracing::info!("[{repo_id}] Stage 0: Change detection");
let creds = GitOps::make_repo_credentials(&self.config, repo);
let git_ops = GitOps::new(&self.config.git_clone_base_path, creds);
let git_ops = GitOps::new(&self.config.git_clone_base_path);
let repo_path = git_ops.clone_or_fetch(&repo.git_url, &repo.name)?;
if !GitOps::has_new_commits(&repo_path, repo.last_scanned_commit.as_deref())? {
@@ -135,13 +128,8 @@ impl PipelineOrchestrator {
// Stage 1: Semgrep SAST
tracing::info!("[{repo_id}] Stage 1: Semgrep SAST");
self.update_phase(scan_run_id, "sast").await;
match async {
let semgrep = SemgrepScanner;
semgrep.scan(&repo_path, &repo_id).await
}
.instrument(tracing::info_span!("stage_sast"))
.await
{
let semgrep = SemgrepScanner;
match semgrep.scan(&repo_path, &repo_id).await {
Ok(output) => all_findings.extend(output.findings),
Err(e) => tracing::warn!("[{repo_id}] Semgrep failed: {e}"),
}
@@ -149,13 +137,8 @@ impl PipelineOrchestrator {
// Stage 2: SBOM Generation
tracing::info!("[{repo_id}] Stage 2: SBOM Generation");
self.update_phase(scan_run_id, "sbom_generation").await;
let mut sbom_entries = match async {
let sbom_scanner = SbomScanner;
sbom_scanner.scan(&repo_path, &repo_id).await
}
.instrument(tracing::info_span!("stage_sbom_generation"))
.await
{
let sbom_scanner = SbomScanner;
let mut sbom_entries = match sbom_scanner.scan(&repo_path, &repo_id).await {
Ok(output) => output.sbom_entries,
Err(e) => {
tracing::warn!("[{repo_id}] SBOM generation failed: {e}");
@@ -174,13 +157,9 @@ impl PipelineOrchestrator {
k.expose_secret().to_string()
}),
);
let cve_alerts = match async {
cve_scanner
.scan_dependencies(&repo_id, &mut sbom_entries)
.await
}
.instrument(tracing::info_span!("stage_cve_scanning"))
.await
let cve_alerts = match cve_scanner
.scan_dependencies(&repo_id, &mut sbom_entries)
.await
{
Ok(alerts) => alerts,
Err(e) => {
@@ -192,63 +171,23 @@ impl PipelineOrchestrator {
// Stage 4: Pattern Scanning (GDPR + OAuth)
tracing::info!("[{repo_id}] Stage 4: Pattern Scanning");
self.update_phase(scan_run_id, "pattern_scanning").await;
{
let pattern_findings = async {
let mut findings = Vec::new();
let gdpr = GdprPatternScanner::new();
match gdpr.scan(&repo_path, &repo_id).await {
Ok(output) => findings.extend(output.findings),
Err(e) => tracing::warn!("[{repo_id}] GDPR pattern scan failed: {e}"),
}
let oauth = OAuthPatternScanner::new();
match oauth.scan(&repo_path, &repo_id).await {
Ok(output) => findings.extend(output.findings),
Err(e) => tracing::warn!("[{repo_id}] OAuth pattern scan failed: {e}"),
}
findings
}
.instrument(tracing::info_span!("stage_pattern_scanning"))
.await;
all_findings.extend(pattern_findings);
}
// Stage 4a: Secret Detection (Gitleaks)
tracing::info!("[{repo_id}] Stage 4a: Secret Detection");
self.update_phase(scan_run_id, "secret_detection").await;
match async {
let gitleaks = GitleaksScanner;
gitleaks.scan(&repo_path, &repo_id).await
}
.instrument(tracing::info_span!("stage_secret_detection"))
.await
{
let gdpr = GdprPatternScanner::new();
match gdpr.scan(&repo_path, &repo_id).await {
Ok(output) => all_findings.extend(output.findings),
Err(e) => tracing::warn!("[{repo_id}] Gitleaks failed: {e}"),
Err(e) => tracing::warn!("[{repo_id}] GDPR pattern scan failed: {e}"),
}
// Stage 4b: Lint Scanning
tracing::info!("[{repo_id}] Stage 4b: Lint Scanning");
self.update_phase(scan_run_id, "lint_scanning").await;
match async {
let lint = LintScanner;
lint.scan(&repo_path, &repo_id).await
}
.instrument(tracing::info_span!("stage_lint_scanning"))
.await
{
let oauth = OAuthPatternScanner::new();
match oauth.scan(&repo_path, &repo_id).await {
Ok(output) => all_findings.extend(output.findings),
Err(e) => tracing::warn!("[{repo_id}] Lint scanning failed: {e}"),
Err(e) => tracing::warn!("[{repo_id}] OAuth pattern scan failed: {e}"),
}
// Stage 4.5: Graph Building
tracing::info!("[{repo_id}] Stage 4.5: Graph Building");
self.update_phase(scan_run_id, "graph_building").await;
let graph_context = match async {
self.build_code_graph(&repo_path, &repo_id, &all_findings)
.await
}
.instrument(tracing::info_span!("stage_graph_building"))
.await
let graph_context = match self
.build_code_graph(&repo_path, &repo_id, &all_findings)
.await
{
Ok(ctx) => Some(ctx),
Err(e) => {
@@ -273,7 +212,6 @@ impl PipelineOrchestrator {
// Dedup against existing findings and insert new ones
let mut new_count = 0u32;
let mut new_findings: Vec<Finding> = Vec::new();
for mut finding in all_findings {
finding.scan_run_id = Some(scan_run_id.to_string());
// Check if fingerprint already exists
@@ -283,22 +221,12 @@ impl PipelineOrchestrator {
.find_one(doc! { "fingerprint": &finding.fingerprint })
.await?;
if existing.is_none() {
let result = self.db.findings().insert_one(&finding).await?;
finding.id = result.inserted_id.as_object_id();
new_findings.push(finding);
self.db.findings().insert_one(&finding).await?;
new_count += 1;
}
}
// Remove stale SBOM entries for this repo before reinserting
if !sbom_entries.is_empty() {
self.db
.sbom_entries()
.delete_many(doc! { "repo_id": &repo.id })
.await?;
}
// Persist SBOM entries
// Persist SBOM entries (upsert by repo_id + name + version)
for entry in &sbom_entries {
let filter = doc! {
"repo_id": &entry.repo_id,
@@ -334,12 +262,7 @@ impl PipelineOrchestrator {
// Stage 6: Issue Creation
tracing::info!("[{repo_id}] Stage 6: Issue Creation");
self.update_phase(scan_run_id, "issue_creation").await;
if let Err(e) = self
.create_tracker_issues(repo, &repo_id, &new_findings)
.await
{
tracing::warn!("[{repo_id}] Issue creation failed: {e}");
}
// Issue creation is handled by the trackers module - deferred to agent
// Stage 7: Update repository
self.db
@@ -365,7 +288,107 @@ impl PipelineOrchestrator {
Ok(new_count)
}
pub(super) async fn update_phase(&self, scan_run_id: &str, phase: &str) {
/// Build the code knowledge graph for a repo and compute impact analyses
async fn build_code_graph(
&self,
repo_path: &std::path::Path,
repo_id: &str,
findings: &[Finding],
) -> Result<GraphContext, AgentError> {
let graph_build_id = uuid::Uuid::new_v4().to_string();
let engine = compliance_graph::GraphEngine::new(50_000);
let (mut code_graph, build_run) =
engine
.build_graph(repo_path, repo_id, &graph_build_id)
.map_err(|e| AgentError::Other(format!("Graph build error: {e}")))?;
// Apply community detection
compliance_graph::graph::community::apply_communities(&mut code_graph);
// Store graph in MongoDB
let store = compliance_graph::graph::persistence::GraphStore::new(self.db.inner());
store
.delete_repo_graph(repo_id)
.await
.map_err(|e| AgentError::Other(format!("Graph cleanup error: {e}")))?;
store
.store_graph(&build_run, &code_graph.nodes, &code_graph.edges)
.await
.map_err(|e| AgentError::Other(format!("Graph store error: {e}")))?;
// Compute impact analysis for each finding
let analyzer = compliance_graph::GraphEngine::impact_analyzer(&code_graph);
let mut impacts = Vec::new();
for finding in findings {
if let Some(file_path) = &finding.file_path {
let impact = analyzer.analyze(
repo_id,
&finding.fingerprint,
&graph_build_id,
file_path,
finding.line_number,
);
store
.store_impact(&impact)
.await
.map_err(|e| AgentError::Other(format!("Impact store error: {e}")))?;
impacts.push(impact);
}
}
Ok(GraphContext {
node_count: build_run.node_count,
edge_count: build_run.edge_count,
community_count: build_run.community_count,
impacts,
})
}
/// Trigger DAST scan if a target is configured for this repo
async fn maybe_trigger_dast(&self, repo_id: &str, scan_run_id: &str) {
use futures_util::TryStreamExt;
let filter = mongodb::bson::doc! { "repo_id": repo_id };
let targets: Vec<compliance_core::models::DastTarget> =
match self.db.dast_targets().find(filter).await {
Ok(cursor) => cursor.try_collect().await.unwrap_or_default(),
Err(_) => return,
};
if targets.is_empty() {
tracing::info!("[{repo_id}] No DAST targets configured, skipping");
return;
}
for target in targets {
let db = self.db.clone();
let scan_run_id = scan_run_id.to_string();
tokio::spawn(async move {
let orchestrator = compliance_dast::DastOrchestrator::new(100);
match orchestrator.run_scan(&target, Vec::new()).await {
Ok((mut scan_run, findings)) => {
scan_run.sast_scan_run_id = Some(scan_run_id);
if let Err(e) = db.dast_scan_runs().insert_one(&scan_run).await {
tracing::error!("Failed to store DAST scan run: {e}");
}
for finding in &findings {
if let Err(e) = db.dast_findings().insert_one(finding).await {
tracing::error!("Failed to store DAST finding: {e}");
}
}
tracing::info!("DAST scan complete: {} findings", findings.len());
}
Err(e) => {
tracing::error!("DAST scan failed: {e}");
}
}
});
}
}
async fn update_phase(&self, scan_run_id: &str, phase: &str) {
if let Ok(oid) = mongodb::bson::oid::ObjectId::parse_str(scan_run_id) {
let _ = self
.db
@@ -381,28 +404,3 @@ impl PipelineOrchestrator {
}
}
}
/// Extract the scheme + host from a git URL.
/// e.g. "https://gitea.example.com/owner/repo.git" -> "https://gitea.example.com"
/// e.g. "ssh://git@gitea.example.com:22/owner/repo.git" -> "https://gitea.example.com"
pub(super) fn extract_base_url(git_url: &str) -> Option<String> {
if let Some(rest) = git_url.strip_prefix("https://") {
let host = rest.split('/').next()?;
Some(format!("https://{host}"))
} else if let Some(rest) = git_url.strip_prefix("http://") {
let host = rest.split('/').next()?;
Some(format!("http://{host}"))
} else if let Some(rest) = git_url.strip_prefix("ssh://") {
// ssh://git@host:port/path -> extract host
let after_at = rest.find('@').map(|i| &rest[i + 1..]).unwrap_or(rest);
let host = after_at.split(&[':', '/'][..]).next()?;
Some(format!("https://{host}"))
} else if let Some(at_pos) = git_url.find('@') {
// SCP-style: git@host:owner/repo.git
let after_at = &git_url[at_pos + 1..];
let host = after_at.split(':').next()?;
Some(format!("https://{host}"))
} else {
None
}
}

View File

@@ -33,7 +33,6 @@ struct PatternRule {
file_extensions: Vec<String>,
}
#[allow(clippy::new_without_default)]
impl GdprPatternScanner {
pub fn new() -> Self {
let patterns = vec![
@@ -83,7 +82,6 @@ impl Scanner for GdprPatternScanner {
ScanType::Gdpr
}
#[tracing::instrument(skip_all)]
async fn scan(&self, repo_path: &Path, repo_id: &str) -> Result<ScanOutput, CoreError> {
let findings = scan_with_patterns(
repo_path,
@@ -99,7 +97,6 @@ impl Scanner for GdprPatternScanner {
}
}
#[allow(clippy::new_without_default)]
impl OAuthPatternScanner {
pub fn new() -> Self {
let patterns = vec![
@@ -149,7 +146,6 @@ impl Scanner for OAuthPatternScanner {
ScanType::OAuth
}
#[tracing::instrument(skip_all)]
async fn scan(&self, repo_path: &Path, repo_id: &str) -> Result<ScanOutput, CoreError> {
let findings = scan_with_patterns(
repo_path,
@@ -258,159 +254,3 @@ fn walkdir(path: &Path) -> Result<Vec<walkdir::DirEntry>, CoreError> {
Ok(entries)
}
#[cfg(test)]
mod tests {
use super::*;
// --- compile_regex tests ---
#[test]
fn compile_regex_valid_pattern() {
let re = compile_regex(r"\bfoo\b");
assert!(re.is_match("hello foo bar"));
assert!(!re.is_match("foobar"));
}
#[test]
fn compile_regex_invalid_pattern_returns_fallback() {
// An invalid regex should return the fallback "^$" that only matches empty strings
let re = compile_regex(r"[invalid");
assert!(re.is_match(""));
assert!(!re.is_match("anything"));
}
// --- GDPR pattern tests ---
#[test]
fn gdpr_pii_logging_matches() {
let scanner = GdprPatternScanner::new();
let pattern = &scanner.patterns[0]; // gdpr-pii-logging
// Regex: (log|print|console\.|logger\.|tracing::)\s*[\.(].*\b(pii_keyword)\b
assert!(pattern.pattern.is_match("console.log(email)"));
assert!(pattern.pattern.is_match("console.log(user.ssn)"));
assert!(pattern.pattern.is_match("print(phone_number)"));
assert!(pattern.pattern.is_match("tracing::(ip_addr)"));
assert!(pattern.pattern.is_match("log.debug(credit_card)"));
}
#[test]
fn gdpr_pii_logging_no_false_positive() {
let scanner = GdprPatternScanner::new();
let pattern = &scanner.patterns[0];
// Regular logging without PII fields should not match
assert!(!pattern
.pattern
.is_match("logger.info(\"request completed\")"));
assert!(!pattern.pattern.is_match("let email = user.email;"));
}
#[test]
fn gdpr_no_consent_matches() {
let scanner = GdprPatternScanner::new();
let pattern = &scanner.patterns[1]; // gdpr-no-consent
assert!(pattern.pattern.is_match("collect personal data"));
assert!(pattern.pattern.is_match("store user_data in db"));
assert!(pattern.pattern.is_match("save pii to disk"));
}
#[test]
fn gdpr_user_model_matches() {
let scanner = GdprPatternScanner::new();
let pattern = &scanner.patterns[2]; // gdpr-no-delete-endpoint
assert!(pattern.pattern.is_match("struct User {"));
assert!(pattern.pattern.is_match("class User(Model):"));
}
#[test]
fn gdpr_hardcoded_retention_matches() {
let scanner = GdprPatternScanner::new();
let pattern = &scanner.patterns[3]; // gdpr-hardcoded-retention
assert!(pattern.pattern.is_match("retention = 30"));
assert!(pattern.pattern.is_match("ttl: 3600"));
assert!(pattern.pattern.is_match("expire = 86400"));
}
// --- OAuth pattern tests ---
#[test]
fn oauth_implicit_grant_matches() {
let scanner = OAuthPatternScanner::new();
let pattern = &scanner.patterns[0]; // oauth-implicit-grant
assert!(pattern.pattern.is_match("response_type = \"token\""));
assert!(pattern.pattern.is_match("grant_type: implicit"));
assert!(pattern.pattern.is_match("response_type='token'"));
}
#[test]
fn oauth_implicit_grant_no_false_positive() {
let scanner = OAuthPatternScanner::new();
let pattern = &scanner.patterns[0];
assert!(!pattern.pattern.is_match("response_type = \"code\""));
assert!(!pattern.pattern.is_match("grant_type: authorization_code"));
}
#[test]
fn oauth_authorization_code_matches() {
let scanner = OAuthPatternScanner::new();
let pattern = &scanner.patterns[1]; // oauth-missing-pkce
assert!(pattern.pattern.is_match("uses authorization_code flow"));
assert!(pattern.pattern.is_match("authorization code grant"));
}
#[test]
fn oauth_token_localstorage_matches() {
let scanner = OAuthPatternScanner::new();
let pattern = &scanner.patterns[2]; // oauth-token-localstorage
assert!(pattern
.pattern
.is_match("localStorage.setItem('access_token', tok)"));
assert!(pattern
.pattern
.is_match("localStorage.getItem(\"refresh_token\")"));
}
#[test]
fn oauth_token_localstorage_no_false_positive() {
let scanner = OAuthPatternScanner::new();
let pattern = &scanner.patterns[2];
assert!(!pattern
.pattern
.is_match("localStorage.setItem('theme', 'dark')"));
assert!(!pattern
.pattern
.is_match("sessionStorage.setItem('token', t)"));
}
#[test]
fn oauth_token_url_matches() {
let scanner = OAuthPatternScanner::new();
let pattern = &scanner.patterns[3]; // oauth-token-url
assert!(pattern.pattern.is_match("access_token = build_url(query)"));
assert!(pattern.pattern.is_match("bearer = url.param"));
}
// --- Pattern rule file extension filtering ---
#[test]
fn gdpr_patterns_cover_common_languages() {
let scanner = GdprPatternScanner::new();
for pattern in &scanner.patterns {
assert!(
pattern.file_extensions.contains(&"rs".to_string()),
"Pattern {} should cover .rs files",
pattern.id
);
}
}
#[test]
fn oauth_localstorage_only_js_ts() {
let scanner = OAuthPatternScanner::new();
let pattern = &scanner.patterns[2]; // oauth-token-localstorage
assert!(pattern.file_extensions.contains(&"js".to_string()));
assert!(pattern.file_extensions.contains(&"ts".to_string()));
assert!(!pattern.file_extensions.contains(&"rs".to_string()));
assert!(!pattern.file_extensions.contains(&"py".to_string()));
}
}

View File

@@ -1,183 +0,0 @@
use compliance_core::models::*;
use super::dedup::compute_fingerprint;
use super::orchestrator::PipelineOrchestrator;
use crate::error::AgentError;
use crate::pipeline::code_review::CodeReviewScanner;
use crate::pipeline::git::GitOps;
use crate::pipeline::semgrep::SemgrepScanner;
use compliance_core::traits::Scanner;
impl PipelineOrchestrator {
/// Run an incremental scan on a PR diff and post review comments.
#[tracing::instrument(skip_all, fields(repo_id = %repo_id, pr_number))]
pub async fn run_pr_review(
&self,
repo: &TrackedRepository,
repo_id: &str,
pr_number: u64,
base_sha: &str,
head_sha: &str,
) -> Result<(), AgentError> {
let tracker = match self.build_tracker(repo) {
Some(t) => t,
None => {
tracing::warn!("[{repo_id}] No tracker configured, cannot post PR review");
return Ok(());
}
};
let owner = repo.tracker_owner.as_deref().unwrap_or("");
let tracker_repo_name = repo.tracker_repo.as_deref().unwrap_or("");
if owner.is_empty() || tracker_repo_name.is_empty() {
tracing::warn!("[{repo_id}] tracker_owner or tracker_repo not set");
return Ok(());
}
// Clone/fetch the repo
let creds = GitOps::make_repo_credentials(&self.config, repo);
let git_ops = GitOps::new(&self.config.git_clone_base_path, creds);
let repo_path = git_ops.clone_or_fetch(&repo.git_url, &repo.name)?;
// Get diff between base and head
let diff_files = GitOps::get_diff_content(&repo_path, base_sha, head_sha)?;
if diff_files.is_empty() {
tracing::info!("[{repo_id}] PR #{pr_number}: no diff files, skipping review");
return Ok(());
}
// Run semgrep on the full repo but we'll filter findings to changed files
let changed_paths: std::collections::HashSet<String> =
diff_files.iter().map(|f| f.path.clone()).collect();
let mut pr_findings: Vec<Finding> = Vec::new();
// SAST scan (semgrep)
match SemgrepScanner.scan(&repo_path, repo_id).await {
Ok(output) => {
for f in output.findings {
if let Some(fp) = &f.file_path {
if changed_paths.contains(fp.as_str()) {
pr_findings.push(f);
}
}
}
}
Err(e) => tracing::warn!("[{repo_id}] PR semgrep failed: {e}"),
}
// LLM code review on the diff
let reviewer = CodeReviewScanner::new(self.llm.clone());
let review_output = reviewer
.review_diff(&repo_path, repo_id, base_sha, head_sha)
.await;
pr_findings.extend(review_output.findings);
if pr_findings.is_empty() {
// Post a clean review
if let Err(e) = tracker
.create_pr_review(
owner,
tracker_repo_name,
pr_number,
"Compliance scan: no issues found in this PR.",
Vec::new(),
)
.await
{
tracing::warn!("[{repo_id}] Failed to post clean PR review: {e}");
}
return Ok(());
}
// Dedup findings by fingerprint to avoid duplicate comments
let mut seen_fps = std::collections::HashSet::new();
let mut unique_findings: Vec<&Finding> = Vec::new();
for finding in &pr_findings {
let fp = compute_fingerprint(&[
repo_id,
&pr_number.to_string(),
finding.file_path.as_deref().unwrap_or(""),
&finding.line_number.unwrap_or(0).to_string(),
&finding.title,
]);
if seen_fps.insert(fp) {
unique_findings.push(finding);
}
}
let pr_findings = unique_findings;
// Build review comments from findings
let mut review_comments = Vec::new();
for finding in &pr_findings {
if let (Some(path), Some(line)) = (&finding.file_path, finding.line_number) {
let fp = compute_fingerprint(&[
repo_id,
&pr_number.to_string(),
path,
&line.to_string(),
&finding.title,
]);
let comment_body = format!(
"**[{}] {}**\n\n{}\n\n*Scanner: {} | {}*\n\n<!-- compliance-fp:{fp} -->",
finding.severity,
finding.title,
finding.description,
finding.scanner,
finding
.cwe
.as_deref()
.map(|c| format!("CWE: {c}"))
.unwrap_or_default(),
);
review_comments.push(compliance_core::traits::issue_tracker::ReviewComment {
path: path.clone(),
line,
body: comment_body,
});
}
}
let summary = format!(
"Compliance scan found **{}** issue(s) in this PR:\n\n{}",
pr_findings.len(),
pr_findings
.iter()
.map(|f| format!("- **[{}]** {}: {}", f.severity, f.scanner, f.title))
.collect::<Vec<_>>()
.join("\n"),
);
if review_comments.is_empty() {
// All findings were on files/lines we can't comment on inline
if let Err(e) = tracker
.create_pr_review(owner, tracker_repo_name, pr_number, &summary, Vec::new())
.await
{
tracing::warn!("[{repo_id}] Failed to post PR review summary: {e}");
}
return Ok(());
}
if let Err(e) = tracker
.create_pr_review(
owner,
tracker_repo_name,
pr_number,
&summary,
review_comments,
)
.await
{
tracing::warn!("[{repo_id}] Failed to post PR review: {e}");
} else {
tracing::info!(
"[{repo_id}] Posted PR review on #{pr_number} with {} findings",
pr_findings.len()
);
}
Ok(())
}
}

View File

@@ -0,0 +1,190 @@
use std::path::Path;
use compliance_core::models::{SbomEntry, ScanType, VulnRef};
use compliance_core::traits::{ScanOutput, Scanner};
use compliance_core::CoreError;
pub struct SbomScanner;
impl Scanner for SbomScanner {
fn name(&self) -> &str {
"sbom"
}
fn scan_type(&self) -> ScanType {
ScanType::Sbom
}
async fn scan(&self, repo_path: &Path, repo_id: &str) -> Result<ScanOutput, CoreError> {
let mut entries = Vec::new();
// Run syft for SBOM generation
match run_syft(repo_path, repo_id).await {
Ok(syft_entries) => entries.extend(syft_entries),
Err(e) => tracing::warn!("syft failed: {e}"),
}
// Run cargo-audit for Rust-specific vulns
match run_cargo_audit(repo_path, repo_id).await {
Ok(vulns) => merge_audit_vulns(&mut entries, vulns),
Err(e) => tracing::warn!("cargo-audit skipped: {e}"),
}
Ok(ScanOutput {
findings: Vec::new(),
sbom_entries: entries,
})
}
}
async fn run_syft(repo_path: &Path, repo_id: &str) -> Result<Vec<SbomEntry>, CoreError> {
let output = tokio::process::Command::new("syft")
.arg(repo_path)
.args(["-o", "cyclonedx-json"])
.output()
.await
.map_err(|e| CoreError::Scanner {
scanner: "syft".to_string(),
source: Box::new(e),
})?;
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
return Err(CoreError::Scanner {
scanner: "syft".to_string(),
source: format!("syft exited with {}: {stderr}", output.status).into(),
});
}
let cdx: CycloneDxBom = serde_json::from_slice(&output.stdout)?;
let entries = cdx
.components
.unwrap_or_default()
.into_iter()
.map(|c| {
let mut entry = SbomEntry::new(
repo_id.to_string(),
c.name,
c.version.unwrap_or_else(|| "unknown".to_string()),
c.component_type.unwrap_or_else(|| "library".to_string()),
);
entry.purl = c.purl;
entry.license = c.licenses.and_then(|ls| {
ls.first().and_then(|l| {
l.license.as_ref().map(|lic| {
lic.id
.clone()
.unwrap_or_else(|| lic.name.clone().unwrap_or_default())
})
})
});
entry
})
.collect();
Ok(entries)
}
async fn run_cargo_audit(repo_path: &Path, _repo_id: &str) -> Result<Vec<AuditVuln>, CoreError> {
let cargo_lock = repo_path.join("Cargo.lock");
if !cargo_lock.exists() {
return Ok(Vec::new());
}
let output = tokio::process::Command::new("cargo")
.args(["audit", "--json"])
.current_dir(repo_path)
.output()
.await
.map_err(|e| CoreError::Scanner {
scanner: "cargo-audit".to_string(),
source: Box::new(e),
})?;
let result: CargoAuditOutput =
serde_json::from_slice(&output.stdout).unwrap_or_else(|_| CargoAuditOutput {
vulnerabilities: CargoAuditVulns { list: Vec::new() },
});
let vulns = result
.vulnerabilities
.list
.into_iter()
.map(|v| AuditVuln {
package: v.advisory.package,
id: v.advisory.id,
url: v.advisory.url,
})
.collect();
Ok(vulns)
}
fn merge_audit_vulns(entries: &mut [SbomEntry], vulns: Vec<AuditVuln>) {
for vuln in vulns {
if let Some(entry) = entries.iter_mut().find(|e| e.name == vuln.package) {
entry.known_vulnerabilities.push(VulnRef {
id: vuln.id.clone(),
source: "cargo-audit".to_string(),
severity: None,
url: Some(vuln.url),
});
}
}
}
// CycloneDX JSON types
#[derive(serde::Deserialize)]
struct CycloneDxBom {
components: Option<Vec<CdxComponent>>,
}
#[derive(serde::Deserialize)]
struct CdxComponent {
name: String,
version: Option<String>,
#[serde(rename = "type")]
component_type: Option<String>,
purl: Option<String>,
licenses: Option<Vec<CdxLicenseWrapper>>,
}
#[derive(serde::Deserialize)]
struct CdxLicenseWrapper {
license: Option<CdxLicense>,
}
#[derive(serde::Deserialize)]
struct CdxLicense {
id: Option<String>,
name: Option<String>,
}
// Cargo audit types
#[derive(serde::Deserialize)]
struct CargoAuditOutput {
vulnerabilities: CargoAuditVulns,
}
#[derive(serde::Deserialize)]
struct CargoAuditVulns {
list: Vec<CargoAuditEntry>,
}
#[derive(serde::Deserialize)]
struct CargoAuditEntry {
advisory: CargoAuditAdvisory,
}
#[derive(serde::Deserialize)]
struct CargoAuditAdvisory {
id: String,
package: String,
url: String,
}
struct AuditVuln {
package: String,
id: String,
url: String,
}

View File

@@ -1,72 +0,0 @@
use std::path::Path;
use compliance_core::CoreError;
pub(super) struct AuditVuln {
pub package: String,
pub id: String,
pub url: String,
}
#[tracing::instrument(skip_all)]
pub(super) async fn run_cargo_audit(
repo_path: &Path,
_repo_id: &str,
) -> Result<Vec<AuditVuln>, CoreError> {
let cargo_lock = repo_path.join("Cargo.lock");
if !cargo_lock.exists() {
return Ok(Vec::new());
}
let output = tokio::process::Command::new("cargo")
.args(["audit", "--json"])
.current_dir(repo_path)
.env("RUSTC_WRAPPER", "")
.output()
.await
.map_err(|e| CoreError::Scanner {
scanner: "cargo-audit".to_string(),
source: Box::new(e),
})?;
let result: CargoAuditOutput =
serde_json::from_slice(&output.stdout).unwrap_or_else(|_| CargoAuditOutput {
vulnerabilities: CargoAuditVulns { list: Vec::new() },
});
let vulns = result
.vulnerabilities
.list
.into_iter()
.map(|v| AuditVuln {
package: v.advisory.package,
id: v.advisory.id,
url: v.advisory.url,
})
.collect();
Ok(vulns)
}
// Cargo audit types
#[derive(serde::Deserialize)]
struct CargoAuditOutput {
vulnerabilities: CargoAuditVulns,
}
#[derive(serde::Deserialize)]
struct CargoAuditVulns {
list: Vec<CargoAuditEntry>,
}
#[derive(serde::Deserialize)]
struct CargoAuditEntry {
advisory: CargoAuditAdvisory,
}
#[derive(serde::Deserialize)]
struct CargoAuditAdvisory {
id: String,
package: String,
url: String,
}

View File

@@ -1,216 +0,0 @@
mod cargo_audit;
mod syft;
use std::path::Path;
use compliance_core::models::{SbomEntry, ScanType, VulnRef};
use compliance_core::traits::{ScanOutput, Scanner};
use compliance_core::CoreError;
pub struct SbomScanner;
impl Scanner for SbomScanner {
fn name(&self) -> &str {
"sbom"
}
fn scan_type(&self) -> ScanType {
ScanType::Sbom
}
#[tracing::instrument(skip_all)]
async fn scan(&self, repo_path: &Path, repo_id: &str) -> Result<ScanOutput, CoreError> {
let mut entries = Vec::new();
// Generate missing lock files so Syft can resolve the full dependency tree
generate_lockfiles(repo_path).await;
// Run syft for SBOM generation
match syft::run_syft(repo_path, repo_id).await {
Ok(syft_entries) => entries.extend(syft_entries),
Err(e) => tracing::warn!("syft failed: {e}"),
}
// Enrich Cargo entries with license info from cargo metadata
enrich_cargo_licenses(repo_path, &mut entries).await;
// Run cargo-audit for Rust-specific vulns
match cargo_audit::run_cargo_audit(repo_path, repo_id).await {
Ok(vulns) => merge_audit_vulns(&mut entries, vulns),
Err(e) => tracing::warn!("cargo-audit skipped: {e}"),
}
Ok(ScanOutput {
findings: Vec::new(),
sbom_entries: entries,
})
}
}
/// Generate missing lock files so Syft can resolve the full dependency tree.
/// This handles repos that gitignore their lock files (common for Rust libraries).
#[tracing::instrument(skip_all)]
async fn generate_lockfiles(repo_path: &Path) {
// Cargo: generate Cargo.lock if Cargo.toml exists without it
if repo_path.join("Cargo.toml").exists() && !repo_path.join("Cargo.lock").exists() {
tracing::info!("generating Cargo.lock for SBOM scan");
let result = tokio::process::Command::new("cargo")
.args(["generate-lockfile"])
.current_dir(repo_path)
.env("RUSTC_WRAPPER", "")
.output()
.await;
match result {
Ok(o) if o.status.success() => tracing::info!("Cargo.lock generated"),
Ok(o) => tracing::warn!(
"cargo generate-lockfile failed: {}",
String::from_utf8_lossy(&o.stderr)
),
Err(e) => tracing::warn!("cargo generate-lockfile error: {e}"),
}
}
// pip: generate a requirements lock if only pyproject.toml / setup.py exists
let has_pip_manifest = repo_path.join("pyproject.toml").exists()
|| repo_path.join("setup.py").exists()
|| repo_path.join("setup.cfg").exists();
let has_pip_lock = repo_path.join("requirements.txt").exists()
|| repo_path.join("requirements-lock.txt").exists()
|| repo_path.join("poetry.lock").exists()
|| repo_path.join("Pipfile.lock").exists();
if has_pip_manifest && !has_pip_lock {
// Try pip-compile (pip-tools) first, fall back to pip freeze approach
tracing::info!("attempting to generate pip requirements for SBOM scan");
if repo_path.join("pyproject.toml").exists() {
let result = tokio::process::Command::new("pip-compile")
.args([
"--quiet",
"--output-file",
"requirements.txt",
"pyproject.toml",
])
.current_dir(repo_path)
.output()
.await;
match result {
Ok(o) if o.status.success() => {
tracing::info!("requirements.txt generated via pip-compile")
}
_ => tracing::warn!(
"pip-compile not available or failed, Syft will parse pyproject.toml directly"
),
}
}
}
// npm: generate package-lock.json if package.json exists without it
let has_npm_lock = repo_path.join("package-lock.json").exists()
|| repo_path.join("yarn.lock").exists()
|| repo_path.join("pnpm-lock.yaml").exists();
if repo_path.join("package.json").exists() && !has_npm_lock {
tracing::info!("generating package-lock.json for SBOM scan");
let result = tokio::process::Command::new("npm")
.args(["install", "--package-lock-only", "--ignore-scripts"])
.current_dir(repo_path)
.output()
.await;
match result {
Ok(o) if o.status.success() => tracing::info!("package-lock.json generated"),
Ok(o) => tracing::warn!(
"npm install --package-lock-only failed: {}",
String::from_utf8_lossy(&o.stderr)
),
Err(e) => tracing::warn!("npm lock generation error: {e}"),
}
}
}
/// Enrich Cargo SBOM entries with license info from `cargo metadata`.
/// Syft doesn't read license data from Cargo.lock, so we fill it in.
#[tracing::instrument(skip_all)]
async fn enrich_cargo_licenses(repo_path: &Path, entries: &mut [SbomEntry]) {
if !repo_path.join("Cargo.toml").exists() {
return;
}
let has_cargo_entries = entries.iter().any(|e| e.package_manager == "cargo");
if !has_cargo_entries {
return;
}
let output = match tokio::process::Command::new("cargo")
.args(["metadata", "--format-version", "1"])
.current_dir(repo_path)
.env("RUSTC_WRAPPER", "")
.output()
.await
{
Ok(o) if o.status.success() => o,
Ok(o) => {
tracing::warn!(
"cargo metadata failed: {}",
String::from_utf8_lossy(&o.stderr)
);
return;
}
Err(e) => {
tracing::warn!("cargo metadata error: {e}");
return;
}
};
let meta: CargoMetadata = match serde_json::from_slice(&output.stdout) {
Ok(m) => m,
Err(e) => {
tracing::warn!("failed to parse cargo metadata: {e}");
return;
}
};
// Build a lookup: (name, version) -> license
let license_map: std::collections::HashMap<(&str, &str), &str> = meta
.packages
.iter()
.filter_map(|p| {
p.license
.as_deref()
.map(|l| (p.name.as_str(), p.version.as_str(), l))
})
.map(|(n, v, l)| ((n, v), l))
.collect();
for entry in entries.iter_mut() {
if entry.package_manager != "cargo" || entry.license.is_some() {
continue;
}
if let Some(license) = license_map.get(&(entry.name.as_str(), entry.version.as_str())) {
entry.license = Some(license.to_string());
}
}
}
fn merge_audit_vulns(entries: &mut [SbomEntry], vulns: Vec<cargo_audit::AuditVuln>) {
for vuln in vulns {
if let Some(entry) = entries.iter_mut().find(|e| e.name == vuln.package) {
entry.known_vulnerabilities.push(VulnRef {
id: vuln.id.clone(),
source: "cargo-audit".to_string(),
severity: None,
url: Some(vuln.url),
});
}
}
}
// Cargo metadata types
#[derive(serde::Deserialize)]
struct CargoMetadata {
packages: Vec<CargoPackage>,
}
#[derive(serde::Deserialize)]
struct CargoPackage {
name: String,
version: String,
license: Option<String>,
}

View File

@@ -1,355 +0,0 @@
use std::path::Path;
use compliance_core::models::SbomEntry;
use compliance_core::CoreError;
#[tracing::instrument(skip_all, fields(repo_id = %repo_id))]
pub(super) async fn run_syft(repo_path: &Path, repo_id: &str) -> Result<Vec<SbomEntry>, CoreError> {
let output = tokio::process::Command::new("syft")
.arg(repo_path)
.args(["-o", "cyclonedx-json"])
// Enable remote license lookups for all ecosystems
.env("SYFT_GOLANG_SEARCH_REMOTE_LICENSES", "true")
.env("SYFT_JAVASCRIPT_SEARCH_REMOTE_LICENSES", "true")
.env("SYFT_PYTHON_SEARCH_REMOTE_LICENSES", "true")
.env("SYFT_JAVA_USE_NETWORK", "true")
.output()
.await
.map_err(|e| CoreError::Scanner {
scanner: "syft".to_string(),
source: Box::new(e),
})?;
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
return Err(CoreError::Scanner {
scanner: "syft".to_string(),
source: format!("syft exited with {}: {stderr}", output.status).into(),
});
}
let cdx: CycloneDxBom = serde_json::from_slice(&output.stdout)?;
let entries = cdx
.components
.unwrap_or_default()
.into_iter()
.map(|c| {
let package_manager = c
.purl
.as_deref()
.and_then(extract_ecosystem_from_purl)
.unwrap_or_else(|| "unknown".to_string());
let mut entry = SbomEntry::new(
repo_id.to_string(),
c.name,
c.version.unwrap_or_else(|| "unknown".to_string()),
package_manager,
);
entry.purl = c.purl;
entry.license = c.licenses.and_then(|ls| extract_license(&ls));
entry
})
.collect();
Ok(entries)
}
// CycloneDX JSON types
#[derive(serde::Deserialize)]
struct CycloneDxBom {
components: Option<Vec<CdxComponent>>,
}
#[derive(serde::Deserialize)]
struct CdxComponent {
name: String,
version: Option<String>,
#[serde(rename = "type")]
#[allow(dead_code)]
component_type: Option<String>,
purl: Option<String>,
licenses: Option<Vec<CdxLicenseWrapper>>,
}
#[derive(serde::Deserialize)]
struct CdxLicenseWrapper {
license: Option<CdxLicense>,
/// SPDX license expression (e.g. "MIT OR Apache-2.0")
expression: Option<String>,
}
#[derive(serde::Deserialize)]
struct CdxLicense {
id: Option<String>,
name: Option<String>,
}
/// Extract the best license string from CycloneDX license entries.
/// Handles three formats: expression ("MIT OR Apache-2.0"), license.id ("MIT"), license.name ("MIT License").
fn extract_license(entries: &[CdxLicenseWrapper]) -> Option<String> {
// First pass: look for SPDX expressions (most precise for dual-licensed packages)
for entry in entries {
if let Some(ref expr) = entry.expression {
if !expr.is_empty() {
return Some(expr.clone());
}
}
}
// Second pass: collect license.id or license.name from all entries
let parts: Vec<String> = entries
.iter()
.filter_map(|e| {
e.license.as_ref().and_then(|lic| {
lic.id
.clone()
.or_else(|| lic.name.clone())
.filter(|s| !s.is_empty())
})
})
.collect();
if parts.is_empty() {
return None;
}
Some(parts.join(" OR "))
}
/// Extract the ecosystem/package-manager from a PURL string.
/// e.g. "pkg:npm/lodash@4.17.21" -> "npm", "pkg:cargo/serde@1.0" -> "cargo"
fn extract_ecosystem_from_purl(purl: &str) -> Option<String> {
let rest = purl.strip_prefix("pkg:")?;
let ecosystem = rest.split('/').next()?;
if ecosystem.is_empty() {
return None;
}
// Normalise common PURL types to user-friendly names
let normalised = match ecosystem {
"golang" => "go",
"pypi" => "pip",
_ => ecosystem,
};
Some(normalised.to_string())
}
#[cfg(test)]
mod tests {
use super::*;
// --- extract_ecosystem_from_purl tests ---
#[test]
fn purl_npm() {
assert_eq!(
extract_ecosystem_from_purl("pkg:npm/lodash@4.17.21"),
Some("npm".to_string())
);
}
#[test]
fn purl_cargo() {
assert_eq!(
extract_ecosystem_from_purl("pkg:cargo/serde@1.0.197"),
Some("cargo".to_string())
);
}
#[test]
fn purl_golang_normalised() {
assert_eq!(
extract_ecosystem_from_purl("pkg:golang/github.com/gin-gonic/gin@1.9.1"),
Some("go".to_string())
);
}
#[test]
fn purl_pypi_normalised() {
assert_eq!(
extract_ecosystem_from_purl("pkg:pypi/requests@2.31.0"),
Some("pip".to_string())
);
}
#[test]
fn purl_maven() {
assert_eq!(
extract_ecosystem_from_purl("pkg:maven/org.apache.commons/commons-lang3@3.14.0"),
Some("maven".to_string())
);
}
#[test]
fn purl_missing_prefix() {
assert_eq!(extract_ecosystem_from_purl("npm/lodash@4.17.21"), None);
}
#[test]
fn purl_empty_ecosystem() {
assert_eq!(extract_ecosystem_from_purl("pkg:/lodash@4.17.21"), None);
}
#[test]
fn purl_empty_string() {
assert_eq!(extract_ecosystem_from_purl(""), None);
}
#[test]
fn purl_just_prefix() {
assert_eq!(extract_ecosystem_from_purl("pkg:"), None);
}
// --- extract_license tests ---
#[test]
fn license_from_expression() {
let entries = vec![CdxLicenseWrapper {
license: None,
expression: Some("MIT OR Apache-2.0".to_string()),
}];
assert_eq!(
extract_license(&entries),
Some("MIT OR Apache-2.0".to_string())
);
}
#[test]
fn license_from_id() {
let entries = vec![CdxLicenseWrapper {
license: Some(CdxLicense {
id: Some("MIT".to_string()),
name: None,
}),
expression: None,
}];
assert_eq!(extract_license(&entries), Some("MIT".to_string()));
}
#[test]
fn license_from_name_fallback() {
let entries = vec![CdxLicenseWrapper {
license: Some(CdxLicense {
id: None,
name: Some("MIT License".to_string()),
}),
expression: None,
}];
assert_eq!(extract_license(&entries), Some("MIT License".to_string()));
}
#[test]
fn license_expression_preferred_over_id() {
let entries = vec![
CdxLicenseWrapper {
license: Some(CdxLicense {
id: Some("MIT".to_string()),
name: None,
}),
expression: None,
},
CdxLicenseWrapper {
license: None,
expression: Some("MIT AND Apache-2.0".to_string()),
},
];
// Expression should be preferred (first pass finds it)
assert_eq!(
extract_license(&entries),
Some("MIT AND Apache-2.0".to_string())
);
}
#[test]
fn license_multiple_ids_joined() {
let entries = vec![
CdxLicenseWrapper {
license: Some(CdxLicense {
id: Some("MIT".to_string()),
name: None,
}),
expression: None,
},
CdxLicenseWrapper {
license: Some(CdxLicense {
id: Some("Apache-2.0".to_string()),
name: None,
}),
expression: None,
},
];
assert_eq!(
extract_license(&entries),
Some("MIT OR Apache-2.0".to_string())
);
}
#[test]
fn license_empty_entries() {
let entries: Vec<CdxLicenseWrapper> = vec![];
assert_eq!(extract_license(&entries), None);
}
#[test]
fn license_all_empty_strings() {
let entries = vec![CdxLicenseWrapper {
license: Some(CdxLicense {
id: Some(String::new()),
name: Some(String::new()),
}),
expression: Some(String::new()),
}];
assert_eq!(extract_license(&entries), None);
}
#[test]
fn license_none_fields() {
let entries = vec![CdxLicenseWrapper {
license: None,
expression: None,
}];
assert_eq!(extract_license(&entries), None);
}
// --- CycloneDX deserialization tests ---
#[test]
fn deserialize_cyclonedx_bom() {
let json = r#"{
"components": [
{
"name": "serde",
"version": "1.0.197",
"type": "library",
"purl": "pkg:cargo/serde@1.0.197",
"licenses": [
{"expression": "MIT OR Apache-2.0"}
]
}
]
}"#;
let bom: CycloneDxBom = serde_json::from_str(json).unwrap();
let components = bom.components.unwrap();
assert_eq!(components.len(), 1);
assert_eq!(components[0].name, "serde");
assert_eq!(components[0].version, Some("1.0.197".to_string()));
assert_eq!(
components[0].purl,
Some("pkg:cargo/serde@1.0.197".to_string())
);
}
#[test]
fn deserialize_cyclonedx_no_components() {
let json = r#"{}"#;
let bom: CycloneDxBom = serde_json::from_str(json).unwrap();
assert!(bom.components.is_none());
}
#[test]
fn deserialize_cyclonedx_minimal_component() {
let json = r#"{"components": [{"name": "foo"}]}"#;
let bom: CycloneDxBom = serde_json::from_str(json).unwrap();
let c = &bom.components.unwrap()[0];
assert_eq!(c.name, "foo");
assert!(c.version.is_none());
assert!(c.purl.is_none());
assert!(c.licenses.is_none());
}
}

View File

@@ -17,7 +17,6 @@ impl Scanner for SemgrepScanner {
ScanType::Sast
}
#[tracing::instrument(skip_all)]
async fn scan(&self, repo_path: &Path, repo_id: &str) -> Result<ScanOutput, CoreError> {
let output = tokio::process::Command::new("semgrep")
.args(["--config=auto", "--json", "--quiet"])
@@ -108,124 +107,3 @@ struct SemgrepExtra {
#[serde(default)]
metadata: Option<serde_json::Value>,
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn deserialize_semgrep_output() {
let json = r#"{
"results": [
{
"check_id": "python.lang.security.audit.exec-detected",
"path": "src/main.py",
"start": {"line": 15},
"extra": {
"message": "Detected use of exec()",
"severity": "ERROR",
"lines": "exec(user_input)",
"metadata": {"cwe": "CWE-78"}
}
}
]
}"#;
let output: SemgrepOutput = serde_json::from_str(json).unwrap();
assert_eq!(output.results.len(), 1);
let r = &output.results[0];
assert_eq!(r.check_id, "python.lang.security.audit.exec-detected");
assert_eq!(r.path, "src/main.py");
assert_eq!(r.start.line, 15);
assert_eq!(r.extra.message, "Detected use of exec()");
assert_eq!(r.extra.severity, "ERROR");
assert_eq!(r.extra.lines, "exec(user_input)");
assert_eq!(
r.extra
.metadata
.as_ref()
.unwrap()
.get("cwe")
.unwrap()
.as_str(),
Some("CWE-78")
);
}
#[test]
fn deserialize_semgrep_empty_results() {
let json = r#"{"results": []}"#;
let output: SemgrepOutput = serde_json::from_str(json).unwrap();
assert!(output.results.is_empty());
}
#[test]
fn deserialize_semgrep_no_metadata() {
let json = r#"{
"results": [
{
"check_id": "rule-1",
"path": "app.py",
"start": {"line": 1},
"extra": {
"message": "found something",
"severity": "WARNING",
"lines": "import os"
}
}
]
}"#;
let output: SemgrepOutput = serde_json::from_str(json).unwrap();
assert!(output.results[0].extra.metadata.is_none());
}
#[test]
fn semgrep_severity_mapping() {
let cases = vec![
("ERROR", "High"),
("WARNING", "Medium"),
("INFO", "Low"),
("UNKNOWN", "Info"),
];
for (input, expected) in cases {
let result = match input {
"ERROR" => "High",
"WARNING" => "Medium",
"INFO" => "Low",
_ => "Info",
};
assert_eq!(result, expected, "Severity for '{input}'");
}
}
#[test]
fn deserialize_semgrep_multiple_results() {
let json = r#"{
"results": [
{
"check_id": "rule-a",
"path": "a.py",
"start": {"line": 1},
"extra": {
"message": "msg a",
"severity": "ERROR",
"lines": "line a"
}
},
{
"check_id": "rule-b",
"path": "b.py",
"start": {"line": 99},
"extra": {
"message": "msg b",
"severity": "INFO",
"lines": "line b"
}
}
]
}"#;
let output: SemgrepOutput = serde_json::from_str(json).unwrap();
assert_eq!(output.results.len(), 2);
assert_eq!(output.results[1].start.line, 99);
}
}

View File

@@ -1,81 +0,0 @@
use compliance_core::models::TrackerIssue;
use compliance_core::traits::issue_tracker::IssueTracker;
use crate::trackers;
/// Enum dispatch for issue trackers (async traits aren't dyn-compatible).
pub(crate) enum TrackerDispatch {
GitHub(trackers::github::GitHubTracker),
GitLab(trackers::gitlab::GitLabTracker),
Gitea(trackers::gitea::GiteaTracker),
Jira(trackers::jira::JiraTracker),
}
impl TrackerDispatch {
pub(crate) fn name(&self) -> &str {
match self {
Self::GitHub(t) => t.name(),
Self::GitLab(t) => t.name(),
Self::Gitea(t) => t.name(),
Self::Jira(t) => t.name(),
}
}
pub(crate) async fn create_issue(
&self,
owner: &str,
repo: &str,
title: &str,
body: &str,
labels: &[String],
) -> Result<TrackerIssue, compliance_core::error::CoreError> {
match self {
Self::GitHub(t) => t.create_issue(owner, repo, title, body, labels).await,
Self::GitLab(t) => t.create_issue(owner, repo, title, body, labels).await,
Self::Gitea(t) => t.create_issue(owner, repo, title, body, labels).await,
Self::Jira(t) => t.create_issue(owner, repo, title, body, labels).await,
}
}
pub(crate) async fn find_existing_issue(
&self,
owner: &str,
repo: &str,
fingerprint: &str,
) -> Result<Option<TrackerIssue>, compliance_core::error::CoreError> {
match self {
Self::GitHub(t) => t.find_existing_issue(owner, repo, fingerprint).await,
Self::GitLab(t) => t.find_existing_issue(owner, repo, fingerprint).await,
Self::Gitea(t) => t.find_existing_issue(owner, repo, fingerprint).await,
Self::Jira(t) => t.find_existing_issue(owner, repo, fingerprint).await,
}
}
pub(crate) async fn create_pr_review(
&self,
owner: &str,
repo: &str,
pr_number: u64,
body: &str,
comments: Vec<compliance_core::traits::issue_tracker::ReviewComment>,
) -> Result<(), compliance_core::error::CoreError> {
match self {
Self::GitHub(t) => {
t.create_pr_review(owner, repo, pr_number, body, comments)
.await
}
Self::GitLab(t) => {
t.create_pr_review(owner, repo, pr_number, body, comments)
.await
}
Self::Gitea(t) => {
t.create_pr_review(owner, repo, pr_number, body, comments)
.await
}
Self::Jira(t) => {
t.create_pr_review(owner, repo, pr_number, body, comments)
.await
}
}
}
}

View File

@@ -82,158 +82,24 @@ async fn scan_all_repos(agent: &ComplianceAgent) {
}
async fn monitor_cves(agent: &ComplianceAgent) {
use compliance_core::models::notification::{parse_severity, CveNotification};
use compliance_core::models::SbomEntry;
use futures_util::StreamExt;
// Fetch all SBOM entries grouped by repo
// Re-scan all SBOM entries for new CVEs
let cursor = match agent.db.sbom_entries().find(doc! {}).await {
Ok(c) => c,
Err(e) => {
tracing::error!("CVE monitor: failed to list SBOM entries: {e}");
tracing::error!("Failed to list SBOM entries for CVE monitoring: {e}");
return;
}
};
let entries: Vec<SbomEntry> = cursor.filter_map(|r| async { r.ok() }).collect().await;
let entries: Vec<_> = cursor.filter_map(|r| async { r.ok() }).collect().await;
if entries.is_empty() {
tracing::debug!("CVE monitor: no SBOM entries, skipping");
return;
}
tracing::info!(
"CVE monitor: checking {} dependencies for new CVEs",
entries.len()
);
// Build a repo_id → repo_name lookup
let repo_ids: std::collections::HashSet<String> =
entries.iter().map(|e| e.repo_id.clone()).collect();
let mut repo_names: std::collections::HashMap<String, String> =
std::collections::HashMap::new();
for rid in &repo_ids {
if let Ok(oid) = mongodb::bson::oid::ObjectId::parse_str(rid) {
if let Ok(Some(repo)) = agent.db.repositories().find_one(doc! { "_id": oid }).await {
repo_names.insert(rid.clone(), repo.name.clone());
}
}
}
// Use the existing CveScanner to query OSV.dev
let nvd_key = agent.config.nvd_api_key.as_ref().map(|k| {
use secrecy::ExposeSecret;
k.expose_secret().to_string()
});
let scanner = crate::pipeline::cve::CveScanner::new(
agent.http.clone(),
agent.config.searxng_url.clone(),
nvd_key,
);
// Group entries by repo for scanning
let mut entries_by_repo: std::collections::HashMap<String, Vec<SbomEntry>> =
std::collections::HashMap::new();
for entry in entries {
entries_by_repo
.entry(entry.repo_id.clone())
.or_default()
.push(entry);
}
let mut new_notifications = 0u32;
for (repo_id, mut repo_entries) in entries_by_repo {
let repo_name = repo_names
.get(&repo_id)
.cloned()
.unwrap_or_else(|| repo_id.clone());
// Scan dependencies for CVEs
let alerts = match scanner.scan_dependencies(&repo_id, &mut repo_entries).await {
Ok(a) => a,
Err(e) => {
tracing::warn!("CVE monitor: scan failed for {repo_name}: {e}");
continue;
}
};
// Upsert CVE alerts (existing logic)
for alert in &alerts {
let filter = doc! { "cve_id": &alert.cve_id, "repo_id": &alert.repo_id };
let update = doc! { "$setOnInsert": mongodb::bson::to_bson(alert).unwrap_or_default() };
let _ = agent
.db
.cve_alerts()
.update_one(filter, update)
.upsert(true)
.await;
}
// Update SBOM entries with discovered vulnerabilities
for entry in &repo_entries {
if entry.known_vulnerabilities.is_empty() {
continue;
}
if let Some(entry_id) = &entry.id {
let _ = agent
.db
.sbom_entries()
.update_one(
doc! { "_id": entry_id },
doc! { "$set": {
"known_vulnerabilities": mongodb::bson::to_bson(&entry.known_vulnerabilities).unwrap_or_default(),
"updated_at": mongodb::bson::DateTime::now(),
}},
)
.await;
}
}
// Create notifications for NEW CVEs (dedup against existing notifications)
for alert in &alerts {
let filter = doc! {
"cve_id": &alert.cve_id,
"repo_id": &alert.repo_id,
"package_name": &alert.affected_package,
"package_version": &alert.affected_version,
};
// Only insert if not already exists (upsert with $setOnInsert)
let severity = parse_severity(alert.severity.as_deref(), alert.cvss_score);
let mut notification = CveNotification::new(
alert.cve_id.clone(),
repo_id.clone(),
repo_name.clone(),
alert.affected_package.clone(),
alert.affected_version.clone(),
severity,
);
notification.cvss_score = alert.cvss_score;
notification.summary = alert.summary.clone();
notification.url = Some(format!("https://osv.dev/vulnerability/{}", alert.cve_id));
let update = doc! {
"$setOnInsert": mongodb::bson::to_bson(&notification).unwrap_or_default()
};
match agent
.db
.cve_notifications()
.update_one(filter, update)
.upsert(true)
.await
{
Ok(result) if result.upserted_id.is_some() => {
new_notifications += 1;
}
Err(e) => {
tracing::warn!("CVE monitor: failed to create notification: {e}");
}
_ => {} // Already exists
}
}
}
if new_notifications > 0 {
tracing::info!("CVE monitor: created {new_notifications} new notification(s)");
} else {
tracing::info!("CVE monitor: no new CVEs found");
}
tracing::info!("CVE monitor: checking {} dependencies", entries.len());
// The actual CVE checking is handled by the CveScanner in the pipeline
// This is a simplified version that just logs the activity
}

View File

@@ -1,53 +0,0 @@
use std::path::Path;
use crate::error::AgentError;
/// Ensure the SSH key pair exists at the given path, generating it if missing.
/// Returns the public key contents.
pub fn ensure_ssh_key(key_path: &str) -> Result<String, AgentError> {
let private_path = Path::new(key_path);
let public_path = private_path.with_extension("pub");
if private_path.exists() && public_path.exists() {
return std::fs::read_to_string(&public_path)
.map_err(|e| AgentError::Config(format!("Failed to read SSH public key: {e}")));
}
// Create parent directory
if let Some(parent) = private_path.parent() {
std::fs::create_dir_all(parent)?;
}
// Generate ed25519 key pair using ssh-keygen
let output = std::process::Command::new("ssh-keygen")
.args([
"-t",
"ed25519",
"-f",
key_path,
"-N",
"", // no passphrase
"-C",
"compliance-scanner-agent",
])
.output()
.map_err(|e| AgentError::Config(format!("Failed to run ssh-keygen: {e}")))?;
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
return Err(AgentError::Config(format!("ssh-keygen failed: {stderr}")));
}
// Set correct permissions
#[cfg(unix)]
{
use std::os::unix::fs::PermissionsExt;
std::fs::set_permissions(private_path, std::fs::Permissions::from_mode(0o600))?;
}
let public_key = std::fs::read_to_string(&public_path)
.map_err(|e| AgentError::Config(format!("Failed to read generated SSH public key: {e}")))?;
tracing::info!("Generated new SSH key pair at {key_path}");
Ok(public_key)
}

View File

@@ -1,274 +0,0 @@
use compliance_core::error::CoreError;
use compliance_core::models::{TrackerIssue, TrackerType};
use compliance_core::traits::issue_tracker::{IssueTracker, ReviewComment};
use secrecy::{ExposeSecret, SecretString};
pub struct GiteaTracker {
base_url: String,
http: reqwest::Client,
token: SecretString,
}
impl GiteaTracker {
pub fn new(base_url: String, token: SecretString) -> Self {
Self {
base_url: base_url.trim_end_matches('/').to_string(),
http: reqwest::Client::new(),
token,
}
}
fn api_url(&self, path: &str) -> String {
format!("{}/api/v1{}", self.base_url, path)
}
}
impl IssueTracker for GiteaTracker {
fn name(&self) -> &str {
"gitea"
}
async fn create_issue(
&self,
owner: &str,
repo: &str,
title: &str,
body: &str,
labels: &[String],
) -> Result<TrackerIssue, CoreError> {
let url = self.api_url(&format!("/repos/{owner}/{repo}/issues"));
// Gitea expects label IDs (integers), not names. Append label names
// to the body instead since resolving IDs would require extra API calls.
let mut full_body = body.to_string();
if !labels.is_empty() {
full_body.push_str("\n\n**Labels:** ");
full_body.push_str(&labels.join(", "));
}
let payload = serde_json::json!({
"title": title,
"body": full_body,
});
let resp = self
.http
.post(&url)
.header(
"Authorization",
format!("token {}", self.token.expose_secret()),
)
.json(&payload)
.send()
.await
.map_err(|e| CoreError::IssueTracker(format!("Gitea create issue failed: {e}")))?;
if !resp.status().is_success() {
let status = resp.status();
let text = resp.text().await.unwrap_or_default();
return Err(CoreError::IssueTracker(format!(
"Gitea returned {status}: {text}"
)));
}
let issue: serde_json::Value = resp
.json()
.await
.map_err(|e| CoreError::IssueTracker(format!("Failed to parse Gitea response: {e}")))?;
Ok(TrackerIssue::new(
String::new(),
TrackerType::Gitea,
issue["number"].to_string(),
issue["html_url"].as_str().unwrap_or("").to_string(),
title.to_string(),
))
}
async fn update_issue_status(
&self,
owner: &str,
repo: &str,
external_id: &str,
status: &str,
) -> Result<(), CoreError> {
let url = self.api_url(&format!("/repos/{owner}/{repo}/issues/{external_id}"));
let state = match status {
"closed" | "resolved" => "closed",
_ => "open",
};
let resp = self
.http
.patch(&url)
.header(
"Authorization",
format!("token {}", self.token.expose_secret()),
)
.json(&serde_json::json!({ "state": state }))
.send()
.await
.map_err(|e| CoreError::IssueTracker(format!("Gitea update issue failed: {e}")))?;
if !resp.status().is_success() {
let status = resp.status();
let text = resp.text().await.unwrap_or_default();
return Err(CoreError::IssueTracker(format!(
"Gitea update issue returned {status}: {text}"
)));
}
Ok(())
}
async fn add_comment(
&self,
owner: &str,
repo: &str,
external_id: &str,
body: &str,
) -> Result<(), CoreError> {
let url = self.api_url(&format!(
"/repos/{owner}/{repo}/issues/{external_id}/comments"
));
let resp = self
.http
.post(&url)
.header(
"Authorization",
format!("token {}", self.token.expose_secret()),
)
.json(&serde_json::json!({ "body": body }))
.send()
.await
.map_err(|e| CoreError::IssueTracker(format!("Gitea add comment failed: {e}")))?;
if !resp.status().is_success() {
let status = resp.status();
let text = resp.text().await.unwrap_or_default();
return Err(CoreError::IssueTracker(format!(
"Gitea add comment returned {status}: {text}"
)));
}
Ok(())
}
async fn create_pr_review(
&self,
owner: &str,
repo: &str,
pr_number: u64,
body: &str,
comments: Vec<ReviewComment>,
) -> Result<(), CoreError> {
let url = self.api_url(&format!("/repos/{owner}/{repo}/pulls/{pr_number}/reviews"));
let review_comments: Vec<serde_json::Value> = comments
.iter()
.map(|c| {
serde_json::json!({
"path": c.path,
"new_position": c.line,
"body": c.body,
})
})
.collect();
let resp = self
.http
.post(&url)
.header(
"Authorization",
format!("token {}", self.token.expose_secret()),
)
.json(&serde_json::json!({
"body": body,
"event": "COMMENT",
"comments": review_comments,
}))
.send()
.await
.map_err(|e| CoreError::IssueTracker(format!("Gitea PR review failed: {e}")))?;
if !resp.status().is_success() {
let status = resp.status();
let text = resp.text().await.unwrap_or_default();
// If inline comments caused the failure, retry with just the summary body
if !comments.is_empty() {
tracing::warn!(
"Gitea PR review with inline comments failed ({status}): {text}, retrying as plain comment"
);
let fallback_url = self.api_url(&format!(
"/repos/{owner}/{repo}/issues/{pr_number}/comments"
));
let fallback_resp = self
.http
.post(&fallback_url)
.header(
"Authorization",
format!("token {}", self.token.expose_secret()),
)
.json(&serde_json::json!({ "body": body }))
.send()
.await
.map_err(|e| {
CoreError::IssueTracker(format!("Gitea PR comment fallback failed: {e}"))
})?;
if !fallback_resp.status().is_success() {
let fb_status = fallback_resp.status();
let fb_text = fallback_resp.text().await.unwrap_or_default();
return Err(CoreError::IssueTracker(format!(
"Gitea PR comment fallback returned {fb_status}: {fb_text}"
)));
}
return Ok(());
}
return Err(CoreError::IssueTracker(format!(
"Gitea PR review returned {status}: {text}"
)));
}
Ok(())
}
async fn find_existing_issue(
&self,
owner: &str,
repo: &str,
fingerprint: &str,
) -> Result<Option<TrackerIssue>, CoreError> {
let url = self.api_url(&format!(
"/repos/{owner}/{repo}/issues?type=issues&state=all&q={fingerprint}"
));
let resp = self
.http
.get(&url)
.header(
"Authorization",
format!("token {}", self.token.expose_secret()),
)
.send()
.await
.map_err(|e| CoreError::IssueTracker(format!("Gitea search failed: {e}")))?;
let issues: Vec<serde_json::Value> = resp.json().await.unwrap_or_default();
if let Some(issue) = issues.first() {
Ok(Some(TrackerIssue::new(
String::new(),
TrackerType::Gitea,
issue["number"].to_string(),
issue["html_url"].as_str().unwrap_or("").to_string(),
issue["title"].as_str().unwrap_or("").to_string(),
)))
} else {
Ok(None)
}
}
}

View File

@@ -1,4 +1,3 @@
pub mod gitea;
pub mod github;
pub mod gitlab;
pub mod jira;

View File

@@ -1,138 +0,0 @@
use std::sync::Arc;
use axum::body::Bytes;
use axum::extract::{Extension, Path};
use axum::http::{HeaderMap, StatusCode};
use hmac::{Hmac, Mac};
use sha2::Sha256;
use compliance_core::models::ScanTrigger;
use crate::agent::ComplianceAgent;
type HmacSha256 = Hmac<Sha256>;
pub async fn handle_gitea_webhook(
Extension(agent): Extension<Arc<ComplianceAgent>>,
Path(repo_id): Path<String>,
headers: HeaderMap,
body: Bytes,
) -> StatusCode {
// Look up the repo to get its webhook secret
let oid = match mongodb::bson::oid::ObjectId::parse_str(&repo_id) {
Ok(oid) => oid,
Err(_) => return StatusCode::NOT_FOUND,
};
let repo = match agent
.db
.repositories()
.find_one(mongodb::bson::doc! { "_id": oid })
.await
{
Ok(Some(repo)) => repo,
_ => {
tracing::warn!("Gitea webhook: repo {repo_id} not found");
return StatusCode::NOT_FOUND;
}
};
// Verify HMAC-SHA256 signature using the per-repo secret
if let Some(secret) = &repo.webhook_secret {
let signature = headers
.get("x-gitea-signature")
.and_then(|v| v.to_str().ok())
.unwrap_or("");
if !verify_signature(secret, &body, signature) {
tracing::warn!("Gitea webhook: invalid signature for repo {repo_id}");
return StatusCode::UNAUTHORIZED;
}
}
let event = headers
.get("x-gitea-event")
.and_then(|v| v.to_str().ok())
.unwrap_or("");
let payload: serde_json::Value = match serde_json::from_slice(&body) {
Ok(v) => v,
Err(e) => {
tracing::warn!("Gitea webhook: invalid JSON: {e}");
return StatusCode::BAD_REQUEST;
}
};
match event {
"push" => {
let agent_clone = (*agent).clone();
let repo_id = repo_id.clone();
tokio::spawn(async move {
tracing::info!("Gitea push webhook: triggering scan for {repo_id}");
if let Err(e) = agent_clone.run_scan(&repo_id, ScanTrigger::Webhook).await {
tracing::error!("Webhook-triggered scan failed: {e}");
}
});
StatusCode::OK
}
"pull_request" => handle_pull_request(agent, &repo_id, &payload).await,
_ => {
tracing::debug!("Gitea webhook: ignoring event '{event}'");
StatusCode::OK
}
}
}
async fn handle_pull_request(
agent: Arc<ComplianceAgent>,
repo_id: &str,
payload: &serde_json::Value,
) -> StatusCode {
let action = payload["action"].as_str().unwrap_or("");
if action != "opened" && action != "synchronized" {
return StatusCode::OK;
}
let pr_number = payload["pull_request"]["number"].as_u64().unwrap_or(0);
let head_sha = payload["pull_request"]["head"]["sha"]
.as_str()
.unwrap_or("");
let base_sha = payload["pull_request"]["base"]["sha"]
.as_str()
.unwrap_or("");
if pr_number == 0 || head_sha.is_empty() || base_sha.is_empty() {
tracing::warn!("Gitea PR webhook: missing required fields");
return StatusCode::BAD_REQUEST;
}
let repo_id = repo_id.to_string();
let head_sha = head_sha.to_string();
let base_sha = base_sha.to_string();
let agent_clone = (*agent).clone();
tokio::spawn(async move {
tracing::info!("Gitea PR webhook: reviewing PR #{pr_number} on {repo_id}");
if let Err(e) = agent_clone
.run_pr_review(&repo_id, pr_number, &base_sha, &head_sha)
.await
{
tracing::error!("PR review failed for #{pr_number}: {e}");
}
});
StatusCode::OK
}
fn verify_signature(secret: &str, body: &[u8], signature: &str) -> bool {
// Gitea sends raw hex (no sha256= prefix)
let sig_bytes = match hex::decode(signature) {
Ok(b) => b,
Err(_) => return false,
};
let mut mac = match HmacSha256::new_from_slice(secret.as_bytes()) {
Ok(m) => m,
Err(_) => return false,
};
mac.update(body);
mac.verify_slice(&sig_bytes).is_ok()
}

View File

@@ -1,9 +1,10 @@
use std::sync::Arc;
use axum::body::Bytes;
use axum::extract::{Extension, Path};
use axum::extract::Extension;
use axum::http::{HeaderMap, StatusCode};
use hmac::{Hmac, Mac};
use secrecy::ExposeSecret;
use sha2::Sha256;
use compliance_core::models::ScanTrigger;
@@ -14,37 +15,18 @@ type HmacSha256 = Hmac<Sha256>;
pub async fn handle_github_webhook(
Extension(agent): Extension<Arc<ComplianceAgent>>,
Path(repo_id): Path<String>,
headers: HeaderMap,
body: Bytes,
) -> StatusCode {
// Look up the repo to get its webhook secret
let oid = match mongodb::bson::oid::ObjectId::parse_str(&repo_id) {
Ok(oid) => oid,
Err(_) => return StatusCode::NOT_FOUND,
};
let repo = match agent
.db
.repositories()
.find_one(mongodb::bson::doc! { "_id": oid })
.await
{
Ok(Some(repo)) => repo,
_ => {
tracing::warn!("GitHub webhook: repo {repo_id} not found");
return StatusCode::NOT_FOUND;
}
};
// Verify HMAC-SHA256 signature using the per-repo secret
if let Some(secret) = &repo.webhook_secret {
// Verify HMAC signature
if let Some(secret) = &agent.config.github_webhook_secret {
let signature = headers
.get("x-hub-signature-256")
.and_then(|v| v.to_str().ok())
.unwrap_or("");
if !verify_signature(secret, &body, signature) {
tracing::warn!("GitHub webhook: invalid signature for repo {repo_id}");
if !verify_signature(secret.expose_secret(), &body, signature) {
tracing::warn!("GitHub webhook: invalid signature");
return StatusCode::UNAUTHORIZED;
}
}
@@ -63,18 +45,8 @@ pub async fn handle_github_webhook(
};
match event {
"push" => {
let agent_clone = (*agent).clone();
let repo_id = repo_id.clone();
tokio::spawn(async move {
tracing::info!("GitHub push webhook: triggering scan for {repo_id}");
if let Err(e) = agent_clone.run_scan(&repo_id, ScanTrigger::Webhook).await {
tracing::error!("Webhook-triggered scan failed: {e}");
}
});
StatusCode::OK
}
"pull_request" => handle_pull_request(agent, &repo_id, &payload).await,
"push" => handle_push(agent, &payload).await,
"pull_request" => handle_pull_request(agent, &payload).await,
_ => {
tracing::debug!("GitHub webhook: ignoring event '{event}'");
StatusCode::OK
@@ -82,9 +54,43 @@ pub async fn handle_github_webhook(
}
}
async fn handle_push(agent: Arc<ComplianceAgent>, payload: &serde_json::Value) -> StatusCode {
let repo_url = payload["repository"]["clone_url"]
.as_str()
.or_else(|| payload["repository"]["html_url"].as_str())
.unwrap_or("");
if repo_url.is_empty() {
return StatusCode::BAD_REQUEST;
}
// Find matching tracked repository
let repo = agent
.db
.repositories()
.find_one(mongodb::bson::doc! { "git_url": repo_url })
.await
.ok()
.flatten();
if let Some(repo) = repo {
let repo_id = repo.id.map(|id| id.to_hex()).unwrap_or_default();
let agent_clone = (*agent).clone();
tokio::spawn(async move {
tracing::info!("GitHub push webhook: triggering scan for {repo_id}");
if let Err(e) = agent_clone.run_scan(&repo_id, ScanTrigger::Webhook).await {
tracing::error!("Webhook-triggered scan failed: {e}");
}
});
} else {
tracing::debug!("GitHub push webhook: no tracked repo for {repo_url}");
}
StatusCode::OK
}
async fn handle_pull_request(
agent: Arc<ComplianceAgent>,
repo_id: &str,
_agent: Arc<ComplianceAgent>,
payload: &serde_json::Value,
) -> StatusCode {
let action = payload["action"].as_str().unwrap_or("");
@@ -92,37 +98,21 @@ async fn handle_pull_request(
return StatusCode::OK;
}
let repo_url = payload["repository"]["clone_url"].as_str().unwrap_or("");
let pr_number = payload["pull_request"]["number"].as_u64().unwrap_or(0);
let head_sha = payload["pull_request"]["head"]["sha"]
.as_str()
.unwrap_or("");
let base_sha = payload["pull_request"]["base"]["sha"]
.as_str()
.unwrap_or("");
if pr_number == 0 || head_sha.is_empty() || base_sha.is_empty() {
if repo_url.is_empty() || pr_number == 0 {
return StatusCode::BAD_REQUEST;
}
let repo_id = repo_id.to_string();
let head_sha = head_sha.to_string();
let base_sha = base_sha.to_string();
let agent_clone = (*agent).clone();
tokio::spawn(async move {
tracing::info!("GitHub PR webhook: reviewing PR #{pr_number} on {repo_id}");
if let Err(e) = agent_clone
.run_pr_review(&repo_id, pr_number, &base_sha, &head_sha)
.await
{
tracing::error!("PR review failed for #{pr_number}: {e}");
}
});
tracing::info!("GitHub PR webhook: PR #{pr_number} {action} on {repo_url}");
// PR review scan would be triggered here - runs incremental SAST on diff
// and posts review comments via the GitHub tracker
StatusCode::OK
}
fn verify_signature(secret: &str, body: &[u8], signature: &str) -> bool {
// GitHub sends sha256=<hex>
let sig = signature.strip_prefix("sha256=").unwrap_or(signature);
let sig_bytes = match hex::decode(sig) {
Ok(b) => b,

View File

@@ -1,8 +1,9 @@
use std::sync::Arc;
use axum::body::Bytes;
use axum::extract::{Extension, Path};
use axum::extract::Extension;
use axum::http::{HeaderMap, StatusCode};
use secrecy::ExposeSecret;
use compliance_core::models::ScanTrigger;
@@ -10,37 +11,18 @@ use crate::agent::ComplianceAgent;
pub async fn handle_gitlab_webhook(
Extension(agent): Extension<Arc<ComplianceAgent>>,
Path(repo_id): Path<String>,
headers: HeaderMap,
body: Bytes,
) -> StatusCode {
// Look up the repo to get its webhook secret
let oid = match mongodb::bson::oid::ObjectId::parse_str(&repo_id) {
Ok(oid) => oid,
Err(_) => return StatusCode::NOT_FOUND,
};
let repo = match agent
.db
.repositories()
.find_one(mongodb::bson::doc! { "_id": oid })
.await
{
Ok(Some(repo)) => repo,
_ => {
tracing::warn!("GitLab webhook: repo {repo_id} not found");
return StatusCode::NOT_FOUND;
}
};
// GitLab sends the secret token in X-Gitlab-Token header (plain text comparison)
if let Some(secret) = &repo.webhook_secret {
// Verify GitLab token
if let Some(secret) = &agent.config.gitlab_webhook_secret {
let token = headers
.get("x-gitlab-token")
.and_then(|v| v.to_str().ok())
.unwrap_or("");
if token != secret {
tracing::warn!("GitLab webhook: invalid token for repo {repo_id}");
if token != secret.expose_secret() {
tracing::warn!("GitLab webhook: invalid token");
return StatusCode::UNAUTHORIZED;
}
}
@@ -56,18 +38,8 @@ pub async fn handle_gitlab_webhook(
let event_type = payload["object_kind"].as_str().unwrap_or("");
match event_type {
"push" => {
let agent_clone = (*agent).clone();
let repo_id = repo_id.clone();
tokio::spawn(async move {
tracing::info!("GitLab push webhook: triggering scan for {repo_id}");
if let Err(e) = agent_clone.run_scan(&repo_id, ScanTrigger::Webhook).await {
tracing::error!("Webhook-triggered scan failed: {e}");
}
});
StatusCode::OK
}
"merge_request" => handle_merge_request(agent, &repo_id, &payload).await,
"push" => handle_push(agent, &payload).await,
"merge_request" => handle_merge_request(agent, &payload).await,
_ => {
tracing::debug!("GitLab webhook: ignoring event '{event_type}'");
StatusCode::OK
@@ -75,9 +47,40 @@ pub async fn handle_gitlab_webhook(
}
}
async fn handle_push(agent: Arc<ComplianceAgent>, payload: &serde_json::Value) -> StatusCode {
let repo_url = payload["project"]["git_http_url"]
.as_str()
.or_else(|| payload["project"]["web_url"].as_str())
.unwrap_or("");
if repo_url.is_empty() {
return StatusCode::BAD_REQUEST;
}
let repo = agent
.db
.repositories()
.find_one(mongodb::bson::doc! { "git_url": repo_url })
.await
.ok()
.flatten();
if let Some(repo) = repo {
let repo_id = repo.id.map(|id| id.to_hex()).unwrap_or_default();
let agent_clone = (*agent).clone();
tokio::spawn(async move {
tracing::info!("GitLab push webhook: triggering scan for {repo_id}");
if let Err(e) = agent_clone.run_scan(&repo_id, ScanTrigger::Webhook).await {
tracing::error!("Webhook-triggered scan failed: {e}");
}
});
}
StatusCode::OK
}
async fn handle_merge_request(
agent: Arc<ComplianceAgent>,
repo_id: &str,
_agent: Arc<ComplianceAgent>,
payload: &serde_json::Value,
) -> StatusCode {
let action = payload["object_attributes"]["action"]
@@ -88,31 +91,7 @@ async fn handle_merge_request(
}
let mr_iid = payload["object_attributes"]["iid"].as_u64().unwrap_or(0);
let head_sha = payload["object_attributes"]["last_commit"]["id"]
.as_str()
.unwrap_or("");
let base_sha = payload["object_attributes"]["diff_refs"]["base_sha"]
.as_str()
.unwrap_or("");
if mr_iid == 0 || head_sha.is_empty() || base_sha.is_empty() {
tracing::warn!("GitLab MR webhook: missing required fields");
return StatusCode::BAD_REQUEST;
}
let repo_id = repo_id.to_string();
let head_sha = head_sha.to_string();
let base_sha = base_sha.to_string();
let agent_clone = (*agent).clone();
tokio::spawn(async move {
tracing::info!("GitLab MR webhook: reviewing MR !{mr_iid} on {repo_id}");
if let Err(e) = agent_clone
.run_pr_review(&repo_id, mr_iid, &base_sha, &head_sha)
.await
{
tracing::error!("MR review failed for !{mr_iid}: {e}");
}
});
tracing::info!("GitLab MR webhook: MR !{mr_iid} {action}");
StatusCode::OK
}

View File

@@ -1,4 +1,3 @@
pub mod gitea;
pub mod github;
pub mod gitlab;
pub mod server;

View File

@@ -5,23 +5,12 @@ use axum::{Extension, Router};
use crate::agent::ComplianceAgent;
use crate::error::AgentError;
use crate::webhooks::{gitea, github, gitlab};
use crate::webhooks::{github, gitlab};
pub async fn start_webhook_server(agent: &ComplianceAgent) -> Result<(), AgentError> {
let app = Router::new()
// Per-repo webhook URLs: /webhook/{platform}/{repo_id}
.route(
"/webhook/github/{repo_id}",
post(github::handle_github_webhook),
)
.route(
"/webhook/gitlab/{repo_id}",
post(gitlab::handle_gitlab_webhook),
)
.route(
"/webhook/gitea/{repo_id}",
post(gitea::handle_gitea_webhook),
)
.route("/webhook/github", post(github::handle_github_webhook))
.route("/webhook/gitlab", post(gitlab::handle_gitlab_webhook))
.layer(Extension(Arc::new(agent.clone())));
let addr = "0.0.0.0:3002";

View File

@@ -1,165 +0,0 @@
// Shared test harness for E2E / integration tests.
//
// Spins up the agent API server on a random port with an isolated test
// database. Each test gets a fresh database that is dropped on cleanup.
use std::sync::Arc;
use compliance_agent::agent::ComplianceAgent;
use compliance_agent::api;
use compliance_agent::database::Database;
use compliance_core::AgentConfig;
use secrecy::SecretString;
/// A running test server with a unique database.
pub struct TestServer {
pub base_url: String,
pub client: reqwest::Client,
db_name: String,
mongodb_uri: String,
}
impl TestServer {
/// Start an agent API server on a random port with an isolated database.
pub async fn start() -> Self {
let mongodb_uri = std::env::var("TEST_MONGODB_URI")
.unwrap_or_else(|_| "mongodb://root:example@localhost:27017/?authSource=admin".into());
// Unique database name per test run to avoid collisions
let db_name = format!("test_{}", uuid::Uuid::new_v4().simple());
let db = Database::connect(&mongodb_uri, &db_name)
.await
.expect("Failed to connect to MongoDB — is it running?");
db.ensure_indexes().await.expect("Failed to create indexes");
let config = AgentConfig {
mongodb_uri: mongodb_uri.clone(),
mongodb_database: db_name.clone(),
litellm_url: std::env::var("TEST_LITELLM_URL")
.unwrap_or_else(|_| "http://localhost:4000".into()),
litellm_api_key: SecretString::from(String::new()),
litellm_model: "gpt-4o".into(),
litellm_embed_model: "text-embedding-3-small".into(),
agent_port: 0, // not used — we bind ourselves
scan_schedule: String::new(),
cve_monitor_schedule: String::new(),
git_clone_base_path: "/tmp/compliance-scanner-tests/repos".into(),
ssh_key_path: "/tmp/compliance-scanner-tests/ssh/id_ed25519".into(),
github_token: None,
github_webhook_secret: None,
gitlab_url: None,
gitlab_token: None,
gitlab_webhook_secret: None,
jira_url: None,
jira_email: None,
jira_api_token: None,
jira_project_key: None,
searxng_url: None,
nvd_api_key: None,
keycloak_url: None,
keycloak_realm: None,
keycloak_admin_username: None,
keycloak_admin_password: None,
pentest_verification_email: None,
pentest_imap_host: None,
pentest_imap_port: None,
pentest_imap_tls: false,
pentest_imap_username: None,
pentest_imap_password: None,
};
let agent = ComplianceAgent::new(config, db);
// Build the router with the agent extension
let app = api::routes::build_router()
.layer(axum::extract::Extension(Arc::new(agent)))
.layer(tower_http::cors::CorsLayer::permissive());
// Bind to port 0 to get a random available port
let listener = tokio::net::TcpListener::bind("127.0.0.1:0")
.await
.expect("Failed to bind test server");
let port = listener.local_addr().expect("no local addr").port();
tokio::spawn(async move {
axum::serve(listener, app).await.ok();
});
let base_url = format!("http://127.0.0.1:{port}");
let client = reqwest::Client::builder()
.timeout(std::time::Duration::from_secs(30))
.build()
.expect("Failed to build HTTP client");
// Wait for server to be ready
for _ in 0..50 {
if client
.get(format!("{base_url}/api/v1/health"))
.send()
.await
.is_ok()
{
break;
}
tokio::time::sleep(std::time::Duration::from_millis(50)).await;
}
Self {
base_url,
client,
db_name,
mongodb_uri,
}
}
/// GET helper
pub async fn get(&self, path: &str) -> reqwest::Response {
self.client
.get(format!("{}{path}", self.base_url))
.send()
.await
.expect("GET request failed")
}
/// POST helper with JSON body
pub async fn post(&self, path: &str, body: &serde_json::Value) -> reqwest::Response {
self.client
.post(format!("{}{path}", self.base_url))
.json(body)
.send()
.await
.expect("POST request failed")
}
/// PATCH helper with JSON body
pub async fn patch(&self, path: &str, body: &serde_json::Value) -> reqwest::Response {
self.client
.patch(format!("{}{path}", self.base_url))
.json(body)
.send()
.await
.expect("PATCH request failed")
}
/// DELETE helper
pub async fn delete(&self, path: &str) -> reqwest::Response {
self.client
.delete(format!("{}{path}", self.base_url))
.send()
.await
.expect("DELETE request failed")
}
/// Get the unique database name for direct MongoDB access in tests.
pub fn db_name(&self) -> &str {
&self.db_name
}
/// Drop the test database on cleanup
pub async fn cleanup(&self) {
if let Ok(client) = mongodb::Client::with_uri_str(&self.mongodb_uri).await {
client.database(&self.db_name).drop().await.ok();
}
}
}

View File

@@ -1,7 +0,0 @@
// E2E test entry point.
//
// Run with: cargo test -p compliance-agent --test e2e
// Requires: MongoDB running (set TEST_MONGODB_URI if not default)
mod common;
mod integration;

View File

@@ -1,221 +0,0 @@
use crate::common::TestServer;
use serde_json::json;
/// Insert a DAST target directly into MongoDB linked to a repo.
async fn insert_dast_target(server: &TestServer, repo_id: &str, name: &str) -> String {
let mongodb_uri = std::env::var("TEST_MONGODB_URI")
.unwrap_or_else(|_| "mongodb://root:example@localhost:27017/?authSource=admin".into());
let client = mongodb::Client::with_uri_str(&mongodb_uri).await.unwrap();
let db = client.database(&server.db_name());
let result = db
.collection::<mongodb::bson::Document>("dast_targets")
.insert_one(mongodb::bson::doc! {
"name": name,
"base_url": format!("https://{name}.example.com"),
"target_type": "webapp",
"repo_id": repo_id,
"rate_limit": 10,
"allow_destructive": false,
"created_at": mongodb::bson::DateTime::now(),
})
.await
.unwrap();
result.inserted_id.as_object_id().unwrap().to_hex()
}
/// Insert a pentest session linked to a target.
async fn insert_pentest_session(server: &TestServer, target_id: &str, repo_id: &str) -> String {
let mongodb_uri = std::env::var("TEST_MONGODB_URI")
.unwrap_or_else(|_| "mongodb://root:example@localhost:27017/?authSource=admin".into());
let client = mongodb::Client::with_uri_str(&mongodb_uri).await.unwrap();
let db = client.database(&server.db_name());
let result = db
.collection::<mongodb::bson::Document>("pentest_sessions")
.insert_one(mongodb::bson::doc! {
"target_id": target_id,
"repo_id": repo_id,
"strategy": "comprehensive",
"status": "completed",
"findings_count": 1_i32,
"exploitable_count": 0_i32,
"created_at": mongodb::bson::DateTime::now(),
})
.await
.unwrap();
result.inserted_id.as_object_id().unwrap().to_hex()
}
/// Insert an attack chain node linked to a session.
async fn insert_attack_node(server: &TestServer, session_id: &str) {
let mongodb_uri = std::env::var("TEST_MONGODB_URI")
.unwrap_or_else(|_| "mongodb://root:example@localhost:27017/?authSource=admin".into());
let client = mongodb::Client::with_uri_str(&mongodb_uri).await.unwrap();
let db = client.database(&server.db_name());
db.collection::<mongodb::bson::Document>("attack_chain_nodes")
.insert_one(mongodb::bson::doc! {
"session_id": session_id,
"node_id": "node-1",
"tool_name": "recon",
"status": "completed",
"created_at": mongodb::bson::DateTime::now(),
})
.await
.unwrap();
}
/// Insert a DAST finding linked to a target.
async fn insert_dast_finding(server: &TestServer, target_id: &str, session_id: &str) {
let mongodb_uri = std::env::var("TEST_MONGODB_URI")
.unwrap_or_else(|_| "mongodb://root:example@localhost:27017/?authSource=admin".into());
let client = mongodb::Client::with_uri_str(&mongodb_uri).await.unwrap();
let db = client.database(&server.db_name());
db.collection::<mongodb::bson::Document>("dast_findings")
.insert_one(mongodb::bson::doc! {
"scan_run_id": "run-1",
"target_id": target_id,
"vuln_type": "xss",
"title": "Reflected XSS",
"description": "XSS in search param",
"severity": "high",
"endpoint": "https://example.com/search",
"method": "GET",
"exploitable": true,
"evidence": [],
"session_id": session_id,
"created_at": mongodb::bson::DateTime::now(),
})
.await
.unwrap();
}
/// Helper to count documents in a collection
async fn count_docs(server: &TestServer, collection: &str) -> u64 {
let mongodb_uri = std::env::var("TEST_MONGODB_URI")
.unwrap_or_else(|_| "mongodb://root:example@localhost:27017/?authSource=admin".into());
let client = mongodb::Client::with_uri_str(&mongodb_uri).await.unwrap();
let db = client.database(&server.db_name());
db.collection::<mongodb::bson::Document>(collection)
.count_documents(mongodb::bson::doc! {})
.await
.unwrap()
}
#[tokio::test]
async fn delete_repo_cascades_to_dast_and_pentest_data() {
let server = TestServer::start().await;
// Create a repo
let resp = server
.post(
"/api/v1/repositories",
&json!({
"name": "cascade-test",
"git_url": "https://github.com/example/cascade-test.git",
}),
)
.await;
let body: serde_json::Value = resp.json().await.unwrap();
let repo_id = body["data"]["id"].as_str().unwrap().to_string();
// Insert DAST target linked to repo
let target_id = insert_dast_target(&server, &repo_id, "cascade-target").await;
// Insert pentest session linked to target
let session_id = insert_pentest_session(&server, &target_id, &repo_id).await;
// Insert downstream data
insert_attack_node(&server, &session_id).await;
insert_dast_finding(&server, &target_id, &session_id).await;
// Verify data exists
assert_eq!(count_docs(&server, "dast_targets").await, 1);
assert_eq!(count_docs(&server, "pentest_sessions").await, 1);
assert_eq!(count_docs(&server, "attack_chain_nodes").await, 1);
assert_eq!(count_docs(&server, "dast_findings").await, 1);
// Delete the repo
let resp = server
.delete(&format!("/api/v1/repositories/{repo_id}"))
.await;
assert_eq!(resp.status(), 200);
// All downstream data should be gone
assert_eq!(count_docs(&server, "dast_targets").await, 0);
assert_eq!(count_docs(&server, "pentest_sessions").await, 0);
assert_eq!(count_docs(&server, "attack_chain_nodes").await, 0);
assert_eq!(count_docs(&server, "dast_findings").await, 0);
server.cleanup().await;
}
#[tokio::test]
async fn delete_repo_cascades_sast_findings_and_sbom() {
let server = TestServer::start().await;
// Create a repo
let resp = server
.post(
"/api/v1/repositories",
&json!({
"name": "sast-cascade",
"git_url": "https://github.com/example/sast-cascade.git",
}),
)
.await;
let body: serde_json::Value = resp.json().await.unwrap();
let repo_id = body["data"]["id"].as_str().unwrap().to_string();
// Insert SAST finding and SBOM entry
let mongodb_uri = std::env::var("TEST_MONGODB_URI")
.unwrap_or_else(|_| "mongodb://root:example@localhost:27017/?authSource=admin".into());
let client = mongodb::Client::with_uri_str(&mongodb_uri).await.unwrap();
let db = client.database(&server.db_name());
let now = mongodb::bson::DateTime::now();
db.collection::<mongodb::bson::Document>("findings")
.insert_one(mongodb::bson::doc! {
"repo_id": &repo_id,
"fingerprint": "fp-test-1",
"scanner": "semgrep",
"scan_type": "sast",
"title": "SQL Injection",
"description": "desc",
"severity": "critical",
"status": "open",
"created_at": now,
"updated_at": now,
})
.await
.unwrap();
db.collection::<mongodb::bson::Document>("sbom_entries")
.insert_one(mongodb::bson::doc! {
"repo_id": &repo_id,
"name": "lodash",
"version": "4.17.20",
"package_manager": "npm",
"known_vulnerabilities": [],
})
.await
.unwrap();
assert_eq!(count_docs(&server, "findings").await, 1);
assert_eq!(count_docs(&server, "sbom_entries").await, 1);
// Delete repo
server
.delete(&format!("/api/v1/repositories/{repo_id}"))
.await;
// Both should be gone
assert_eq!(count_docs(&server, "findings").await, 0);
assert_eq!(count_docs(&server, "sbom_entries").await, 0);
server.cleanup().await;
}

View File

@@ -1,48 +0,0 @@
use crate::common::TestServer;
use serde_json::json;
#[tokio::test]
async fn add_and_list_dast_targets() {
let server = TestServer::start().await;
// Initially empty
let resp = server.get("/api/v1/dast/targets").await;
assert_eq!(resp.status(), 200);
let body: serde_json::Value = resp.json().await.unwrap();
assert_eq!(body["data"].as_array().unwrap().len(), 0);
// Add a target
let resp = server
.post(
"/api/v1/dast/targets",
&json!({
"name": "test-app",
"base_url": "https://test-app.example.com",
"target_type": "webapp",
}),
)
.await;
assert_eq!(resp.status(), 200);
// List should return 1
let resp = server.get("/api/v1/dast/targets").await;
let body: serde_json::Value = resp.json().await.unwrap();
let targets = body["data"].as_array().unwrap();
assert_eq!(targets.len(), 1);
assert_eq!(targets[0]["name"], "test-app");
assert_eq!(targets[0]["base_url"], "https://test-app.example.com");
server.cleanup().await;
}
#[tokio::test]
async fn list_dast_findings_empty() {
let server = TestServer::start().await;
let resp = server.get("/api/v1/dast/findings").await;
assert_eq!(resp.status(), 200);
let body: serde_json::Value = resp.json().await.unwrap();
assert_eq!(body["data"].as_array().unwrap().len(), 0);
server.cleanup().await;
}

View File

@@ -1,144 +0,0 @@
use crate::common::TestServer;
use serde_json::json;
/// Helper: insert a finding directly via MongoDB for testing query endpoints.
async fn insert_finding(server: &TestServer, repo_id: &str, title: &str, severity: &str) {
// We insert via the agent's DB by posting to the internal test path.
// Since there's no direct "create finding" API, we use MongoDB directly.
let mongodb_uri = std::env::var("TEST_MONGODB_URI")
.unwrap_or_else(|_| "mongodb://root:example@localhost:27017/?authSource=admin".into());
// Extract the database name from the server's unique DB
// We'll use the agent's internal DB through the stats endpoint to verify
let client = mongodb::Client::with_uri_str(&mongodb_uri).await.unwrap();
// Get the DB name from the test server by parsing the health response
// For now, we use a direct insert approach
let db = client.database(&server.db_name());
let now = mongodb::bson::DateTime::now();
db.collection::<mongodb::bson::Document>("findings")
.insert_one(mongodb::bson::doc! {
"repo_id": repo_id,
"fingerprint": format!("fp-{title}-{severity}"),
"scanner": "test-scanner",
"scan_type": "sast",
"title": title,
"description": format!("Test finding: {title}"),
"severity": severity,
"status": "open",
"created_at": now,
"updated_at": now,
})
.await
.unwrap();
}
#[tokio::test]
async fn list_findings_empty() {
let server = TestServer::start().await;
let resp = server.get("/api/v1/findings").await;
assert_eq!(resp.status(), 200);
let body: serde_json::Value = resp.json().await.unwrap();
assert_eq!(body["data"].as_array().unwrap().len(), 0);
assert_eq!(body["total"], 0);
server.cleanup().await;
}
#[tokio::test]
async fn list_findings_with_data() {
let server = TestServer::start().await;
insert_finding(&server, "repo1", "SQL Injection", "critical").await;
insert_finding(&server, "repo1", "XSS", "high").await;
insert_finding(&server, "repo2", "Info Leak", "low").await;
let resp = server.get("/api/v1/findings").await;
assert_eq!(resp.status(), 200);
let body: serde_json::Value = resp.json().await.unwrap();
assert_eq!(body["total"], 3);
// Filter by severity
let resp = server.get("/api/v1/findings?severity=critical").await;
let body: serde_json::Value = resp.json().await.unwrap();
assert_eq!(body["total"], 1);
assert_eq!(body["data"][0]["title"], "SQL Injection");
// Filter by repo
let resp = server.get("/api/v1/findings?repo_id=repo1").await;
let body: serde_json::Value = resp.json().await.unwrap();
assert_eq!(body["total"], 2);
server.cleanup().await;
}
#[tokio::test]
async fn update_finding_status() {
let server = TestServer::start().await;
insert_finding(&server, "repo1", "Test Bug", "medium").await;
// Get the finding ID
let resp = server.get("/api/v1/findings").await;
let body: serde_json::Value = resp.json().await.unwrap();
let finding_id = body["data"][0]["_id"]["$oid"].as_str().unwrap();
// Update status to resolved
let resp = server
.patch(
&format!("/api/v1/findings/{finding_id}/status"),
&json!({ "status": "resolved" }),
)
.await;
assert_eq!(resp.status(), 200);
// Verify it's updated
let resp = server.get(&format!("/api/v1/findings/{finding_id}")).await;
assert_eq!(resp.status(), 200);
let body: serde_json::Value = resp.json().await.unwrap();
assert_eq!(body["data"]["status"], "resolved");
server.cleanup().await;
}
#[tokio::test]
async fn bulk_update_finding_status() {
let server = TestServer::start().await;
insert_finding(&server, "repo1", "Bug A", "high").await;
insert_finding(&server, "repo1", "Bug B", "high").await;
// Get both finding IDs
let resp = server.get("/api/v1/findings").await;
let body: serde_json::Value = resp.json().await.unwrap();
let ids: Vec<String> = body["data"]
.as_array()
.unwrap()
.iter()
.map(|f| f["_id"]["$oid"].as_str().unwrap().to_string())
.collect();
// Bulk update
let resp = server
.patch(
"/api/v1/findings/bulk-status",
&json!({
"ids": ids,
"status": "false_positive"
}),
)
.await;
assert_eq!(resp.status(), 200);
// Verify both are updated
for id in &ids {
let resp = server.get(&format!("/api/v1/findings/{id}")).await;
let body: serde_json::Value = resp.json().await.unwrap();
assert_eq!(body["data"]["status"], "false_positive");
}
server.cleanup().await;
}

View File

@@ -1,29 +0,0 @@
use crate::common::TestServer;
#[tokio::test]
async fn health_endpoint_returns_ok() {
let server = TestServer::start().await;
let resp = server.get("/api/v1/health").await;
assert_eq!(resp.status(), 200);
let body: serde_json::Value = resp.json().await.unwrap();
assert_eq!(body["status"], "ok");
server.cleanup().await;
}
#[tokio::test]
async fn stats_overview_returns_zeroes_on_empty_db() {
let server = TestServer::start().await;
let resp = server.get("/api/v1/stats/overview").await;
assert_eq!(resp.status(), 200);
let body: serde_json::Value = resp.json().await.unwrap();
let data = &body["data"];
assert_eq!(data["repositories"], 0);
assert_eq!(data["total_findings"], 0);
server.cleanup().await;
}

View File

@@ -1,6 +0,0 @@
mod cascade_delete;
mod dast;
mod findings;
mod health;
mod repositories;
mod stats;

View File

@@ -1,110 +0,0 @@
use crate::common::TestServer;
use serde_json::json;
#[tokio::test]
async fn add_and_list_repository() {
let server = TestServer::start().await;
// Initially empty
let resp = server.get("/api/v1/repositories").await;
assert_eq!(resp.status(), 200);
let body: serde_json::Value = resp.json().await.unwrap();
assert_eq!(body["data"].as_array().unwrap().len(), 0);
// Add a repository
let resp = server
.post(
"/api/v1/repositories",
&json!({
"name": "test-repo",
"git_url": "https://github.com/example/test-repo.git",
}),
)
.await;
assert_eq!(resp.status(), 200);
let body: serde_json::Value = resp.json().await.unwrap();
let repo_id = body["data"]["id"].as_str().unwrap().to_string();
assert!(!repo_id.is_empty());
// List should now return 1
let resp = server.get("/api/v1/repositories").await;
let body: serde_json::Value = resp.json().await.unwrap();
let repos = body["data"].as_array().unwrap();
assert_eq!(repos.len(), 1);
assert_eq!(repos[0]["name"], "test-repo");
server.cleanup().await;
}
#[tokio::test]
async fn add_duplicate_repository_fails() {
let server = TestServer::start().await;
let payload = json!({
"name": "dup-repo",
"git_url": "https://github.com/example/dup-repo.git",
});
// First add succeeds
let resp = server.post("/api/v1/repositories", &payload).await;
assert_eq!(resp.status(), 200);
// Second add with same git_url should fail (unique index)
let resp = server.post("/api/v1/repositories", &payload).await;
assert_ne!(resp.status(), 200);
server.cleanup().await;
}
#[tokio::test]
async fn delete_repository() {
let server = TestServer::start().await;
// Add a repo
let resp = server
.post(
"/api/v1/repositories",
&json!({
"name": "to-delete",
"git_url": "https://github.com/example/to-delete.git",
}),
)
.await;
let body: serde_json::Value = resp.json().await.unwrap();
let repo_id = body["data"]["id"].as_str().unwrap();
// Delete it
let resp = server
.delete(&format!("/api/v1/repositories/{repo_id}"))
.await;
assert_eq!(resp.status(), 200);
// List should be empty again
let resp = server.get("/api/v1/repositories").await;
let body: serde_json::Value = resp.json().await.unwrap();
assert_eq!(body["data"].as_array().unwrap().len(), 0);
server.cleanup().await;
}
#[tokio::test]
async fn delete_nonexistent_repository_returns_404() {
let server = TestServer::start().await;
let resp = server
.delete("/api/v1/repositories/000000000000000000000000")
.await;
assert_eq!(resp.status(), 404);
server.cleanup().await;
}
#[tokio::test]
async fn delete_invalid_id_returns_400() {
let server = TestServer::start().await;
let resp = server.delete("/api/v1/repositories/not-a-valid-id").await;
assert_eq!(resp.status(), 400);
server.cleanup().await;
}

Some files were not shown because too many files have changed in this diff Show More