1 Commits

Author SHA1 Message Date
Sharang Parnerkar
f8eb4ea84d fix: cascade-delete DAST targets, pentests, and all downstream data when repo is deleted
All checks were successful
CI / Check (pull_request) Successful in 9m33s
CI / Detect Changes (pull_request) Has been skipped
CI / Deploy Agent (pull_request) Has been skipped
CI / Deploy Dashboard (pull_request) Has been skipped
CI / Deploy Docs (pull_request) Has been skipped
CI / Deploy MCP (pull_request) Has been skipped
Previously, deleting a repository only cleaned up SAST findings, SBOM,
scan runs, CVEs, tracker issues, graph data, and embeddings — but left
orphaned DAST targets, scan runs, DAST findings, pentest sessions,
attack chain nodes, and pentest messages in the database.

Now the delete handler follows the full cascade chain:
  repo → dast_targets → dast_scan_runs → dast_findings
  repo → dast_targets → pentest_sessions → attack_chain_nodes
  repo → dast_targets → pentest_sessions → pentest_messages
  repo → pentest_sessions (direct repo_id link) → downstream

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-29 23:28:26 +02:00
7 changed files with 150 additions and 196 deletions

View File

@@ -90,13 +90,10 @@ pub async fn chat(
}; };
let system_prompt = format!( let system_prompt = format!(
"You are a code assistant for this repository. Answer questions using the code context below.\n\n\ "You are an expert code assistant for a software repository. \
Rules:\n\ Answer the user's question based on the code context below. \
- Reference specific files, functions, and line numbers\n\ Reference specific files and functions when relevant. \
- Show code snippets when they help explain the answer\n\ If the context doesn't contain enough information, say so.\n\n\
- If the context is insufficient, say what's missing rather than guessing\n\
- Be concise — lead with the answer, then explain if needed\n\
- For security questions, note relevant CWEs and link to the finding if one exists\n\n\
## Code Context\n\n{code_context}" ## Code Context\n\n{code_context}"
); );

View File

@@ -237,5 +237,92 @@ pub async fn delete_repository(
.delete_many(doc! { "repo_id": &id }) .delete_many(doc! { "repo_id": &id })
.await; .await;
// Cascade delete DAST targets linked to this repo, and all their downstream data
// (scan runs, findings, pentest sessions, attack chains, messages)
if let Ok(mut cursor) = db.dast_targets().find(doc! { "repo_id": &id }).await {
use futures_util::StreamExt;
while let Some(Ok(target)) = cursor.next().await {
let target_id = target.id.map(|oid| oid.to_hex()).unwrap_or_default();
if !target_id.is_empty() {
cascade_delete_dast_target(db, &target_id).await;
}
}
}
// Also delete pentest sessions linked directly to this repo (not via target)
if let Ok(mut cursor) = db.pentest_sessions().find(doc! { "repo_id": &id }).await {
use futures_util::StreamExt;
while let Some(Ok(session)) = cursor.next().await {
let session_id = session.id.map(|oid| oid.to_hex()).unwrap_or_default();
if !session_id.is_empty() {
let _ = db
.attack_chain_nodes()
.delete_many(doc! { "session_id": &session_id })
.await;
let _ = db
.pentest_messages()
.delete_many(doc! { "session_id": &session_id })
.await;
// Delete DAST findings produced by this session
let _ = db
.dast_findings()
.delete_many(doc! { "session_id": &session_id })
.await;
}
}
}
let _ = db
.pentest_sessions()
.delete_many(doc! { "repo_id": &id })
.await;
Ok(Json(serde_json::json!({ "status": "deleted" }))) Ok(Json(serde_json::json!({ "status": "deleted" })))
} }
/// Cascade-delete a DAST target and all its downstream data.
async fn cascade_delete_dast_target(db: &crate::database::Database, target_id: &str) {
// Delete pentest sessions for this target (and their attack chains + messages)
if let Ok(mut cursor) = db
.pentest_sessions()
.find(doc! { "target_id": target_id })
.await
{
use futures_util::StreamExt;
while let Some(Ok(session)) = cursor.next().await {
let session_id = session.id.map(|oid| oid.to_hex()).unwrap_or_default();
if !session_id.is_empty() {
let _ = db
.attack_chain_nodes()
.delete_many(doc! { "session_id": &session_id })
.await;
let _ = db
.pentest_messages()
.delete_many(doc! { "session_id": &session_id })
.await;
let _ = db
.dast_findings()
.delete_many(doc! { "session_id": &session_id })
.await;
}
}
}
let _ = db
.pentest_sessions()
.delete_many(doc! { "target_id": target_id })
.await;
// Delete DAST scan runs and their findings
let _ = db
.dast_findings()
.delete_many(doc! { "target_id": target_id })
.await;
let _ = db
.dast_scan_runs()
.delete_many(doc! { "target_id": target_id })
.await;
// Delete the target itself
if let Ok(oid) = mongodb::bson::oid::ObjectId::parse_str(target_id) {
let _ = db.dast_targets().delete_one(doc! { "_id": oid }).await;
}
}

View File

@@ -5,20 +5,15 @@ use compliance_core::models::Finding;
use crate::error::AgentError; use crate::error::AgentError;
use crate::llm::LlmClient; use crate::llm::LlmClient;
const DESCRIPTION_SYSTEM_PROMPT: &str = r#"You are a security engineer writing a bug tracker issue for a developer to fix. Be direct and actionable — developers skim issue descriptions, so lead with what matters. const DESCRIPTION_SYSTEM_PROMPT: &str = r#"You are a security engineer writing issue descriptions for a bug tracker. Generate a clear, actionable issue body in Markdown format that includes:
Format in Markdown: 1. **Summary**: 1-2 sentence overview
2. **Evidence**: Code location, snippet, and what was detected
3. **Impact**: What could happen if not fixed
4. **Remediation**: Step-by-step fix instructions
5. **References**: Relevant CWE/CVE links if applicable
1. **What**: 1 sentence — what's wrong and where (file:line) Keep it concise and professional. Use code blocks for code snippets."#;
2. **Why it matters**: 1-2 sentences — concrete impact if not fixed. Avoid generic "could lead to" phrasing; describe the specific attack or failure scenario.
3. **Fix**: The specific code change needed. Use a code block with the corrected code if possible. If the fix is configuration-based, show the exact config change.
4. **References**: CWE/CVE link if applicable (one line, not a section)
Rules:
- No filler paragraphs or background explanations
- No restating the finding title in the body
- Code blocks should show the FIX, not the vulnerable code (the developer can see that in the diff)
- If the remediation is a one-liner, just say it — don't wrap it in a section header"#;
pub async fn generate_issue_description( pub async fn generate_issue_description(
llm: &Arc<LlmClient>, llm: &Arc<LlmClient>,

View File

@@ -5,24 +5,7 @@ use compliance_core::models::Finding;
use crate::error::AgentError; use crate::error::AgentError;
use crate::llm::LlmClient; use crate::llm::LlmClient;
const FIX_SYSTEM_PROMPT: &str = r#"You are a security engineer suggesting a code fix. Return ONLY the corrected code that replaces the vulnerable snippet — no explanations, no markdown fences, no before/after comparison. const FIX_SYSTEM_PROMPT: &str = r#"You are a security engineer. Given a security finding with code context, suggest a concrete code fix. Return ONLY the fixed code snippet that can directly replace the vulnerable code. Include brief inline comments explaining the fix."#;
Rules:
- The fix must be a drop-in replacement for the vulnerable code
- Preserve the original code's style, indentation, and naming conventions
- Add at most one brief inline comment on the changed line explaining the security fix
- If the fix requires importing a new module, include the import on a separate line prefixed with the language's comment syntax + "Add import: "
- Do not refactor, rename variables, or "improve" unrelated code
- If the vulnerability is a false positive and the code is actually safe, return the original code unchanged with a comment explaining why no fix is needed
Language-specific fix guidance:
- Rust: use `?` for error propagation, prefer `SecretString` for secrets, use parameterized queries with `sqlx`/`diesel`
- Python: use parameterized queries (never f-strings in SQL), use `secrets` module not `random`, use `subprocess.run([...])` list form, use `markupsafe.escape()` for HTML
- Go: use `sql.Query` with `$1`/`?` placeholders, use `crypto/rand` not `math/rand`, use `html/template` not `text/template`, return errors don't panic
- Java/Kotlin: use `PreparedStatement` with `?` params, use `SecureRandom`, use `Jsoup.clean()` for HTML sanitization, use `@Valid` for input validation
- Ruby: use ActiveRecord parameterized finders, use `SecureRandom`, use `ERB::Util.html_escape`, use `strong_parameters`
- PHP: use PDO prepared statements with `:param` or `?`, use `random_bytes()`/`random_int()`, use `htmlspecialchars()` with `ENT_QUOTES`, use `password_hash(PASSWORD_BCRYPT)`
- C/C++: use `snprintf` not `sprintf`, use bounds-checked APIs, free resources in reverse allocation order, use `memset_s` for secret cleanup"#;
pub async fn suggest_fix(llm: &Arc<LlmClient>, finding: &Finding) -> Result<String, AgentError> { pub async fn suggest_fix(llm: &Arc<LlmClient>, finding: &Finding) -> Result<String, AgentError> {
let user_prompt = format!( let user_prompt = format!(

View File

@@ -1,138 +1,69 @@
// System prompts for multi-pass LLM code review. // System prompts for multi-pass LLM code review.
// Each pass focuses on a different aspect to avoid overloading a single prompt. // Each pass focuses on a different aspect to avoid overloading a single prompt.
pub const LOGIC_REVIEW_PROMPT: &str = r#"You are a senior software engineer reviewing a code diff. Report ONLY genuine logic bugs that would cause incorrect behavior at runtime. pub const LOGIC_REVIEW_PROMPT: &str = r#"You are a senior software engineer reviewing code changes. Focus ONLY on logic and correctness issues.
Report: Look for:
- Off-by-one errors, wrong comparisons, missing edge cases that cause wrong results - Off-by-one errors, wrong comparisons, missing edge cases
- Incorrect control flow that produces wrong output (not style preferences) - Incorrect control flow (unreachable code, missing returns, wrong loop conditions)
- Actual race conditions with concrete shared-state mutation (not theoretical ones) - Race conditions or concurrency bugs
- Resource leaks where cleanup is truly missing (not just "could be improved") - Resource leaks (unclosed handles, missing cleanup)
- Wrong variable used (copy-paste errors) — must be provably wrong, not just suspicious - Wrong variable used (copy-paste errors)
- Swallowed errors that silently hide failures in a way that matters - Incorrect error handling (swallowed errors, wrong error type)
Do NOT report: Ignore: style, naming, formatting, documentation, minor improvements.
- Style, naming, formatting, documentation, or code organization preferences
- Theoretical issues without a concrete triggering scenario
- "Potential" problems that require assumptions not supported by the visible code
- Complexity or function length — that's a separate review pass
Language-idiomatic patterns that are NOT bugs (do not flag these): For each issue found, respond with a JSON array:
- Rust: `||`/`&&` short-circuit evaluation, variable shadowing, `let` rebinding, `clone()`, `impl` blocks, `match` arms with guards, `?` operator chaining, `unsafe` blocks with safety comments
- Python: duck typing, EAFP pattern (try/except vs check-first), `*args`/`**kwargs`, walrus operator `:=`, truthiness checks on containers, bare `except:` in top-level handlers
- Go: multiple return values for errors, `if err != nil` patterns, goroutine + channel patterns, blank identifier `_`, named returns, `defer` for cleanup, `init()` functions
- Java/Kotlin: checked exception patterns, method overloading, `Optional` vs null checks, Kotlin `?.` safe calls, `!!` non-null assertions in tests, `when` exhaustive matching, companion objects, `lateinit`
- Ruby: monkey patching in libraries, method_missing, blocks/procs/lambdas, `rescue => e` patterns, `send`/`respond_to?` metaprogramming, `nil` checks via `&.` safe navigation
- PHP: loose comparisons with `==` (only flag if `===` was clearly intended), `@` error suppression in legacy code, `isset()`/`empty()` patterns, magic methods (`__get`, `__call`), array functions as callbacks
- C/C++: RAII patterns, move semantics, `const_cast`/`static_cast` in appropriate contexts, macro usage for platform compat, pointer arithmetic in low-level code, `goto` for cleanup in C
Severity guide:
- high: Will cause incorrect behavior in normal usage
- medium: Will cause incorrect behavior in edge cases
- low: Minor correctness concern with limited blast radius
Prefer returning [] over reporting low-confidence guesses. A false positive wastes more developer time than a missed low-severity issue.
Respond with a JSON array (no markdown fences):
[{"title": "...", "description": "...", "severity": "high|medium|low", "file": "...", "line": N, "suggestion": "..."}] [{"title": "...", "description": "...", "severity": "high|medium|low", "file": "...", "line": N, "suggestion": "..."}]
If no issues found, respond with: []"#; If no issues found, respond with: []"#;
pub const SECURITY_REVIEW_PROMPT: &str = r#"You are a security engineer reviewing a code diff. Report ONLY exploitable security vulnerabilities with a realistic attack scenario. pub const SECURITY_REVIEW_PROMPT: &str = r#"You are a security engineer reviewing code changes. Focus ONLY on security vulnerabilities.
Report: Look for:
- Injection vulnerabilities (SQL, command, XSS, template) where untrusted input reaches a sink - Injection vulnerabilities (SQL, command, XSS, template injection)
- Authentication/authorization bypasses with a concrete exploit path - Authentication/authorization bypasses
- Sensitive data exposure: secrets in code, credentials in logs, PII leaks - Sensitive data exposure (logging secrets, hardcoded credentials)
- Insecure cryptography: weak algorithms, predictable randomness, hardcoded keys - Insecure cryptography (weak algorithms, predictable randomness)
- Path traversal, SSRF, open redirects — only where user input reaches the vulnerable API - Path traversal, SSRF, open redirects
- Unsafe deserialization of untrusted data - Unsafe deserialization
- Missing input validation at EXTERNAL trust boundaries (user input, API responses) - Missing input validation at trust boundaries
Do NOT report: Ignore: code style, performance, general quality.
- Internal code that only handles trusted/validated data
- Hash functions used for non-security purposes (dedup fingerprints, cache keys, content addressing)
- Logging of non-sensitive operational data (finding titles, counts, performance metrics)
- "Information disclosure" for data that is already public or user-facing
- Code style, performance, or general quality issues
- Missing validation on internal function parameters (trust the caller within the same module/crate/package)
- Theoretical attacks that require preconditions not present in the code
Language-specific patterns that are NOT vulnerabilities (do not flag these): For each issue found, respond with a JSON array:
- Python: `pickle` used on trusted internal data, `eval()`/`exec()` on hardcoded strings, `subprocess` with hardcoded commands, Django `mark_safe()` on static content, `assert` in non-security contexts
- Go: `crypto/rand` is secure (don't confuse with `math/rand`), `sql.DB` with parameterized queries is safe, `http.ListenAndServe` without TLS in dev/internal, error strings in responses (Go convention)
- Java/Kotlin: Spring Security annotations are sufficient auth checks, `@Transactional` provides atomicity, JPA parameterized queries are safe, Kotlin `require()`/`check()` are assertion patterns not vulnerabilities
- Ruby: Rails `params.permit()` is input validation, `render html:` with `html_safe` on generated content, ActiveRecord parameterized finders are safe, Devise/Warden patterns for auth
- PHP: PDO prepared statements are safe, Laravel Eloquent is parameterized, `htmlspecialchars()` is XSS mitigation, Symfony security voters are auth checks, `password_hash()`/`password_verify()` are correct bcrypt usage
- C/C++: `strncpy`/`snprintf` are bounds-checked (vs `strcpy`/`sprintf`), smart pointers manage memory, RAII handles cleanup, `static_assert` is compile-time only, OpenSSL with proper context setup
- Rust: `sha2`/`blake3` for fingerprinting is not "weak crypto", `unsafe` with documented invariants, `secrecy::SecretString` properly handles secrets
Severity guide:
- critical: Remote code execution, auth bypass, or data breach with no preconditions
- high: Exploitable vulnerability requiring minimal preconditions
- medium: Vulnerability requiring specific conditions or limited impact
Prefer returning [] over reporting speculative vulnerabilities. Every false positive erodes trust in the scanner.
Respond with a JSON array (no markdown fences):
[{"title": "...", "description": "...", "severity": "critical|high|medium", "file": "...", "line": N, "cwe": "CWE-XXX", "suggestion": "..."}] [{"title": "...", "description": "...", "severity": "critical|high|medium", "file": "...", "line": N, "cwe": "CWE-XXX", "suggestion": "..."}]
If no issues found, respond with: []"#; If no issues found, respond with: []"#;
pub const CONVENTION_REVIEW_PROMPT: &str = r#"You are a code reviewer checking for convention violations that indicate likely bugs. Report ONLY deviations from the project's visible patterns that could cause real problems. pub const CONVENTION_REVIEW_PROMPT: &str = r#"You are a code reviewer checking adherence to project conventions. Focus ONLY on patterns that indicate likely bugs or maintenance problems.
Report: Look for:
- Inconsistent error handling within the same module where the inconsistency could hide failures - Inconsistent error handling patterns within the same module
- Public API that breaks the module's established contract (not just different style) - Public API that doesn't follow the project's established patterns
- Anti-patterns that are bugs in this language: e.g. `unwrap()` in Rust library code where the CI enforces `clippy::unwrap_used`, `any` defeating TypeScript's type system - Missing or incorrect type annotations that could cause runtime issues
- Anti-patterns specific to the language (e.g. unwrap in Rust library code, any in TypeScript)
Do NOT report: Do NOT report: minor style preferences, documentation gaps, formatting.
- Style preferences, formatting, naming conventions, or documentation Only report issues with HIGH confidence that they deviate from the visible codebase conventions.
- Code organization suggestions ("this function should be split")
- Patterns that are valid in the language even if you'd write them differently
- "Missing type annotations" unless the code literally won't compile or causes a type inference bug
Language-specific patterns that are conventional (do not flag these): For each issue found, respond with a JSON array:
- Rust: variable shadowing, `||`/`&&` short-circuit, `let` rebinding, builder patterns, `clone()`, `From`/`Into` impl chains, `#[allow(...)]` attributes
- Python: `**kwargs` forwarding, `@property` setters, `__dunder__` methods, list comprehensions with conditions, `if TYPE_CHECKING` imports, `noqa` comments
- Go: stuttering names (`http.HTTPClient`) discouraged but not a bug, `context.Context` as first param, init() functions, `//nolint` directives, returning concrete types vs interfaces in internal code
- Java/Kotlin: builder pattern boilerplate, Lombok annotations (`@Data`, `@Builder`), Kotlin data classes, `companion object` factories, `@Suppress` annotations, checked exception wrapping
- Ruby: `attr_accessor` usage, `Enumerable` mixin patterns, `module_function`, `class << self` syntax, DSL blocks (Rake, RSpec, Sinatra routes)
- PHP: `__construct` with property promotion, Laravel facades, static factory methods, nullable types with `?`, attribute syntax `#[...]`
- C/C++: header guards vs `#pragma once`, forward declarations, `const` correctness patterns, template specialization, `auto` type deduction
Severity guide:
- medium: Convention violation that will likely cause a bug or maintenance problem
- low: Convention violation that is a minor concern
Return at most 3 findings. Prefer [] over marginal findings.
Respond with a JSON array (no markdown fences):
[{"title": "...", "description": "...", "severity": "medium|low", "file": "...", "line": N, "suggestion": "..."}] [{"title": "...", "description": "...", "severity": "medium|low", "file": "...", "line": N, "suggestion": "..."}]
If no issues found, respond with: []"#; If no issues found, respond with: []"#;
pub const COMPLEXITY_REVIEW_PROMPT: &str = r#"You are reviewing code changes for complexity that is likely to cause bugs. Report ONLY complexity that makes the code demonstrably harder to reason about. pub const COMPLEXITY_REVIEW_PROMPT: &str = r#"You are reviewing code changes for excessive complexity that could lead to bugs.
Report: Look for:
- Functions over 80 lines with multiple interleaved responsibilities (not just long) - Functions over 50 lines that should be decomposed
- Deeply nested control flow (5+ levels) where flattening would prevent bugs - Deeply nested control flow (4+ levels)
- Complex boolean expressions that a reader would likely misinterpret - Complex boolean expressions that are hard to reason about
- Functions with 5+ parameters
- Code duplication within the changed files
Do NOT report: Only report complexity issues that are HIGH risk for future bugs. Ignore acceptable complexity in configuration, CLI argument parsing, or generated code.
- Functions that are long but linear and easy to follow
- Acceptable complexity: configuration setup, CLI parsing, test helpers, builder patterns
- Code that is complex because the problem is complex — only report if restructuring would reduce bug risk
- "This function does multiple things" unless you can identify a specific bug risk from the coupling
- Suggestions that would just move complexity elsewhere without reducing it
Severity guide: For each issue found, respond with a JSON array:
- medium: Complexity that has a concrete risk of causing bugs during future changes
- low: Complexity that makes review harder but is unlikely to cause bugs
Return at most 2 findings. Prefer [] over reporting complexity that is justified.
Respond with a JSON array (no markdown fences):
[{"title": "...", "description": "...", "severity": "medium|low", "file": "...", "line": N, "suggestion": "..."}] [{"title": "...", "description": "...", "severity": "medium|low", "file": "...", "line": N, "suggestion": "..."}]
If no issues found, respond with: []"#; If no issues found, respond with: []"#;

View File

@@ -8,46 +8,22 @@ use crate::pipeline::orchestrator::GraphContext;
/// Maximum number of findings to include in a single LLM triage call. /// Maximum number of findings to include in a single LLM triage call.
const TRIAGE_CHUNK_SIZE: usize = 30; const TRIAGE_CHUNK_SIZE: usize = 30;
const TRIAGE_SYSTEM_PROMPT: &str = r#"You are a pragmatic security triage expert. Your job is to filter out noise and keep only findings that a developer should actually fix. Be aggressive about dismissing false positives — a clean, high-signal list is more valuable than a comprehensive one. const TRIAGE_SYSTEM_PROMPT: &str = r#"You are a security finding triage expert. Analyze each of the following security findings with its code context and determine the appropriate action.
Actions: Actions:
- "confirm": True positive with real impact. Keep severity as-is. - "confirm": The finding is a true positive at the reported severity. Keep as-is.
- "downgrade": Real issue but over-reported severity. Lower it. - "downgrade": The finding is real but over-reported. Lower severity recommended.
- "upgrade": Under-reported — higher severity warranted. - "upgrade": The finding is under-reported. Higher severity recommended.
- "dismiss": False positive, not exploitable, or not actionable. Remove it. - "dismiss": The finding is a false positive. Should be removed.
Dismiss when: Consider:
- The scanner flagged a language idiom as a bug (see examples below) - Is the code in a test, example, or generated file? (lower confidence for test code)
- The finding is in test/example/generated/vendored code - Does the surrounding code context confirm or refute the finding?
- The "vulnerability" requires preconditions that don't exist in the code - Is the finding actionable by a developer?
- The finding is about code style, complexity, or theoretical concerns rather than actual bugs - Would a real attacker be able to exploit this?
- A hash function is used for non-security purposes (dedup, caching, content addressing)
- Internal logging of non-sensitive operational data is flagged as "information disclosure"
- The finding duplicates another finding already in the list
- Framework-provided security is already in place (e.g. ORM parameterized queries, CSRF middleware, auth decorators)
Common false positive patterns by language (dismiss these): Respond with a JSON array, one entry per finding in the same order they were presented:
- Rust: short-circuit `||`/`&&`, variable shadowing, `clone()`, `unsafe` with safety docs, `sha2` for fingerprinting [{"id": "<fingerprint>", "action": "confirm|downgrade|upgrade|dismiss", "confidence": 0-10, "rationale": "brief explanation", "remediation": "optional fix suggestion"}, ...]"#;
- Python: EAFP try/except, `subprocess` with hardcoded args, `pickle` on trusted data, Django `mark_safe` on static content
- Go: `if err != nil` is not "swallowed error", `crypto/rand` is secure, returning errors is not "information disclosure"
- Java/Kotlin: Spring Security annotations are valid auth, JPA parameterized queries are safe, Kotlin `!!` in tests is fine
- Ruby: Rails `params.permit` is validation, ActiveRecord finders are parameterized, `html_safe` on generated content
- PHP: PDO prepared statements are safe, Laravel Eloquent is parameterized, `htmlspecialchars` is XSS mitigation
- C/C++: `strncpy`/`snprintf` are bounds-checked, smart pointers manage memory, RAII handles cleanup
Confirm only when:
- You can describe a concrete scenario where the bug manifests or the vulnerability is exploitable
- The fix is actionable (developer can change specific code to resolve it)
- The finding is in production code that handles external input or sensitive data
Confidence scoring (0-10):
- 8-10: Certain true positive with clear exploit/bug scenario
- 5-7: Likely true positive, some assumptions required
- 3-4: Uncertain, needs manual review
- 0-2: Almost certainly a false positive
Respond with a JSON array, one entry per finding in the same order presented (no markdown fences):
[{"id": "<fingerprint>", "action": "confirm|downgrade|upgrade|dismiss", "confidence": 0-10, "rationale": "1-2 sentences", "remediation": "optional fix"}, ...]"#;
pub async fn triage_findings( pub async fn triage_findings(
llm: &Arc<LlmClient>, llm: &Arc<LlmClient>,

View File

@@ -314,21 +314,6 @@ impl PentestOrchestrator {
- For SPA apps: a 200 HTTP status does NOT mean the page is accessible — check the actual - For SPA apps: a 200 HTTP status does NOT mean the page is accessible — check the actual
page content with the browser tool to verify if it shows real data or a login redirect. page content with the browser tool to verify if it shows real data or a login redirect.
## Finding Quality Rules
- **Do not report the same issue twice.** If multiple tools detect the same missing header or
vulnerability on the same endpoint, report it ONCE with the most specific tool's output.
For example, if the recon tool and the header scanner both find missing HSTS, report it only
from the header scanner (more specific).
- **Group related findings.** Missing security headers on the same endpoint are ONE finding
("Missing security headers") listing all missing headers, not separate findings per header.
- **Severity must match real impact:**
- critical/high: Exploitable vulnerability (you can demonstrate the exploit)
- medium: Real misconfiguration with security implications but not directly exploitable
- low: Best-practice recommendation, defense-in-depth, or informational
- **Missing headers are medium at most** unless you can demonstrate a concrete exploit enabled
by the missing header (e.g., missing CSP + confirmed XSS = high for CSP finding).
- Console.log in third-party/vendored JS (node_modules, minified libraries) is informational only.
## Important ## Important
- This is an authorized penetration test. All testing is permitted within the target scope. - This is an authorized penetration test. All testing is permitted within the target scope.
- Respect the rate limit of {rate_limit} requests per second. - Respect the rate limit of {rate_limit} requests per second.