feat(cra): SBOM- + DAST-Findings aus dem Scanner-MCP konsumieren
CI / detect-changes (push) Successful in 8s
CI / branch-name (push) Has been skipped
CI / secret-scan (push) Has been skipped
CI / dep-audit (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / build-sha-integrity (push) Successful in 6s
CI / validate-canonical-controls (push) Successful in 10s
CI / loc-budget (push) Successful in 20s
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Has been skipped
CI / test-go (push) Successful in 1m4s
CI / iace-gt-coverage (push) Successful in 15s
CI / test-python-backend (push) Successful in 24s
CI / test-python-document-crawler (push) Has been skipped
CI / test-python-dsms-gateway (push) Has been skipped
CI / detect-changes (push) Successful in 8s
CI / branch-name (push) Has been skipped
CI / secret-scan (push) Has been skipped
CI / dep-audit (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / build-sha-integrity (push) Successful in 6s
CI / validate-canonical-controls (push) Successful in 10s
CI / loc-budget (push) Successful in 20s
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Has been skipped
CI / test-go (push) Successful in 1m4s
CI / iace-gt-coverage (push) Successful in 15s
CI / test-python-backend (push) Successful in 24s
CI / test-python-document-crawler (push) Has been skipped
CI / test-python-dsms-gateway (push) Has been skipped
Sharangs compliance-scanner-agent exponiert SBOM (sbom_vuln_report) + DAST (list_dast_findings) als eigene MCP-Tools (nicht via list_findings). Neuer fetch_all_findings(repo_id) zieht list_findings + SBOM + DAST in EINER MCP-Session und normalisiert ins Finding-Schema: - SBOM: ein Finding pro verwundbarem Paket (nicht pro CVE), cwe=CWE-1395 -> deterministisch CRA-AI-22 (robust gegen Paketnamen wie "sqlite"). - DAST: cwe/endpoint/vuln_type uebernommen -> Mapping via cwe/keywords. assess-from-scanner nutzt fetch_all_findings + liefert source.breakdown (code/sbom/dast). DAST hat im MCP keinen repo_id-Filter -> dast_repo_scoped:false (deployment-weit, transparent geflaggt). Echte MCP-Daten: Kitchenasty 58 code + 35 sbom + 81 dast -> 174 gemappt (Coverage 94,3%, alle 35 SBOM -> CRA-AI-22). Enthaelt zusaetzlich das Qdrant->Prod-Kopierскript (#42, verbatim macmini->prod). Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,89 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Verbatim copy of the IACE Qdrant knowledge-base collections to another Qdrant.
|
||||||
|
|
||||||
|
There is no RAG/embedding service on prod, so the normal ingest_iace_kb.sh has no
|
||||||
|
target there. Instead we copy the already-embedded points (id + vector + payload)
|
||||||
|
1:1 from the source Qdrant (macmini) to the destination (prod). No re-embedding,
|
||||||
|
no re-chunking → the destination is byte-identical and /sdk/v1/rag/search reads it
|
||||||
|
the same way. Idempotent: same point ids → upsert overwrites, no duplicates.
|
||||||
|
|
||||||
|
Usage (run on macmini; reads local Qdrant, writes prod Qdrant):
|
||||||
|
SRC_QDRANT=http://localhost:6333 \
|
||||||
|
DST_QDRANT=https://qdrant-dev.breakpilot.ai \
|
||||||
|
DST_QDRANT_KEY=<prod-api-key> \
|
||||||
|
python3 copy_iace_collections_to_prod.py
|
||||||
|
"""
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import urllib.error
|
||||||
|
import urllib.request
|
||||||
|
|
||||||
|
SRC = os.environ.get("SRC_QDRANT", "http://localhost:6333").rstrip("/")
|
||||||
|
DST = os.environ["DST_QDRANT"].rstrip("/")
|
||||||
|
KEY = os.environ["DST_QDRANT_KEY"]
|
||||||
|
COLLECTIONS = os.environ.get(
|
||||||
|
"COLLECTIONS", "bp_iace_accident_stats,bp_iace_safety_kb,bp_iace_failure_kb"
|
||||||
|
).split(",")
|
||||||
|
BATCH = 128
|
||||||
|
|
||||||
|
|
||||||
|
def _req(method, url, body=None, key=None):
|
||||||
|
data = json.dumps(body).encode() if body is not None else None
|
||||||
|
r = urllib.request.Request(url, data=data, method=method)
|
||||||
|
r.add_header("Content-Type", "application/json")
|
||||||
|
if key:
|
||||||
|
r.add_header("api-key", key)
|
||||||
|
with urllib.request.urlopen(r, timeout=120) as resp:
|
||||||
|
return json.loads(resp.read())
|
||||||
|
|
||||||
|
|
||||||
|
def _exists(base, col, key=None) -> bool:
|
||||||
|
try:
|
||||||
|
_req("GET", f"{base}/collections/{col}", key=key)
|
||||||
|
return True
|
||||||
|
except urllib.error.HTTPError as e:
|
||||||
|
if e.code == 404:
|
||||||
|
return False
|
||||||
|
raise
|
||||||
|
|
||||||
|
|
||||||
|
def copy_collection(col: str) -> None:
|
||||||
|
src_cfg = _req("GET", f"{SRC}/collections/{col}")["result"]["config"]["params"]["vectors"]
|
||||||
|
size, dist = src_cfg["size"], src_cfg["distance"]
|
||||||
|
if _exists(DST, col, KEY):
|
||||||
|
print(f" {col}: dst exists — upserting into it")
|
||||||
|
else:
|
||||||
|
_req("PUT", f"{DST}/collections/{col}", {"vectors": {"size": size, "distance": dist}}, KEY)
|
||||||
|
print(f" {col}: created on dst ({size}d {dist})")
|
||||||
|
|
||||||
|
offset, total = None, 0
|
||||||
|
while True:
|
||||||
|
body = {"limit": BATCH, "with_vector": True, "with_payload": True}
|
||||||
|
if offset is not None:
|
||||||
|
body["offset"] = offset
|
||||||
|
res = _req("POST", f"{SRC}/collections/{col}/points/scroll", body)["result"]
|
||||||
|
pts = res.get("points", [])
|
||||||
|
if not pts:
|
||||||
|
break
|
||||||
|
upsert = [{"id": p["id"], "vector": p["vector"], "payload": p.get("payload", {})} for p in pts]
|
||||||
|
_req("PUT", f"{DST}/collections/{col}/points?wait=true", {"points": upsert}, KEY)
|
||||||
|
total += len(pts)
|
||||||
|
offset = res.get("next_page_offset")
|
||||||
|
if offset is None:
|
||||||
|
break
|
||||||
|
|
||||||
|
src_n = _req("POST", f"{SRC}/collections/{col}/points/count", {"exact": True})["result"]["count"]
|
||||||
|
dst_n = _req("POST", f"{DST}/collections/{col}/points/count", {"exact": True}, KEY)["result"]["count"]
|
||||||
|
flag = "OK" if dst_n >= src_n else "MISMATCH"
|
||||||
|
print(f" {col}: copied {total} | src={src_n} dst={dst_n} [{flag}]")
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
|
||||||
|
print(f"Copy IACE collections {SRC} -> {DST}")
|
||||||
|
for col in COLLECTIONS:
|
||||||
|
copy_collection(col.strip())
|
||||||
|
print("Done.")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -21,7 +21,7 @@ from compliance.services.cra_applicability import (
|
|||||||
compute_verdict, compute_machinery_verdict, maturity as evidence_maturity, MACHINE_INTEGRATOR,
|
compute_verdict, compute_machinery_verdict, maturity as evidence_maturity, MACHINE_INTEGRATOR,
|
||||||
)
|
)
|
||||||
from compliance.services.cra_datasheet_extractor import extract_grenzen
|
from compliance.services.cra_datasheet_extractor import extract_grenzen
|
||||||
from compliance.services.scanner_mcp_client import fetch_findings
|
from compliance.services.scanner_mcp_client import fetch_findings, fetch_all_findings
|
||||||
from compliance.services.cra_snapshot_store import save_snapshot, list_snapshots, get_snapshot
|
from compliance.services.cra_snapshot_store import save_snapshot, list_snapshots, get_snapshot
|
||||||
from compliance.services.cra_use_case_controls import enrich_findings_with_breadth
|
from compliance.services.cra_use_case_controls import enrich_findings_with_breadth
|
||||||
from compliance.services.cra_component_findings import findings_from_components
|
from compliance.services.cra_component_findings import findings_from_components
|
||||||
@@ -121,10 +121,11 @@ async def assess_from_scanner(body: ScannerPullRequest):
|
|||||||
scan_type/cvss_score/file_path). Returns empty assessment if no scanner is
|
scan_type/cvss_score/file_path). Returns empty assessment if no scanner is
|
||||||
configured — the frontend then keeps its demo scenario.
|
configured — the frontend then keeps its demo scenario.
|
||||||
"""
|
"""
|
||||||
findings = await fetch_findings(
|
pulled = await fetch_all_findings(
|
||||||
repo_id=body.repo_id, severity=body.severity,
|
repo_id=body.repo_id, severity=body.severity,
|
||||||
base_url=body.scanner_url, token=body.token,
|
base_url=body.scanner_url, token=body.token,
|
||||||
)
|
)
|
||||||
|
findings = pulled.get("findings", [])
|
||||||
payload = {
|
payload = {
|
||||||
"findings": findings,
|
"findings": findings,
|
||||||
"weights": body.weights,
|
"weights": body.weights,
|
||||||
@@ -136,7 +137,8 @@ async def assess_from_scanner(body: ScannerPullRequest):
|
|||||||
enrich_findings_with_breadth(result.get("mapped", []), db)
|
enrich_findings_with_breadth(result.get("mapped", []), db)
|
||||||
finally:
|
finally:
|
||||||
db.close()
|
db.close()
|
||||||
result["source"] = {"scanner": True, "pulled": len(findings)}
|
result["source"] = {"scanner": True, "pulled": len(findings),
|
||||||
|
"breakdown": pulled.get("breakdown", {})}
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -57,3 +57,130 @@ async def fetch_findings(
|
|||||||
|
|
||||||
texts = [c.text for c in (result.content or []) if getattr(c, "type", "") == "text"]
|
texts = [c.text for c in (result.content or []) if getattr(c, "type", "") == "text"]
|
||||||
return parse_findings_text(texts[0]) if texts else []
|
return parse_findings_text(texts[0]) if texts else []
|
||||||
|
|
||||||
|
|
||||||
|
# --- SBOM + DAST consumption (Sharang's scanner exposes these as dedicated MCP
|
||||||
|
# tools, not via list_findings) -------------------------------------------------
|
||||||
|
|
||||||
|
_SEV_BY_RANK = {4: "critical", 3: "high", 2: "medium", 1: "low"}
|
||||||
|
_SEV_RANK = {v: k for k, v in _SEV_BY_RANK.items()}
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_sbom_report(text: str) -> list:
|
||||||
|
"""sbom_vuln_report -> one finding per VULNERABLE PACKAGE (not per CVE — a repo
|
||||||
|
can have hundreds of CVEs but ~dozens of packages). scan_type='dependency' so
|
||||||
|
the CRA mapper routes it to dependency-monitoring (CRA-AI-22)."""
|
||||||
|
try:
|
||||||
|
data = json.loads(text)
|
||||||
|
except (json.JSONDecodeError, TypeError):
|
||||||
|
return []
|
||||||
|
if not isinstance(data, dict):
|
||||||
|
return []
|
||||||
|
repo_id = data.get("repo_id", "")
|
||||||
|
out = []
|
||||||
|
for pkg in data.get("packages") or []:
|
||||||
|
vulns = pkg.get("vulnerabilities") or []
|
||||||
|
if not vulns:
|
||||||
|
continue
|
||||||
|
ids, seen, best = [], set(), 0
|
||||||
|
for v in vulns:
|
||||||
|
vid = v.get("id")
|
||||||
|
if vid and vid not in seen:
|
||||||
|
seen.add(vid)
|
||||||
|
ids.append(vid)
|
||||||
|
best = max(best, _SEV_RANK.get((v.get("severity") or "").lower(), 0))
|
||||||
|
name, ver = pkg.get("name", ""), pkg.get("version", "")
|
||||||
|
pm = pkg.get("package_manager", "") or ""
|
||||||
|
shown = ", ".join(ids[:8]) + (" …" if len(ids) > 8 else "")
|
||||||
|
out.append({
|
||||||
|
"id": f"sbom:{repo_id}:{name}@{ver}",
|
||||||
|
"repo_id": repo_id,
|
||||||
|
"title": f"Verwundbare Abhängigkeit: {name} {ver} ({len(ids)} Schwachstelle(n))",
|
||||||
|
"description": f"Abhängigkeit {name} {ver} ({pm}) mit bekannten Schwachstellen: {shown}.",
|
||||||
|
"scan_type": "dependency",
|
||||||
|
# CWE-1395 (Dependency on Vulnerable Third-Party Component) → the CWE
|
||||||
|
# path maps deterministically to CRA-AI-22, robust against package
|
||||||
|
# names that happen to contain keyword tokens (e.g. "sqlite" → "sql").
|
||||||
|
"cwe": "CWE-1395",
|
||||||
|
"severity": _SEV_BY_RANK.get(best, "medium"),
|
||||||
|
"location": f"{pm}:{name}@{ver}" if pm else f"{name}@{ver}",
|
||||||
|
})
|
||||||
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_dast(text: str) -> list:
|
||||||
|
"""list_dast_findings -> findings (carry cwe + endpoint + vuln_type so the CRA
|
||||||
|
mapper routes them via cwe/keywords). scan_type='dast'."""
|
||||||
|
out = []
|
||||||
|
for d in parse_findings_text(text):
|
||||||
|
if not isinstance(d, dict):
|
||||||
|
continue
|
||||||
|
out.append({
|
||||||
|
"id": d.get("_id") or d.get("id") or "",
|
||||||
|
"repo_id": d.get("repo_id") or "",
|
||||||
|
"title": d.get("title", ""),
|
||||||
|
"description": " ".join(x for x in [d.get("vuln_type", ""), d.get("description", "")] if x),
|
||||||
|
"scan_type": "dast",
|
||||||
|
"cwe": str(d.get("cwe", "") or ""),
|
||||||
|
"severity": (d.get("severity") or "").lower(),
|
||||||
|
"location": d.get("endpoint") or d.get("target_id") or "",
|
||||||
|
"exploited": bool(d.get("exploitable", False)),
|
||||||
|
})
|
||||||
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
async def _open_and_call(url: str, tok: str, calls: list) -> dict:
|
||||||
|
"""Open ONE MCP session and run [(tool, params), ...] -> {tool: text}. A tool
|
||||||
|
that errors yields '' (best-effort; the assessment degrades, never breaks)."""
|
||||||
|
from mcp.client.session import ClientSession
|
||||||
|
from mcp.client.streamable_http import streamablehttp_client
|
||||||
|
|
||||||
|
headers = {"Authorization": f"Bearer {tok}"} if tok else None
|
||||||
|
out: dict = {}
|
||||||
|
async with streamablehttp_client(url, headers=headers) as (read, write, _):
|
||||||
|
async with ClientSession(read, write) as session:
|
||||||
|
await session.initialize()
|
||||||
|
for tool, params in calls:
|
||||||
|
try:
|
||||||
|
res = await session.call_tool(tool, params)
|
||||||
|
texts = [c.text for c in (res.content or []) if getattr(c, "type", "") == "text"]
|
||||||
|
out[tool] = texts[0] if texts else ""
|
||||||
|
except Exception:
|
||||||
|
out[tool] = ""
|
||||||
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
async def fetch_all_findings(
|
||||||
|
repo_id: Optional[str] = None,
|
||||||
|
severity: Optional[str] = None,
|
||||||
|
limit: int = 200,
|
||||||
|
base_url: Optional[str] = None,
|
||||||
|
token: Optional[str] = None,
|
||||||
|
include_dast: bool = True,
|
||||||
|
) -> dict:
|
||||||
|
"""Pull list_findings + SBOM-vulns + DAST in one MCP session and return a
|
||||||
|
unified finding list plus a per-source breakdown. SBOM is repo-scoped
|
||||||
|
(sbom_vuln_report requires repo_id); DAST has no repo_id filter in the MCP, so
|
||||||
|
it is deployment-wide (flagged in the breakdown). Returns {} on no config."""
|
||||||
|
url = (base_url or SCANNER_MCP_URL).rstrip("/")
|
||||||
|
tok = token or SCANNER_MCP_TOKEN
|
||||||
|
if not url:
|
||||||
|
return {"findings": [], "breakdown": {}}
|
||||||
|
|
||||||
|
calls = [("list_findings", {"limit": limit, **({"repo_id": repo_id} if repo_id else {}),
|
||||||
|
**({"severity": severity} if severity else {})})]
|
||||||
|
if repo_id:
|
||||||
|
calls.append(("sbom_vuln_report", {"repo_id": repo_id}))
|
||||||
|
if include_dast:
|
||||||
|
calls.append(("list_dast_findings", {"limit": limit,
|
||||||
|
**({"severity": severity} if severity else {})}))
|
||||||
|
|
||||||
|
res = await _open_and_call(url, tok, calls)
|
||||||
|
code = parse_findings_text(res.get("list_findings", ""))
|
||||||
|
sbom = normalize_sbom_report(res.get("sbom_vuln_report", "")) if repo_id else []
|
||||||
|
dast = normalize_dast(res.get("list_dast_findings", "")) if include_dast else []
|
||||||
|
return {
|
||||||
|
"findings": code + sbom + dast,
|
||||||
|
"breakdown": {"code": len(code), "sbom": len(sbom), "dast": len(dast),
|
||||||
|
"dast_repo_scoped": False},
|
||||||
|
}
|
||||||
|
|||||||
@@ -0,0 +1,77 @@
|
|||||||
|
"""SBOM/DAST normalization from the scanner MCP -> CRA finding shape + mapping.
|
||||||
|
|
||||||
|
Shapes pinned from the live MCP (sbom_vuln_report / list_dast_findings, 2026-06-18).
|
||||||
|
"""
|
||||||
|
import json
|
||||||
|
|
||||||
|
from compliance.services.scanner_mcp_client import normalize_sbom_report, normalize_dast
|
||||||
|
from compliance.services.cra_finding_mapper import ScannerFinding, map_finding
|
||||||
|
|
||||||
|
SBOM = json.dumps({
|
||||||
|
"repo_id": "r1",
|
||||||
|
"vulnerable_packages_count": 1,
|
||||||
|
"total_vulnerabilities": 3,
|
||||||
|
"packages": [
|
||||||
|
{"name": "dompurify", "version": "3.3.3", "package_manager": "npm",
|
||||||
|
"license": "MIT", "vulnerabilities": [
|
||||||
|
{"id": "GHSA-39q2", "source": "osv", "severity": None},
|
||||||
|
{"id": "GHSA-39q2", "source": "osv", "severity": None}, # dup
|
||||||
|
{"id": "GHSA-76mc", "source": "osv", "severity": "high"}]},
|
||||||
|
{"name": "clean-pkg", "version": "1.0", "package_manager": "npm",
|
||||||
|
"vulnerabilities": []}, # no vulns -> skipped
|
||||||
|
],
|
||||||
|
})
|
||||||
|
|
||||||
|
DAST = json.dumps([
|
||||||
|
{"_id": {"$oid": "abc123"}, "vuln_type": "security_misconfiguration",
|
||||||
|
"title": "SQL backup exposure: /backup.sql", "description": "Sensitive resource accessible.",
|
||||||
|
"severity": "high", "cwe": "CWE-16", "endpoint": "https://demo.x/backup.sql",
|
||||||
|
"method": "GET", "exploitable": True},
|
||||||
|
])
|
||||||
|
|
||||||
|
|
||||||
|
class TestSbom:
|
||||||
|
def test_one_finding_per_vulnerable_package(self):
|
||||||
|
out = normalize_sbom_report(SBOM)
|
||||||
|
assert len(out) == 1 # clean-pkg skipped
|
||||||
|
f = out[0]
|
||||||
|
assert f["scan_type"] == "dependency"
|
||||||
|
assert f["cwe"] == "CWE-1395"
|
||||||
|
assert f["location"] == "npm:dompurify@3.3.3"
|
||||||
|
assert f["severity"] == "high" # escalated from the one graded vuln
|
||||||
|
assert "GHSA-39q2" in f["description"] and "GHSA-76mc" in f["description"]
|
||||||
|
|
||||||
|
def test_maps_to_dependency_requirement_even_with_keyword_in_name(self):
|
||||||
|
# CWE path dominates → CRA-AI-22, not CRA-AI-20 from a "sql"-like name
|
||||||
|
out = normalize_sbom_report(json.dumps({
|
||||||
|
"repo_id": "r", "packages": [
|
||||||
|
{"name": "sqlite3", "version": "5.0", "package_manager": "npm",
|
||||||
|
"vulnerabilities": [{"id": "CVE-x", "severity": "medium"}]}]}))
|
||||||
|
m = map_finding(ScannerFinding.from_dict(out[0]))
|
||||||
|
assert m.primary_requirement == "CRA-AI-22"
|
||||||
|
|
||||||
|
def test_bad_json(self):
|
||||||
|
assert normalize_sbom_report("not json") == []
|
||||||
|
assert normalize_sbom_report("{}") == []
|
||||||
|
|
||||||
|
|
||||||
|
class TestDast:
|
||||||
|
def test_normalizes_dast_finding(self):
|
||||||
|
out = normalize_dast(DAST)
|
||||||
|
assert len(out) == 1
|
||||||
|
f = out[0]
|
||||||
|
assert f["scan_type"] == "dast"
|
||||||
|
assert f["cwe"] == "CWE-16"
|
||||||
|
assert f["location"] == "https://demo.x/backup.sql"
|
||||||
|
assert f["exploited"] is True
|
||||||
|
assert "security_misconfiguration" in f["description"]
|
||||||
|
|
||||||
|
def test_dast_maps_via_cwe(self):
|
||||||
|
out = normalize_dast(DAST)
|
||||||
|
m = map_finding(ScannerFinding.from_dict(out[0]))
|
||||||
|
assert m.primary_requirement == "CRA-AI-1" # CWE-16 -> secure config
|
||||||
|
assert m.finding_id == "abc123" # _id.$oid extracted
|
||||||
|
|
||||||
|
def test_empty(self):
|
||||||
|
assert normalize_dast("[]") == []
|
||||||
|
assert normalize_dast("not json") == []
|
||||||
Reference in New Issue
Block a user