"""MCP client to the external compliance-scanner-agent (pull-flow). We connect to THEIR MCP server (Streamable HTTP + Bearer), pull the findings they already produced (list_findings), and feed them into our deterministic CRA assessment. Their tool returns a JSON array of Finding docs as text content; the field shape is bridged by ScannerFinding.from_dict (scan_type/cvss_score/...). Config via env (SCANNER_MCP_URL, SCANNER_MCP_TOKEN) or per-call override. When no URL is configured, fetch_findings returns [] — callers fall back to their demo. """ import json import os from typing import Optional SCANNER_MCP_URL = os.getenv("SCANNER_MCP_URL", "") SCANNER_MCP_TOKEN = os.getenv("SCANNER_MCP_TOKEN", "") def parse_findings_text(text: str) -> list: """Parse the list_findings tool result (a JSON array, or {findings|results:[...]}).""" try: data = json.loads(text) except (json.JSONDecodeError, TypeError): return [] if isinstance(data, dict): data = data.get("findings") or data.get("results") or [] return data if isinstance(data, list) else [] async def fetch_findings( repo_id: Optional[str] = None, severity: Optional[str] = None, limit: int = 200, base_url: Optional[str] = None, token: Optional[str] = None, ) -> list: """Pull findings from the scanner's MCP server. Returns [] if unconfigured or on error.""" url = (base_url or SCANNER_MCP_URL).rstrip("/") tok = token or SCANNER_MCP_TOKEN if not url: return [] from mcp.client.session import ClientSession from mcp.client.streamable_http import streamablehttp_client headers = {"Authorization": f"Bearer {tok}"} if tok else None params: dict = {"limit": limit} if repo_id: params["repo_id"] = repo_id if severity: params["severity"] = severity async with streamablehttp_client(url, headers=headers) as (read, write, _): async with ClientSession(read, write) as session: await session.initialize() result = await session.call_tool("list_findings", params) texts = [c.text for c in (result.content or []) if getattr(c, "type", "") == "text"] return parse_findings_text(texts[0]) if texts else [] # --- SBOM + DAST consumption (Sharang's scanner exposes these as dedicated MCP # tools, not via list_findings) ------------------------------------------------- _SEV_BY_RANK = {4: "critical", 3: "high", 2: "medium", 1: "low"} _SEV_RANK = {v: k for k, v in _SEV_BY_RANK.items()} def normalize_sbom_report(text: str) -> list: """sbom_vuln_report -> one finding per VULNERABLE PACKAGE (not per CVE — a repo can have hundreds of CVEs but ~dozens of packages). scan_type='dependency' so the CRA mapper routes it to dependency-monitoring (CRA-AI-22).""" try: data = json.loads(text) except (json.JSONDecodeError, TypeError): return [] if not isinstance(data, dict): return [] repo_id = data.get("repo_id", "") out = [] for pkg in data.get("packages") or []: vulns = pkg.get("vulnerabilities") or [] if not vulns: continue ids, seen, best = [], set(), 0 for v in vulns: vid = v.get("id") if vid and vid not in seen: seen.add(vid) ids.append(vid) best = max(best, _SEV_RANK.get((v.get("severity") or "").lower(), 0)) name, ver = pkg.get("name", ""), pkg.get("version", "") pm = pkg.get("package_manager", "") or "" shown = ", ".join(ids[:8]) + (" …" if len(ids) > 8 else "") out.append({ "id": f"sbom:{repo_id}:{name}@{ver}", "repo_id": repo_id, "title": f"Verwundbare Abhängigkeit: {name} {ver} ({len(ids)} Schwachstelle(n))", "description": f"Abhängigkeit {name} {ver} ({pm}) mit bekannten Schwachstellen: {shown}.", "scan_type": "dependency", # CWE-1395 (Dependency on Vulnerable Third-Party Component) → the CWE # path maps deterministically to CRA-AI-22, robust against package # names that happen to contain keyword tokens (e.g. "sqlite" → "sql"). "cwe": "CWE-1395", "severity": _SEV_BY_RANK.get(best, "medium"), "location": f"{pm}:{name}@{ver}" if pm else f"{name}@{ver}", }) return out def normalize_dast(text: str) -> list: """list_dast_findings -> findings (carry cwe + endpoint + vuln_type so the CRA mapper routes them via cwe/keywords). scan_type='dast'.""" out = [] for d in parse_findings_text(text): if not isinstance(d, dict): continue out.append({ "id": d.get("_id") or d.get("id") or "", "repo_id": d.get("repo_id") or "", "title": d.get("title", ""), "description": " ".join(x for x in [d.get("vuln_type", ""), d.get("description", "")] if x), "scan_type": "dast", "cwe": str(d.get("cwe", "") or ""), "severity": (d.get("severity") or "").lower(), "location": d.get("endpoint") or d.get("target_id") or "", "exploited": bool(d.get("exploitable", False)), }) return out async def _open_and_call(url: str, tok: str, calls: list) -> dict: """Open ONE MCP session and run [(tool, params), ...] -> {tool: text}. A tool that errors yields '' (best-effort; the assessment degrades, never breaks).""" from mcp.client.session import ClientSession from mcp.client.streamable_http import streamablehttp_client headers = {"Authorization": f"Bearer {tok}"} if tok else None out: dict = {} async with streamablehttp_client(url, headers=headers) as (read, write, _): async with ClientSession(read, write) as session: await session.initialize() for tool, params in calls: try: res = await session.call_tool(tool, params) texts = [c.text for c in (res.content or []) if getattr(c, "type", "") == "text"] out[tool] = texts[0] if texts else "" except Exception: out[tool] = "" return out async def fetch_all_findings( repo_id: Optional[str] = None, severity: Optional[str] = None, limit: int = 200, base_url: Optional[str] = None, token: Optional[str] = None, include_dast: bool = True, ) -> dict: """Pull list_findings + SBOM-vulns + DAST in one MCP session and return a unified finding list plus a per-source breakdown. SBOM is repo-scoped (sbom_vuln_report requires repo_id); DAST has no repo_id filter in the MCP, so it is deployment-wide (flagged in the breakdown). Returns {} on no config.""" url = (base_url or SCANNER_MCP_URL).rstrip("/") tok = token or SCANNER_MCP_TOKEN if not url: return {"findings": [], "breakdown": {}} calls = [("list_findings", {"limit": limit, **({"repo_id": repo_id} if repo_id else {}), **({"severity": severity} if severity else {})})] if repo_id: calls.append(("sbom_vuln_report", {"repo_id": repo_id})) if include_dast: calls.append(("list_dast_findings", {"limit": limit, **({"severity": severity} if severity else {})})) res = await _open_and_call(url, tok, calls) code = parse_findings_text(res.get("list_findings", "")) sbom = normalize_sbom_report(res.get("sbom_vuln_report", "")) if repo_id else [] dast = normalize_dast(res.get("list_dast_findings", "")) if include_dast else [] return { "findings": code + sbom + dast, "breakdown": {"code": len(code), "sbom": len(sbom), "dast": len(dast), "dast_repo_scoped": False}, }