diff --git a/backend-compliance/compliance/api/cra_assess_routes.py b/backend-compliance/compliance/api/cra_assess_routes.py
index a3be621d..45922c06 100644
--- a/backend-compliance/compliance/api/cra_assess_routes.py
+++ b/backend-compliance/compliance/api/cra_assess_routes.py
@@ -15,6 +15,7 @@ from fastapi import APIRouter, Depends, HTTPException
 from pydantic import BaseModel
 
 from compliance.services.cra_finding_mapper import assess_findings_payload
+from compliance.services.scanner_mcp_client import fetch_findings
 from compliance.services.cra_snapshot_store import save_snapshot, list_snapshots, get_snapshot
 from compliance.services.cra_use_case_controls import enrich_findings_with_breadth
 from compliance.services.cra_component_findings import findings_from_components
@@ -97,6 +98,42 @@ async def assess(body: AssessRequest):
     return _assess_enriched(body)
 
 
+class ScannerPullRequest(BaseModel):
+    repo_id: Optional[str] = None
+    severity: Optional[str] = None
+    scanner_url: Optional[str] = None  # override SCANNER_MCP_URL; needs its own token
+    token: Optional[str] = None  # override SCANNER_MCP_TOKEN
+    weights: Optional[Dict[str, str]] = None
+    safety_functions: Optional[List[SafetyFunctionIn]] = None
+
+
+@router.post("/assess-from-scanner")
+async def assess_from_scanner(body: ScannerPullRequest):
+    """Pull-flow: fetch findings from the scanner's MCP, then assess.
+
+    Raw scanner finding dicts go straight to the tolerant mapper (keeps
+    scan_type/cvss_score/file_path). Returns an empty assessment if no scanner
+    is configured or the pull fails — the frontend then keeps its demo scenario.
+    """
+    findings = await fetch_findings(
+        repo_id=body.repo_id, severity=body.severity,
+        base_url=body.scanner_url, token=body.token,
+    )
+    payload = {
+        "findings": findings,
+        "weights": body.weights,
+        "safety_functions": [s.model_dump() for s in body.safety_functions] if body.safety_functions else None,
+    }
+    result = assess_findings_payload(payload)
+    db = SessionLocal()
+    try:
+        enrich_findings_with_breadth(result.get("mapped", []), db)
+    finally:
+        db.close()
+    result["source"] = {"scanner": True, "pulled": len(findings)}
+    return result
+
+
 @router.post("/projects/{project_id}/assess-snapshot")
 async def assess_snapshot(project_id: str, body: AssessRequest, tenant_id: str = Depends(get_tenant_id)):
     """Run the assessment and persist it as a versioned snapshot (running system)."""
diff --git a/backend-compliance/compliance/services/scanner_mcp_client.py b/backend-compliance/compliance/services/scanner_mcp_client.py
new file mode 100644
index 00000000..0b5da97a
--- /dev/null
+++ b/backend-compliance/compliance/services/scanner_mcp_client.py
@@ -0,0 +1,107 @@
+"""MCP client to the external compliance-scanner-agent (pull-flow).
+
+We connect to THEIR MCP server (Streamable HTTP + Bearer), pull the findings they
+already produced (list_findings), and feed them into our deterministic CRA
+assessment. Their tool returns a JSON array of Finding docs as text content; the
+field shape is bridged by ScannerFinding.from_dict (scan_type/cvss_score/...).
+
+Config via env (SCANNER_MCP_URL, SCANNER_MCP_TOKEN) or per-call override. When no
+URL is configured, fetch_findings returns [] — callers fall back to their demo.
+"""
+import asyncio
+import json
+import logging
+import os
+from typing import Optional, Tuple
+
+logger = logging.getLogger(__name__)
+
+SCANNER_MCP_URL = os.getenv("SCANNER_MCP_URL", "")
+SCANNER_MCP_TOKEN = os.getenv("SCANNER_MCP_TOKEN", "")
+
+
+def parse_findings_text(text: str) -> list:
+    """Parse the list_findings tool result (a JSON array, or {findings|results:[...]}).
+
+    Returns [] for anything that is not valid JSON in one of those shapes, and
+    drops entries that are not JSON objects (findings are consumed as dicts).
+    """
+    try:
+        data = json.loads(text)
+    except (ValueError, TypeError):  # JSONDecodeError is a ValueError
+        return []
+    if isinstance(data, dict):
+        data = data.get("findings") or data.get("results") or []
+    if not isinstance(data, list):
+        return []
+    return [item for item in data if isinstance(item, dict)]
+
+
+def _resolve_target(base_url: Optional[str], token: Optional[str]) -> Tuple[str, str]:
+    """Pick the (url, token) pair to connect with.
+
+    SCANNER_MCP_TOKEN is only paired with SCANNER_MCP_URL. A different,
+    caller-supplied base_url must bring its own token; otherwise our configured
+    secret would be sent to whatever host the caller names.
+    """
+    env_url = SCANNER_MCP_URL.rstrip("/")
+    url = (base_url or SCANNER_MCP_URL).rstrip("/")
+    if url == env_url:
+        return url, token or SCANNER_MCP_TOKEN
+    return url, token or ""
+
+
+def _extract_findings(result) -> list:
+    """Collect findings from every text content block of a call_tool result."""
+    if getattr(result, "isError", False):
+        return []
+    findings: list = []
+    for block in getattr(result, "content", None) or []:
+        if getattr(block, "type", "") == "text":
+            findings.extend(parse_findings_text(block.text))
+    return findings
+
+
+async def _call_list_findings(url: str, headers: Optional[dict], params: dict):
+    """Open an MCP session against the scanner and call its list_findings tool."""
+    # Lazy import: the MCP SDK is only needed once a scanner URL is configured.
+    from mcp.client.session import ClientSession
+    from mcp.client.streamable_http import streamablehttp_client
+
+    async with streamablehttp_client(url, headers=headers) as (read, write, _):
+        async with ClientSession(read, write) as session:
+            await session.initialize()
+            return await session.call_tool("list_findings", params)
+
+
+async def fetch_findings(
+    repo_id: Optional[str] = None,
+    severity: Optional[str] = None,
+    limit: int = 200,
+    base_url: Optional[str] = None,
+    token: Optional[str] = None,
+    timeout: float = 30.0,
+) -> list:
+    """Pull findings from the scanner's MCP server. Returns [] if unconfigured or on error.
+
+    repo_id/severity are forwarded as list_findings filters when set; limit is
+    always forwarded. base_url/token override the env config for this call.
+    timeout (seconds) bounds the whole connect + call round trip.
+    """
+    url, tok = _resolve_target(base_url, token)
+    if not url:
+        return []
+
+    headers = {"Authorization": f"Bearer {tok}"} if tok else None
+    params: dict = {"limit": limit}
+    if repo_id:
+        params["repo_id"] = repo_id
+    if severity:
+        params["severity"] = severity
+
+    try:
+        result = await asyncio.wait_for(_call_list_findings(url, headers, params), timeout)
+    except Exception:  # boundary to an external service: degrade to "no findings"
+        logger.warning("Scanner MCP pull from %s failed", url, exc_info=True)
+        return []
+    return _extract_findings(result)
diff --git a/backend-compliance/tests/test_scanner_mcp_client.py b/backend-compliance/tests/test_scanner_mcp_client.py
new file mode 100644
index 00000000..c2415f3b
--- /dev/null
+++ b/backend-compliance/tests/test_scanner_mcp_client.py
@@ -0,0 +1,61 @@
+"""Pull-flow client: parse the scanner's list_findings result + safe no-config path."""
+import asyncio
+from types import SimpleNamespace
+
+from compliance.services import scanner_mcp_client as client
+from compliance.services.scanner_mcp_client import fetch_findings, parse_findings_text
+
+
+def test_parse_plain_array():
+    out = parse_findings_text('[{"_id":"a","title":"x"},{"_id":"b"}]')
+    assert len(out) == 2
+    assert out[0]["_id"] == "a"
+
+
+def test_parse_wrapped_findings_key():
+    assert parse_findings_text('{"findings":[{"_id":"a"}]}') == [{"_id": "a"}]
+
+
+def test_parse_wrapped_results_key():
+    assert parse_findings_text('{"results":[{"_id":"a"}]}') == [{"_id": "a"}]
+
+
+def test_parse_garbage_returns_empty():
+    assert parse_findings_text("not json") == []
+    assert parse_findings_text("") == []
+    assert parse_findings_text('{"x":1}') == []
+
+
+def test_parse_drops_non_object_entries():
+    assert parse_findings_text('[{"_id":"a"}, 1, "x", null]') == [{"_id": "a"}]
+
+
+def test_env_token_is_not_sent_to_override_url(monkeypatch):
+    monkeypatch.setattr(client, "SCANNER_MCP_URL", "https://scanner.internal/mcp")
+    monkeypatch.setattr(client, "SCANNER_MCP_TOKEN", "s3cret")
+    assert client._resolve_target(None, None) == ("https://scanner.internal/mcp", "s3cret")
+    assert client._resolve_target("https://elsewhere.example/mcp/", None) == (
+        "https://elsewhere.example/mcp",
+        "",
+    )
+    assert client._resolve_target("https://elsewhere.example/mcp", "own") == (
+        "https://elsewhere.example/mcp",
+        "own",
+    )
+
+
+def test_extract_findings_merges_text_blocks_and_honours_is_error():
+    blocks = [
+        SimpleNamespace(type="text", text='[{"_id":"a"}]'),
+        SimpleNamespace(type="image", data="..."),
+        SimpleNamespace(type="text", text='{"findings":[{"_id":"b"}]}'),
+    ]
+    ok = SimpleNamespace(isError=False, content=blocks)
+    assert client._extract_findings(ok) == [{"_id": "a"}, {"_id": "b"}]
+    assert client._extract_findings(SimpleNamespace(isError=True, content=blocks)) == []
+
+
+def test_fetch_findings_no_url_returns_empty(monkeypatch):
+    # Unconfigured + no override -> [] (no MCP lib needed; transport is lazy-imported).
+    monkeypatch.setattr(client, "SCANNER_MCP_URL", "")
+    assert asyncio.run(fetch_findings(base_url="")) == []
diff --git a/docker-compose.yml b/docker-compose.yml
index 5335adae..0384bffe 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -124,6 +124,10 @@ services:
       SMTP_FROM_NAME: ${SMTP_FROM_NAME:-BreakPilot Compliance}
       SMTP_FROM_ADDR: ${SMTP_FROM_ADDR:-compliance@breakpilot.app}
       RAG_SERVICE_URL: http://bp-core-rag-service:8097
+      # Pull-flow: external repo-scanner MCP (compliance-scanner-agent). Unset =
+      # disabled, /assess-from-scanner returns empty and the UI keeps its demo.
+      SCANNER_MCP_URL: ${SCANNER_MCP_URL:-}
+      SCANNER_MCP_TOKEN: ${SCANNER_MCP_TOKEN:-}
       # LLM cascade for V3 vendor extraction (unknown CMPs).
       # Reuses the same env vars as the consent-tester so both can be
       # configured in one place.