feat(mcp): HTTP+Bearer CRA-MCP-Server für den Repo-Scanner + Finding-Adapter

Register-Flow für compliance-scanner-agent (anderes Team, Rust): deren MCP-Client (McpServerConfig) erwartet Streamable HTTP + Bearer — unser MCP war stdio/ohne Auth. - server.py auf FastMCP umgestellt: Tools cra_assess_findings + cra_list_requirements, Dual-Transport (stdio default; Streamable HTTP wenn MCP_PORT gesetzt), Bearer-Gate via CRA_MCP_TOKEN. - ScannerFinding.from_dict tolerant für ihr Finding-Schema (_id/fingerprint, scan_type→category, cvss_score→cvss, file_path→location, severity info→low). - Eigenständiger docker-compose-Dienst bp-compliance-mcp (Port 8099, pure/kein DB, isoliert von der Haupt-API) + Hetzner-amd64-Override. - Tests: test_cra_scanner_adapter, test_mcp_server (Bearer-Gate + Tool-Registry). Pull-Flow (wir holen ihre Findings über ihren MCP) + öffentliches nginx-Routing folgen separat (brauchen ihren Endpoint/Token). Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-15 18:30:47 +02:00
parent 7aaa7e083b
commit 414496c31a
6 changed files with 222 additions and 75 deletions
@@ -1,87 +1,84 @@
 """MCP server: the interface the external repo-scanner queries for CRA risk.

-We are the MCP *server*; the scanner is the client and asks us, in a targeted
-way, to turn its findings into a CRA (Cyber Resilience Act) risk assessment. All
-assessment logic lives in the deterministic, fully-tested
-compliance.services.cra_finding_mapper — this module is only the MCP transport
-glue (stdio). Run as: ``python -m compliance.mcp.server``.
+We are an MCP *server* exposing the deterministic CRA assessment. The scanner
+(compliance-scanner-agent) registers us in its McpServerConfig registry and calls
+``cra_assess_findings`` with the findings it already produced. All assessment
+logic lives in compliance.services.cra_finding_mapper — this module is only MCP
+transport glue.

-Transport note: stdio is the default. If the scanner needs HTTP/streamable
-transport instead, only the ``main()`` runner below changes.
+Transports:
+  - stdio (default):            ``python -m compliance.mcp.server``
+  - Streamable HTTP + Bearer:   set ``MCP_PORT`` (the scanner uses HTTP transport).
+    Auth: if ``CRA_MCP_TOKEN`` is set, every request needs ``Authorization:
+    Bearer <token>``; if unset, the endpoint is open (local/stdio dev only).
 """
-import asyncio
 import json
+import os
+from typing import Optional

-from mcp.server import Server
-from mcp.server.stdio import stdio_server
-from mcp.types import Tool, TextContent
+from mcp.server.fastmcp import FastMCP

-from compliance.services.cra_finding_mapper import assess_findings_payload
 from compliance.api.cra_annex_i_data import ANNEX_I_REQUIREMENTS
+from compliance.services.cra_finding_mapper import assess_findings_payload

-server = Server("breakpilot-cra")
-
-_FINDINGS_SCHEMA = {
-    "type": "object",
-    "properties": {
-        "findings": {
-            "type": "array",
-            "description": "Findings the scanner already produced.",
-            "items": {
-                "type": "object",
-                "properties": {
-                    "id": {"type": "string"},
-                    "title": {"type": "string"},
-                    "description": {"type": "string"},
-                    "category": {"type": "string", "description": "e.g. crypto, auth, secrets, dependency"},
-                    "cwe": {"type": "string", "description": "e.g. CWE-798"},
-                    "severity": {"type": "string", "enum": ["critical", "high", "medium", "low"]},
-                    "cvss": {"type": "number"},
-                    "location": {"type": "string"},
-                },
-                "required": ["id"],
-            },
-        }
-    },
-    "required": ["findings"],
-}
+mcp = FastMCP("breakpilot-cra")


-@server.list_tools()
-async def list_tools() -> list:
-    return [
-        Tool(
-            name="cra_assess_findings",
-            description=(
-                "Map repo-scanner findings to the CRA Annex I essential requirements they "
-                "violate, derive a risk level per finding, and return the remediation measures "
-                "plus coverage. Deterministic; works standalone (no project/FMEA needed)."
-            ),
-            inputSchema=_FINDINGS_SCHEMA,
-        ),
-        Tool(
-            name="cra_list_requirements",
-            description="Return the 40 CRA Annex I essential requirements (the assessment spine).",
-            inputSchema={"type": "object", "properties": {}},
-        ),
-    ]
+@mcp.tool(
+    description=(
+        "Map repo-scanner findings to the CRA Annex I essential requirements they "
+        "violate, derive a risk level per finding, and return remediation measures "
+        "plus coverage. Deterministic; standalone (no project/FMEA needed). Each "
+        "finding accepts: id (required), title, description, category/scan_type, cwe, "
+        "severity (info|low|medium|high|critical), cvss/cvss_score, location/file_path."
+    )
+)
+async def cra_assess_findings(
+    findings: list,
+    weights: Optional[dict] = None,
+    safety_functions: Optional[list] = None,
+) -> str:
+    """Assess scanner findings and return the result as a JSON string.
+
+    The arguments are wrapped into the payload dict handed to
+    ``assess_findings_payload``; a missing or empty ``weights`` /
+    ``safety_functions`` is forwarded as an empty dict / empty list.
+    Non-ASCII characters are kept verbatim in the JSON output.
+    """
+    request = {"findings": findings}
+    request["weights"] = weights if weights else {}
+    request["safety_functions"] = safety_functions if safety_functions else []
+    assessment = assess_findings_payload(request)
+    return json.dumps(assessment, ensure_ascii=False)


-@server.call_tool()
-async def call_tool(name: str, arguments: dict) -> list:
-    if name == "cra_assess_findings":
-        result = assess_findings_payload(arguments or {})
-    elif name == "cra_list_requirements":
-        result = {"requirements": ANNEX_I_REQUIREMENTS}
+@mcp.tool(description="Return the 40 CRA Annex I essential requirements (the assessment spine).")
+async def cra_list_requirements() -> str:
+    """Return ``ANNEX_I_REQUIREMENTS`` under the key ``requirements`` as a JSON string."""
+    catalogue = {"requirements": ANNEX_I_REQUIREMENTS}
+    return json.dumps(catalogue, ensure_ascii=False)
+
+
+def _build_http_app():
+    """Build the Streamable-HTTP ASGI app, guarded by an optional Bearer-token gate.
+
+    The expected token is read from ``CRA_MCP_TOKEN`` once, when this function
+    is called. If it is empty or unset the gate is a no-op and every request is
+    forwarded. Otherwise a request whose ``Authorization`` header is not exactly
+    ``Bearer <token>`` receives a 401 JSON response and never reaches the MCP
+    layer.
+    """
+    import hmac
+
+    from starlette.middleware.base import BaseHTTPMiddleware
+    from starlette.responses import JSONResponse
+
+    token = (os.environ.get("CRA_MCP_TOKEN") or "").strip()
+    # Encode once up front: compare_digest() raises TypeError for str values
+    # containing non-ASCII characters, so the comparison is done on bytes.
+    expected = f"Bearer {token}".encode("utf-8")
+
+    class BearerAuth(BaseHTTPMiddleware):
+        async def dispatch(self, request, call_next):
+            if token:
+                supplied = request.headers.get("authorization", "").encode("utf-8")
+                # Constant-time comparison so response timing does not reveal
+                # how much of a guessed token matched.
+                if not hmac.compare_digest(supplied, expected):
+                    return JSONResponse({"error": "unauthorized"}, status_code=401)
+            return await call_next(request)
+
+    app = mcp.streamable_http_app()
+    app.add_middleware(BearerAuth)
+    return app
+
+
+def main() -> None:
+    """Run the MCP server on the transport selected by ``MCP_PORT``.
+
+    With ``MCP_PORT`` set, the Streamable-HTTP app is served by uvicorn on all
+    interfaces at that port (``int(port)`` raises ``ValueError`` for a
+    non-numeric value); otherwise ``mcp.run()`` is used (stdio).
+    """
+    port = os.environ.get("MCP_PORT")
+    if port:
+        # Imported here so uvicorn is only needed when serving HTTP.
+        import uvicorn
+        uvicorn.run(_build_http_app(), host="0.0.0.0", port=int(port))
    else:
-        raise ValueError("Unknown tool: {}".format(name))
-    return [TextContent(type="text", text=json.dumps(result, ensure_ascii=False))]
-
-
-async def main() -> None:
-    async with stdio_server() as (read_stream, write_stream):
-        await server.run(read_stream, write_stream, server.create_initialization_options())
+        mcp.run()  # stdio


 if __name__ == "__main__":
-    asyncio.run(main())
+    main()
@@ -73,15 +73,23 @@ class ScannerFinding:

    @classmethod
    def from_dict(cls, d: dict) -> "ScannerFinding":
+        """Build a ScannerFinding from a loosely-shaped finding dict.
+
+        Accepts both our own field names and the compliance-scanner-agent
+        aliases; for each attribute the first non-empty candidate key wins.
+        Severity is lower-cased and ``"info"`` is folded into ``"low"``.
+        """
+        # Tolerant to the compliance-scanner-agent Finding shape (Rust/Mongo):
+        # _id/fingerprint, scan_type, cvss_score, file_path, severity incl. "info".
+        raw_id = d.get("id") or d.get("finding_id") or d.get("_id") or d.get("fingerprint") or ""
+        if isinstance(raw_id, dict):           # Mongo extended JSON {"$oid": "..."}
+            raw_id = raw_id.get("$oid") or ""
+        # str() keeps a non-string severity from raising on .lower(); strip()
+        # lets padded values such as "Info " still hit the info -> low mapping.
+        sev = str(d.get("severity") or "").strip().lower()
+        if sev == "info":                      # scanner has 5 levels; we use 4
+            sev = "low"
        return cls(
-            id=str(d.get("id") or d.get("finding_id") or ""),
+            id=str(raw_id),
            title=d.get("title", "") or d.get("name", ""),
            description=d.get("description", "") or d.get("detail", ""),
-            category=d.get("category", "") or d.get("type", ""),
+            category=d.get("category", "") or d.get("type", "") or d.get("scan_type", "") or d.get("scanner", ""),
            cwe=str(d.get("cwe", "") or ""),
-            severity=d.get("severity", "") or "",
-            cvss=d.get("cvss"),
-            location=d.get("location", "") or d.get("path", ""),
+            severity=sev,
+            cvss=d.get("cvss") if d.get("cvss") is not None else d.get("cvss_score"),
+            location=d.get("location", "") or d.get("path", "") or d.get("file_path", ""),
            safety_impact=bool(d.get("safety_impact", False)),
            exploited=bool(d.get("exploited", False)),
        )
@@ -0,0 +1,63 @@
+"""The MCP finding adapter must accept the compliance-scanner-agent Finding shape.
+
+Their Rust/Mongo Finding uses _id, scan_type, cvss_score, file_path and a 5-level
+severity (incl. "info"). Our mapper consumes id, category, cvss, location and a
+4-level severity. from_dict bridges that gap.
+"""
+from compliance.services.cra_finding_mapper import (
+    ScannerFinding,
+    assess_findings_payload,
+)
+
+
+def _scanner_finding(**over):
+    """Return a dict shaped like a finding emitted by compliance-scanner-agent.
+
+    Keyword arguments replace default fields of the same name or add new ones.
+    """
+    defaults = {
+        "_id": "507f1f77bcf86cd799439011",
+        "repo_id": "r1",
+        "fingerprint": "fp-123",
+        "scanner": "semgrep",
+        "scan_type": "secret_detection",
+        "title": "Hardcoded credential in source",
+        "description": "A hardcoded password was found.",
+        "severity": "high",
+        "cwe": "CWE-798",
+        "cve": None,
+        "cvss_score": 8.2,
+        "file_path": "src/config.py",
+        "status": "open",
+    }
+    return {**defaults, **over}
+
+
+def test_from_dict_maps_scanner_field_names():
+    """Scanner-native keys end up on the mapper's own attribute names."""
+    finding = ScannerFinding.from_dict(_scanner_finding())
+    # id <- _id, category <- scan_type
+    assert (finding.id, finding.category) == ("507f1f77bcf86cd799439011", "secret_detection")
+    # cvss <- cvss_score, location <- file_path
+    assert (finding.cvss, finding.location) == (8.2, "src/config.py")
+    # cwe and severity use the same key on both sides
+    assert (finding.cwe, finding.severity) == ("CWE-798", "high")
+
+
+def test_from_dict_info_severity_becomes_low():
+    """The scanner's extra "info" level is folded into "low"."""
+    info_finding = _scanner_finding(severity="info")
+    assert ScannerFinding.from_dict(info_finding).severity == "low"
+
+
+def test_from_dict_mongo_extended_json_id():
+    """An ``_id`` given as ``{"$oid": ...}`` is unwrapped to the plain id string."""
+    wrapped = _scanner_finding(_id={"$oid": "abc123"})
+    assert ScannerFinding.from_dict(wrapped).id == "abc123"
+
+
+def test_scanner_finding_assesses_to_cra_requirement():
+    """A scanner-shaped finding runs through the full assessment and is mapped."""
+    payload = {"findings": [_scanner_finding()]}
+    mapped = assess_findings_payload(payload)["mapped"]
+    assert len(mapped) == 1
+    entry = mapped[0]
+    assert entry["finding_id"] == "507f1f77bcf86cd799439011"
+    # CWE-798 -> CRA-AI-9/-8 (hardcoded credentials / no default passwords)
+    assert entry["primary_requirement"].startswith("CRA-AI-")
+    assert entry["requirement_ids"]
+    assert entry["risk_level"] in {"LOW", "MEDIUM", "HIGH", "CRITICAL"}
@@ -0,0 +1,51 @@
+"""The MCP HTTP transport must gate on the Bearer token the scanner sends.
+
+The scanner registers us with an access_token (McpServerConfig.access_token) and
+calls over Streamable HTTP. When CRA_MCP_TOKEN is set, requests without the exact
+``Authorization: Bearer <token>`` must be rejected before reaching the MCP layer.
+"""
+import importlib
+
+import pytest
+
+pytest.importorskip("mcp")  # MCP SDK only present in the container/CI image
+
+# Headers sent with every test request: JSON body, JSON or SSE accepted in reply.
+_MCP_HDR = {"Accept": "application/json, text/event-stream", "Content-Type": "application/json"}
+# Minimal JSON-RPC "initialize" request, used only as a probe for the auth gate.
+_INIT = {
+    "jsonrpc": "2.0", "id": 1, "method": "initialize",
+    "params": {"protocolVersion": "2024-11-05", "capabilities": {},
+               "clientInfo": {"name": "t", "version": "1"}},
+}
+
+
+@pytest.fixture()
+def client(monkeypatch):
+    """Yield a Starlette TestClient for the HTTP app with CRA_MCP_TOKEN=testtok."""
+    monkeypatch.setenv("CRA_MCP_TOKEN", "testtok")
+    from starlette.testclient import TestClient
+    import compliance.mcp.server as srv
+    # Re-executes the module, so each test gets a freshly created ``mcp`` object.
+    # NOTE(review): the token is read inside _build_http_app() at call time, so
+    # the reload is not what applies the patched env; confirm it is still needed.
+    importlib.reload(srv)  # rebuild app under the patched env
+    with TestClient(srv._build_http_app()) as c:
+        yield c
+
+
+def test_missing_token_rejected(client):
+    """A request without any Authorization header is answered with 401."""
+    response = client.post("/mcp", json=_INIT, headers=_MCP_HDR)
+    assert response.status_code == 401
+
+
+def test_wrong_token_rejected(client):
+    """A Bearer header carrying the wrong token is answered with 401."""
+    bad_headers = dict(_MCP_HDR, Authorization="Bearer nope")
+    response = client.post("/mcp", json=_INIT, headers=bad_headers)
+    assert response.status_code == 401
+
+
+def test_correct_token_passes_auth(client):
+    """The matching token gets past the gate; only "not 401" is asserted."""
+    # Reaches the MCP layer (any non-401 status) → the Bearer gate let it through.
+    good_headers = dict(_MCP_HDR, Authorization="Bearer testtok")
+    response = client.post("/mcp", json=_INIT, headers=good_headers)
+    assert response.status_code != 401
+
+
+def test_tools_registered():
+    """Both CRA tools are registered on the FastMCP instance."""
+    import asyncio
+
+    import compliance.mcp.server as srv
+    importlib.reload(srv)
+    # Use FastMCP's public (async) list_tools() instead of the private
+    # _tool_manager attribute, which is not part of the SDK's stable surface.
+    names = {t.name for t in asyncio.run(srv.mcp.list_tools())}
+    assert {"cra_assess_findings", "cra_list_requirements"} <= names
@@ -40,6 +40,9 @@ services:
      core-health-check:
        condition: service_completed_successfully

+  # amd64 override for the mcp-compliance service, which the base compose
+  # file declares as linux/arm64.
+  mcp-compliance:
+    platform: linux/amd64
+
  ai-compliance-sdk:
    platform: linux/amd64
    depends_on:
@@ -144,6 +144,31 @@ services:
    networks:
      - breakpilot-network

+  # MCP server for the external repo-scanner (compliance-scanner-agent).
+  # Standalone: exposes cra_assess_findings over Streamable HTTP + Bearer.
+  # Pure/deterministic (no DB/core) — isolated from the main API on purpose.
+  mcp-compliance:
+    build:
+      args:
+        BUILD_SHA: ${BUILD_SHA:-unknown}
+      context: ./backend-compliance
+      dockerfile: Dockerfile
+    container_name: bp-compliance-mcp
+    platform: linux/arm64
+    command: ["python", "-m", "compliance.mcp.server"]
+    expose:
+      - "8099"
+    ports:
+      - "8099:8099"
+    environment:
+      # Setting MCP_PORT switches the server from stdio to Streamable HTTP.
+      MCP_PORT: 8099
+      # WARNING: defaults to empty. With an empty token the server's Bearer
+      # gate is a no-op, and port 8099 is published on the host above, so
+      # set CRA_MCP_TOKEN in every non-local deployment.
+      CRA_MCP_TOKEN: ${CRA_MCP_TOKEN:-}
+    healthcheck:
+      disable: true
+    restart: unless-stopped
+    networks:
+      - breakpilot-network
+
  # =========================================================
  # SDK SERVICES
  # =========================================================