feat(cra): standalone CRA finding->Annex I risk mapper + MCP interface

Deterministic mapper (no DB/LLM): repo-scanner findings -> the CRA Annex I
essential requirement(s) they violate -> risk level -> remediation measures +
coverage. Reuses the existing Annex I spine (cra_annex_i_data). The MCP server
(compliance/mcp/server.py, stdio) is the thin transport the external scanner
queries; all logic lives in the fully-tested mapper. Works standalone (no
project/FMEA required). No DB migrations.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-06-13 20:22:34 +02:00
parent 3489eaf8b0
commit a73b996381
5 changed files with 387 additions and 0 deletions
@@ -0,0 +1 @@
"""MCP Server for Compliance Document Checking."""
@@ -0,0 +1,87 @@
"""MCP server: the interface the external repo-scanner queries for CRA risk.
We are the MCP *server*; the scanner is the client and asks us, in a targeted
way, to turn its findings into a CRA (Cyber Resilience Act) risk assessment. All
assessment logic lives in the deterministic, fully-tested
compliance.services.cra_finding_mapper — this module is only the MCP transport
glue (stdio). Run as: ``python -m compliance.mcp.server``.
Transport note: stdio is the default. If the scanner needs HTTP/streamable
transport instead, only the ``main()`` runner below changes.
"""
import asyncio
import json
from mcp.server import Server
from mcp.server.stdio import stdio_server
from mcp.types import Tool, TextContent
from compliance.services.cra_finding_mapper import assess_findings_payload
from compliance.api.cra_annex_i_data import ANNEX_I_REQUIREMENTS
# Shared MCP server instance named "breakpilot-cra"; the tool handlers in this
# module register themselves on it through its decorators.
server = Server("breakpilot-cra")
# JSON Schema of one scanner finding. Only ``id`` is mandatory; every other
# property is optional context for the mapper.
_FINDING_ITEM_SCHEMA = {
    "type": "object",
    "properties": {
        "id": {"type": "string"},
        "title": {"type": "string"},
        "description": {"type": "string"},
        "category": {"type": "string", "description": "e.g. crypto, auth, secrets, dependency"},
        "cwe": {"type": "string", "description": "e.g. CWE-798"},
        "severity": {"type": "string", "enum": ["critical", "high", "medium", "low"]},
        "cvss": {"type": "number"},
        "location": {"type": "string"},
    },
    "required": ["id"],
}

# Input schema of the ``cra_assess_findings`` tool: an object carrying the
# list of findings under the required ``findings`` key.
_FINDINGS_SCHEMA = {
    "type": "object",
    "properties": {
        "findings": {
            "type": "array",
            "description": "Findings the scanner already produced.",
            "items": _FINDING_ITEM_SCHEMA,
        }
    },
    "required": ["findings"],
}
@server.list_tools()
async def list_tools() -> list:
    """Advertise the tools this MCP server exposes.

    Returns:
        Two ``Tool`` descriptors: ``cra_assess_findings`` (input described by
        ``_FINDINGS_SCHEMA``) and ``cra_list_requirements`` (no arguments).
    """
    # Take the count from the spine itself so the advertised number cannot
    # drift from the data when requirements are added or removed.
    requirement_count = len(ANNEX_I_REQUIREMENTS)
    return [
        Tool(
            name="cra_assess_findings",
            description=(
                "Map repo-scanner findings to the CRA Annex I essential requirements they "
                "violate, derive a risk level per finding, and return the remediation measures "
                "plus coverage. Deterministic; works standalone (no project/FMEA needed)."
            ),
            inputSchema=_FINDINGS_SCHEMA,
        ),
        Tool(
            name="cra_list_requirements",
            description=(
                "Return the {} CRA Annex I essential requirements (the assessment spine)."
            ).format(requirement_count),
            inputSchema={"type": "object", "properties": {}},
        ),
    ]
@server.call_tool()
async def call_tool(name: str, arguments: dict) -> list:
    """Dispatch one MCP tool invocation and return its JSON-encoded result.

    Args:
        name: Tool name as advertised by ``list_tools``.
        arguments: Tool arguments; a falsy value is treated as an empty dict.

    Returns:
        A single-element list holding one ``TextContent`` whose text is the
        JSON serialisation of the tool result.

    Raises:
        ValueError: If ``name`` is not one of the known tools.
    """
    if name not in ("cra_assess_findings", "cra_list_requirements"):
        raise ValueError("Unknown tool: {}".format(name))

    if name == "cra_list_requirements":
        body = {"requirements": ANNEX_I_REQUIREMENTS}
    else:
        body = assess_findings_payload(arguments or {})

    # ensure_ascii=False keeps non-ASCII characters readable in the output.
    encoded = json.dumps(body, ensure_ascii=False)
    return [TextContent(type="text", text=encoded)]
async def main() -> None:
    """Run the MCP server over the stdio transport."""
    async with stdio_server() as streams:
        incoming, outgoing = streams
        await server.run(incoming, outgoing, server.create_initialization_options())


# Script entry point: ``python -m compliance.mcp.server``.
if __name__ == "__main__":
    asyncio.run(main())
@@ -0,0 +1,212 @@
"""Deterministic mapper: scanner finding -> CRA Annex I requirement -> risk + measures.
This is the brain of the standalone CRA cyber risk assessment (the layer the
external repo-scanner queries via MCP). It takes findings the scanner already
produced (we do NOT re-scan, and we do NOT duplicate the CVE/NVD knowledge the
scanner owns) and maps each to the CRA Annex I essential requirement(s) it
violates, derives a risk level, and attaches the remediation measures.
Pure + deterministic: no DB, no LLM, no network. Same input -> same output.
The requirement/measure spine is the single source of truth in
compliance.api.cra_annex_i_data (pure data, no FastAPI dependency).
"""
from dataclasses import dataclass, field, asdict
from typing import Optional
from compliance.api.cra_annex_i_data import ANNEX_I_REQUIREMENTS, MEASURES, DEADLINES
# Requirement id -> full requirement record from the Annex I spine.
_REQ_INDEX = {req["req_id"]: req for req in ANNEX_I_REQUIREMENTS}
# Severity label -> ordinal rank, ascending from LOW (1) to CRITICAL (4).
_SEV_ORDER = {
    label: rank
    for rank, label in enumerate(("LOW", "MEDIUM", "HIGH", "CRITICAL"), start=1)
}
# Inverse lookup: ordinal rank -> severity label.
_SEV_BY_RANK = {rank: label for label, rank in _SEV_ORDER.items()}
# High-confidence mapping from the bare CWE number to the CRA-AI requirement
# id(s) it violates, primary requirement first. Candidates that are not present
# in the spine are filtered out when the table is consulted.
_CWE_TO_REQ = {
    # CRA-AI-9 / CRA-AI-8
    798: ["CRA-AI-9", "CRA-AI-8"],
    259: ["CRA-AI-8"],
    1392: ["CRA-AI-8"],
    521: ["CRA-AI-8"],
    # CRA-AI-7
    287: ["CRA-AI-7"],
    306: ["CRA-AI-7"],
    304: ["CRA-AI-7"],
    # CRA-AI-12
    862: ["CRA-AI-12"],
    863: ["CRA-AI-12"],
    # CRA-AI-4
    732: ["CRA-AI-4"],
    269: ["CRA-AI-4"],
    250: ["CRA-AI-4"],
    # CRA-AI-13 (plus 916 -> CRA-AI-9)
    327: ["CRA-AI-13"],
    326: ["CRA-AI-13"],
    328: ["CRA-AI-13"],
    916: ["CRA-AI-9"],
    # CRA-AI-15 / CRA-AI-14
    319: ["CRA-AI-15"],
    311: ["CRA-AI-15"],
    312: ["CRA-AI-14"],
    # CRA-AI-9 / CRA-AI-16 / CRA-AI-13
    522: ["CRA-AI-9"],
    320: ["CRA-AI-16"],
    200: ["CRA-AI-13"],
    # CRA-AI-1
    1188: ["CRA-AI-1"],
    16: ["CRA-AI-1"],
    # CRA-AI-10
    1004: ["CRA-AI-10"],
    614: ["CRA-AI-10"],
    384: ["CRA-AI-10"],
    613: ["CRA-AI-10"],
    352: ["CRA-AI-10"],
    # CRA-AI-11
    307: ["CRA-AI-11"],
    # CRA-AI-6 / CRA-AI-30
    345: ["CRA-AI-6"],
    353: ["CRA-AI-6"],
    494: ["CRA-AI-30"],
    # CRA-AI-24 / CRA-AI-27
    778: ["CRA-AI-24"],
    532: ["CRA-AI-24"],
    117: ["CRA-AI-27"],
    # CRA-AI-22
    1104: ["CRA-AI-22"],
    1035: ["CRA-AI-22"],
    937: ["CRA-AI-22"],
    1395: ["CRA-AI-22"],
    # CRA-AI-20
    79: ["CRA-AI-20"],
    89: ["CRA-AI-20"],
    77: ["CRA-AI-20"],
    78: ["CRA-AI-20"],
    22: ["CRA-AI-20"],
    20: ["CRA-AI-20"],
}
# Keyword fallback: each lowercase keyword is searched as a plain substring in
# the lowercased "category title description" text of a finding. Rules are
# evaluated top to bottom, so earlier rules yield the earlier (primary) match.
_KEYWORD_TO_REQ = [
    ("default password", "CRA-AI-8"),
    ("hardcoded", "CRA-AI-9"),
    ("secret", "CRA-AI-9"),
    ("credential", "CRA-AI-9"),
    ("password", "CRA-AI-8"),
    ("mfa", "CRA-AI-7"),
    ("authentication", "CRA-AI-7"),
    ("authoriz", "CRA-AI-12"),
    ("access control", "CRA-AI-12"),
    ("privilege", "CRA-AI-4"),
    ("tls", "CRA-AI-15"),
    ("ssl", "CRA-AI-15"),
    ("cipher", "CRA-AI-13"),
    ("crypto", "CRA-AI-13"),
    ("encrypt", "CRA-AI-13"),
    ("cleartext", "CRA-AI-15"),
    ("at rest", "CRA-AI-14"),
    ("session", "CRA-AI-10"),
    ("cookie", "CRA-AI-10"),
    ("csrf", "CRA-AI-10"),
    ("brute", "CRA-AI-11"),
    ("rate limit", "CRA-AI-11"),
    ("sbom", "CRA-AI-23"),
    ("dependency", "CRA-AI-22"),
    ("outdated", "CRA-AI-22"),
    ("known vuln", "CRA-AI-22"),
    ("cve", "CRA-AI-22"),
    ("injection", "CRA-AI-20"),
    ("xss", "CRA-AI-20"),
    ("sql", "CRA-AI-20"),
    ("traversal", "CRA-AI-20"),
    ("logging", "CRA-AI-24"),
    ("update", "CRA-AI-28"),
    ("signature", "CRA-AI-29"),
    ("integrity", "CRA-AI-6"),
    ("debug", "CRA-AI-1"),
    ("config", "CRA-AI-1"),
]
@dataclass
class ScannerFinding:
    """One finding emitted by the external repo-scanner.

    Only ``id`` is required; every other attribute has an "empty" default so a
    sparse scanner record can still be processed.
    """

    id: str
    title: str = ""
    description: str = ""
    category: str = ""
    cwe: str = ""
    severity: str = ""  # critical | high | medium | low (scanner's rating)
    cvss: Optional[float] = None
    location: str = ""

    @classmethod
    def from_dict(cls, d: dict) -> "ScannerFinding":
        """Build a finding from a raw scanner record.

        Accepts the alias keys ``finding_id``, ``name``, ``detail``, ``type``
        and ``path`` as fallbacks for ``id``, ``title``, ``description``,
        ``category`` and ``location``.

        Every text attribute is coerced to ``str`` and ``cvss`` to ``float``,
        so a record with ``null`` or mistyped values cannot crash the later
        string operations and numeric comparisons of the mapper.

        Args:
            d: Raw finding record as produced by the scanner.

        Returns:
            The normalised ``ScannerFinding``.
        """

        def text(*keys: str) -> str:
            # First truthy value among the given keys, as a string.
            for key in keys:
                value = d.get(key)
                if value:
                    return str(value)
            return ""

        raw_cvss = d.get("cvss")
        try:
            cvss = None if raw_cvss is None else float(raw_cvss)
        except (TypeError, ValueError, OverflowError):
            # An unparseable score is treated like a missing one; the
            # scanner's severity label (if any) still applies.
            cvss = None

        return cls(
            id=text("id", "finding_id"),
            title=text("title", "name"),
            description=text("description", "detail"),
            category=text("category", "type"),
            cwe=text("cwe"),
            severity=text("severity"),
            cvss=cvss,
            location=text("location", "path"),
        )
@dataclass
class MappedFinding:
    """Result of mapping one scanner finding onto the CRA Annex I spine."""

    # Id of the scanner finding this result belongs to.
    finding_id: str
    # All matched requirement ids, primary requirement first.
    requirement_ids: list = field(default_factory=list)
    # Id of the primary requirement; empty when nothing matched.
    primary_requirement: str = ""
    # Annex anchor and ISO 27001 references copied from the primary requirement.
    annex_anchor: str = ""
    iso27001_ref: list = field(default_factory=list)
    # Resulting risk label: CRITICAL, HIGH, MEDIUM or LOW.
    risk_level: str = "LOW"
    # De-duplicated measure ids collected from every matched requirement.
    measures: list = field(default_factory=list)
    # Short human-readable justification of the mapping.
    rationale: str = ""
    # True when no requirement could be determined for the finding.
    unmapped: bool = False
@dataclass
class CRAAssessment:
    """Aggregated CRA assessment over a batch of scanner findings."""

    # Number of findings that were assessed.
    findings_total: int
    # One MappedFinding per input finding, in input order.
    mapped: list = field(default_factory=list)
    # Risk label -> number of findings with that label.
    by_risk: dict = field(default_factory=dict)
    # Sorted ids of every requirement hit by at least one finding.
    requirements_touched: list = field(default_factory=list)
    # Remediation measures to work on: [{id, description}]
    open_measures: list = field(default_factory=list)
    # Ids of the findings for which no requirement was determined.
    unmapped_findings: list = field(default_factory=list)
    # Share of findings that were mapped, in percent.
    coverage_pct: float = 0.0
    # Fresh copy of DEADLINES per instance, so instances never share the list.
    deadlines: list = field(default_factory=lambda: list(DEADLINES))
def _cwe_num(cwe: str) -> Optional[int]:
    """Extract the bare CWE number from a scanner-supplied CWE reference.

    The first number that follows a ``CWE`` marker wins (``"CWE-798"``,
    ``"cwe_79"``, ``"A03:2021 / CWE-89"``); without a marker the first run of
    digits is used (``"798"``). Digits are never concatenated across separate
    numbers, so ``"CWE-79, CWE-89"`` yields 79 rather than 7989.

    Args:
        cwe: CWE reference in any textual form; non-strings are stringified.

    Returns:
        The CWE number, or ``None`` when the value contains no decimal digits.
    """
    import re  # local import: keeps this helper self-contained

    text = str(cwe)
    # ``\d`` only matches decimal digits that ``int()`` can parse; characters
    # such as superscript digits are ignored instead of raising ValueError.
    match = re.search(r"CWE\D{0,3}(\d+)", text, re.IGNORECASE) or re.search(r"(\d+)", text)
    return int(match.group(1)) if match else None
def _sev_from_cvss(cvss: Optional[float]) -> str:
    """Translate a CVSS base score into a severity label.

    Args:
        cvss: CVSS score, or ``None`` when the scanner supplied none.

    Returns:
        ``"CRITICAL"`` (>= 9.0), ``"HIGH"`` (>= 7.0), ``"MEDIUM"`` (>= 4.0),
        ``"LOW"`` (> 0), or ``""`` for ``None`` and scores that are not
        positive.
    """
    if cvss is None:
        return ""
    # Lower bounds checked from most to least severe; first hit wins.
    for floor, label in ((9.0, "CRITICAL"), (7.0, "HIGH"), (4.0, "MEDIUM")):
        if cvss >= floor:
            return label
    return "LOW" if cvss > 0 else ""
def _rank(sev: str) -> int:
    """Return the ordinal rank of a severity label, or 0 when it is unknown.

    The lookup is case-insensitive; an empty or ``None`` label ranks 0.
    """
    label = sev.upper() if sev else ""
    return _SEV_ORDER.get(label, 0)
def _candidate_reqs(f: ScannerFinding) -> list:
    """Collect the requirement ids a finding points at, primary first.

    CWE-based candidates come first, followed by keyword hits in the order of
    ``_KEYWORD_TO_REQ``. Duplicates are dropped (first occurrence kept) and
    ids that do not exist in the spine are discarded.
    """
    # CWE table first: it is the higher-confidence signal.
    candidates = list(_CWE_TO_REQ.get(_cwe_num(f.cwe), ()))

    # Keyword fallback over the lowercased free-text attributes.
    text = " ".join((f.category, f.title, f.description)).lower()
    candidates += [req_id for keyword, req_id in _KEYWORD_TO_REQ if keyword in text]

    # dict.fromkeys drops duplicates while keeping first-seen order.
    return [req_id for req_id in dict.fromkeys(candidates) if req_id in _REQ_INDEX]
def map_finding(f: ScannerFinding) -> MappedFinding:
    """Map one scanner finding to its CRA requirement(s), risk level and measures.

    The finding's own severity is the scanner's label when that label is one
    of the known levels (case-insensitive, surrounding whitespace ignored).
    Otherwise - label missing or unrecognised, e.g. "info" - it is derived
    from the CVSS score. The resulting risk level is the higher of that
    severity and the severity of the primary requirement.

    Args:
        f: The finding to map.

    Returns:
        A ``MappedFinding``. When no requirement matches, it is flagged
        ``unmapped`` and carries only the finding's own severity (``"LOW"``
        if that is unknown).
    """
    finding_sev = (f.severity or "").strip().upper()
    if finding_sev not in _SEV_ORDER:
        # An unusable label must not hide a usable CVSS score.
        finding_sev = _sev_from_cvss(f.cvss)
    finding_rank = _rank(finding_sev)

    reqs = _candidate_reqs(f)
    if not reqs:
        return MappedFinding(
            finding_id=f.id,
            risk_level=_SEV_BY_RANK.get(finding_rank, "LOW"),
            rationale="Kein eindeutiger CRA-Anforderungsbezug erkannt — manuelle Pruefung.",
            unmapped=True,
        )

    primary = _REQ_INDEX[reqs[0]]
    risk_rank = max(finding_rank, _rank(primary["severity"]))

    # Union of the measures of every matched requirement, first-seen order.
    measures: list = []
    for rid in reqs:
        for measure in _REQ_INDEX[rid].get("mapped_measures", []):
            if measure not in measures:
                measures.append(measure)

    return MappedFinding(
        finding_id=f.id,
        requirement_ids=reqs,
        primary_requirement=primary["req_id"],
        annex_anchor=primary.get("annex_anchor", ""),
        iso27001_ref=list(primary.get("iso27001_ref", [])),
        risk_level=_SEV_BY_RANK.get(risk_rank, "LOW"),
        measures=measures,
        rationale="{}: {}".format(primary["req_id"], primary.get("title", "")),
    )
def assess_findings(findings: list) -> CRAAssessment:
    """Aggregate a list of ScannerFinding into one deterministic CRA assessment.

    Every finding is mapped individually; the results are then summarised into
    per-risk counts, the sorted set of touched requirements, the ordered list
    of distinct measures, the ids of unmapped findings and the mapping
    coverage in percent (0.0 for an empty input).
    """
    results = [map_finding(item) for item in findings]

    risk_counts = dict.fromkeys(("CRITICAL", "HIGH", "MEDIUM", "LOW"), 0)
    for res in results:
        risk_counts[res.risk_level] = risk_counts.get(res.risk_level, 0) + 1

    unmapped_ids = [res.finding_id for res in results if res.unmapped]
    touched = {rid for res in results for rid in res.requirement_ids}
    # dict.fromkeys keeps the first occurrence of each measure id, in order.
    distinct_measures = dict.fromkeys(mid for res in results for mid in res.measures)

    total = len(findings)
    mapped_count = total - len(unmapped_ids)
    coverage = round(100.0 * mapped_count / total, 1) if total else 0.0

    return CRAAssessment(
        findings_total=total,
        mapped=results,
        by_risk=risk_counts,
        requirements_touched=sorted(touched),
        open_measures=[
            {"id": mid, "description": MEASURES.get(mid, "")} for mid in distinct_measures
        ],
        unmapped_findings=unmapped_ids,
        coverage_pct=coverage,
    )
def assess_findings_payload(payload: dict) -> dict:
    """MCP/HTTP entry: {"findings": [ {...}, ... ]} -> assessment dict.

    This is the testable tool body the MCP server wraps (kept transport-free).

    Args:
        payload: Tool arguments. A non-dict payload, a missing ``findings``
            key and ``"findings": null`` are all treated as "no findings".

    Returns:
        The assessment as a plain dict of built-in types.

    Raises:
        ValueError: If ``findings`` is not a list, or one of its entries is
            not an object.
    """
    raw = payload.get("findings") if isinstance(payload, dict) else None
    if raw is None:
        raw = []
    if not isinstance(raw, (list, tuple)):
        raise ValueError("'findings' must be a list of finding objects")

    # Reject malformed entries up front with a message that names them,
    # instead of failing later with an opaque AttributeError.
    bad_positions = [pos for pos, item in enumerate(raw) if not isinstance(item, dict)]
    if bad_positions:
        raise ValueError(
            "'findings' entries must be objects; invalid at index {}".format(
                ", ".join(str(pos) for pos in bad_positions)
            )
        )

    findings = [ScannerFinding.from_dict(item) for item in raw]
    assessment = assess_findings(findings)
    return asdict(assessment)  # recurses into nested MappedFinding dataclasses