Files
breakpilot-compliance/backend-compliance/compliance/services/cra_finding_mapper.py
T
Benjamin Admin ee1632cd52 feat(cra): snapshot/history UI + measure-class (code-fix vs process) UI
Snapshot/history: "Snapshot speichern" + a version list (status, date, coverage)
you can click through — makes the CRA Art. 13 running system visible (backend
endpoints already live). Measure-class: each finding shows a remediation-class
badge from its CRA evidence_type ("Code-nah" = scan-locatable, code-fix in the
ticket possible; otherwise Prozess/Doku), and the measures section is relabelled
as the Sollzustand (process/build) — no auto-fix buttons on process measures.
Backend: MappedFinding now carries evidence_type.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-14 10:02:17 +02:00

256 lines
11 KiB
Python

"""Deterministic mapper: scanner finding -> CRA Annex I requirement -> risk + measures.
This is the brain of the standalone CRA cyber risk assessment (the layer the
external repo-scanner queries via MCP). It takes findings the scanner already
produced (we do NOT re-scan, and we do NOT duplicate the CVE/NVD knowledge the
scanner owns) and maps each to the CRA Annex I essential requirement(s) it
violates, derives a risk level, and attaches the remediation measures.
Pure + deterministic: no DB, no LLM, no network. Same input -> same output.
The requirement/measure spine is the single source of truth in
compliance.api.cra_annex_i_data (pure data, no FastAPI dependency).
"""
from dataclasses import dataclass, field, asdict
from typing import Optional
from compliance.api.cra_annex_i_data import ANNEX_I_REQUIREMENTS, MEASURES, DEADLINES
from compliance.services.cra_security_crosswalk import security_refs_for
from compliance.services.cra_prioritizer import prioritize, OBJECTIVES
from compliance.services.cra_safety_bridge import build_cross_links
# Spine requirements keyed by their req_id for O(1) lookup.
_REQ_INDEX = {req["req_id"]: req for req in ANNEX_I_REQUIREMENTS}
# Severity label -> ordinal rank (larger is worse), and the inverse lookup.
_SEV_ORDER = dict(LOW=1, MEDIUM=2, HIGH=3, CRITICAL=4)
_SEV_BY_RANK = {rank: label for label, rank in _SEV_ORDER.items()}
# High-confidence CWE -> CRA-AI requirement(s), primary first. Keyed by the bare
# CWE number. Only req_ids that exist in the spine are referenced.
_CWE_TO_REQ = {
798: ["CRA-AI-9", "CRA-AI-8"], 259: ["CRA-AI-8"], 1392: ["CRA-AI-8"], 521: ["CRA-AI-8"],
287: ["CRA-AI-7"], 306: ["CRA-AI-7"], 304: ["CRA-AI-7"],
862: ["CRA-AI-12"], 863: ["CRA-AI-12"],
732: ["CRA-AI-4"], 269: ["CRA-AI-4"], 250: ["CRA-AI-4"],
327: ["CRA-AI-13"], 326: ["CRA-AI-13"], 328: ["CRA-AI-13"], 916: ["CRA-AI-9"],
319: ["CRA-AI-15"], 311: ["CRA-AI-15"], 312: ["CRA-AI-14"],
522: ["CRA-AI-9"], 320: ["CRA-AI-16"], 200: ["CRA-AI-13"],
1188: ["CRA-AI-1"], 16: ["CRA-AI-1"],
1004: ["CRA-AI-10"], 614: ["CRA-AI-10"], 384: ["CRA-AI-10"], 613: ["CRA-AI-10"], 352: ["CRA-AI-10"],
307: ["CRA-AI-11"],
345: ["CRA-AI-6"], 353: ["CRA-AI-6"], 494: ["CRA-AI-30"],
778: ["CRA-AI-24"], 532: ["CRA-AI-24"], 117: ["CRA-AI-27"],
1104: ["CRA-AI-22"], 1035: ["CRA-AI-22"], 937: ["CRA-AI-22"], 1395: ["CRA-AI-22"],
79: ["CRA-AI-20"], 89: ["CRA-AI-20"], 77: ["CRA-AI-20"], 78: ["CRA-AI-20"], 22: ["CRA-AI-20"], 20: ["CRA-AI-20"],
}
# Substring fallback (lowercase) against category + title + description, primary first.
_KEYWORD_TO_REQ = [
("default password", "CRA-AI-8"), ("hardcoded", "CRA-AI-9"), ("secret", "CRA-AI-9"),
("credential", "CRA-AI-9"), ("password", "CRA-AI-8"), ("mfa", "CRA-AI-7"),
("authentication", "CRA-AI-7"), ("authoriz", "CRA-AI-12"), ("access control", "CRA-AI-12"),
("privilege", "CRA-AI-4"), ("tls", "CRA-AI-15"), ("ssl", "CRA-AI-15"), ("cipher", "CRA-AI-13"),
("crypto", "CRA-AI-13"), ("encrypt", "CRA-AI-13"), ("cleartext", "CRA-AI-15"), ("at rest", "CRA-AI-14"),
("session", "CRA-AI-10"), ("cookie", "CRA-AI-10"), ("csrf", "CRA-AI-10"), ("brute", "CRA-AI-11"),
("rate limit", "CRA-AI-11"), ("sbom", "CRA-AI-23"), ("dependency", "CRA-AI-22"),
("outdated", "CRA-AI-22"), ("known vuln", "CRA-AI-22"), ("cve", "CRA-AI-22"),
("injection", "CRA-AI-20"), ("xss", "CRA-AI-20"), ("sql", "CRA-AI-20"), ("traversal", "CRA-AI-20"),
("logging", "CRA-AI-24"), ("update", "CRA-AI-28"), ("signature", "CRA-AI-29"),
("integrity", "CRA-AI-6"), ("debug", "CRA-AI-1"), ("config", "CRA-AI-1"),
]
@dataclass
class ScannerFinding:
    """One finding emitted by the external repo-scanner.

    Only ``id`` is required; every other field defaults to an empty / falsy
    value so partially populated scanner output can still be mapped.
    """

    id: str
    title: str = ""
    description: str = ""
    category: str = ""
    cwe: str = ""
    severity: str = ""  # critical | high | medium | low (scanner's rating)
    cvss: Optional[float] = None
    location: str = ""
    safety_impact: bool = False  # compromise can defeat a CE safety function (personal harm)
    exploited: bool = False  # actively / publicly exploited

    @classmethod
    def from_dict(cls, d: dict) -> "ScannerFinding":
        """Build a finding from one raw scanner dict.

        Accepts the alias keys ``finding_id``, ``name``, ``detail``, ``type``
        and ``path``. Text fields are always returned as ``str``: a JSON
        ``null`` or a non-string value never leaks into the instance, because
        downstream code joins and upper-cases these fields. ``cvss`` is parsed
        to ``float``; anything that is not a number becomes ``None``.
        """

        def text(*keys: str) -> str:
            # First truthy value among the given keys, coerced to str.
            for key in keys:
                value = d.get(key)
                if value:
                    return str(value)
            return ""

        raw_cvss = d.get("cvss")
        cvss: Optional[float]
        if raw_cvss is None or isinstance(raw_cvss, bool):
            # bool is an int subclass; True/False is not a meaningful score.
            cvss = None
        else:
            try:
                cvss = float(raw_cvss)
            except (TypeError, ValueError, OverflowError):
                cvss = None

        return cls(
            id=text("id", "finding_id"),
            title=text("title", "name"),
            description=text("description", "detail"),
            category=text("category", "type"),
            cwe=text("cwe"),
            severity=text("severity"),
            cvss=cvss,
            location=text("location", "path"),
            safety_impact=bool(d.get("safety_impact", False)),
            exploited=bool(d.get("exploited", False)),
        )
@dataclass
class MappedFinding:
    """Result of mapping one scanner finding onto the CRA Annex I spine.

    Produced by map_finding; the priority fields at the bottom are filled in
    afterwards by the prioritizer.
    """

    finding_id: str  # id of the scanner finding this result belongs to
    requirement_ids: list = field(default_factory=list)  # matched req_ids, primary first
    primary_requirement: str = ""  # req_id of the first (primary) matched requirement
    annex_anchor: str = ""  # taken from the primary requirement
    iso27001_ref: list = field(default_factory=list)  # taken from the primary requirement
    evidence_type: str = ""  # code | process | hybrid | document (from the requirement)
    risk_level: str = "LOW"  # LOW | MEDIUM | HIGH | CRITICAL
    measures: list = field(default_factory=list)  # de-duplicated measure ids of all matched requirements
    nist_refs: list = field(default_factory=list)  # NIST 800-53 control IDs (golden-set crosswalk)
    owasp_refs: list = field(default_factory=list)  # [{code, label}] OWASP Top 10:2021
    rationale: str = ""  # short human-readable reason for the mapping
    unmapped: bool = False  # True when no requirement could be matched
    # carried from the finding + set by the prioritizer (cra_prioritizer.prioritize)
    safety_impact: bool = False
    exploited: bool = False
    objective: str = ""
    priority_tier: str = ""  # P0 (non-negotiable floor) | P1 | P2 | P3
    priority_score: int = 0
    quick_win: bool = False
    priority_reason: str = ""
@dataclass
class CRAAssessment:
    """Aggregate CRA assessment over a batch of scanner findings.

    Built by assess_findings; serialised with dataclasses.asdict by
    assess_findings_payload.
    """

    findings_total: int  # number of findings that were assessed
    mapped: list = field(default_factory=list)  # MappedFinding entries as returned by the prioritizer
    by_risk: dict = field(default_factory=dict)  # risk level -> number of findings
    requirements_touched: list = field(default_factory=list)  # sorted req_ids hit by any finding
    open_measures: list = field(default_factory=list)  # [{id, description}]
    unmapped_findings: list = field(default_factory=list)  # finding_ids with no requirement match
    coverage_pct: float = 0.0  # share of findings that were mapped, in percent (one decimal)
    quick_wins: list = field(default_factory=list)  # finding_ids: high impact, low effort
    objectives: list = field(default_factory=lambda: list(OBJECTIVES))  # per-instance copy of OBJECTIVES
    cross_links: list = field(default_factory=list)  # cyber-meets-safety bridge
    deadlines: list = field(default_factory=lambda: list(DEADLINES))  # per-instance copy of DEADLINES
def _cwe_num(cwe: str) -> Optional[int]:
digits = "".join(ch for ch in str(cwe) if ch.isdigit())
return int(digits) if digits else None
def _sev_from_cvss(cvss: Optional[float]) -> str:
if cvss is None:
return ""
if cvss >= 9.0:
return "CRITICAL"
if cvss >= 7.0:
return "HIGH"
if cvss >= 4.0:
return "MEDIUM"
if cvss > 0:
return "LOW"
return ""
def _rank(sev: str) -> int:
    """Return the ordinal rank of a severity label (case-insensitive); 0 if unknown or empty."""
    label = sev.upper() if sev else ""
    return _SEV_ORDER.get(label, 0)
def _candidate_reqs(f: ScannerFinding) -> list:
    """Collect the requirement ids a finding may violate.

    CWE-table hits come first, followed by keyword hits in rule order. The
    result keeps the first occurrence of each id and drops ids that are not
    present in the requirement spine.
    """
    candidates = list(_CWE_TO_REQ.get(_cwe_num(f.cwe), ()))
    text = " ".join((f.category, f.title, f.description)).lower()
    candidates += [req_id for keyword, req_id in _KEYWORD_TO_REQ if keyword in text]
    # dict.fromkeys de-duplicates while preserving first-seen order.
    return [req_id for req_id in dict.fromkeys(candidates) if req_id in _REQ_INDEX]
def map_finding(f: ScannerFinding) -> MappedFinding:
    """Map one scanner finding to its CRA requirement(s), risk level and measures.

    The finding's own severity rank comes from the scanner's label; when that
    label is empty or not one of LOW/MEDIUM/HIGH/CRITICAL (e.g. "moderate",
    "info") the CVSS score is used instead, so an unknown label no longer
    discards a usable score.

    Returns an ``unmapped`` MappedFinding (risk taken from the finding alone,
    defaulting to LOW) when no requirement matches. Otherwise the risk level is
    the worse of the finding's severity and the primary requirement's severity,
    and the measures are the de-duplicated ``mapped_measures`` of all matched
    requirements in match order.
    """
    reqs = _candidate_reqs(f)
    # Scanner label first; fall back to CVSS if the label has no known rank.
    finding_rank = _rank(f.severity) or _rank(_sev_from_cvss(f.cvss))
    if not reqs:
        return MappedFinding(
            finding_id=f.id,
            risk_level=_SEV_BY_RANK.get(finding_rank, "LOW"),
            rationale="Kein eindeutiger CRA-Anforderungsbezug erkannt — manuelle Pruefung.",
            unmapped=True,
            safety_impact=f.safety_impact,
            exploited=f.exploited,
        )

    primary = _REQ_INDEX[reqs[0]]
    risk_rank = max(finding_rank, _rank(primary["severity"]))

    measures: list = []
    for rid in reqs:
        for m in _REQ_INDEX[rid].get("mapped_measures", []):
            if m not in measures:
                measures.append(m)

    refs = security_refs_for(reqs)
    return MappedFinding(
        finding_id=f.id,
        requirement_ids=reqs,
        primary_requirement=primary["req_id"],
        annex_anchor=primary.get("annex_anchor", ""),
        iso27001_ref=list(primary.get("iso27001_ref", [])),
        evidence_type=primary.get("evidence_type", ""),
        risk_level=_SEV_BY_RANK.get(risk_rank, "LOW"),
        measures=measures,
        nist_refs=refs["nist"],
        owasp_refs=refs["owasp"],
        rationale="{}: {}".format(primary["req_id"], primary.get("title", "")),
        safety_impact=f.safety_impact,
        exploited=f.exploited,
    )
def assess_findings(findings: list, weights=None, safety_functions=None) -> CRAAssessment:
    """Map findings to a deterministic CRA assessment, then prioritise them.

    weights: {objective: 'high'|'medium'|'low'} — customer priorities for the
        discretionary tier (the P0 floor ignores them).
    safety_functions: CE-risk-assessment safety functions for the cyber-meets-
        safety bridge; a finding that can defeat one is flagged safety_impact (→ P0).
    """
    mapped = [map_finding(item) for item in findings]

    # Findings referenced by a cyber-meets-safety cross link get safety_impact
    # set before prioritisation runs.
    cross_links = build_cross_links(mapped, safety_functions)
    bridged_ids = set()
    for link in cross_links:
        bridged_ids.update(link["cyber_finding_ids"])
    for entry in mapped:
        if entry.finding_id in bridged_ids:
            entry.safety_impact = True

    mapped = prioritize(mapped, weights)

    by_risk = dict.fromkeys(("CRITICAL", "HIGH", "MEDIUM", "LOW"), 0)
    touched = set()
    unmapped = []
    measure_order = {}  # insertion-ordered set of measure ids
    for entry in mapped:
        by_risk[entry.risk_level] = by_risk.get(entry.risk_level, 0) + 1
        if entry.unmapped:
            unmapped.append(entry.finding_id)
        touched.update(entry.requirement_ids)
        for measure_id in entry.measures:
            measure_order.setdefault(measure_id, None)

    total = len(findings)
    if total:
        coverage = round(100.0 * (total - len(unmapped)) / total, 1)
    else:
        coverage = 0.0

    open_measures = [
        {"id": measure_id, "description": MEASURES.get(measure_id, "")}
        for measure_id in measure_order
    ]
    return CRAAssessment(
        findings_total=total,
        mapped=mapped,
        by_risk=by_risk,
        requirements_touched=sorted(touched),
        open_measures=open_measures,
        unmapped_findings=unmapped,
        coverage_pct=coverage,
        quick_wins=[entry.finding_id for entry in mapped if entry.quick_win],
        cross_links=cross_links,
    )
def assess_findings_payload(payload: dict) -> dict:
    """MCP/HTTP entry: {"findings": [ {...}, ... ]} -> assessment dict.

    Optional payload keys ``weights`` and ``safety_functions`` are passed
    through to assess_findings. A payload that is not a dict, or whose
    ``findings`` is missing or null, is assessed as an empty finding list
    instead of raising.

    This is the testable tool body the MCP server wraps (kept transport-free).
    """
    body = payload if isinstance(payload, dict) else {}
    # `or []` also covers an explicit JSON null, which dict.get's default does not.
    raw = body.get("findings") or []
    findings = [ScannerFinding.from_dict(d) for d in raw]
    assessment = assess_findings(findings, body.get("weights"), body.get("safety_functions"))
    return asdict(assessment)  # recurses into nested MappedFinding dataclasses