"""Deterministic mapper: scanner finding -> CRA Annex I requirement -> risk + measures.

This is the brain of the standalone CRA cyber risk assessment (the layer the
external repo-scanner queries via MCP). It takes findings the scanner already
produced (we do NOT re-scan, and we do NOT duplicate the CVE/NVD knowledge the
scanner owns) and maps each to the CRA Annex I essential requirement(s) it
violates, derives a risk level, and attaches the remediation measures.

Pure + deterministic: no DB, no LLM, no network. Same input -> same output.

The requirement/measure spine is the single source of truth in
compliance.api.cra_annex_i_data (pure data, no FastAPI dependency).
"""

from dataclasses import dataclass, field, asdict
from typing import Optional

from compliance.api.cra_annex_i_data import ANNEX_I_REQUIREMENTS, MEASURES, DEADLINES
from compliance.services.cra_security_crosswalk import security_refs_for
from compliance.services.cra_prioritizer import prioritize, OBJECTIVES
from compliance.services.cra_safety_bridge import build_cross_links

# Requirement records from the spine, addressable by their req_id.
_REQ_INDEX = {req["req_id"]: req for req in ANNEX_I_REQUIREMENTS}

# Severity label -> comparable rank (higher is worse), and the reverse lookup.
_SEV_ORDER = {
    "LOW": 1,
    "MEDIUM": 2,
    "HIGH": 3,
    "CRITICAL": 4,
}
_SEV_BY_RANK = {rank: label for label, rank in _SEV_ORDER.items()}

# High-confidence CWE -> CRA-AI requirement(s), primary first. Keyed by the bare
# CWE number. Only req_ids that exist in the spine are referenced.
_CWE_TO_REQ = {
    # NOTE: group labels paraphrase the public MITRE CWE titles; the req_id on
    # the right is what the mapper actually uses.
    # -- hard-coded / default credentials, weak password rules
    798: ["CRA-AI-9", "CRA-AI-8"],
    259: ["CRA-AI-8"],
    1392: ["CRA-AI-8"],
    521: ["CRA-AI-8"],
    # -- authentication
    287: ["CRA-AI-7"],
    306: ["CRA-AI-7"],
    304: ["CRA-AI-7"],
    # -- authorization
    862: ["CRA-AI-12"],
    863: ["CRA-AI-12"],
    # -- permissions / privilege handling
    732: ["CRA-AI-4"],
    269: ["CRA-AI-4"],
    250: ["CRA-AI-4"],
    # -- cryptographic strength and hashing
    327: ["CRA-AI-13"],
    326: ["CRA-AI-13"],
    328: ["CRA-AI-13"],
    916: ["CRA-AI-9"],
    # -- protection of data in transit / at rest, credentials, keys, exposure
    319: ["CRA-AI-15"],
    311: ["CRA-AI-15"],
    312: ["CRA-AI-14"],
    522: ["CRA-AI-9"],
    320: ["CRA-AI-16"],
    200: ["CRA-AI-13"],
    # -- insecure defaults / configuration
    1188: ["CRA-AI-1"],
    16: ["CRA-AI-1"],
    # -- cookies, sessions, CSRF
    1004: ["CRA-AI-10"],
    614: ["CRA-AI-10"],
    384: ["CRA-AI-10"],
    613: ["CRA-AI-10"],
    352: ["CRA-AI-10"],
    # -- excessive authentication attempts
    307: ["CRA-AI-11"],
    # -- data authenticity / integrity checks
    345: ["CRA-AI-6"],
    353: ["CRA-AI-6"],
    494: ["CRA-AI-30"],
    # -- logging
    778: ["CRA-AI-24"],
    532: ["CRA-AI-24"],
    117: ["CRA-AI-27"],
    # -- third-party / vulnerable components
    1104: ["CRA-AI-22"],
    1035: ["CRA-AI-22"],
    937: ["CRA-AI-22"],
    1395: ["CRA-AI-22"],
    # -- injection, path traversal, input validation
    79: ["CRA-AI-20"],
    89: ["CRA-AI-20"],
    77: ["CRA-AI-20"],
    78: ["CRA-AI-20"],
    22: ["CRA-AI-20"],
    20: ["CRA-AI-20"],
}

# Substring fallback (lowercase) against category + title + description, primary first.
# Order matters: every keyword found in the text contributes its requirement in
# list order, so a specific phrase ("default password") is listed before the
# generic word it contains ("password") and wins the primary slot when no CWE
# mapping applied.
_KEYWORD_TO_REQ = [
    ("default password", "CRA-AI-8"),
    ("hardcoded", "CRA-AI-9"),
    ("secret", "CRA-AI-9"),
    ("credential", "CRA-AI-9"),
    ("password", "CRA-AI-8"),
    ("mfa", "CRA-AI-7"),
    ("authentication", "CRA-AI-7"),
    ("authoriz", "CRA-AI-12"),
    ("access control", "CRA-AI-12"),
    ("privilege", "CRA-AI-4"),
    ("tls", "CRA-AI-15"),
    ("ssl", "CRA-AI-15"),
    ("cipher", "CRA-AI-13"),
    ("crypto", "CRA-AI-13"),
    ("encrypt", "CRA-AI-13"),
    ("cleartext", "CRA-AI-15"),
    ("at rest", "CRA-AI-14"),
    ("session", "CRA-AI-10"),
    ("cookie", "CRA-AI-10"),
    ("csrf", "CRA-AI-10"),
    ("brute", "CRA-AI-11"),
    ("rate limit", "CRA-AI-11"),
    ("sbom", "CRA-AI-23"),
    ("dependency", "CRA-AI-22"),
    ("outdated", "CRA-AI-22"),
    ("known vuln", "CRA-AI-22"),
    ("cve", "CRA-AI-22"),
    ("injection", "CRA-AI-20"),
    ("xss", "CRA-AI-20"),
    ("sql", "CRA-AI-20"),
    ("traversal", "CRA-AI-20"),
    ("logging", "CRA-AI-24"),
    ("update", "CRA-AI-28"),
    ("signature", "CRA-AI-29"),
    ("integrity", "CRA-AI-6"),
    ("debug", "CRA-AI-1"),
    ("config", "CRA-AI-1"),
]


def _text(d: dict, *keys: str) -> str:
    """Return the first truthy value found under *keys*, coerced to ``str``.

    Returns ``""`` when no key carries a truthy value, so ``None`` and
    non-string payload values never reach the text-matching code.
    """
    for key in keys:
        value = d.get(key)
        if value:
            return str(value)
    return ""


def _finding_id(d: dict) -> str:
    """Return the finding id from ``id`` or ``finding_id`` as a string.

    A numeric ``0`` is a legitimate id and is kept; ``None``, ``""`` and other
    falsy non-numeric values count as "not provided".
    """
    for key in ("id", "finding_id"):
        value = d.get(key)
        is_number = isinstance(value, (int, float)) and not isinstance(value, bool)
        if value or is_number:
            return str(value)
    return ""


def _to_float(value) -> Optional[float]:
    """Coerce a payload value to ``float``; return ``None`` if it is not numeric."""
    if value is None:
        return None
    try:
        return float(value)
    except (TypeError, ValueError, OverflowError):
        # e.g. "n/a", "", a list, or an integer too large for a float
        return None


@dataclass
class ScannerFinding:
    """One finding emitted by the external repo-scanner."""

    id: str
    title: str = ""
    description: str = ""
    category: str = ""
    cwe: str = ""
    severity: str = ""  # critical | high | medium | low (scanner's rating)
    cvss: Optional[float] = None
    location: str = ""
    safety_impact: bool = False  # compromise can defeat a CE safety function (personal harm)
    exploited: bool = False  # actively / publicly exploited

    @classmethod
    def from_dict(cls, d: dict) -> "ScannerFinding":
        """Build a finding from a loosely-typed scanner dict.

        Accepted alias keys: ``id``/``finding_id``, ``title``/``name``,
        ``description``/``detail``, ``category``/``type``, ``location``/``path``.
        Text fields always come out as ``str`` and ``cvss`` as ``float`` or
        ``None``, whatever types the payload carried.
        """
        return cls(
            id=_finding_id(d),
            title=_text(d, "title", "name"),
            description=_text(d, "description", "detail"),
            category=_text(d, "category", "type"),
            cwe=str(d.get("cwe", "") or ""),
            severity=_text(d, "severity"),
            cvss=_to_float(d.get("cvss")),
            location=_text(d, "location", "path"),
            safety_impact=bool(d.get("safety_impact", False)),
            exploited=bool(d.get("exploited", False)),
        )


@dataclass
class MappedFinding:
    """Result of mapping one scanner finding onto the CRA requirement spine."""

    finding_id: str
    requirement_ids: list = field(default_factory=list)
    primary_requirement: str = ""
    annex_anchor: str = ""
    iso27001_ref: list = field(default_factory=list)
    evidence_type: str = ""  # code | process | hybrid | document (from the requirement)
    risk_level: str = "LOW"
    measures: list = field(default_factory=list)
    nist_refs: list = field(default_factory=list)  # NIST 800-53 control IDs (golden-set crosswalk)
    owasp_refs: list = field(default_factory=list)  # [{code, label}] OWASP Top 10:2021
    rationale: str = ""
    unmapped: bool = False
    # carried from the finding + set by the prioritizer (cra_prioritizer.prioritize)
    safety_impact: bool = False
    exploited: bool = False
    objective: str = ""
    priority_tier: str = ""  # P0 (non-negotiable floor) | P1 | P2 | P3
    priority_score: int = 0
    quick_win: bool = False
    priority_reason: str = ""


@dataclass
class CRAAssessment:
    """Aggregate assessment over all mapped findings."""

    findings_total: int
    mapped: list = field(default_factory=list)
    by_risk: dict = field(default_factory=dict)
    requirements_touched: list = field(default_factory=list)
    open_measures: list = field(default_factory=list)  # [{id, description}]
    unmapped_findings: list = field(default_factory=list)
    coverage_pct: float = 0.0
    quick_wins: list = field(default_factory=list)  # finding_ids: high impact, low effort
    objectives: list = field(default_factory=lambda: list(OBJECTIVES))
    cross_links: list = field(default_factory=list)  # cyber-meets-safety bridge
    deadlines: list = field(default_factory=lambda: list(DEADLINES))


def _cwe_num(cwe: str) -> Optional[int]:
    """Extract the bare CWE number from strings such as ``"CWE-79"`` or ``"79"``.

    Only the first run of ASCII digits is used, so a list like
    ``"CWE-79, CWE-89"`` resolves to 79 instead of the digits being glued
    together into 7989. Returns ``None`` when the text holds no ASCII digit.
    """
    digits = ""
    for ch in str(cwe):
        if "0" <= ch <= "9":
            digits += ch
        elif digits:
            # the first number has ended; ignore anything that follows
            break
    return int(digits) if digits else None


def _sev_from_cvss(cvss: Optional[float]) -> str:
    """Translate a CVSS score into a severity label; ``""`` if absent or <= 0."""
    if cvss is None:
        return ""
    if cvss >= 9.0:
        return "CRITICAL"
    if cvss >= 7.0:
        return "HIGH"
    if cvss >= 4.0:
        return "MEDIUM"
    if cvss > 0:
        return "LOW"
    return ""


def _rank(sev: str) -> int:
    """Return the numeric rank of a severity label (case-insensitive), 0 if unknown."""
    return _SEV_ORDER.get(str(sev or "").strip().upper(), 0)


def _finding_severity(f: ScannerFinding) -> str:
    """Upper-cased severity: the scanner's own rating, else the CVSS-derived label."""
    rating = str(f.severity or "").strip()
    return (rating or _sev_from_cvss(f.cvss)).upper()


def _candidate_reqs(f: ScannerFinding) -> list:
    """Deterministic requirement candidates, primary first, deduped, existing only.

    CWE-table hits come first, followed by every keyword hit in table order.
    Only ids present in the requirement spine are returned.
    """
    candidates: list = []
    num = _cwe_num(f.cwe)
    if num in _CWE_TO_REQ:
        candidates.extend(_CWE_TO_REQ[num])

    # Tolerate None fields on findings that were constructed directly.
    haystack = " ".join(
        str(part or "") for part in (f.category, f.title, f.description)
    ).lower()
    candidates.extend(rid for kw, rid in _KEYWORD_TO_REQ if kw in haystack)

    # dict.fromkeys drops duplicates while keeping first-seen order
    return [rid for rid in dict.fromkeys(candidates) if rid in _REQ_INDEX]


def map_finding(f: ScannerFinding) -> MappedFinding:
    """Map one scanner finding to its CRA requirement(s), risk level and measures.

    A finding with no requirement candidate is returned with ``unmapped=True``
    and the finding's own severity (``LOW`` if that is unknown). Otherwise the
    first candidate is the primary requirement, the risk level is the worse of
    the finding's severity and the primary requirement's severity, and the
    measures are the order-preserving union over all candidate requirements.
    """
    reqs = _candidate_reqs(f)
    finding_sev = _finding_severity(f)

    if not reqs:
        return MappedFinding(
            finding_id=f.id,
            risk_level=_SEV_BY_RANK.get(_rank(finding_sev), "LOW"),
            rationale="Kein eindeutiger CRA-Anforderungsbezug erkannt — manuelle Pruefung.",
            unmapped=True,
            safety_impact=f.safety_impact,
            exploited=f.exploited,
        )

    primary = _REQ_INDEX[reqs[0]]
    risk_rank = max(_rank(finding_sev), _rank(primary["severity"]))

    measures = list(
        dict.fromkeys(
            measure
            for rid in reqs
            for measure in _REQ_INDEX[rid].get("mapped_measures", [])
        )
    )
    refs = security_refs_for(reqs)
    primary_id = primary["req_id"]
    primary_title = primary.get("title", "")

    return MappedFinding(
        finding_id=f.id,
        requirement_ids=reqs,
        primary_requirement=primary_id,
        annex_anchor=primary.get("annex_anchor", ""),
        iso27001_ref=list(primary.get("iso27001_ref", [])),
        evidence_type=primary.get("evidence_type", ""),
        risk_level=_SEV_BY_RANK.get(risk_rank, "LOW"),
        measures=measures,
        nist_refs=refs["nist"],
        owasp_refs=refs["owasp"],
        rationale=f"{primary_id}: {primary_title}",
        safety_impact=f.safety_impact,
        exploited=f.exploited,
    )


def assess_findings(findings: list, weights=None, safety_functions=None) -> CRAAssessment:
    """Map findings to a deterministic CRA assessment, then prioritise them.

    weights: {objective: 'high'|'medium'|'low'} — customer priorities for the
        discretionary tier (the P0 floor ignores them).
    safety_functions: CE-risk-assessment safety functions for the cyber-meets-
        safety bridge; a finding that can defeat one is flagged safety_impact (→ P0).
    """
    mapped = [map_finding(f) for f in findings]
    total = len(mapped)  # one MappedFinding per input finding

    cross_links = build_cross_links(mapped, safety_functions)
    flagged = {fid for link in cross_links for fid in link["cyber_finding_ids"]}
    for m in mapped:
        if m.finding_id in flagged:
            m.safety_impact = True

    mapped = prioritize(mapped, weights)

    by_risk = {"CRITICAL": 0, "HIGH": 0, "MEDIUM": 0, "LOW": 0}
    reqs_touched: set = set()
    measure_ids: dict = {}  # used as an insertion-ordered set
    unmapped: list = []
    for m in mapped:
        by_risk[m.risk_level] = by_risk.get(m.risk_level, 0) + 1
        if m.unmapped:
            unmapped.append(m.finding_id)
        reqs_touched.update(m.requirement_ids)
        for mid in m.measures:
            measure_ids.setdefault(mid, None)

    covered = total - len(unmapped)
    return CRAAssessment(
        findings_total=total,
        mapped=mapped,
        by_risk=by_risk,
        requirements_touched=sorted(reqs_touched),
        open_measures=[
            {"id": mid, "description": MEASURES.get(mid, "")} for mid in measure_ids
        ],
        unmapped_findings=unmapped,
        coverage_pct=round(100.0 * covered / total, 1) if total else 0.0,
        quick_wins=[m.finding_id for m in mapped if m.quick_win],
        cross_links=cross_links,
    )


def assess_findings_payload(payload: dict) -> dict:
    """MCP/HTTP entry: {"findings": [ {...}, ... ]} -> assessment dict.

    This is the testable tool body the MCP server wraps (kept transport-free).
    Malformed input is handled best-effort rather than raising: a non-dict
    payload or a missing / null / non-list "findings" value counts as "no
    findings", and entries that are not dicts are skipped.
    """
    if not isinstance(payload, dict):
        payload = {}

    raw = payload.get("findings")
    if not isinstance(raw, (list, tuple)):
        raw = []

    findings = [ScannerFinding.from_dict(d) for d in raw if isinstance(d, dict)]
    assessment = assess_findings(
        findings, payload.get("weights"), payload.get("safety_functions")
    )
    return asdict(assessment)  # recurses into nested MappedFinding dataclasses