Files
breakpilot-compliance/backend-compliance/tests/test_cra_finding_mapper.py
T
Benjamin Bönisch 72093e5501
CI / detect-changes (push) Successful in 17s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / secret-scan (push) Has been skipped
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / build-sha-integrity (push) Successful in 13s
CI / validate-canonical-controls (push) Successful in 12s
CI / loc-budget (push) Successful in 25s
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Has been skipped
CI / test-go (push) Has been skipped
CI / iace-gt-coverage (push) Has been skipped
CI / test-python-backend (push) Successful in 30s
CI / test-python-document-crawler (push) Has been skipped
CI / test-python-dsms-gateway (push) Has been skipped
fix(cra): Scanner-Findings vollstaendig mappen + assess-from-scanner-Latenz senken
Punkt 2 (Coverage): semgrep/gdpr-Findings ohne CWE blieben unmapped (~21%).
Der Mapper nutzt jetzt den scanner rule_id + gezielte Keywords (gdpr ->
Datenminimierung CRA-AI-17, path-traversal/prototype-pollution -> CRA-AI-20,
nginx-header/Docker-Hardening -> CRA-AI-1/4, insecure-websocket -> CRA-AI-15).
Reale Scanner-Daten: unmapped 19/92 -> 0/92 (Coverage 100%).

Punkt 3 (Latenz): enrich_findings_with_breadth lief ~6 Aggregat-Queries je
(use_case,sub_topic)-Paar, nutzte aber nur die Liste. Jetzt EINE batched Query
(breadth_controls_batch) fuer alle Paare + Prozess-Cache (TTL 1800s). macmini:
cold 0,23s / warm 0,000s. Prod-Root-Cause: atom_classification ohne
(use_case,sub_topic)-Index nach DB-Swap -> Index dem DB-Owner empfohlen.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-17 13:17:51 +02:00

154 lines
6.5 KiB
Python

"""Tests for the deterministic CRA finding -> Annex I requirement mapper."""
from compliance.services.cra_finding_mapper import (
ScannerFinding, map_finding, assess_findings, assess_findings_payload,
)
from compliance.services.cra_security_crosswalk import CRA_SECURITY_CROSSWALK
from compliance.api.cra_annex_i_data import ANNEX_I_REQUIREMENTS
def test_hardcoded_credentials_cwe_maps_to_credential_requirement():
    """A CWE-798 finding resolves to CRA-AI-9 as primary and also lists CRA-AI-8."""
    finding = ScannerFinding(
        id="f1",
        title="Hardcoded API key",
        cwe="CWE-798",
        severity="high",
    )
    mapping = map_finding(finding)

    assert mapping.primary_requirement == "CRA-AI-9"
    assert "CRA-AI-8" in mapping.requirement_ids
    assert not mapping.unmapped
    assert mapping.annex_anchor  # spine carries the Annex anchor
def test_mapped_finding_carries_evidence_type():
    """A mapped CWE-259 finding reports ``"code"`` as its evidence type."""
    finding = ScannerFinding(
        id="e",
        title="default password",
        cwe="CWE-259",
        severity="high",
    )
    mapping = map_finding(finding)

    # CRA-AI-8 is code-checkable
    assert mapping.evidence_type == "code"
def test_default_password_is_critical_and_carries_measure_M542():
    """A critical CWE-259 finding maps to CRA-AI-8, is CRITICAL, and lists M542."""
    finding = ScannerFinding(
        id="f2",
        title="Universal default password",
        cwe="CWE-259",
        severity="critical",
    )
    mapping = map_finding(finding)

    assert mapping.primary_requirement == "CRA-AI-8"
    assert mapping.risk_level == "CRITICAL"
    # default-password change measure from the spine
    assert "M542" in mapping.measures
def test_weak_tls_via_keyword_maps_to_transport_crypto():
    """A finding without a CWE but with a TLS 1.0 title maps to CRA-AI-15."""
    finding = ScannerFinding(
        id="f3",
        title="TLS 1.0 enabled",
        description="weak protocol",
        severity="high",
    )
    mapping = map_finding(finding)

    assert mapping.primary_requirement == "CRA-AI-15"
    assert not mapping.unmapped
def test_dependency_cve_without_cwe_maps_to_dependency_monitoring():
    """A ``dependency``-category finding with no CWE maps to CRA-AI-22."""
    finding = ScannerFinding(
        id="f4",
        title="lodash 4.17.4 has known CVE",
        category="dependency",
        severity="high",
    )
    mapping = map_finding(finding)

    assert mapping.primary_requirement == "CRA-AI-22"
def test_severity_derived_from_cvss_when_missing():
    """With no explicit severity, a CVSS of 9.4 yields a CRITICAL risk level."""
    finding = ScannerFinding(
        id="f5",
        title="cleartext transmission",
        cwe="CWE-319",
        cvss=9.4,
    )
    mapping = map_finding(finding)

    # finding sev (CRITICAL from cvss) escalates over requirement sev
    assert mapping.risk_level == "CRITICAL"
def test_risk_is_max_of_finding_and_requirement_severity():
    """A low-severity finding mapped to CRA-AI-8 still ends up CRITICAL."""
    # low-severity finding but the requirement (CRA-AI-8, CRITICAL) dominates
    finding = ScannerFinding(id="f6", title="default password", severity="low")
    mapping = map_finding(finding)

    assert mapping.primary_requirement == "CRA-AI-8"
    assert mapping.risk_level == "CRITICAL"
def test_unmapped_finding_is_flagged_not_invented():
    """An unrecognisable finding is flagged unmapped with no requirement ids."""
    finding = ScannerFinding(id="f7", title="zzz unrelated note", severity="low")
    mapping = map_finding(finding)

    assert mapping.unmapped
    assert mapping.requirement_ids == []
def test_gdpr_scanner_finding_maps_to_data_minimisation():
    """A ``gdpr``-category finding with no CWE maps to CRA-AI-17."""
    # gdpr-pattern scanner finding (no cwe) -> CRA Annex I data minimisation
    finding = ScannerFinding(
        id="g1",
        category="gdpr",
        title="Data collection without apparent consent mechanism",
        severity="medium",
    )
    mapping = map_finding(finding)

    assert mapping.primary_requirement == "CRA-AI-17"
    assert not mapping.unmapped
def test_semgrep_path_traversal_via_rule_id_maps_to_secure_testing():
    """A SAST finding carrying only a path-join rule_id maps to CRA-AI-20."""
    # semgrep finding with NO cwe — the rule_id pins it as path traversal
    finding = ScannerFinding(
        id="s1",
        category="sast",
        rule_id="javascript.express.security.express-path-join-resolve",
        title="Possible writing outside of the destination",
        severity="medium",
    )
    mapping = map_finding(finding)

    assert mapping.primary_requirement == "CRA-AI-20"
    assert not mapping.unmapped
def test_semgrep_prototype_pollution_maps_to_secure_testing():
    """A SAST finding whose title mentions prototype polluting maps to CRA-AI-20."""
    finding = ScannerFinding(
        id="s2",
        category="sast",
        title="Possibility of prototype polluting function detected",
        severity="medium",
    )
    mapping = map_finding(finding)

    assert mapping.primary_requirement == "CRA-AI-20"
def test_nginx_header_redefinition_maps_to_secure_config():
    """A SAST finding with the nginx header-redefinition rule_id maps to CRA-AI-1."""
    finding = ScannerFinding(
        id="s3",
        category="sast",
        rule_id="generic.nginx.security.header-redefinition",
        title="The 'add_header' directive is called in a 'location' block",
        severity="medium",
    )
    mapping = map_finding(finding)

    assert mapping.primary_requirement == "CRA-AI-1"
    assert not mapping.unmapped
def test_assessment_aggregates_and_coverage():
    """Assessing two mappable and one unmappable finding aggregates correctly.

    Checks the total count, the per-risk histogram sum, the touched
    requirements, the unmapped list, the coverage percentage (2 of 3) and the
    shape of the open measures.
    """
    critical_default_password = ScannerFinding(id="a", cwe="CWE-259", severity="critical")  # CRA-AI-8
    weak_tls = ScannerFinding(id="b", title="TLS 1.0", severity="high")  # CRA-AI-15
    noise = ScannerFinding(id="c", title="zzz nothing", severity="low")  # unmapped

    assessment = assess_findings([critical_default_password, weak_tls, noise])
    expected_coverage = round(100.0 * 2 / 3, 1)

    assert assessment.findings_total == 3
    assert sum(assessment.by_risk.values()) == 3
    assert "CRA-AI-8" in assessment.requirements_touched
    assert "c" in assessment.unmapped_findings
    assert assessment.coverage_pct == expected_coverage
    assert any(measure["id"] == "M542" for measure in assessment.open_measures)
    assert all("description" in measure for measure in assessment.open_measures)
def test_payload_entry_is_json_serializable_and_deterministic():
    """The payload entry point is deterministic and its result JSON-encodes.

    Two calls with the same payload must return equal results, the ``mapped``
    entry must be a list of dicts, and the whole result must survive
    ``json.dumps`` so the "json serializable" promise in the test name is
    actually exercised rather than inferred from container types alone.
    """
    import json  # local import keeps this block self-contained

    payload = {"findings": [
        {"id": "x", "cwe": "CWE-798", "severity": "high"},
        {"id": "y", "type": "dependency", "name": "openssl CVE-2024-x", "severity": "critical"},
    ]}
    r1 = assess_findings_payload(payload)
    r2 = assess_findings_payload(payload)

    assert r1 == r2  # deterministic
    assert r1["findings_total"] == 2
    # Separate asserts so a failure pinpoints which container type is wrong.
    assert isinstance(r1["mapped"], list)
    assert isinstance(r1["mapped"][0], dict)
    # json.dumps raises TypeError on any non-serializable value, failing the test.
    json.dumps(r1)

    by_id = {m["finding_id"]: m for m in r1["mapped"]}  # order is now priority-sorted
    assert by_id["x"]["primary_requirement"] == "CRA-AI-9"
def test_empty_payload_is_safe():
    """An empty payload dict yields zero findings and 0.0 coverage."""
    result = assess_findings_payload({})

    assert result["findings_total"] == 0
    assert result["coverage_pct"] == 0.0
# --- Security golden-set crosswalk (NIST 800-53 + OWASP Top 10:2021) ---
def test_default_password_carries_nist_and_owasp_refs():
    """A CWE-259 finding carries NIST ``IA-5`` and an OWASP ``A07:2021`` ref."""
    finding = ScannerFinding(
        id="g1",
        title="default password",
        cwe="CWE-259",
        severity="high",
    )
    mapping = map_finding(finding)

    assert "IA-5" in mapping.nist_refs
    assert any(ref["code"] == "A07:2021" for ref in mapping.owasp_refs)
def test_dependency_finding_maps_to_owasp_a06_and_nist_ra5():
    """A ``dependency`` finding carries NIST ``RA-5`` and an OWASP ``A06:2021`` ref."""
    finding = ScannerFinding(
        id="g2",
        title="outdated dependency",
        category="dependency",
        severity="high",
    )
    mapping = map_finding(finding)

    assert "RA-5" in mapping.nist_refs
    assert any(ref["code"] == "A06:2021" for ref in mapping.owasp_refs)
def test_unmapped_finding_has_no_security_refs():
    """An unmappable finding has empty NIST and OWASP reference lists."""
    finding = ScannerFinding(id="g3", title="zzz nothing", severity="low")
    mapping = map_finding(finding)

    assert mapping.nist_refs == []
    assert mapping.owasp_refs == []
def test_golden_set_covers_every_requirement():
    """Every Annex I requirement has a crosswalk entry with NIST refs.

    Completeness invariant: each ``req_id`` in ``ANNEX_I_REQUIREMENTS`` must
    be a key of ``CRA_SECURITY_CROSSWALK`` with a non-empty ``"nist"`` value
    (OWASP may be empty for process reqs).
    """
    for requirement in ANNEX_I_REQUIREMENTS:
        requirement_id = requirement["req_id"]
        assert requirement_id in CRA_SECURITY_CROSSWALK, (
            "missing crosswalk for {}".format(requirement_id)
        )
        crosswalk_entry = CRA_SECURITY_CROSSWALK[requirement_id]
        assert crosswalk_entry["nist"], "no NIST refs for {}".format(requirement_id)