72093e5501
CI / detect-changes (push) Successful in 17s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / secret-scan (push) Has been skipped
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / build-sha-integrity (push) Successful in 13s
CI / validate-canonical-controls (push) Successful in 12s
CI / loc-budget (push) Successful in 25s
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Has been skipped
CI / test-go (push) Has been skipped
CI / iace-gt-coverage (push) Has been skipped
CI / test-python-backend (push) Successful in 30s
CI / test-python-document-crawler (push) Has been skipped
CI / test-python-dsms-gateway (push) Has been skipped
Punkt 2 (Coverage): semgrep/gdpr-Findings ohne CWE blieben unmapped (~21%). Der Mapper nutzt jetzt den scanner rule_id + gezielte Keywords (gdpr -> Datenminimierung CRA-AI-17, path-traversal/prototype-pollution -> CRA-AI-20, nginx-header/Docker-Hardening -> CRA-AI-1/4, insecure-websocket -> CRA-AI-15). Reale Scanner-Daten: unmapped 19/92 -> 0/92 (Coverage 100%). Punkt 3 (Latenz): enrich_findings_with_breadth lief ~6 Aggregat-Queries je (use_case,sub_topic)-Paar, nutzte aber nur die Liste. Jetzt EINE batched Query (breadth_controls_batch) fuer alle Paare + Prozess-Cache (TTL 1800s). macmini: cold 0,23s / warm 0,000s. Prod-Root-Cause: atom_classification ohne (use_case,sub_topic)-Index nach DB-Swap -> Index dem DB-Owner empfohlen. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
109 lines
5.0 KiB
Python
109 lines
5.0 KiB
Python
"""Attach the atom-grain network_security regulatory breadth to CRA findings.
|
|
|
|
This is the "semantic breadth (2)" from the handoff: the shared Controls-API
|
|
(compliance.atom_classification, use_case=network_security, ~11k precise,
|
|
framework-traceable obligations). It runs at the ENDPOINT/VIEW layer — NOT in
|
|
the pure cra_finding_mapper, which stays deterministic. The CRA Annex I anchor +
|
|
the curated measure + the NIST/OWASP golden-set crosswalk remain the lead; this
|
|
is breadth + source evidence, not a replacement.
|
|
|
|
Only atom-grain corpora are queried (cra, plus code_security or network_security
depending on the sub_topic), always scoped by sub_topic + limit (per the caveats).
|
|
"""
|
|
import time
|
|
|
|
from compliance.api.cra_annex_i_data import ANNEX_I_REQUIREMENTS
|
|
from compliance.services.use_case_controls import UseCaseControlsService
|
|
|
|
# CRA-AI requirement id -> sub_topic (via the NIST families per CRA-AI).
# Exact sub_topic keys verified against the live atom_classification.
# Ids missing from this table have no sub_topic: lookups go through `.get`.
_REQ_TO_SUBTOPIC = {
    "CRA-AI-1": "secure_development",
    "CRA-AI-2": "network_segmentation",
    "CRA-AI-3": "network_segmentation",
    "CRA-AI-4": "access_control",
    "CRA-AI-5": "secure_development",
    "CRA-AI-6": "secure_development",
    "CRA-AI-7": "authentication",
    "CRA-AI-8": "authentication",
    "CRA-AI-9": "authentication",
    "CRA-AI-10": "access_control",
    "CRA-AI-11": "authentication",
    "CRA-AI-12": "access_control",
    "CRA-AI-13": "cryptography",
    "CRA-AI-14": "cryptography",
    "CRA-AI-15": "cryptography",
    "CRA-AI-16": "cryptography",
    "CRA-AI-17": "data_protection",
    "CRA-AI-18": "secure_development",
    "CRA-AI-19": "secure_development",
    "CRA-AI-20": "secure_development",
    "CRA-AI-21": "supply_chain_security",
    "CRA-AI-22": "vulnerability_management",
    "CRA-AI-23": "supply_chain_security",
    "CRA-AI-24": "logging_monitoring",
    "CRA-AI-25": "logging_monitoring",
    "CRA-AI-26": "logging_monitoring",
    "CRA-AI-27": "logging_monitoring",
    "CRA-AI-28": "vulnerability_management",
    "CRA-AI-29": "vulnerability_management",
    "CRA-AI-30": "vulnerability_management",
    "CRA-AI-31": "vulnerability_management",
    "CRA-AI-32": "vulnerability_management",
    "CRA-AI-33": "vulnerability_management",
    "CRA-AI-34": "vulnerability_management",
    "CRA-AI-35": "incident_response",
    "CRA-AI-36": "incident_response",
    "CRA-AI-37": "incident_response",
    "CRA-AI-38": "incident_response",
    "CRA-AI-39": "vulnerability_management",
    "CRA-AI-40": "incident_response",
}
|
|
|
|
|
|
# Two breadth lanes per finding: the CRA-specific corpus (always — most on-point
# for a CRA assessment) + the technical-depth corpus for the sub_topic
# (code_security for secure-dev, else network_security). All atom-grain.
# isms/dora/kritis are also atom-grain — available for future per-regime routing.
#
# Only the exceptions are listed here; any sub_topic without an entry falls back
# to "network_security" at lookup time.
_TECHNICAL_USECASE = {"secure_development": "code_security"}
|
|
|
|
|
|
def subtopic_for(req_id: str):
    """Return the sub_topic mapped to *req_id*, or None when the id is unknown.

    The lookup is a plain `.get` on `_REQ_TO_SUBTOPIC`, so an id that is not in
    the table never raises.
    """
    sub_topic = _REQ_TO_SUBTOPIC.get(req_id)
    return sub_topic
|
|
|
|
|
|
def usecases_for(sub_topic: str) -> list:
    """Return the two use_cases to query for *sub_topic*, in lookup order.

    The first element is always "cra"; the second is the technical-depth
    use_case from `_TECHNICAL_USECASE`, defaulting to "network_security" when
    the sub_topic has no dedicated entry.
    """
    technical = _TECHNICAL_USECASE.get(sub_topic, "network_security")
    return ["cra", technical]
|
|
|
|
|
|
# Process-level memo for the breadth lists. The atom corpus is static reference
# data, so it is safe to reuse across requests — this turns the warm path into
# zero DB work; only the first call after a (re)start pays for it.
#   key:   (use_case, sub_topic, per_use_case) — the limit is part of the key so
#          a list fetched with a small limit is never served to a larger request
#   value: (monotonic_ts, [controls])
_BREADTH_CACHE: dict = {}
_BREADTH_TTL = 1800.0  # seconds, compared against time.monotonic() deltas


def enrich_findings_with_breadth(mapped: list, db, per_use_case: int = 3) -> None:
    """Attach `sub_topic` + `regulatory_breadth` to each finding, in place.

    `regulatory_breadth` is the de-duplicated (by `control_id`) concatenation of
    the atom controls for every use_case returned by `usecases_for(sub_topic)`,
    in that order. Findings whose `primary_requirement` has no sub_topic get
    `sub_topic=None` and an empty breadth list.

    All (use_case, sub_topic) pairs that are missing from the process cache or
    older than `_BREADTH_TTL` are fetched with ONE `breadth_controls_batch` call
    (the old path ran ~6 queries per pair, latency #61).

    Best-effort: a failing lookup is logged and never breaks the assessment.
    Failures are NOT cached, so the next call retries, and an expired entry is
    still served when its refresh fails.

    Args:
        mapped: finding dicts; each is mutated. Empty or None is a no-op.
        db: handle passed straight to `UseCaseControlsService`.
        per_use_case: limit forwarded to the batch lookup as `per`.
    """
    if not mapped:
        return

    now = time.monotonic()
    needed: set = set()  # (use_case, sub_topic) pairs that must be (re)fetched
    for finding in mapped:
        sub_topic = _REQ_TO_SUBTOPIC.get(finding.get("primary_requirement"))
        finding["sub_topic"] = sub_topic
        if not sub_topic:
            finding["regulatory_breadth"] = []
            continue
        for use_case in usecases_for(sub_topic):
            hit = _BREADTH_CACHE.get((use_case, sub_topic, per_use_case))
            if not hit or now - hit[0] >= _BREADTH_TTL:
                needed.add((use_case, sub_topic))

    if needed:
        try:
            fetched = UseCaseControlsService(db).breadth_controls_batch(
                needed, per=per_use_case)
            # Built inside the try so a malformed result is also treated as a
            # failed lookup. Pairs absent from the result are stored as empty
            # lists: a successful "nothing found" is worth caching too.
            fresh = {
                (use_case, sub_topic, per_use_case):
                    (now, fetched.get((use_case, sub_topic)) or [])
                for use_case, sub_topic in needed
            }
        except Exception:
            # Leave the cache untouched: no 30-minute blackout after a transient
            # error, and expired entries stay available as a fallback below.
            import logging
            logging.getLogger(__name__).warning(
                "regulatory breadth lookup failed for %d (use_case, sub_topic) pair(s)",
                len(needed), exc_info=True)
        else:
            _BREADTH_CACHE.update(fresh)

    for finding in mapped:
        sub_topic = finding.get("sub_topic")
        if not sub_topic:
            continue
        merged, seen = [], set()
        for use_case in usecases_for(sub_topic):
            cached = _BREADTH_CACHE.get((use_case, sub_topic, per_use_case))
            for control in (cached[1] if cached else []):
                control_id = control.get("control_id")
                # Controls without an id cannot be de-duplicated and are skipped.
                if control_id and control_id not in seen:
                    seen.add(control_id)
                    merged.append(control)
        finding["regulatory_breadth"] = merged
|