feat(cra): Maßnahmen-Provenienz + Lizenzklasse je Normquelle
Jede Normreferenz einer Maßnahme wird lizenzklassifiziert (eu_law / public_domain / open / paid_reference) — paid-reference-Normen werden nur als Verweis geführt, nie im Text gespeichert (idea/expression). Kuratierte Maßnahmen tragen Tier 'core', KI-/Fallback-Maßnahmen 'review' (indikativ). Frontend zeigt Quellen-Badges + "indikativ"-Kennzeichnung. Methodik in docs-src/development/mapping-methodology.md (Szenario C, Due-Diligence). Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,55 @@
|
||||
"""Provenance / license classification for norm references.
|
||||
|
||||
Encodes the BreakPilot mapping methodology (idea/expression): a *reference* to
where a topic sits in a standard is a fact and citable; the paid normative *text*
is never stored or reproduced. This classifier marks which sources are freely
usable (EU law, US-gov public domain, open licenses) vs. paid standards we may
only REFERENCE by clause/control ID.

See docs-src/development/mapping-methodology.md.
|
||||
"""
|
||||
|
||||
import re

# License classes returned by classify_norm_ref().
LAW = "eu_law"                     # EU legislation — public, reproducible (EUR-Lex)
PUBLIC_DOMAIN = "public_domain"    # e.g. NIST (US gov work) — reproducible
OPEN = "open"                      # OWASP (CC), ETSI EN 303 645, BSI — freely available
PAID_REFERENCE = "paid_reference"  # ISO/IEC/EN/DIN — REFERENCE ONLY, no text stored

# Human-readable (German) label for each license class.
LABEL = {
    LAW: "EU-Recht (frei)",
    PUBLIC_DOMAIN: "Public Domain (frei)",
    OPEN: "offen lizenziert",
    PAID_REFERENCE: "kostenpflichtige Norm — nur Verweis",
}

# Keywords are compared against the lower-cased reference.
_LAW = ("2024/2847", "2023/1230", "verordnung (eu)", "maschinenverordnung", "(cra)",
        "anhang", "nis2", "nis-2", " art. ", "dsgvo", "2016/679", "2022/2555")
# Structural words that also occur in references to non-EU documents
# ("ISO/IEC 27001 Anhang A"). They decide the class only when no named
# source (public / open / paid) is recognised in the reference.
_LAW_GENERIC = ("anhang", " art. ")
_LAW_SPECIFIC = tuple(k for k in _LAW if k not in _LAW_GENERIC)
# "nistir" is listed explicitly because acronyms are matched as whole words.
_PUBLIC = ("nist", "nistir", "ntia", "nvd", "cisa")
_OPEN = ("owasp", "slsa", "etsi en 303 645", "bsi", "cyclonedx", "spdx",
         "nist privacy framework")
_PAID = ("iso", "iec", "din", "en iso", "62443", "27002", "27035", "29147",
         "30111", "15408", "18045", "13849", "13850", "13857", "14119", "14120",
         "61496", "61800", "62061", "60204", "82079", "15066", "10218", "13855")

# Matches any Unicode letter (word character that is neither digit nor "_").
_LETTER = r"[^\W\d_]"


def _compile_keywords(keywords):
    """Build one regex matching any keyword that is not embedded in a longer word.

    A keyword edge that is a letter must not touch another letter, so "nist"
    does not match inside "administrator" nor "bsi" inside "website". Edges
    that are digits or punctuation stay unconstrained ("iso27001" still
    matches "iso").
    """
    alternatives = []
    for keyword in keywords:
        left = rf"(?<!{_LETTER})" if keyword[0].isalpha() else ""
        right = rf"(?!{_LETTER})" if keyword[-1].isalpha() else ""
        alternatives.append(left + re.escape(keyword) + right)
    return re.compile("|".join(alternatives))


_PUBLIC_RE = _compile_keywords(_PUBLIC)
_OPEN_RE = _compile_keywords(_OPEN)
_PAID_RE = _compile_keywords(_PAID)


def classify_norm_ref(ref: str) -> str:
    """Return the license class of a single norm reference.

    Args:
        ref: Free-text reference such as ``"ISO/IEC 27002 8.2"``. ``None``
            and the empty string are accepted.

    Returns:
        One of ``LAW``, ``PUBLIC_DOMAIN``, ``OPEN`` or ``PAID_REFERENCE``.
        Unrecognised references fall back to ``PAID_REFERENCE``.
    """
    text = (ref or "").lower()
    # The NIST Privacy Framework counts as open, so it must be caught before
    # the generic "nist" public-domain keyword.
    if "nist privacy" in text:
        return OPEN
    if any(keyword in text for keyword in _LAW_SPECIFIC):
        return LAW
    if _PUBLIC_RE.search(text):
        return PUBLIC_DOMAIN
    if _OPEN_RE.search(text):
        return OPEN
    # A named paid standard outranks generic words like "Anhang" / "Art.".
    if _PAID_RE.search(text):
        return PAID_REFERENCE
    if any(keyword in text for keyword in _LAW_GENERIC):
        return LAW
    return PAID_REFERENCE  # conservative default: treat unknown as reference-only
|
||||
|
||||
|
||||
def classify_refs(refs) -> list:
    """Classify every norm reference in *refs*.

    Args:
        refs: Iterable of reference strings. ``None`` or an empty value
            yields ``[]``. A bare non-empty string is treated as a single
            reference instead of being iterated character by character.

    Returns:
        One ``{"ref", "license_class", "label"}`` dict per reference, in
        input order.
    """
    if isinstance(refs, str):
        # Iterating a str would classify each character separately.
        refs = [refs] if refs else []
    classified = []
    for ref in refs or []:
        license_class = classify_norm_ref(ref)
        classified.append(
            {"ref": ref, "license_class": license_class, "label": LABEL[license_class]}
        )
    return classified
|
||||
Reference in New Issue
Block a user