feat(ucca): persisted Control-Mapping data model (Obligation -> framework control)

Versioned JSONL store + Go model for Regulation->Control mappings, per the
A-decision: the retriever only PROPOSES candidates; the curated mapping is the
audited truth the Advisor uses at runtime, never re-invented per query.

- ControlMapping struct (source_norm/source_role/target_framework/target_control/
  mapping_type/confidence/provenance/rationale/version)
- enum validation (rule layer), fail-closed loader, forward+reverse index,
  curated-only filter (IsCurated)
- seed: 2 retriever_candidate rows CRA Annex I -> OWASP ASVS (not yet curated)

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-06-25 09:32:15 +02:00
parent a3053c3c86
commit d987e4fde6
3 changed files with 222 additions and 0 deletions
@@ -0,0 +1,5 @@
// Control-Mapping: CRA Annex I -> OWASP ASVS 5.0. Eine Zeile = ein Mapping (Schema: ControlMapping).
// provenance=retriever_candidate sind Vorschlaege des Control-Intent-Retrievers, NOCH NICHT kuratiert.
// Erst nach Human/Rule-Review wird provenance=human_curated/rule_based gesetzt (= Audit-Wahrheit).
{"source_norm":"CRA Annex I Part I (2)(d) — Schutz der Vertraulichkeit / Verschluesselung","source_role":"operational_requirement","target_framework":"OWASP ASVS","target_control":"V11.1.1","mapping_type":"supports","confidence":"medium","provenance":"retriever_candidate","rationale":"CRA-Vertraulichkeits-/Verschluesselungsanforderung deckt sich mit ASVS Cryptographic Inventory and Documentation (V11.1.1). Retriever-Kandidat, Review noetig.","version":"2026-06-25"}
{"source_norm":"CRA Annex I Part II — Vulnerability Handling","source_role":"operational_requirement","target_framework":"OWASP ASVS","target_control":"V6.2.4","mapping_type":"related","confidence":"low","provenance":"retriever_candidate","rationale":"User-Beispielzeile (Schema-Illustration). Part II ist Prozess-Pflicht (Schwachstellenbehandlung), V6.2.4 ist Passwort-Control — semantisch schwacher Kandidat, klarer Review-Fall.","version":"2026-06-25"}
@@ -0,0 +1,139 @@
package ucca
import (
"bufio"
"encoding/json"
"fmt"
"os"
"path/filepath"
"strings"
)
// ControlMapping is a single persisted, versioned link between a legal
// obligation/requirement and one concrete framework control.
//
// Rows with provenance "retriever_candidate" are only proposals made by the
// retriever. Rows with provenance "human_curated" or "rule_based" form the
// curated, audited truth the Advisor uses at runtime instead of re-deriving a
// mapping per query.
type ControlMapping struct {
	// SourceNorm names the originating norm, e.g. "CRA Annex I Part II".
	SourceNorm string `json:"source_norm"`
	// SourceRole is the role of the norm, e.g. "operational_requirement".
	SourceRole string `json:"source_role"`
	// TargetFramework names the control framework, e.g. "OWASP ASVS".
	TargetFramework string `json:"target_framework"`
	// TargetControl identifies the control inside the framework, e.g. "V6.2.4".
	TargetControl string `json:"target_control"`
	// MappingType is one of: supports, partially_supports, evidence_for, related.
	MappingType string `json:"mapping_type"`
	// Confidence is one of: high, medium, low.
	Confidence string `json:"confidence"`
	// Provenance is one of: retriever_candidate, human_curated, rule_based.
	Provenance string `json:"provenance"`
	// Rationale is the free-text justification for the mapping.
	Rationale string `json:"rationale"`
	// Version is the revision date of the row, written as YYYY-MM-DD.
	Version string `json:"version"`
}
// Permitted enum values for the string-typed ControlMapping fields. They are the
// deterministic "rule" layer that keeps the curated store clean: a lookup of any
// value not listed here yields false.
var (
	// mappingTypeValues holds the accepted mapping_type values.
	mappingTypeValues = map[string]bool{
		"supports":           true,
		"partially_supports": true,
		"evidence_for":       true,
		"related":            true,
	}

	// confidenceValues holds the accepted confidence values.
	confidenceValues = map[string]bool{
		"high":   true,
		"medium": true,
		"low":    true,
	}

	// provenanceValues holds the accepted provenance values.
	provenanceValues = map[string]bool{
		"retriever_candidate": true,
		"human_curated":       true,
		"rule_based":          true,
	}
)
// Validate reports the first problem found in m, or nil when m is acceptable.
//
// Checks run in a fixed order: source_norm, target_framework and target_control
// must be non-empty, then mapping_type, confidence and provenance must each be
// one of the allowed enum values. source_role, rationale and version are not
// checked. The loader calls this for every row so the persisted audit store
// never holds garbage (fail-closed at load).
func (m ControlMapping) Validate() error {
	// Required identity fields.
	if m.SourceNorm == "" {
		return fmt.Errorf("control mapping: source_norm required")
	}
	if m.TargetFramework == "" {
		return fmt.Errorf("control mapping: target_framework required")
	}
	if m.TargetControl == "" {
		return fmt.Errorf("control mapping: target_control required")
	}

	// Enum-typed fields; an empty string is not a member of any set.
	if allowed := mappingTypeValues[m.MappingType]; !allowed {
		return fmt.Errorf("control mapping: invalid mapping_type %q", m.MappingType)
	}
	if allowed := confidenceValues[m.Confidence]; !allowed {
		return fmt.Errorf("control mapping: invalid confidence %q", m.Confidence)
	}
	if allowed := provenanceValues[m.Provenance]; !allowed {
		return fmt.Errorf("control mapping: invalid provenance %q", m.Provenance)
	}
	return nil
}
// IsCurated reports whether m belongs to the audited truth, i.e. its provenance
// is "human_curated" or "rule_based". Any other provenance, including
// "retriever_candidate", counts as a raw candidate.
func (m ControlMapping) IsCurated() bool {
	switch m.Provenance {
	case "human_curated", "rule_based":
		return true
	default:
		return false
	}
}
// ControlMappingSet is the in-memory mapping store produced by
// LoadControlMappings, offering forward (norm -> controls) and reverse
// (control -> norms) lookup.
type ControlMappingSet struct {
	// All lists every loaded mapping in the order the rows were read.
	All []ControlMapping

	// bySourceNorm is keyed by ControlMapping.SourceNorm; byControl is keyed by
	// controlKey(TargetFramework, TargetControl).
	bySourceNorm, byControl map[string][]ControlMapping
}
// controlKey builds the reverse-index key for a (framework, control) pair.
//
// The framework is length-prefixed so the key is unambiguous even when a name
// contains ':' (e.g. "ISO 27001:2022"). A plain "framework:control" join would
// map ("ISO 27001:2022", "A.5.1") and ("ISO 27001", "2022:A.5.1") to the same key.
func controlKey(framework, control string) string {
	return fmt.Sprintf("%d:%s:%s", len(framework), framework, control)
}
// ControlsFor is the forward lookup: it returns the mappings whose source norm
// equals sourceNorm. With curatedOnly set, only curated mappings (the audited
// truth the Advisor may treat as fact) are returned.
func (s *ControlMappingSet) ControlsFor(sourceNorm string, curatedOnly bool) []ControlMapping {
	matches := s.bySourceNorm[sourceNorm]
	return filterProvenance(matches, curatedOnly)
}
// ObligationsFor is the reverse lookup: it returns the mappings that target the
// given control of the given framework. With curatedOnly set, only curated
// mappings are returned.
func (s *ControlMappingSet) ObligationsFor(framework, control string, curatedOnly bool) []ControlMapping {
	key := controlKey(framework, control)
	matches := s.byControl[key]
	return filterProvenance(matches, curatedOnly)
}
// filterProvenance returns the mappings from in that the caller may see: all of
// them when curatedOnly is false, otherwise only those for which IsCurated is
// true.
//
// The result is always a slice the caller owns. It never aliases in, so writing
// to or appending to a lookup result cannot alter the store's index.
func filterProvenance(in []ControlMapping, curatedOnly bool) []ControlMapping {
	if !curatedOnly {
		// Appending onto a nil slice copies the elements and still yields nil
		// for an empty input, so a lookup miss keeps returning a nil slice.
		return append([]ControlMapping(nil), in...)
	}
	out := make([]ControlMapping, 0, len(in))
	for _, m := range in {
		if m.IsCurated() {
			out = append(out, m)
		}
	}
	return out
}
// LoadControlMappings reads every *.jsonl file directly under dir and returns
// the indexed mapping store.
//
// Each file holds one JSON-encoded ControlMapping per line; blank lines and
// lines starting with "//" are skipped. Every row is validated, and the first
// unreadable file, malformed row or invalid row aborts the whole load with an
// error naming the file and line — fail-closed, because this is the audit
// truth, not best-effort. On error the returned set is nil.
//
// dir is joined into a filepath.Glob pattern, so a directory with no matching
// files yields an empty set and a nil error.
func LoadControlMappings(dir string) (*ControlMappingSet, error) {
	files, err := filepath.Glob(filepath.Join(dir, "*.jsonl"))
	if err != nil {
		return nil, err
	}
	set := &ControlMappingSet{
		bySourceNorm: map[string][]ControlMapping{},
		byControl:    map[string][]ControlMapping{},
	}
	for _, f := range files {
		if err := loadControlMappingFile(set, f); err != nil {
			return nil, err
		}
	}
	return set, nil
}

// loadControlMappingFile parses the JSONL file at path and adds each valid row
// to set.All and to both indexes. It stops at the first error.
func loadControlMappingFile(set *ControlMappingSet, path string) error {
	fh, err := os.Open(path)
	if err != nil {
		return err
	}
	// The handle is read-only, so the Close error is intentionally ignored.
	defer fh.Close()

	sc := bufio.NewScanner(fh)
	// Allow rows of up to 1 MiB; the scanner's default token limit is lower.
	sc.Buffer(make([]byte, 0, 64*1024), 1024*1024)
	line := 0
	for sc.Scan() {
		line++
		raw := strings.TrimSpace(sc.Text())
		if raw == "" || strings.HasPrefix(raw, "//") {
			continue
		}
		var m ControlMapping
		if err := json.Unmarshal([]byte(raw), &m); err != nil {
			return fmt.Errorf("%s:%d: %w", path, line, err)
		}
		if err := m.Validate(); err != nil {
			return fmt.Errorf("%s:%d: %w", path, line, err)
		}
		set.All = append(set.All, m)
		set.bySourceNorm[m.SourceNorm] = append(set.bySourceNorm[m.SourceNorm], m)
		k := controlKey(m.TargetFramework, m.TargetControl)
		set.byControl[k] = append(set.byControl[k], m)
	}
	if err := sc.Err(); err != nil {
		// Scanning stopped before the next line was delivered (read error or
		// over-long line), so report the last line that was read successfully.
		return fmt.Errorf("%s: after line %d: %w", path, line, err)
	}
	return nil
}
@@ -0,0 +1,78 @@
package ucca
import (
"os"
"path/filepath"
"testing"
)
// TestControlMapping_Validate checks that a fully populated mapping passes and
// that each required-field and enum check in Validate rejects a mapping that
// violates only that check.
func TestControlMapping_Validate(t *testing.T) {
	valid := ControlMapping{SourceNorm: "CRA Annex I", TargetFramework: "OWASP ASVS", TargetControl: "V6.2.4", MappingType: "supports", Confidence: "high", Provenance: "human_curated"}
	if err := valid.Validate(); err != nil {
		t.Fatalf("valid mapping rejected: %v", err)
	}

	// One case per check in Validate; every other field is valid.
	bad := []struct {
		name string
		m    ControlMapping
	}{
		{"no source_norm", ControlMapping{TargetFramework: "X", TargetControl: "Y", MappingType: "supports", Confidence: "high", Provenance: "human_curated"}},
		{"no target_framework", ControlMapping{SourceNorm: "A", TargetControl: "Y", MappingType: "supports", Confidence: "high", Provenance: "human_curated"}},
		{"no target_control", ControlMapping{SourceNorm: "A", TargetFramework: "X", MappingType: "supports", Confidence: "high", Provenance: "human_curated"}},
		{"bad mapping_type", ControlMapping{SourceNorm: "A", TargetFramework: "X", TargetControl: "Y", MappingType: "nope", Confidence: "high", Provenance: "human_curated"}},
		{"bad confidence", ControlMapping{SourceNorm: "A", TargetFramework: "X", TargetControl: "Y", MappingType: "supports", Confidence: "huge", Provenance: "human_curated"}},
		{"bad provenance", ControlMapping{SourceNorm: "A", TargetFramework: "X", TargetControl: "Y", MappingType: "supports", Confidence: "high", Provenance: "guessed"}},
	}
	for _, tt := range bad {
		// Subtests run synchronously, so capturing tt in the closure is safe.
		t.Run(tt.name, func(t *testing.T) {
			if err := tt.m.Validate(); err == nil {
				t.Errorf("%s: expected rejection", tt.name)
			}
		})
	}
}
// TestLoadControlMappings loads a two-row fixture (one curated row, one
// retriever candidate, preceded by a comment line) and checks the total count,
// the forward lookup with and without the curated-only filter, and the reverse
// lookup.
func TestLoadControlMappings(t *testing.T) {
	const fixture = `// header comment, ignored
{"source_norm":"CRA Annex I","source_role":"operational_requirement","target_framework":"OWASP ASVS","target_control":"V11.1.1","mapping_type":"supports","confidence":"high","provenance":"human_curated","rationale":"r","version":"2026-06-25"}
{"source_norm":"CRA Annex I","source_role":"operational_requirement","target_framework":"OWASP ASVS","target_control":"V6.2.4","mapping_type":"related","confidence":"low","provenance":"retriever_candidate","rationale":"r","version":"2026-06-25"}
`
	tmp := t.TempDir()
	fixturePath := filepath.Join(tmp, "m.jsonl")
	if werr := os.WriteFile(fixturePath, []byte(fixture), 0o644); werr != nil {
		t.Fatal(werr)
	}

	set, err := LoadControlMappings(tmp)
	if err != nil {
		t.Fatalf("load: %v", err)
	}
	if n := len(set.All); n != 2 {
		t.Fatalf("want 2 mappings, got %d", n)
	}

	// Forward lookup, unfiltered and restricted to curated rows.
	everything := set.ControlsFor("CRA Annex I", false)
	if n := len(everything); n != 2 {
		t.Errorf("ControlsFor(all): want 2, got %d", n)
	}
	curated := set.ControlsFor("CRA Annex I", true)
	if n := len(curated); n != 1 {
		t.Errorf("ControlsFor(curatedOnly): want 1 (only human_curated), got %d", n)
	}

	// Reverse lookup from a framework control back to its norms.
	norms := set.ObligationsFor("OWASP ASVS", "V11.1.1", false)
	if n := len(norms); n != 1 {
		t.Errorf("ObligationsFor reverse lookup: want 1, got %d", n)
	}
}
// TestLoadControlMappings_RejectsInvalid verifies that a single row with an
// unknown mapping_type makes the whole load fail.
func TestLoadControlMappings_RejectsInvalid(t *testing.T) {
	const badRow = `{"source_norm":"A","target_framework":"X","target_control":"Y","mapping_type":"BOGUS","confidence":"high","provenance":"human_curated"}`

	tmp := t.TempDir()
	badPath := filepath.Join(tmp, "bad.jsonl")
	if werr := os.WriteFile(badPath, []byte(badRow), 0o644); werr != nil {
		t.Fatal(werr)
	}

	_, err := LoadControlMappings(tmp)
	if err == nil {
		t.Error("invalid mapping_type must fail the load (fail-closed audit store)")
	}
}
// TestControlMappings_SeedFileValid guards the committed seed store: it must
// always load, validate, and contain at least one mapping.
func TestControlMappings_SeedFileValid(t *testing.T) {
	// Path of the seed directory as given to the loader (relative).
	const seedDir = "../../data/control_mappings"

	set, err := LoadControlMappings(seedDir)
	switch {
	case err != nil:
		t.Fatalf("seed control_mappings failed to load: %v", err)
	case len(set.All) == 0:
		t.Fatal("seed control_mappings is empty")
	}
}