diff --git a/ai-compliance-sdk/data/control_mappings/cra_owasp.jsonl b/ai-compliance-sdk/data/control_mappings/cra_owasp.jsonl new file mode 100644 index 00000000..7c2aac9c --- /dev/null +++ b/ai-compliance-sdk/data/control_mappings/cra_owasp.jsonl @@ -0,0 +1,5 @@ +// Control-Mapping: CRA Annex I -> OWASP ASVS 5.0. Eine Zeile = ein Mapping (Schema: ControlMapping). +// provenance=retriever_candidate sind Vorschlaege des Control-Intent-Retrievers, NOCH NICHT kuratiert. +// Erst nach Human/Rule-Review wird provenance=human_curated/rule_based gesetzt (= Audit-Wahrheit). +{"source_norm":"CRA Annex I Part I (2)(d) — Schutz der Vertraulichkeit / Verschluesselung","source_role":"operational_requirement","target_framework":"OWASP ASVS","target_control":"V11.1.1","mapping_type":"supports","confidence":"medium","provenance":"retriever_candidate","rationale":"CRA-Vertraulichkeits-/Verschluesselungsanforderung deckt sich mit ASVS Cryptographic Inventory and Documentation (V11.1.1). Retriever-Kandidat, Review noetig.","version":"2026-06-25"} +{"source_norm":"CRA Annex I Part II — Vulnerability Handling","source_role":"operational_requirement","target_framework":"OWASP ASVS","target_control":"V6.2.4","mapping_type":"related","confidence":"low","provenance":"retriever_candidate","rationale":"User-Beispielzeile (Schema-Illustration). Part II ist Prozess-Pflicht (Schwachstellenbehandlung), V6.2.4 ist Passwort-Control — semantisch schwacher Kandidat, klarer Review-Fall.","version":"2026-06-25"} diff --git a/ai-compliance-sdk/internal/ucca/control_mapping.go b/ai-compliance-sdk/internal/ucca/control_mapping.go new file mode 100644 index 00000000..3ba67317 --- /dev/null +++ b/ai-compliance-sdk/internal/ucca/control_mapping.go @@ -0,0 +1,139 @@ +package ucca + +import ( + "bufio" + "encoding/json" + "fmt" + "os" + "path/filepath" + "strings" +) + +// ControlMapping is one persisted, versioned link from a legal obligation/requirement +// to a concrete framework control. 
// The retriever only PROPOSES candidates
// (provenance=retriever_candidate); the curated mapping (human_curated/rule_based) is the
// audited truth the Advisor uses at runtime — never re-invented per query.
type ControlMapping struct {
	SourceNorm      string `json:"source_norm"`      // e.g. "CRA Annex I Part II"
	SourceRole      string `json:"source_role"`      // source_role of the norm (operational_requirement, ...)
	TargetFramework string `json:"target_framework"` // e.g. "OWASP ASVS"
	TargetControl   string `json:"target_control"`   // e.g. "V6.2.4"
	MappingType     string `json:"mapping_type"`     // supports | partially_supports | evidence_for | related
	Confidence      string `json:"confidence"`       // high | medium | low
	Provenance      string `json:"provenance"`       // retriever_candidate | human_curated | rule_based
	Rationale       string `json:"rationale"`        // human-readable justification for the link
	Version         string `json:"version"`          // YYYY-MM-DD
}

// Allowed enum values — the deterministic "rule" layer that keeps the curated store clean.
var (
	mappingTypeValues = map[string]bool{"supports": true, "partially_supports": true, "evidence_for": true, "related": true}
	confidenceValues  = map[string]bool{"high": true, "medium": true, "low": true}
	provenanceValues  = map[string]bool{"retriever_candidate": true, "human_curated": true, "rule_based": true}
)

// Validate checks required fields + enum membership, so the persisted audit store never
// holds garbage (fail-closed at load).
//
// SourceNorm, TargetFramework and TargetControl must be non-empty; MappingType, Confidence
// and Provenance must each be one of the allowed enum values. SourceRole, Rationale and
// Version are not checked. The first violation found is returned.
func (m ControlMapping) Validate() error {
	switch {
	case m.SourceNorm == "":
		return fmt.Errorf("control mapping: source_norm required")
	case m.TargetFramework == "":
		return fmt.Errorf("control mapping: target_framework required")
	case m.TargetControl == "":
		return fmt.Errorf("control mapping: target_control required")
	case !mappingTypeValues[m.MappingType]:
		return fmt.Errorf("control mapping: invalid mapping_type %q", m.MappingType)
	case !confidenceValues[m.Confidence]:
		return fmt.Errorf("control mapping: invalid confidence %q", m.Confidence)
	case !provenanceValues[m.Provenance]:
		return fmt.Errorf("control mapping: invalid provenance %q", m.Provenance)
	}
	return nil
}

// IsCurated reports whether this mapping is part of the audited truth (not a raw candidate).
func (m ControlMapping) IsCurated() bool {
	return m.Provenance == "human_curated" || m.Provenance == "rule_based"
}

// ControlMappingSet is the loaded, indexed mapping store (forward + reverse lookup).
type ControlMappingSet struct {
	All          []ControlMapping            // every loaded mapping, in load order
	bySourceNorm map[string][]ControlMapping // forward index: source norm -> mappings
	byControl    map[string][]ControlMapping // reverse index: controlKey(framework, control) -> mappings
}

// controlKey builds the reverse-index key for a framework control.
func controlKey(framework, control string) string { return framework + ":" + control }

// ControlsFor returns the controls mapped to a source norm. curatedOnly restricts to the
// audited truth (what the Advisor may treat as fact). The result is a copy; modifying it
// does not affect the store.
func (s *ControlMappingSet) ControlsFor(sourceNorm string, curatedOnly bool) []ControlMapping {
	return filterProvenance(s.bySourceNorm[sourceNorm], curatedOnly)
}

// ObligationsFor returns the norms mapped to a framework control (reverse lookup).
// curatedOnly restricts to curated mappings. The result is a copy; modifying it does not
// affect the store.
func (s *ControlMappingSet) ObligationsFor(framework, control string, curatedOnly bool) []ControlMapping {
	return filterProvenance(s.byControl[controlKey(framework, control)], curatedOnly)
}

// filterProvenance returns the mappings of in that pass the provenance filter: all of them
// when curatedOnly is false, only those for which IsCurated is true otherwise. It always
// returns a slice that does not share storage with in.
func filterProvenance(in []ControlMapping, curatedOnly bool) []ControlMapping {
	if !curatedOnly {
		// Copy instead of handing out the index slice itself: otherwise a caller could
		// rewrite Provenance in place and turn a raw candidate into "curated" data.
		return append([]ControlMapping(nil), in...)
	}
	out := make([]ControlMapping, 0, len(in))
	for _, m := range in {
		if m.IsCurated() {
			out = append(out, m)
		}
	}
	return out
}

// LoadControlMappings reads every *.jsonl file directly under dir (one mapping per line;
// blank and //-prefixed lines ignored), validates each row, and builds the index. Files are
// processed in filename order. An invalid row aborts the whole load — fail-closed, because
// this is the audit truth, not best-effort. For the same reason a directory that cannot be
// read (for example because it does not exist) is an error rather than an empty store; an
// existing directory without *.jsonl files yields an empty set.
//
// dir is taken literally: characters such as '[' or '*' in it are not treated as a pattern.
// An empty dir means the current directory.
func LoadControlMappings(dir string) (*ControlMappingSet, error) {
	if dir == "" {
		dir = "."
	}
	// os.ReadDir reports I/O errors and returns entries sorted by filename.
	entries, err := os.ReadDir(dir)
	if err != nil {
		return nil, fmt.Errorf("control mappings: %w", err)
	}
	set := &ControlMappingSet{
		bySourceNorm: map[string][]ControlMapping{},
		byControl:    map[string][]ControlMapping{},
	}
	for _, e := range entries {
		if e.IsDir() || !strings.HasSuffix(e.Name(), ".jsonl") {
			continue
		}
		if err := loadControlMappingFile(filepath.Join(dir, e.Name()), set); err != nil {
			return nil, err
		}
	}
	return set, nil
}

// loadControlMappingFile parses one JSONL file and appends its validated rows to set and
// its indexes. Errors carry the file path and, where known, the 1-based line number.
func loadControlMappingFile(path string, set *ControlMappingSet) error {
	fh, err := os.Open(path)
	if err != nil {
		return fmt.Errorf("control mappings: %w", err)
	}
	defer fh.Close()

	sc := bufio.NewScanner(fh)
	// Allow rows of up to 1 MiB; the scanner's default token limit is 64 KiB.
	sc.Buffer(make([]byte, 0, 64*1024), 1024*1024)
	line := 0
	for sc.Scan() {
		line++
		raw := strings.TrimSpace(sc.Text())
		if raw == "" || strings.HasPrefix(raw, "//") {
			continue
		}
		var m ControlMapping
		if err := json.Unmarshal([]byte(raw), &m); err != nil {
			return fmt.Errorf("%s:%d: %w", path, line, err)
		}
		if err := m.Validate(); err != nil {
			return fmt.Errorf("%s:%d: %w", path, line, err)
		}
		set.All = append(set.All, m)
		set.bySourceNorm[m.SourceNorm] = append(set.bySourceNorm[m.SourceNorm], m)
		k := controlKey(m.TargetFramework, m.TargetControl)
		set.byControl[k] = append(set.byControl[k], m)
	}
	if err := sc.Err(); err != nil {
		// Scan stopped while reading the line after the last one it delivered.
		return fmt.Errorf("%s:%d: %w", path, line+1, err)
	}
	return nil
}
b/ai-compliance-sdk/internal/ucca/control_mapping_test.go new file mode 100644 index 00000000..90174411 --- /dev/null +++ b/ai-compliance-sdk/internal/ucca/control_mapping_test.go @@ -0,0 +1,78 @@ +package ucca + +import ( + "os" + "path/filepath" + "testing" +) + +func TestControlMapping_Validate(t *testing.T) { + valid := ControlMapping{SourceNorm: "CRA Annex I", TargetFramework: "OWASP ASVS", TargetControl: "V6.2.4", MappingType: "supports", Confidence: "high", Provenance: "human_curated"} + if err := valid.Validate(); err != nil { + t.Fatalf("valid mapping rejected: %v", err) + } + bad := []struct { + name string + m ControlMapping + }{ + {"no source_norm", ControlMapping{TargetFramework: "X", TargetControl: "Y", MappingType: "supports", Confidence: "high", Provenance: "human_curated"}}, + {"no target_control", ControlMapping{SourceNorm: "A", TargetFramework: "X", MappingType: "supports", Confidence: "high", Provenance: "human_curated"}}, + {"bad mapping_type", ControlMapping{SourceNorm: "A", TargetFramework: "X", TargetControl: "Y", MappingType: "nope", Confidence: "high", Provenance: "human_curated"}}, + {"bad confidence", ControlMapping{SourceNorm: "A", TargetFramework: "X", TargetControl: "Y", MappingType: "supports", Confidence: "huge", Provenance: "human_curated"}}, + {"bad provenance", ControlMapping{SourceNorm: "A", TargetFramework: "X", TargetControl: "Y", MappingType: "supports", Confidence: "high", Provenance: "guessed"}}, + } + for _, tt := range bad { + if err := tt.m.Validate(); err == nil { + t.Errorf("%s: expected rejection", tt.name) + } + } +} + +func TestLoadControlMappings(t *testing.T) { + dir := t.TempDir() + content := `// header comment, ignored +{"source_norm":"CRA Annex I","source_role":"operational_requirement","target_framework":"OWASP ASVS","target_control":"V11.1.1","mapping_type":"supports","confidence":"high","provenance":"human_curated","rationale":"r","version":"2026-06-25"} +{"source_norm":"CRA Annex 
I","source_role":"operational_requirement","target_framework":"OWASP ASVS","target_control":"V6.2.4","mapping_type":"related","confidence":"low","provenance":"retriever_candidate","rationale":"r","version":"2026-06-25"} + +` + if err := os.WriteFile(filepath.Join(dir, "m.jsonl"), []byte(content), 0o644); err != nil { + t.Fatal(err) + } + set, err := LoadControlMappings(dir) + if err != nil { + t.Fatalf("load: %v", err) + } + if len(set.All) != 2 { + t.Fatalf("want 2 mappings, got %d", len(set.All)) + } + if got := set.ControlsFor("CRA Annex I", false); len(got) != 2 { + t.Errorf("ControlsFor(all): want 2, got %d", len(got)) + } + if got := set.ControlsFor("CRA Annex I", true); len(got) != 1 { + t.Errorf("ControlsFor(curatedOnly): want 1 (only human_curated), got %d", len(got)) + } + if got := set.ObligationsFor("OWASP ASVS", "V11.1.1", false); len(got) != 1 { + t.Errorf("ObligationsFor reverse lookup: want 1, got %d", len(got)) + } +} + +func TestLoadControlMappings_RejectsInvalid(t *testing.T) { + dir := t.TempDir() + if err := os.WriteFile(filepath.Join(dir, "bad.jsonl"), []byte(`{"source_norm":"A","target_framework":"X","target_control":"Y","mapping_type":"BOGUS","confidence":"high","provenance":"human_curated"}`), 0o644); err != nil { + t.Fatal(err) + } + if _, err := LoadControlMappings(dir); err == nil { + t.Error("invalid mapping_type must fail the load (fail-closed audit store)") + } +} + +func TestControlMappings_SeedFileValid(t *testing.T) { + // The committed seed store must always load + validate. + set, err := LoadControlMappings("../../data/control_mappings") + if err != nil { + t.Fatalf("seed control_mappings failed to load: %v", err) + } + if len(set.All) == 0 { + t.Fatal("seed control_mappings is empty") + } +}