From 31222885b34e9e3d3bcd98c262258654fd9f6fd1 Mon Sep 17 00:00:00 2001
From: Benjamin Admin <benjaminadmin@MacBook-Pro.local>
Date: Thu, 25 Jun 2026 01:54:36 +0200
Subject: [PATCH] feat(ai-sdk): control-intent result diversity + standard-name
 classifier override
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On an implementation question impl_guidance (ENISA) keeps its earned semantic
Top-1, but the top-K now surfaces the best operational_requirement and
control_standard from the pool (ensureControlDiversity) — so different source
roles are visible instead of one role flooding the list, without forcing the
binding sources to Top-1.

A recognised standard NAME (NIST/OWASP/ISO 27001/CIS/CSA CCM/Grundschutz) now
overrides any source_class other than binding_law in classifyAuthority —
typically a mis-applied supervisory_guidance — so those standards classify and
rank as technical_standard with weight 80 (control_standard role). The corpus
tags many standards as guidance (weight 70); the name wins.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 ai-compliance-sdk/internal/ucca/authority.go  |  9 ++-
 .../internal/ucca/authority_test.go           |  1 +
 .../internal/ucca/control_role.go             | 51 +++++++++++++++++
 .../internal/ucca/control_role_test.go        | 55 +++++++++++++++++++
 .../internal/ucca/legal_rag_client.go         |  9 +++
 5 files changed, 124 insertions(+), 1 deletion(-)

diff --git a/ai-compliance-sdk/internal/ucca/authority.go b/ai-compliance-sdk/internal/ucca/authority.go
index 715f80c7..6627b57e 100644
--- a/ai-compliance-sdk/internal/ucca/authority.go
+++ b/ai-compliance-sdk/internal/ucca/authority.go
@@ -40,6 +40,14 @@ func classifyAuthority(r LegalSearchResult) authorityInfo {
 	if jur == "" {
 		jur = inferJurisdiction(r)
 	}
+	hay := r.ArticleLabel + " " + r.RegulationShort + " " + r.RegulationName + " " + r.RegulationCode
+	// A recognised standard NAME (NIST/OWASP/ISO 27001/CIS/CSA CCM/Grundschutz) is authoritative
+	// even when the corpus mis-tagged the chunk as supervisory_guidance (weight 70) — many
+	// standards were ingested with a generic guidance source_class. The name wins, so they
+	// classify (and rank) as technical_standard / control_standard. binding_law is preserved.
+	if r.SourceClass != "binding_law" && containsAny(hay, standardMarkers) {
+		return authorityInfo{weight: 80, sourceClass: "technical_standard", jurisdiction: jur}
+	}
 	if r.SourceClass != "" {
 		w := r.AuthorityWeight
 		if w == 0 && r.SourceClass == "binding_law" {
@@ -50,7 +58,6 @@ func classifyAuthority(r LegalSearchResult) authorityInfo {
 	if r.AuthorityWeight > 0 {
 		return authorityInfo{weight: r.AuthorityWeight, sourceClass: sourceClassFromWeight(r.AuthorityWeight), jurisdiction: jur}
 	}
-	hay := r.ArticleLabel + " " + r.RegulationShort + " " + r.RegulationName + " " + r.RegulationCode
 	switch {
 	case containsAny(hay, foreignMarkers):
 		return authorityInfo{weight: 0, sourceClass: "foreign_law", jurisdiction: "CH"}
diff --git a/ai-compliance-sdk/internal/ucca/authority_test.go b/ai-compliance-sdk/internal/ucca/authority_test.go
index 5e63e2a6..ade01b18 100644
--- a/ai-compliance-sdk/internal/ucca/authority_test.go
+++ b/ai-compliance-sdk/internal/ucca/authority_test.go
@@ -15,6 +15,7 @@ func TestClassifyAuthority(t *testing.T) {
 		{"tagged foreign CH", LegalSearchResult{AuthorityWeight: 0, SourceClass: "foreign_law", Jurisdiction: "CH"}, 0, "foreign_law", "CH"},
 		{"untagged ENISA guidance", LegalSearchResult{RegulationShort: "ENISA", ArticleLabel: "ENISA CRA Standards Mapping"}, 70, "supervisory_guidance", "EU"},
 		{"untagged NIST standard", LegalSearchResult{RegulationShort: "NIST SP 800-82r3", ArticleLabel: "AU-8"}, 80, "technical_standard", "EU"},
+		{"mis-tagged NIST guidance -> standard by name", LegalSearchResult{SourceClass: "supervisory_guidance", AuthorityWeight: 70, RegulationShort: "NIST SP 800-82r3", ArticleLabel: "NIST SP 800-82r3"}, 80, "technical_standard", "EU"},
 		{"BSI Grundschutz standard beats BSI guidance", LegalSearchResult{RegulationShort: "BSI Grundschutz", ArticleLabel: "BSI Grundschutz Baustein"}, 80, "technical_standard", "DE"},
 		{"weight-only 85 TRGS standard", LegalSearchResult{AuthorityWeight: 85, RegulationShort: "TRGS 529"}, 85, "technical_standard", "EU"},
 		{"tagged technical_standard", LegalSearchResult{AuthorityWeight: 80, SourceClass: "technical_standard", Jurisdiction: "EU"}, 80, "technical_standard", "EU"},
diff --git a/ai-compliance-sdk/internal/ucca/control_role.go b/ai-compliance-sdk/internal/ucca/control_role.go
index 8cfa509e..68a630c4 100644
--- a/ai-compliance-sdk/internal/ucca/control_role.go
+++ b/ai-compliance-sdk/internal/ucca/control_role.go
@@ -121,3 +121,54 @@ func controlRoleOf(payload map[string]interface{}) string {
 		IsRecital:       getBool(payload, "is_recital"),
 	})
 }
+
+// ensureControlDiversity makes the top-K of a control question surface at least one
+// operational_requirement and one control_standard WHEN the pool contains them — without
+// forcing them to Top-1. For each missing role, the best-ranked tail hit overwrites the
+// lowest-ranked REDUNDANT implementation_guidance slot, i.e. one that still leaves another
+// guidance hit in the top-K. The leading guidance hit is therefore never overwritten (also
+// not for topK of 1 or 2), so guidance keeps its earned semantic lead. results is modified in
+// place and returned; the promoted hit's original (now duplicate) position stays in the tail
+// and is expected to be dropped by the caller's truncation to topK.
+func ensureControlDiversity(results []LegalSearchResult, topK int) []LegalSearchResult {
+	if topK <= 0 || topK >= len(results) {
+		return results // everything is already returned — nothing to promote
+	}
+	roleAt := make([]string, len(results))
+	for i := range results {
+		roleAt[i] = classifyRole(results[i])
+	}
+	present := make(map[string]bool, topK)
+	guidance := 0 // implementation_guidance hits currently inside the top-K
+	for _, role := range roleAt[:topK] {
+		present[role] = true
+		if role == roleImplGuidance {
+			guidance++
+		}
+	}
+	for _, want := range []string{roleOperationalReq, roleControlStandard} {
+		if present[want] {
+			continue
+		}
+		src := -1
+		for i := topK; i < len(results); i++ {
+			if roleAt[i] == want {
+				src = i
+				break
+			}
+		}
+		if src < 0 || guidance < 2 {
+			continue // role absent from the pool, or no redundant guidance to sacrifice
+		}
+		// guidance >= 2, so the lowest-ranked guidance slot is never the leading one.
+		dst := topK - 1
+		for roleAt[dst] != roleImplGuidance {
+			dst--
+		}
+		results[dst] = results[src]
+		roleAt[dst] = want
+		present[want] = true
+		guidance--
+	}
+	return results
+}
diff --git a/ai-compliance-sdk/internal/ucca/control_role_test.go b/ai-compliance-sdk/internal/ucca/control_role_test.go
index 597516d6..9842ae73 100644
--- a/ai-compliance-sdk/internal/ucca/control_role_test.go
+++ b/ai-compliance-sdk/internal/ucca/control_role_test.go
@@ -77,3 +77,58 @@ func TestControlRoleOf_Payload(t *testing.T) {
 		t.Errorf("DORA abstract article role = %q must be excluded from the control-pool", got)
 	}
 }
+
+func headHasRole(head []LegalSearchResult, role string) bool {
+	for i := range head {
+		if got := classifyRole(head[i]); got == role {
+			return true // first hit with the wanted role settles it
+		}
+	}
+	return false // no hit in head classified as role
+}
+
+func TestEnsureControlDiversity(t *testing.T) {
+	guidance := func(tag string) LegalSearchResult { // ENISA hit, expected to classify as guidance
+		return LegalSearchResult{RegulationShort: "ENISA " + tag + " Good Practices"}
+	}
+	requirement := LegalSearchResult{RegulationShort: "CRA", ArticleLabel: "CRA Anhang I", Category: "regulation"}
+	standard := LegalSearchResult{RegulationShort: "NIST SP 800-53"}
+
+	t.Run("injects missing op_req + control_standard, guidance keeps Top-1", func(t *testing.T) {
+		pool := []LegalSearchResult{guidance("A"), guidance("B"), guidance("C"), standard, requirement}
+		top := ensureControlDiversity(pool, 3)[:3]
+		if role := classifyRole(top[0]); role != roleImplGuidance {
+			t.Errorf("Top-1 should stay implementation_guidance, got %q", role)
+		}
+		if !headHasRole(top, roleOperationalReq) {
+			t.Error("top-K must contain an operational_requirement after diversity")
+		}
+		if !headHasRole(top, roleControlStandard) {
+			t.Error("top-K must contain a control_standard after diversity")
+		}
+	})
+
+	t.Run("no-op when both roles already present", func(t *testing.T) {
+		got := ensureControlDiversity([]LegalSearchResult{requirement, standard, guidance("A"), guidance("B")}, 3)
+		if first, second := classifyRole(got[0]), classifyRole(got[1]); first != roleOperationalReq || second != roleControlStandard {
+			t.Error("already-diverse top-K must be left untouched")
+		}
+	})
+
+	t.Run("absent role is not forced (no panic)", func(t *testing.T) {
+		top := ensureControlDiversity([]LegalSearchResult{guidance("A"), guidance("B"), guidance("C"), standard}, 3)[:3]
+		if !headHasRole(top, roleControlStandard) {
+			t.Error("present control_standard should be injected")
+		}
+		if headHasRole(top, roleOperationalReq) {
+			t.Error("operational_requirement absent from the pool must NOT appear")
+		}
+	})
+
+	t.Run("topK covering the whole pool is unchanged", func(t *testing.T) {
+		got := ensureControlDiversity([]LegalSearchResult{guidance("A"), requirement}, 5)
+		if n := len(got); n != 2 || classifyRole(got[0]) != roleImplGuidance {
+			t.Error("topK >= len must return results unchanged")
+		}
+	})
+}
diff --git a/ai-compliance-sdk/internal/ucca/legal_rag_client.go b/ai-compliance-sdk/internal/ucca/legal_rag_client.go
index 0ac9f489..d0bb408e 100644
--- a/ai-compliance-sdk/internal/ucca/legal_rag_client.go
+++ b/ai-compliance-sdk/internal/ucca/legal_rag_client.go
@@ -166,6 +166,15 @@ func (c *LegalRAGClient) searchInternal(ctx context.Context, collection string,
 	// Response-Schema unveraendert. Score traegt den Authority-Score, damit nachgelagerte
 	// Multi-Collection-Merges (Advisor) die Ordnung bewahren.
 	results = rerankByAuthority(query, results)
+
+	// Control-Diversity: auf einer Umsetzungsfrage darf impl_guidance (ENISA) Top-1 bleiben,
+	// aber die Top-K soll mindestens eine binding operational_requirement (CRA Anhang I) und
+	// einen control_standard (NIST/ISO) zeigen, falls im Pool — Quellenarten sichtbar machen
+	// statt sie kuenstlich auf Top-1 zu heben. Nur Reihenfolge, vor der Truncation.
+	if queryWantsControls(query) {
+		results = ensureControlDiversity(results, topK)
+	}
+
 	if topK > 0 && len(results) > topK {
 		results = results[:topK]
 	}