feat(ai-sdk): control-intent result diversity + standard-name classifier override

On an implementation question, impl_guidance (ENISA) keeps its earned semantic Top-1, but the top-K now also surfaces the best operational_requirement and control_standard from the pool (ensureControlDiversity). Different source roles therefore stay visible instead of one role flooding the list, without forcing the binding sources to Top-1.

A recognised standard NAME (NIST/OWASP/ISO 27001/CIS/CSA CCM/Grundschutz) now overrides a mis-applied supervisory_guidance source_class in classifyAuthority — in practice any source_class other than binding_law — so those standards classify and rank as technical_standard (control_standard role, weight 80). The corpus tags many standards as guidance (weight 70); the name wins.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-25 01:54:36 +02:00
parent d9d04deb00
commit 31222885b3
5 changed files with 124 additions and 1 deletions
@@ -40,6 +40,14 @@ func classifyAuthority(r LegalSearchResult) authorityInfo {
 	if jur == "" {
 		jur = inferJurisdiction(r)
 	}
+	hay := r.ArticleLabel + " " + r.RegulationShort + " " + r.RegulationName + " " + r.RegulationCode
+	// A recognised standard NAME (NIST/OWASP/ISO 27001/CIS/CSA CCM/Grundschutz) is authoritative:
+	// many standards were ingested with a generic supervisory_guidance source_class (weight 70).
+	// The name wins over every source_class except binding_law (an empty one included), so such
+	// chunks classify (and rank) as technical_standard / control_standard with weight 80.
+	if r.SourceClass != "binding_law" && containsAny(hay, standardMarkers) {
+		return authorityInfo{weight: 80, sourceClass: "technical_standard", jurisdiction: jur}
+	}
 	if r.SourceClass != "" {
 		w := r.AuthorityWeight
 		if w == 0 && r.SourceClass == "binding_law" {
@@ -50,7 +58,6 @@ func classifyAuthority(r LegalSearchResult) authorityInfo {
 	if r.AuthorityWeight > 0 {
 		return authorityInfo{weight: r.AuthorityWeight, sourceClass: sourceClassFromWeight(r.AuthorityWeight), jurisdiction: jur}
 	}
-	hay := r.ArticleLabel + " " + r.RegulationShort + " " + r.RegulationName + " " + r.RegulationCode
 	switch {
 	case containsAny(hay, foreignMarkers):
 		return authorityInfo{weight: 0, sourceClass: "foreign_law", jurisdiction: "CH"}