Compare commits
1 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 31222885b3 |
@@ -40,6 +40,14 @@ func classifyAuthority(r LegalSearchResult) authorityInfo {
|
|||||||
if jur == "" {
|
if jur == "" {
|
||||||
jur = inferJurisdiction(r)
|
jur = inferJurisdiction(r)
|
||||||
}
|
}
|
||||||
|
hay := r.ArticleLabel + " " + r.RegulationShort + " " + r.RegulationName + " " + r.RegulationCode
|
||||||
|
// A recognised standard NAME (NIST/OWASP/ISO 27001/CIS/CSA CCM/Grundschutz) is authoritative
|
||||||
|
// even when the corpus mis-tagged the chunk as supervisory_guidance (weight 70) — many
|
||||||
|
// standards were ingested with a generic guidance source_class. The name wins, so they
|
||||||
|
// classify (and rank) as technical_standard / control_standard. binding_law is preserved.
|
||||||
|
if r.SourceClass != "binding_law" && containsAny(hay, standardMarkers) {
|
||||||
|
return authorityInfo{weight: 80, sourceClass: "technical_standard", jurisdiction: jur}
|
||||||
|
}
|
||||||
if r.SourceClass != "" {
|
if r.SourceClass != "" {
|
||||||
w := r.AuthorityWeight
|
w := r.AuthorityWeight
|
||||||
if w == 0 && r.SourceClass == "binding_law" {
|
if w == 0 && r.SourceClass == "binding_law" {
|
||||||
@@ -50,7 +58,6 @@ func classifyAuthority(r LegalSearchResult) authorityInfo {
|
|||||||
if r.AuthorityWeight > 0 {
|
if r.AuthorityWeight > 0 {
|
||||||
return authorityInfo{weight: r.AuthorityWeight, sourceClass: sourceClassFromWeight(r.AuthorityWeight), jurisdiction: jur}
|
return authorityInfo{weight: r.AuthorityWeight, sourceClass: sourceClassFromWeight(r.AuthorityWeight), jurisdiction: jur}
|
||||||
}
|
}
|
||||||
hay := r.ArticleLabel + " " + r.RegulationShort + " " + r.RegulationName + " " + r.RegulationCode
|
|
||||||
switch {
|
switch {
|
||||||
case containsAny(hay, foreignMarkers):
|
case containsAny(hay, foreignMarkers):
|
||||||
return authorityInfo{weight: 0, sourceClass: "foreign_law", jurisdiction: "CH"}
|
return authorityInfo{weight: 0, sourceClass: "foreign_law", jurisdiction: "CH"}
|
||||||
|
|||||||
@@ -15,6 +15,7 @@ func TestClassifyAuthority(t *testing.T) {
|
|||||||
{"tagged foreign CH", LegalSearchResult{AuthorityWeight: 0, SourceClass: "foreign_law", Jurisdiction: "CH"}, 0, "foreign_law", "CH"},
|
{"tagged foreign CH", LegalSearchResult{AuthorityWeight: 0, SourceClass: "foreign_law", Jurisdiction: "CH"}, 0, "foreign_law", "CH"},
|
||||||
{"untagged ENISA guidance", LegalSearchResult{RegulationShort: "ENISA", ArticleLabel: "ENISA CRA Standards Mapping"}, 70, "supervisory_guidance", "EU"},
|
{"untagged ENISA guidance", LegalSearchResult{RegulationShort: "ENISA", ArticleLabel: "ENISA CRA Standards Mapping"}, 70, "supervisory_guidance", "EU"},
|
||||||
{"untagged NIST standard", LegalSearchResult{RegulationShort: "NIST SP 800-82r3", ArticleLabel: "AU-8"}, 80, "technical_standard", "EU"},
|
{"untagged NIST standard", LegalSearchResult{RegulationShort: "NIST SP 800-82r3", ArticleLabel: "AU-8"}, 80, "technical_standard", "EU"},
|
||||||
|
{"mis-tagged NIST guidance -> standard by name", LegalSearchResult{SourceClass: "supervisory_guidance", AuthorityWeight: 70, RegulationShort: "NIST SP 800-82r3", ArticleLabel: "NIST SP 800-82r3"}, 80, "technical_standard", "EU"},
|
||||||
{"BSI Grundschutz standard beats BSI guidance", LegalSearchResult{RegulationShort: "BSI Grundschutz", ArticleLabel: "BSI Grundschutz Baustein"}, 80, "technical_standard", "DE"},
|
{"BSI Grundschutz standard beats BSI guidance", LegalSearchResult{RegulationShort: "BSI Grundschutz", ArticleLabel: "BSI Grundschutz Baustein"}, 80, "technical_standard", "DE"},
|
||||||
{"weight-only 85 TRGS standard", LegalSearchResult{AuthorityWeight: 85, RegulationShort: "TRGS 529"}, 85, "technical_standard", "EU"},
|
{"weight-only 85 TRGS standard", LegalSearchResult{AuthorityWeight: 85, RegulationShort: "TRGS 529"}, 85, "technical_standard", "EU"},
|
||||||
{"tagged technical_standard", LegalSearchResult{AuthorityWeight: 80, SourceClass: "technical_standard", Jurisdiction: "EU"}, 80, "technical_standard", "EU"},
|
{"tagged technical_standard", LegalSearchResult{AuthorityWeight: 80, SourceClass: "technical_standard", Jurisdiction: "EU"}, 80, "technical_standard", "EU"},
|
||||||
|
|||||||
@@ -121,3 +121,54 @@ func controlRoleOf(payload map[string]interface{}) string {
|
|||||||
IsRecital: getBool(payload, "is_recital"),
|
IsRecital: getBool(payload, "is_recital"),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ensureControlDiversity guarantees that the returned top-K of a control question surfaces at
|
||||||
|
// least one operational_requirement and one control_standard WHEN the pool contains them —
|
||||||
|
// without forcing them to Top-1. implementation_guidance (e.g. ENISA good practices) keeps its
|
||||||
|
// earned semantic lead; the rule only promotes the best hit of a missing control role into the
|
||||||
|
// top-K by overwriting the lowest-ranked redundant guidance slot. So an implementation question
|
||||||
|
// shows the relevant source ROLES (binding requirement + standard + guidance) side by side
|
||||||
|
// instead of one role flooding the list. The promoted hit's original (now duplicate) position
|
||||||
|
// stays in the tail and is dropped by the caller's truncation to topK.
|
||||||
|
func ensureControlDiversity(results []LegalSearchResult, topK int) []LegalSearchResult {
|
||||||
|
if topK <= 0 || topK >= len(results) {
|
||||||
|
return results // everything is already returned — nothing to promote
|
||||||
|
}
|
||||||
|
roleAt := make([]string, len(results))
|
||||||
|
for i := range results {
|
||||||
|
roleAt[i] = classifyRole(results[i])
|
||||||
|
}
|
||||||
|
present := make(map[string]bool, topK)
|
||||||
|
for i := 0; i < topK; i++ {
|
||||||
|
present[roleAt[i]] = true
|
||||||
|
}
|
||||||
|
for _, want := range []string{roleOperationalReq, roleControlStandard} {
|
||||||
|
if present[want] {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
src := -1
|
||||||
|
for i := topK; i < len(results); i++ {
|
||||||
|
if roleAt[i] == want {
|
||||||
|
src = i
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if src < 0 {
|
||||||
|
continue // role absent from the whole pool — nothing to promote
|
||||||
|
}
|
||||||
|
dst := -1
|
||||||
|
for j := topK - 1; j >= 0; j-- {
|
||||||
|
if roleAt[j] == roleImplGuidance {
|
||||||
|
dst = j
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if dst < 0 {
|
||||||
|
continue // no redundant guidance to sacrifice — leave the head untouched
|
||||||
|
}
|
||||||
|
results[dst] = results[src]
|
||||||
|
roleAt[dst] = want
|
||||||
|
present[want] = true
|
||||||
|
}
|
||||||
|
return results
|
||||||
|
}
|
||||||
|
|||||||
@@ -77,3 +77,58 @@ func TestControlRoleOf_Payload(t *testing.T) {
|
|||||||
t.Errorf("DORA abstract article role = %q must be excluded from the control-pool", got)
|
t.Errorf("DORA abstract article role = %q must be excluded from the control-pool", got)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func headHasRole(head []LegalSearchResult, role string) bool {
|
||||||
|
for _, r := range head {
|
||||||
|
if classifyRole(r) == role {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestEnsureControlDiversity(t *testing.T) {
|
||||||
|
ig := func(n string) LegalSearchResult {
|
||||||
|
return LegalSearchResult{RegulationShort: "ENISA " + n + " Good Practices"}
|
||||||
|
}
|
||||||
|
opReq := LegalSearchResult{RegulationShort: "CRA", ArticleLabel: "CRA Anhang I", Category: "regulation"}
|
||||||
|
std := LegalSearchResult{RegulationShort: "NIST SP 800-53"}
|
||||||
|
|
||||||
|
t.Run("injects missing op_req + control_standard, guidance keeps Top-1", func(t *testing.T) {
|
||||||
|
out := ensureControlDiversity([]LegalSearchResult{ig("A"), ig("B"), ig("C"), std, opReq}, 3)
|
||||||
|
head := out[:3]
|
||||||
|
if classifyRole(head[0]) != roleImplGuidance {
|
||||||
|
t.Errorf("Top-1 should stay implementation_guidance, got %q", classifyRole(head[0]))
|
||||||
|
}
|
||||||
|
if !headHasRole(head, roleOperationalReq) {
|
||||||
|
t.Error("top-K must contain an operational_requirement after diversity")
|
||||||
|
}
|
||||||
|
if !headHasRole(head, roleControlStandard) {
|
||||||
|
t.Error("top-K must contain a control_standard after diversity")
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("no-op when both roles already present", func(t *testing.T) {
|
||||||
|
out := ensureControlDiversity([]LegalSearchResult{opReq, std, ig("A"), ig("B")}, 3)
|
||||||
|
if classifyRole(out[0]) != roleOperationalReq || classifyRole(out[1]) != roleControlStandard {
|
||||||
|
t.Error("already-diverse top-K must be left untouched")
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("absent role is not forced (no panic)", func(t *testing.T) {
|
||||||
|
out := ensureControlDiversity([]LegalSearchResult{ig("A"), ig("B"), ig("C"), std}, 3)
|
||||||
|
if !headHasRole(out[:3], roleControlStandard) {
|
||||||
|
t.Error("present control_standard should be injected")
|
||||||
|
}
|
||||||
|
if headHasRole(out[:3], roleOperationalReq) {
|
||||||
|
t.Error("operational_requirement absent from the pool must NOT appear")
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("topK covering the whole pool is unchanged", func(t *testing.T) {
|
||||||
|
out := ensureControlDiversity([]LegalSearchResult{ig("A"), opReq}, 5)
|
||||||
|
if len(out) != 2 || classifyRole(out[0]) != roleImplGuidance {
|
||||||
|
t.Error("topK >= len must return results unchanged")
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|||||||
@@ -166,6 +166,15 @@ func (c *LegalRAGClient) searchInternal(ctx context.Context, collection string,
|
|||||||
// Response-Schema unveraendert. Score traegt den Authority-Score, damit nachgelagerte
|
// Response-Schema unveraendert. Score traegt den Authority-Score, damit nachgelagerte
|
||||||
// Multi-Collection-Merges (Advisor) die Ordnung bewahren.
|
// Multi-Collection-Merges (Advisor) die Ordnung bewahren.
|
||||||
results = rerankByAuthority(query, results)
|
results = rerankByAuthority(query, results)
|
||||||
|
|
||||||
|
// Control-Diversity: auf einer Umsetzungsfrage darf impl_guidance (ENISA) Top-1 bleiben,
|
||||||
|
// aber die Top-K soll mindestens eine binding operational_requirement (CRA Anhang I) und
|
||||||
|
// einen control_standard (NIST/ISO) zeigen, falls im Pool — Quellenarten sichtbar machen
|
||||||
|
// statt sie kuenstlich auf Top-1 zu heben. Nur Reihenfolge, vor der Truncation.
|
||||||
|
if queryWantsControls(query) {
|
||||||
|
results = ensureControlDiversity(results, topK)
|
||||||
|
}
|
||||||
|
|
||||||
if topK > 0 && len(results) > topK {
|
if topK > 0 && len(results) > topK {
|
||||||
results = results[:topK]
|
results = results[:topK]
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user