Compare commits
1 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| a5b675d999 |
@@ -9,8 +9,8 @@ import (
|
|||||||
// authorityInfo is the normative classification of a search result, used internally
|
// authorityInfo is the normative classification of a search result, used internally
|
||||||
// for re-ranking only (Phase 1 changes ordering, not the response contract).
|
// for re-ranking only (Phase 1 changes ordering, not the response contract).
|
||||||
type authorityInfo struct {
|
type authorityInfo struct {
|
||||||
weight int // 100 binding, 80 technical_standard, 70 guidance, 0 foreign, 50 unknown
|
weight int // 100 binding_law, 70 guidance, 0 foreign_law, 50 unknown
|
||||||
sourceClass string // binding_law | technical_standard | supervisory_guidance | foreign_law | unknown
|
sourceClass string // binding_law | supervisory_guidance | foreign_law | unknown
|
||||||
jurisdiction string // DE | EU | CH
|
jurisdiction string // DE | EU | CH
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -18,13 +18,7 @@ var (
|
|||||||
guidanceMarkers = []string{
|
guidanceMarkers = []string{
|
||||||
"DSK", "EDPB", "BfDI", "BFDI", "BayLfD", "Baylfb", "ENISA", "BSI", "EUCC",
|
"DSK", "EDPB", "BfDI", "BFDI", "BayLfD", "Baylfb", "ENISA", "BSI", "EUCC",
|
||||||
"Standards Mapping", "Kpnr", "Orientierungshilfe", "Handreichung", "Beschluss",
|
"Standards Mapping", "Kpnr", "Orientierungshilfe", "Handreichung", "Beschluss",
|
||||||
"Leitlinie", "Guidance", "Empfehlung", "OECD", "CISA", "Blue Guide",
|
"Leitlinie", "Guidance", "Empfehlung", "NIST", "OECD", "CISA", "Blue Guide",
|
||||||
}
|
|
||||||
// Technical standards / control frameworks (best-practice controls). Checked BEFORE
|
|
||||||
// guidanceMarkers so a "BSI Grundschutz" chunk classifies as a standard, not BSI guidance.
|
|
||||||
standardMarkers = []string{
|
|
||||||
"NIST", "OWASP", "Grundschutz", "ISO 27001", "ISO/IEC 27001",
|
|
||||||
"CSA CCM", "Cloud Controls Matrix", "CIS Benchmark", "CIS Control",
|
|
||||||
}
|
}
|
||||||
foreignMarkers = []string{"RevDSG", "fedlex", "(CH)"}
|
foreignMarkers = []string{"RevDSG", "fedlex", "(CH)"}
|
||||||
deMarkers = []string{"BDSG", "DSK", "BfDI", "BFDI", "BayLfD", "Baylfb", "BSI"}
|
deMarkers = []string{"BDSG", "DSK", "BfDI", "BFDI", "BayLfD", "Baylfb", "BSI"}
|
||||||
@@ -54,8 +48,6 @@ func classifyAuthority(r LegalSearchResult) authorityInfo {
|
|||||||
switch {
|
switch {
|
||||||
case containsAny(hay, foreignMarkers):
|
case containsAny(hay, foreignMarkers):
|
||||||
return authorityInfo{weight: 0, sourceClass: "foreign_law", jurisdiction: "CH"}
|
return authorityInfo{weight: 0, sourceClass: "foreign_law", jurisdiction: "CH"}
|
||||||
case r.Category == "standard" || containsAny(hay, standardMarkers):
|
|
||||||
return authorityInfo{weight: 80, sourceClass: "technical_standard", jurisdiction: jur}
|
|
||||||
case r.Category == "guidance" || containsAny(hay, guidanceMarkers):
|
case r.Category == "guidance" || containsAny(hay, guidanceMarkers):
|
||||||
return authorityInfo{weight: 70, sourceClass: "supervisory_guidance", jurisdiction: jur}
|
return authorityInfo{weight: 70, sourceClass: "supervisory_guidance", jurisdiction: jur}
|
||||||
case r.Category == "regulation" || r.Category == "eu_recht" || normPattern.MatchString(r.ArticleLabel):
|
case r.Category == "regulation" || r.Category == "eu_recht" || normPattern.MatchString(r.ArticleLabel):
|
||||||
@@ -69,8 +61,6 @@ func sourceClassFromWeight(w int) string {
|
|||||||
switch {
|
switch {
|
||||||
case w >= 100:
|
case w >= 100:
|
||||||
return "binding_law"
|
return "binding_law"
|
||||||
case w >= 80:
|
|
||||||
return "technical_standard"
|
|
||||||
case w >= 70:
|
case w >= 70:
|
||||||
return "supervisory_guidance"
|
return "supervisory_guidance"
|
||||||
case w <= 0:
|
case w <= 0:
|
||||||
|
|||||||
@@ -64,7 +64,7 @@ func bestBindingSemantic(results []LegalSearchResult, wantsIntent bool) float64
|
|||||||
}
|
}
|
||||||
best := 0.0
|
best := 0.0
|
||||||
for _, r := range results {
|
for _, r := range results {
|
||||||
if classifyAuthority(r).sourceClass == "binding_law" && r.Score > best {
|
if r.SourceClass == "binding_law" && r.Score > best {
|
||||||
best = r.Score
|
best = r.Score
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -120,22 +120,21 @@ func rerankByAuthority(query string, results []LegalSearchResult) []LegalSearchR
|
|||||||
qForeign := queryIsForeign(query)
|
qForeign := queryIsForeign(query)
|
||||||
wantsGuidance := queryWantsGuidance(query)
|
wantsGuidance := queryWantsGuidance(query)
|
||||||
wantsControls := queryWantsControls(query)
|
wantsControls := queryWantsControls(query)
|
||||||
bestBindingSem := bestBindingSemantic(results, wantsGuidance)
|
bestBindingSem := bestBindingSemantic(results, wantsGuidance || wantsControls)
|
||||||
|
|
||||||
out := make([]LegalSearchResult, len(results))
|
out := make([]LegalSearchResult, len(results))
|
||||||
copy(out, results)
|
copy(out, results)
|
||||||
for i := range out {
|
for i := range out {
|
||||||
out[i].Score = authorityScore(query, out[i], qDomain, qForeign)
|
out[i].Score = authorityScore(query, out[i], qDomain, qForeign)
|
||||||
}
|
}
|
||||||
// Explicit interpretation intent → a competitive guideline may outrank binding (lift
|
// Explicit interpretation intent → a competitive guideline may outrank binding;
|
||||||
// above the best binding FINAL). Explicit implementation intent → boost the CONTROL-POOL
|
// explicit implementation intent → a competitive technical_standard may. Both lift
|
||||||
// (operational/procedural requirement, control standard, implementation guidance) over
|
// ABOVE the best binding FINAL, so a pure norm question (neither intent) is untouched.
|
||||||
// the abstract obligation, soft-ordered by role. Norm questions (neither) stay untouched.
|
|
||||||
if wantsGuidance {
|
if wantsGuidance {
|
||||||
liftAboveBinding(out, results, bestBindingSem, "supervisory_guidance")
|
liftAboveBinding(out, results, bestBindingSem, "supervisory_guidance")
|
||||||
}
|
}
|
||||||
if wantsControls {
|
if wantsControls {
|
||||||
applyControlRoles(out)
|
liftAboveBinding(out, results, bestBindingSem, "technical_standard")
|
||||||
}
|
}
|
||||||
sort.SliceStable(out, func(a, b int) bool {
|
sort.SliceStable(out, func(a, b int) bool {
|
||||||
return out[a].Score > out[b].Score
|
return out[a].Score > out[b].Score
|
||||||
@@ -153,14 +152,12 @@ func rerankByAuthority(query string, results []LegalSearchResult) []LegalSearchR
|
|||||||
func liftAboveBinding(out, raw []LegalSearchResult, bestBindingSem float64, sourceClass string) {
|
func liftAboveBinding(out, raw []LegalSearchResult, bestBindingSem float64, sourceClass string) {
|
||||||
bestBindingFinal := 0.0
|
bestBindingFinal := 0.0
|
||||||
for i := range out {
|
for i := range out {
|
||||||
if classifyAuthority(out[i]).sourceClass == "binding_law" && out[i].Score > bestBindingFinal {
|
if out[i].SourceClass == "binding_law" && out[i].Score > bestBindingFinal {
|
||||||
bestBindingFinal = out[i].Score
|
bestBindingFinal = out[i].Score
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for i := range out {
|
for i := range out {
|
||||||
// Classify (not raw payload) so the untagged legacy corpus — e.g. NIST ingested
|
if out[i].SourceClass != sourceClass || raw[i].Score < bestBindingSem-intentLiftMargin {
|
||||||
// before source_class tagging — is still recognized as its interpretative class.
|
|
||||||
if classifyAuthority(out[i]).sourceClass != sourceClass || raw[i].Score < bestBindingSem-intentLiftMargin {
|
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
lifted := bestBindingFinal + intentLiftGain + (raw[i].Score - bestBindingSem)
|
lifted := bestBindingFinal + intentLiftGain + (raw[i].Score - bestBindingSem)
|
||||||
|
|||||||
@@ -14,10 +14,6 @@ func TestClassifyAuthority(t *testing.T) {
|
|||||||
{"tagged guidance DE", LegalSearchResult{AuthorityWeight: 70, SourceClass: "supervisory_guidance", Jurisdiction: "DE"}, 70, "supervisory_guidance", "DE"},
|
{"tagged guidance DE", LegalSearchResult{AuthorityWeight: 70, SourceClass: "supervisory_guidance", Jurisdiction: "DE"}, 70, "supervisory_guidance", "DE"},
|
||||||
{"tagged foreign CH", LegalSearchResult{AuthorityWeight: 0, SourceClass: "foreign_law", Jurisdiction: "CH"}, 0, "foreign_law", "CH"},
|
{"tagged foreign CH", LegalSearchResult{AuthorityWeight: 0, SourceClass: "foreign_law", Jurisdiction: "CH"}, 0, "foreign_law", "CH"},
|
||||||
{"untagged ENISA guidance", LegalSearchResult{RegulationShort: "ENISA", ArticleLabel: "ENISA CRA Standards Mapping"}, 70, "supervisory_guidance", "EU"},
|
{"untagged ENISA guidance", LegalSearchResult{RegulationShort: "ENISA", ArticleLabel: "ENISA CRA Standards Mapping"}, 70, "supervisory_guidance", "EU"},
|
||||||
{"untagged NIST standard", LegalSearchResult{RegulationShort: "NIST SP 800-82r3", ArticleLabel: "AU-8"}, 80, "technical_standard", "EU"},
|
|
||||||
{"BSI Grundschutz standard beats BSI guidance", LegalSearchResult{RegulationShort: "BSI Grundschutz", ArticleLabel: "BSI Grundschutz Baustein"}, 80, "technical_standard", "DE"},
|
|
||||||
{"weight-only 85 TRGS standard", LegalSearchResult{AuthorityWeight: 85, RegulationShort: "TRGS 529"}, 85, "technical_standard", "EU"},
|
|
||||||
{"tagged technical_standard", LegalSearchResult{AuthorityWeight: 80, SourceClass: "technical_standard", Jurisdiction: "EU"}, 80, "technical_standard", "EU"},
|
|
||||||
{"untagged CRA binding", LegalSearchResult{RegulationShort: "CRA", ArticleLabel: "Art. 13 CRA", Category: "regulation"}, 100, "binding_law", "EU"},
|
{"untagged CRA binding", LegalSearchResult{RegulationShort: "CRA", ArticleLabel: "Art. 13 CRA", Category: "regulation"}, 100, "binding_law", "EU"},
|
||||||
{"untagged BDSG binding DE", LegalSearchResult{RegulationShort: "BDSG", ArticleLabel: "§ 38 BDSG"}, 100, "binding_law", "DE"},
|
{"untagged BDSG binding DE", LegalSearchResult{RegulationShort: "BDSG", ArticleLabel: "§ 38 BDSG"}, 100, "binding_law", "DE"},
|
||||||
{"untagged RevDSG foreign", LegalSearchResult{RegulationShort: "RevDSG", ArticleLabel: "RevDSG (CH)"}, 0, "foreign_law", "CH"},
|
{"untagged RevDSG foreign", LegalSearchResult{RegulationShort: "RevDSG", ArticleLabel: "RevDSG (CH)"}, 0, "foreign_law", "CH"},
|
||||||
|
|||||||
@@ -1,123 +0,0 @@
|
|||||||
package ucca
|
|
||||||
|
|
||||||
import "strings"
|
|
||||||
|
|
||||||
// A source_role names the FUNCTION a chunk serves: WHAT must be done (obligation),
// HOW to implement it (operational/procedural requirement, control standard,
// implementation guidance), or how to READ the norm (interpretation/definition).
//
// It is orthogonal to source_class (legal authority): source_class decides rank,
// source_role decides control-pool membership for implementation questions.
// Roles are derived deterministically from markers, so the untagged corpus
// needs no re-tagging.
const (
	// roleObligation is the abstract duty (the WHAT).
	roleObligation = "obligation"
	// roleOperationalReq is a concrete binding requirement (e.g. CRA Annex I).
	roleOperationalReq = "operational_requirement"
	// roleProceduralReq is a process: notification, registration, DPIA, incident report.
	roleProceduralReq = "procedural_requirement"
	// roleControlStandard is a best-practice control catalog (NIST/OWASP/ISO/CIS).
	roleControlStandard = "control_standard"
	// roleImplGuidance is advisory how-to material (ENISA good practices, BSI).
	roleImplGuidance = "implementation_guidance"
	// roleInterpretation interprets the meaning of a norm (e.g. an EDPB guideline).
	roleInterpretation = "interpretation"
	// roleDefinition covers definitions, scope and recitals.
	roleDefinition = "definition"
)
|
|
||||||
|
|
||||||
// Marker word lists consulted by classifyRole. Each list is matched against the
// lower-cased chunk metadata; the exact matching rule lives in containsAnyLower,
// which is defined elsewhere (presumably a case-insensitive substring test — verify).
var (
	// proceduralMarkers flag binding chunks that describe a process
	// (notification, registration, reporting, impact assessment, ...).
	proceduralMarkers = []string{
		"Meldung",
		"Meldepflicht",
		"Notification",
		"Notifizierung",
		"Registrierung",
		"Registration",
		"Konformitätserklärung",
		"Declaration of Conformity",
		"Incident",
		"Berichterstattung",
		"Reporting",
		"Folgenabschätzung",
		"DSFA",
		"DPIA",
		"Anzeigepflicht",
	}

	// annexMarkers flag annex/appendix chunks.
	annexMarkers = []string{
		"Anhang",
		"Annex",
		"Appendix",
		"Anlage",
	}

	// operationalMarkers flag chunks that state concrete requirements.
	operationalMarkers = []string{
		"Anforderung",
		"Requirement",
		"essential",
		"wesentliche",
	}

	// implMarkers separate implementation guidance from interpretative guidance.
	implMarkers = []string{
		"Good Practice",
		"Best Practice",
		"Standards Mapping",
		"Umsetzung",
		"Implementation",
		"Handreichung",
		"Maßnahmenkatalog",
		"ICS",
		"SCADA",
		"Technical Guideline",
		"TIG",
	}

	// definitionMarkers flag definition articles.
	definitionMarkers = []string{
		"Begriffsbestimmung",
		"Definition",
	}
)
|
|
||||||
|
|
||||||
// classifyRole derives the functional source_role from chunk metadata + the authority
|
|
||||||
// class. technical_standard is always a control_standard; guidance splits into
|
|
||||||
// implementation_guidance (how-to) vs interpretation (meaning); binding splits into
|
|
||||||
// procedural / operational requirement / definition / plain obligation.
|
|
||||||
func classifyRole(r LegalSearchResult) string {
|
|
||||||
cls := classifyAuthority(r).sourceClass
|
|
||||||
hay := strings.ToLower(r.ArticleLabel + " " + r.RegulationShort + " " + r.RegulationName + " " + r.Article)
|
|
||||||
switch {
|
|
||||||
case r.IsRecital:
|
|
||||||
return roleDefinition
|
|
||||||
case cls == "technical_standard":
|
|
||||||
return roleControlStandard
|
|
||||||
case cls == "supervisory_guidance":
|
|
||||||
if containsAnyLower(hay, implMarkers) {
|
|
||||||
return roleImplGuidance
|
|
||||||
}
|
|
||||||
return roleInterpretation
|
|
||||||
case cls == "binding_law":
|
|
||||||
switch {
|
|
||||||
case containsAnyLower(hay, definitionMarkers):
|
|
||||||
return roleDefinition
|
|
||||||
case containsAnyLower(hay, proceduralMarkers):
|
|
||||||
return roleProceduralReq
|
|
||||||
case containsAnyLower(hay, annexMarkers) || containsAnyLower(hay, operationalMarkers):
|
|
||||||
return roleOperationalReq
|
|
||||||
default:
|
|
||||||
return roleObligation
|
|
||||||
}
|
|
||||||
default:
|
|
||||||
return roleObligation
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// controlRoleBonus is the soft intra-pool preference (User 2026-06-24):
|
|
||||||
// operational_requirement > procedural_requirement > control_standard > implementation_guidance.
|
|
||||||
var controlRoleBonus = map[string]float64{
|
|
||||||
roleOperationalReq: 0.100,
|
|
||||||
roleProceduralReq: 0.075,
|
|
||||||
roleControlStandard: 0.050,
|
|
||||||
roleImplGuidance: 0.000,
|
|
||||||
}
|
|
||||||
|
|
||||||
// controlPoolGain is the flat score increase applied to every control-pool role
// on an implementation question. Per the original design note, it keeps the
// abstract binding obligation from dominating by authority alone; the obligation
// is not removed and remains visible as "Rechtsgrundlage" (legal basis) context
// below the recommended measures.
const controlPoolGain = 0.15
|
|
||||||
|
|
||||||
// applyControlRoles boosts the control-pool (the four implementation roles) for an
|
|
||||||
// EXPLICIT implementation question, soft-ordered op_req > procedural > standard > guidance.
|
|
||||||
// Replaces the earlier "lift technical_standard above binding" — controls are not only
|
|
||||||
// technical_standard, and the binding operational_requirement (e.g. CRA Annex I) should win.
|
|
||||||
func applyControlRoles(out []LegalSearchResult) {
|
|
||||||
for i := range out {
|
|
||||||
if bonus, ok := controlRoleBonus[classifyRole(out[i])]; ok {
|
|
||||||
out[i].Score += controlPoolGain + bonus
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// isControlPoolRole reports whether a role belongs to the control-pool surfaced on
|
|
||||||
// implementation questions (the four "how to implement" roles).
|
|
||||||
func isControlPoolRole(role string) bool {
|
|
||||||
switch role {
|
|
||||||
case roleOperationalReq, roleProceduralReq, roleControlStandard, roleImplGuidance:
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
// controlRoleOf classifies a raw Qdrant payload into a source_role, so searchControls can
|
|
||||||
// filter its deep dense pull to the control-pool BEFORE hits are mapped to LegalSearchResult.
|
|
||||||
func controlRoleOf(payload map[string]interface{}) string {
|
|
||||||
article := getString(payload, "article")
|
|
||||||
if article == "" {
|
|
||||||
article = getString(payload, "section")
|
|
||||||
}
|
|
||||||
return classifyRole(LegalSearchResult{
|
|
||||||
RegulationShort: getString(payload, "regulation_short"),
|
|
||||||
RegulationName: getString(payload, "regulation_name_de"),
|
|
||||||
ArticleLabel: getString(payload, "article_label"),
|
|
||||||
Article: article,
|
|
||||||
Category: getString(payload, "category"),
|
|
||||||
SourceClass: getString(payload, "source_class"),
|
|
||||||
AuthorityWeight: getInt(payload, "authority_weight"),
|
|
||||||
IsRecital: getBool(payload, "is_recital"),
|
|
||||||
})
|
|
||||||
}
|
|
||||||
@@ -1,79 +0,0 @@
|
|||||||
package ucca
|
|
||||||
|
|
||||||
import "testing"
|
|
||||||
|
|
||||||
func TestClassifyRole(t *testing.T) {
|
|
||||||
tests := []struct {
|
|
||||||
name string
|
|
||||||
r LegalSearchResult
|
|
||||||
want string
|
|
||||||
}{
|
|
||||||
{"NIST -> control_standard", LegalSearchResult{RegulationShort: "NIST SP 800-82r3", ArticleLabel: "AU-8"}, roleControlStandard},
|
|
||||||
{"OWASP -> control_standard", LegalSearchResult{RegulationShort: "OWASP ASVS"}, roleControlStandard},
|
|
||||||
{"CRA Anhang -> operational_requirement", LegalSearchResult{RegulationShort: "CRA", ArticleLabel: "CRA Anhang I", Category: "regulation"}, roleOperationalReq},
|
|
||||||
{"CRA Meldepflicht -> procedural_requirement", LegalSearchResult{RegulationShort: "CRA", ArticleLabel: "Art. 14 CRA Meldepflicht", Category: "regulation"}, roleProceduralReq},
|
|
||||||
{"ENISA Good Practices -> implementation_guidance", LegalSearchResult{RegulationShort: "ENISA Supply Chain Good Practices"}, roleImplGuidance},
|
|
||||||
{"EDPB Leitlinie -> interpretation", LegalSearchResult{RegulationShort: "EDPB DPO", ArticleLabel: "WP243 Leitlinien Datenschutzbeauftragte"}, roleInterpretation},
|
|
||||||
{"DORA article -> obligation", LegalSearchResult{RegulationShort: "DORA", ArticleLabel: "Art. 5 DORA", Category: "regulation"}, roleObligation},
|
|
||||||
{"DSGVO Begriffsbestimmungen -> definition", LegalSearchResult{RegulationShort: "DSGVO", ArticleLabel: "Art. 4 DSGVO Begriffsbestimmungen", Category: "regulation"}, roleDefinition},
|
|
||||||
{"recital -> definition", LegalSearchResult{RegulationShort: "CRA", IsRecital: true}, roleDefinition},
|
|
||||||
}
|
|
||||||
for _, tt := range tests {
|
|
||||||
t.Run(tt.name, func(t *testing.T) {
|
|
||||||
if got := classifyRole(tt.r); got != tt.want {
|
|
||||||
t.Errorf("classifyRole() = %q, want %q", got, tt.want)
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestApplyControlRoles_PoolPreference(t *testing.T) {
|
|
||||||
// op_req > procedural > control_standard > impl_guidance; non-control roles get no boost.
|
|
||||||
roles := []struct {
|
|
||||||
r LegalSearchResult
|
|
||||||
wantGain float64
|
|
||||||
}{
|
|
||||||
{LegalSearchResult{ArticleLabel: "CRA Anhang I", Category: "regulation"}, controlPoolGain + 0.100},
|
|
||||||
{LegalSearchResult{ArticleLabel: "Art. 14 CRA Meldepflicht", Category: "regulation"}, controlPoolGain + 0.075},
|
|
||||||
{LegalSearchResult{RegulationShort: "NIST SP 800-53"}, controlPoolGain + 0.050},
|
|
||||||
{LegalSearchResult{RegulationShort: "ENISA Good Practices"}, controlPoolGain + 0.000},
|
|
||||||
{LegalSearchResult{ArticleLabel: "Art. 5 DORA", Category: "regulation"}, 0.0}, // obligation: no boost
|
|
||||||
}
|
|
||||||
for _, rc := range roles {
|
|
||||||
out := []LegalSearchResult{rc.r}
|
|
||||||
out[0].Score = 1.0
|
|
||||||
applyControlRoles(out)
|
|
||||||
if got := out[0].Score - 1.0; got < rc.wantGain-1e-9 || got > rc.wantGain+1e-9 {
|
|
||||||
t.Errorf("role %q: gain %.3f, want %.3f", classifyRole(rc.r), got, rc.wantGain)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestIsControlPoolRole(t *testing.T) {
|
|
||||||
for _, r := range []string{roleOperationalReq, roleProceduralReq, roleControlStandard, roleImplGuidance} {
|
|
||||||
if !isControlPoolRole(r) {
|
|
||||||
t.Errorf("%q should be in the control-pool", r)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for _, r := range []string{roleObligation, roleInterpretation, roleDefinition} {
|
|
||||||
if isControlPoolRole(r) {
|
|
||||||
t.Errorf("%q should NOT be in the control-pool", r)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestControlRoleOf_Payload(t *testing.T) {
|
|
||||||
// searchControls filters its deep dense pull by classifying the raw Qdrant payload.
|
|
||||||
nist := map[string]interface{}{"regulation_short": "NIST SP 800-82r3", "article": "AU-8"}
|
|
||||||
if got := controlRoleOf(nist); got != roleControlStandard {
|
|
||||||
t.Errorf("untagged NIST payload role = %q, want control_standard", got)
|
|
||||||
}
|
|
||||||
craAnnex := map[string]interface{}{"regulation_short": "CRA", "article": "Anhang-I", "category": "regulation"}
|
|
||||||
if got := controlRoleOf(craAnnex); got != roleOperationalReq {
|
|
||||||
t.Errorf("CRA Anhang payload role = %q, want operational_requirement", got)
|
|
||||||
}
|
|
||||||
dora := map[string]interface{}{"regulation_short": "DORA", "article_label": "Art. 5 DORA", "category": "regulation"}
|
|
||||||
if got := controlRoleOf(dora); isControlPoolRole(got) {
|
|
||||||
t.Errorf("DORA abstract article role = %q must be excluded from the control-pool", got)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -107,15 +107,6 @@ func (c *LegalRAGClient) searchInternal(ctx context.Context, collection string,
|
|||||||
hits = mergeDedupHits(hits, bindingHits)
|
hits = mergeDedupHits(hits, bindingHits)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Control-Augmentation: bei expliziter Umsetzungsfrage einen tiefen dense-Pool ziehen und
|
|
||||||
// nur die Control-Pool-Rollen behalten — so werden NIST/CRA-Anhang (dense rank ~8-9, unter
|
|
||||||
// dem kleinen top-K) Kandidaten. Re-Rank/applyControlRoles ordnen sie danach.
|
|
||||||
if queryWantsControls(query) {
|
|
||||||
if controlHits, cErr := c.searchControls(ctx, collection, embedding); cErr == nil {
|
|
||||||
hits = mergeDedupHits(hits, controlHits)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Graph-Augmentation: verbundene Normen (references_out/in) der Top-Hits ueber die
|
// Graph-Augmentation: verbundene Normen (references_out/in) der Top-Hits ueber die
|
||||||
// praezise Zitations-Kante in den Pool ziehen — z.B. Art. 13 CRA zieht Anhang I (die
|
// praezise Zitations-Kante in den Pool ziehen — z.B. Art. 13 CRA zieht Anhang I (die
|
||||||
// eigentliche Pflichtquelle). Pool-Augmentation only; Re-Rank + topK bleiben.
|
// eigentliche Pflichtquelle). Pool-Augmentation only; Re-Rank + topK bleiben.
|
||||||
|
|||||||
@@ -204,34 +204,6 @@ func (c *LegalRAGClient) searchBinding(ctx context.Context, collection string, e
|
|||||||
return c.doPointsSearch(ctx, collection, searchReq)
|
return c.doPointsSearch(ctx, collection, searchReq)
|
||||||
}
|
}
|
||||||
|
|
||||||
// controlPoolDepth is the Limit used for the deep control pull in searchControls.
// Rationale from the original measurement note: for an EU-cyber control query the
// relevant control sources (NIST, CRA Annex) sit at dense rank ~8-9, far below the
// client's small top-K, so a fixed depth of 60 reliably surfaces them.
const controlPoolDepth = 60
|
|
||||||
|
|
||||||
// searchControls fetches a DEEP dense pool and keeps only the control-pool roles, so control
|
|
||||||
// sources that the small top-K (hybrid) search misses become candidates on an implementation
|
|
||||||
// question. Role is derived in code (no source_role tag needed). AUGMENTS the pool — the
|
|
||||||
// caller gates it on control-intent.
|
|
||||||
func (c *LegalRAGClient) searchControls(ctx context.Context, collection string, embedding []float64) ([]qdrantSearchHit, error) {
|
|
||||||
searchReq := qdrantSearchRequest{
|
|
||||||
Vector: embedding,
|
|
||||||
Limit: controlPoolDepth,
|
|
||||||
WithPayload: true,
|
|
||||||
}
|
|
||||||
hits, err := c.doPointsSearch(ctx, collection, searchReq)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
kept := make([]qdrantSearchHit, 0, len(hits))
|
|
||||||
for _, h := range hits {
|
|
||||||
if isControlPoolRole(controlRoleOf(h.Payload)) {
|
|
||||||
kept = append(kept, h)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return kept, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// doPointsSearch issues a POST /points/search and decodes the hits.
|
// doPointsSearch issues a POST /points/search and decodes the hits.
|
||||||
func (c *LegalRAGClient) doPointsSearch(ctx context.Context, collection string, searchReq qdrantSearchRequest) ([]qdrantSearchHit, error) {
|
func (c *LegalRAGClient) doPointsSearch(ctx context.Context, collection string, searchReq qdrantSearchRequest) ([]qdrantSearchHit, error) {
|
||||||
jsonBody, err := json.Marshal(searchReq)
|
jsonBody, err := json.Marshal(searchReq)
|
||||||
|
|||||||
@@ -96,21 +96,20 @@ func TestQueryWantsControls(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestRerank_ControlQuestion_OperationalReqTop(t *testing.T) {
|
func TestRerank_ControlQuestion_StandardMayWin(t *testing.T) {
|
||||||
// User priority for implementation questions: operational_requirement (binding concrete,
|
// Explicit implementation intent + standard semantically competitive → standard wins.
|
||||||
// CRA Anhang I) > control_standard (NIST). Both are in the control-pool; op_req wins.
|
|
||||||
results := []LegalSearchResult{
|
results := []LegalSearchResult{
|
||||||
{RegulationShort: "NIST SP 800-82r3", ArticleLabel: "AU-8", SourceClass: "technical_standard", AuthorityWeight: 80, Jurisdiction: "EU", Score: 0.60},
|
intentRes("NIST SP 800-82", "technical_standard", 0.62, 80),
|
||||||
{RegulationShort: "CRA", ArticleLabel: "CRA Anhang I", Category: "regulation", Score: 0.58},
|
intentRes("CRA", "binding_law", 0.58, 100),
|
||||||
}
|
}
|
||||||
out := rerankByAuthority("Welche Controls und Massnahmen passen zu Security Updates?", results)
|
out := rerankByAuthority("Welche Controls passen zu Security Updates?", results)
|
||||||
if out[0].RegulationShort != "CRA" {
|
if out[0].SourceClass != "technical_standard" {
|
||||||
t.Errorf("operational_requirement (CRA Anhang I) should be Top-1 over control_standard, got %q", out[0].RegulationShort)
|
t.Errorf("control question: technical_standard should win Top-1, got %s", out[0].SourceClass)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestRerank_NormQuestion_BindingOverStandard(t *testing.T) {
|
func TestRerank_NormQuestion_BindingOverStandard(t *testing.T) {
|
||||||
// "Anforderungen" → no control intent → binding obligation stays Top-1 over the standard.
|
// "Anforderungen" → no control intent → binding stays Top-1 over the standard.
|
||||||
results := []LegalSearchResult{
|
results := []LegalSearchResult{
|
||||||
intentRes("NIST SP 800-82", "technical_standard", 0.62, 80),
|
intentRes("NIST SP 800-82", "technical_standard", 0.62, 80),
|
||||||
intentRes("CRA", "binding_law", 0.58, 100),
|
intentRes("CRA", "binding_law", 0.58, 100),
|
||||||
@@ -121,15 +120,15 @@ func TestRerank_NormQuestion_BindingOverStandard(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestRerank_ControlQuestion_PoolBeatsBareObligation(t *testing.T) {
|
func TestRerank_OffTopicStandard_BlockedByGuard(t *testing.T) {
|
||||||
// A control-pool source (NIST control_standard) outranks an abstract obligation with no
|
// Control intent present, but the standard is semantically far below binding →
|
||||||
// domain/topic advantage, because the implementation intent boosts the control-pool.
|
// the margin guard keeps binding Top-1 (no off-topic standard override).
|
||||||
results := []LegalSearchResult{
|
results := []LegalSearchResult{
|
||||||
{RegulationShort: "NIST SP 800-82r3", ArticleLabel: "AU-8", SourceClass: "technical_standard", AuthorityWeight: 80, Jurisdiction: "EU", Score: 0.55},
|
intentRes("NIST SP 800-82", "technical_standard", 0.40, 80),
|
||||||
{RegulationShort: "XYZ", ArticleLabel: "Art. 5 XYZ", Category: "regulation", Score: 0.58},
|
intentRes("CRA", "binding_law", 0.58, 100),
|
||||||
}
|
}
|
||||||
out := rerankByAuthority("Welche Controls und Massnahmen passen zu Security Updates?", results)
|
out := rerankByAuthority("Welche Controls passen zu Security Updates?", results)
|
||||||
if out[0].RegulationShort != "NIST SP 800-82r3" {
|
if out[0].SourceClass != "binding_law" {
|
||||||
t.Errorf("control_standard should beat a bare abstract obligation on a control question, got %q", out[0].RegulationShort)
|
t.Errorf("off-topic standard must not win even with control intent, got %s", out[0].SourceClass)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user