ci(go-lint): golangci-lint v1.64.8 (go1.24) + new-from-merge-base (#32 )

feat(ai-sdk): authority-aware re-ranking for legal RAG (Phase 1) (#31 )
Merge pull request 'feat: wire breakpilot-compliance to Infisical for local dev' (#30 ) from feat/infisical-secrets into main
2026-06-23 10:58:48 +00:00 · 2026-06-23 09:30:52 +00:00 · 2026-06-22 19:12:54 +00:00
11 changed files with 649 additions and 9 deletions
@@ -136,12 +136,14 @@ jobs:
    runs-on: docker
    needs: detect-changes
    if: github.event_name == 'pull_request' && needs.detect-changes.outputs.sdk == 'true'
-    container: golangci/golangci-lint:v1.62-alpine
+    container: golangci/golangci-lint:v1.64.8-alpine
    steps:
      - name: Checkout
        run: |
          apk add --no-cache git
-          git clone --depth 1 --branch ${GITHUB_HEAD_REF:-${GITHUB_REF_NAME}} ${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}.git .
+          # Full clone so `main` is a local ref — new-from-merge-base needs the merge base.
          git clone ${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}.git .
          git checkout ${GITHUB_HEAD_REF:-${GITHUB_REF_NAME}}
      - name: Lint ai-compliance-sdk
        run: |
          [ -d "ai-compliance-sdk" ] || exit 0
@@ -55,8 +55,7 @@ linters-settings:
    rules:
      - name: exported
        arguments:
-          - checkPrivateReceivers: false
+          - disableStutteringCheck
          - disableStutteringCheck: true
      - name: error-return
      - name: increment-decrement
      - name: var-declaration
@@ -83,6 +82,6 @@ issues:
  max-issues-per-linter: 50
  max-same-issues: 5
-  # New code only: don't fail on pre-existing issues in files we haven't touched.
+  # New code only: lint lines changed vs main, so pre-existing debt doesn't fail CI.
-  # Remove this once a clean baseline is established.
+  # Needs the go-lint job to clone with a local `main` ref (see .gitea/workflows/ci.yaml).
-  new: false
+  new-from-merge-base: main
@@ -0,0 +1,220 @@
 package ucca
 import (
 	"regexp"
 	"strconv"
 	"strings"
 )
 // authorityInfo is the normative classification of a search result, used internally
 // for re-ranking only (Phase 1 changes ordering, not the response contract).
 //
 // It is produced by classifyAuthority and read by authorityScore. Explicitly tagged
 // payload values are passed through as-is, so weight and jurisdiction can hold values
 // other than the canonical ones listed on the fields below.
 type authorityInfo struct {
 	weight       int    // 100 binding_law, 70 guidance, 0 foreign_law, 50 unknown
 	sourceClass  string // binding_law | supervisory_guidance | foreign_law | unknown
 	jurisdiction string // DE | EU | CH
 }
 // Marker lists and patterns used to classify results that carry no explicit tags.
 // All marker lists are matched as case-sensitive substrings (see containsAny).
 var (
 	// guidanceMarkers identify supervisory guidance (authorities, standards bodies and
 	// typical guidance document titles).
 	// NOTE(review): "Baylfb" looks like a spelling variant of "BayLfD" — confirm it is
 	// intentional (e.g. to catch mislabelled corpus entries).
 	guidanceMarkers = []string{
 		"DSK", "EDPB", "BfDI", "BFDI", "BayLfD", "Baylfb", "ENISA", "BSI", "EUCC",
 		"Standards Mapping", "Kpnr", "Orientierungshilfe", "Handreichung", "Beschluss",
 		"Leitlinie", "Guidance", "Empfehlung", "NIST", "OECD", "CISA", "Blue Guide",
 	}
 	// foreignMarkers flag Swiss sources; deMarkers flag German sources; normPattern
 	// detects a "§" or "Art"/"Art." followed by optional whitespace and a digit;
 	// bdsgParagraph captures the number that follows a "§" sign.
 	foreignMarkers = []string{"RevDSG", "fedlex", "(CH)"}
 	deMarkers      = []string{"BDSG", "DSK", "BfDI", "BFDI", "BayLfD", "Baylfb", "BSI"}
 	normPattern    = regexp.MustCompile(`(§|Art\.?)\s*\d`)
 	bdsgParagraph  = regexp.MustCompile(`§\s*(\d+)`)
 )
 // classifyAuthority derives weight/source-class/jurisdiction for a search result.
 //
 // Precedence:
 //  1. An explicit SourceClass tag wins. When the tag comes without a weight
 //     (AuthorityWeight == 0, which is also what a missing payload field decodes to),
 //     the canonical weight of that class is used, so an incompletely tagged guidance or
 //     unknown chunk is not scored like foreign law. foreign_law keeps weight 0.
 //  2. Otherwise a positive AuthorityWeight wins and the class is derived from it.
 //  3. Otherwise the curated category + name markers classify the chunk, so the
 //     not-yet-re-ingested (untagged) corpus is still classified deterministically.
 //
 // An explicit Jurisdiction tag is kept and only an empty one is inferred from markers;
 // the one exception is an untagged chunk matching a foreign marker, which is always
 // reported as "CH".
 func classifyAuthority(r LegalSearchResult) authorityInfo {
 	jur := r.Jurisdiction
 	if jur == "" {
 		jur = inferJurisdiction(r)
 	}
 	if r.SourceClass != "" {
 		w := r.AuthorityWeight
 		if w == 0 {
 			// Weight missing: fall back to the canonical weight of the tagged class.
 			switch r.SourceClass {
 			case "binding_law":
 				w = 100
 			case "supervisory_guidance":
 				w = 70
 			case "unknown":
 				w = 50
 			}
 		}
 		return authorityInfo{weight: w, sourceClass: r.SourceClass, jurisdiction: jur}
 	}
 	if r.AuthorityWeight > 0 {
 		return authorityInfo{weight: r.AuthorityWeight, sourceClass: sourceClassFromWeight(r.AuthorityWeight), jurisdiction: jur}
 	}
 	// Untagged chunk: classify from the textual markers. Foreign markers are checked
 	// first, then guidance, then binding law.
 	hay := r.ArticleLabel + " " + r.RegulationShort + " " + r.RegulationName + " " + r.RegulationCode
 	switch {
 	case containsAny(hay, foreignMarkers):
 		return authorityInfo{weight: 0, sourceClass: "foreign_law", jurisdiction: "CH"}
 	case r.Category == "guidance" || containsAny(hay, guidanceMarkers):
 		return authorityInfo{weight: 70, sourceClass: "supervisory_guidance", jurisdiction: jur}
 	case r.Category == "regulation" || r.Category == "eu_recht" || normPattern.MatchString(r.ArticleLabel):
 		return authorityInfo{weight: 100, sourceClass: "binding_law", jurisdiction: jur}
 	default:
 		return authorityInfo{weight: 50, sourceClass: "unknown", jurisdiction: jur}
 	}
 }
 // sourceClassFromWeight maps a numeric authority weight onto a source class:
 // 100 and above is binding law, 70..99 is supervisory guidance, 0 and below is
 // foreign law, and the remaining range 1..69 is unknown.
 func sourceClassFromWeight(w int) string {
 	if w >= 100 {
 		return "binding_law"
 	}
 	if w >= 70 {
 		return "supervisory_guidance"
 	}
 	if w > 0 {
 		return "unknown"
 	}
 	return "foreign_law"
 }
 // inferJurisdiction guesses the jurisdiction of a result that carries no explicit tag,
 // using its article label and regulation short/long name: a foreign marker yields "CH",
 // a "§" sign or a German marker yields "DE", and everything else defaults to "EU".
 func inferJurisdiction(r LegalSearchResult) string {
 	text := r.ArticleLabel + " " + r.RegulationShort + " " + r.RegulationName
 	if containsAny(text, foreignMarkers) {
 		return "CH"
 	}
 	if strings.Contains(text, "§") || containsAny(text, deMarkers) {
 		return "DE"
 	}
 	return "EU"
 }
 // --- Domain routing: separates same-authority but topically foreign norms ---
 // domainDef describes one topical domain: how to recognise a chunk that belongs to it
 // (regs) and how to recognise a query that asks about it (keywords).
 type domainDef struct {
 	name     string
 	regs     []string // regulation markers found in a chunk
 	keywords []string // query keywords that signal this domain
 }
 // Deterministic order (slice, not map) — important for stable classification + tests.
 // queryDomain and chunkDomain both return the first entry that matches, so the order of
 // this slice decides ties. regs are matched as case-sensitive substrings of the chunk's
 // label/regulation fields; keywords are matched as substrings of the lower-cased query
 // and are therefore written in lower case.
 var domains = []domainDef{
 	{"data_protection",
 		[]string{"DSGVO", "GDPR", "BDSG", "EDPB", "DSK", "BfDI", "BayLfD", "DPF"},
 		[]string{"personenbezogen", "betroffene", "datenschutz", "datenschutzbeauftrag", "dsb",
 			"datenpanne", "auskunft", "loesch", "lösch", "einwilligung", "besondere kategorien", "auftragsverarbeiter"}},
 	{"cyber",
 		[]string{"CRA", "NIS2", "NIS-2", "ENISA", "DORA", "EUCC"},
 		[]string{"security update", "sicherheitsupdate", "sicherheitsaktualisierung", "schwachstelle", "sbom",
 			"cybersicherheit", "konformit", "hersteller", "importeur", "haendler", "händler", "ikt-",
 			"resilienz", "sicherheitsvorfall", "digitalen elementen"}},
 	{"ai",
 		[]string{"AI Act", "KI-VO", "KI-Verordnung"},
 		[]string{"ki-system", "ki-modell", "hochrisiko", "kuenstliche intelligenz", "künstliche intelligenz"}},
 	// product_safety has no query keywords, so queryDomain never returns it; it only
 	// lets chunkDomain recognise such chunks as belonging to a different domain.
 	{"product_safety",
 		[]string{"Maschinenverordnung", "MaschinenVO", "GPSR", "RED", "MDR"},
 		nil},
 }
 // queryDomain returns the name of the first domain (in declaration order) that has a
 // keyword occurring in the lower-cased query, or "" when no keyword matches.
 func queryDomain(query string) string {
 	lowered := strings.ToLower(query)
 	for i := range domains {
 		if containsAny(lowered, domains[i].keywords) {
 			return domains[i].name
 		}
 	}
 	return ""
 }
 // chunkDomain returns the name of the first domain (in declaration order) that has a
 // regulation marker appearing in the chunk's label, short name, code or long name.
 // Matching is case-sensitive; "" means the chunk belongs to no known domain.
 func chunkDomain(r LegalSearchResult) string {
 	text := r.ArticleLabel + " " + r.RegulationShort + " " + r.RegulationCode + " " + r.RegulationName
 	for i := range domains {
 		for _, marker := range domains[i].regs {
 			if strings.Contains(text, marker) {
 				return domains[i].name
 			}
 		}
 	}
 	return ""
 }
 // scopeClass flags special sub-regimes that must not win general questions —
 // BDSG Teil 3 (§§ 45-84) implements the JI directive (law enforcement), not the general regime.
 // It returns "law_enforcement" when the label/short name mentions BDSG and the first
 // "§ <number>" found lies in 45..84, and "general" in every other case.
 func scopeClass(r LegalSearchResult) string {
 	const general = "general"
 	text := r.ArticleLabel + " " + r.RegulationShort
 	if !strings.Contains(text, "BDSG") {
 		return general
 	}
 	match := bdsgParagraph.FindStringSubmatch(text)
 	if match == nil {
 		return general
 	}
 	num, err := strconv.Atoi(match[1])
 	if err != nil || num < 45 || num > 84 {
 		return general
 	}
 	return "law_enforcement"
 }
 // --- Topic ontology: amplifier only (boost), never an override ---
 // topicDef links query keywords to the citations that are the preferred answer for
 // that topic.
 type topicDef struct {
 	keywords []string
 	norms    []string // preferred canonical citation fragments
 }
 // topics is the curated topic table consulted by resultMatchesTopic. Keywords are
 // compared case-insensitively against the lower-cased query; norms are looked up in the
 // result's article label + regulation short name via normMatches. A query may hit
 // several topics; one matching norm in any of them is enough.
 var topics = []topicDef{
 	{[]string{"datenschutzbeauftrag", "dsb", "benennung"}, []string{"Art. 37", "§ 38 BDSG"}},
 	{[]string{"stellung des"}, []string{"Art. 38"}},
 	{[]string{"aufgaben des"}, []string{"Art. 39"}},
 	{[]string{"folgenabsch", "dsfa"}, []string{"Art. 35"}},
 	{[]string{"besondere kategorien"}, []string{"Art. 9", "§ 22 BDSG"}},
 	{[]string{"auskunft"}, []string{"Art. 15", "§ 34 BDSG"}},
 	{[]string{"loesch", "lösch"}, []string{"Art. 17", "§ 35 BDSG"}},
 	{[]string{"bussgeld", "geldbusse"}, []string{"Art. 83"}},
 	{[]string{"security update", "sicherheitsupdate", "schwachstelle", "sbom", "cybersicherheitsanforderung"}, []string{"CRA Anhang I"}},
 	{[]string{"meldepflicht", "sicherheitsvorfall"}, []string{"Art. 14 CRA"}},
 }
 // resultMatchesTopic reports whether the result is one of the preferred norms of any
 // topic whose keywords occur in the query. Topics the query does not mention are
 // ignored; the citation is taken from the article label plus the regulation short name.
 func resultMatchesTopic(query string, r LegalSearchResult) bool {
 	lowered := strings.ToLower(query)
 	citation := r.ArticleLabel + " " + r.RegulationShort
 	for i := range topics {
 		topic := &topics[i]
 		if containsAnyLower(lowered, topic.keywords) {
 			for _, preferred := range topic.norms {
 				if normMatches(citation, preferred) {
 					return true
 				}
 			}
 		}
 	}
 	return false
 }
 // normMatches checks that norm appears in hay with a non-digit boundary, so "Art. 9"
 // matches "Art. 9 DSGVO" but not "Art. 90".
 //
 // Every occurrence of norm is inspected, not just the first one: a label such as
 // "Art. 90 und Art. 9 DSGVO" still matches "Art. 9" because the second occurrence is
 // followed by a non-digit. The function returns false only when no occurrence has a
 // clean boundary (end of string or a non-digit byte).
 func normMatches(hay, norm string) bool {
 	for start := 0; start <= len(hay)-len(norm); {
 		rel := strings.Index(hay[start:], norm)
 		if rel < 0 {
 			return false
 		}
 		end := start + rel + len(norm)
 		if end >= len(hay) || hay[end] < '0' || hay[end] > '9' {
 			return true
 		}
 		// This occurrence is a prefix of a longer number; keep looking further right.
 		start += rel + 1
 	}
 	return false
 }
 // queryIsForeign reports whether the query itself asks about foreign law, judged by a
 // fixed list of lower-case hints matched against the lower-cased query. The " ch " hint
 // only matches when surrounded by spaces.
 // NOTE(review): the Austria hints are part of this list, while the classification in
 // this file only ever produces "CH" as a foreign jurisdiction — confirm that is intended.
 func queryIsForeign(query string) bool {
 	lowered := strings.ToLower(query)
 	for _, hint := range []string{"schweiz", "revdsg", "fedlex", " ch ", "oesterreich", "österreich"} {
 		if strings.Contains(lowered, hint) {
 			return true
 		}
 	}
 	return false
 }
 // containsAny reports whether hay contains at least one of the markers as a
 // case-sensitive substring. An empty or nil marker list never matches.
 func containsAny(hay string, markers []string) bool {
 	for i := range markers {
 		if !strings.Contains(hay, markers[i]) {
 			continue
 		}
 		return true
 	}
 	return false
 }
 // containsAnyLower reports whether haylower contains at least one marker after the
 // marker has been lower-cased. haylower itself is used as given, so the caller is
 // expected to pass an already lower-cased string.
 func containsAnyLower(haylower string, markers []string) bool {
 	found := false
 	for idx := 0; idx < len(markers) && !found; idx++ {
 		found = strings.Contains(haylower, strings.ToLower(markers[idx]))
 	}
 	return found
 }
@@ -0,0 +1,68 @@
 package ucca
 import "sort"
 // Re-ranking coefficients (validated in the offline golden harness; Phase A — conservative).
 // Each value is added to or subtracted from the semantic score inside authorityScore.
 const (
 	authorityCoef    = 0.40 // * weight/100
 	jurisdictionGain = 0.05 // added whenever the foreign penalty below does not apply
 	foreignPenalty   = 0.60 // CH-jurisdiction result on a non-foreign question (demoted, not removed)
 	unknownPenalty   = 0.08 // result whose source class is "unknown"
 	domainMatchGain  = 0.15 // chunk domain equals the query domain
 	offDomainPenalty = 0.10 // chunk has a recognised domain other than the query's (demoted, not removed)
 	scopePenalty     = 0.25 // BDSG Teil 3 (law enforcement) on a general DP question
 	topicGain        = 0.18 // amplifier only
 )
 // authorityScore computes the normative relevance of a result for a query. It starts
 // from the semantic score and layers authority, jurisdiction, domain, scope and topic
 // signals on top. qDomain and qForeign are the pre-computed query domain and
 // foreign-law flag, so they are derived once per query rather than once per result.
 func authorityScore(query string, r LegalSearchResult, qDomain string, qForeign bool) float64 {
 	info := classifyAuthority(r)
 	total := r.Score + authorityCoef*float64(info.weight)/100.0
 	// Swiss law on a DE/EU question is demoted (not deleted); every other result
 	// receives the jurisdiction gain.
 	if qForeign || info.jurisdiction != "CH" {
 		total += jurisdictionGain
 	} else {
 		total -= foreignPenalty
 	}
 	if info.sourceClass == "unknown" {
 		total -= unknownPenalty
 	}
 	// Domain routing only applies when the query could be assigned to a domain.
 	if qDomain != "" {
 		if cd := chunkDomain(r); cd == qDomain {
 			total += domainMatchGain
 		} else if cd != "" {
 			// Off-domain norm: demoted, not deleted. Chunks without a domain stay neutral.
 			total -= offDomainPenalty
 		}
 	}
 	if qDomain == "data_protection" && scopeClass(r) == "law_enforcement" {
 		total -= scopePenalty
 	}
 	if resultMatchesTopic(query, r) {
 		// Amplifier, not an override.
 		total += topicGain
 	}
 	return total
 }
 // rerankByAuthority returns the results ordered by descending authority score, so that
 // binding law from the matching jurisdiction/domain ranks above guidance, foreign and
 // off-domain law. Nothing is dropped (guidance stays as interpretation context). The
 // input slice is left untouched; the returned copy carries the authority score in Score
 // so that downstream merges (e.g. the multi-collection advisor) keep this order. The
 // sort is stable, so equal scores keep their input order.
 //
 // NOTE(review): slices with fewer than two results are returned as-is, i.e. their Score
 // remains the raw semantic score rather than the authority score — confirm downstream
 // merges across collections can cope with that mix.
 func rerankByAuthority(query string, results []LegalSearchResult) []LegalSearchResult {
 	if len(results) <= 1 {
 		return results
 	}
 	domain, foreign := queryDomain(query), queryIsForeign(query)
 	ranked := make([]LegalSearchResult, 0, len(results))
 	for _, res := range results {
 		res.Score = authorityScore(query, res, domain, foreign)
 		ranked = append(ranked, res)
 	}
 	sort.SliceStable(ranked, func(i, j int) bool {
 		return ranked[j].Score < ranked[i].Score
 	})
 	return ranked
 }
@@ -0,0 +1,96 @@
 package ucca
 import "testing"
 // bindingRes builds a fully tagged binding-law result (weight 100) with the given
 // label, regulation short name, jurisdiction and semantic score.
 func bindingRes(label, reg, jur string, score float64) LegalSearchResult {
 	return LegalSearchResult{
 		ArticleLabel:    label,
 		RegulationShort: reg,
 		SourceClass:     "binding_law",
 		AuthorityWeight: 100,
 		Jurisdiction:    jur,
 		Score:           score,
 	}
 }
 // guidanceRes builds a fully tagged EU supervisory-guidance result (weight 70) with the
 // given label, regulation short name and semantic score.
 func guidanceRes(label, reg string, score float64) LegalSearchResult {
 	return LegalSearchResult{
 		ArticleLabel:    label,
 		RegulationShort: reg,
 		SourceClass:     "supervisory_guidance",
 		AuthorityWeight: 70,
 		Jurisdiction:    "EU",
 		Score:           score,
 	}
 }
 // foreignRes builds a fully tagged Swiss (RevDSG, jurisdiction CH) foreign-law result
 // with weight 0 and the given label and semantic score.
 func foreignRes(label string, score float64) LegalSearchResult {
 	return LegalSearchResult{
 		ArticleLabel:    label,
 		RegulationShort: "RevDSG",
 		SourceClass:     "foreign_law",
 		AuthorityWeight: 0,
 		Jurisdiction:    "CH",
 		Score:           score,
 	}
 }
 // Acceptance criteria (Phase 1) expressed as ordering tests.
 // Every subtest feeds rerankByAuthority a small hand-built result list in which the
 // semantically strongest hit is NOT the one that should win, then checks the first
 // element of the re-ranked output (and, where relevant, that no result was dropped).
 func TestRerankByAuthority_Acceptance(t *testing.T) {
 	// The query contains no domain or topic keyword, so only authority weight decides.
 	t.Run("guidance does not overtake semantically competitive binding", func(t *testing.T) {
 		out := rerankByAuthority("Was gilt hier?", []LegalSearchResult{
 			guidanceRes("ENISA Mapping", "ENISA", 0.72),
 			bindingRes("CRA Anhang I", "CRA", "EU", 0.66),
 		})
 		if out[0].RegulationShort != "CRA" {
 			t.Fatalf("binding must rank first over competitive guidance, got %q", out[0].RegulationShort)
 		}
 	})
 	// The query carries no foreign-law hint, so the CH result receives the foreign penalty.
 	t.Run("foreign law demoted on DE/EU question but kept", func(t *testing.T) {
 		in := []LegalSearchResult{foreignRes("RevDSG Art 1", 0.85), bindingRes("Art. 9 DSGVO", "DSGVO", "EU", 0.62)}
 		out := rerankByAuthority("Welche Daten sind besonders geschuetzt?", in)
 		if out[0].RegulationShort != "DSGVO" {
 			t.Fatalf("binding EU must beat foreign on a DE/EU query, got %q", out[0].RegulationShort)
 		}
 		if len(out) != 2 {
 			t.Fatalf("foreign law must be kept, got len=%d", len(out))
 		}
 	})
 	// "Hersteller" / "digitalen Elementen" route the query to the cyber domain; MDR is a
 	// product_safety chunk and is therefore off-domain.
 	t.Run("off-domain binding demoted but not removed", func(t *testing.T) {
 		in := []LegalSearchResult{
 			bindingRes("Art. 13 EU MDR", "MDR", "EU", 0.70),
 			bindingRes("Art. 13 CRA", "CRA", "EU", 0.60),
 		}
 		out := rerankByAuthority("Welche Pflichten hat der Hersteller von Produkten mit digitalen Elementen?", in)
 		if out[0].RegulationShort != "CRA" {
 			t.Fatalf("on-domain CRA must beat off-domain MDR, got %q", out[0].RegulationShort)
 		}
 		if len(out) != 2 {
 			t.Fatalf("off-domain MDR must be kept, got len=%d", len(out))
 		}
 	})
 	t.Run("same-regime binding wins over guidance", func(t *testing.T) {
 		out := rerankByAuthority("Was gilt hier?", []LegalSearchResult{
 			bindingRes("Art. 13 CRA", "CRA", "EU", 0.70),
 			guidanceRes("ENISA Mapping", "ENISA", 0.60),
 		})
 		if out[0].RegulationShort != "CRA" {
 			t.Fatalf("binding must win, got %q", out[0].RegulationShort)
 		}
 	})
 	// Both results are data-protection chunks; only the scope penalty for §§ 45-84 BDSG
 	// (plus the topic boost for Art. 9) separates them.
 	t.Run("BDSG Teil 3 demoted below DSGVO on general DP question", func(t *testing.T) {
 		in := []LegalSearchResult{
 			bindingRes("§ 48 BDSG", "BDSG", "DE", 0.70), // Teil 3 (law enforcement)
 			bindingRes("Art. 9 DSGVO", "DSGVO", "EU", 0.62),
 		}
 		out := rerankByAuthority("Was sind besondere Kategorien personenbezogener Daten?", in)
 		if out[0].RegulationShort != "DSGVO" {
 			t.Fatalf("DSGVO must beat BDSG Teil 3 on a general DP question, got %q", out[0].RegulationShort)
 		}
 	})
 	t.Run("nothing is dropped and topic amplifies", func(t *testing.T) {
 		in := []LegalSearchResult{
 			guidanceRes("ENISA", "ENISA", 0.72),
 			bindingRes("CRA Anhang I", "CRA", "EU", 0.66),
 			foreignRes("RevDSG", 0.5),
 		}
 		out := rerankByAuthority("Anforderungen an Security Updates?", in)
 		if len(out) != len(in) {
 			t.Fatalf("rerank must preserve all results, got %d want %d", len(out), len(in))
 		}
 		if out[0].ArticleLabel != "CRA Anhang I" {
 			t.Fatalf("topic+authority must lift CRA Anhang I to top, got %q", out[0].ArticleLabel)
 		}
 	})
 	// Only the length is asserted here; the single-element fast path returns its input.
 	t.Run("single result returned unchanged", func(t *testing.T) {
 		in := []LegalSearchResult{bindingRes("Art. 1 CRA", "CRA", "EU", 0.5)}
 		if out := rerankByAuthority("x", in); len(out) != 1 {
 			t.Fatalf("len=%d", len(out))
 		}
 	})
 }
@@ -0,0 +1,125 @@
 package ucca
 import "testing"
 // TestClassifyAuthority covers both classification paths: results that carry explicit
 // authority tags ("tagged ...") and results that have to be classified from category
 // and name markers alone ("untagged ...").
 func TestClassifyAuthority(t *testing.T) {
 	tests := []struct {
 		name    string
 		result  LegalSearchResult
 		wantW   int
 		wantSC  string
 		wantJur string
 	}{
 		{"tagged binding EU", LegalSearchResult{AuthorityWeight: 100, SourceClass: "binding_law", Jurisdiction: "EU"}, 100, "binding_law", "EU"},
 		{"tagged guidance DE", LegalSearchResult{AuthorityWeight: 70, SourceClass: "supervisory_guidance", Jurisdiction: "DE"}, 70, "supervisory_guidance", "DE"},
 		{"tagged foreign CH", LegalSearchResult{AuthorityWeight: 0, SourceClass: "foreign_law", Jurisdiction: "CH"}, 0, "foreign_law", "CH"},
 		{"untagged ENISA guidance", LegalSearchResult{RegulationShort: "ENISA", ArticleLabel: "ENISA CRA Standards Mapping"}, 70, "supervisory_guidance", "EU"},
 		{"untagged CRA binding", LegalSearchResult{RegulationShort: "CRA", ArticleLabel: "Art. 13 CRA", Category: "regulation"}, 100, "binding_law", "EU"},
 		{"untagged BDSG binding DE", LegalSearchResult{RegulationShort: "BDSG", ArticleLabel: "§ 38 BDSG"}, 100, "binding_law", "DE"},
 		{"untagged RevDSG foreign", LegalSearchResult{RegulationShort: "RevDSG", ArticleLabel: "RevDSG (CH)"}, 0, "foreign_law", "CH"},
 		{"untagged unknown", LegalSearchResult{RegulationShort: "", ArticleLabel: ""}, 50, "unknown", "EU"},
 	}
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
 			got := classifyAuthority(tt.result)
 			// All three derived fields must match; a mismatch prints got and want side by side.
 			if got.weight != tt.wantW || got.sourceClass != tt.wantSC || got.jurisdiction != tt.wantJur {
 				t.Errorf("classifyAuthority() = {%d %s %s}, want {%d %s %s}",
 					got.weight, got.sourceClass, got.jurisdiction, tt.wantW, tt.wantSC, tt.wantJur)
 			}
 		})
 	}
 }
 // TestQueryDomain checks that query keywords route to the expected domain and that a
 // query without any keyword yields the empty domain.
 func TestQueryDomain(t *testing.T) {
 	cases := []struct {
 		q    string
 		want string
 	}{
 		{"Welche Anforderungen an Security Updates?", "cyber"},
 		{"Wer braucht einen Datenschutzbeauftragten?", "data_protection"},
 		{"Was sind besondere Kategorien personenbezogener Daten?", "data_protection"},
 		{"Welche Pflichten beim Hochrisiko-KI-System?", "ai"},
 		{"Wie spaet ist es?", ""},
 	}
 	for i := range cases {
 		got := queryDomain(cases[i].q)
 		if got == cases[i].want {
 			continue
 		}
 		t.Errorf("queryDomain(%q) = %q, want %q", cases[i].q, got, cases[i].want)
 	}
 }
 // TestChunkDomain checks which domain a chunk is assigned to from its regulation
 // markers, including the empty domain for an unrecognised regulation.
 func TestChunkDomain(t *testing.T) {
 	type testCase struct {
 		name string
 		r    LegalSearchResult
 		want string
 	}
 	cases := []testCase{
 		{"CRA cyber", LegalSearchResult{RegulationShort: "CRA", ArticleLabel: "Art. 13 CRA"}, "cyber"},
 		{"DSGVO dp", LegalSearchResult{RegulationShort: "DSGVO", ArticleLabel: "Art. 9 DSGVO"}, "data_protection"},
 		{"AI Act ai", LegalSearchResult{RegulationShort: "AI Act", ArticleLabel: "Art. 10 AI Act"}, "ai"},
 		{"MDR product", LegalSearchResult{RegulationShort: "MDR", ArticleLabel: "Art. 13 EU MDR"}, "product_safety"},
 		{"unknown", LegalSearchResult{RegulationShort: "XYZ"}, ""},
 	}
 	for _, tc := range cases {
 		t.Run(tc.name, func(t *testing.T) {
 			got := chunkDomain(tc.r)
 			if got != tc.want {
 				t.Errorf("chunkDomain() = %q, want %q", got, tc.want)
 			}
 		})
 	}
 }
 // TestScopeClass checks that only BDSG paragraphs inside the law-enforcement range are
 // flagged, while other BDSG paragraphs and non-BDSG norms stay "general".
 func TestScopeClass(t *testing.T) {
 	type testCase struct {
 		name string
 		r    LegalSearchResult
 		want string
 	}
 	cases := []testCase{
 		{"BDSG Teil 3 law enforcement", LegalSearchResult{RegulationShort: "BDSG", ArticleLabel: "§ 48 BDSG"}, "law_enforcement"},
 		{"BDSG general part", LegalSearchResult{RegulationShort: "BDSG", ArticleLabel: "§ 38 BDSG"}, "general"},
 		{"DSGVO general", LegalSearchResult{RegulationShort: "DSGVO", ArticleLabel: "Art. 9 DSGVO"}, "general"},
 	}
 	for _, tc := range cases {
 		t.Run(tc.name, func(t *testing.T) {
 			got := scopeClass(tc.r)
 			if got != tc.want {
 				t.Errorf("scopeClass() = %q, want %q", got, tc.want)
 			}
 		})
 	}
 }
 // TestResultMatchesTopic checks the topic boost trigger: the query must hit a topic
 // keyword AND the result must be one of that topic's preferred norms (with a digit
 // boundary, so "Art. 90" is not mistaken for "Art. 9").
 func TestResultMatchesTopic(t *testing.T) {
 	type testCase struct {
 		name  string
 		query string
 		r     LegalSearchResult
 		want  bool
 	}
 	cases := []testCase{
 		{"besondere Kategorien -> Art 9 match", "Was sind besondere Kategorien?", LegalSearchResult{ArticleLabel: "Art. 9 DSGVO"}, true},
 		{"besondere Kategorien -> Art 90 no match", "Was sind besondere Kategorien?", LegalSearchResult{ArticleLabel: "Art. 90 DSGVO"}, false},
 		{"security updates -> CRA Anhang I", "Anforderungen an Security Updates?", LegalSearchResult{ArticleLabel: "CRA Anhang I"}, true},
 		{"no topic keyword", "Wie spaet ist es?", LegalSearchResult{ArticleLabel: "Art. 9 DSGVO"}, false},
 	}
 	for _, tc := range cases {
 		t.Run(tc.name, func(t *testing.T) {
 			got := resultMatchesTopic(tc.query, tc.r)
 			if got != tc.want {
 				t.Errorf("resultMatchesTopic() = %v, want %v", got, tc.want)
 			}
 		})
 	}
 }
 // TestNormMatches pins the digit-boundary rule of normMatches: a norm matches when it
 // is followed by a non-digit or the end of the string, and not when it is merely the
 // prefix of a longer number.
 func TestNormMatches(t *testing.T) {
 	cases := []struct {
 		hay  string
 		norm string
 		want bool
 	}{
 		{"Art. 9 DSGVO", "Art. 9", true},
 		{"Art. 90 DSGVO", "Art. 9", false},
 		{"§ 38 BDSG", "§ 38 BDSG", true},
 		{"§ 380 BDSG", "§ 38", false},
 		{"Art. 14 CRA", "Art. 14 CRA", true},
 	}
 	for i := range cases {
 		got := normMatches(cases[i].hay, cases[i].norm)
 		if got == cases[i].want {
 			continue
 		}
 		t.Errorf("normMatches(%q,%q) = %v, want %v", cases[i].hay, cases[i].norm, got, cases[i].want)
 	}
 }
@@ -93,6 +93,13 @@ func (c *LegalRAGClient) searchInternal(ctx context.Context, collection string,
 		hits = denseHits
 	}
 	// Stratified: den binding_law-Pool ERGAENZEN (nicht ersetzen), damit die Pflichtquelle
 	// immer Kandidat ist — Guidance bleibt als Auslegungskontext erhalten. Best-effort:
 	// Fehler beim Binding-Query degradieren still auf den semantischen Pool.
 	if bindingHits, bErr := c.searchBinding(ctx, collection, embedding, topK); bErr == nil {
 		hits = mergeDedupHits(hits, bindingHits)
 	}
 	results := make([]LegalSearchResult, len(hits))
 	for i, hit := range hits {
 		// Legal-Metadaten nach rag_reingest_spec.md §2: bevorzugt die normalisierten Felder
@@ -121,12 +128,41 @@ func (c *LegalRAGClient) searchInternal(ctx context.Context, collection string,
 			Pages:           getIntSlice(hit.Payload, "pages"),
 			SourceURL:       getString(hit.Payload, "source"),
 			Score:           hit.Score,
 			AuthorityWeight: getInt(hit.Payload, "authority_weight"),
 			SourceClass:     getString(hit.Payload, "source_class"),
 			Jurisdiction:    getString(hit.Payload, "jurisdiction"),
 		}
 	}
 	// Authority-aware Re-Ranking: bindendes Recht der passenden Jurisdiktion/Domaene nach
 	// oben, Guidance/Fremdrecht/Off-Domain runter (nichts wird geloescht). Reihenfolge only,
 	// Response-Schema unveraendert. Score traegt den Authority-Score, damit nachgelagerte
 	// Multi-Collection-Merges (Advisor) die Ordnung bewahren.
 	results = rerankByAuthority(query, results)
 	if topK > 0 && len(results) > topK {
 		results = results[:topK]
 	}
 	return results, nil
 }
 // mergeDedupHits returns primary followed by extra with duplicates removed. Two hits
 // count as the same point when fmt.Sprint of their IDs is equal; the first occurrence
 // wins, so a hit present in both lists keeps its entry (and score) from primary.
 func mergeDedupHits(primary, extra []qdrantSearchHit) []qdrantSearchHit {
 	total := len(primary) + len(extra)
 	merged := make([]qdrantSearchHit, 0, total)
 	known := make(map[string]bool, total)
 	keep := func(hits []qdrantSearchHit) {
 		for _, hit := range hits {
 			key := fmt.Sprint(hit.ID)
 			if !known[key] {
 				known[key] = true
 				merged = append(merged, hit)
 			}
 		}
 	}
 	keep(primary)
 	keep(extra)
 	return merged
 }
 // FormatLegalContextForPrompt formats the legal context for inclusion in an LLM prompt.
 func (c *LegalRAGClient) FormatLegalContextForPrompt(lc *LegalContext) string {
 	if lc == nil || len(lc.Results) == 0 {
@@ -185,6 +185,27 @@ func (c *LegalRAGClient) searchDense(ctx context.Context, collection string, emb
 		searchReq.Filter = &qdrantFilter{Should: conditions}
 	}
 	return c.doPointsSearch(ctx, collection, searchReq)
 }
 // searchBinding runs a vector search restricted to points whose source_class payload
 // equals "binding_law" and returns up to topK hits with payload. The caller merges these
 // hits into the semantic pool, so the obligation source is a candidate even when
 // guidance dominates semantically; guidance itself is kept as interpretation context.
 func (c *LegalRAGClient) searchBinding(ctx context.Context, collection string, embedding []float64, topK int) ([]qdrantSearchHit, error) {
 	bindingOnly := qdrantFilter{
 		Must: []qdrantCondition{
 			{Key: "source_class", Match: qdrantMatch{Value: "binding_law"}},
 		},
 	}
 	return c.doPointsSearch(ctx, collection, qdrantSearchRequest{
 		Vector:      embedding,
 		Limit:       topK,
 		WithPayload: true,
 		Filter:      &bindingOnly,
 	})
 }
 // doPointsSearch issues a POST /points/search and decodes the hits.
 func (c *LegalRAGClient) doPointsSearch(ctx context.Context, collection string, searchReq qdrantSearchRequest) ([]qdrantSearchHit, error) {
 	jsonBody, err := json.Marshal(searchReq)
 	if err != nil {
 		return nil, fmt.Errorf("failed to marshal search request: %w", err)
@@ -225,6 +225,18 @@ func getIntSlice(m map[string]interface{}, key string) []int {
 	return result
 }
 // getInt reads key from m as an int. float64 values are truncated toward zero and int
 // values are returned directly; a missing key or a value of any other type yields 0.
 func getInt(m map[string]interface{}, key string) int {
 	raw, found := m[key]
 	if !found {
 		return 0
 	}
 	if f, isFloat := raw.(float64); isFloat {
 		return int(f)
 	}
 	if i, isInt := raw.(int); isInt {
 		return i
 	}
 	return 0
 }
 func contains(slice []string, item string) bool {
 	for _, s := range slice {
 		if s == item {
@@ -399,8 +399,9 @@ func TestHybridSearch_UsesQueryAPI(t *testing.T) {
 			return
 		}
-		// Fallback: should not reach dense search
+		// /points/search is now the stratified binding-law augmentation query (it AUGMENTS
-		t.Error("Unexpected dense search call when hybrid succeeded")
+		// the hybrid pool, it is not a dense fallback). Return empty so the hybrid hit
 		// remains the sole result for this test.
 		json.NewEncoder(w).Encode(qdrantSearchResponse{Result: []qdrantSearchHit{}})
 	}))
 	defer qdrantMock.Close()
@@ -446,6 +447,59 @@ func TestHybridSearch_UsesQueryAPI(t *testing.T) {
 	}
 }
 // TestSearch_StratifiedBindingRerank verifies that the binding-law pool augments the
 // semantic pool and that authority re-ranking lifts binding law above higher-semantic guidance.
 // It runs a full client.Search against two HTTP mocks: an embedding endpoint and a
 // Qdrant endpoint that serves a guidance hit on /points/query and a binding-law hit on
 // every other search path.
 func TestSearch_StratifiedBindingRerank(t *testing.T) {
 	// Embedding mock: always returns a zero vector of length 1024.
 	ollamaMock := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 		json.NewEncoder(w).Encode(ollamaEmbeddingResponse{Embedding: make([]float64, 1024)})
 	}))
 	defer ollamaMock.Close()
 	qdrantMock := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 		// Index requests are acknowledged as completed.
 		if strings.Contains(r.URL.Path, "/index") {
 			w.WriteHeader(http.StatusOK)
 			w.Write([]byte(`{"result":{"status":"completed"}}`))
 			return
 		}
 		// /points/query = semantic pool: a single guidance hit with the higher raw score.
 		if strings.Contains(r.URL.Path, "/points/query") {
 			json.NewEncoder(w).Encode(qdrantQueryResponse{Result: []qdrantSearchHit{
 				{ID: "g1", Score: 0.72, Payload: map[string]interface{}{
 					"chunk_text": "ENISA guidance", "regulation_short": "ENISA",
 					"article_label": "ENISA CRA Mapping", "source_class": "supervisory_guidance",
 					"authority_weight": float64(70), "jurisdiction": "EU",
 				}},
 			}})
 			return
 		}
 		// /points/search = stratified binding-law pool (source_class=binding_law)
 		json.NewEncoder(w).Encode(qdrantSearchResponse{Result: []qdrantSearchHit{
 			{ID: "b1", Score: 0.66, Payload: map[string]interface{}{
 				"chunk_text": "CRA Anhang I requirement", "regulation_short": "CRA",
 				"article_label": "CRA Anhang I", "source_class": "binding_law",
 				"authority_weight": float64(100), "jurisdiction": "EU",
 			}},
 		}})
 	}))
 	defer qdrantMock.Close()
 	client := &LegalRAGClient{
 		qdrantURL: qdrantMock.URL, ollamaURL: ollamaMock.URL, embeddingModel: "bge-m3",
 		collection: "bp_compliance_ce", textIndexEnsured: make(map[string]bool),
 		hybridEnabled: true, httpClient: http.DefaultClient,
 	}
 	results, err := client.Search(context.Background(), "Was gilt hier?", nil, 5)
 	if err != nil {
 		t.Fatalf("search failed: %v", err)
 	}
 	// Both pools must survive the merge (distinct IDs g1 and b1).
 	if len(results) != 2 {
 		t.Fatalf("expected 2 merged results (guidance + binding), got %d", len(results))
 	}
 	// The binding hit has the lower raw score (0.66 vs 0.72) and must still come first.
 	if results[0].RegulationShort != "CRA" {
 		t.Errorf("binding CRA must rank first over higher-semantic guidance, got %q", results[0].RegulationShort)
 	}
 }
 func TestHybridSearch_FallbackToDense(t *testing.T) {
 	var requestedPaths []string
@@ -20,6 +20,13 @@ type LegalSearchResult struct {
 	Pages           []int   `json:"pages,omitempty"`
 	SourceURL       string  `json:"source_url"`
 	Score           float64 `json:"score"`
 	// Interne Felder fuer das Authority-Re-Ranking (Phase 1) — NICHT serialisiert
 	// (json:"-"), daher kein Contract-Change. Aus dem Qdrant-Payload befuellt und nur
 	// fuer die Sortierung in rerankByAuthority verwendet.
 	AuthorityWeight int    `json:"-"`
 	SourceClass     string `json:"-"`
 	Jurisdiction    string `json:"-"`
 }
 // LegalContext represents aggregated legal context for an assessment.
Author	SHA1	Message	Date
Benjamin_Boenisch	b83c3e6e00	ci(go-lint): golangci-lint v1.64.8 (go1.24) + new-from-merge-base (#32 ) CI / detect-changes (push) Successful in 16s Details CI / branch-name (push) Has been skipped Details CI / guardrail-integrity (push) Has been skipped Details CI / secret-scan (push) Has been skipped Details CI / sbom-scan (push) Has been skipped Details CI / dep-audit (push) Has been skipped Details CI / build-sha-integrity (push) Successful in 11s Details CI / validate-canonical-controls (push) Successful in 5s Details CI / loc-budget (push) Successful in 19s Details CI / go-lint (push) Has been skipped Details CI / python-lint (push) Has been skipped Details CI / nodejs-lint (push) Has been skipped Details CI / nodejs-build (push) Has been skipped Details CI / test-go (push) Successful in 57s Details CI / iace-gt-coverage (push) Successful in 16s Details CI / test-python-backend (push) Has been skipped Details CI / test-python-document-crawler (push) Has been skipped Details CI / test-python-dsms-gateway (push) Has been skipped Details	2026-06-23 10:58:48 +00:00
Benjamin_Boenisch	a1f425d43a	feat(ai-sdk): authority-aware re-ranking for legal RAG (Phase 1) (#31 ) CI / detect-changes (push) Successful in 8s Details CI / branch-name (push) Has been skipped Details CI / guardrail-integrity (push) Has been skipped Details CI / secret-scan (push) Has been skipped Details CI / dep-audit (push) Has been skipped Details CI / sbom-scan (push) Has been skipped Details CI / build-sha-integrity (push) Successful in 5s Details CI / validate-canonical-controls (push) Successful in 4s Details CI / loc-budget (push) Successful in 28s Details CI / go-lint (push) Has been skipped Details CI / python-lint (push) Has been skipped Details CI / nodejs-lint (push) Has been skipped Details CI / nodejs-build (push) Has been skipped Details CI / test-go (push) Successful in 58s Details CI / iace-gt-coverage (push) Successful in 16s Details CI / test-python-backend (push) Has been skipped Details CI / test-python-document-crawler (push) Has been skipped Details CI / test-python-dsms-gateway (push) Has been skipped Details	2026-06-23 09:30:52 +00:00
sharang	23c6ac6f32	Merge pull request 'feat: wire breakpilot-compliance to Infisical for local dev' (#30 ) from feat/infisical-secrets into main CI / detect-changes (push) Successful in 7s Details CI / branch-name (push) Has been skipped Details CI / guardrail-integrity (push) Has been skipped Details CI / secret-scan (push) Has been skipped Details CI / dep-audit (push) Has been skipped Details CI / sbom-scan (push) Has been skipped Details CI / build-sha-integrity (push) Successful in 7s Details CI / validate-canonical-controls (push) Successful in 6s Details CI / go-lint (push) Has been skipped Details CI / python-lint (push) Has been skipped Details CI / nodejs-lint (push) Has been skipped Details CI / nodejs-build (push) Has been skipped Details CI / test-go (push) Has been skipped Details CI / iace-gt-coverage (push) Has been skipped Details CI / loc-budget (push) Successful in 19s Details CI / test-python-backend (push) Has been skipped Details CI / test-python-document-crawler (push) Has been skipped Details CI / test-python-dsms-gateway (push) Has been skipped Details	2026-06-22 19:12:54 +00:00