// breakpilot-compliance/ai-compliance-sdk/internal/ucca/legal_rag_assess.go

package ucca

import (
	"fmt"
	"strings"
)

// Tuning parameters used by Assess when building the explanation layer.
const (
	assessConnectedCap    = 12   // maximum number of connected norms reported in the assessment
	assessCrossRegimeTopN = 5    // number of top-ranked distinct norms inspected for cross-regime detection
	assessReviewMargin    = 0.05 // winner margins below this threshold set the human-review flag
)

// Assess derives an explanatory LegalAssessment from an already-ranked result
// set. It never reorders or rescores anything; it only describes the ranking it
// is given. For an empty result set it returns nil.
//
// The assessment contains:
//   - the label and regulation of the top-ranked distinct norm,
//   - the norms that norm references or is referenced by (de-duplicated, the
//     norm's own CitationUnit excluded, capped at assessConnectedCap),
//   - whether more than one non-empty RegulationShort occurs among the first
//     assessCrossRegimeTopN distinct norms,
//   - the score gap between the first and second distinct norm,
//   - a human-review flag, raised when the gap is below assessReviewMargin,
//     when the result is cross-regime, or when the top norm's SourceClass is
//     not "binding_law",
//   - a short reasoning string produced by assessReasoning.
//
// When only one distinct norm exists the gap stays 0, which is below
// assessReviewMargin, so the human-review flag is set in that case as well.
func Assess(results []LegalSearchResult) *LegalAssessment {
	if len(results) == 0 {
		return nil
	}

	// Judge everything between distinct norms: several chunks of one article
	// would otherwise compete with each other and push the gap towards zero.
	ranked := distinctNorms(results)
	top := ranked[0]

	// Citation-graph neighbours of the winner, bounded for readability.
	neighbours := dedupStrings(top.ReferencesOut, top.ReferencesIn, top.CitationUnit)
	if len(neighbours) > assessConnectedCap {
		neighbours = neighbours[:assessConnectedCap]
	}

	// Collect the distinct, non-empty regimes within the leading window.
	limit := len(ranked)
	if limit > assessCrossRegimeTopN {
		limit = assessCrossRegimeTopN
	}
	seenRegimes := make(map[string]struct{}, limit)
	for i := 0; i < limit; i++ {
		if short := ranked[i].RegulationShort; short != "" {
			seenRegimes[short] = struct{}{}
		}
	}
	multiRegime := len(seenRegimes) > 1

	// Gap between winner and runner-up; remains zero without a runner-up.
	var lead float64
	if len(ranked) >= 2 {
		lead = ranked[0].Score - ranked[1].Score
	}

	binding := top.SourceClass == "binding_law"
	needsReview := !binding || multiRegime || lead < assessReviewMargin

	return &LegalAssessment{
		PrimaryNorm:       primaryLabel(top),
		PrimaryRegulation: top.RegulationShort,
		ConnectedNorms:    neighbours,
		CrossRegime:       multiRegime,
		HumanReviewFlag:   needsReview,
		WinnerMargin:      lead,
		ScoreReasoning:    assessReasoning(top, lead, multiRegime, binding),
	}
}

// primaryLabel picks the display label for a result: its CitationUnit if set,
// otherwise its ArticleLabel, otherwise RegulationShort and Article joined by a
// space with surrounding whitespace trimmed.
func primaryLabel(p LegalSearchResult) string {
	switch {
	case p.CitationUnit != "":
		return p.CitationUnit
	case p.ArticleLabel != "":
		return p.ArticleLabel
	default:
		// Trimming covers the case where either component is empty.
		return strings.TrimSpace(p.RegulationShort + " " + p.Article)
	}
}

// assessReasoning composes the German, human-readable justification for an
// assessment as space-separated sentences:
//   - always: whether the primary hit is binding law (including its
//     AuthorityWeight) or a non-binding source that should be checked,
//   - if margin is positive: the lead over the second-ranked norm,
//   - if margin is below assessReviewMargin: a hint to check alternatives,
//   - if crossRegime is set: a hint to check cross references.
func assessReasoning(p LegalSearchResult, margin float64, crossRegime, primaryBinding bool) string {
	name := primaryLabel(p)

	var text strings.Builder
	// Every sentence after the opening one is preceded by a single space.
	addSentence := func(sentence string) {
		text.WriteByte(' ')
		text.WriteString(sentence)
	}

	if primaryBinding {
		fmt.Fprintf(&text, "Primärtreffer %s: bindendes Recht (Autorität %d).", name, p.AuthorityWeight)
	} else {
		fmt.Fprintf(&text, "Primärtreffer %s ist keine bindende Norm (Leitlinie/Standard) — Quelle prüfen.", name)
	}

	if margin > 0 {
		addSentence(fmt.Sprintf("Vorsprung %.2f vor #2.", margin))
	}
	if margin < assessReviewMargin {
		addSentence("Knapper Vorsprung — Alternativtreffer prüfen.")
	}
	if crossRegime {
		addSentence("Mehrere Regime betroffen — Querbezug prüfen.")
	}
	return text.String()
}

// distinctNorms reduces a result list to one entry per citation identity,
// keeping the first occurrence in input order. The identity of a result is its
// CitationUnit, or its ArticleLabel when CitationUnit is empty. Results for
// which both are empty have no identity to compare and are all retained.
func distinctNorms(results []LegalSearchResult) []LegalSearchResult {
	unique := make([]LegalSearchResult, 0, len(results))
	taken := make(map[string]struct{}, len(results))

	for i := range results {
		id := results[i].CitationUnit
		if id == "" {
			id = results[i].ArticleLabel
		}

		if id == "" {
			// Nothing to match on: keep unconditionally.
			unique = append(unique, results[i])
			continue
		}
		if _, dup := taken[id]; dup {
			// An earlier entry already represents this norm.
			continue
		}
		taken[id] = struct{}{}
		unique = append(unique, results[i])
	}
	return unique
}

// dedupStrings merges out followed by in into one list without repeats. Empty
// strings and the exclude value are skipped, and each remaining value appears
// once at the position of its first occurrence. The result is never nil.
func dedupStrings(out, in []string, exclude string) []string {
	visited := make(map[string]struct{}, len(out)+len(in)+1)
	// Pre-marking the excluded value makes it look like an already-seen entry.
	visited[exclude] = struct{}{}

	unique := make([]string, 0, len(out)+len(in))
	collect := func(candidates []string) {
		for _, c := range candidates {
			if c == "" {
				continue
			}
			if _, dup := visited[c]; dup {
				continue
			}
			visited[c] = struct{}{}
			unique = append(unique, c)
		}
	}

	collect(out)
	collect(in)
	return unique
}