// breakpilot-compliance/ai-compliance-sdk/internal/ucca/legal_rag_intent_test.go

package ucca

import "testing"

// intentRes builds a LegalSearchResult fixture for the tests in this file.
// reg is stored as RegulationShort, sourceClass as SourceClass, sem as Score
// and weight as AuthorityWeight. Jurisdiction is always set to "EU"; every
// other field keeps its zero value.
func intentRes(reg, sourceClass string, sem float64, weight int) LegalSearchResult {
	var fixture LegalSearchResult
	fixture.RegulationShort = reg
	fixture.SourceClass = sourceClass
	fixture.Score = sem
	fixture.AuthorityWeight = weight
	fixture.Jurisdiction = "EU"
	return fixture
}

// TestQueryWantsGuidance runs queryWantsGuidance over a fixed list of queries.
// Queries marked expect=true must be reported as carrying interpretation
// intent; the plain norm questions marked expect=false must not. Every
// mismatch is reported with t.Errorf, so all queries are always evaluated.
func TestQueryWantsGuidance(t *testing.T) {
	cases := []struct {
		query  string
		expect bool
	}{
		// Queries that should be detected as asking for guidance/interpretation.
		{"Was empfiehlt der EDPB zum DSB?", true},
		{"Was sagt die ENISA zu Security Updates?", true},
		{"laut DSK ...", true},
		{"Orientierungshilfe zur DSFA", true},
		{"Welche BSI-Empfehlung gilt?", true},
		{"Auslegung der Aufsichtsbehörde", true},
		// Plain norm questions that must not trigger the intent.
		{"Ab wann braucht man einen Datenschutzbeauftragten?", false},
		{"Welche Anforderungen bestehen an Security Updates?", false},
	}
	for _, tc := range cases {
		detected := queryWantsGuidance(tc.query)
		if tc.expect {
			if !detected {
				t.Errorf("should detect interpretation intent: %q", tc.query)
			}
			continue
		}
		if detected {
			t.Errorf("should NOT detect intent (norm question): %q", tc.query)
		}
	}
}

// TestRerank_NormQuestion_BindingStaysTop checks that rerankByAuthority puts
// the binding_law result first for a plain norm question, even though the
// supervisory_guidance result carries the higher Score (0.64 vs. 0.58).
func TestRerank_NormQuestion_BindingStaysTop(t *testing.T) {
	// No intent signal → binding wins even though guidance is semantically higher.
	results := []LegalSearchResult{
		intentRes("EDPB DPO", "supervisory_guidance", 0.64, 70),
		intentRes("DSGVO", "binding_law", 0.58, 100),
	}
	out := rerankByAuthority("Ab wann braucht man einen Datenschutzbeauftragten?", results)
	// Guard the index below: an empty result must fail this test cleanly
	// instead of panicking and aborting the rest of the test run.
	if len(out) == 0 {
		t.Fatalf("rerankByAuthority returned no results for %d inputs", len(results))
	}
	if out[0].SourceClass != "binding_law" {
		t.Errorf("norm question: binding must stay Top-1, got %s", out[0].SourceClass)
	}
}

// TestRerank_InterpretationQuestion_GuidanceMayWin checks that
// rerankByAuthority puts the supervisory_guidance result first when the query
// explicitly asks what the EDPB recommends and the guidance result has the
// higher Score (0.64 vs. 0.58 for the binding_law result).
func TestRerank_InterpretationQuestion_GuidanceMayWin(t *testing.T) {
	// Explicit intent + guidance semantically competitive → guidance wins.
	results := []LegalSearchResult{
		intentRes("EDPB DPO", "supervisory_guidance", 0.64, 70),
		intentRes("DSGVO", "binding_law", 0.58, 100),
	}
	out := rerankByAuthority("Was empfiehlt der EDPB zum Datenschutzbeauftragten?", results)
	// Guard the index below: an empty result must fail this test cleanly
	// instead of panicking and aborting the rest of the test run.
	if len(out) == 0 {
		t.Fatalf("rerankByAuthority returned no results for %d inputs", len(results))
	}
	if out[0].SourceClass != "supervisory_guidance" {
		t.Errorf("interpretation question: guidance should win Top-1, got %s", out[0].SourceClass)
	}
}

// TestRerank_OffTopicGuidance_BlockedByGuard checks that rerankByAuthority
// keeps the binding_law result first when the query asks for an EDPB
// recommendation but the supervisory_guidance result scores well below the
// binding one (0.40 vs. 0.58).
func TestRerank_OffTopicGuidance_BlockedByGuard(t *testing.T) {
	// Intent present, but guidance semantic is far below the best binding hit →
	// the margin guard keeps binding on top (no off-topic guideline override).
	results := []LegalSearchResult{
		intentRes("EDPB DPO", "supervisory_guidance", 0.40, 70),
		intentRes("DSGVO", "binding_law", 0.58, 100),
	}
	out := rerankByAuthority("Was empfiehlt der EDPB zum Datenschutzbeauftragten?", results)
	// Guard the index below: an empty result must fail this test cleanly
	// instead of panicking and aborting the rest of the test run.
	if len(out) == 0 {
		t.Fatalf("rerankByAuthority returned no results for %d inputs", len(results))
	}
	if out[0].SourceClass != "binding_law" {
		t.Errorf("off-topic guidance must not win even with intent, got %s", out[0].SourceClass)
	}
}

// TestQueryWantsControls runs queryWantsControls over a fixed list of queries.
// Queries marked expect=true must be reported as carrying control or
// implementation intent; the plain norm questions marked expect=false must
// not. Every mismatch is reported with t.Errorf, so all queries are always
// evaluated.
func TestQueryWantsControls(t *testing.T) {
	cases := []struct {
		query  string
		expect bool
	}{
		// Queries that should be detected as asking for controls/measures.
		{"Welche Controls passen zu Security Updates?", true},
		{"Welche Maßnahmen sollten wir umsetzen?", true},
		{"Wie härten wir den Server ab?", true},
		{"Gibt es NIST-Controls dafür?", true},
		{"OWASP Best Practice für Logging?", true},
		{"BSI Grundschutz Bausteine", true},
		// Plain norm questions that must not trigger the intent.
		{"Welche Anforderungen bestehen an Security Updates?", false},
		{"Ab wann braucht man einen Datenschutzbeauftragten?", false},
	}
	for _, tc := range cases {
		detected := queryWantsControls(tc.query)
		if tc.expect {
			if !detected {
				t.Errorf("should detect control/implementation intent: %q", tc.query)
			}
			continue
		}
		if detected {
			t.Errorf("should NOT detect control intent (norm question): %q", tc.query)
		}
	}
}

// TestRerank_ControlQuestion_OperationalReqTop checks that, for a query asking
// for controls and measures, rerankByAuthority ranks the "CRA Anhang I" result
// ahead of the NIST result although the NIST result has the higher Score
// (0.60 vs. 0.58).
func TestRerank_ControlQuestion_OperationalReqTop(t *testing.T) {
	// User priority for implementation questions: operational_requirement (binding concrete,
	// CRA Anhang I) > control_standard (NIST). Both are in the control-pool; op_req wins.
	// NOTE(review): the CRA fixture sets no SourceClass; presumably the reranker
	// derives its class from Category/ArticleLabel — confirm against rerankByAuthority.
	results := []LegalSearchResult{
		{RegulationShort: "NIST SP 800-82r3", ArticleLabel: "AU-8", SourceClass: "technical_standard", AuthorityWeight: 80, Jurisdiction: "EU", Score: 0.60},
		{RegulationShort: "CRA", ArticleLabel: "CRA Anhang I", Category: "regulation", Score: 0.58},
	}
	out := rerankByAuthority("Welche Controls und Massnahmen passen zu Security Updates?", results)
	// Guard the index below: an empty result must fail this test cleanly
	// instead of panicking and aborting the rest of the test run.
	if len(out) == 0 {
		t.Fatalf("rerankByAuthority returned no results for %d inputs", len(results))
	}
	if out[0].RegulationShort != "CRA" {
		t.Errorf("operational_requirement (CRA Anhang I) should be Top-1 over control_standard, got %q", out[0].RegulationShort)
	}
}

// TestRerank_NormQuestion_BindingOverStandard checks that rerankByAuthority
// keeps the binding_law result first for a question about "Anforderungen",
// even though the technical_standard result has the higher Score
// (0.62 vs. 0.58).
func TestRerank_NormQuestion_BindingOverStandard(t *testing.T) {
	// "Anforderungen" → no control intent → binding obligation stays Top-1 over the standard.
	results := []LegalSearchResult{
		intentRes("NIST SP 800-82", "technical_standard", 0.62, 80),
		intentRes("CRA", "binding_law", 0.58, 100),
	}
	out := rerankByAuthority("Welche Anforderungen bestehen an Security Updates?", results)
	// Guard the index below: an empty result must fail this test cleanly
	// instead of panicking and aborting the rest of the test run.
	if len(out) == 0 {
		t.Fatalf("rerankByAuthority returned no results for %d inputs", len(results))
	}
	if out[0].SourceClass != "binding_law" {
		t.Errorf("norm question: binding must stay Top-1 over standard, got %s", out[0].SourceClass)
	}
}

// TestRerank_ControlQuestion_PoolBeatsBareObligation checks that, for a query
// asking for controls and measures, rerankByAuthority ranks the NIST result
// ahead of the "Art. 5 XYZ" regulation result although the regulation result
// has the higher Score (0.58 vs. 0.55).
func TestRerank_ControlQuestion_PoolBeatsBareObligation(t *testing.T) {
	// A control-pool source (NIST control_standard) outranks an abstract obligation with no
	// domain/topic advantage, because the implementation intent boosts the control-pool.
	results := []LegalSearchResult{
		{RegulationShort: "NIST SP 800-82r3", ArticleLabel: "AU-8", SourceClass: "technical_standard", AuthorityWeight: 80, Jurisdiction: "EU", Score: 0.55},
		{RegulationShort: "XYZ", ArticleLabel: "Art. 5 XYZ", Category: "regulation", Score: 0.58},
	}
	out := rerankByAuthority("Welche Controls und Massnahmen passen zu Security Updates?", results)
	// Guard the index below: an empty result must fail this test cleanly
	// instead of panicking and aborting the rest of the test run.
	if len(out) == 0 {
		t.Fatalf("rerankByAuthority returned no results for %d inputs", len(results))
	}
	if out[0].RegulationShort != "NIST SP 800-82r3" {
		t.Errorf("control_standard should beat a bare abstract obligation on a control question, got %q", out[0].RegulationShort)
	}
}