// breakpilot-lehrer/edu-search-service/internal/policy/policy_test.go

package policy

import (
	"regexp"
	"testing"
)

// =============================================================================
// MODEL TESTS
// =============================================================================

// TestBundeslandValidation checks that each listed Bundesland constant is
// present in ValidBundeslaender.
func TestBundeslandValidation(t *testing.T) {
	// isKnown reports whether candidate appears in ValidBundeslaender.
	isKnown := func(candidate Bundesland) bool {
		for _, entry := range ValidBundeslaender {
			if entry == candidate {
				return true
			}
		}
		return false
	}

	cases := []struct {
		label string
		state Bundesland
		want  bool
	}{
		{label: "valid NI", state: BundeslandNI, want: true},
		{label: "valid BY", state: BundeslandBY, want: true},
		{label: "valid BW", state: BundeslandBW, want: true},
		{label: "valid NW", state: BundeslandNW, want: true},
	}

	for _, tc := range cases {
		t.Run(tc.label, func(t *testing.T) {
			if got := isKnown(tc.state); got != tc.want {
				t.Errorf("Expected %v to be valid=%v, got valid=%v", tc.state, tc.want, got)
			}
		})
	}
}

// TestLicenseValues asserts that none of the declared License constants is
// the empty string.
func TestLicenseValues(t *testing.T) {
	for _, lic := range []License{
		LicenseDLDEBY20,
		LicenseCCBY,
		LicenseCCBYSA,
		LicenseCC0,
		LicenseParagraph5,
	} {
		if lic != "" {
			continue
		}
		t.Errorf("License should not be empty")
	}
}

// TestOperationValues verifies that ValidOperations holds exactly four
// entries and that each of the four known operations is among them.
func TestOperationValues(t *testing.T) {
	if count := len(ValidOperations); count != 4 {
		t.Errorf("Expected 4 operations, got %d", count)
	}

	required := []Operation{OperationLookup, OperationRAG, OperationTraining, OperationExport}

nextRequired:
	for _, want := range required {
		for _, have := range ValidOperations {
			if have == want {
				// Present; move on to the next required operation.
				continue nextRequired
			}
		}
		t.Errorf("Expected operation %s not found in ValidOperations", want)
	}
}

// =============================================================================
// PII DETECTOR TESTS
// =============================================================================

func TestPIIDetector_EmailDetection(t *testing.T) {
	tests := []struct {
		name     string
		text     string
		hasEmail bool
	}{
		{"simple email", "Contact: test@example.com", true},
		{"email with plus", "Email: user+tag@domain.org", true},
		{"no email", "This is plain text", false},
		{"partial email", "user@ is not an email", false},
		{"multiple emails", "Send to a@b.com and x@y.de", true},
	}

	// Test using regex pattern directly since we don't have a store
	emailPattern := `[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}`

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// Simple test without database
			rule := &PIIRule{
				Name:     "Email",
				RuleType: PIIRuleTypeRegex,
				Pattern:  emailPattern,
				Severity: PIISeverityBlock,
			}

			detector := &PIIDetector{
				compiledRules: make(map[string]*regexp.Regexp),
			}

			matches := detector.findMatches(tt.text, rule)
			hasMatch := len(matches) > 0

			if hasMatch != tt.hasEmail {
				t.Errorf("Expected hasEmail=%v, got %v for text: %s", tt.hasEmail, hasMatch, tt.text)
			}
		})
	}
}

func TestPIIDetector_PhoneDetection(t *testing.T) {
	tests := []struct {
		name     string
		text     string
		hasPhone bool
	}{
		{"german mobile", "Call +49 170 1234567", true},
		{"german landline", "Tel: 030-12345678", true},
		{"with spaces", "Phone: 0170 123 4567", true},
		{"no phone", "This is just text", false},
		{"US format", "Call 555-123-4567", false}, // Should not match German pattern
	}

	phonePattern := `(?:\+49|0)[\s.-]?\d{2,4}[\s.-]?\d{3,}[\s.-]?\d{2,}`

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			rule := &PIIRule{
				Name:     "Phone",
				RuleType: PIIRuleTypeRegex,
				Pattern:  phonePattern,
				Severity: PIISeverityBlock,
			}

			detector := &PIIDetector{
				compiledRules: make(map[string]*regexp.Regexp),
			}

			matches := detector.findMatches(tt.text, rule)
			hasMatch := len(matches) > 0

			if hasMatch != tt.hasPhone {
				t.Errorf("Expected hasPhone=%v, got %v for text: %s", tt.hasPhone, hasMatch, tt.text)
			}
		})
	}
}

func TestPIIDetector_IBANDetection(t *testing.T) {
	tests := []struct {
		name    string
		text    string
		hasIBAN bool
	}{
		{"valid IBAN", "IBAN: DE89 3704 0044 0532 0130 00", true},
		{"compact IBAN", "DE89370400440532013000", true},
		{"no IBAN", "Just a number: 12345678", false},
		{"partial", "DE12 is not complete", false},
	}

	ibanPattern := `DE\d{2}\s?\d{4}\s?\d{4}\s?\d{4}\s?\d{4}\s?\d{2}`

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			rule := &PIIRule{
				Name:     "IBAN",
				RuleType: PIIRuleTypeRegex,
				Pattern:  ibanPattern,
				Severity: PIISeverityBlock,
			}

			detector := &PIIDetector{
				compiledRules: make(map[string]*regexp.Regexp),
			}

			matches := detector.findMatches(tt.text, rule)
			hasMatch := len(matches) > 0

			if hasMatch != tt.hasIBAN {
				t.Errorf("Expected hasIBAN=%v, got %v for text: %s", tt.hasIBAN, hasMatch, tt.text)
			}
		})
	}
}

func TestPIIDetector_KeywordMatching(t *testing.T) {
	tests := []struct {
		name     string
		text     string
		keywords string
		expected int
	}{
		{"single keyword", "The password is secret", "password", 1},
		{"multiple keywords", "Password and secret", "password,secret", 2},
		{"case insensitive", "PASSWORD and Secret", "password,secret", 2},
		{"no match", "This is safe text", "password,secret", 0},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			rule := &PIIRule{
				Name:     "Keywords",
				RuleType: PIIRuleTypeKeyword,
				Pattern:  tt.keywords,
				Severity: PIISeverityWarn,
			}

			detector := &PIIDetector{
				compiledRules: make(map[string]*regexp.Regexp),
			}

			matches := detector.findKeywordMatches(tt.text, rule)

			if len(matches) != tt.expected {
				t.Errorf("Expected %d matches, got %d for text: %s", tt.expected, len(matches), tt.text)
			}
		})
	}
}

func TestPIIDetector_Redaction(t *testing.T) {
	detector := &PIIDetector{
		compiledRules: make(map[string]*regexp.Regexp),
	}

	tests := []struct {
		name     string
		text     string
		matches  []PIIMatch
		expected string
	}{
		{
			"single redaction",
			"Email: test@example.com",
			[]PIIMatch{{StartIndex: 7, EndIndex: 23, Severity: PIISeverityBlock}},
			"Email: ****************",
		},
		{
			"no matches",
			"Plain text",
			[]PIIMatch{},
			"Plain text",
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			result := detector.RedactText(tt.text, tt.matches)
			if result != tt.expected {
				t.Errorf("Expected '%s', got '%s'", tt.expected, result)
			}
		})
	}
}

// TestCompareSeverity checks the integer returned by compareSeverity for
// several severity pairs. The table expects block > redact > warn, with 0
// for equal severities.
func TestCompareSeverity(t *testing.T) {
	cases := []struct {
		left, right PIISeverity
		want        int
	}{
		{left: PIISeverityBlock, right: PIISeverityWarn, want: 1},
		{left: PIISeverityWarn, right: PIISeverityBlock, want: -1},
		{left: PIISeverityBlock, right: PIISeverityBlock, want: 0},
		{left: PIISeverityRedact, right: PIISeverityWarn, want: 1},
		{left: PIISeverityRedact, right: PIISeverityBlock, want: -1},
	}

	for _, tc := range cases {
		// Subtest names are derived from the two severities being compared.
		label := string(tc.left) + "_vs_" + string(tc.right)
		t.Run(label, func(t *testing.T) {
			if got := compareSeverity(tc.left, tc.right); got != tc.want {
				t.Errorf("Expected %d, got %d for %s vs %s", tc.want, got, tc.left, tc.right)
			}
		})
	}
}

// =============================================================================
// ENFORCER TESTS
// =============================================================================

// TestExtractDomain checks the domain string and error returned by
// extractDomain for URLs with a scheme, a port, subdomains, a "www." prefix,
// and no scheme at all.
func TestExtractDomain(t *testing.T) {
	cases := []struct {
		label   string
		rawURL  string
		want    string
		wantErr bool
	}{
		{label: "full URL", rawURL: "https://www.example.com/path", want: "example.com"},
		{label: "with port", rawURL: "http://example.com:8080/path", want: "example.com"},
		{label: "subdomain", rawURL: "https://sub.domain.example.com", want: "sub.domain.example.com"},
		{label: "no scheme", rawURL: "example.com/path", want: "example.com"},
		{label: "www prefix", rawURL: "https://www.test.de", want: "test.de"},
	}

	for _, tc := range cases {
		t.Run(tc.label, func(t *testing.T) {
			got, err := extractDomain(tc.rawURL)

			// The two error expectations are mutually exclusive.
			switch {
			case tc.wantErr && err == nil:
				t.Error("Expected error, got nil")
			case !tc.wantErr && err != nil:
				t.Errorf("Expected no error, got %v", err)
			}

			if got != tc.want {
				t.Errorf("Expected '%s', got '%s'", tc.want, got)
			}
		})
	}
}

// =============================================================================
// YAML LOADER TESTS
// =============================================================================

// TestParseYAML parses an inline YAML policy document with ParseYAML and
// checks the federal section, the single Bundesland entry, the default
// operations and the PII rules of the resulting config.
//
// Checks that guard a later index or lookup (slice lengths, presence of the
// "NI" key) abort the test with Fatal/Fatalf, so an unexpected parse result
// is reported as a test failure instead of an index-out-of-range panic.
func TestParseYAML(t *testing.T) {
	yamlData := `
federal:
  name: "Test Federal"
  sources:
    - domain: "test.gov"
      name: "Test Source"
      license: "§5 UrhG"
      trust_boost: 0.9

NI:
  name: "Niedersachsen"
  sources:
    - domain: "ni.gov"
      name: "NI Source"
      license: "DL-DE-BY-2.0"

default_operations:
  lookup:
    allowed: true
    requires_citation: true
  training:
    allowed: false
    requires_citation: false

pii_rules:
  - name: "Test Rule"
    type: "regex"
    pattern: "test.*pattern"
    severity: "block"
`

	config, err := ParseYAML([]byte(yamlData))
	if err != nil {
		t.Fatalf("Failed to parse YAML: %v", err)
	}

	// Test federal
	if config.Federal.Name != "Test Federal" {
		t.Errorf("Expected federal name 'Test Federal', got '%s'", config.Federal.Name)
	}
	// Fatal: Sources[0] is indexed below and would panic on an empty slice.
	if len(config.Federal.Sources) != 1 {
		t.Fatalf("Expected 1 federal source, got %d", len(config.Federal.Sources))
	}
	federalSource := config.Federal.Sources[0]
	if federalSource.Domain != "test.gov" {
		t.Errorf("Expected domain 'test.gov', got '%s'", federalSource.Domain)
	}
	if federalSource.TrustBoost != 0.9 {
		t.Errorf("Expected trust_boost 0.9, got %f", federalSource.TrustBoost)
	}

	// Test Bundesland
	if len(config.Bundeslaender) != 1 {
		t.Errorf("Expected 1 Bundesland, got %d", len(config.Bundeslaender))
	}
	ni, ok := config.Bundeslaender["NI"]
	if !ok {
		// Fatal: without the entry there is nothing meaningful to inspect.
		t.Fatal("Expected NI in Bundeslaender")
	}
	if ni.Name != "Niedersachsen" {
		t.Errorf("Expected name 'Niedersachsen', got '%s'", ni.Name)
	}

	// Test operations
	if !config.DefaultOperations.Lookup.Allowed {
		t.Error("Expected lookup to be allowed")
	}
	if config.DefaultOperations.Training.Allowed {
		t.Error("Expected training to be NOT allowed")
	}

	// Test PII rules
	// Fatal: PIIRules[0] is indexed below and would panic on an empty slice.
	if len(config.PIIRules) != 1 {
		t.Fatalf("Expected 1 PII rule, got %d", len(config.PIIRules))
	}
	if config.PIIRules[0].Name != "Test Rule" {
		t.Errorf("Expected rule name 'Test Rule', got '%s'", config.PIIRules[0].Name)
	}
}

// =============================================================================
// AUDIT TESTS
// =============================================================================

// TestMaskPII checks maskPII against fixed expectations: the short input is
// expected to become "****", while the longer inputs are expected to keep
// their first two and last two characters around "****".
func TestMaskPII(t *testing.T) {
	cases := []struct {
		label string
		raw   string
		want  string
	}{
		{label: "short", raw: "ab", want: "****"},
		{label: "medium", raw: "test@email.com", want: "te****om"},
		{label: "long", raw: "very-long-email@example.com", want: "ve****om"},
	}

	for _, tc := range cases {
		t.Run(tc.label, func(t *testing.T) {
			if got := maskPII(tc.raw); got != tc.want {
				t.Errorf("Expected '%s', got '%s'", tc.want, got)
			}
		})
	}
}

// =============================================================================
// DEFAULT PII RULES TEST
// =============================================================================

// TestDefaultPIIRules verifies that DefaultPIIRules returns a non-empty set,
// that every rule has a name, type and pattern, and that a rule named
// "Email Addresses" is included.
func TestDefaultPIIRules(t *testing.T) {
	defaults := DefaultPIIRules()
	if len(defaults) == 0 {
		t.Error("Expected default PII rules, got none")
	}

	// Single pass: validate required fields and look for the email rule.
	emailRuleSeen := false
	for _, r := range defaults {
		if r.Name == "" {
			t.Error("Rule name should not be empty")
		}
		if r.Type == "" {
			t.Error("Rule type should not be empty")
		}
		if r.Pattern == "" {
			t.Error("Rule pattern should not be empty")
		}
		if r.Name == "Email Addresses" {
			emailRuleSeen = true
		}
	}

	if !emailRuleSeen {
		t.Error("Expected email addresses rule in defaults")
	}
}

// =============================================================================
// INTEGRATION TEST HELPERS
// =============================================================================

// TestFilteredURL constructs a FilteredURL literal and confirms that the
// URL, IsAllowed and RequiresCitation fields hold the assigned values.
func TestFilteredURL(t *testing.T) {
	const rawURL = "https://example.com"

	filtered := FilteredURL{
		IsAllowed:        true,
		RequiresCitation: true,
		URL:              rawURL,
	}

	if got := filtered.URL; got != rawURL {
		t.Error("URL not set correctly")
	}
	if allowed := filtered.IsAllowed; !allowed {
		t.Error("IsAllowed should be true")
	}
	if cited := filtered.RequiresCitation; !cited {
		t.Error("RequiresCitation should be true")
	}
}