Files
breakpilot-lehrer/edu-search-service/internal/quality/quality_test.go
Benjamin Boenisch 414e0f5ec0
All checks were successful
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 28s
CI / test-go-edu-search (push) Successful in 27s
CI / test-python-klausur (push) Successful in 1m45s
CI / test-python-agent-core (push) Successful in 16s
CI / test-nodejs-website (push) Successful in 21s
feat: edu-search-service migriert, voice-service/geo-service entfernt
- edu-search-service von breakpilot-pwa nach breakpilot-lehrer kopiert (ohne vendor)
- opensearch + edu-search-service in docker-compose.yml hinzugefuegt
- voice-service aus docker-compose.yml entfernt (jetzt in breakpilot-core)
- geo-service aus docker-compose.yml entfernt (nicht mehr benoetigt)
- CI/CD: edu-search-service zu Gitea Actions und Woodpecker hinzugefuegt
  (Go lint, test mit go mod download, build, SBOM)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-15 18:36:38 +01:00

334 lines
7.5 KiB
Go

package quality
import (
"testing"
)
// TestNewScorer checks that the default constructor hands back a non-nil scorer.
func TestNewScorer(t *testing.T) {
	if s := NewScorer(); s == nil {
		t.Fatal("Expected non-nil scorer")
	}
}
// TestNewScorerWithWeights verifies that a scorer built from custom weights
// keeps every weight that was explicitly configured.
func TestNewScorerWithWeights(t *testing.T) {
	weights := Weights{
		ContentLength:    0.5,
		HeadingStructure: 0.5,
	}

	scorer := NewScorerWithWeights(weights)

	// Assert both configured fields: checking only one would let a constructor
	// that drops or overwrites the other weight go unnoticed.
	if scorer.weights.ContentLength != 0.5 {
		t.Errorf("Expected weight 0.5, got %f", scorer.weights.ContentLength)
	}
	if scorer.weights.HeadingStructure != 0.5 {
		t.Errorf("Expected HeadingStructure weight 0.5, got %f", scorer.weights.HeadingStructure)
	}
}
// TestCalculate_HighQualityDocument scores a long, well-structured German page
// with complete metadata and no ads, and fails when the total drops below 0.8.
func TestCalculate_HighQualityDocument(t *testing.T) {
	s := NewScorer()

	doc := ContentFeatures{
		ContentLength:   5000,
		HeadingCount:    5,
		HeadingDepth:    3,
		LinkDensity:     0.1,
		AdDensity:       0,
		TextToHTMLRatio: 0.4,
		HasTitle:        true,
		HasDescription:  true,
		HasCanonical:    true,
		Language:        "de",
		DateIndicators:  []string{"2024-01-15"},
	}

	result := s.Calculate(doc)
	if result.Total < 0.8 {
		t.Errorf("Expected high quality score (>0.8), got %f", result.Total)
	}
}
// TestCalculate_LowQualityDocument scores a very short page with no headings,
// no metadata, no declared language and noticeable link/ad density, and fails
// when the total rises above 0.5.
func TestCalculate_LowQualityDocument(t *testing.T) {
	s := NewScorer()

	doc := ContentFeatures{
		ContentLength:   100,
		HeadingCount:    0,
		LinkDensity:     0.5,
		AdDensity:       0.2,
		TextToHTMLRatio: 0.05,
		HasTitle:        false,
		HasDescription:  false,
		Language:        "",
	}

	result := s.Calculate(doc)
	if result.Total > 0.5 {
		t.Errorf("Expected low quality score (<0.5), got %f", result.Total)
	}
}
// TestCalculateContentLengthScore checks that calculateContentLengthScore
// returns a value inside the expected [minScore, maxScore] band for a range of
// content lengths.
//
// Each case runs as a named subtest so a failure identifies the offending
// length class and can be selected individually with -run.
func TestCalculateContentLengthScore(t *testing.T) {
	scorer := NewScorer()

	tests := []struct {
		name     string
		length   int
		minScore float64
		maxScore float64
	}{
		{"very short", 100, 0.0, 0.2},
		{"short-medium", 500, 0.5, 0.7},
		{"good", 2000, 0.7, 0.9},
		{"optimal", 5000, 0.9, 1.0},
		{"very long", 30000, 0.6, 0.8},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			score := scorer.calculateContentLengthScore(tt.length)
			if score < tt.minScore || score > tt.maxScore {
				t.Errorf("Length %d: expected score in [%f, %f], got %f",
					tt.length, tt.minScore, tt.maxScore, score)
			}
		})
	}
}
// TestCalculateHeadingScore covers the two extremes of calculateHeadingScore:
// a document without headings and one with a rich heading structure.
// NOTE(review): the arguments are presumably (heading count, heading depth,
// has table of contents) — confirm against the scorer implementation.
func TestCalculateHeadingScore(t *testing.T) {
	s := NewScorer()

	// No headings at all: the score must stay at or below 0.1.
	if got := s.calculateHeadingScore(0, 0, false); got > 0.1 {
		t.Errorf("Expected low score for no headings, got %f", got)
	}

	// Good heading structure: the score must reach at least 0.9.
	if got := s.calculateHeadingScore(5, 3, true); got < 0.9 {
		t.Errorf("Expected high score for good headings, got %f", got)
	}
}
// TestCalculateLinkQualityScore checks calculateLinkQualityScore for a clean
// page and for the same page with a high ad density.
func TestCalculateLinkQualityScore(t *testing.T) {
	s := NewScorer()

	// Low link density and no ads: expect at least 0.9.
	if got := s.calculateLinkQualityScore(0.1, 0); got < 0.9 {
		t.Errorf("Expected high score for good link quality, got %f", got)
	}

	// Same link density but high ad density: expect at most 0.6.
	if got := s.calculateLinkQualityScore(0.1, 0.2); got > 0.6 {
		t.Errorf("Expected low score for high ad density, got %f", got)
	}
}
// TestCalculateTextRatioScore checks that calculateTextRatioScore reaches a
// minimum score for several text-to-HTML ratios.
//
// Only a lower bound is asserted. The 0.05 case uses a minimum of 0.0, so it
// can only fail on a negative score.
func TestCalculateTextRatioScore(t *testing.T) {
	s := NewScorer()

	cases := []struct {
		ratio    float64
		minScore float64
	}{
		{ratio: 0.05, minScore: 0.0}, // too low
		{ratio: 0.3, minScore: 0.9},  // optimal
		{ratio: 0.9, minScore: 0.5},  // too high (plain text dump)
	}

	for _, tc := range cases {
		got := s.calculateTextRatioScore(tc.ratio)
		if got < tc.minScore {
			t.Errorf("Ratio %f: expected score >= %f, got %f", tc.ratio, tc.minScore, got)
		}
	}
}
// TestCalculateMetadataScore pins the exact calculateMetadataScore results for
// three flag combinations: all set, none set, and only the first flag set.
func TestCalculateMetadataScore(t *testing.T) {
	s := NewScorer()

	// Every metadata flag present.
	if got := s.calculateMetadataScore(true, true, true); got != 1.0 {
		t.Errorf("Expected 1.0 for all metadata, got %f", got)
	}

	// No metadata flag present.
	if got := s.calculateMetadataScore(false, false, false); got != 0.0 {
		t.Errorf("Expected 0.0 for no metadata, got %f", got)
	}

	// Title only.
	if got := s.calculateMetadataScore(true, false, false); got != 0.5 {
		t.Errorf("Expected 0.5 for only title, got %f", got)
	}
}
// TestCalculateLanguageScore pins the exact score calculateLanguageScore
// returns for German, English, empty and other language identifiers.
func TestCalculateLanguageScore(t *testing.T) {
	s := NewScorer()

	cases := []struct {
		language string
		expected float64
	}{
		{language: "de", expected: 1.0},
		{language: "german", expected: 1.0},
		{language: "en", expected: 0.8},
		{language: "", expected: 0.5},
		{language: "fr", expected: 0.3},
	}

	for _, tc := range cases {
		got := s.calculateLanguageScore(tc.language)
		if got != tc.expected {
			t.Errorf("Language '%s': expected %f, got %f", tc.language, tc.expected, got)
		}
	}
}
// TestCalculateFreshnessScore checks calculateFreshnessScore for a recent date,
// an older date, and the case where no date indicators exist.
//
// NOTE(review): the dates are hard-coded. If the scorer measures age against
// the current clock, the "recent" expectation will eventually stop holding —
// confirm against the implementation.
func TestCalculateFreshnessScore(t *testing.T) {
	s := NewScorer()

	// Recent date: expect at least 0.9.
	if got := s.calculateFreshnessScore([]string{"2024-06-15"}); got < 0.9 {
		t.Errorf("Expected high score for recent date, got %f", got)
	}

	// Older date: expect at most 0.8.
	if got := s.calculateFreshnessScore([]string{"2016-01-01"}); got > 0.8 {
		t.Errorf("Expected moderate score for 2016, got %f", got)
	}

	// No date indicators: expect exactly the neutral 0.5.
	if got := s.calculateFreshnessScore(nil); got != 0.5 {
		t.Errorf("Expected neutral score for no dates, got %f", got)
	}
}
// TestCalculatePDFScore checks calculatePDFScore for a multi-page PDF with
// plenty of content and for a single page with very little content.
func TestCalculatePDFScore(t *testing.T) {
	s := NewScorer()

	// Multi-page PDF with good content: expect at least 0.8.
	if got := s.calculatePDFScore(10, 5000); got < 0.8 {
		t.Errorf("Expected high score for good PDF, got %f", got)
	}

	// Single page, little content: expect at most 0.6.
	if got := s.calculatePDFScore(1, 50); got > 0.6 {
		t.Errorf("Expected lower score for poor PDF, got %f", got)
	}
}
// TestExtractDateIndicators runs ExtractDateIndicators over a German sentence
// containing a dotted date, an ISO date and a bare year. It expects a
// non-empty result that includes at least one of those known date strings.
func TestExtractDateIndicators(t *testing.T) {
	const text = "Lehrplan gültig ab 01.08.2023 - Stand: 2024-01-15. Aktualisiert 2024."

	indicators := ExtractDateIndicators(text)
	if len(indicators) == 0 {
		t.Error("Expected to find date indicators")
	}

	// Any one of the known date strings is enough; the exact extraction
	// granularity (year vs. full date) is not pinned by this test.
	matched := false
	for _, indicator := range indicators {
		switch indicator {
		case "2024", "2023", "2024-01-15", "01.08.2023":
			matched = true
		}
	}

	if !matched {
		t.Errorf("Expected to find 2024 or 2023, got: %v", indicators)
	}
}
// TestExtractDateIndicators_Empty checks that text without any date yields no
// indicators.
func TestExtractDateIndicators_Empty(t *testing.T) {
	const text = "This text has no dates whatsoever."

	if found := ExtractDateIndicators(text); len(found) != 0 {
		t.Errorf("Expected no indicators, got: %v", found)
	}
}
// TestCalculate_PDFDocument scores an eight-page German PDF and checks both the
// PDF-specific component (at least 0.8) and the overall total (at least 0.5).
func TestCalculate_PDFDocument(t *testing.T) {
	s := NewScorer()

	pdf := ContentFeatures{
		ContentLength:  3000,
		HeadingCount:   3,
		HeadingDepth:   2,
		Language:       "de",
		IsPDF:          true,
		PageCount:      8,
		DateIndicators: []string{"2023"},
	}

	result := s.Calculate(pdf)

	// PDF with 8 pages and good content should score well.
	if result.PDFSpecific < 0.8 {
		t.Errorf("Expected good PDF-specific score, got %f", result.PDFSpecific)
	}

	if result.Total < 0.5 {
		t.Errorf("Expected reasonable score for PDF, got %f", result.Total)
	}
}
// TestCalculate_ScoreClamping feeds Calculate a document with best-case values
// for every feature and checks that the total stays within [0, 1].
func TestCalculate_ScoreClamping(t *testing.T) {
	s := NewScorer()

	// Even with all perfect scores, the total should not exceed 1.0.
	ideal := ContentFeatures{
		ContentLength:   5000,
		HeadingCount:    10,
		HeadingDepth:    4,
		HasTOC:          true,
		LinkDensity:     0,
		AdDensity:       0,
		TextToHTMLRatio: 0.4,
		HasTitle:        true,
		HasDescription:  true,
		HasCanonical:    true,
		Language:        "de",
		DateIndicators:  []string{"2024"},
	}

	result := s.Calculate(ideal)

	// The two bounds are mutually exclusive, so at most one branch reports.
	switch {
	case result.Total > 1.0:
		t.Errorf("Score should be clamped to 1.0, got %f", result.Total)
	case result.Total < 0:
		t.Errorf("Score should not be negative, got %f", result.Total)
	}
}
// TestDefaultWeights checks that the eight default weights add up to 1.0
// within a tolerance of 0.01.
func TestDefaultWeights(t *testing.T) {
	w := DefaultWeights()

	// Accumulate in the same left-to-right order as a single chained sum so
	// the floating-point result is unchanged.
	total := w.ContentLength
	total += w.HeadingStructure
	total += w.LinkQuality
	total += w.TextToHTMLRatio
	total += w.MetadataPresence
	total += w.LanguageClarity
	total += w.ContentFreshness
	total += w.PDFSpecific

	// Sum should be approximately 1.0.
	if total < 0.99 || total > 1.01 {
		t.Errorf("Default weights should sum to 1.0, got %f", total)
	}
}