All checks were successful
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 28s
CI / test-go-edu-search (push) Successful in 27s
CI / test-python-klausur (push) Successful in 1m45s
CI / test-python-agent-core (push) Successful in 16s
CI / test-nodejs-website (push) Successful in 21s
- edu-search-service von breakpilot-pwa nach breakpilot-lehrer kopiert (ohne vendor) - opensearch + edu-search-service in docker-compose.yml hinzugefuegt - voice-service aus docker-compose.yml entfernt (jetzt in breakpilot-core) - geo-service aus docker-compose.yml entfernt (nicht mehr benoetigt) - CI/CD: edu-search-service zu Gitea Actions und Woodpecker hinzugefuegt (Go lint, test mit go mod download, build, SBOM) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
334 lines
7.5 KiB
Go
334 lines
7.5 KiB
Go
package quality
|
|
|
|
import (
|
|
"testing"
|
|
)
|
|
|
|
func TestNewScorer(t *testing.T) {
|
|
scorer := NewScorer()
|
|
if scorer == nil {
|
|
t.Fatal("Expected non-nil scorer")
|
|
}
|
|
}
|
|
|
|
func TestNewScorerWithWeights(t *testing.T) {
|
|
weights := Weights{
|
|
ContentLength: 0.5,
|
|
HeadingStructure: 0.5,
|
|
}
|
|
scorer := NewScorerWithWeights(weights)
|
|
|
|
if scorer.weights.ContentLength != 0.5 {
|
|
t.Errorf("Expected weight 0.5, got %f", scorer.weights.ContentLength)
|
|
}
|
|
}
|
|
|
|
func TestCalculate_HighQualityDocument(t *testing.T) {
|
|
scorer := NewScorer()
|
|
|
|
features := ContentFeatures{
|
|
ContentLength: 5000,
|
|
HeadingCount: 5,
|
|
HeadingDepth: 3,
|
|
LinkDensity: 0.1,
|
|
AdDensity: 0,
|
|
TextToHTMLRatio: 0.4,
|
|
HasTitle: true,
|
|
HasDescription: true,
|
|
HasCanonical: true,
|
|
Language: "de",
|
|
DateIndicators: []string{"2024-01-15"},
|
|
}
|
|
|
|
score := scorer.Calculate(features)
|
|
|
|
if score.Total < 0.8 {
|
|
t.Errorf("Expected high quality score (>0.8), got %f", score.Total)
|
|
}
|
|
}
|
|
|
|
func TestCalculate_LowQualityDocument(t *testing.T) {
|
|
scorer := NewScorer()
|
|
|
|
features := ContentFeatures{
|
|
ContentLength: 100,
|
|
HeadingCount: 0,
|
|
LinkDensity: 0.5,
|
|
AdDensity: 0.2,
|
|
TextToHTMLRatio: 0.05,
|
|
HasTitle: false,
|
|
HasDescription: false,
|
|
Language: "",
|
|
}
|
|
|
|
score := scorer.Calculate(features)
|
|
|
|
if score.Total > 0.5 {
|
|
t.Errorf("Expected low quality score (<0.5), got %f", score.Total)
|
|
}
|
|
}
|
|
|
|
func TestCalculateContentLengthScore(t *testing.T) {
|
|
scorer := NewScorer()
|
|
|
|
tests := []struct {
|
|
length int
|
|
minScore float64
|
|
maxScore float64
|
|
}{
|
|
{100, 0.0, 0.2}, // very short
|
|
{500, 0.5, 0.7}, // short-medium
|
|
{2000, 0.7, 0.9}, // good
|
|
{5000, 0.9, 1.0}, // optimal
|
|
{30000, 0.6, 0.8}, // very long
|
|
}
|
|
|
|
for _, tt := range tests {
|
|
t.Run("", func(t *testing.T) {
|
|
score := scorer.calculateContentLengthScore(tt.length)
|
|
if score < tt.minScore || score > tt.maxScore {
|
|
t.Errorf("Length %d: expected score in [%f, %f], got %f",
|
|
tt.length, tt.minScore, tt.maxScore, score)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestCalculateHeadingScore(t *testing.T) {
|
|
scorer := NewScorer()
|
|
|
|
// No headings
|
|
score := scorer.calculateHeadingScore(0, 0, false)
|
|
if score > 0.1 {
|
|
t.Errorf("Expected low score for no headings, got %f", score)
|
|
}
|
|
|
|
// Good heading structure
|
|
score = scorer.calculateHeadingScore(5, 3, true)
|
|
if score < 0.9 {
|
|
t.Errorf("Expected high score for good headings, got %f", score)
|
|
}
|
|
}
|
|
|
|
func TestCalculateLinkQualityScore(t *testing.T) {
|
|
scorer := NewScorer()
|
|
|
|
// Good: low link and ad density
|
|
score := scorer.calculateLinkQualityScore(0.1, 0)
|
|
if score < 0.9 {
|
|
t.Errorf("Expected high score for good link quality, got %f", score)
|
|
}
|
|
|
|
// Bad: high ad density
|
|
score = scorer.calculateLinkQualityScore(0.1, 0.2)
|
|
if score > 0.6 {
|
|
t.Errorf("Expected low score for high ad density, got %f", score)
|
|
}
|
|
}
|
|
|
|
func TestCalculateTextRatioScore(t *testing.T) {
|
|
scorer := NewScorer()
|
|
|
|
tests := []struct {
|
|
ratio float64
|
|
minScore float64
|
|
}{
|
|
{0.05, 0.0}, // too low
|
|
{0.3, 0.9}, // optimal
|
|
{0.9, 0.5}, // too high (plain text dump)
|
|
}
|
|
|
|
for _, tt := range tests {
|
|
score := scorer.calculateTextRatioScore(tt.ratio)
|
|
if score < tt.minScore {
|
|
t.Errorf("Ratio %f: expected score >= %f, got %f", tt.ratio, tt.minScore, score)
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestCalculateMetadataScore(t *testing.T) {
|
|
scorer := NewScorer()
|
|
|
|
// All metadata present
|
|
score := scorer.calculateMetadataScore(true, true, true)
|
|
if score != 1.0 {
|
|
t.Errorf("Expected 1.0 for all metadata, got %f", score)
|
|
}
|
|
|
|
// No metadata
|
|
score = scorer.calculateMetadataScore(false, false, false)
|
|
if score != 0.0 {
|
|
t.Errorf("Expected 0.0 for no metadata, got %f", score)
|
|
}
|
|
|
|
// Only title
|
|
score = scorer.calculateMetadataScore(true, false, false)
|
|
if score != 0.5 {
|
|
t.Errorf("Expected 0.5 for only title, got %f", score)
|
|
}
|
|
}
|
|
|
|
func TestCalculateLanguageScore(t *testing.T) {
|
|
scorer := NewScorer()
|
|
|
|
tests := []struct {
|
|
language string
|
|
expected float64
|
|
}{
|
|
{"de", 1.0},
|
|
{"german", 1.0},
|
|
{"en", 0.8},
|
|
{"", 0.5},
|
|
{"fr", 0.3},
|
|
}
|
|
|
|
for _, tt := range tests {
|
|
score := scorer.calculateLanguageScore(tt.language)
|
|
if score != tt.expected {
|
|
t.Errorf("Language '%s': expected %f, got %f", tt.language, tt.expected, score)
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestCalculateFreshnessScore(t *testing.T) {
|
|
scorer := NewScorer()
|
|
|
|
// Recent date
|
|
score := scorer.calculateFreshnessScore([]string{"2024-06-15"})
|
|
if score < 0.9 {
|
|
t.Errorf("Expected high score for recent date, got %f", score)
|
|
}
|
|
|
|
// Older date
|
|
score = scorer.calculateFreshnessScore([]string{"2016-01-01"})
|
|
if score > 0.8 {
|
|
t.Errorf("Expected moderate score for 2016, got %f", score)
|
|
}
|
|
|
|
// No date indicators
|
|
score = scorer.calculateFreshnessScore(nil)
|
|
if score != 0.5 {
|
|
t.Errorf("Expected neutral score for no dates, got %f", score)
|
|
}
|
|
}
|
|
|
|
func TestCalculatePDFScore(t *testing.T) {
|
|
scorer := NewScorer()
|
|
|
|
// Multi-page PDF with good content
|
|
score := scorer.calculatePDFScore(10, 5000)
|
|
if score < 0.8 {
|
|
t.Errorf("Expected high score for good PDF, got %f", score)
|
|
}
|
|
|
|
// Single page, little content
|
|
score = scorer.calculatePDFScore(1, 50)
|
|
if score > 0.6 {
|
|
t.Errorf("Expected lower score for poor PDF, got %f", score)
|
|
}
|
|
}
|
|
|
|
func TestExtractDateIndicators(t *testing.T) {
|
|
text := "Lehrplan gültig ab 01.08.2023 - Stand: 2024-01-15. Aktualisiert 2024."
|
|
|
|
indicators := ExtractDateIndicators(text)
|
|
|
|
if len(indicators) == 0 {
|
|
t.Error("Expected to find date indicators")
|
|
}
|
|
|
|
// Should find at least the year patterns
|
|
found2024 := false
|
|
for _, ind := range indicators {
|
|
if ind == "2024" || ind == "2023" || ind == "2024-01-15" || ind == "01.08.2023" {
|
|
found2024 = true
|
|
}
|
|
}
|
|
|
|
if !found2024 {
|
|
t.Errorf("Expected to find 2024 or 2023, got: %v", indicators)
|
|
}
|
|
}
|
|
|
|
func TestExtractDateIndicators_Empty(t *testing.T) {
|
|
text := "This text has no dates whatsoever."
|
|
|
|
indicators := ExtractDateIndicators(text)
|
|
|
|
if len(indicators) != 0 {
|
|
t.Errorf("Expected no indicators, got: %v", indicators)
|
|
}
|
|
}
|
|
|
|
func TestCalculate_PDFDocument(t *testing.T) {
|
|
scorer := NewScorer()
|
|
|
|
features := ContentFeatures{
|
|
ContentLength: 3000,
|
|
HeadingCount: 3,
|
|
HeadingDepth: 2,
|
|
Language: "de",
|
|
IsPDF: true,
|
|
PageCount: 8,
|
|
DateIndicators: []string{"2023"},
|
|
}
|
|
|
|
score := scorer.Calculate(features)
|
|
|
|
// PDF with 8 pages and good content should score well
|
|
if score.PDFSpecific < 0.8 {
|
|
t.Errorf("Expected good PDF-specific score, got %f", score.PDFSpecific)
|
|
}
|
|
|
|
if score.Total < 0.5 {
|
|
t.Errorf("Expected reasonable score for PDF, got %f", score.Total)
|
|
}
|
|
}
|
|
|
|
func TestCalculate_ScoreClamping(t *testing.T) {
|
|
scorer := NewScorer()
|
|
|
|
// Even with all perfect scores, total should not exceed 1.0
|
|
features := ContentFeatures{
|
|
ContentLength: 5000,
|
|
HeadingCount: 10,
|
|
HeadingDepth: 4,
|
|
HasTOC: true,
|
|
LinkDensity: 0,
|
|
AdDensity: 0,
|
|
TextToHTMLRatio: 0.4,
|
|
HasTitle: true,
|
|
HasDescription: true,
|
|
HasCanonical: true,
|
|
Language: "de",
|
|
DateIndicators: []string{"2024"},
|
|
}
|
|
|
|
score := scorer.Calculate(features)
|
|
|
|
if score.Total > 1.0 {
|
|
t.Errorf("Score should be clamped to 1.0, got %f", score.Total)
|
|
}
|
|
if score.Total < 0 {
|
|
t.Errorf("Score should not be negative, got %f", score.Total)
|
|
}
|
|
}
|
|
|
|
func TestDefaultWeights(t *testing.T) {
|
|
weights := DefaultWeights()
|
|
|
|
// Sum should be approximately 1.0
|
|
sum := weights.ContentLength +
|
|
weights.HeadingStructure +
|
|
weights.LinkQuality +
|
|
weights.TextToHTMLRatio +
|
|
weights.MetadataPresence +
|
|
weights.LanguageClarity +
|
|
weights.ContentFreshness +
|
|
weights.PDFSpecific
|
|
|
|
if sum < 0.99 || sum > 1.01 {
|
|
t.Errorf("Default weights should sum to 1.0, got %f", sum)
|
|
}
|
|
}
|