A previous `git pull --rebase origin main` dropped 177 local commits,
losing 3400+ files across admin-v2, backend, studio-v2, website,
klausur-service, and many other services. The partial restore attempt
(660295e2) only recovered some files.
This commit restores all missing files from pre-rebase ref 98933f5e
while preserving post-rebase additions (night-scheduler, night-mode UI,
NightModeWidget dashboard integration).
Restored features include:
- AI Module Sidebar (FAB), OCR Labeling, OCR Compare
- GPU Dashboard, RAG Pipeline, Magic Help
- Klausur-Korrektur (8 files), Abitur-Archiv (5+ files)
- Companion, Zeugnisse-Crawler, Screen Flow
- Full backend, studio-v2, website, klausur-service
- All compliance SDKs, agent-core, voice-service
- CI/CD configs, documentation, scripts
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
344 lines
10 KiB
TypeScript
344 lines
10 KiB
TypeScript
/**
 * Quality Dashboard Tests
 *
 * Tests for the BQAS Quality Dashboard components and API integration
 */
|
|
|
|
describe('Quality Dashboard', () => {
|
|
describe('Page Structure', () => {
  // Filesystem-level checks: the quality page file exists and the admin
  // layout source contains the navigation entry strings.

  /** Resolves `relative` against the directory containing this test file. */
  const fromTestDir = (relative: string): string =>
    require('path').join(__dirname, relative)

  it('should have the quality page file', () => {
    const { existsSync } = require('fs')

    const pageFile = fromTestDir('../app/admin/quality/page.tsx')

    expect(existsSync(pageFile)).toBe(true)
  })

  it('should have navigation entry in AdminLayout', () => {
    const { readFileSync } = require('fs')

    const layoutSource = readFileSync(
      fromTestDir('../components/admin/AdminLayout.tsx'),
      'utf-8'
    )

    // Both the label and the link target must appear verbatim in the layout source.
    for (const snippet of ["name: 'Qualitaet'", "href: '/admin/quality'"]) {
      expect(layoutSource).toContain(snippet)
    }
  })
})
|
|
|
|
describe('API Types', () => {
  // Local payload shapes exercised by the structure tests in this suite.
  // NOTE(review): presumably these mirror the BQAS API responses — confirm
  // against the types used by the quality page itself.

  /** A single evaluated test case. */
  type TestResult = {
    test_id: string
    test_name: string
    passed: boolean
    composite_score: number
    intent_accuracy: number
    faithfulness: number
    relevance: number
    coherence: number
    safety: string
    reasoning: string
  }

  /** Aggregated metrics over a set of test cases. */
  type BQASMetrics = {
    total_tests: number
    passed_tests: number
    failed_tests: number
    avg_intent_accuracy: number
    avg_faithfulness: number
    avg_relevance: number
    avg_coherence: number
    safety_pass_rate: number
    avg_composite_score: number
    scores_by_intent: Record<string, number>
    failed_test_ids: string[]
  }

  /** Summary record of one test run. */
  type TestRun = {
    id: number
    timestamp: string
    git_commit: string
    golden_score: number
    synthetic_score: number
    total_tests: number
    passed_tests: number
    failed_tests: number
    duration_seconds: number
  }

  it('should have valid TestResult structure', () => {
    const sample: TestResult = {
      test_id: 'INT-001',
      test_name: 'Test Name',
      passed: true,
      composite_score: 4.5,
      intent_accuracy: 95,
      faithfulness: 5,
      relevance: 4,
      coherence: 4,
      safety: 'pass',
      reasoning: 'Good result',
    }
    const { test_id, passed, composite_score } = sample

    expect(test_id).toBe('INT-001')
    expect(passed).toBe(true)
    // The composite score is checked against the closed range [0, 5].
    expect(composite_score).toBeGreaterThanOrEqual(0)
    expect(composite_score).toBeLessThanOrEqual(5)
  })

  it('should have valid BQASMetrics structure', () => {
    const sample: BQASMetrics = {
      total_tests: 100,
      passed_tests: 90,
      failed_tests: 10,
      avg_intent_accuracy: 85.5,
      avg_faithfulness: 4.2,
      avg_relevance: 4.0,
      avg_coherence: 4.1,
      safety_pass_rate: 0.95,
      avg_composite_score: 4.0,
      scores_by_intent: {
        student_observation: 4.5,
        worksheet_generate: 3.8,
      },
      failed_test_ids: ['INT-001', 'INT-002'],
    }
    const { total_tests, passed_tests, failed_tests, safety_pass_rate } = sample

    // Passed and failed counts must add up to the total.
    expect(total_tests).toBe(passed_tests + failed_tests)
    // The safety pass rate is checked against the closed range [0, 1].
    expect(safety_pass_rate).toBeGreaterThanOrEqual(0)
    expect(safety_pass_rate).toBeLessThanOrEqual(1)
  })

  it('should have valid TestRun structure', () => {
    const sample: TestRun = {
      id: 1,
      timestamp: '2026-01-26T10:00:00Z',
      git_commit: 'abc1234',
      golden_score: 4.2,
      synthetic_score: 3.9,
      total_tests: 100,
      passed_tests: 90,
      failed_tests: 10,
      duration_seconds: 120.5,
    }
    const { id, golden_score } = sample

    expect(id).toBeGreaterThan(0)
    expect(golden_score).toBeGreaterThanOrEqual(0)
    expect(golden_score).toBeLessThanOrEqual(5)
  })
})
|
|
|
|
describe('Metric Calculations', () => {
  /**
   * Pass rate in percent; yields 0 when there are no tests so the division
   * by zero is never evaluated.
   */
  const toPassRate = (passed: number, total: number): number =>
    total > 0 ? (passed / total) * 100 : 0

  it('should calculate pass rate correctly', () => {
    const counts = {
      total_tests: 100,
      passed_tests: 85,
      failed_tests: 15,
    }

    expect(toPassRate(counts.passed_tests, counts.total_tests)).toBe(85)
  })

  it('should handle zero tests gracefully', () => {
    const counts = {
      total_tests: 0,
      passed_tests: 0,
      failed_tests: 0,
    }

    expect(toPassRate(counts.passed_tests, counts.total_tests)).toBe(0)
  })

  it('should classify trend correctly', () => {
    type Trend = 'improving' | 'stable' | 'declining' | 'insufficient_data'

    // Number of scores averaged at each end of the series.
    const WINDOW = 3

    const mean = (values: number[]): number =>
      values.reduce((sum, value) => sum + value, 0) / values.length

    /**
     * Compares the mean of the last three scores with the mean of the first
     * three. A difference above 0.1 is 'improving', below -0.1 is
     * 'declining', anything else 'stable'. Fewer than three scores cannot be
     * classified.
     */
    const classifyTrend = (scores: number[]): Trend => {
      if (scores.length < WINDOW) return 'insufficient_data'

      const delta = mean(scores.slice(-WINDOW)) - mean(scores.slice(0, WINDOW))

      if (delta > 0.1) return 'improving'
      return delta < -0.1 ? 'declining' : 'stable'
    }

    const cases: Array<[number[], Trend]> = [
      // Improving trend
      [[3.0, 3.1, 3.2, 3.5, 3.8, 4.0], 'improving'],
      // Declining trend
      [[4.0, 3.8, 3.5, 3.2, 3.1, 3.0], 'declining'],
      // Stable trend
      [[4.0, 4.0, 4.0, 4.0, 4.0, 4.0], 'stable'],
      // Insufficient data
      [[4.0, 4.0], 'insufficient_data'],
    ]

    for (const [scores, expected] of cases) {
      expect(classifyTrend(scores)).toBe(expected)
    }
  })
})
|
|
|
|
describe('Score Thresholds', () => {
  it('should identify passing scores correctly', () => {
    // A score passes when it is at or above this threshold.
    const minScore = 3.5
    const meetsThreshold = (score: number): boolean => score >= minScore

    for (const score of [3.5, 4.0, 4.5, 5.0]) {
      expect(meetsThreshold(score)).toBe(true)
    }

    for (const score of [0.0, 1.0, 2.0, 3.0, 3.4]) {
      expect(meetsThreshold(score)).toBe(false)
    }
  })

  it('should color-code scores correctly', () => {
    /** Maps a score to a colour name: >= 4 emerald, >= 3 amber, otherwise red. */
    const getScoreColor = (score: number): string =>
      score >= 4 ? 'emerald' : score >= 3 ? 'amber' : 'red'

    const cases: Array<[number, string]> = [
      [4.5, 'emerald'],
      [4.0, 'emerald'],
      [3.5, 'amber'],
      [3.0, 'amber'],
      [2.5, 'red'],
      [0.0, 'red'],
    ]

    for (const [score, color] of cases) {
      expect(getScoreColor(score)).toBe(color)
    }
  })
})
|
|
|
|
describe('API URL Configuration', () => {
  it('should use correct default voice service URL', () => {
    const fallbackUrl = 'http://localhost:8091'

    expect(fallbackUrl).toContain('8091')
  })

  it('should construct correct API endpoints', () => {
    const baseUrl = 'http://localhost:8091'

    /** Builds a full BQAS endpoint URL from the path below `/api/v1/bqas/`. */
    const bqas = (suffix: string): string => `${baseUrl}/api/v1/bqas/${suffix}`

    const endpoints = {
      runs: bqas('runs'),
      trend: bqas('trend'),
      latestMetrics: bqas('latest-metrics'),
      runGolden: bqas('run/golden'),
      runSynthetic: bqas('run/synthetic'),
      runRag: bqas('run/rag'),
    }

    // Only the runs and trend endpoints are asserted here.
    expect(endpoints.runs).toBe('http://localhost:8091/api/v1/bqas/runs')
    expect(endpoints.trend).toBe('http://localhost:8091/api/v1/bqas/trend')
  })
})
|
|
|
|
describe('Component Rendering Logic', () => {
  it('should format timestamps correctly', () => {
    const isoTimestamp = '2026-01-26T10:30:00Z'

    // German locale formatting; only checks that a non-empty string comes back.
    const rendered = new Date(isoTimestamp).toLocaleString('de-DE')

    expect(rendered).toBeTruthy()
  })

  it('should truncate git commits correctly', () => {
    const fullCommit = 'abc1234567890def'

    // The short form is the first seven characters of the hash.
    const shortCommit = fullCommit.substring(0, 7)

    expect(shortCommit).toBe('abc1234')
    expect(shortCommit.length).toBe(7)
  })

  it('should generate correct progress bar widths', () => {
    /** Expresses `score` as a percentage of `maxScore`, formatted as a CSS width. */
    const getProgressWidth = (score: number, maxScore: number = 5): string => {
      const percent = (score / maxScore) * 100
      return `${percent}%`
    }

    const cases: Array<[number, string]> = [
      [4.0, '80%'],
      [5.0, '100%'],
      [2.5, '50%'],
      [0, '0%'],
    ]

    for (const [score, width] of cases) {
      expect(getProgressWidth(score)).toBe(width)
    }
  })
})
|
|
|
|
describe('Tab Navigation', () => {
  // The tab identifiers checked by this suite; TabId is the union of these literals.
  const tabs = ['overview', 'golden', 'rag', 'synthetic', 'history'] as const
  type TabId = typeof tabs[number]

  it('should have all required tabs', () => {
    const required = ['overview', 'golden', 'rag', 'synthetic', 'history']

    for (const tab of required) {
      expect(tabs).toContain(tab)
    }
  })

  it('should allow valid tab transitions', () => {
    /** Type guard: true when `tab` is one of the known tab identifiers. */
    const isValidTab = (tab: string): tab is TabId =>
      tabs.some(known => known === tab)

    expect(isValidTab('overview')).toBe(true)
    expect(isValidTab('golden')).toBe(true)
    expect(isValidTab('invalid')).toBe(false)
  })
})
|
|
|
|
describe('Error Handling', () => {
  it('should handle API errors gracefully', async () => {
    const mockFetch = jest.fn().mockRejectedValue(new Error('Network error'))

    const request = mockFetch('http://localhost:8091/api/v1/bqas/runs')

    // Assert on the rejection directly instead of try/catch + fail():
    // the `fail` global is not defined under jest-circus, and any error raised
    // inside the try block would be swallowed by the catch anyway. With
    // `.rejects`, a promise that resolves fails the test on its own.
    await expect(request).rejects.toBeInstanceOf(Error)
    // Passing an Error object makes toThrow compare the message for equality.
    await expect(request).rejects.toThrow(new Error('Network error'))
  })

  it('should provide fallback values for missing data', () => {
    // Minimal shape of the fields read below.
    type MetricsSnapshot = { avg_composite_score?: number; total_tests?: number }

    // Going through a typed function keeps `metrics` as `MetricsSnapshot | null`.
    // A bare `const metrics = null` is typed as `null`, which makes the
    // property accesses below a compile error under strict null checks.
    const loadMetrics = (): MetricsSnapshot | null => null
    const metrics = loadMetrics()

    // `??` only falls back on null/undefined, which is the missing-data case.
    const displayScore = metrics?.avg_composite_score?.toFixed(2) ?? '-'
    const displayTotal = metrics?.total_tests ?? 0

    expect(displayScore).toBe('-')
    expect(displayTotal).toBe(0)
  })
})
|
|
|
|
describe('RAG Test Categories', () => {
  // The RAG test categories checked by this suite: a machine id plus a display name.
  const categories = [
    { id: 'eh_retrieval', name: 'EH Retrieval Quality' },
    { id: 'operator_alignment', name: 'Operator Alignment' },
    { id: 'hallucination_control', name: 'Hallucination Control' },
    { id: 'citation_enforcement', name: 'Citation Enforcement' },
    { id: 'privacy_compliance', name: 'Privacy/DSGVO Compliance' },
    { id: 'namespace_isolation', name: 'Namespace Isolation' },
  ]

  it('should have all required RAG categories', () => {
    expect(categories).toHaveLength(6)

    const knownIds = Array.from(categories, category => category.id)

    // Only these three ids are asserted individually.
    for (const id of ['eh_retrieval', 'operator_alignment', 'privacy_compliance']) {
      expect(knownIds).toContain(id)
    }
  })

  it('should have human-readable names for all categories', () => {
    for (const { name } of categories) {
      expect(name).toBeTruthy()
      expect(name.length).toBeGreaterThan(0)
    }
  })
})
|
|
})
|