feat: voice-service von lehrer nach core verschoben, Pipeline erweitert (voice, BQAS, embedding, night-scheduler)

This commit is contained in:
Benjamin Boenisch
2026-02-15 13:26:06 +01:00
parent a7e4500ea6
commit 1089c73b46
59 changed files with 12921 additions and 20 deletions

View File

@@ -0,0 +1,77 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<!--
BQAS Local Scheduler - launchd plist
Runs the BQAS tests daily at 07:00.
Installation:
cp com.breakpilot.bqas.plist ~/Library/LaunchAgents/
launchctl load ~/Library/LaunchAgents/com.breakpilot.bqas.plist
Uninstallation:
launchctl unload ~/Library/LaunchAgents/com.breakpilot.bqas.plist
rm ~/Library/LaunchAgents/com.breakpilot.bqas.plist
Manual test run:
launchctl start com.breakpilot.bqas
Check status:
launchctl list | grep bqas
-->
<key>Label</key>
<string>com.breakpilot.bqas</string>
<key>ProgramArguments</key>
<array>
<string>/Users/benjaminadmin/Projekte/breakpilot-pwa/voice-service/scripts/run_bqas.sh</string>
</array>
<!-- Schedule: daily at 07:00 -->
<key>StartCalendarInterval</key>
<dict>
<key>Hour</key>
<integer>7</integer>
<key>Minute</key>
<integer>0</integer>
</dict>
<!-- Log output: stdout and stderr of the job are written to these files -->
<key>StandardOutPath</key>
<string>/var/log/bqas/stdout.log</string>
<key>StandardErrorPath</key>
<string>/var/log/bqas/stderr.log</string>
<!-- Do not run the job as soon as it is loaded; only the schedule starts it -->
<key>RunAtLoad</key>
<false/>
<!-- Environment variables passed to the job -->
<key>EnvironmentVariables</key>
<dict>
<key>PATH</key>
<string>/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin</string>
<key>HOME</key>
<string>/Users/benjaminadmin</string>
<!-- Optional: override the service URL -->
<!-- <key>BQAS_SERVICE_URL</key>
<string>http://localhost:8091</string> -->
</dict>
<!-- Working directory of the job -->
<key>WorkingDirectory</key>
<string>/Users/benjaminadmin/Projekte/breakpilot-pwa/voice-service</string>
<!-- Resource limits (optional): the job is declared as a background process -->
<key>ProcessType</key>
<string>Background</string>
<!-- Timeout: 30 minutes (1800 seconds).
NOTE(review): confirm against launchd.plist(5) that current macOS still honors the TimeOut key. -->
<key>TimeOut</key>
<integer>1800</integer>
</dict>
</plist>

View File

@@ -0,0 +1,318 @@
#!/bin/bash
# BQAS scheduler installation script.
# Installs a launchd job that runs the BQAS tests daily at 07:00.
set -e
# Configuration: absolute paths of the voice-service checkout, the launchd
# plist that gets generated, the log directory and the repository's hook directory.
VOICE_SERVICE_DIR="/Users/benjaminadmin/Projekte/breakpilot-pwa/voice-service"
PLIST_NAME="com.breakpilot.bqas"
PLIST_PATH="${HOME}/Library/LaunchAgents/${PLIST_NAME}.plist"
LOG_DIR="/var/log/bqas"
GIT_HOOKS_DIR="/Users/benjaminadmin/Projekte/breakpilot-pwa/.git/hooks"
# Colors: the escape sequences are stored literally (single quotes) and are
# only interpreted when printed with `echo -e`.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[0;33m'
BLUE='\033[0;34m'
NC='\033[0m'
log() {
  # Print a colored "[LEVEL] message" line to stdout.
  #   $1 - level: INFO, SUCCESS, WARNING or ERROR (anything else prints nothing)
  #   $2 - message text; backslash escapes in it are interpreted by `echo -e`
  local lvl=$1
  local msg=$2
  local color
  case "$lvl" in
    INFO)    color=$BLUE ;;
    SUCCESS) color=$GREEN ;;
    WARNING) color=$YELLOW ;;
    ERROR)   color=$RED ;;
    *)       return 0 ;;
  esac
  echo -e "${color}[${lvl}]${NC} ${msg}"
}
# Arguments: the first positional parameter selects the action; it defaults to "install".
ACTION=${1:-install}
show_usage() {
  # Print the list of supported commands to stdout.
  cat << EOF
Usage: $0 [install|uninstall|status|test]

Commands:
 install Installiert launchd Job und Git Hook
 uninstall Entfernt launchd Job und Git Hook
 status Zeigt aktuellen Status
 test Fuehrt BQAS Tests manuell aus
EOF
}
create_log_directory() {
  # Make sure LOG_DIR exists. A missing directory is created via sudo and
  # handed over to the current user ($USER).
  log "INFO" "Erstelle Log-Verzeichnis..."
  if [ -d "$LOG_DIR" ]; then
    log "INFO" "Log-Verzeichnis existiert bereits"
    return
  fi
  sudo mkdir -p "$LOG_DIR"
  sudo chown "$USER" "$LOG_DIR"
  log "SUCCESS" "Log-Verzeichnis erstellt: $LOG_DIR"
}
create_plist() {
  # Generate the launchd plist at PLIST_PATH from the configuration variables
  # (PLIST_NAME, VOICE_SERVICE_DIR, LOG_DIR, HOME).
  # The job runs scripts/run_bqas.sh daily at 07:00 and does not run at load.
  log "INFO" "Erstelle launchd plist..."
  # The target directory is not guaranteed to exist; without it the
  # redirection below fails before anything is written.
  mkdir -p "$(dirname "$PLIST_PATH")"
  cat > "$PLIST_PATH" << EOF
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>Label</key>
<string>${PLIST_NAME}</string>
<key>ProgramArguments</key>
<array>
<string>${VOICE_SERVICE_DIR}/scripts/run_bqas.sh</string>
</array>
<key>StartCalendarInterval</key>
<dict>
<key>Hour</key>
<integer>7</integer>
<key>Minute</key>
<integer>0</integer>
</dict>
<key>StandardOutPath</key>
<string>${LOG_DIR}/stdout.log</string>
<key>StandardErrorPath</key>
<string>${LOG_DIR}/stderr.log</string>
<key>RunAtLoad</key>
<false/>
<key>EnvironmentVariables</key>
<dict>
<key>PATH</key>
<string>/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin</string>
<key>HOME</key>
<string>${HOME}</string>
</dict>
<key>WorkingDirectory</key>
<string>${VOICE_SERVICE_DIR}</string>
</dict>
</plist>
EOF
  log "SUCCESS" "plist erstellt: $PLIST_PATH"
}
load_plist() {
  # (Re)load the launchd job described by PLIST_PATH.
  log "INFO" "Lade launchd Job..."
  # Unload first in case the job is already loaded; a failure here is ignored.
  if ! launchctl unload "$PLIST_PATH" 2>/dev/null; then
    :
  fi
  # Load the job
  launchctl load "$PLIST_PATH"
  log "SUCCESS" "launchd Job geladen"
}
unload_plist() {
  # Unload the launchd job and delete its plist, if the plist exists.
  log "INFO" "Entlade launchd Job..."
  if [ ! -f "$PLIST_PATH" ]; then
    log "INFO" "Kein launchd Job gefunden"
    return
  fi
  # Unload errors are ignored.
  launchctl unload "$PLIST_PATH" 2>/dev/null || true
  rm -f "$PLIST_PATH"
  log "SUCCESS" "launchd Job entfernt"
}
create_git_hook() {
  # Write the BQAS post-commit hook into GIT_HOOKS_DIR.
  # Returns 1 (after a warning) when GIT_HOOKS_DIR does not exist.
  # A pre-existing foreign hook is saved as post-commit.backup.
  log "INFO" "Erstelle Git post-commit Hook..."
  # Check that .git/hooks exists
  if [ ! -d "$GIT_HOOKS_DIR" ]; then
    log "WARNING" "Git hooks Verzeichnis nicht gefunden: $GIT_HOOKS_DIR"
    return 1
  fi
  local hook_path="${GIT_HOOKS_DIR}/post-commit"
  # Back up an existing hook, but only if it is not our own. Re-running the
  # installation would otherwise overwrite the user's backup with the BQAS
  # hook, and a later uninstall would "restore" the wrong file.
  if [ -f "$hook_path" ]; then
    if grep -q "BQAS" "$hook_path" 2>/dev/null; then
      log "INFO" "Bestehender BQAS Hook wird aktualisiert"
    else
      cp "$hook_path" "${hook_path}.backup"
      log "INFO" "Bestehender Hook gesichert"
    fi
  fi
  # The quoted delimiter keeps the hook text literal (no expansion here).
  # The hook diffs HEAD~1 against HEAD; with a single revision git would
  # compare against the working tree and pick up uncommitted changes.
  cat > "$hook_path" << 'EOF'
#!/bin/bash
# BQAS Post-Commit Hook
# Fuehrt schnelle Tests aus wenn voice-service geaendert wurde
# Nur ausfuehren wenn voice-service geaendert wurde
if git diff --name-only HEAD~1 HEAD 2>/dev/null | grep -q "^voice-service/"; then
echo ""
echo "voice-service geaendert - starte BQAS Quick Check..."
echo ""
# Async ausfuehren (im Hintergrund)
VOICE_SERVICE_DIR="/Users/benjaminadmin/Projekte/breakpilot-pwa/voice-service"
if [ -f "${VOICE_SERVICE_DIR}/scripts/run_bqas.sh" ]; then
nohup "${VOICE_SERVICE_DIR}/scripts/run_bqas.sh" --quick > /dev/null 2>&1 &
echo "BQAS Quick Check gestartet (PID: $!)"
echo "Logs: /var/log/bqas/bqas.log"
fi
fi
EOF
  chmod +x "$hook_path"
  log "SUCCESS" "Git Hook erstellt: $hook_path"
}
remove_git_hook() {
  # Remove the post-commit hook if it contains the "BQAS" marker and put a
  # saved post-commit.backup back in its place. Foreign hooks are left alone.
  log "INFO" "Entferne Git post-commit Hook..."
  local hook_path="${GIT_HOOKS_DIR}/post-commit"
  local backup_path="${hook_path}.backup"
  if [ ! -f "$hook_path" ]; then
    log "INFO" "Kein Git Hook gefunden"
    return
  fi
  # Only touch the hook when it is ours
  if ! grep -q "BQAS" "$hook_path" 2>/dev/null; then
    log "WARNING" "Hook gehoert nicht zu BQAS, uebersprungen"
    return
  fi
  rm -f "$hook_path"
  # Restore the backup if there is one
  if [ -f "$backup_path" ]; then
    mv "$backup_path" "$hook_path"
    log "INFO" "Vorheriger Hook wiederhergestellt"
  fi
  log "SUCCESS" "Git Hook entfernt"
}
show_status() {
  # Print a status report to stdout covering the launchd job, the plist
  # file, the Git hook and the log directory.
  local hook_path="${GIT_HOOKS_DIR}/post-commit"
  local log_file="${LOG_DIR}/bqas.log"
  local ok_mark=" ${GREEN}${NC}"
  local fail_mark=" ${RED}${NC}"

  printf '\n%s\n%s\n%s\n\n' \
    "==========================================" \
    "BQAS Scheduler Status" \
    "=========================================="

  # launchd status
  echo "launchd Job:"
  if ! launchctl list | grep -q "$PLIST_NAME"; then
    echo -e "${fail_mark} Nicht geladen"
  else
    echo -e "${ok_mark} Geladen"
    launchctl list "$PLIST_NAME" 2>/dev/null || true
  fi
  echo ""

  # plist status
  echo "plist Datei:"
  if [ ! -f "$PLIST_PATH" ]; then
    echo -e "${fail_mark} Nicht vorhanden"
  else
    echo -e "${ok_mark} Vorhanden: $PLIST_PATH"
  fi
  echo ""

  # Git hook status: the hook counts as installed only with the BQAS marker
  echo "Git Hook:"
  if [ -f "$hook_path" ] && grep -q "BQAS" "$hook_path" 2>/dev/null; then
    echo -e "${ok_mark} Installiert: $hook_path"
  else
    echo -e "${fail_mark} Nicht installiert"
  fi
  echo ""

  # Log directory, plus the last log line when bqas.log exists
  echo "Log-Verzeichnis:"
  if [ ! -d "$LOG_DIR" ]; then
    echo -e "${fail_mark} Nicht vorhanden"
  else
    echo -e "${ok_mark} Vorhanden: $LOG_DIR"
    if [ -f "$log_file" ]; then
      echo " Letzter Eintrag:"
      tail -1 "$log_file" 2>/dev/null || echo " (leer)"
    fi
  fi

  # Next execution
  printf '\n%s\n\n' "Zeitplan: Taeglich um 07:00 Uhr"
}
do_install() {
  # Full installation: log directory, launchd plist, launchd job and Git hook,
  # followed by a short list of next steps.
  log "INFO" "=========================================="
  log "INFO" "BQAS Scheduler Installation"
  log "INFO" "=========================================="
  create_log_directory
  create_plist
  load_plist
  # create_git_hook returns 1 when the hooks directory is missing. Called
  # bare under `set -e`, that would abort the script here, after the launchd
  # job has already been loaded and without any closing message. The Git hook
  # is optional, so a missing directory only produces a warning.
  if [ -d "$GIT_HOOKS_DIR" ]; then
    create_git_hook
  else
    log "WARNING" "Git hooks Verzeichnis nicht gefunden: $GIT_HOOKS_DIR - Git Hook uebersprungen"
  fi
  echo ""
  log "SUCCESS" "Installation abgeschlossen!"
  echo ""
  echo "Naechste Schritte:"
  echo " 1. Manueller Test: $0 test"
  echo " 2. Status pruefen: $0 status"
  echo " 3. Logs anschauen: tail -f ${LOG_DIR}/bqas.log"
  echo ""
}
do_uninstall() {
  # Remove the launchd job and the Git hook. The log directory is kept and
  # the command for deleting it is printed instead.
  local banner_line
  for banner_line in \
    "==========================================" \
    "BQAS Scheduler Deinstallation" \
    "=========================================="; do
    log "INFO" "$banner_line"
  done
  unload_plist
  remove_git_hook
  printf '\n'
  log "SUCCESS" "Deinstallation abgeschlossen!"
  printf '\n%s\n%s\n\n' \
    "Log-Verzeichnis wurde nicht entfernt: $LOG_DIR" \
    "Zum Entfernen: sudo rm -rf $LOG_DIR"
}
do_test() {
  # Run scripts/run_bqas.sh from VOICE_SERVICE_DIR in the foreground.
  # Exits the script with status 1 when the runner file does not exist.
  local runner="${VOICE_SERVICE_DIR}/scripts/run_bqas.sh"
  log "INFO" "Starte BQAS Tests manuell..."
  echo ""
  if [ ! -f "$runner" ]; then
    log "ERROR" "run_bqas.sh nicht gefunden!"
    exit 1
  fi
  "$runner"
}
# Main logic: dispatch on the requested action. Any unknown action prints the
# usage text and exits with status 1.
case $ACTION in
install)
do_install
;;
uninstall)
do_uninstall
;;
status)
show_status
;;
test)
do_test
;;
*)
show_usage
exit 1
;;
esac

View File

@@ -0,0 +1,53 @@
#!/bin/bash
# BQAS Post-Commit Hook
# =====================
#
# Automatically runs the BQAS quick tests when a commit contains changes
# below the voice-service/ directory.
#
# Installation:
#   cp post-commit.hook /path/to/.git/hooks/post-commit
#   chmod +x /path/to/.git/hooks/post-commit
#
# Or use the installation script:
#   ./scripts/install_bqas_scheduler.sh install

# Configuration
VOICE_SERVICE_DIR="/Users/benjaminadmin/Projekte/breakpilot-pwa/voice-service"
RUN_ASYNC=true # Run in the background (recommended)

# Colors
GREEN='\033[0;32m'
YELLOW='\033[0;33m'
NC='\033[0m'

# Check whether the commit touched voice-service. Both revisions are named
# explicitly: `git diff HEAD~1` alone compares HEAD~1 with the working tree,
# so uncommitted edits would trigger the check as well. On the very first
# commit HEAD~1 does not exist; the error is suppressed and nothing runs.
changed_files=$(git diff --name-only HEAD~1 HEAD 2>/dev/null || true)

if echo "$changed_files" | grep -q "^voice-service/"; then
  echo ""
  echo -e "${YELLOW}[BQAS]${NC} voice-service geaendert - starte Quick Check..."

  # Path of the runner script
  BQAS_SCRIPT="${VOICE_SERVICE_DIR}/scripts/run_bqas.sh"

  if [ -f "$BQAS_SCRIPT" ]; then
    if [ "$RUN_ASYNC" = true ]; then
      # Asynchronous: detach the runner so the commit returns immediately
      nohup "$BQAS_SCRIPT" --quick > /dev/null 2>&1 &
      pid=$!
      echo -e "${GREEN}[BQAS]${NC} Quick Check gestartet (PID: $pid)"
      echo " Logs: /var/log/bqas/bqas.log"
    else
      # Synchronous: the hook waits for the runner to finish
      "$BQAS_SCRIPT" --quick
    fi
  else
    echo -e "${YELLOW}[BQAS]${NC} run_bqas.sh nicht gefunden, uebersprungen"
  fi
  echo ""
fi

# Always report success, whatever the quick check returned
exit 0

286
voice-service/scripts/run_bqas.py Executable file
View File

@@ -0,0 +1,286 @@
#!/usr/bin/env python3
"""
BQAS Runner Script
Run BQAS tests and generate reports
"""
import asyncio
import argparse
import sys
import json
from pathlib import Path
from datetime import datetime
# Add parent to path
sys.path.insert(0, str(Path(__file__).parent.parent))
from bqas.judge import LLMJudge
from bqas.config import BQASConfig
from bqas.regression_tracker import RegressionTracker
from bqas.synthetic_generator import SyntheticGenerator
from bqas.backlog_generator import BacklogGenerator
from bqas.metrics import BQASMetrics, TestResult
async def run_golden_suite(config: BQASConfig, judge: LLMJudge) -> list:
    """Run the golden test suite.

    Loads every ``*.yaml`` file from ``tests/bqas/golden_tests`` (relative to
    the voice-service root), evaluates each entry under ``tests`` and
    ``edge_cases`` with the judge, and prints one line per test.

    Args:
        config: BQAS configuration (not used by this function itself).
        judge: Judge whose ``evaluate_test_case`` coroutine scores each test.

    Returns:
        List of the results returned by ``judge.evaluate_test_case``.
    """
    import yaml

    results = []
    golden_dir = Path(__file__).parent.parent / "tests" / "bqas" / "golden_tests"
    # sorted(): glob order is filesystem dependent; a stable order keeps runs
    # and their logs comparable.
    for yaml_file in sorted(golden_dir.glob("*.yaml")):
        print(f"\n📋 Loading {yaml_file.name}...")
        # Explicit UTF-8 so non-ASCII test inputs do not depend on the locale.
        with open(yaml_file, encoding="utf-8") as f:
            # safe_load returns None for an empty document.
            data = yaml.safe_load(f) or {}
        # A key that is present but empty yields None, which cannot be concatenated.
        tests = (data.get("tests") or []) + (data.get("edge_cases") or [])
        for test in tests:
            test_id = test.get("id", "UNKNOWN")
            print(f" Testing {test_id}...", end=" ", flush=True)
            result = await judge.evaluate_test_case(
                test_id=test_id,
                test_name=test.get("name", ""),
                user_input=test.get("input", ""),
                expected_intent=test.get("expected_intent", "unknown"),
                detected_intent=test.get("expected_intent", "unknown"),  # Mock for now
                response="Verstanden.",
                min_score=test.get("min_score", 3.5),
            )
            results.append(result)
            if result.passed:
                print(f"{result.composite_score:.2f}")
            else:
                print(f"{result.composite_score:.2f} ({result.reasoning[:50]})")
    return results
async def run_synthetic_tests(
    config: BQASConfig,
    judge: LLMJudge,
    generator: SyntheticGenerator,
) -> list:
    """Run synthetic tests.

    For each of three fixed intents, five variations are taken from the
    generator's fallback templates and scored by the judge with a minimum
    score of 3.0. The expected intent is also passed as the detected intent.

    Returns:
        List of the results returned by ``judge.evaluate_test_case``.
    """
    print("\n🔄 Generating synthetic tests...")
    collected = []
    for intent in ("student_observation", "worksheet_generate", "reminder"):
        print(f"\n Intent: {intent}")
        prefix = f"SYN-{intent[:4].upper()}"
        candidates = generator._generate_fallback(intent, count=5)
        for number, candidate in enumerate(candidates, start=1):
            test_id = f"{prefix}-{number:03d}"
            print(f" {test_id}...", end=" ", flush=True)
            outcome = await judge.evaluate_test_case(
                test_id=test_id,
                test_name=f"Synthetic {intent}",
                user_input=candidate.input,
                expected_intent=candidate.expected_intent,
                detected_intent=candidate.expected_intent,
                response="Verstanden.",
                min_score=3.0,
            )
            collected.append(outcome)
            # Passing and failing tests currently print the same text.
            if not outcome.passed:
                print(f"{outcome.composite_score:.2f}")
            else:
                print(f"{outcome.composite_score:.2f}")
    return collected
def generate_report(
    golden_metrics: BQASMetrics,
    synthetic_metrics: BQASMetrics,
    output_path: Path,
):
    """Generate HTML report.

    Writes a standalone HTML page with the totals of both suites, the golden
    suite's scores per intent and up to 20 failed golden test IDs.

    Args:
        golden_metrics: Metrics of the golden suite.
        synthetic_metrics: Metrics of the synthetic tests.
        output_path: File the report is written to (overwritten if present).
    """
    # Local import: the module name `html` must not be shadowed by a variable.
    from html import escape

    # One timestamp for both the title and the footer.
    generated_at = datetime.now()
    # Intent names and test IDs come from test data; escape them so characters
    # such as "<" or "&" cannot break the markup.
    intent_rows = "".join(
        f"<tr><td>{escape(str(k))}</td><td>{v:.3f}</td></tr>"
        for k, v in golden_metrics.scores_by_intent.items()
    )
    failed_items = "".join(
        f"<li>{escape(str(tid))}</li>" for tid in golden_metrics.failed_test_ids[:20]
    )
    report_html = f"""<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>BQAS Report - {generated_at.strftime('%Y-%m-%d %H:%M')}</title>
<style>
body {{ font-family: sans-serif; margin: 20px; }}
h1 {{ color: #333; }}
.summary {{ display: flex; gap: 20px; margin-bottom: 20px; }}
.card {{ background: #f5f5f5; padding: 20px; border-radius: 8px; }}
.passed {{ color: #22c55e; }}
.failed {{ color: #ef4444; }}
table {{ border-collapse: collapse; width: 100%; }}
th, td {{ border: 1px solid #ddd; padding: 8px; text-align: left; }}
th {{ background: #f0f0f0; }}
</style>
</head>
<body>
<h1>BQAS Test Report</h1>
<div class="summary">
<div class="card">
<h3>Golden Suite</h3>
<p>Total: {golden_metrics.total_tests}</p>
<p class="passed">Passed: {golden_metrics.passed_tests}</p>
<p class="failed">Failed: {golden_metrics.failed_tests}</p>
<p>Avg Score: {golden_metrics.avg_composite_score:.3f}</p>
</div>
<div class="card">
<h3>Synthetic Tests</h3>
<p>Total: {synthetic_metrics.total_tests}</p>
<p class="passed">Passed: {synthetic_metrics.passed_tests}</p>
<p class="failed">Failed: {synthetic_metrics.failed_tests}</p>
<p>Avg Score: {synthetic_metrics.avg_composite_score:.3f}</p>
</div>
</div>
<h2>Scores by Intent</h2>
<table>
<tr><th>Intent</th><th>Score</th></tr>
{intent_rows}
</table>
<h2>Failed Tests</h2>
<ul>
{failed_items}
</ul>
<footer>
<p>Generated: {generated_at.isoformat()}</p>
</footer>
</body>
</html>"""
    # Explicit UTF-8 to match the declared charset, independent of the locale.
    output_path.write_text(report_html, encoding="utf-8")
    print(f"\n📊 Report saved to: {output_path}")
async def main():
    """Command-line entry point of the BQAS runner.

    Parses the options, checks that the LLM judge is reachable, runs the
    selected suites, records the run, optionally checks for regression,
    creates issues and writes an HTML report.

    Exits with status 1 when the judge is unavailable or when any golden or
    synthetic test failed.
    """
    parser = argparse.ArgumentParser(description="BQAS Test Runner")
    parser.add_argument("--all", action="store_true", help="Run all tests")
    parser.add_argument("--golden", action="store_true", help="Run golden suite only")
    parser.add_argument("--synthetic", action="store_true", help="Run synthetic tests only")
    parser.add_argument("--check-regression", action="store_true", help="Check for regression")
    parser.add_argument("--threshold", type=float, default=0.1, help="Regression threshold")
    parser.add_argument("--create-issues", action="store_true", help="Create GitHub issues for failures")
    parser.add_argument("--report", action="store_true", help="Generate HTML report")
    parser.add_argument("--output", type=str, default="bqas_report.html", help="Report output path")
    args = parser.parse_args()

    # Default to --all if no specific test type selected
    if not (args.golden or args.synthetic or args.check_regression):
        args.all = True

    print("=" * 60)
    print("BQAS - Breakpilot Quality Assurance System")
    print("=" * 60)

    config = BQASConfig.from_env()
    judge = LLMJudge(config=config)
    tracker = RegressionTracker(config=config)
    generator = SyntheticGenerator(config=config)
    backlog = BacklogGenerator(config=config)

    # Everything that uses the judge or the generator runs inside try/finally
    # so both are closed on every path, including the early sys.exit() below
    # and any exception raised while the tests run.
    try:
        # Check if judge is available
        print("\n🔍 Checking LLM availability...")
        is_available = await judge.health_check()
        if not is_available:
            print("❌ LLM Judge not available. Make sure Ollama is running with the model.")
            print(f" Expected model: {config.judge_model}")
            print(f" Ollama URL: {config.ollama_base_url}")
            sys.exit(1)
        print("✅ LLM Judge available")

        golden_results = []
        synthetic_results = []

        # Run tests
        if args.all or args.golden:
            print("\n" + "=" * 60)
            print("Running Golden Suite")
            print("=" * 60)
            golden_results = await run_golden_suite(config, judge)

        if args.all or args.synthetic:
            print("\n" + "=" * 60)
            print("Running Synthetic Tests")
            print("=" * 60)
            synthetic_results = await run_synthetic_tests(config, judge, generator)

        # Calculate metrics
        golden_metrics = BQASMetrics.from_results(golden_results)
        synthetic_metrics = BQASMetrics.from_results(synthetic_results)

        # Print summary
        print("\n" + golden_metrics.summary())

        # Record run (only when the golden suite produced results)
        if golden_results:
            run = tracker.record_run(golden_metrics, synthetic_metrics.avg_composite_score)
            print(f"\n📝 Run recorded: #{run.id}")

        # Check regression
        if args.check_regression:
            print("\n🔍 Checking for regression...")
            is_regression, delta, msg = tracker.check_regression(
                golden_metrics.avg_composite_score,
                args.threshold,
            )
            print(f" {msg}")
            if is_regression and args.create_issues:
                print("\n📮 Creating regression alert...")
                runs = tracker.get_last_runs(1)
                if runs:
                    url = await backlog.create_regression_alert(
                        golden_metrics.avg_composite_score,
                        golden_metrics.avg_composite_score + delta,
                        delta,
                        runs[0],
                    )
                    if url:
                        print(f" Issue created: {url}")

        # Create issues for failures
        if args.create_issues and golden_metrics.failed_tests > 0:
            print("\n📮 Creating issue for test failures...")
            failed = [r for r in golden_results if not r.passed]
            runs = tracker.get_last_runs(1)
            if runs:
                url = await backlog.create_issue(
                    runs[0],
                    golden_metrics,
                    failed,
                )
                if url:
                    print(f" Issue created: {url}")

        # Generate report
        if args.report:
            generate_report(
                golden_metrics,
                synthetic_metrics,
                Path(args.output),
            )
    finally:
        # Cleanup; the nested finally closes the generator even if closing
        # the judge raises.
        try:
            await judge.close()
        finally:
            await generator.close()

    # Exit with error code if tests failed
    if golden_metrics.failed_tests > 0 or synthetic_metrics.failed_tests > 0:
        sys.exit(1)


if __name__ == "__main__":
    asyncio.run(main())

270
voice-service/scripts/run_bqas.sh Executable file
View File

@@ -0,0 +1,270 @@
#!/bin/bash
# BQAS Local Runner - local alternative to GitHub Actions.
# Runs the BQAS tests and sends notifications on failures.
set -e
# Configuration. The service URL and the regression threshold can be
# overridden through BQAS_SERVICE_URL and BQAS_REGRESSION_THRESHOLD.
VOICE_SERVICE_DIR="/Users/benjaminadmin/Projekte/breakpilot-pwa/voice-service"
VOICE_SERVICE_URL="${BQAS_SERVICE_URL:-http://localhost:8091}"
LOG_DIR="/var/log/bqas"
LOG_FILE="${LOG_DIR}/bqas.log"
REGRESSION_THRESHOLD="${BQAS_REGRESSION_THRESHOLD:-0.1}"
# Colors for console output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[0;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color
# Option flags; these defaults apply when the matching switch is not given
QUICK_MODE=false
GOLDEN_ONLY=false
RAG_ONLY=false
SILENT=false
usage() {
  # Print the supported options and environment variables to stdout.
  cat << EOF
Usage: $0 [OPTIONS]

Options:
 --quick Nur schnelle Golden Tests (fuer Git Hooks)
 --golden Nur Golden Suite
 --rag Nur RAG Suite
 --silent Keine Desktop-Benachrichtigungen
 --help Diese Hilfe anzeigen

Umgebungsvariablen:
 BQAS_SERVICE_URL Voice Service URL (default: http://localhost:8091)
 BQAS_REGRESSION_THRESHOLD Regression Schwelle (default: 0.1)
EOF
}
# Parse the command-line switches into the option flags. --help prints the
# usage text and exits 0; an unknown switch prints it and exits 1.
for opt in "$@"; do
  case "$opt" in
    --quick)  QUICK_MODE=true ;;
    --golden) GOLDEN_ONLY=true ;;
    --rag)    RAG_ONLY=true ;;
    --silent) SILENT=true ;;
    --help)
      usage
      exit 0
      ;;
    *)
      echo "Unbekannte Option: $opt"
      usage
      exit 1
      ;;
  esac
done
# Drop all positional parameters, as a shift-per-option loop would have done.
shift "$#"
# Logging function.
#   $1 - level: INFO, SUCCESS, WARNING or ERROR (other levels are only
#        written to the log file, not to the console)
#   $2 - message
# Appends a timestamped line to LOG_FILE and prints a colored line to stdout.
log() {
  local level=$1
  local message=$2
  local timestamp
  timestamp=$(date '+%Y-%m-%d %H:%M:%S')
  # Write to the log file only when LOG_DIR already exists; it is not
  # created here. The write is best-effort: under `set -e` an unwritable
  # log file would otherwise abort the whole run from inside the logger.
  if [ -d "$LOG_DIR" ]; then
    { echo "${timestamp} [${level}] ${message}" >> "$LOG_FILE"; } 2>/dev/null || true
  fi
  # Console output
  case $level in
    INFO)
      echo -e "${BLUE}[INFO]${NC} ${message}"
      ;;
    SUCCESS)
      echo -e "${GREEN}[SUCCESS]${NC} ${message}"
      ;;
    WARNING)
      echo -e "${YELLOW}[WARNING]${NC} ${message}"
      ;;
    ERROR)
      echo -e "${RED}[ERROR]${NC} ${message}"
      ;;
  esac
}
# Send a macOS desktop notification via osascript.
#   $1 - title
#   $2 - message
#   $3 - "true" to play the "Basso" sound (default: false)
# Does nothing when SILENT is true; osascript failures are ignored.
notify() {
  local title=$1
  local message=$2
  local is_error=${3:-false}
  if [ "$SILENT" = true ]; then
    return
  fi
  local script="display notification \"${message}\" with title \"${title}\""
  if [ "$is_error" = true ]; then
    script="${script} sound name \"Basso\""
  fi
  osascript -e "$script" 2>/dev/null || true
}
# Call the Python notifier (bqas/notifier.py below VOICE_SERVICE_DIR) when
# that file exists.
#   $1 - status, $2 - message, $3 - details
# Errors of the notifier are suppressed.
notify_python() {
  local status=$1
  local message=$2
  local details=$3
  local notifier="${VOICE_SERVICE_DIR}/bqas/notifier.py"
  if [ ! -f "$notifier" ]; then
    return 0
  fi
  python3 "$notifier" --status "$status" --message "$message" --details "$details" 2>/dev/null || true
}
# Check whether the voice service answers on its /health endpoint.
# Returns 0 for HTTP 200, otherwise 1 (after logging a warning).
check_service() {
  log "INFO" "Pruefe Voice Service Verfuegbarkeit..."
  local status_code
  # When curl itself fails, the status falls back to "000".
  status_code=$(curl -s -o /dev/null -w "%{http_code}" "${VOICE_SERVICE_URL}/health" 2>/dev/null) || status_code="000"
  if [ "$status_code" != "200" ]; then
    log "WARNING" "Voice Service nicht erreichbar (HTTP $status_code)"
    return 1
  fi
  log "SUCCESS" "Voice Service erreichbar"
  return 0
}
# Ask the voice service whether the score regressed.
# Returns 0 when no regression is reported, 1 when one is reported or when
# the request fails. A response that cannot be parsed counts as "no regression".
check_regression() {
  log "INFO" "Pruefe auf Score-Regression..."
  local endpoint="${VOICE_SERVICE_URL}/api/v1/bqas/regression-check?threshold=${REGRESSION_THRESHOLD}"
  local payload
  if ! payload=$(curl -s "$endpoint" 2>/dev/null); then
    log "WARNING" "Regression-Check fehlgeschlagen"
    return 1
  fi
  # Python prints the booleans as "True" / "False".
  local flag
  flag=$(echo "$payload" | python3 -c "import sys,json; print(json.load(sys.stdin).get('is_regression', False))" 2>/dev/null) || flag="False"
  if [ "$flag" != "True" ]; then
    log "SUCCESS" "Keine Regression erkannt"
    return 0
  fi
  local delta
  delta=$(echo "$payload" | python3 -c "import sys,json; print(json.load(sys.stdin).get('delta', 0))" 2>/dev/null) || delta="unknown"
  log "ERROR" "Regression erkannt! Score-Abfall: ${delta}"
  return 1
}
# Run one pytest target from inside VOICE_SERVICE_DIR.
#   $1 - label used in log messages
#   $2 - path handed to pytest
# The pytest output goes to stdout and is appended to LOG_FILE.
# Returns 0 when pytest succeeds, 1 otherwise.
run_tests() {
  local test_type=$1
  local test_path=$2
  log "INFO" "Starte ${test_type} Tests..."
  if ! cd "$VOICE_SERVICE_DIR"; then
    log "ERROR" "Verzeichnis nicht gefunden: ${VOICE_SERVICE_DIR}"
    return 1
  fi
  # Activate the venv if there is one
  if [ -f "venv/bin/activate" ]; then
    # shellcheck disable=SC1091
    source venv/bin/activate
  fi
  # Without an existing log directory tee could not open LOG_FILE and would
  # fail the pipeline although the tests passed.
  local log_target=/dev/null
  if [ -d "$LOG_DIR" ]; then
    log_target=$LOG_FILE
  fi
  # Run pytest. A pipeline normally reports the status of its last command
  # (tee), which hides pytest failures. pipefail makes the pytest status
  # count; the subshell keeps that option local to this pipeline.
  if ( set -o pipefail; python3 -m pytest "$test_path" -v --tb=short 2>&1 | tee -a "$log_target" ); then
    log "SUCCESS" "${test_type} Tests bestanden"
    return 0
  fi
  log "ERROR" "${test_type} Tests fehlgeschlagen"
  return 1
}
# Main logic: run the selected test suites, optionally check for regression
# and report the overall result through the log and the notifiers.
# Returns 0 when everything passed, 1 otherwise.
main() {
  local start_time
  start_time=$(date +%s)
  local golden_exit=0
  local rag_exit=0
  local regression_exit=0
  local service_available=false
  log "INFO" "=========================================="
  log "INFO" "BQAS Local Runner gestartet"
  log "INFO" "=========================================="
  # Service check (optional, the tests can also run offline)
  if check_service; then
    service_available=true
  fi
  if [ "$QUICK_MODE" = true ]; then
    # Quick mode: only the golden tests not selected by "slow".
    # The -k filter is handed to pytest through PYTEST_ADDOPTS. Appending it
    # to the path string would make run_tests pass "file -k 'not slow'" as
    # one single path argument, which pytest cannot resolve.
    log "INFO" "Quick Mode - nur schnelle Golden Tests"
    PYTEST_ADDOPTS="${PYTEST_ADDOPTS:+${PYTEST_ADDOPTS} }-k 'not slow'" \
      run_tests "Golden (Quick)" "tests/bqas/test_golden.py" || golden_exit=1
  else
    # Full test run
    if [ "$RAG_ONLY" = false ]; then
      run_tests "Golden" "tests/bqas/test_golden.py" || golden_exit=1
    fi
    if [ "$GOLDEN_ONLY" = false ]; then
      run_tests "RAG" "tests/bqas/test_rag.py" || rag_exit=1
    fi
    # Regression check only when the service is reachable
    if [ "$service_available" = true ]; then
      check_regression || regression_exit=1
    fi
  fi
  # Summary
  local end_time
  end_time=$(date +%s)
  local duration=$((end_time - start_time))
  log "INFO" "=========================================="
  log "INFO" "BQAS Run abgeschlossen (${duration}s)"
  log "INFO" "=========================================="
  # Determine the result
  local total_failures=$((golden_exit + rag_exit + regression_exit))
  if [ "$total_failures" -eq 0 ]; then
    log "SUCCESS" "Alle Tests bestanden!"
    notify "BQAS" "Alle Tests bestanden" false
    notify_python "success" "Alle Tests bestanden" "Dauer: ${duration}s"
    return 0
  fi
  local failure_details=""
  if [ "$golden_exit" -ne 0 ]; then
    failure_details="${failure_details}Golden Tests fehlgeschlagen. "
  fi
  if [ "$rag_exit" -ne 0 ]; then
    failure_details="${failure_details}RAG Tests fehlgeschlagen. "
  fi
  if [ "$regression_exit" -ne 0 ]; then
    failure_details="${failure_details}Regression erkannt. "
  fi
  log "ERROR" "Tests fehlgeschlagen: ${failure_details}"
  notify "BQAS Alert" "$failure_details" true
  notify_python "failure" "Tests fehlgeschlagen" "$failure_details"
  return 1
}
# Run the script; the status returned by main becomes the script's exit status.
main