fix: Restore all files lost during destructive rebase

A previous `git pull --rebase origin main` dropped 177 local commits,
losing 3400+ files across admin-v2, backend, studio-v2, website,
klausur-service, and many other services. The partial restore attempt
(660295e2) only recovered some files.

This commit restores all missing files from pre-rebase ref 98933f5e
while preserving post-rebase additions (night-scheduler, night-mode UI,
NightModeWidget dashboard integration).

Restored features include:
- AI Module Sidebar (FAB), OCR Labeling, OCR Compare
- GPU Dashboard, RAG Pipeline, Magic Help
- Klausur-Korrektur (8 files), Abitur-Archiv (5+ files)
- Companion, Zeugnisse-Crawler, Screen Flow
- Full backend, studio-v2, website, klausur-service
- All compliance SDKs, agent-core, voice-service
- CI/CD configs, documentation, scripts

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-02-09 09:51:32 +01:00
parent f7487ee240
commit bfdaf63ba9
2009 changed files with 749983 additions and 1731 deletions

View File

@@ -0,0 +1 @@
"""Tests für Alerts Agent."""

View File

@@ -0,0 +1,106 @@
"""
Pytest Fixtures für Alerts Agent Tests.
Stellt eine SQLite In-Memory Datenbank für Tests bereit.
Verwendet StaticPool damit alle Connections dieselbe DB sehen.
"""
import pytest
from sqlalchemy import create_engine, event
from sqlalchemy.orm import sessionmaker
from sqlalchemy.pool import StaticPool
from fastapi import FastAPI
from fastapi.testclient import TestClient
# Import der Basis und Modelle - WICHTIG: Modelle müssen vor create_all importiert werden
from classroom_engine.database import Base
# Import aller Modelle damit sie bei Base registriert werden
from alerts_agent.db import models as alerts_models # noqa: F401
from alerts_agent.api.routes import router
# SQLite in-memory database for tests, backed by StaticPool so that every
# connection handed out by the engine shares the same underlying database
# (a plain :memory: URL would give each pooled connection its own empty DB).
SQLALCHEMY_TEST_DATABASE_URL = "sqlite:///:memory:"

test_engine = create_engine(
    SQLALCHEMY_TEST_DATABASE_URL,
    # check_same_thread=False: the TestClient may use this connection from a
    # different thread than the one that created it.
    connect_args={"check_same_thread": False},
    poolclass=StaticPool,  # important: same DB for all connections
)
@event.listens_for(test_engine, "connect")
def set_sqlite_pragma(dbapi_connection, connection_record):
    """Enable SQLite foreign-key constraint enforcement.

    SQLite disables foreign keys per connection by default, so this
    listener turns them on for every new DBAPI connection.
    """
    cursor = dbapi_connection.cursor()
    cursor.execute("PRAGMA foreign_keys=ON")
    cursor.close()
# Session factory bound to the shared in-memory test engine.
TestSessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=test_engine)
@pytest.fixture(scope="function")
def test_db():
    """Yield a session against a freshly created schema.

    Tables are created before the test and dropped afterwards so every
    test starts from an empty database.

    Bug fix: drop_all previously ran *after* the try/finally block, so a
    failing test (the exception propagates out of ``yield``) skipped the
    drop — and because the StaticPool engine is module-level and shared,
    tables and data leaked into subsequent tests. Cleanup now lives in
    ``finally`` so it always runs.
    """
    # Create all tables registered on Base
    Base.metadata.create_all(bind=test_engine)
    db = TestSessionLocal()
    try:
        yield db
    finally:
        db.close()
        # Drop tables after the test — even when the test failed
        Base.metadata.drop_all(bind=test_engine)
def override_get_db():
    """Test database dependency — hands out sessions on the shared test engine."""
    session = TestSessionLocal()
    try:
        yield session
    finally:
        # Always return the connection to the StaticPool
        session.close()
@pytest.fixture(scope="function")
def client(test_db):
    """Build a TestClient whose database dependency points at the test DB."""
    from alerts_agent.db.database import get_db

    test_app = FastAPI()
    test_app.include_router(router, prefix="/api")
    # Route all get_db lookups to the shared test engine
    test_app.dependency_overrides[get_db] = override_get_db
    with TestClient(test_app) as c:
        yield c
    # Cleanup after the client context closes
    test_app.dependency_overrides.clear()
@pytest.fixture
def sample_alert_data():
    """Sample payload for alert tests."""
    return dict(
        title="Neue Inklusions-Richtlinie",
        url="https://example.com/inklusion",
        snippet="Das Kultusministerium hat neue Richtlinien...",
        topic_label="Inklusion Bayern",
    )
@pytest.fixture
def sample_feedback_data():
    """Sample payload for feedback tests."""
    return dict(
        is_relevant=True,
        reason="Sehr relevant für Schulen",
        tags=["wichtig", "inklusion"],
    )

View File

@@ -0,0 +1,183 @@
"""
Tests für AlertItem Model.
"""
import pytest
from datetime import datetime
from alerts_agent.models.alert_item import AlertItem, AlertSource, AlertStatus
class TestAlertItemCreation:
    """Tests for AlertItem creation."""

    def test_create_minimal_alert(self):
        """Minimal construction fills in id, status and source defaults."""
        alert = AlertItem(title="Test Alert", url="https://example.com/article")
        assert alert.title == "Test Alert"
        assert alert.url == "https://example.com/article"
        assert alert.id is not None
        assert len(alert.id) == 36  # UUID format
        assert alert.status == AlertStatus.NEW
        assert alert.source == AlertSource.GOOGLE_ALERTS_RSS

    def test_create_full_alert(self):
        """Construction with all optional fields set keeps them verbatim."""
        alert = AlertItem(
            source=AlertSource.GOOGLE_ALERTS_EMAIL,
            topic_label="Inklusion Bayern",
            title="Neue Inklusions-Richtlinie",
            url="https://example.com/inklusion",
            snippet="Die neue Richtlinie für inklusive Bildung...",
            lang="de",
            published_at=datetime(2024, 1, 15, 10, 30),
        )
        assert alert.source == AlertSource.GOOGLE_ALERTS_EMAIL
        assert alert.topic_label == "Inklusion Bayern"
        assert alert.lang == "de"
        assert alert.published_at.year == 2024

    def test_url_hash_generated(self):
        """The URL hash is generated automatically on construction."""
        alert = AlertItem(
            title="Test",
            url="https://example.com/test"
        )
        assert alert.url_hash is not None
        assert len(alert.url_hash) == 16  # 16 hex chars

    def test_canonical_url_generated(self):
        """A canonical URL is derived from the raw URL."""
        alert = AlertItem(
            title="Test",
            url="https://EXAMPLE.com/path/"
        )
        # Should be lowercased and without a trailing slash
        assert alert.canonical_url == "https://example.com/path"
class TestURLNormalization:
    """Tests for URL normalization."""

    def test_remove_tracking_params(self):
        """Tracking parameters are stripped from the canonical URL."""
        alert = AlertItem(
            title="Test",
            url="https://example.com/article?utm_source=google&utm_medium=email&id=123"
        )
        # utm_source and utm_medium should be removed; id is kept
        assert "utm_source" not in alert.canonical_url
        assert "utm_medium" not in alert.canonical_url
        assert "id=123" in alert.canonical_url

    def test_lowercase_domain(self):
        """The domain part is lowercased."""
        alert = AlertItem(
            title="Test",
            url="https://WWW.EXAMPLE.COM/Article"
        )
        assert "www.example.com" in alert.canonical_url

    def test_remove_fragment(self):
        """The URL fragment is removed."""
        alert = AlertItem(
            title="Test",
            url="https://example.com/article#section1"
        )
        assert "#" not in alert.canonical_url

    def test_same_url_same_hash(self):
        """Equal URLs produce the same hash."""
        alert1 = AlertItem(title="Test", url="https://example.com/test")
        alert2 = AlertItem(title="Test", url="https://example.com/test")
        assert alert1.url_hash == alert2.url_hash

    def test_different_url_different_hash(self):
        """Different URLs produce different hashes."""
        alert1 = AlertItem(title="Test", url="https://example.com/test1")
        alert2 = AlertItem(title="Test", url="https://example.com/test2")
        assert alert1.url_hash != alert2.url_hash
class TestAlertSerialization:
    """Tests for serialization."""

    def test_to_dict(self):
        """Conversion to a dict includes fields and string enum values."""
        alert = AlertItem(
            title="Test Alert",
            url="https://example.com",
            topic_label="Test Topic",
        )
        data = alert.to_dict()
        assert data["title"] == "Test Alert"
        assert data["url"] == "https://example.com"
        assert data["topic_label"] == "Test Topic"
        assert data["source"] == "google_alerts_rss"
        assert data["status"] == "new"

    def test_from_dict(self):
        """Construction from a dict restores enums and the relevance score."""
        data = {
            "id": "test-id-123",
            "title": "Test Alert",
            "url": "https://example.com",
            "source": "google_alerts_email",
            "status": "scored",
            "relevance_score": 0.85,
        }
        alert = AlertItem.from_dict(data)
        assert alert.id == "test-id-123"
        assert alert.title == "Test Alert"
        assert alert.source == AlertSource.GOOGLE_ALERTS_EMAIL
        assert alert.status == AlertStatus.SCORED
        assert alert.relevance_score == 0.85

    def test_round_trip(self):
        """to_dict followed by from_dict preserves the relevant fields."""
        original = AlertItem(
            title="Round Trip Test",
            url="https://example.com/roundtrip",
            topic_label="Testing",
            relevance_score=0.75,
            relevance_decision="KEEP",
        )
        data = original.to_dict()
        restored = AlertItem.from_dict(data)
        assert restored.title == original.title
        assert restored.url == original.url
        assert restored.relevance_score == original.relevance_score
class TestAlertStatus:
    """Tests for the alert status and source enums."""

    def test_status_enum_values(self):
        """Each AlertStatus member serializes to its expected string value."""
        expected = {
            AlertStatus.NEW: "new",
            AlertStatus.PROCESSED: "processed",
            AlertStatus.DUPLICATE: "duplicate",
            AlertStatus.SCORED: "scored",
            AlertStatus.REVIEWED: "reviewed",
            AlertStatus.ARCHIVED: "archived",
        }
        for member, value in expected.items():
            assert member.value == value

    def test_source_enum_values(self):
        """Each AlertSource member serializes to its expected string value."""
        expected = {
            AlertSource.GOOGLE_ALERTS_RSS: "google_alerts_rss",
            AlertSource.GOOGLE_ALERTS_EMAIL: "google_alerts_email",
            AlertSource.MANUAL: "manual",
        }
        for member, value in expected.items():
            assert member.value == value

View File

@@ -0,0 +1,594 @@
"""
Tests für Alerts Agent API Routes.
Testet alle Endpoints: ingest, run, inbox, feedback, profile, stats.
"""
import pytest
from datetime import datetime
from fastapi import FastAPI
from fastapi.testclient import TestClient
from alerts_agent.api.routes import router, _alerts_store, _profile_store
from alerts_agent.models.alert_item import AlertStatus
# Module-level test app shared by all test classes below. Per-test isolation
# comes from clearing _alerts_store/_profile_store in each setup_method.
app = FastAPI()
app.include_router(router, prefix="/api")
class TestIngestEndpoint:
    """Tests for POST /alerts/ingest."""

    def setup_method(self):
        """Reset the shared in-memory stores and build a fresh client."""
        _alerts_store.clear()
        _profile_store.clear()
        self.client = TestClient(app)

    def test_ingest_minimal(self):
        """Minimal alert import returns status 'created' and a UUID id."""
        response = self.client.post(
            "/api/alerts/ingest",
            json={
                "title": "Test Alert",
                "url": "https://example.com/test",
            },
        )
        assert response.status_code == 200
        data = response.json()
        assert data["status"] == "created"
        assert "id" in data
        assert len(data["id"]) == 36  # UUID

    def test_ingest_full(self):
        """Full alert import echoes the title in the response message."""
        response = self.client.post(
            "/api/alerts/ingest",
            json={
                "title": "Neue Inklusions-Richtlinie",
                "url": "https://example.com/inklusion",
                "snippet": "Das Kultusministerium hat neue Richtlinien...",
                "topic_label": "Inklusion Bayern",
                "published_at": "2024-01-15T10:30:00",
            },
        )
        assert response.status_code == 200
        data = response.json()
        assert "Inklusions-Richtlinie" in data["message"]

    def test_ingest_stores_alert(self):
        """The ingested alert is persisted in the in-memory store."""
        response = self.client.post(
            "/api/alerts/ingest",
            json={
                "title": "Stored Alert",
                "url": "https://example.com/stored",
            },
        )
        alert_id = response.json()["id"]
        assert alert_id in _alerts_store
        assert _alerts_store[alert_id].title == "Stored Alert"

    def test_ingest_validation_missing_title(self):
        """Validation: a missing title is rejected with 422."""
        response = self.client.post(
            "/api/alerts/ingest",
            json={
                "url": "https://example.com/test",
            },
        )
        assert response.status_code == 422

    def test_ingest_validation_missing_url(self):
        """Validation: a missing URL is rejected with 422."""
        response = self.client.post(
            "/api/alerts/ingest",
            json={
                "title": "Test",
            },
        )
        assert response.status_code == 422

    def test_ingest_validation_empty_title(self):
        """Validation: an empty title is rejected with 422."""
        response = self.client.post(
            "/api/alerts/ingest",
            json={
                "title": "",
                "url": "https://example.com",
            },
        )
        assert response.status_code == 422
class TestRunEndpoint:
    """Tests for POST /alerts/run."""

    def setup_method(self):
        """Reset the shared in-memory stores and build a fresh client."""
        _alerts_store.clear()
        _profile_store.clear()
        self.client = TestClient(app)

    def test_run_empty(self):
        """Scoring with no alerts processes nothing."""
        response = self.client.post(
            "/api/alerts/run",
            json={"limit": 10},
        )
        assert response.status_code == 200
        data = response.json()
        assert data["processed"] == 0
        assert data["keep"] == 0
        assert data["drop"] == 0

    def test_run_scores_alerts(self):
        """Scoring evaluates every pending alert exactly once."""
        # Ingest two alerts
        self.client.post("/api/alerts/ingest", json={
            "title": "Inklusion in Schulen",
            "url": "https://example.com/1",
        })
        self.client.post("/api/alerts/ingest", json={
            "title": "Stellenanzeige Lehrer",
            "url": "https://example.com/2",
        })
        # Start scoring
        response = self.client.post(
            "/api/alerts/run",
            json={"limit": 10},
        )
        assert response.status_code == 200
        data = response.json()
        assert data["processed"] == 2
        # Every processed alert ends in exactly one bucket
        assert data["keep"] + data["drop"] + data["review"] == 2

    def test_run_keyword_scoring_keep(self):
        """Keyword scoring: priority terms lead to KEEP (or at least REVIEW)."""
        # Explicit "Datenschutz Schule" snippet for a stronger keyword match
        self.client.post("/api/alerts/ingest", json={
            "title": "Neue Datenschutz-Regelung für Schulen",
            "url": "https://example.com/datenschutz",
            "snippet": "Datenschutz Schule DSGVO Regelung",
        })
        response = self.client.post("/api/alerts/run", json={"limit": 10})
        data = response.json()
        # Should be rated KEEP or REVIEW (not DROP)
        assert data["drop"] == 0
        assert data["keep"] + data["review"] == 1

    def test_run_keyword_scoring_drop(self):
        """Keyword scoring: exclusion terms lead to DROP."""
        self.client.post("/api/alerts/ingest", json={
            "title": "Stellenanzeige: Schulleiter gesucht",
            "url": "https://example.com/job",
        })
        response = self.client.post("/api/alerts/run", json={"limit": 10})
        data = response.json()
        assert data["drop"] == 1
        assert data["keep"] == 0

    def test_run_skip_scored(self):
        """Alerts that were already scored are skipped by default."""
        self.client.post("/api/alerts/ingest", json={
            "title": "Test Alert",
            "url": "https://example.com/test",
        })
        # First scoring pass
        self.client.post("/api/alerts/run", json={"limit": 10})
        # Second pass with skip_scored=true
        response = self.client.post(
            "/api/alerts/run",
            json={"limit": 10, "skip_scored": True},
        )
        data = response.json()
        assert data["processed"] == 0

    def test_run_rescore(self):
        """Re-scoring with skip_scored=false processes scored alerts again."""
        self.client.post("/api/alerts/ingest", json={
            "title": "Test Alert",
            "url": "https://example.com/test",
        })
        # First scoring pass
        self.client.post("/api/alerts/run", json={"limit": 10})
        # Second pass with skip_scored=false
        response = self.client.post(
            "/api/alerts/run",
            json={"limit": 10, "skip_scored": False},
        )
        data = response.json()
        assert data["processed"] == 1

    def test_run_limit(self):
        """The limit parameter caps how many alerts are scored per run."""
        # Ingest 5 alerts
        for i in range(5):
            self.client.post("/api/alerts/ingest", json={
                "title": f"Alert {i}",
                "url": f"https://example.com/{i}",
            })
        # Score only 2 of them
        response = self.client.post(
            "/api/alerts/run",
            json={"limit": 2},
        )
        data = response.json()
        assert data["processed"] == 2

    def test_run_returns_duration(self):
        """The response reports the run duration in milliseconds."""
        response = self.client.post("/api/alerts/run", json={"limit": 10})
        data = response.json()
        assert "duration_ms" in data
        assert isinstance(data["duration_ms"], int)
class TestInboxEndpoint:
    """Tests for GET /alerts/inbox."""

    def setup_method(self):
        """Reset the shared in-memory stores and build a fresh client."""
        _alerts_store.clear()
        _profile_store.clear()
        self.client = TestClient(app)

    def _create_and_score_alerts(self):
        """Helper: create and score one KEEP and one DROP alert."""
        # KEEP alert
        self.client.post("/api/alerts/ingest", json={
            "title": "Inklusion Regelung",
            "url": "https://example.com/keep",
        })
        # DROP alert
        self.client.post("/api/alerts/ingest", json={
            "title": "Stellenanzeige",
            "url": "https://example.com/drop",
        })
        # Scoring
        self.client.post("/api/alerts/run", json={"limit": 10})

    def test_inbox_empty(self):
        """An empty inbox returns no items and total 0."""
        response = self.client.get("/api/alerts/inbox")
        assert response.status_code == 200
        data = response.json()
        assert data["items"] == []
        assert data["total"] == 0

    def test_inbox_shows_keep_and_review(self):
        """The inbox shows KEEP and REVIEW alerts, hiding DROP."""
        self._create_and_score_alerts()
        response = self.client.get("/api/alerts/inbox")
        data = response.json()
        # Only the KEEP alert should appear ("Stellenanzeige" is DROP)
        assert data["total"] == 1
        assert data["items"][0]["relevance_decision"] == "KEEP"

    def test_inbox_filter_by_decision(self):
        """The inbox can be filtered by decision."""
        self._create_and_score_alerts()
        # DROP only
        response = self.client.get("/api/alerts/inbox?decision=DROP")
        data = response.json()
        assert data["total"] == 1
        assert data["items"][0]["relevance_decision"] == "DROP"

    def test_inbox_pagination(self):
        """The inbox paginates results via page/page_size."""
        # 5 KEEP alerts
        for i in range(5):
            self.client.post("/api/alerts/ingest", json={
                "title": f"Inklusion Alert {i}",
                "url": f"https://example.com/{i}",
            })
        self.client.post("/api/alerts/run", json={"limit": 10})
        # First page
        response = self.client.get("/api/alerts/inbox?page=1&page_size=2")
        data = response.json()
        assert data["total"] == 5
        assert len(data["items"]) == 2
        assert data["page"] == 1
        assert data["page_size"] == 2
        # Second page
        response = self.client.get("/api/alerts/inbox?page=2&page_size=2")
        data = response.json()
        assert len(data["items"]) == 2

    def test_inbox_item_fields(self):
        """Inbox items expose all expected fields."""
        self.client.post("/api/alerts/ingest", json={
            "title": "Test Alert",
            "url": "https://example.com/test",
            "snippet": "Test snippet",
            "topic_label": "Test Topic",
        })
        self.client.post("/api/alerts/run", json={"limit": 10})
        response = self.client.get("/api/alerts/inbox?decision=REVIEW")
        data = response.json()
        if data["items"]:
            item = data["items"][0]
            assert "id" in item
            assert "title" in item
            assert "url" in item
            assert "snippet" in item
            assert "topic_label" in item
            assert "relevance_score" in item
            assert "relevance_decision" in item
            assert "status" in item
class TestFeedbackEndpoint:
    """Tests for POST /alerts/feedback."""

    def setup_method(self):
        """Reset the shared in-memory stores and build a fresh client."""
        _alerts_store.clear()
        _profile_store.clear()
        self.client = TestClient(app)

    def _create_alert(self):
        """Helper: create a test alert and return its id."""
        response = self.client.post("/api/alerts/ingest", json={
            "title": "Test Alert",
            "url": "https://example.com/test",
        })
        return response.json()["id"]

    def test_feedback_positive(self):
        """Positive feedback succeeds and updates the profile."""
        alert_id = self._create_alert()
        response = self.client.post(
            "/api/alerts/feedback",
            json={
                "alert_id": alert_id,
                "is_relevant": True,
                "reason": "Sehr relevant",
            },
        )
        assert response.status_code == 200
        data = response.json()
        assert data["success"] is True
        assert data["profile_updated"] is True

    def test_feedback_negative(self):
        """Negative feedback succeeds as well."""
        alert_id = self._create_alert()
        response = self.client.post(
            "/api/alerts/feedback",
            json={
                "alert_id": alert_id,
                "is_relevant": False,
                "reason": "Werbung",
            },
        )
        assert response.status_code == 200
        assert response.json()["success"] is True

    def test_feedback_updates_alert_status(self):
        """Feedback transitions the alert into the REVIEWED status."""
        alert_id = self._create_alert()
        self.client.post("/api/alerts/feedback", json={
            "alert_id": alert_id,
            "is_relevant": True,
        })
        assert _alerts_store[alert_id].status == AlertStatus.REVIEWED

    def test_feedback_updates_profile(self):
        """Feedback updates the default profile counters and examples."""
        alert_id = self._create_alert()
        # Positive feedback
        self.client.post("/api/alerts/feedback", json={
            "alert_id": alert_id,
            "is_relevant": True,
            "reason": "Wichtig",
        })
        profile = _profile_store.get("default")
        assert profile is not None
        assert profile.total_kept == 1
        assert len(profile.positive_examples) == 1

    def test_feedback_not_found(self):
        """Feedback for a non-existent alert returns 404."""
        response = self.client.post(
            "/api/alerts/feedback",
            json={
                "alert_id": "non-existent-id",
                "is_relevant": True,
            },
        )
        assert response.status_code == 404

    def test_feedback_with_tags(self):
        """Feedback may carry optional tags."""
        alert_id = self._create_alert()
        response = self.client.post(
            "/api/alerts/feedback",
            json={
                "alert_id": alert_id,
                "is_relevant": True,
                "tags": ["wichtig", "inklusion"],
            },
        )
        assert response.status_code == 200
class TestProfileEndpoint:
    """Tests for GET/PUT /alerts/profile."""

    def setup_method(self):
        """Reset the shared in-memory stores and build a fresh client."""
        _alerts_store.clear()
        _profile_store.clear()
        self.client = TestClient(app)

    def test_get_profile_default(self):
        """Fetching the profile returns a populated default profile."""
        response = self.client.get("/api/alerts/profile")
        assert response.status_code == 200
        data = response.json()
        assert "id" in data
        assert "priorities" in data
        assert "exclusions" in data
        assert len(data["priorities"]) > 0  # the default has priorities

    def test_get_profile_creates_default(self):
        """The default profile is created automatically on first access."""
        assert "default" not in _profile_store
        self.client.get("/api/alerts/profile")
        assert "default" in _profile_store

    def test_update_profile_priorities(self):
        """Priorities can be replaced via PUT."""
        response = self.client.put(
            "/api/alerts/profile",
            json={
                "priorities": [
                    {"label": "Neue Priorität", "weight": 0.9},
                    {"label": "Zweite Priorität", "weight": 0.7},
                ],
            },
        )
        assert response.status_code == 200
        data = response.json()
        assert len(data["priorities"]) == 2
        assert data["priorities"][0]["label"] == "Neue Priorität"

    def test_update_profile_exclusions(self):
        """Exclusions can be replaced via PUT."""
        response = self.client.put(
            "/api/alerts/profile",
            json={
                "exclusions": ["Spam", "Werbung", "Newsletter"],
            },
        )
        assert response.status_code == 200
        data = response.json()
        assert "Spam" in data["exclusions"]
        assert len(data["exclusions"]) == 3

    def test_update_profile_policies(self):
        """Policies can be updated via PUT."""
        response = self.client.put(
            "/api/alerts/profile",
            json={
                "policies": {
                    "max_age_days": 14,
                    "prefer_german_sources": True,
                },
            },
        )
        assert response.status_code == 200
        data = response.json()
        assert data["policies"]["max_age_days"] == 14

    def test_profile_stats(self):
        """The profile payload contains feedback statistics."""
        response = self.client.get("/api/alerts/profile")
        data = response.json()
        assert "total_scored" in data
        assert "total_kept" in data
        assert "total_dropped" in data
class TestStatsEndpoint:
    """Tests for GET /alerts/stats."""

    def setup_method(self):
        """Reset the shared in-memory stores and build a fresh client."""
        _alerts_store.clear()
        _profile_store.clear()
        self.client = TestClient(app)

    def test_stats_empty(self):
        """Stats with no alerts report a total of zero."""
        response = self.client.get("/api/alerts/stats")
        assert response.status_code == 200
        data = response.json()
        assert data["total_alerts"] == 0

    def test_stats_with_alerts(self):
        """Stats aggregate counts by status and decision."""
        # Create and score alerts
        self.client.post("/api/alerts/ingest", json={
            "title": "Inklusion",
            "url": "https://example.com/1",
        })
        self.client.post("/api/alerts/ingest", json={
            "title": "Stellenanzeige",
            "url": "https://example.com/2",
        })
        self.client.post("/api/alerts/run", json={"limit": 10})
        response = self.client.get("/api/alerts/stats")
        data = response.json()
        assert data["total_alerts"] == 2
        assert "by_status" in data
        assert "by_decision" in data
        assert "scored" in data["by_status"]

    def test_stats_avg_score(self):
        """Stats expose the average relevance score after scoring."""
        self.client.post("/api/alerts/ingest", json={
            "title": "Test",
            "url": "https://example.com/1",
        })
        self.client.post("/api/alerts/run", json={"limit": 10})
        response = self.client.get("/api/alerts/stats")
        data = response.json()
        assert "avg_score" in data
        assert data["avg_score"] is not None

View File

@@ -0,0 +1,224 @@
"""
Tests für Deduplication Module.
"""
import pytest
from alerts_agent.processing.dedup import (
compute_simhash,
hamming_distance,
are_similar,
find_duplicates,
exact_url_duplicates,
)
from alerts_agent.models.alert_item import AlertItem
class TestSimHash:
    """Tests for SimHash computation."""

    def test_compute_simhash_returns_hex(self):
        """SimHash returns a 16-character hex string."""
        text = "Dies ist ein Test für SimHash Berechnung"
        result = compute_simhash(text)
        assert isinstance(result, str)
        assert len(result) == 16
        # Verify it parses as valid hexadecimal
        int(result, 16)

    def test_empty_text_returns_zeros(self):
        """Empty or missing text yields the all-zero hash."""
        assert compute_simhash("") == "0" * 16
        assert compute_simhash(None) == "0" * 16

    def test_identical_texts_same_hash(self):
        """Identical texts produce identical hashes."""
        text = "Inklusion in bayerischen Schulen wird verstärkt"
        hash1 = compute_simhash(text)
        hash2 = compute_simhash(text)
        assert hash1 == hash2

    def test_similar_texts_similar_hash(self):
        """Near-identical texts produce hashes with a small Hamming distance."""
        text1 = "Inklusion in bayerischen Schulen wird verstärkt"
        text2 = "Inklusion in bayerischen Schulen wurde verstärkt"
        hash1 = compute_simhash(text1)
        hash2 = compute_simhash(text2)
        # Similar texts should have a low Hamming distance
        distance = hamming_distance(hash1, hash2)
        assert distance < 20  # relatively similar

    def test_different_texts_different_hash(self):
        """Unrelated texts produce different hashes."""
        text1 = "Inklusion in bayerischen Schulen"
        text2 = "Fußball Bundesliga Spieltag"
        hash1 = compute_simhash(text1)
        hash2 = compute_simhash(text2)
        assert hash1 != hash2

    def test_stopwords_ignored(self):
        """Stopwords are ignored, so they barely affect the hash."""
        text1 = "Die neue Regelung für Inklusion"
        text2 = "Eine neue Regelung für die Inklusion"
        hash1 = compute_simhash(text1)
        hash2 = compute_simhash(text2)
        # Similar despite differing stopwords
        distance = hamming_distance(hash1, hash2)
        assert distance < 10
class TestHammingDistance:
    """Tests for the Hamming distance between SimHash hex strings."""

    def test_identical_hashes_zero_distance(self):
        """Identical hashes are at distance zero."""
        h = "abcdef0123456789"
        assert hamming_distance(h, h) == 0

    def test_completely_different_max_distance(self):
        """All-zero vs. all-one hashes differ in all 64 bits."""
        assert hamming_distance("0" * 16, "f" * 16) == 64

    def test_one_bit_difference(self):
        """A single flipped bit gives distance one."""
        assert hamming_distance("0" * 16, "0" * 15 + "1") == 1

    def test_invalid_hash_returns_max(self):
        """Unparseable hashes fall back to the maximum distance."""
        for lhs, rhs in (("", "abc"), ("invalid", "abc")):
            assert hamming_distance(lhs, rhs) == 64

    def test_symmetric(self):
        """Hamming distance is symmetric in its arguments."""
        left, right = "abcd1234abcd1234", "1234abcd1234abcd"
        assert hamming_distance(left, right) == hamming_distance(right, left)
class TestAreSimilar:
    """Tests for the similarity predicate built on Hamming distance."""

    def test_identical_are_similar(self):
        """A hash is always similar to itself."""
        h = "abcdef0123456789"
        assert are_similar(h, h)

    def test_threshold_respected(self):
        """The threshold is inclusive: distance 2 passes at threshold >= 2."""
        base = "0" * 16
        nearby = "0" * 15 + "3"  # final hex digit 0b0011 -> two differing bits
        assert are_similar(base, nearby, threshold=5)
        assert are_similar(base, nearby, threshold=2)
        assert not are_similar(base, nearby, threshold=1)
class TestFindDuplicates:
    """Tests for near-duplicate detection via SimHash clustering."""

    def test_no_duplicates(self):
        """No duplicates are reported when all hashes are far apart."""
        items = [
            AlertItem(title="Unique 1", url="https://example.com/1"),
            AlertItem(title="Unique 2", url="https://example.com/2"),
        ]
        # Assign maximally different hashes
        items[0].content_hash = "0000000000000000"
        items[1].content_hash = "ffffffffffffffff"
        duplicates = find_duplicates(items)
        assert len(duplicates) == 0

    def test_finds_duplicates(self):
        """Items with near-identical hashes end up in the same cluster."""
        items = [
            AlertItem(title="Original", url="https://example.com/1"),
            AlertItem(title="Duplicate", url="https://example.com/2"),
            AlertItem(title="Different", url="https://example.com/3"),
        ]
        # Assign similar hashes to the first two
        items[0].content_hash = "0000000000000000"
        items[1].content_hash = "0000000000000001"  # one bit different
        items[2].content_hash = "ffffffffffffffff"  # completely different
        duplicates = find_duplicates(items, threshold=3)
        # Both should be in the same cluster
        assert len(duplicates) == 2
        assert duplicates[items[0].id] == duplicates[items[1].id]

    def test_empty_list(self):
        """An empty input yields no duplicates."""
        duplicates = find_duplicates([])
        assert len(duplicates) == 0

    def test_items_without_hash_skipped(self):
        """Items without a content hash are skipped."""
        items = [
            AlertItem(title="No Hash", url="https://example.com/1"),
        ]
        # content_hash stays None
        duplicates = find_duplicates(items)
        assert len(duplicates) == 0
class TestExactUrlDuplicates:
    """Tests for exact-URL duplicate detection."""

    def test_finds_exact_duplicates(self):
        """Later items with an identical URL are flagged; the first is kept."""
        items = [
            AlertItem(title="First", url="https://example.com/article"),
            AlertItem(title="Second", url="https://example.com/article"),  # duplicate
            AlertItem(title="Third", url="https://example.com/other"),
        ]
        duplicates = exact_url_duplicates(items)
        assert len(duplicates) == 1
        assert items[1].id in duplicates
        assert items[0].id not in duplicates  # original, not a duplicate

    def test_no_duplicates(self):
        """Distinct URLs produce no duplicates."""
        items = [
            AlertItem(title="First", url="https://example.com/1"),
            AlertItem(title="Second", url="https://example.com/2"),
        ]
        duplicates = exact_url_duplicates(items)
        assert len(duplicates) == 0

    def test_multiple_duplicates(self):
        """All repeats of the same URL after the first are flagged."""
        items = [
            AlertItem(title="First", url="https://example.com/same"),
            AlertItem(title="Second", url="https://example.com/same"),
            AlertItem(title="Third", url="https://example.com/same"),
        ]
        duplicates = exact_url_duplicates(items)
        # The second and third should be marked as duplicates
        assert len(duplicates) == 2
        assert items[0].id not in duplicates
        assert items[1].id in duplicates
        assert items[2].id in duplicates

View File

@@ -0,0 +1,262 @@
"""
Tests für den Feedback-Learning-Mechanismus.
Testet wie das System aus Nutzer-Feedback lernt und das Profil anpasst.
"""
import pytest
from datetime import datetime
from alerts_agent.models.relevance_profile import RelevanceProfile, PriorityItem
from alerts_agent.models.alert_item import AlertItem
class TestFeedbackLearning:
    """Tests for the feedback-learning mechanism."""

    def test_positive_feedback_adds_example(self):
        """Positive feedback appends a positive example with its reason."""
        profile = RelevanceProfile()
        profile.update_from_feedback(
            alert_title="Wichtiger Artikel zur Inklusion",
            alert_url="https://example.com/inklusion",
            is_relevant=True,
            reason="Sehr relevant für meine Arbeit",
        )
        assert len(profile.positive_examples) == 1
        assert profile.positive_examples[0]["title"] == "Wichtiger Artikel zur Inklusion"
        assert profile.positive_examples[0]["reason"] == "Sehr relevant für meine Arbeit"

    def test_negative_feedback_adds_example(self):
        """Negative feedback appends a negative example."""
        profile = RelevanceProfile()
        profile.update_from_feedback(
            alert_title="Stellenanzeige Lehrer",
            alert_url="https://example.com/job",
            is_relevant=False,
            reason="Nur Werbung",
        )
        assert len(profile.negative_examples) == 1
        assert profile.negative_examples[0]["title"] == "Stellenanzeige Lehrer"

    def test_feedback_updates_counters(self):
        """Feedback updates the scored/kept/dropped counters."""
        profile = RelevanceProfile()
        # 3 positive, 2 negative
        for i in range(3):
            profile.update_from_feedback(f"Good {i}", f"url{i}", True)
        for i in range(2):
            profile.update_from_feedback(f"Bad {i}", f"url{i}", False)
        assert profile.total_scored == 5
        assert profile.total_kept == 3
        assert profile.total_dropped == 2

    def test_examples_limited_to_20(self):
        """Stored examples are capped at 20, keeping the most recent ones."""
        profile = RelevanceProfile()
        # Add 25 examples
        for i in range(25):
            profile.update_from_feedback(
                f"Example {i}",
                f"https://example.com/{i}",
                is_relevant=True,
            )
        assert len(profile.positive_examples) == 20
        # The newest should be kept
        titles = [ex["title"] for ex in profile.positive_examples]
        assert "Example 24" in titles
        assert "Example 0" not in titles  # oldest should be evicted

    def test_examples_in_prompt_context(self):
        """Stored examples appear in the generated prompt context."""
        profile = RelevanceProfile()
        profile.update_from_feedback(
            "Relevanter Artikel",
            "https://example.com/good",
            is_relevant=True,
            reason="Wichtig",
        )
        profile.update_from_feedback(
            "Irrelevanter Artikel",
            "https://example.com/bad",
            is_relevant=False,
            reason="Spam",
        )
        context = profile.get_prompt_context()
        assert "Relevanter Artikel" in context
        assert "Irrelevanter Artikel" in context
        assert "relevante Alerts" in context
        assert "irrelevante Alerts" in context
class TestProfileEvolution:
    """Tests for how the profile evolves over time."""

    def test_profile_learns_from_feedback_pattern(self):
        """The prompt context reflects accumulated feedback patterns."""
        profile = RelevanceProfile()
        # Simulated feedback pattern: inclusion articles are relevant
        inklusion_articles = [
            ("Neue Inklusions-Verordnung", "https://example.com/1"),
            ("Inklusion in Bayern verstärkt", "https://example.com/2"),
            ("Förderbedarf: Neue Richtlinien", "https://example.com/3"),
        ]
        for title, url in inklusion_articles:
            profile.update_from_feedback(title, url, is_relevant=True, reason="Inklusion")
        # Simulated irrelevant articles
        spam_articles = [
            ("Newsletter Dezember", "https://example.com/spam1"),
            ("Pressemitteilung", "https://example.com/spam2"),
        ]
        for title, url in spam_articles:
            profile.update_from_feedback(title, url, is_relevant=False, reason="Spam")
        # The prompt context should reflect both patterns
        context = profile.get_prompt_context()
        # All positive examples should be inclusion-related
        for title, _ in inklusion_articles:
            assert title in context
        # Negative examples should be present as well
        for title, _ in spam_articles:
            assert title in context

    def test_profile_statistics_reflect_decisions(self):
        """Profile statistics track the keep/drop ratio of decisions."""
        profile = RelevanceProfile()
        # 70% relevant, 30% irrelevant
        for i in range(70):
            profile.update_from_feedback(f"Good {i}", f"url{i}", True)
        for i in range(30):
            profile.update_from_feedback(f"Bad {i}", f"url{i}", False)
        assert profile.total_scored == 100
        assert profile.total_kept == 70
        assert profile.total_dropped == 30
        # The keep rate should be 70%
        keep_rate = profile.total_kept / profile.total_scored
        assert keep_rate == 0.7
class TestFeedbackWithPriorities:
    """Tests for feedback interacting with configured priorities."""

    def test_priority_keywords_in_feedback(self):
        """Feedback examples complement the configured priority keywords."""
        profile = RelevanceProfile()
        profile.add_priority(
            "Inklusion",
            weight=0.9,
            keywords=["Förderbedarf", "inklusiv"],
        )
        # Feedback that adds context beyond the configured keywords.
        profile.update_from_feedback(
            "Nachteilsausgleich für Schüler mit Förderbedarf",
            "https://example.com/nachteilsausgleich",
            is_relevant=True,
            reason="Nachteilsausgleich ist wichtig für Inklusion",
        )
        # The feedback example must show up in the prompt context.
        assert "Nachteilsausgleich" in profile.get_prompt_context()

    def test_exclusion_patterns_from_feedback(self):
        """Repeated negative feedback makes an exclusion pattern visible."""
        profile = RelevanceProfile()
        # Flag five job postings as irrelevant.
        for n in range(5):
            profile.update_from_feedback(
                f"Stellenanzeige: Position {n}",
                f"https://example.com/job{n}",
                is_relevant=False,
                reason="Stellenanzeige",
            )
        negatives = profile.negative_examples
        assert len(negatives) == 5
        assert all("Stellenanzeige" in ex["title"] for ex in negatives)
class TestDefaultProfileFeedback:
    """Tests for feedback on the default education profile."""

    def test_default_profile_with_feedback(self):
        """The default profile accepts feedback like any other profile."""
        profile = RelevanceProfile.create_default_education_profile()
        before = len(profile.positive_examples)
        profile.update_from_feedback(
            "Datenschutz an Schulen: Neue DSGVO-Richtlinien",
            "https://example.com/dsgvo",
            is_relevant=True,
            reason="DSGVO-relevant",
        )
        assert len(profile.positive_examples) == before + 1
        assert profile.total_kept == 1

    def test_default_priorities_preserved_after_feedback(self):
        """Feedback must not add or remove configured priorities."""
        profile = RelevanceProfile.create_default_education_profile()
        count_before = len(profile.priorities)
        profile.update_from_feedback("Test", "https://test.com", True)
        assert len(profile.priorities) == count_before
class TestFeedbackTimestamps:
    """Tests for the timestamps attached to feedback."""

    def test_feedback_has_timestamp(self):
        """Each feedback example carries an ISO-formatted 'added_at' stamp."""
        profile = RelevanceProfile()
        profile.update_from_feedback(
            "Test Article",
            "https://example.com",
            is_relevant=True,
        )
        entry = profile.positive_examples[0]
        assert "added_at" in entry
        # Raises ValueError if the stamp is not valid ISO 8601.
        datetime.fromisoformat(entry["added_at"])

    def test_profile_updated_at_changes(self):
        """'updated_at' must advance when feedback arrives."""
        import time

        profile = RelevanceProfile()
        before = profile.updated_at
        time.sleep(0.01)  # ensure a measurable clock delta
        profile.update_from_feedback("Test", "https://test.com", True)
        assert profile.updated_at > before

View File

@@ -0,0 +1,296 @@
"""
Tests für RelevanceProfile Model.
"""
import pytest
from datetime import datetime
from alerts_agent.models.relevance_profile import RelevanceProfile, PriorityItem
class TestPriorityItem:
    """Tests for the PriorityItem value object."""

    def test_create_minimal(self):
        """Only the label is required; everything else has defaults."""
        item = PriorityItem(label="Test Topic")
        assert item.label == "Test Topic"
        assert item.weight == 0.5  # default weight
        assert item.keywords == []
        assert item.description is None

    def test_create_full(self):
        """All fields can be supplied explicitly."""
        item = PriorityItem(
            label="Inklusion",
            weight=0.9,
            keywords=["inklusiv", "Förderbedarf"],
            description="Inklusive Bildung in Schulen",
        )
        assert item.label == "Inklusion"
        assert item.weight == 0.9
        assert "inklusiv" in item.keywords
        assert item.description is not None

    def test_to_dict(self):
        """to_dict serializes label, weight and keywords."""
        data = PriorityItem(label="Test", weight=0.8, keywords=["kw1", "kw2"]).to_dict()
        assert data["label"] == "Test"
        assert data["weight"] == 0.8
        assert data["keywords"] == ["kw1", "kw2"]

    def test_from_dict(self):
        """from_dict restores an item from its dict form."""
        item = PriorityItem.from_dict({"label": "Test", "weight": 0.7, "keywords": ["test"]})
        assert item.label == "Test"
        assert item.weight == 0.7
class TestRelevanceProfile:
    """Tests for the RelevanceProfile model."""

    def test_create_empty(self):
        """A fresh profile has an id and empty collections."""
        profile = RelevanceProfile()
        assert profile.id is not None
        for collection in (
            profile.priorities,
            profile.exclusions,
            profile.positive_examples,
            profile.negative_examples,
        ):
            assert collection == []

    def test_add_priority(self):
        """add_priority appends a weighted priority."""
        profile = RelevanceProfile()
        profile.add_priority("Datenschutz", weight=0.85)
        assert len(profile.priorities) == 1
        first = profile.priorities[0]
        assert first.label == "Datenschutz"
        assert first.weight == 0.85

    def test_add_exclusion(self):
        """add_exclusion collects exclusion terms."""
        profile = RelevanceProfile()
        profile.add_exclusion("Stellenanzeige")
        profile.add_exclusion("Werbung")
        assert len(profile.exclusions) == 2
        assert "Stellenanzeige" in profile.exclusions
        assert "Werbung" in profile.exclusions

    def test_no_duplicate_exclusions(self):
        """Adding the same exclusion twice stores it once."""
        profile = RelevanceProfile()
        for _ in range(2):
            profile.add_exclusion("Test")
        assert len(profile.exclusions) == 1

    def test_add_positive_example(self):
        """add_positive_example records title and reason."""
        profile = RelevanceProfile()
        profile.add_positive_example(
            title="Gutes Beispiel",
            url="https://example.com",
            reason="Relevant für Thema X",
        )
        assert len(profile.positive_examples) == 1
        entry = profile.positive_examples[0]
        assert entry["title"] == "Gutes Beispiel"
        assert entry["reason"] == "Relevant für Thema X"

    def test_add_negative_example(self):
        """add_negative_example records an irrelevant alert."""
        profile = RelevanceProfile()
        profile.add_negative_example(
            title="Schlechtes Beispiel",
            url="https://example.com",
            reason="Werbung",
        )
        assert len(profile.negative_examples) == 1

    def test_examples_limited_to_20(self):
        """At most 20 positive examples are kept; the newest survive."""
        profile = RelevanceProfile()
        for n in range(25):
            profile.add_positive_example(
                title=f"Example {n}",
                url=f"https://example.com/{n}",
            )
        assert len(profile.positive_examples) == 20
        assert "Example 24" in profile.positive_examples[-1]["title"]

    def test_update_from_feedback_positive(self):
        """Positive feedback adds an example and bumps kept/scored."""
        profile = RelevanceProfile()
        profile.update_from_feedback(
            alert_title="Relevant Article",
            alert_url="https://example.com",
            is_relevant=True,
            reason="Sehr relevant",
        )
        assert len(profile.positive_examples) == 1
        assert profile.total_kept == 1
        assert profile.total_scored == 1

    def test_update_from_feedback_negative(self):
        """Negative feedback adds an example and bumps dropped/scored."""
        profile = RelevanceProfile()
        profile.update_from_feedback(
            alert_title="Irrelevant Article",
            alert_url="https://example.com",
            is_relevant=False,
            reason="Werbung",
        )
        assert len(profile.negative_examples) == 1
        assert profile.total_dropped == 1
        assert profile.total_scored == 1
class TestPromptContext:
    """Tests for prompt-context generation."""

    def test_empty_profile_prompt(self):
        """An empty profile yields a header but no priorities section."""
        context = RelevanceProfile().get_prompt_context()
        assert "Relevanzprofil" in context
        # Without priorities or exclusions the section is omitted.
        assert "Prioritäten" not in context

    def test_priorities_in_prompt(self):
        """Configured priorities and their descriptions show up."""
        profile = RelevanceProfile()
        profile.add_priority("Inklusion", weight=0.9, description="Sehr wichtig")
        context = profile.get_prompt_context()
        assert "Inklusion" in context
        assert "Sehr wichtig" in context

    def test_exclusions_in_prompt(self):
        """Exclusion terms are listed under an exclusions heading."""
        profile = RelevanceProfile()
        profile.add_exclusion("Stellenanzeige")
        profile.add_exclusion("Werbung")
        context = profile.get_prompt_context()
        for expected in ("Stellenanzeige", "Werbung", "Ausschlüsse"):
            assert expected in context

    def test_examples_in_prompt(self):
        """Positive examples appear in the relevant-alerts section."""
        profile = RelevanceProfile()
        profile.add_positive_example(
            title="Gutes Beispiel",
            url="https://example.com",
            reason="Relevant",
        )
        context = profile.get_prompt_context()
        assert "Gutes Beispiel" in context
        assert "relevante Alerts" in context
class TestDefaultEducationProfile:
    """Tests for the built-in education default profile."""

    def test_create_default_profile(self):
        """The factory populates priorities, exclusions and policies."""
        profile = RelevanceProfile.create_default_education_profile()
        assert profile.priorities
        assert profile.exclusions
        assert profile.policies

    def test_default_priorities(self):
        """Default priorities cover the core education topics."""
        profile = RelevanceProfile.create_default_education_profile()
        labels = {p.label for p in profile.priorities}
        for expected in ("Inklusion", "Datenschutz Schule", "Schulrecht Bayern"):
            assert expected in labels

    def test_default_exclusions(self):
        """Job ads and advertising are excluded by default."""
        profile = RelevanceProfile.create_default_education_profile()
        assert "Stellenanzeige" in profile.exclusions
        assert "Werbung" in profile.exclusions

    def test_default_policies(self):
        """German sources are preferred and a max age is configured."""
        policies = RelevanceProfile.create_default_education_profile().policies
        assert policies.get("prefer_german_sources") is True
        assert "max_age_days" in policies
class TestSerialization:
    """Round-trip serialization tests for RelevanceProfile."""

    def test_to_dict(self):
        """to_dict exports id, priorities, exclusions and timestamps."""
        profile = RelevanceProfile()
        profile.add_priority("Test", weight=0.7)
        profile.add_exclusion("Exclude")
        data = profile.to_dict()
        assert "id" in data
        assert len(data["priorities"]) == 1
        assert "Exclude" in data["exclusions"]
        assert "created_at" in data

    def test_from_dict(self):
        """from_dict rebuilds a profile from its dict form."""
        payload = {
            "id": "test-id",
            "priorities": [{"label": "Test", "weight": 0.8, "keywords": [], "description": None}],
            "exclusions": ["Exclude"],
            "positive_examples": [],
            "negative_examples": [],
            "policies": {"key": "value"},
            "created_at": "2024-01-15T10:00:00",
            "updated_at": "2024-01-15T10:00:00",
            "total_scored": 100,
            "total_kept": 60,
            "total_dropped": 40,
            "accuracy_estimate": None,
        }
        profile = RelevanceProfile.from_dict(payload)
        assert profile.id == "test-id"
        assert len(profile.priorities) == 1
        assert profile.total_scored == 100

    def test_round_trip(self):
        """to_dict followed by from_dict preserves the profile."""
        original = RelevanceProfile.create_default_education_profile()
        original.add_positive_example("Test", "https://test.com")
        restored = RelevanceProfile.from_dict(original.to_dict())
        assert restored.id == original.id
        assert len(restored.priorities) == len(original.priorities)
        assert len(restored.positive_examples) == len(original.positive_examples)

View File

@@ -0,0 +1,403 @@
"""
Tests für RelevanceScorer.
Testet sowohl die LLM-Integration als auch das Response-Parsing.
"""
import pytest
from unittest.mock import AsyncMock, patch, MagicMock
from datetime import datetime
from alerts_agent.processing.relevance_scorer import (
RelevanceScorer,
RelevanceDecision,
ScoringResult,
RELEVANCE_SYSTEM_PROMPT,
)
from alerts_agent.models.alert_item import AlertItem, AlertStatus
from alerts_agent.models.relevance_profile import RelevanceProfile
class TestScoringResult:
    """Tests for the ScoringResult dataclass."""

    def test_create_result(self):
        """All fields are stored as given."""
        result = ScoringResult(
            alert_id="test-123",
            score=0.85,
            decision=RelevanceDecision.KEEP,
            reason_codes=["matches_priority"],
            summary="Relevant für Inklusion",
        )
        assert result.alert_id == "test-123"
        assert result.score == 0.85
        assert result.decision is RelevanceDecision.KEEP

    def test_result_to_dict(self):
        """to_dict serializes the decision as its string value."""
        data = ScoringResult(
            alert_id="test-123",
            score=0.5,
            decision=RelevanceDecision.REVIEW,
        ).to_dict()
        assert data["alert_id"] == "test-123"
        assert data["decision"] == "REVIEW"
        assert "scored_at" in data

    def test_decision_enum(self):
        """Enum members carry their wire-format string values."""
        for member, value in (
            (RelevanceDecision.KEEP, "KEEP"),
            (RelevanceDecision.DROP, "DROP"),
            (RelevanceDecision.REVIEW, "REVIEW"),
        ):
            assert member.value == value
class TestRelevanceScorerInit:
    """Tests for RelevanceScorer construction."""

    def test_default_config(self):
        """Defaults point at the local gateway with standard thresholds."""
        scorer = RelevanceScorer()
        assert scorer.gateway_url == "http://localhost:8000/llm"
        assert scorer.model == "breakpilot-teacher-8b"
        assert scorer.keep_threshold == 0.7
        assert scorer.drop_threshold == 0.4

    def test_custom_config(self):
        """Constructor arguments override every default."""
        scorer = RelevanceScorer(
            gateway_url="http://custom:8080/llm",
            api_key="test-key",
            model="custom-model",
            timeout=60,
        )
        assert (scorer.gateway_url, scorer.api_key, scorer.model, scorer.timeout) == (
            "http://custom:8080/llm",
            "test-key",
            "custom-model",
            60,
        )
class TestPromptBuilding:
    """Tests for user- and system-prompt construction."""

    def test_build_user_prompt(self):
        """Title, topic, URL and snippet content all land in the user prompt."""
        scorer = RelevanceScorer()
        alert = AlertItem(
            title="Neue Inklusions-Richtlinie",
            url="https://example.com/inklusion",
            snippet="Das Kultusministerium hat...",
            topic_label="Inklusion Bayern",
        )
        prompt = scorer._build_user_prompt(alert)
        for expected in (
            "Neue Inklusions-Richtlinie",
            "Inklusion Bayern",
            "https://example.com/inklusion",
            "Kultusministerium",
        ):
            assert expected in prompt

    def test_build_user_prompt_long_snippet(self):
        """Overlong snippets are truncated with an ellipsis."""
        scorer = RelevanceScorer()
        alert = AlertItem(
            title="Test",
            url="https://example.com",
            snippet="x" * 1000,  # far beyond the 500-char cut-off
        )
        prompt = scorer._build_user_prompt(alert)
        assert "..." in prompt
        assert len(prompt) < 1000

    def test_build_system_prompt_without_profile(self):
        """Without a profile the base filter instructions are used."""
        prompt = RelevanceScorer()._build_system_prompt(None)
        for expected in ("Relevanz-Filter", "KEEP", "DROP", "JSON"):
            assert expected in prompt

    def test_build_system_prompt_with_profile(self):
        """Profile priorities and exclusions are embedded in the prompt."""
        profile = RelevanceProfile()
        profile.add_priority("Inklusion", weight=0.9)
        profile.add_exclusion("Stellenanzeige")
        prompt = RelevanceScorer()._build_system_prompt(profile)
        for expected in ("Relevanzprofil", "Inklusion", "Stellenanzeige"):
            assert expected in prompt
class TestResponseParsing:
    """Tests for parsing raw LLM responses into ScoringResults."""

    def test_parse_valid_json(self):
        """A clean JSON payload maps straight onto the result fields."""
        raw = '''{"score": 0.85, "decision": "KEEP", "reason_codes": ["matches_priority"], "summary": "Relevant"}'''
        result = RelevanceScorer()._parse_response(raw, "test-id")
        assert result.score == 0.85
        assert result.decision == RelevanceDecision.KEEP
        assert "matches_priority" in result.reason_codes
        assert result.summary == "Relevant"

    def test_parse_json_in_markdown(self):
        """JSON wrapped in a markdown code fence is still extracted."""
        raw = '''Hier ist meine Bewertung:
```json
{"score": 0.3, "decision": "DROP", "reason_codes": ["exclusion"]}
```
'''
        result = RelevanceScorer()._parse_response(raw, "test-id")
        assert result.score == 0.3
        assert result.decision == RelevanceDecision.DROP

    def test_parse_invalid_json(self):
        """Unparseable text falls back to a neutral REVIEW result."""
        result = RelevanceScorer()._parse_response("Das ist kein JSON", "test-id")
        assert result.score == 0.5  # neutral default
        assert result.decision == RelevanceDecision.REVIEW
        # The implementation may report either error code.
        assert any(code in result.reason_codes for code in ["parse_error", "error"])

    def test_parse_score_clamping(self):
        """Scores outside [0, 1] are clamped to the boundary."""
        scorer = RelevanceScorer()
        too_high = scorer._parse_response('{"score": 1.5, "decision": "KEEP"}', "test")
        assert too_high.score == 1.0
        too_low = scorer._parse_response('{"score": -0.5, "decision": "DROP"}', "test")
        assert too_low.score == 0.0

    def test_parse_invalid_decision_fallback(self):
        """Unknown decision strings fall back to a score-based decision."""
        scorer = RelevanceScorer()
        # high score -> KEEP, low -> DROP, middle -> REVIEW
        for payload, expected in (
            ('{"score": 0.9, "decision": "INVALID"}', RelevanceDecision.KEEP),
            ('{"score": 0.1, "decision": "INVALID"}', RelevanceDecision.DROP),
            ('{"score": 0.5, "decision": "INVALID"}', RelevanceDecision.REVIEW),
        ):
            assert scorer._parse_response(payload, "test").decision == expected
class TestScoreAlert:
    """Tests for the score_alert coroutine."""

    @pytest.mark.asyncio
    async def test_score_alert_success(self):
        """A successful gateway call updates the result and the alert."""
        scorer = RelevanceScorer(api_key="test-key")
        alert = AlertItem(
            title="Inklusion in Bayern",
            url="https://example.com",
        )
        # Fake a well-formed gateway reply.
        gateway_reply = MagicMock()
        gateway_reply.status_code = 200
        gateway_reply.json.return_value = {
            "choices": [{
                "message": {
                    "content": '{"score": 0.9, "decision": "KEEP", "reason_codes": ["priority"], "summary": "Relevant"}'
                }
            }]
        }
        with patch.object(scorer, "_get_client") as get_client:
            http_client = AsyncMock()
            http_client.post.return_value = gateway_reply
            get_client.return_value = http_client
            result = await scorer.score_alert(alert)
        assert result.score == 0.9
        assert result.decision == RelevanceDecision.KEEP
        # The alert itself is mutated as a side effect of scoring.
        assert alert.relevance_score == 0.9
        assert alert.status == AlertStatus.SCORED

    @pytest.mark.asyncio
    async def test_score_alert_http_error(self):
        """HTTP failures degrade to REVIEW with a gateway error code."""
        import httpx

        scorer = RelevanceScorer(api_key="test-key")
        alert = AlertItem(title="Test", url="https://example.com")
        failing_response = MagicMock()
        failing_response.status_code = 500
        failing_response.text = "Internal Server Error"
        with patch.object(scorer, "_get_client") as get_client:
            http_client = AsyncMock()
            http_client.post.side_effect = httpx.HTTPStatusError(
                "Error", request=MagicMock(), response=failing_response
            )
            get_client.return_value = http_client
            result = await scorer.score_alert(alert)
        assert result.decision == RelevanceDecision.REVIEW
        assert "gateway_error" in result.reason_codes
        assert result.error is not None

    @pytest.mark.asyncio
    async def test_score_alert_with_profile(self):
        """The profile's content must reach the system prompt."""
        scorer = RelevanceScorer(api_key="test-key")
        alert = AlertItem(title="Test", url="https://example.com")
        profile = RelevanceProfile()
        profile.add_priority("Test Topic", weight=0.9)
        gateway_reply = MagicMock()
        gateway_reply.status_code = 200
        gateway_reply.json.return_value = {
            "choices": [{"message": {"content": '{"score": 0.8, "decision": "KEEP"}'}}]
        }
        with patch.object(scorer, "_get_client") as get_client:
            http_client = AsyncMock()
            http_client.post.return_value = gateway_reply
            get_client.return_value = http_client
            await scorer.score_alert(alert, profile=profile)
            # Inspect the request body actually sent to the gateway.
            request_body = http_client.post.call_args[1]["json"]
            system_prompt = request_body["messages"][0]["content"]
        assert "Test Topic" in system_prompt
class TestScoreBatch:
    """Tests for batch scoring."""

    @pytest.mark.asyncio
    async def test_score_batch(self):
        """Every alert in the batch gets scored."""
        scorer = RelevanceScorer(api_key="test-key")
        batch = [
            AlertItem(title="Alert 1", url="https://example.com/1"),
            AlertItem(title="Alert 2", url="https://example.com/2"),
        ]
        gateway_reply = MagicMock()
        gateway_reply.status_code = 200
        gateway_reply.json.return_value = {
            "choices": [{"message": {"content": '{"score": 0.7, "decision": "KEEP"}'}}]
        }
        with patch.object(scorer, "_get_client") as get_client:
            http_client = AsyncMock()
            http_client.post.return_value = gateway_reply
            get_client.return_value = http_client
            results = await scorer.score_batch(batch)
        assert len(results) == 2
        assert all(r.decision == RelevanceDecision.KEEP for r in results)

    @pytest.mark.asyncio
    async def test_score_batch_skip_scored(self):
        """Already-scored alerts are skipped when skip_scored=True."""
        scorer = RelevanceScorer(api_key="test-key")
        fresh = AlertItem(title="New", url="https://example.com/1")
        already_done = AlertItem(title="Scored", url="https://example.com/2")
        already_done.status = AlertStatus.SCORED
        gateway_reply = MagicMock()
        gateway_reply.status_code = 200
        gateway_reply.json.return_value = {
            "choices": [{"message": {"content": '{"score": 0.5, "decision": "REVIEW"}'}}]
        }
        with patch.object(scorer, "_get_client") as get_client:
            http_client = AsyncMock()
            http_client.post.return_value = gateway_reply
            get_client.return_value = http_client
            results = await scorer.score_batch([fresh, already_done], skip_scored=True)
        # Only the fresh alert is scored.
        assert len(results) == 1
class TestScorerStats:
    """Tests for aggregate scoring statistics."""

    def test_get_stats(self):
        """Counts, rates and averages are computed over all results."""
        scorer = RelevanceScorer()
        results = [
            ScoringResult("1", 0.9, RelevanceDecision.KEEP),
            ScoringResult("2", 0.8, RelevanceDecision.KEEP),
            ScoringResult("3", 0.2, RelevanceDecision.DROP),
            ScoringResult("4", 0.5, RelevanceDecision.REVIEW),
            ScoringResult("5", 0.5, RelevanceDecision.REVIEW, error="Test Error"),
        ]
        stats = scorer.get_stats(results)
        assert stats["total"] == 5
        assert (stats["keep"], stats["drop"], stats["review"]) == (2, 1, 2)
        assert stats["errors"] == 1
        assert stats["keep_rate"] == 0.4
        assert stats["avg_score"] == pytest.approx(0.58, rel=0.01)

    def test_get_stats_empty(self):
        """An empty result list yields a zero total."""
        assert RelevanceScorer().get_stats([])["total"] == 0
class TestScorerClose:
"""Tests für Scorer Cleanup."""
@pytest.mark.asyncio
async def test_close(self):
"""Test Close schließt Client."""
scorer = RelevanceScorer()
# Erstelle Client
await scorer._get_client()
assert scorer._client is not None
# Close
await scorer.close()
assert scorer._client is None