Files
breakpilot-lehrer/backend-lehrer/alerts_agent/ingestion/scheduler.py
Benjamin Boenisch 5a31f52310 Initial commit: breakpilot-lehrer - Lehrer KI Platform
Services: Admin-Lehrer, Backend-Lehrer, Studio v2, Website,
Klausur-Service, School-Service, Voice-Service, Geo-Service,
BreakPilot Drive, Agent-Core

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-11 23:47:26 +01:00

280 lines
7.5 KiB
Python

"""
Scheduler für automatisches Feed-Fetching.
Verwendet APScheduler für periodische Jobs basierend auf Topic-Konfiguration.
"""
import logging
from datetime import datetime
from typing import Optional
from apscheduler.schedulers.asyncio import AsyncIOScheduler
from apscheduler.triggers.interval import IntervalTrigger
from apscheduler.jobstores.memory import MemoryJobStore
from sqlalchemy.orm import Session
from alerts_agent.db.database import SessionLocal
from alerts_agent.db.repository import TopicRepository
from alerts_agent.ingestion.rss_fetcher import fetch_and_store_feed
logger = logging.getLogger(__name__)
# Module-level scheduler instance (lazily created singleton)
_scheduler: Optional[AsyncIOScheduler] = None


def get_scheduler() -> AsyncIOScheduler:
    """Return the global scheduler, creating it on first access."""
    global _scheduler
    if _scheduler is not None:
        return _scheduler
    _scheduler = AsyncIOScheduler(
        jobstores={"default": MemoryJobStore()},
        job_defaults={
            "coalesce": True,  # collapse missed runs into a single run
            "max_instances": 1,  # at most one concurrent instance per job
            "misfire_grace_time": 60,  # 60s tolerance for late runs
        },
    )
    return _scheduler
async def fetch_topic_job(topic_id: str, feed_url: str) -> None:
    """
    Fetch a single topic's feed and store the new items.

    Invoked periodically by the scheduler. On failure the error message
    is persisted on the topic via the repository.

    Args:
        topic_id: ID of the topic to fetch.
        feed_url: URL of the RSS feed.
    """
    db = SessionLocal()
    try:
        logger.info("Scheduler: Fetching topic %s", topic_id)
        result = await fetch_and_store_feed(
            topic_id=topic_id,
            feed_url=feed_url,
            db=db,
        )
        logger.info(
            "Scheduler: Topic %s - %s new, %s skipped",
            topic_id,
            result["new_items"],
            result["duplicates_skipped"],
        )
    except Exception as e:
        # logger.exception keeps the traceback; logger.error(f"...") did not.
        logger.exception("Scheduler: Error fetching topic %s: %s", topic_id, e)
        # Persist the error on the topic. Guard this too: a secondary DB
        # failure here must not mask (or crash on top of) the fetch error.
        try:
            repo = TopicRepository(db)
            repo.update(topic_id, last_fetch_error=str(e))
        except Exception:
            logger.exception(
                "Scheduler: Failed to persist fetch error for topic %s", topic_id
            )
    finally:
        db.close()
def schedule_topic(
    topic_id: str,
    feed_url: str,
    interval_minutes: int = 60,
) -> str:
    """
    Register (or replace) a periodic fetch job for a topic.

    Args:
        topic_id: ID of the topic.
        feed_url: URL of the RSS feed.
        interval_minutes: Fetch interval in minutes.

    Returns:
        Job ID for later reference.
    """
    scheduler = get_scheduler()
    job_id = f"fetch_topic_{topic_id}"
    # No explicit get_job/remove_job needed: replace_existing=True makes
    # add_job atomically replace any job with the same id (and avoids the
    # check-then-remove race of the manual variant).
    scheduler.add_job(
        fetch_topic_job,
        trigger=IntervalTrigger(minutes=interval_minutes),
        id=job_id,
        name=f"Fetch Topic {topic_id}",
        kwargs={"topic_id": topic_id, "feed_url": feed_url},
        replace_existing=True,
    )
    logger.info("Scheduled topic %s every %s minutes", topic_id, interval_minutes)
    return job_id
def unschedule_topic(topic_id: str) -> bool:
    """
    Remove the fetch job for a topic.

    Args:
        topic_id: ID of the topic.

    Returns:
        True if a job was removed, False if none was found.
    """
    job_id = f"fetch_topic_{topic_id}"
    scheduler = get_scheduler()
    if scheduler.get_job(job_id) is None:
        return False
    scheduler.remove_job(job_id)
    logger.info(f"Unscheduled topic {topic_id}")
    return True
def reschedule_topic(
    topic_id: str,
    feed_url: str,
    interval_minutes: int,
) -> str:
    """
    Update the interval for a topic's job.

    Args:
        topic_id: ID of the topic.
        feed_url: URL of the RSS feed (in case it changed).
        interval_minutes: New interval in minutes.

    Returns:
        Job ID.
    """
    # Scheduling under an existing job id replaces the previous job.
    return schedule_topic(
        topic_id=topic_id,
        feed_url=feed_url,
        interval_minutes=interval_minutes,
    )
def sync_scheduler_with_db() -> dict:
    """
    Synchronize the scheduler with the database.

    Loads all topics and schedules/unschedules jobs accordingly; jobs
    whose topics no longer exist are removed as orphans.

    Returns:
        Dict with ``scheduled`` and ``unscheduled`` counts.
    """
    scheduler = get_scheduler()
    db = SessionLocal()
    try:
        topics = TopicRepository(db).get_all()

        scheduled = 0
        unscheduled = 0
        expected_job_ids = set()

        for topic in topics:
            job_id = f"fetch_topic_{topic.id}"
            if topic.is_active and topic.feed_url:
                # Active topic with a feed: (re)schedule its job.
                expected_job_ids.add(job_id)
                schedule_topic(
                    topic_id=topic.id,
                    feed_url=topic.feed_url,
                    interval_minutes=topic.fetch_interval_minutes,
                )
                scheduled += 1
            elif scheduler.get_job(job_id):
                # Inactive topic that still has a job: drop it.
                scheduler.remove_job(job_id)
                unscheduled += 1

        # Remove orphan jobs (their topics were deleted from the DB).
        for job in scheduler.get_jobs():
            if job.id.startswith("fetch_topic_") and job.id not in expected_job_ids:
                scheduler.remove_job(job.id)
                unscheduled += 1
                logger.info(f"Removed orphan job: {job.id}")

        return {"scheduled": scheduled, "unscheduled": unscheduled}
    finally:
        db.close()
def start_scheduler() -> None:
    """
    Start the scheduler.

    Should be called once at application startup; a no-op if the
    scheduler is already running.
    """
    scheduler = get_scheduler()
    if scheduler.running:
        return
    scheduler.start()
    logger.info("Alert scheduler started")
    # Populate jobs from the current DB state right away.
    result = sync_scheduler_with_db()
    logger.info(
        f"Scheduler synced: {result['scheduled']} topics scheduled, "
        f"{result['unscheduled']} removed"
    )
def stop_scheduler() -> None:
    """
    Stop the scheduler.

    Should be called at application shutdown; a no-op if the scheduler
    is not running.
    """
    scheduler = get_scheduler()
    if not scheduler.running:
        return
    scheduler.shutdown(wait=False)
    logger.info("Alert scheduler stopped")
def get_scheduler_status() -> dict:
    """
    Report the scheduler's current state.

    Returns:
        Dict with ``running``, ``jobs_count`` and a ``jobs`` list of
        per-job details (id, name, next run time, trigger).
    """
    scheduler = get_scheduler()
    jobs = [
        {
            "id": job.id,
            "name": job.name,
            "next_run": job.next_run_time.isoformat() if job.next_run_time else None,
            "trigger": str(job.trigger),
        }
        for job in scheduler.get_jobs()
    ]
    return {
        "running": scheduler.running,
        "jobs_count": len(jobs),
        "jobs": jobs,
    }
# Convenience hooks for topic lifecycle events
async def on_topic_activated(topic_id: str, feed_url: str, interval_minutes: int) -> None:
    """Hook for topic activation - schedules the fetch job."""
    schedule_topic(
        topic_id=topic_id,
        feed_url=feed_url,
        interval_minutes=interval_minutes,
    )
async def on_topic_deactivated(topic_id: str) -> None:
    """Hook for topic deactivation - removes the fetch job."""
    unschedule_topic(topic_id=topic_id)
async def on_topic_updated(
    topic_id: str,
    feed_url: str,
    interval_minutes: int,
    is_active: bool,
) -> None:
    """Hook for topic updates - reschedules or removes the fetch job."""
    if not (is_active and feed_url):
        # Deactivated or feed removed: drop any existing job.
        unschedule_topic(topic_id)
        return
    reschedule_topic(topic_id, feed_url, interval_minutes)
async def on_topic_deleted(topic_id: str) -> None:
    """Hook for topic deletion - removes the fetch job."""
    unschedule_topic(topic_id=topic_id)